In [596]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [597]:
data, target = load_breast_cancer()["data"], load_breast_cancer()["target"]
X_train, X_test, y_train, y_test = train_test_split(data, target)

In [598]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

def sigmoid(x):
    return np.where(x < 0, np.exp(x)/(1 + np.exp(x)), 1/(1 + np.exp(-x)))

class MLP(BaseEstimator, TransformerMixin):
    
    def __init__(self, layer_sizes, layer_activations = [], learning_rate = 1.0):
        self.layer_sizes = layer_sizes
        self.layer_activations = layer_activations
        self.learning_rate = 1.0
    
    def predict(self, X):
        result = self.feed_forward(self.scaler.transform(X)).reshape(-1,1)
        result[result > 0.5] = 1
        result[result != 1]  = 0
        return result
        
    
    def feed_forward(self, X, return_intermediate = False):
        
        if return_intermediate:
            intermediate = []
        
        feed = X
        
        for w, act in zip(self.weights, self.layer_activations):
            bias_column = np.ones((X.shape[0], 1))
            feed = np.append(bias_column, feed, axis=1)
            if return_intermediate:
                intermediate.append(feed)
            feed = feed @ w
            
            if act == "sigmoid":
                feed = sigmoid(feed)
        
        if not return_intermediate:
            return feed
        return feed, intermediate
            
    
    def fit(self, X, y, n_steps = 10, warm_start = False, verbose = False):
        self.scaler = StandardScaler()
        self.scaler.fit(X)
        X = self.scaler.transform(X)
        
        if not warm_start:
            self.input_shape = X.shape[1]
            self.weights = [np.random.rand(self.input_shape+1, self.layer_sizes[0])-0.5]
        
            for l_index in range(len(self.layer_sizes)-1):
                prev_weights = np.random.rand(self.layer_sizes[l_index]+1, self.layer_sizes[l_index+1])-0.5
                self.weights.append(prev_weights)
            
        while n_steps > 0:
            if verbose:
                print("Steps remaining:", n_steps)
            result, feeds = self.feed_forward(X, True)
            yres = y.reshape(-1, 1)
            deltas = [result-yres]
            if verbose:
                print("Output cross entropy:", -1/X.shape[0]*np.sum(
                    np.multiply(yres, np.log(result)) + np.multiply(1-yres, np.log(1-result))))

            for widx in range(len(self.weights)-1, -1, -1):
                if len(deltas) > 1:
                    current_used = deltas[0][:,:-1]
                else:
                    current_used = deltas[0]
                g_prime = np.multiply(feeds[widx], 1-feeds[widx])
                gradient = current_used @ self.weights[widx].T
                new_delta = np.multiply(gradient, g_prime)
                deltas.insert(0, new_delta)
                #print(g_prime)
            
            n_steps -= 1
            
            for idx, (feed, delta) in enumerate(zip(feeds, deltas[1:])):
                #print(feed.T.shape, delta.shape, self.weights[idx].shape)
                if idx != len(deltas)-2:
                    grad = feed.T @ delta[:, :-1]
                else:
                    grad = feed.T @ delta
                self.weights[idx] -= grad*self.learning_rate/X.shape[0]
                #print(delta)
        

In [599]:
mlp = MLP([300, 300, 100, 1], ["sigmoid", "sigmoid", "sigmoid", "sigmoid"], learning_rate=0.0005)

In [601]:
mlp.fit(X_train, y_train, n_steps = 100, verbose=False)

In [602]:
result = mlp.predict(X_test)

In [603]:
from sklearn.metrics import accuracy_score

In [604]:
accuracy_score(result, y_test)

0.965034965034965

In [605]:
from tensorflow import keras

In [623]:
keras_model = keras.Sequential([
    keras.layers.Dense(300, activation="sigmoid", input_shape=X_train.shape[1:]),
    keras.layers.Dense(300, activation="sigmoid"),
    keras.layers.Dense(100, activation="sigmoid"),
    keras.layers.Dense(1, activation="sigmoid")
])

In [624]:
keras_model.compile(
    optimizer = "sgd",
    metrics = "accuracy",
    loss ="binary_crossentropy"
)
keras_model.build(input_shape = X_train.shape[1:])

In [625]:
keras_model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 300)               9300      
_________________________________________________________________
dense_14 (Dense)             (None, 300)               90300     
_________________________________________________________________
dense_15 (Dense)             (None, 100)               30100     
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 101       
Total params: 129,801
Trainable params: 129,801
Non-trainable params: 0
_________________________________________________________________


In [637]:
keras_model.fit(X_train, y_train, epochs=100, batch_size=None)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f901c67b190>

In [641]:
keras_pred = (keras_model.predict(X_test) > 0.5).astype("int32")

In [643]:
accuracy_score(keras_pred, y_test)

0.9230769230769231