In [2]:
class Layer:
    def __init__(self):
        self.input = None
        self.output = None
        
    #calculate output from layer n on input from layer n-1 and feed into layer n+1
    def forwardPropagation(self, input):
        raise NotImplementedError
    def backwardPropagation(selfm, output_error, learning_rate):
        raise NotImplementedError

In [16]:
import numpy as np

class FCLayer(Layer):
    
    def __init__(self, input_size, output_size):
        #self.input = input_size #number of input neurons (num rows of weight matrix)
        #self.output = output_size #number of output neurons (num cols of weight matrix)
        
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1,output_size) - 0.5
        
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
    
    #computes  on an output_error=dE/dY the input_error=dE/dX
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error ,self.weights.T) #dE/dY*W^t
        #weights_error = np.dot(self.input.T, output_error) #X^t*dE/dY
        weights_error = np.dot(self.input.T, output_error)
        bias_error = output_error #dE/dY
        
        #update weights (activations) and biases (parameters=weights&bias)
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        
        return input_error
        

In [17]:
class ActivationLayer(Layer):
    
    def __init__(self, activation, activation_prime):
        self.activation = activation #activation func
        self.activation_prime = activation_prime #derivative of activation
        
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output
    
    #given dE/dY calc dE/dX
    def backward_propagation(self, output_error, learning_rate):
         return output_error * self.activation_prime(self.input)

In [5]:
#activation func
def tanh(x):
    return np.tanh(x);

def tanh_prime(x):
    return 1-np.tanh(x)**2;

In [6]:
#loss functions
def mse(y_pred, y_target):
    return np.mean((np.power(y_target-y_pred, 2)))
                   
def mse_prime(y_pred, y_target):
    return 2*(y_target-y_pred)/y_target.size    

In [37]:
class Network:
    
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None
        
    def addLayer(self, layer):
        self.layers.append(layer)
        
    #loss functions i.e. mse and mse_prime
    def setLoss(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime
    
    #predict output for sample / forward pass
    def predict(self, input_data):
        #sample dimensions
        samples = len(input_data)
        results = []
        
        for i in range(samples):
            #input for first layer
            output = input_data[i]
            
            for layer in self.layers:
                output = layer.forward_propagation(output)
            results.append(np.around(output,2))
            
        return results
    
    #train NN
    def fit(self, x_train, y_train, epochs, learning_rate):
        
        samples = len(x_train)
        
        #epochs
        for i in range(epochs):
            err = 0
            for j in range(samples):
                #first layer input
                output = x_train[j]
                
                for layer in self.layers:
                    output = layer.forward_propagation(output)
                
                #compute loss
                err += self.loss(y_train[j], output)
                
                #compute error
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)
                
                err /=samples #err per sample
                print('epoch %d/%d   error=%f' % (i+1, epochs, err))
            

In [8]:
# training data XOR / true or false
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

#NN
net = Network() #baseClass
net.addLayer(FullyConnected(2,3)) #2 is input dim
net.addLayer(Activation(tanh, tanh_prime))
net.addLayer(FullyConnected(3,1)) #3 is input from last layer, 1 is for classification
net.addLayer(Activation(tanh, tanh_prime))

#train
net.setLoss(mse, mse_prime)
net.fit(x_train, y_train, epochs=1000, learning_rate=0.1)

out = net.predict(x_train)
print(out)

epoch 1/1000   error=0.025692
epoch 1/1000   error=0.158566
epoch 1/1000   error=0.145246
epoch 1/1000   error=0.110180
epoch 2/1000   error=0.053031
epoch 2/1000   error=0.113841
epoch 2/1000   error=0.106143
epoch 2/1000   error=0.115059
epoch 3/1000   error=0.064412
epoch 3/1000   error=0.100129
epoch 3/1000   error=0.092982
epoch 3/1000   error=0.117820
epoch 4/1000   error=0.069270
epoch 4/1000   error=0.094623
epoch 4/1000   error=0.087416
epoch 4/1000   error=0.119093
epoch 5/1000   error=0.071336
epoch 5/1000   error=0.092074
epoch 5/1000   error=0.084774
epoch 5/1000   error=0.119600
epoch 6/1000   error=0.072117
epoch 6/1000   error=0.090782
epoch 6/1000   error=0.083422
epoch 6/1000   error=0.119730
epoch 7/1000   error=0.072287
epoch 7/1000   error=0.090073
epoch 7/1000   error=0.082681
epoch 7/1000   error=0.119683
epoch 8/1000   error=0.072162
epoch 8/1000   error=0.089646
epoch 8/1000   error=0.082240
epoch 8/1000   error=0.119560
epoch 9/1000   error=0.071896
epoch 9/10

epoch 367/1000   error=0.001051
epoch 367/1000   error=0.000287
epoch 368/1000   error=0.000007
epoch 368/1000   error=0.000941
epoch 368/1000   error=0.001043
epoch 368/1000   error=0.000284
epoch 369/1000   error=0.000007
epoch 369/1000   error=0.000934
epoch 369/1000   error=0.001035
epoch 369/1000   error=0.000282
epoch 370/1000   error=0.000007
epoch 370/1000   error=0.000928
epoch 370/1000   error=0.001027
epoch 370/1000   error=0.000280
epoch 371/1000   error=0.000007
epoch 371/1000   error=0.000921
epoch 371/1000   error=0.001019
epoch 371/1000   error=0.000277
epoch 372/1000   error=0.000007
epoch 372/1000   error=0.000914
epoch 372/1000   error=0.001012
epoch 372/1000   error=0.000275
epoch 373/1000   error=0.000007
epoch 373/1000   error=0.000908
epoch 373/1000   error=0.001004
epoch 373/1000   error=0.000273
epoch 374/1000   error=0.000007
epoch 374/1000   error=0.000901
epoch 374/1000   error=0.000997
epoch 374/1000   error=0.000271
epoch 375/1000   error=0.000007
epoch 37

epoch 730/1000   error=0.000000
epoch 730/1000   error=0.000234
epoch 730/1000   error=0.000253
epoch 730/1000   error=0.000065
epoch 731/1000   error=0.000000
epoch 731/1000   error=0.000233
epoch 731/1000   error=0.000252
epoch 731/1000   error=0.000064
epoch 732/1000   error=0.000000
epoch 732/1000   error=0.000233
epoch 732/1000   error=0.000252
epoch 732/1000   error=0.000064
epoch 733/1000   error=0.000000
epoch 733/1000   error=0.000232
epoch 733/1000   error=0.000251
epoch 733/1000   error=0.000064
epoch 734/1000   error=0.000000
epoch 734/1000   error=0.000231
epoch 734/1000   error=0.000251
epoch 734/1000   error=0.000064
epoch 735/1000   error=0.000000
epoch 735/1000   error=0.000231
epoch 735/1000   error=0.000250
epoch 735/1000   error=0.000064
epoch 736/1000   error=0.000000
epoch 736/1000   error=0.000230
epoch 736/1000   error=0.000250
epoch 736/1000   error=0.000064
epoch 737/1000   error=0.000000
epoch 737/1000   error=0.000230
epoch 737/1000   error=0.000249
epoch 73

In [44]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras import utils

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28) #28*28 picture -> flattend
x_train = x_train.astype('float32')
x_train /= 255 #normalize (one channel 0-255)
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = utils.to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = utils.to_categorical(y_test)

# Network
net = Network()
net.addLayer(FCLayer(28*28, 100))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.addLayer(ActivationLayer(tanh, tanh_prime))
net.addLayer(FCLayer(100, 50))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.addLayer(ActivationLayer(tanh, tanh_prime))
net.addLayer(FCLayer(50, 10))                    # input_shape=(1, 50)       ;   output_shape=(1, 10) must be ten bc 10 possible outcomes
net.addLayer(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# as we didn't implemented mini-batch GD, training will be pretty slow if we update at each iteration on 60000 samples...
net.setLoss(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=1, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

epoch 1/1   error=0.000664
epoch 1/1   error=0.000698
epoch 1/1   error=0.000631
epoch 1/1   error=0.000592
epoch 1/1   error=0.000915
epoch 1/1   error=0.000447
epoch 1/1   error=0.000601
epoch 1/1   error=0.000425
epoch 1/1   error=0.000717
epoch 1/1   error=0.000776
epoch 1/1   error=0.000440
epoch 1/1   error=0.000789
epoch 1/1   error=0.000567
epoch 1/1   error=0.000912
epoch 1/1   error=0.000581
epoch 1/1   error=0.000451
epoch 1/1   error=0.000379
epoch 1/1   error=0.000598
epoch 1/1   error=0.000938
epoch 1/1   error=0.000891
epoch 1/1   error=0.000542
epoch 1/1   error=0.000841
epoch 1/1   error=0.000357
epoch 1/1   error=0.000415
epoch 1/1   error=0.000540
epoch 1/1   error=0.000518
epoch 1/1   error=0.000556
epoch 1/1   error=0.000286
epoch 1/1   error=0.000495
epoch 1/1   error=0.000841
epoch 1/1   error=0.000552
epoch 1/1   error=0.000853
epoch 1/1   error=0.000891
epoch 1/1   error=0.000607
epoch 1/1   error=0.000433
epoch 1/1   error=0.000752
epoch 1/1   error=0.000646
e

epoch 1/1   error=0.000213
epoch 1/1   error=0.000060
epoch 1/1   error=0.000162
epoch 1/1   error=0.000082
epoch 1/1   error=0.000107
epoch 1/1   error=0.000167
epoch 1/1   error=0.000110
epoch 1/1   error=0.000112
epoch 1/1   error=0.000151
epoch 1/1   error=0.000160
epoch 1/1   error=0.000278
epoch 1/1   error=0.000267
epoch 1/1   error=0.000225
epoch 1/1   error=0.000145
epoch 1/1   error=0.000263
epoch 1/1   error=0.000287
epoch 1/1   error=0.000162
epoch 1/1   error=0.000250
epoch 1/1   error=0.000222
epoch 1/1   error=0.000133
epoch 1/1   error=0.000109
epoch 1/1   error=0.000235
epoch 1/1   error=0.000195
epoch 1/1   error=0.000301
epoch 1/1   error=0.000200
epoch 1/1   error=0.000156
epoch 1/1   error=0.000361
epoch 1/1   error=0.000418
epoch 1/1   error=0.000284
epoch 1/1   error=0.000214
epoch 1/1   error=0.000190
epoch 1/1   error=0.000098
epoch 1/1   error=0.000177
epoch 1/1   error=0.000340
epoch 1/1   error=0.000145
epoch 1/1   error=0.000173
epoch 1/1   error=0.000156
e

epoch 1/1   error=0.000180
epoch 1/1   error=0.000146
epoch 1/1   error=0.000170
epoch 1/1   error=0.000085
epoch 1/1   error=0.000084
epoch 1/1   error=0.000130
epoch 1/1   error=0.000116
epoch 1/1   error=0.000180
epoch 1/1   error=0.000150
epoch 1/1   error=0.000188
epoch 1/1   error=0.000152
epoch 1/1   error=0.000230
epoch 1/1   error=0.000089
epoch 1/1   error=0.000111
epoch 1/1   error=0.000084
epoch 1/1   error=0.000165
epoch 1/1   error=0.000067
epoch 1/1   error=0.000031
epoch 1/1   error=0.000113
epoch 1/1   error=0.000044
epoch 1/1   error=0.000054
epoch 1/1   error=0.000094
epoch 1/1   error=0.000090
epoch 1/1   error=0.000104
epoch 1/1   error=0.000133
epoch 1/1   error=0.000088


predicted values : 
[array([[-0.13,  0.24, -0.15,  0.44, -0.14,  0.01,  0.36,  0.53,  0.17,
        -0.08]]), array([[ 0.04, -0.01, -0.09,  0.65, -0.09,  0.2 ,  0.48, -0.11,  0.04,
         0.1 ]]), array([[-0.27,  0.78,  0.08,  0.42, -0.06,  0.51,  0.46,  0.31,  0.01,
        -0.08]])]
true val