<a href="https://colab.research.google.com/github/OrcunSelbasan/neural-network/blob/main/HW_Week_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
# Base class
class Layer:
    def __init__(self):
        self.input = None
        self.output = None
        self.name = None

    # computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

In [48]:
#from layer import Layer
import numpy as np

# inherit from base class Layer
class FCLayer(Layer):
    # input_size = number of input neurons
    # output_size = number of edges that connects to neurons in next layer
    def __init__(self, input_size, output_size, name):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5
        self.name = name

        '''
          Layer 2,3
            Weights - [[ 0.36026768  0.34464298 -0.41013436]
                      [ 0.0305316  -0.42181174  0.09305524]]
            Bias    - [[0.0304103  0.281616   0.07227567]]
            
          Layer 3,1
            Weights - [[-0.43197654]
                      [-0.37846526]
                      [ 0.07857096]]
            Bias    - [[0.43675409]]
        '''

    # returns output for a given input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
        '''
          Layer 2,4
            Input = [[1 0]]
            Output = [[-0.03040415 -0.10734488 -0.0019822   0.00342819]]
          
          Layer 4,1
            Input = [[-0.03039478 -0.10693446 -0.0019822   0.00342818]]
            Output = [[0.57502534]]
        '''

    # computes dE/dW, dE/dB for a given output_error=dE/dY. Returns input_error=dE/dX.
    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # dBias = output_error
        '''
          LAYER 2,3
            Input error - [[0.00307025 0.00309466]]
            Weights error - [[-0.00117569  0.00206698 -0.00446291]
                            [ 0.          0.          0.        ]]

          LAYER 3,1                           
            Input error - [[ 0.00891573 -0.00393219  0.00793399]]
            Weights error - [[ 0.00385598]
                            [ 0.00257526]
                            [-0.00205168]]
        '''

        # update parameters
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        '''
          LAYER 2,3
            Before weight - [[-2.14214651 -0.48545369 -1.147462  ]
                            [-2.13467774 -0.22201586 -1.23598115]]
            After weight  - [[-2.14214463 -0.48558004 -1.14787799]
                            [-2.13467587 -0.22214221 -1.23639715]]

            Before bias   - [[0.68279676 0.02351614 1.78193465]]
            After bias    - [[0.68279864 0.02338979 1.78151865]]

          LAYER 3,1
            Before weight - [[-2.24779919]
                             [ 0.70664231]
                             [ 2.11868674]]
            After weight  - [[-2.24789984]
                            [ 0.70663834]
                            [ 2.11852645]]

            Before bias  - [[-0.6838359]]
            After bias   - [[-0.68400555]]
        '''
        return input_error

In [49]:
#from layer import Layer

# inherit from base class Layer
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime
        '''
          Layer 2,3
            Activation - <function tanh at 0x7f6aa20f99e0>
          Layer 2,3 Backwards
            Activation - <function tanh_prime at 0x7f6aa20f9b00>

          Layer 3,1
            Activation - <function tanh at 0x7f6aa20f99e0>
          Layer 3,1 Backwards
            Activation - <function tanh_prime at 0x7f6aa20f9b00>
        '''

    # returns the activated input
    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        print(f'Input {self.input}')

        '''
          Layer 2,3
            Input - [[ 0.02817087 -0.04774162 -0.10928887]]
            Output - [[ 0.02816342 -0.04770538 -0.10885582]]
         
          Layer 3,1
            Input - [[0.57383611]]
            Output - [[0.51817096]
        '''
        return self.output

    # Returns input_error=dE/dX for a given output_error=dE/dY.
    # learning_rate is not used because there is no "learnable" parameters.
    def backward_propagation(self, output_error, learning_rate):  
        '''
          Backward propagation - [[0.0028969]]
          Backward propagation - [[-0.00393368  0.0009025  -0.00201438]]

          Backward propagation - [[-0.00314247]]
          Backward propagation - [[ 0.00147748 -0.00451947  0.00352124]]
        '''
        return self.activation_prime(self.input) * output_error

In [50]:
import numpy as np

# activation function and its derivative
def tanh(x):
    return np.tanh(x);

def tanh_prime(x):
    return 1-np.tanh(x)**2;

In [51]:

import numpy as np

# loss function and its derivative
def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2));

def mse_prime(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size;

In [52]:
# example of a function for calculating softmax for a list of numbers
from numpy import exp
 
# calculate the softmax of a vector
def softmax(vector):
    e = exp(vector)
    return e / e.sum()

In [53]:
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

        
    # predict output for given input
    def predict(self, input_data):
        # sample dimension first
        samples = len(input_data)
        result = []

        # run network over all samples
        for i in range(samples):
            # forward propagation
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)

        return result

    # train the network 
    
    def fit(self, x_train, y_train, epochs, learning_rate):
        '''
        Fit function does the training. 
        Training data is passed 1-by-1 through the network layers during forward propagation.
        Loss (error) is calculated for each input and back propagation is performed via partial 
        derivatives on each layer.
        '''
        # sample dimension first
        samples = len(x_train)

        # training loop
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # forward propagation
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                # compute loss (for display purpose only)
                err += self.loss(y_train[j], output)

                # backward propagation
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            # calculate average error on all samples
            err /= samples
            # print('epoch %d/%d   error=%f' % (i+1, epochs, err))

#  Without pre-processed

In [54]:
import numpy as np

#from network import Network
#from fc_layer import FCLayer
#from activation_layer import ActivationLayer
#from activations import tanh, tanh_prime
#from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import np_utils

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
# x_train = x_train.astype('float32')
# x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
# y_train = np_utils.to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
# x_test = x_test.astype('float32')
# x_test /= 255
# y_test = np_utils.to_categorical(y_test)

# Network
net = Network()
net.add(FCLayer(28*28, 100, "l1" ))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50, "l2"))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10, "l3"))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# as we didn't implemented mini-batch GD, training will be pretty slow if we update at each iteration on 60000 samples...
net.use(mse, mse_prime)
net.fit(x_train[0:100], y_train[0:100], epochs=35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
    106.2277821   1351.54458858   480.37419694  1099.78864788]]
Input [[-3.57097562 -2.47865207  1.35023756  9.8358959   4.27627233 -7.37889775
   1.50620107  3.68891993  4.22918893  5.23943156  1.93942943  4.29507045
  -4.66966771 -3.36898657  3.48482196  4.94797387 -3.0769041   3.22474952
   5.56524183  2.94616072  5.98437535  3.8234305   5.02540537 -5.18143144
  -0.31644091 -5.47194864 -3.28184451  3.96001365  8.54290176  0.25722207
  -4.88427021 -7.13440911  6.53316984  2.15068129  4.68792748 -5.05021595
   2.8256477  -0.77128419 -3.72617075 -2.73572746  0.90694999  8.36236107
   9.00044703  3.62686394  4.86944073 -3.20319684  1.48021271  8.44099711
  -7.24097185  1.09432198]]
Input [[ 4.23354857 12.91026026  7.07733912  3.32049589  8.7437468   9.58491288
   8.95833302  7.62948412  5.2597082   8.40253111]]
Input [[-1.40671440e+02 -1.02423733e+03 -4.26613238e+02  6.99112976e+02
  -3.59237689e+02  3.34337306e+02 -5.1220

# Using normalization (before training)

In [None]:
import numpy as np

#from network import Network
#from fc_layer import FCLayer
#from activation_layer import ActivationLayer
#from activations import tanh, tanh_prime
#from losses import mse, mse_prime

from keras.datasets import mnist
from keras.utils import np_utils

# load MNIST from server
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# training data : 60000 samples
# reshape and normalize input data
x_train = x_train.reshape(x_train.shape[0], 1, 28*28)
x_train = x_train.astype('float32')
x_train /= 255
# encode output which is a number in range [0,9] into a vector of size 10
# e.g. number 3 will become [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train)

# same for test data : 10000 samples
x_test = x_test.reshape(x_test.shape[0], 1, 28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)

# Network
net = Network()
net.add(FCLayer(28*28, 100, "l1" ))                # input_shape=(1, 28*28)    ;   output_shape=(1, 100)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(100, 50, "l2"))                   # input_shape=(1, 100)      ;   output_shape=(1, 50)
net.add(ActivationLayer(tanh, tanh_prime))
net.add(FCLayer(50, 10, "l3"))                    # input_shape=(1, 50)       ;   output_shape=(1, 10)
net.add(ActivationLayer(tanh, tanh_prime))

# train on 1000 samples
# as we didn't implemented mini-batch GD, training will be pretty slow if we update at each iteration on 60000 samples...
net.use(mse, mse_prime)
net.fit(x_train[0:1000], y_train[0:1000], epochs=35, learning_rate=0.1)

# test on 3 samples
out = net.predict(x_test[0:3])
print("\n")
print("predicted values : ")
print(out, end="\n")
print("true values : ")
print(y_test[0:3])

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  -4.22778094  2.39030665 -5.9799304  -2.99571859 -0.37726688  1.29162954
  -5.91091195  4.97333953  3.14807158 -5.51082635  7.93597451 -5.6368858
   3.52445245  2.98284947 -5.70133404 -4.62333462 -2.93877871 -0.92864049
   2.39950295  2.37503704 -3.13084893 -1.67341266 -3.568325    6.12982858
  -2.82592088  2.77934352  0.92738594 -3.71424968  6.88817788  3.63682534
   5.59025486  2.9019234  -4.18664094 -4.01747628  2.81127196  4.0398526
   3.0069956  -0.47321386 -1.80786634  0.95988018  0.66104706  4.31230736
   2.14641772  3.81620387  3.54208904 -3.83698548  4.52762952 -5.88212287
  -3.59939256 -0.43444727  6.22395077 -3.52707937 -1.69210042 -3.3146709
  10.44019625 -1.65857183 -4.67451612 -6.76699861]]
Input [[ 5.08398442 -6.51304946  1.93080338  6.68915328  3.14668922 -4.5064503
  -8.36693223 -7.13233637  3.90272577 -3.47172495 -4.22236657 -6.8860414
   3.52503519  0.94881818  3.29299651 -5.62364104 -4.8017399  -5.683