In [282]:
import numpy as np
import gzip
import os
from sklearn import preprocessing
import pickle
from typing import Tuple
from dataclasses import dataclass
np.random.seed(1)

In [283]:
class MnistDataLoader:
    def __init__(self, data_folder_path):
        self.data_folder_path = data_folder_path
        self.train_file_name = 'train-images-idx3-ubyte.gz'
        self.train_label_file_name = 'train-labels-idx1-ubyte.gz'
        self.test_file_name = 't10k-images-idx3-ubyte.gz'
        self.test_label_file_name = 't10k-labels-idx1-ubyte.gz'
        self.data = dict()
        self.size = 28
        self.color_channel = 1
        self.data_list = [
            'train_images',
            'train_labels',
            'test_images',
            'test_labels'
        ]

    def load_images(self, data_list_index, file_name):
        images = gzip.open(os.path.join(self.data_folder_path, file_name), 'rb')
        self.data[self.data_list[data_list_index]] = np.frombuffer(images.read(), dtype=np.uint8, offset=16).reshape(-1, self.size, self.size)
        self.data[self.data_list[data_list_index]] = self.data[self.data_list[data_list_index]].reshape(self.data[self.data_list[data_list_index]].shape[0], self.size, self.size, self.color_channel).astype(np.float32)

    def load_labels(self, data_list_index, file_name):
        labels = gzip.open(os.path.join(self.data_folder_path, file_name), 'rb')
        self.data[self.data_list[data_list_index]] = np.frombuffer(labels.read(), dtype=np.uint8, offset=8)
        self.data[self.data_list[data_list_index]].resize(self.data[self.data_list[data_list_index]].shape[0],1)

    def load_mnist(self):
        self.load_images(data_list_index=0, file_name=self.train_file_name)
        self.load_labels(data_list_index=1, file_name=self.train_label_file_name)
        self.load_images(data_list_index=2, file_name=self.test_file_name)
        self.load_labels(data_list_index=3, file_name=self.test_label_file_name)

        self.assert_data_shape()

    def assert_data_shape(self):
        assert self.data[self.data_list[0]].shape == (60000, 28, 28, 1)
        assert self.data[self.data_list[1]].shape == (60000, 1)
        assert self.data[self.data_list[2]].shape == (10000, 28, 28, 1)
        assert self.data[self.data_list[3]].shape == (10000, 1)

    def preprocess_data(self):

        self.data[self.data_list[0]] /= 255
        self.data[self.data_list[2]] /= 255

        self.data[self.data_list[1]] = Utility.one_hot_encode(self.data[self.data_list[1]])

        assert self.data[self.data_list[1]].shape == (60000, 10)

In [284]:
class Cifer10DataLoader:
    def __init__(self, data_path):
        self.data_path = data_path
        self.size = 32
        self.color_channel = 3
        self.per_batch_data_size = 10000
        self.data = dict()

    def load_data(self, file_name):
        with open(os.path.join(self.data_path, file_name), 'rb') as f:

            data_dict=pickle.load(f, encoding='latin1')

            images = data_dict['data']
            labels = data_dict['labels']

            images = images.reshape(self.per_batch_data_size, self.color_channel, self.size, self.size).transpose(0,2,3,1).astype("float")
            labels = np.array(labels)
            print(labels.shape)

            return images, labels

    def concatenate_data(self):
        X1, Y1 = self.load_data('data_batch_1')
        X2, Y2 = self.load_data('data_batch_2')
        X3, Y3 = self.load_data('data_batch_3')
        X4, Y4 = self.load_data('data_batch_4')
        X5, Y5 = self.load_data('data_batch_5')

        self.data['train_images'] = np.concatenate(
            (
                X1, X2, X3, X4, X5
            ),
            axis=0
        )

        self.data['train_labels'] = np.concatenate(
            (
                Y1.reshape(self.per_batch_data_size, 1),
                Y2.reshape(self.per_batch_data_size, 1),
                Y3.reshape(self.per_batch_data_size, 1),
                Y4.reshape(self.per_batch_data_size, 1),
                Y5.reshape(self.per_batch_data_size, 1)
            ),
            axis=0
        )

        X_test, Y_test = self.load_data('test_batch')

        self.data['test_images'] = X_test
        self.data['test_labels'] = Y_test.reshape(Y_test.shape[0], 1)

        self.assert_data_shape()

        for key, data in self.data.items():
            print(f'Shape: {data.shape}')

    def assert_data_shape(self):
        assert self.data['train_images'].shape == (50000, 32, 32, 3)
        assert self.data['train_labels'].shape == (50000, 1)
        assert self.data['test_images'].shape  == (10000, 32, 32, 3)
        assert self.data['test_labels'].shape  == (10000, 1)

    def preprocess_data(self):
        self.data['train_images'] /= 255
        self.data['test_images'] /= 255

        self.data['train_labels'] = Utility.one_hot_encode(self.data['train_labels'])

        assert self.data['train_labels'].shape == (50000, 10)

In [285]:
class Convolution2D:

    # class variable
    layer_num = 1

    def __init__(self, num_out_channel, filter_size, stride, padding_size):
        self.num_out_channel = num_out_channel
        self.filter_size = filter_size
        self.stride = stride
        self.padding_size = padding_size
        self.h_prev, self.w_prev, self.num_channel_prev = None, None, None
        self.h_new, self.w_new = None, None
        self.W = None
        self.b = None
        self.output_tensor = None
        self.cache = {}
        self.layer_name = 'Conv2D__' + str(self.layer_num)
        self.layer_num += 1
        self.is_trainable = True
        self.training_mode = False

    def toggle_training_mode(self):
        if not self.training_mode:
            self.training_mode = True
        else:
            self.training_mode = False

    def initialize_output_dimensions(self, prev_layer_output_dim):
        """
        Initializes output dimensions with the dimension of the previous layers
        :param prev_layer_output_dim: output dimension of the layer immediately before this layer
        :return: None
        """
        self.h_prev, self.w_prev , self.num_channel_prev = prev_layer_output_dim
        self.h_new = (self.h_prev - self.filter_size + 2 * self.padding_size) // self.stride + 1
        self.w_new = (self.w_prev - self.filter_size + 2 * self.padding_size) // self.stride + 1

    def initialize_weights_biases(self):
        """
        Initializes weights with the proper dimensions
        :return:
        """
        self.W = np.random.randn(self.filter_size, self.filter_size, self.num_channel_prev, self.num_out_channel)
        self.b = np.random.randn(1, 1, 1, self.num_out_channel)

    def forward_wob(self, Z_prev, is_training):
        """
        Performs a forward operation of the convolution layer
        :param Z_prev: The activation of the previous layer
        :param is_training: whether we are in training mode or not
        :return:
        """

        assert Z_prev.shape == (self.h_prev, self.w_prev, self.num_channel_prev)
        Z_prev = np.array(Z_prev, copy=True)

        # create zero padded Z_prev
        Z_prev_padded = Utility.zero_pad_without_batch(Z_prev, self.padding_size)
        self.output_tensor = np.zeros((self.h_new, self.w_new, self.num_out_channel))

        # Apply convolution operation over this zero padded previous activation
        for row in range(self.h_new):

            row_start = row * self.stride

            for col in range(self.w_new):

                col_start = col *  self.stride

                for output_channel_index in range(self.num_out_channel):

                    Z_prev_windowed = Z_prev_padded[
                        row_start : row_start + self.filter_size,
                        col_start : col_start + self.filter_size,
                        :
                    ]

                    conv_step_W = self.W[:, :, :, output_channel_index]
                    conv_step_b = self.b[:, :, :, output_channel_index]

                    self.output_tensor[row, col, output_channel_index] = Utility.convolve_single_step(Z_prev_windowed, conv_step_W, conv_step_b)

        # asserting output shape
        assert(self.output_tensor.shape == (self.h_new, self.w_new, self.num_out_channel))

        if is_training:
            # cache some values
            pass

        # perform activation element wise in this case
        print(f'In forward of Convolution output tensor shape before relu {self.output_tensor.shape}')
        self.output_tensor = self.relu_activation.activation_f(self.output_tensor)

        # asserting output shape
        #assert(self.output_tensor.shape == (self.h_new, self.w_new, self.num_out_channel))
        print(f'In forward of CNN output tensor shape after relu {self.output_tensor.shape}')

    def forward_batch(self, Z_prev, is_training=True):
        """
        Performs a forward operation of the convolution layer
        :param Z_prev: The activation of the previous layer
        :param is_training: whether we are in training mode or not
        :return:
        """
        batch_size = Z_prev.shape[0]
        assert Z_prev.shape == (batch_size, self.h_prev, self.w_prev, self.num_channel_prev)

        self.output_tensor = np.zeros((batch_size, self.h_new, self.w_new, self.num_out_channel))
        Z_prev = np.array(Z_prev, copy=True)

        # create zero padded Z_prev
        print(f'Z prev shape:{Z_prev.shape}')
        Z_prev_padded = Utility.zero_pad(Z_prev, self.padding_size)

        # Apply convolution operation over this zero padded previous activation
        for image_index in range(batch_size):
            current_Z_prev_padded = Z_prev_padded[image_index] # choosing a single tensor from the batch
            for row in range(self.h_new):

                row_start = row * self.stride

                for col in range(self.w_new):

                    col_start = col *  self.stride

                    for output_channel_index in range(self.num_out_channel):

                        Z_prev_windowed = current_Z_prev_padded[
                                        row_start : row_start + self.filter_size,
                                        col_start : col_start + self.filter_size,
                                        :
                                        ]

                        conv_step_W = self.W[:, :, :, output_channel_index]
                        conv_step_b = self.b[:, :, :, output_channel_index]

                        self.output_tensor[image_index, row, col, output_channel_index] = Utility.convolve_single_step(Z_prev_windowed, conv_step_W, conv_step_b)

        # asserting output shape
        assert(self.output_tensor.shape == (batch_size, self.h_new, self.w_new, self.num_out_channel))
        print(self.output_tensor.shape)

        if is_training:
            # cache some values
            pass

        # perform activation element wise in this case
        #self.output_tensor = self.relu_activation.activation_f(self.output_tensor)

    def forward(self, Z_prev):
        """
        Performs a forward operation of the convolution layer
        :param Z_prev: The activation of the previous layer
        :param is_training: whether we are in training mode or not
        :return:
        """
        print(Z_prev.shape)
        batch_size = Z_prev.shape[0]
        assert Z_prev.shape == (batch_size, self.h_prev, self.w_prev, self.num_channel_prev)

        self.output_tensor = np.zeros((batch_size, self.h_new, self.w_new, self.num_out_channel))
        Z_prev = np.array(Z_prev, copy=True)

        # create zero padded Z_prev
        #print(f'Z prev shape:{Z_prev.shape}')
        Z_prev_padded = Utility.zero_pad(Z_prev, self.padding_size)

        # Apply convolution operation over this zero padded previous activation
        for row in range(self.h_new):

            row_start = row * self.stride

            for col in range(self.w_new):

                col_start = col *  self.stride

                for output_channel_index in range(self.num_out_channel):

                    Z_prev_windowed = Z_prev_padded[:,row_start : row_start + self.filter_size,
                                            col_start : col_start + self.filter_size,:]

                    conv_step_W = self.W[:, :, :, output_channel_index]
                    conv_step_b = self.b[:, :, :, output_channel_index]

                    # print('z shape', Z_prev_windowed.shape)
                    # print('w shape', conv_step_W.shape)

                    # self.output_tensor[:, row, col, output_channel_index] = np.sum(
                    #     Z_prev_windowed * conv_step_W,
                    #     axis=(1,2,3)
                    # ) + conv_step_b

                    self.output_tensor[:, row, col, output_channel_index] = Utility.convolve_single_step_over_batch(
                        Z_prev_windowed, conv_step_W, conv_step_b
                    )

            # asserting output shape
            assert(self.output_tensor.shape == (batch_size, self.h_new, self.w_new, self.num_out_channel))
            #print('output tensor shape:', self.output_tensor.shape)

            if self.training_mode:
                # cache some values
                pass

            # # perform activation element wise in this case
            # self.output_tensor = self.relu_activation.activation_f(self.output_tensor)

    def get_output_tensor(self):
        return self.output_tensor

    def backward(self):
        pass

    def update_CNN_parameters(self, dW : np.array, db: np.array):
        self.W = self.W - dW
        self.b = self.b - db

    def print_layer_dimensions(self):
        print(f'Output Tensor Dimensions: {self.output_tensor.shape}')
        print(f'Weight Dimension: {self.W.shape}')
        print(f'Bias Dimension: {self.b.shape}')

    def get_output_dimension(self) -> Tuple:
        return self.h_new, self.w_new, self.num_out_channel

In [286]:
class Flatten:
    # class variable
    layer_num = 1

    def __init__(self):
        self.input_dim = None
        self.output_tensor = None
        self.output_dim = None
        self.h_prev, self.w_prev, self.num_channel_prev = None, None, None
        self.layer_name = 'Flatten__' + str(self.layer_num)
        self.layer_num += 1
        self.is_trainable = False

    def initialize_flatten_layer_dimensions(self, prev_layer_output_dim):
        """
        :param prev_layer_output_dim: prev layer output of shape (new_h, new_w, new_channel)
        :return: None
        """
        self.h_prev, self.w_prev, self.num_channel_prev  = prev_layer_output_dim
        self.output_dim = self.h_prev * self.w_prev * self.num_channel_prev

    def forward(self, Z_prev: np.array) -> np.array:
        self.output_tensor = Z_prev.reshape(Z_prev.shape[0], Z_prev.shape[1] * Z_prev.shape[2] * Z_prev.shape[3])

        assert self.output_tensor.shape[1] == self.output_dim

    def forward_wob(self, Z_prev: np.array) -> np.array:
        self.output_tensor = Z_prev.reshape(1, Z_prev.shape[0] * Z_prev.shape[1] * Z_prev.shape[2])

    def get_output_dimension(self):
        return self.output_dim

    def get_output_tensor(self):
        return self.output_tensor

    def backward(self):
        pass

In [287]:
class MaxPool:
    # class variable
    layer_num = 1

    def __init__(self, filter_size, stride):
        self.filter_size = filter_size
        self.stride = stride
        self.h_prev, self.w_prev, self.num_channel_prev = None, None, None
        self.h_new, self.w_new, self.num_out_channel = None, None, None
        self.output_tensor = None
        self.cache = {}
        self.layer_name = 'MaxPool__'+ str(self.layer_num)
        self.layer_num += 1
        self.is_trainable = False
        self.training_mode = False

    def toggle_training_mode(self):
        if not self.training_mode:
            self.training_mode = True
        else:
            self.training_mode = False

    def initialize_max_pool_params(self, prev_layer_output_dim):
        """
        Initializes output dimensions with the dimension of the previous layers
        :param prev_layer_output_dim: output dimension of the layer immediately before this layer
        :return: None
        """
        self.h_prev, self.w_prev , self.num_channel_prev = prev_layer_output_dim
        self.h_new = int((self.h_prev - self.filter_size) / self.stride + 1)
        self.w_new = int((self.w_prev - self.filter_size) / self.stride + 1)
        self.num_out_channel = self.num_channel_prev

    def forward_wob(self, Z_prev, is_training):

        #print('prev z shape in maxpool:', Z_prev.shape)
        assert Z_prev.shape == (self.h_prev, self.w_prev, self.num_channel_prev)
        #print('here')
        Z_prev = np.array(Z_prev, copy=True)
        #print('there')
        self.output_tensor = np.zeros((self.h_new, self.w_new, self.num_out_channel))

        #print('Going for max pooling')
        # Apply convolution operation over this zero padded previous activation
        for row in range(self.h_new):

            row_start = row * self.stride

            for col in range(self.w_new):

                col_start = col *  self.stride

                for output_channel_index in range(self.num_out_channel):

                    Z_prev_windowed = Z_prev[
                                      row_start : row_start + self.filter_size,
                                      col_start : col_start + self.filter_size,
                                      output_channel_index
                                      ]

                    self.output_tensor[row, col, output_channel_index] = Utility.get_max_pool_window(Z_prev_windowed)

        assert self.output_tensor.shape == (self.h_new, self.w_new, self.num_out_channel)
        if is_training:
            pass
        print('Max pool forward done')

    def forward(self, Z_prev):

        batch_size = Z_prev.shape[0]
        assert Z_prev.shape == (batch_size, self.h_prev, self.w_prev, self.num_channel_prev)

        self.output_tensor = np.zeros((batch_size, self.h_new, self.w_new, self.num_out_channel))
        Z_prev = np.array(Z_prev, copy=True)

        # Apply convolution operation over this zero padded previous activation
        for row in range(self.h_new):

            row_start = row * self.stride

            for col in range(self.w_new):

                col_start = col *  self.stride

                for output_channel_index in range(self.num_out_channel):

                    Z_prev_windowed = Z_prev[:,
                                    row_start : row_start + self.filter_size,
                                    col_start : col_start + self.filter_size,
                                    output_channel_index
                                      ]

                    self.output_tensor[:, row, col, output_channel_index] = Utility.get_max_pool_window_over_batch(Z_prev_windowed)

        assert self.output_tensor.shape == (batch_size, self.h_new, self.w_new, self.num_out_channel)
        if self.training_mode:
            pass

    def print_layer_dimensions(self):
        print(f'Output Tensor Dimensions: {self.output_tensor.shape}')

    def get_output_tensor(self):
        return self.output_tensor

    def backward(self):
        pass

    def get_output_dimension(self):
        return self.h_new, self.w_new, self.num_out_channel

In [363]:
class DenseLayer:

    # class variable
    layer_num = 1

    def __init__(self, num_units):
        self.W = None
        self.b = None
        self.dW = None
        self.db = None
        self.num_units = num_units
        self.A_prev_layer_cached = None
        self.output_tensor = None
        self.layer_name = 'Dense__' + str(self.layer_num)
        self.layer_num += 1
        self.is_trainable = True
        self.training_mode = False

    def toggle_training_mode(self):
        if not self.training_mode:
            self.training_mode = True
        else:
            self.training_mode = False


    def initialize_dense_layer_weights_biases(self, prev_layer_output_dim):
        self.W = np.random.randn(self.num_units, prev_layer_output_dim)
        self.b = np.random.randn(self.num_units, 1) # will be broadcast to (hidden_units, batch_size) before addition

    def forward(self, A_prev_layer):
        """
        :param A_prev_layer: tensor of shape (batch, prev_flattened_shape)
        :return:
        """
        print(f'A_prev_layer shape: {A_prev_layer.shape}')
        assert A_prev_layer.shape[1] == self.W.shape[1]

        self.A_prev_layer_cached = A_prev_layer

        A_prev_layer = np.array(A_prev_layer, copy=True)
        A_prev_layer_reshaped = A_prev_layer.reshape(A_prev_layer.shape[1], A_prev_layer.shape[0])
        Z = np.dot(self.W, A_prev_layer_reshaped) + self.b
        self.output_tensor = Z.T # converting to (batch_size, num_units)
        #print('dense layer output: ',Z)

        # assert the output tensor shape should be (num_hidden_units, batch size)
        assert self.output_tensor.shape == (A_prev_layer_reshaped.shape[1], self.num_units)

        if self.training_mode:
            pass

    def get_output_tensor(self):
        return self.output_tensor

    def backward(self, dZ : np.array):
        """

        :param dZ: the gradient of loss with respect to this layers output Z. dZ = dL/dZ. Shape: (batch size , num_units)
        :return:
        """

        # for softmax activation after this, the value of dZ = y_pred - y
        A_prev_layer = self.A_prev_layer_cached
        mini_batch_size = dZ.shape[0]
        dW = (1/mini_batch_size) * np.dot(dZ.T, A_prev_layer) # (num unit, 1) * (1, x) --> (num unit, x)
        db = (1/mini_batch_size) * np.sum(dZ.T, axis=1, keepdims=True)
        dA_prev_layer = np.dot(dZ, self.W)

        print(dW.shape)
        print(db.shape)
        print(self.b.shape)
        assert dW.shape == self.W.shape
        assert db.shape == self.b.shape
        assert dA_prev_layer.shape == A_prev_layer.shape

        print(dW)
        print(db)

    def get_output_dimension(self):
        return self.num_units

    def print_layer_dimensions(self):
        print(f'Output Tensor Dimensions: {self.output_tensor.shape}')
        print(f'Weight Dimension: {self.W.shape}')
        print(f'Bias Dimension: {self.b.shape}')

In [364]:
class Utility:

    @staticmethod
    def one_hot_encode(y_true):
        # Define the One-hot Encoder
        ohe = preprocessing.OneHotEncoder()
        ohe.fit(y_true)
        y_true = ohe.transform(y_true).toarray()
        return y_true

    @staticmethod
    def zero_pad(tensor, pad_size):
        """
        :param tensor: tensor of shape (batch_size, h, w, num_channel)
        :return: padded tensor of shape (h + 2 * pad_size, w + 2 * pad_size, num_channel)
        """
        return np.pad(tensor, ((0,0), (pad_size, pad_size), (pad_size, pad_size), (0,0)), mode='constant', constant_values=0)

    @staticmethod
    def zero_pad_without_batch(tensor, pad_size):
        """
        :param tensor: tensor of shape (h, w, num_channel)
        :return: padded tensor of shape (h + 2 * pad_size, w + 2 * pad_size, num_channel)
        """
        return np.pad(tensor, ((pad_size, pad_size), (pad_size, pad_size), (0,0)), mode='constant', constant_values=0)

    @staticmethod
    def convolve_single_step(Z_prev_windowed, W, b):
        """
        :param Z_prev_windowed: window of shape (F, F, num_channel_Z_prev)
        :param W: kernel/filter/weight of shape (F, F, num_channel_Z_prev)
        :param b: bias term of shape (1, 1, 1)
        :return: scaler convolved value
        """
        return np.multiply(Z_prev_windowed, W).sum() + float(b)

    @staticmethod
    def convolve_single_step_over_batch(tensor, W, b):
        return np.sum(tensor * W, axis=(1,2,3)) + b


    @staticmethod
    def get_max_pool_window(Z_prev_windowed):
        return Z_prev_windowed.max()

    @staticmethod
    def get_max_pool_window_over_batch(Z_prev_windowed: np.array):
        #print('In max pool', Z_prev_windowed.shape)
        #print(np.max(Z_prev_windowed, axis=(1,2)))
        return np.max(Z_prev_windowed, axis=(1,2))

    @staticmethod
    def create_mini_batches(X: np.array, Y: np.array, mini_batch_size: int):
        total_data = X.shape[0]
        for index in range(0, total_data, mini_batch_size):
            start_index = index
            end_index = min(start_index + mini_batch_size, total_data)
            yield X[start_index: end_index,...], Y[start_index: end_index, ...]

In [365]:
class ReLUActivation:

    # class variable
    layer_num = 1
    def __init__(self):
        self.layer_name = 'ReLU__' + str(self.layer_num)
        self.layer_num += 1
        self.input_tensor_dimension = None
        self.output_tensor = None

    def set_input_tensor_dimension(self, prev_layer_tensor_dimension):
        self.input_tensor_dimension = prev_layer_tensor_dimension

    def forward(self, tensor):
        print('Relu Input Tensor Shape: ', tensor.shape)
        # print('in relu:', tensor)
        # print('After: ', np.maximum(tensor, 0))
        self.output_tensor = np.maximum(tensor, 0)

    def get_output_dimension(self):
        return self.input_tensor_dimension

    @staticmethod
    def backward(dA):
        return np.where(dA > 0, 1, 0)

In [366]:
@dataclass(unsafe_hash=True)
class InputLayer:
    """
    Class for saving input dimension
    """
    input_dimension: np.array
    is_trainable: bool
    layer_name: str

In [378]:
class SoftmaxActivation:

    # class variable
    layer_num = 1
    def __init__(self):
        self.input_tensor_dimension = None
        self.layer_name = 'Softmax__' + str(self.layer_num)
        self.layer_num += 1
        self.output_tensor = None

    def set_input_tensor_dimension(self, prev_layer_tensor_dimension):
        self.input_tensor_dimension = prev_layer_tensor_dimension


    def forward(self, tensor):
        print('softmax tensor shape:', tensor.shape) # expected tensor shape: (1/batch-size, classes)
        exponent = np.exp(tensor)
        print('softmax exp shape',exponent.shape)
        summation_along_batch = np.sum(exponent, axis=1, keepdims=True)
        print('softmax sum shape:', summation_along_batch.shape)
        self.output_tensor = exponent/summation_along_batch
        return self.output_tensor


    def get_output_dimension(self):
        return self.input_tensor_dimension

    @staticmethod
    def backward(dA):
        """ passes the backward gradient
        :param dA: the gradient wrt to the activation softmax
        :return:
        """
        return dA

In [399]:
class CrossEntropyLoss:
    epsilon = 1e-50

    def compute_cost(self, y_pred, y):
        mini_batch_size = y.shape[0]
        clipped_y_pred = np.clip(y_pred, self.epsilon, 1.0)
        cross_entropy_loss = -(1/mini_batch_size) * np.sum(np.multiply(y, np.log(clipped_y_pred)))
        return cross_entropy_loss

In [392]:
class GradientDescent:
    pass

In [400]:
class Model:
    def __init__(self):
        self.layers = None # a list of layer object according to input
        self.layer_w_gradients = dict() # {'layer name': dw}
        self.layer_b_gradients = dict() # {'layer name': db}
        self.optimizer = None
        self.cost_function = None

    def add(self, layer_list):
        self.layers = layer_list

    def initializer_layer_params(self):
        """
        This method initializes the layers in the model providing the input dimension that the layers expect to get
        :return: None
        """
        #InputLayer(input_dimension=input_dimension, is_trainable=False, layer_name='Input')

        assert self.layers[0].layer_name == 'Input'

        for previous_layer, current_layer in zip(self.layers, self.layers[1:]):
            prev_output_dim = None
            if previous_layer.layer_name == 'Input':
                prev_output_dim = previous_layer.input_dimension # H, W, Color Channel
            else:
                prev_output_dim = previous_layer.get_output_dimension()


            if current_layer.layer_name.startswith('Conv2D__'):
                current_layer.initialize_output_dimensions(prev_output_dim) # H, W, Color Channel
                current_layer.initialize_weights_biases()
            elif current_layer.layer_name.startswith('MaxPool__'):
                print(prev_output_dim)
                current_layer.initialize_max_pool_params(prev_output_dim) # H, W, Color Channel
            elif current_layer.layer_name.startswith('Dense__'):
                current_layer.initialize_dense_layer_weights_biases(prev_output_dim) # flatten layer dimension
            elif current_layer.layer_name.startswith('Flatten__'):
                current_layer.initialize_flatten_layer_dimensions(prev_output_dim) # (new_h, new_w, new_channel)
            elif current_layer.layer_name.startswith("ReLU") or current_layer.layer_name.startswith("Softmax") :
                current_layer.set_input_tensor_dimension(prev_output_dim) # for the activation layers


    def compile(self, optimizer, cost_function):
        self.optimizer = optimizer
        self.cost_function = cost_function

    def train(self, training_data, validation_data, epochs=5, learning_rate=0.01, mini_batch_size=32):

        X_val, y_Val = validation_data

        for e in range(epochs):
            # each epoch will run through a training once and update weights
            print(f'Running Epoch: {e}')
            X_train, Y_train = training_data
            total_data = X_train.shape[0]
            num_of_mini_batches = total_data//mini_batch_size

            # first we create the mini batches and then run training step through it
            i = 1
            for X, Y in Utility.create_mini_batches(X=X_train, Y=Y_train, mini_batch_size=mini_batch_size):
                """
                X shape --> (mini_batch_size, h, w, color_channel)
                Y shape --> (mini_batch_size, num_of_class) (one hot encoded vector)
                """
                Y_pred = self.forward_propagation(X)
                print('final input shape', Y_pred.shape)
                print('final output:', Y_pred)
                self.backward_propagation(Y_pred,Y)
                self.update_layer_parameters(learning_rate)
                Loss = self.cost_function.compute_cost(Y_pred, Y)
                print('Loss is : ', Loss)
                print("\rProgress {:1.1%}".format(i / num_of_mini_batches), end="")
                print()
                i += 1

            print()
            print(f'Cost After a Epoch {e+1}: {Loss * 100}')

            # perform validation step here
        print('Finish Training!')

    def predict(self):
        pass

    def forward_propagation(self, X_train) -> np.array:
        """
        Performs a forward pass through the network
        :param X_train: nd training tensor (batch_size, h, w, color_channel)
        :param is_training: whether we are training or not
        :return:
        """
        input = X_train
        for layer in self.layers[1:]:
            # skipping the input layer
            print(f'Forward for layer {layer.layer_name}')
            if not layer.layer_name.startswith('ReLU') and not layer.layer_name.startswith('Softmax') and not layer.layer_name.startswith('Flatten'):
                layer.toggle_training_mode() # toggling training mode for a layer
            layer.forward(input)
            input = layer.output_tensor # getting the output tensor of the layer to be the input tensor to the next

            if not layer.layer_name.startswith('ReLU') and not layer.layer_name.startswith('Softmax') and not layer.layer_name.startswith('Flatten'):
                layer.toggle_training_mode() # turning the training mode off here


        return input

    def backward_propagation(self, Y_out, Y):
        pass

    def update_layer_parameters(self, learning_rate):
        pass


In [371]:
mnist = MnistDataLoader('./dataset/mnist')
mnist.load_mnist()
mnist.preprocess_data()

In [372]:
np.random.seed(1)
A_prev = np.random.randn(10,5,7,4) # h, w, c of previous layer
cnn = Convolution2D(num_out_channel=8, filter_size=3, stride=2, padding_size=1)
cnn.initialize_output_dimensions((A_prev.shape[1], A_prev.shape[2], A_prev.shape[3]))
cnn.initialize_weights_biases()
#cnn.print_layer_dimensions()

In [218]:
cnn.forward_batch_i(A_prev, is_training=True)

Z prev shape:(10, 5, 7, 4)
output tensor shape: (10, 3, 4, 8)
output tensor shape: (10, 3, 4, 8)
output tensor shape: (10, 3, 4, 8)


In [219]:
print(cnn.output_tensor.shape)
print("Z's mean =\n", np.mean(cnn.output_tensor))
print("Z[3,2,1] =\n", cnn.output_tensor[3,2,1])

(10, 3, 4, 8)
Z's mean =
 0.6923608807576933
Z[3,2,1] =
 [-1.28912231  2.27650251  6.61941931  0.95527176  8.25132576  2.31329639
 13.00689405  2.34576051]


In [231]:
# Case 1: stride of 1
np.random.seed(1)
A_prev = np.random.randn(2, 5, 5, 3)
maxpool = MaxPool(filter_size=3, stride=1)
maxpool.initialize_max_pool_params((A_prev.shape[1],A_prev.shape[2],A_prev.shape[3]))
maxpool.forward_batching(A_prev, True)
maxpool.print_layer_dimensions()
print(maxpool.output_tensor)

In max pool (2, 3, 3)
[1.74481176 1.19891788]
In max pool (2, 3, 3)
[0.90159072 0.84616065]
In max pool (2, 3, 3)
[1.65980218 0.82797464]
In max pool (2, 3, 3)
[1.74481176 0.69803203]
In max pool (2, 3, 3)
[1.46210794 0.84616065]
In max pool (2, 3, 3)
[1.65980218 1.2245077 ]
In max pool (2, 3, 3)
[1.74481176 0.69803203]
In max pool (2, 3, 3)
[1.6924546  1.12141771]
In max pool (2, 3, 3)
[1.65980218 1.2245077 ]
In max pool (2, 3, 3)
[1.14472371 1.96710175]
In max pool (2, 3, 3)
[0.90159072 0.84616065]
In max pool (2, 3, 3)
[2.10025514 1.27375593]
In max pool (2, 3, 3)
[1.14472371 1.96710175]
In max pool (2, 3, 3)
[0.90159072 0.84616065]
In max pool (2, 3, 3)
[1.65980218 1.23616403]
In max pool (2, 3, 3)
[1.14472371 1.62765075]
In max pool (2, 3, 3)
[1.6924546  1.12141771]
In max pool (2, 3, 3)
[1.65980218 1.2245077 ]
In max pool (2, 3, 3)
[1.13162939 1.96710175]
In max pool (2, 3, 3)
[1.51981682 0.86888616]
In max pool (2, 3, 3)
[2.18557541 1.27375593]
In max pool (2, 3, 3)
[1.13162939 

In [56]:
cifer_dataloader = Cifer10DataLoader('/home/akil/Work/Work/Academics/4-2/ML/Assignment-3/dataset/cifer-10/cifar-10-python/cifar-10-batches-py')
cifer_dataloader.concatenate_data()
cifer_dataloader.preprocess_data()

(10000,)
(10000,)
(10000,)
(10000,)
(10000,)
(10000,)
Shape: (50000, 32, 32, 3)
Shape: (50000, 1)
Shape: (10000, 32, 32, 3)
Shape: (10000, 1)


In [57]:
cifer_dataloader.data['train_labels'][0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.])

In [89]:
cifer_dataloader.data['train_images'].shape

(50000, 32, 32, 3)

In [90]:
cifer_dataloader.data['train_labels'].shape

(50000, 10)

In [264]:
total_data = cifer_dataloader.data['train_images'].shape[0]
mini_batch_size = 32
p = np.random.permutation(total_data)
cifer_dataloader.data['train_images'], cifer_dataloader.data['train_labels'] = cifer_dataloader.data['train_images'][p, :], cifer_dataloader.data['train_labels'][p, :]

In [265]:
X = cifer_dataloader.data['train_images']
Y = cifer_dataloader.data['train_labels']

## Testing The forward pass of the Model

In [158]:
image = mnist.data[mnist.data_list[0]][0]
label = mnist.data[mnist.data_list[1]][1]

In [297]:
X_train = mnist.data[mnist.data_list[0]]
Y_train = mnist.data[mnist.data_list[1]]
X_test = mnist.data[mnist.data_list[0]]
Y_test = mnist.data[mnist.data_list[1]]
Y_test.shape

(60000, 10)

In [406]:
cnn = Model()
cnn.add(layer_list=[
    InputLayer(input_dimension=(mnist.size, mnist.size, mnist.color_channel), is_trainable=False, layer_name='Input'),
    Convolution2D(num_out_channel=8, filter_size=3, stride=2, padding_size=1),
    ReLUActivation(),
    MaxPool(filter_size=3, stride=1),
    Flatten(),
    DenseLayer(10),
    ReLUActivation(),
    DenseLayer(10),
    SoftmaxActivation()
])

In [407]:
cnn.compile(optimizer=GradientDescent(), cost_function=CrossEntropyLoss())

In [408]:
cnn.initializer_layer_params()

(14, 14, 8)


In [409]:
# out = cnn.forward_propagation(image)

In [410]:
cnn.train(training_data=(X_train, Y_train), validation_data=(X_test, Y_test),mini_batch_size=32)

Running Epoch: 0
Forward for layer Conv2D__1
(32, 28, 28, 1)
Forward for layer ReLU__1
Relu Input Tensor Shape:  (32, 14, 14, 8)
Forward for layer MaxPool__1
Forward for layer Flatten__1
Forward for layer Dense__1
A_prev_layer shape: (32, 1152)
Forward for layer ReLU__1
Relu Input Tensor Shape:  (32, 10)
Forward for layer Dense__1
A_prev_layer shape: (32, 10)
Forward for layer Softmax__1
softmax tensor shape: (32, 10)
softmax exp shape (32, 10)
softmax sum shape: (32, 1)
final input shape (32, 10)
final output: [[7.07990849e-055 6.39026750e-041 1.00000000e+000 2.03830498e-023
  1.07039820e-053 2.90182400e-036 2.14877530e-024 3.42945286e-018
  1.45708849e-012 6.13894199e-019]
 [1.53070966e-015 9.42176887e-007 9.99998882e-001 5.40941872e-008
  1.17906721e-007 1.83150791e-009 1.97131130e-009 1.75654221e-011
  1.06088561e-012 3.46510919e-011]
 [9.32451304e-064 4.24186220e-021 1.00000000e+000 2.67181764e-032
  1.78887488e-022 6.87440810e-033 5.86683361e-037 4.16081600e-040
  6.02280947e-046

KeyboardInterrupt: 

In [325]:
dense = DenseLayer(num_units=4)
dense.W = np.array( [[ 1.4401747,   0.72498046, -0.05727674],
                     [-1.15246919, -0.39990891,  0.44136903],
                     [ 1.14171484, -1.41891945,  0.73059128],
                     [ 0.60664542, -0.08249916, -1.05893566]])
dense.W.shape

(4, 3)

In [326]:
a = np.array([ [0.83351854,  -0.55429203,   0.0702855 ]])
a.shape

(1, 3)

In [327]:
dense.b = np.array( [[-0.64243089], [0.51146315], [-0.17120088], [1.10775354]])
dense.b.shape

(4, 1)

In [328]:
dense.forward(a)

A_prev_layer shape: (1, 3)
dense layer output:  [[ 0.15210481]
 [-0.19645312]
 [ 1.61828532]
 [ 1.58470455]]


In [254]:
dz = dense.output_tensor
dense.backward(dz)

(4, 3)
(4, 1)
(4, 1)
[[ 0.12678218 -0.08431048  0.01069076]
 [-0.16374732  0.1088924  -0.01380781]
 [ 1.34887082 -0.89700266  0.11374199]
 [ 1.32088062 -0.8783891   0.11138175]]
[[ 0.15210481]
 [-0.19645312]
 [ 1.61828532]
 [ 1.58470455]]


In [330]:
s = np.array([[-0.39692968,  1.78981015, -0.54303206,  0.14530002,  1.42375341]])
s = s.reshape(1, -1)

In [335]:
soft = SoftmaxActivation()
print(soft.forward(s))

softmax tensor shape: (1, 5)
[[0.05357302 0.47712828 0.0462908  0.09213688 0.33087103]]


In [384]:
np.log(1)

0.0