In [29]:
import numpy as np
import pandas as pd
import copy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler 
from sklearn.utils import shuffle

In [43]:
def validity_check(image, conv_filter, padding, stride):
    
    is_valid = True
    image_shape = (np.shape(image))
    print('image  - ' + str(image_shape))
    filter_shape = (np.shape(conv_filter))
    print('filter ' + str(filter_shape))
    
    
    if len(image_shape) != len(filter_shape) - 1:
        print('Dimensions of both image and filter should be equal')
        is_valid = False
    
    if len(image_shape) == 3 or len(filter_shape) > 3:
        if image_shape[-1] != filter_shape[-1]:
            print('Number of channels in both image and filter should be equal')
            is_valid = False
        
    if filter_shape[1] != filter_shape[2]:
        print("Filter should be of a square matrix")
        is_valid = False
        
        

    if filter_shape[1] % 2 == 0:
        print('dimensions of filter should be of odd dimensions not even')
        is_valid = False
    

    '''
    convolution_result is the array we obtain after running the filter all over the image
    '''    
    if is_valid:
        '''one or more filters but only one channel (also for image)'''
        if len(image_shape) == 2 and len(filter_shape) == 3:
            
            convolution_result = np.zeros((
                            np.int16(((image_shape[0] + 2 * padding - filter_shape[1]) / stride) + 1),
                            np.int16(((image_shape[1] + 2 * padding - filter_shape[2]) / stride) + 1)))
            
        '''one or more filters with more than one channel (also for image)'''
        if len(image_shape) == 3 and len(filter_shape) == 4:
            convolution_result = np.zeros((
                            np.int16(((image_shape[0] + 2 * padding - filter_shape[1]) / stride) + 1),
                            np.int16(((image_shape[1] + 2 * padding - filter_shape[2]) / stride) + 1),
                            image_shape[3]))
    else:
        convolution_result = -1
    
    return [is_valid, convolution_result]


def Relu(x):
    indices = np.where(x < 0)
    x[indices] = 0
    return x

In [153]:
'''
The dimensions represent height, width and no_of_channels in the input image and filters
'''


class Conv_pool:
    '''
    self.conv_result, self.pooling_result and self.relu_result  are of shape (m, n, o, p)
    m - length of  conv_result (or pooling_result or relu_result) - no of images
    n - number of different conv_results (produced by each filter in the input) or pooling_results 
    0 - number of rows in each entry in  conv_result (or pooling_result)
    p - number of columns in each entry in conv_result (or pooling_result)
    '''
    def __init__(self, image, conv_filter, conv_bias, padding, stride, pooling_filter_size, pooling_filter_stride):
        self.curr_image         = image
        self.conv_filter        = conv_filter
        self.conv_bias          = conv_bias
        self.conv_filter_stride = stride
        self.pool_filter_size   = pooling_filter_size
        self.pool_filter_stride = pooling_filter_stride
        self.padding            = padding
        self.conv_result        = np.array([])
        self.relu_result        = np.array([])
        self.pooling_result     = np.array([])
            
    
    
    #####################################################
    
    def apply_convolution_every_image(self, convolution_result):
        image        = self.curr_image
        conv_filter  = self.conv_filter
        stride       = self.conv_filter_stride
        image_shape  = np.shape(image)
        filter_shape = np.shape(conv_filter)
        
        no_of_filters         = filter_shape[0]
        convoluted_image_list = []
        
        '''this loop runs for all the different filters in the conv_filter'''
        for filter_num in range(0, no_of_filters):
            convoluted_image = np.array([])
            filter_ = conv_filter[filter_num, :]
    
            '''if image has more than one channel'''
            if len(image_shape) == 3 and image_shape[-1] == filter_shape[-1]:

                convoluted_image = self.convolve_the_image_by_filter(image[:, :, 0], filter_[:, :, 0], 
                                                                   convolution_result)
                for channel in range(1, filter_shape[-1]):
                    convoluted_image = convoluted_image + self.convolve_the_image_by_filter(image[:, :, channel], 
                                                                                             filter_[:, :, channel],
                                                                                                 convolution_result)
            '''if image has only one channel'''
            if len(image_shape) == 2:
                
                convoluted_image = self.convolve_the_image_by_filter(image, filter_, convolution_result)
            '''here, you used copy.copy() so as to refrain the convoluted_image_list getting over-written
            by the new entries into the list'''
            
            convoluted_image = np.add(convoluted_image, self.conv_bias)
            convoluted_image_list.append(copy.copy(convoluted_image))
            
        self.conv_result = np.array(convoluted_image_list)
            
    
    
    #########################################################
    '''
    when supplied with both image and filter, perform either convolution or pooling
    '''
    def apply_conv_or_pooling_on_data(self, image, filter_size, result, 
                                       stride_length, is_pooling, conv_filter = None):
        
        row_count = 0
        column_count = 0

        image_shape = np.shape(image)[1]
        for row in range(0, image_shape, stride_length):
            if row + filter_size <= image_shape:
                column_count = 0
                for column in range(0, image_shape, stride_length):

                    if column + filter_size <= image_shape:
                        
                        '''present active region is the region in the image which will be multiplied
                            by the filter'''
                        if not is_pooling:
                            
                            present_active_region           = image[row : (row + filter_size), 
                                                                          column : (column + filter_size)]
                            image_area_product_filter       = present_active_region * conv_filter
                            sum_of_all_elements             = np.sum(image_area_product_filter)
                            result[row_count, column_count] = sum_of_all_elements
                        
                        if is_pooling:
                            
                            active_region       = image[row : (row + filter_size), 
                                                                    column : (column + filter_size)] 
                            result[row_count, column_count] = np.max(active_region)

                            
                        column_count = column_count + 1
            row_count = row_count + 1
        
        return result
        
        
           
    ######################################################
    
    def convolve_the_image_by_filter(self, image, conv_filter, convolution_result):
        
        stride      = self.conv_filter_stride
        image_shape = np.shape(image)    
        filter_size = np.shape(conv_filter)[0]
        
        result = self.apply_conv_or_pooling_on_data(image, filter_size, 
                                                    convolution_result, stride, False, conv_filter)
        return result

    
    
    ######################################################
    
    def relu_on_convolution_result(self):
        convolution_result_ = self.conv_result
        
        conv_result_shape            = np.shape(convolution_result_)
        no_of_times_image_convoluted = conv_result_shape[0]
        
        
            
        relu_result_list = []

        for result_num in range(0, no_of_times_image_convoluted):

            current_convoluted_image = convolution_result_[result_num, :]
            indices = np.where(current_convoluted_image <= 0)
            current_convoluted_image[indices] = 0
            relu_result_list.append(current_convoluted_image)

        self.relu_result = np.array(relu_result_list)

        
    ######################################################
    
    '''max pooling'''    
    def pooling_on_relu_result(self):
        relu_result = self.relu_result
        pool_size   = self.pool_filter_size
        stride      = self.pool_filter_stride
        

        relu_result_shape            = np.shape(relu_result)
        
        no_of_relu_results_per_image = np.shape(relu_result)[0]
        relu_result_shape_each_image = np.shape(relu_result)[1]

        pooling_result_list = []
        
        
    
        for relu_result_num in range(0, no_of_relu_results_per_image):

            curr_relu = relu_result[relu_result_num, :]

            pooling_result   = np.zeros((np.uint16(((relu_result_shape_each_image - pool_size) / stride) + 1),
                                   np.uint16(((relu_result_shape_each_image - pool_size) / stride) + 1)))

            result = self.apply_conv_or_pooling_on_data(image = curr_relu, filter_size = pool_size, 
                                                        result = pooling_result, stride_length = stride, 
                                                        is_pooling = True)
            pooling_result_list.append(result)


            
            
        self.pooling_result = np.array(pooling_result_list)
        
    
    
    
    
    def pad_the_image(self):
        pad_img = np.array()
        if self.conv_filter.shape[1] == 3:
            pad_img = np.zeros((30, 30))
        
        if self.conv_filter.shape[1] == 5:
            pad_img = np.zeros((32, 32))
            
        pad_img[1 : self.curr_image.shape[1] + 1, 1 : self.curr_image.shape[1] + 1]  = self.curr_image
        self.curr_image = pad_img
    
    ######################################################
    
    def CNN(self):
        
        if self.padding:
            self.pad_the_image()
            
        validity_convolution_result = validity_check(self.curr_image, self.conv_filter, 
                                                     self.padding, self.conv_filter_stride)
        if validity_convolution_result[0]:
            convolution_result = validity_convolution_result[1]
            self.apply_convolution_every_image(convolution_result)
            self.relu_on_convolution_result()
            self.pooling_on_relu_result()

                    

In [148]:
def Relu(x):
    indices = np.where(x <= 0)
    x[indices] = 0
    return x
    
def softmax(x):
    x = x.reshape(1, -1)
    B = np.exp(x - max(x))
    C = np.sum(B)
    return B / C



def cross_entropy(output, labels):
    return -np.sum(labels * np.log(output))



class fully_connected_layers_in_CNN:
    
    def __init__(self, conv_pool_object, labels, weights_1, weights_2, bias_1, bias_2, no_of_neurons):
        self.conv_pool_result = conv_pool_object
        self.pooling_result = conv_pool_object.pooling_result
        self.labels = labels
        self.pooling_result_shape = np.shape(self.pooling_result)
        self.neurons = no_of_neurons
    
        self.weights_1 = weights_1
        self.weights_2 = weights_2
        self.bias_1    = bias_1
        self.bias_2    = bias_2
    ##########################################################
    
    def feed_forward_in_fc_layers(self):
        
        
        
        
        pooling_result     = self.pooling_result
        no_of_neurons      = self.neurons
        print('shape ----- ' + str(self.pooling_result_shape))
        no_of_pool_results = self.pooling_result_shape[0]
        no_of_rows         = self.pooling_result_shape[1]
        no_of_columns      = self.pooling_result_shape[2]
        self.fc_layer_1    = pooling_result.reshape(((no_of_pool_results * no_of_rows * no_of_columns), 1))
        
#         no_of_rows_in_fc_layer_1 = self.fc_layer_1.shape[0]
#         self.weights_1 = np.random.random((no_of_neurons[0], no_of_rows_in_fc_layer_1))
#         self.bias_1 = np.zeros(shape = (no_of_neurons[0], 1))
        print('w1 - ' + str(np.shape(w1)))
        print('layer _1 - ' + str(np.shape(self.fc_layer_1)))
        print('bias_1 - ' + str(np.shape(self.bias_1)))
        self.z_1             = np.add(np.dot(self.weights_1.T, self.fc_layer_1), self.bias_1)
        self.fc_layer_2      = Relu(self.z_1)
        
#         no_of_rows_in_fc_layer_2 = self.fc_layer_2.shape[0]
#         self.weights_2 = np.random.random((no_of_neurons[1], no_of_rows_in_fc_layer_2))
#         self.bias_2 = np.zeros(shape = (no_of_neurons[1], 1))
        print('w2 - ' + str(np.shape(w2)))
        print('layer _2 - ' + str(np.shape(self.fc_layer_2)))
        print('bias_2 - ' + str(np.shape(self.bias_2)))
        self.z_2             = np.add(np.dot(self.weights_2.T, self.fc_layer_2), self.bias_2)
        self.predicted_probs = softmax(self.z_2)
          
        self.loss = cross_entropy(self.predicted_probs, self.labels)
        self.diff_btw_pred_and_actual_labels = self.predicted_probs - self.labels
        
        
    

    
    

In [123]:

class back_propagation_in_CNN:
    
    
    ##########################################################
    
    def __init__(self, fc_layer_object):
        self.fc_layer_object = fc_layer_object
        self.conv_pool_result = fc_layer_object.conv_pool_result
    
    
    
    
    ##########################################################
    
    def get_the_index(self, conv_sub_array):
        position_of_max_element = np.nanargmax(conv_sub_array)
        index_of_max_element = np.unravel_index(position_of_max_element, conv_sub_array.shape)
        return index_of_max_element
    
    
    ##########################################################
    
    def backpropagation_wrt_pooling_result(self, derivative_pool_result):
        
        conv_result = self.conv_pool_result.conv_result
        pool_filter_size = self.conv_pool_result.pool_filter_size
        pool_filter_stride = self.conv_pool_result.pool_filter_stride
        
        no_of_convolved_images, no_of_rows, no_of_columns = np.shape(conv_result)
        _, pool_row_length, _ = np.shape(derivative_pool_result)
        output = np.zeros(shape = np.shape(conv_result))
    
        row_count = 0
        column_count = 0
        for image_num in range(0, no_of_convolved_images):
            current_image = conv_result[image_num, :]
            row_count = 0
            for row in range(0, no_of_rows, pool_filter_stride):
                if row + pool_filter_size <= no_of_rows:
                    column_count = 0
                    for column in range(0, no_of_columns, pool_filter_stride):
                        if column + pool_filter_size <= no_of_columns:
                            image_sub_array = current_image[row : row + pool_filter_size, column : column + pool_filter_size]
                            '''index of the max element in the original conv_image (in the area covered by pool filter)'''
                            (i, j) = self.get_the_index(image_sub_array)
                            if row_count <= pool_row_length:
                                output[image_num, row + i, column + j] = derivative_pool_result[image_num, 
                                                                                            row_count, column_count]
                            column_count = column_count + 1
                
                row_count = row_count + 1
            
        return output
                              
    
    
    ##########################################################
    
    def backpropagation_wrt_convolution_layer(self, der_conv_prev):
                
        conv_curr_image = self.conv_pool_result.curr_image
        conv_image_curr_list = [conv_curr_image]
        conv_image_curr = np.array(conv_image_curr_list)
        conv_filter = self.conv_pool_result.conv_filter
        filter_stride = self.conv_pool_result.conv_filter_stride
        no_of_filters, flt_size, no_of_columns_f = np.shape(conv_filter)
        
        len_of_shape_of_curr_image = len(np.shape(conv_image_curr))
        
        _,no_of_rows, no_of_columns = np.shape(conv_image_curr)

        
        der_wrt_filter = np.zeros(np.shape(conv_filter))
        der_wrt_bias = np.zeros((no_of_filters, 1))
        dconv_result = np.zeros(np.shape(conv_image_curr))
        
        r_count = 0
        c_count = 0
        
        for filter_num in range(0, no_of_filters, filter_stride):
            r_count = 0
            for r in range(0, no_of_rows):
                if r + flt_size <= no_of_rows:
                    c_count = 0
                    for c in range(0, no_of_columns, filter_stride):
                        if c + flt_size <= no_of_columns:
                            '''update the filter'''
                            
                            der_wrt_filter += der_conv_prev[filter_num, r_count, c_count] * conv_image_curr[:, 
                                                                                                         r : r + flt_size,
                                                                                                         c : c + flt_size]
                            
                            dconv_result[:, r : r + flt_size, c : c + flt_size] += der_conv_prev[filter_num, r_count, c_count] * conv_filter[filter_num, :]
                            
                            c_count = c_count + 1
                r_count = r_count + 1
        
            der_wrt_bias[filter_num] = np.sum(der_conv_prev[filter_num])
        
        
        return dconv_result, der_wrt_filter, der_wrt_bias
                                
    
    
    
    ##########################################################
    
    def start_back_propagation(self):
        
        fc_layer_object = self.fc_layer_object
        fc_layer_1      = fc_layer_object.fc_layer_1
        
        self.der_wrt_weights_2  = np.dot(fc_layer_object.diff_btw_pred_and_actual_labels.T, 
                                                       fc_layer_object.z_1.T)
        
        print('w2 - ' + str(np.shape(fc_layer_object.weights_2.T)))
        print('diff - ' + str(np.shape(fc_layer_object.diff_btw_pred_and_actual_labels)))
        print('layer_1 - ' + str(np.shape(fc_layer_object.fc_layer_1.T)))
        print('bias_1 - ' + str(np.shape(fc_layer_object.bias_1.shape)))
        
        temporary_result = np.dot(fc_layer_object.weights_2,  
                                  fc_layer_object.diff_btw_pred_and_actual_labels.T)
        
        self.der_wrt_weights_1 = np.dot(temporary_result, fc_layer_object.fc_layer_1.T)
        
        self.der_wrt_bias_2    = np.sum(fc_layer_object.diff_btw_pred_and_actual_labels, axis = 1).T
        self.der_wrt_bias_1    = np.sum(temporary_result, axis = 1).reshape(fc_layer_object.bias_1.shape)
        
        
        print('w1 - ' + str(np.shape(fc_layer_object.weights_1)))
        print('pooling - ' + str(np.shape(fc_layer_object.pooling_result_shape)))
        print('temporary - ' + str(np.shape(temporary_result)))
        der_fc_layer_1 = np.dot(fc_layer_object.weights_1, temporary_result)
        reshaped_dfc_layer_1 = der_fc_layer_1.reshape(fc_layer_object.pooling_result_shape)
        
        print('reshaped - ' + str(np.shape(reshaped_dfc_layer_1)))
        '''back propagating through max-pooling layer (updating only those neurons with highest value)'''
        der_pool_layer = self.backpropagation_wrt_pooling_result(reshaped_dfc_layer_1)
        
        '''back propagating through Relu layer'''
        der_pool_layer[fc_layer_object.conv_pool_result.conv_result <= 0] = 0
        
        der_image, der_filter, der_conv_bias = self.backpropagation_wrt_convolution_layer(der_pool_layer)
        
        self.return_result = [der_filter, der_conv_bias]
        
    


In [39]:

##########################################################
def read_data(directory_path, is_train_data):
    if is_train_data:
        mnsit_data = pd.read_csv(directory_path, header=None, low_memory = False)
        mnsit_data.drop(0, axis = 0, inplace = True)
        train_Y    = mnsit_data[0].astype(np.float32)
        mnsit_data.drop(0, axis = 1, inplace = True)
        mnsit_data = mnsit_data.astype(np.float32)

        return mnsit_data, train_Y
    
    else:
        mnsit_test = pd.read_csv(directory_path, header = None, low_memory = False)
        mnsit_test.drop(0, axis = 0, inplace = True)
        mnsit_test = mnsit_test.astype(np.float32)
        
        return mnsit_test
    
    
    
##########################################################
def split_the_train_data(train_X, train_Y):
    train_x, train_y, test_x, test_y = train_test_split(train_X, train_Y, test_size = 0.2, 
                                                        random_state = 42)
    
    return [[train_x, test_x], [train_y, test_y]]
    
##########################################################
def convert_labels_to_one_hot_encoding_format(train_y, test_y):
    one_hot_encoder = OneHotEncoder(categories = 'auto')
    train_y_encoded = one_hot_encoder.fit_transform(train_y).toarray()
    test_y_encoded  = one_hot_encoder.fit_transform(test_y).toarray()
    return train_y_encoded.astype(np.float32), test_y_encoded.astype(np.float32)



##########################################################
# def convert_input_to_image_format(row):
#     return row.values.reshape(28, 28)
def convert_input_to_image_format(data):
    output = []
    no_of_rows = np.shape(data)[0]
    for row_num in range(0, no_of_rows):
        output.append(data.iloc[row_num].values.reshape(28, 28))
    
    return np.array(output).astype(np.float32)
    


##########################################################
def standardize_the_data(train_data, test_data):
    scaler = StandardScaler().fit(train_data)
    train_data_scaled = scaler.transform(train_data)
    test_data_scaled  = scaler.transform(test_data)
    return pd.DataFrame(train_data_scaled, dtype = np.float32), pd.DataFrame(test_data_scaled, dtype = np.float32)





In [40]:
train_X, train_Y = read_data('/Users/vijay/Downloads/digit-recognizer/train.csv', True)
test_X           = read_data('/Users/vijay/Downloads/digit-recognizer/test.csv', False)

train_test_data  = split_the_train_data(train_X, train_Y)
train_x, train_y = train_test_data[0]
test_x, test_y   = train_test_data[1]

train_x_standardized, test_x_standardized = standardize_the_data(train_x, test_x)
# train_x_reshaped = train_x_standardized.apply(convert_input_to_image_format, axis = 1)
# test_x_reshape   = test_x_standardized.apply(convert_input_to_image_format, axis = 1)
train_x_reshaped = convert_input_to_image_format(train_x_standardized)
test_x_reshaped   = convert_input_to_image_format(test_x_standardized)

train_y_encoded, test_y_encoded = convert_labels_to_one_hot_encoding_format(train_y.values.reshape(-1, 1), 
                                                                           test_y.values.reshape(-1, 1))


del(train_x)
del(test_x)
del(train_y)
del(test_y)
del(train_x_standardized)
del(test_x_standardized)

  return self.partial_fit(X, y)


In [135]:
l1_filter = np.zeros((2,3,3))

l1_filter[0, :, :] = np.array([[[-1, 0, 1],   
                                  [-1, 0, 1],   
                                   [-1, 0, 1]]])  
l1_filter[1, :, :] = np.array([[[1,   1,  1],   
                                   [0,   0,  0],   
                                    [-1, -1, -1]]]) 


sample_x = train_x_reshaped[0]
sample_y = train_y_encoded[0]

np.shape(sample_x)


(28, 28)

In [144]:
conv_filters = get_filters_for_convolution(3, 32, 1)
w1, w2       = get_weights_for_fc_layers(2, 28, 3, 2, 1, 2, 1, 0, [52, 10])
conv_bias    = get_biases_for_convolution(26)
b1, b2       = get_biases_for_fc_layers()


In [154]:


conv_pool = Conv_pool(sample_x, l1_filter, conv_bias, 0, 1, 2, 2)
conv_pool.CNN()
fc_layer  = fully_connected_layers_in_CNN(cnn, sample_y, w1, w2, b1, b2, [52, 10]) 
fc_layer.feed_forward_in_fc_layers()

image  - (28, 28)
filter (2, 3, 3)
2
shape ----- (32, 13, 13)
w1 - (338, 52)
layer _1 - (5408, 1)
bias_1 - (1024, 1)


ValueError: shapes (52,338) and (5408,1) not aligned: 338 (dim 1) != 5408 (dim 0)

In [124]:
bp = back_propagation_in_CNN(fc_layer)
bp.start_back_propagation()

w2 - (10, 1024)
diff - (1, 10)
layer_1 - (1, 5408)
bias_1 - (2,)
w1 - (5408, 1024)
pooling - (3,)
temporary - (1024, 1)
reshaped - (32, 13, 13)


In [127]:
bp.return_result

[array([[[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907, -8571.8787772 ],
         [-6921.8641906 , -7868.69019476, -6973.13138385]],
 
        [[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907, -8571.8787772 ],
         [-6921.8641906 , -7868.69019476, -6973.13138385]],
 
        [[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907, -8571.8787772 ],
         [-6921.8641906 , -7868.69019476, -6973.13138385]],
 
        [[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907, -8571.8787772 ],
         [-6921.8641906 , -7868.69019476, -6973.13138385]],
 
        [[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907, -8571.8787772 ],
         [-6921.8641906 , -7868.69019476, -6973.13138385]],
 
        [[-5439.60433114, -7821.41096778, -7573.79837336],
         [-8304.67211947, -9297.92974907,

In [146]:

def get_filters_for_convolution(filter_size, no_of_filters, no_of_channels):
    
    if no_of_channels == 1:
        conv_filters = np.zeros((no_of_filters, filter_size, filter_size))
        for i in range(0, no_of_filters):
            
            filter_ = np.random.random((filter_size, filter_size))
            conv_filters[i, :, :] = filter_
            
        return conv_filters
    else:
        conv_filters = np.zeros((no_of_filters, filter_size, filter_size, no_of_channels))
        
        for i in range(0, no_of_filters):
            filter_ = np.random.random((filter_size, filter_size, no_of_channels))
            conv_filters[i, :, :, :] = filter_

        return conv_filters
            
            
            
    

def get_weights_for_fc_layers(no_of_filters, image_size, conv_f_size, 
                                 pool_size, conv_f_stride, pool_stride, no_of_convolutions, padding, no_of_neurons):
    for num in range(0, no_of_convolutions):
        
        conv_size  = np.int16(((image_size + 2 * padding - conv_f_size) / conv_f_stride) + 1)
        pool_size  = np.uint16(((conv_size - pool_size) / pool_stride) + 1)
        image_size = pool_size
    
    
    weights_1 = np.random.random(((no_of_filters * image_size * image_size), no_of_neurons[0]))
    weights_2 = np.random.random((no_of_neurons[0], no_of_neurons[1]))
    
    return weights_1, weights_2
    

def get_biases_for_convolution(filter_size):
    conv_bias = np.full((filter_size, filter_size), 0.1)
    return conv_bias
    

def get_biases_for_fc_layers():
    bias_1 = np.random.random((52, 1))
    bias_2 = np.random.random((10, 1))
    
    return bias_1, bias_2




In [56]:
batch_size = 336

def update_parameters_for_each_batch(train_x, train_y, beta1, beta2, learning_rate, parameters, pooling_filter_size,
                                        pooling_filter_stride, padding, stride, loss):
    
    cf, cb, w1, w2, b1, b2 = parameters
    conv_filter_shape = np.shape(cf)
    conv_bias_shape   = np.shape(cb)
    weights_1_shape   = np.shape(w1)
    weights_2_shape   = np.shape(w2)
    bias_1_shape      = np.shape(b1)
    bias_2_shape      = np.shape(b2)
    
    
    loss_       = 0
    cov_filter_ = np.zeros(conv_filter_shape)
    conv_bias_  = np.zeros(conv_bias_shape) 
    weights_1_  = np.zeros(weights_1_shape)
    weights_2_  = np.zeros(weights_2_shape)
    bias_1_     = np.zeros(bias_1_shape)
    bias_2_     = np.zeros(bias_2_shape)

    v_cf   = np.zeros(conv_filter_shape)
    v_w_1  = np.zeros(weights_1_shape)
    v_w_2  = np.zeros(weights_2_shape)
    v_cb   = np.zeros(conv_bias_shape)
    v_w1_b = np.zeros(bias_1_shape)
    v_w2_b = np.zeros(bias_2_shape)

    u_cf   = np.zeros(conv_filter_shape)
    u_w_1  = np.zeros(weights_1_shape)
    u_w_2  = np.zeros(weights_2_shape)
    u_cb   = np.zeros(conv_bias_shape)
    u_w1_b = np.zeros(bias_1_shape)
    u_w2_b = np.zeros(bias_2_shape)



    for image_num in range(0, batch_size):

        conv_pool_object = Conv_pool(train_x[image_num], cf, cb, padding = 0, stride = 1, 
                                                    pooling_filter_size = 2, pooling_filter_stride = 2)
        fc_layer_object  = fully_connected_layers_in_CNN(conv_pool_object, train_y[image_num], 
                                                         w1, w2, b1, b2, [1024, 10]).feed_forward_in_fc_layers()
        bp_object        = back_propagation_in_CNN(fc_layer_object).start_back_propagation()


        d_conv_filter, d_conv_bias  = bp_object.return_result
        d_wt_1, d_wt_2              = bp_object.der_weights_1, db_object.der_weights_2, 
        d_bias_1, d_bias_2          = bp_object.der_bias_1, bp_object.der_bias_1

        conv_filter_ += d_conv_filter
        conv_bias_   += d_conv_bias
        weights_1_   += d_wt_1
        weights_2_   += d_wt_2
        bias_1_      += d_bias_1
        bias_2_      += d_bias_2
        loss_        += fc_layer_object.loss
            
            
            
        v_cf = beta1 * v_cf + (1 - beta1) * (conv_filter_ / batch_size) 
        u_cf = beta2 * u_cg + (1 - beta2) * (conv_filter_ / batch_size) ** 2
        cf  -= learning_rate * v_cf / np.sqrt(u_cg + 1e-7)

        v_cb = beta1 * v_cb + (1 - beta1) * conv_bias_ / batch_size
        u_cb = beta2 * u_cb + (1 - beta2) * (conv_bias_ / batch_size) ** 2
        cb  -= learning_rate * v_cb / np.sqrt(u_cb + 1e-7)

        v_w_1 = beta1 * v_w_1 + (1 - beta1) * weights_1_ / batch_size
        u_w_1 = beta2 * u_w_1 + (1 - beta2) * (weights_1_ / batch_size) ** 2
        w1   -= learning_rate * v_w_1/np.sqrt(u_w_1 + 1e-7)

        v_w_2 = beta1 * v_w_2 + (1 - beta1) * weights_2_ / batch_size
        u_w_2 = beta2 * u_w_2 + (1 - beta2) * (weights_2_ / batch_size) ** 2
        w2   -= learning_rate * v_w_2/np.sqrt(u_w_2 + 1e-7)


        v_w1_b = beta1 * v_w1_b + (1 - beta1) * bias_1_ / batch_size
        u_w1_b = beta2 * u_w1_b + (1 - beta2) * (bias_1_ / batch_size) ** 2
        b1    -= learning_rate * v_w1_b/np.sqrt(u_w1_b+1e-7)

        v_w2_b = beta1 * v_w2_b + (1 - beta1) * bias_2_ / batch_size
        u_w2_b = beta2 * u_w2_b + (1 - beta2) * (bias_2_ / batch_size) ** 2
        b2    -= learning_rate * v_w2_b/np.sqrt(u_w2_b+1e-7)
        
        loss_ = loss_ / batch_size
        loss.append(loss_)
        parameters = [cf, cb, w1, w2, b1, b2] 
        
        
        return parameters, loss
    
        
            


def train(X, Y, img_size = 28, no_of_channels = 1, lr = 0.01, beta1 = 0.95, beta2 =0.99, 
                                    no_of_conv_filter = 8, batch_size = 50, no_of_epochs = 5):
    
    conv_filter = get_filters_for_convolution(filter_size = 3, no_of_filters = 32, no_of_channels = 1)
    conv_bias   = get_biases_for_convolution(3)

    weights_1, weights_2 = get_weights_for_fc_layers(32, 28, 3, 2, 1, 2, 1, 0, [1024, 10])
    bias_1, bias_2       = get_biases_for_fc_layers()
    
    parameters = [conv_filter, conv_bias, weights_1, weights_2, bias_1, bias_2]
    loss = []
    no_of_rows = len(X)
    
    
    for epoch in range(0, no_of_epochs):
        X, Y = shuffle(X, Y)
    for row in range(0, no_of_rows, batch_size):
        train_x = X[row : row + batch_size, :]
        train_y  = Y[row : row + batch_size, :]
        parameters, loss = update_parameters_for_each_batch(train_x, train_y, beta1, beta2, lr, parameters, 
                                                             2, 2, 0, 1, loss)
    
        
    return parameters, loss
    
    

In [61]:
parameters, loss = train(X = train_x_reshaped, Y = train_y_encoded)

shape ----- (0,)


IndexError: tuple index out of range

(32,)

In [18]:
l = np.full((3, 3), 1)
k = np.full((3, 3), 1)
np.add(l, k)

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [19]:
l + k

array([[2, 2, 2],
       [2, 2, 2],
       [2, 2, 2]])

In [87]:
l = np.arange(30).reshape(6, 5)
w = np.full((6, 1), 0.1)
k = np.full((5, 1), 1)
kk = np.dot(l.T, w)
print(kk)
kkk = (kk + k)
print(kkk)
print(k)

[[7.5]
 [8.1]
 [8.7]
 [9.3]
 [9.9]]
[[ 8.5]
 [ 9.1]
 [ 9.7]
 [10.3]
 [10.9]]
[[1]
 [1]
 [1]
 [1]
 [1]]


In [101]:
def s1(x):
    #x = x.reshape(1, -1)
    B = np.exp(x - max(x))
    C = np.sum(B)
    return B / C

#s1(np.array([2345, 5678, 7654]))
s1(np.array([-99864.9067806, -96041.73535487, -161880.11328138]))

array([0., 1., 0.])

In [133]:
conv = ((np.uint16(((24 - 2) / 2) + 1),
                                   np.uint16(((24 - 2) / 2) + 1)))

(12, 12)