In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sympy import diff, symbols


In [14]:
tr_data = pd.read_csv('./../data/mnist_train.csv')
ts_data = pd.read_csv('./../data/mnist_test.csv')

In [15]:
tr_data.describe()


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,...,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0
mean,4.453933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.200433,0.088867,0.045633,0.019283,0.015117,0.002,0.0,0.0,0.0,0.0
std,2.88927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.042472,3.956189,2.839845,1.68677,1.678283,0.3466,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [16]:
train_data = tr_data.drop(columns='label').values
train_labels = tr_data.label.values
test_data = ts_data.drop(columns='label').values
test_labels = ts_data.label.values

In [19]:
print(train_data.shape)
print(train_labels.shape)

(60000, 784)
(60000,)


In [21]:
train_data = train_data/255
test_data = test_data/255

In [186]:
class Activation:
    def softmax(self, z):
        temp = np.sum(np.exp(z), axis=1, keepdims=True) 
        return np.exp(z) / temp
    
    def relu(self, z):
        return np.maximum(0, z)

    def tanh(self, z):
        return np.tanh(z)
    
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))
    
    def use(self, string):
        if string == 'relu':
            return self.relu
        elif string == 'sigmoid':
            return self.sigmoid
        elif string == 'softmax':
            return self.softmax
        elif string == 'tanh':
            return self.tanh
        else:
            return 'Please specify the activation'

In [185]:
# activation = Activation().use('softmax')
# print(activation(np.array([-1,3,4])))

In [174]:
class DenseLayer:
    def __init__(self, units, a_in, activation=None):
        self.units = units 
        self.a_in = a_in
        self.activation = activation
        self.w = np.zeros(shape=(a_in[1], units)) #!needs modification
        self.b = np.zeros(units)

    def __str__(self):
        return f'''
        Number of units: {self.units}
        Input shape(shape): {self.a_in}
        weight has shape(w.shape): {self.w.shape}
        bias has shape(b.shape): {self.b.shape}
        '''

    def output(self, a_in):
        # if(self.activation == None): return a_in@self.w + self.b
        return self.activation(a_in@self.w + self.b)

        

In [175]:
x = np.array([200, 17])
layer = DenseLayer(2, (None, 2), activation)
print(layer)
print(layer.output(x))


        Number of units: 2
        Input shape(shape): (None, 2)
        weight has shape(w.shape): (2, 2)
        bias has shape(b.shape): (2,)
        
[1.00000000e+00 3.34279552e-80]


In [267]:
class LossFunctions:
    
    def mean_squared_error(y_true, y_pred):
        mse = np.mean((y_true-y_pred)**2)
        return mse
    
    def binary_cross_entropy(y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return - (y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    def softmax_cross_entropy(y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
            
        ce = -np.sum(y_true * np.log(y_pred), axis=1)
        return ce

In [268]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, activation):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.activation = [Activation().use(activation[0]), Activation().use(activation[1])]
        
        # Initialize the weights and biases
        self.weights1 = np.random.randn(self.input_size, self.hidden_size)
        self.biases1 = np.zeros((1, self.hidden_size))
        self.weights2 = np.random.randn(self.hidden_size, self.output_size)
        self.biases2 = np.zeros((1, self.output_size))
        
    def forward(self, X):
        # Forward propagation
        self.hidden_layer = self.activation[0](np.dot(X, self.weights1) + self.biases1)
        self.output_layer = self.activation[1](np.dot(self.hidden_layer, self.weights2) + self.biases2)
        return self.output_layer
    
    def backward(self, X, y, learning_rate):
        # Backward propagation
        error = self.loss_derivative(y, self.output_layer)
        d_output_layer = error * self.activation[1](self.output_layer,)
        error_hidden_layer = d_output_layer.dot(self.weights2.T)
        d_hidden_layer = error_hidden_layer * self.activation[0](self.hidden_layer)
        
        # Update the weights and biases
        self.weights2 -= learning_rate * self.hidden_layer.T.dot(d_output_layer)
        self.biases2 -= learning_rate * np.sum(d_output_layer, axis=0, keepdims=True)
        self.weights1 -= learning_rate * X.T.dot(d_hidden_layer)
        self.biases1 -= learning_rate * np.sum(d_hidden_layer, axis=0)
        
    def train(self, X, y, epochs, learning_rate):
        # Train the model
        for i in range(epochs):
            output = self.forward(X)
            loss = LossFunctions.softmax_cross_entropy(y, output)
            self.backward(X, y, learning_rate)
            if i % 10 == 0:
                print("Epoch ", i, " loss: ", np.mean(loss))
                
    def predict(self, X):
        # Make predictions
        output = self.forward(X)
        predictions = np.argmax(output, axis=1)
        return predictions

    def loss_derivative(self, y_true, y_pred):
        return y_pred - y_true

In [269]:
y_train_one_hot = np.zeros((train_labels.shape[0], 10))
# one-hot encoding the labels into form [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
for i in range(len(train_labels)):
    y_train_one_hot[i, train_labels[i]] = 1

In [270]:
y_train_one_hot.shape

(60000, 10)

In [271]:
y_train_one_hot[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

In [283]:
model = NeuralNetwork(input_size=784, hidden_size=128, output_size=10, activation=['relu', 'softmax'])

In [288]:
model.train(X_train, y_train_one_hot, epochs=2, learning_rate=0.01)

Epoch  0  loss:  31.00948909362405


  temp = np.sum(np.exp(z), axis=1, keepdims=True)
  return np.exp(z) / temp
  return np.exp(z) / temp


In [289]:
y_pred = model.predict(test_data)

In [290]:
accuracy = np.mean(y_pred == test_labels)
print("Accuracy: ", accuracy)

Accuracy:  0.098
