In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sympy import diff, symbols


In [14]:
tr_data = pd.read_csv('./../data/mnist_train.csv')
ts_data = pd.read_csv('./../data/mnist_test.csv')

In [15]:
tr_data.describe()


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
count,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,...,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0,60000.0
mean,4.453933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.200433,0.088867,0.045633,0.019283,0.015117,0.002,0.0,0.0,0.0,0.0
std,2.88927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6.042472,3.956189,2.839845,1.68677,1.678283,0.3466,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,254.0,254.0,253.0,253.0,254.0,62.0,0.0,0.0,0.0,0.0


In [16]:
train_data = tr_data.drop(columns='label').values
train_labels = tr_data.label.values
test_data = ts_data.drop(columns='label').values
test_labels = ts_data.label.values

In [19]:
print(train_data.shape)
print(train_labels.shape)

(60000, 784)
(60000,)


In [21]:
train_data = train_data/255
test_data = test_data/255

In [79]:
class Activation:
    def softmax(self, z):
        temp = np.sum(np.exp(z)) 
        return np.exp(z) / temp
    
    def relu(self, z):
        return np.maximum(0, z)

    def tanh(self, z):
        return np.tanh(z)
    
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))
    
    def use(self, string):
        match string:
            case 'relu':
                return self.relu
            case 'sigmoid':
                return self.sigmoid
            case 'softmax':
                return self.softmax
            case 'tanh':
                return self.tanh
            case default:
                return 'Please specify the activation'
    

In [80]:
activation = Activation().use('softmax')
print(activation(np.array([-1,3,4])))

[0.00490169 0.26762315 0.72747516]


In [100]:
class DenseLayer:
    def __init__(self, units, a_in, activation=None):
        self.units = units 
        self.a_in = a_in
        self.activation = activation
        self.w = np.zeros(shape=(a_in[1], units)) #!needs modification
        self.b = np.zeros(units)

    def __str__(self):
        return f'''
        Number of units: {self.units}
        Input shape(shape): {self.a_in}
        weight has shape(w.shape): {self.w.shape}
        bias has shape(b.shape): {self.b.shape}
        '''

    def output(self, a_in):
        if(self.activation == None): return a_in@self.w + self.b
        return self.activation(a_in@self.w + self.b)

        

In [102]:
x = np.array([200, 17])
layer = DenseLayer(2, (None, 2), activation)
print(layer)
print(layer.output(x))


        Number of units: 2
        Input shape(shape): (None, 2)
        weight has shape(w.shape): (2, 2)
        bias has shape(b.shape): (2,)
        
[0.5 0.5]


In [90]:
class LossFunctions:
    
    def mean_squared_error(y_true, y_pred):
        mse = np.mean((y_true-y_pred)**2)
        return mse
    
    def binary_cross_entropy(y_true, y_pred):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return - (y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    def softmax_cross_entropy(y_true, y_pred):
        y_true_one_hot = np.zeros_like(y_pred)
        y_true_one_hot[np.arange(len(y_true)), y_true] = 1

        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        
        ce = -np.sum(y_true_one_hot * np.log(y_pred), axis=1)
        return ce


In [92]:
class NeuralNetwork:

    def __init__(self):
        self.layers = []

    def add_layer(self, units, a_in, activation=None):
        self.layers.append(DenseLayer(units, a_in, activation))
    
    def forward_propagation(self, X):
        a = X
        for layer in self.layers:
            layer.receive_input(a)
            a = layer.output()
        return a
    
    def backward_propagation(self, X, y_true, learning_rate):
        m = X.shape[0]
        a = X
        for layer in self.layers:
            a_prev = a
            a = layer.output(a_prev)
        
        dA = None  # Gradient of the cost with respect to the activation of the last layer
        for i in reversed(range(len(self.layers))):
            layer = self.layers[i]
            if layer.activation is None:
                dZ = dA
            else:
                dZ = dA * layer.activation(a, derivative=True)
            
            dW = 1 / m * a_prev.T @ dZ
            db = 1 / m * np.sum(dZ, axis=0)
            dA_prev = dZ @ layer.w.T
            
            layer.w -= learning_rate * dW
            layer.b -= learning_rate * db
            
            dA = dA_prev
    
    def train(self, X, y_true, learning_rate, epochs):
        for epoch in range(epochs):
            y_pred = self.forward_propagation(X)
            loss = LossFunctions.softmax_cross_entropy(y_true, y_pred)
            self.backward_propagation(X, y_true, learning_rate)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}: Loss = {loss}")

In [93]:
neural_network = NeuralNetwork()

In [94]:
neural_network.add_layer(units=64, a_in=(None, 784), activation='relu')
neural_network.add_layer(units=10, a_in=(None, 64), activation='relu')

In [95]:
neural_network.train(train_data, train_labels, learning_rate=0.01, epochs=1)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 784)