In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [21]:
# Read the raw table; the first CSV row is consumed as the header by read_csv.
data_ = pd.read_csv('train.csv')

# Work on a NumPy copy so the in-place shuffle below leaves the DataFrame alone.
data = np.array(data_)
m, n = data.shape  # m rows, n columns

# Row order is randomised in place. NOTE: no seed is set here, so the split
# below differs on every run unless the caller seeds NumPy's global RNG first.
np.random.shuffle(data)

# First 1000 shuffled rows form the dev split. After the transpose each
# *column* is one original row: index 0 is the first CSV column (used as the
# target) and the remaining n-1 rows are the other columns.
data_dev = data[:1000].T
y_dev, x_dev = data_dev[0], data_dev[1:]

# Everything after the first 1000 rows is the training split, same layout.
data_train = data[1000:].T
y_train, x_train = data_train[0], data_train[1:]

In [29]:
import numpy as np

class nn:
    """Fully connected feed-forward classifier with ReLU hidden layers and a softmax output.

    Data is laid out row-wise: ``X`` holds one sample per row, so every
    activation has shape ``(samples, units_of_that_layer)``. Weights for layer
    ``i`` have shape ``(inputs, units[i])`` and biases have shape
    ``(units[i], 1)``.
    """

    def __init__(self, layers: int, units: np.ndarray, X: np.ndarray, Y: np.ndarray) -> None:
        """Store the configuration and draw the initial parameters.

        Args:
            layers: Number of weight layers (hidden layers plus the output layer).
            units: Sequence where ``units[i]`` is the width of layer ``i``; the
                first ``layers`` entries are used.
            X: Input matrix of shape ``(samples, features)``.
            Y: Integer class label per sample.
        """
        self.layers = layers
        self.units = units
        self.X = X
        self.Y = Y
        self.W = []  # W[i]: weight matrix of layer i
        self.b = []  # b[i]: bias column vector of layer i
        self.init_weight()

    def init_weight(self):
        """(Re)initialise all weights and biases from NumPy's global RNG.

        Weights are standard-normal draws scaled by ``sqrt(2 / fan_in)``;
        biases are unscaled standard-normal draws. Any previously stored
        parameters are discarded, so calling this twice does not grow the
        parameter lists.
        """
        self.W = []
        self.b = []
        fan_in = self.X.shape[1]
        for i in range(self.layers):
            fan_out = self.units[i]
            # Draw order (weights, then bias, per layer) is kept stable so a
            # seeded RNG reproduces the same parameters.
            self.W.append(np.random.randn(fan_in, fan_out) * np.sqrt(2 / fan_in))
            self.b.append(np.random.randn(fan_out, 1))
            fan_in = fan_out

    def ReLU(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def softmax(self, z):
        """Row-wise softmax of ``z`` with shape ``(samples, classes)``.

        The row maximum is subtracted before exponentiating to avoid overflow;
        this does not change the result.
        """
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward_prop(self, X):
        """Run a forward pass.

        Args:
            X: Input matrix of shape ``(samples, features)``.

        Returns:
            ``(a, z_list)`` where ``a[0]`` is ``X``, ``a[i + 1]`` is the
            activation of layer ``i`` and ``z_list[i]`` is its pre-activation.
        """
        a = [X]
        z_list = []
        last = self.layers - 1
        for i in range(self.layers):
            # b[i] is a column vector; transpose so it broadcasts across rows.
            z = np.dot(a[-1], self.W[i]) + self.b[i].T
            z_list.append(z)
            a.append(self.softmax(z) if i == last else self.ReLU(z))
        return a, z_list

    def ReLU_derivative(self, z):
        """Derivative of ReLU: 1.0 where ``z > 0``, else 0.0."""
        return (z > 0).astype(float)

    @staticmethod
    def one_hot(Y, num_classes=None):
        """One-hot encode integer class labels.

        Args:
            Y: Array-like of class indices; it is flattened, and non-integer
                dtypes are cast to integers.
            num_classes: Number of columns in the result. When omitted it is
                inferred as ``max(Y) + 1``, which is only correct if the
                highest class actually occurs in ``Y``.

        Returns:
            Float array of shape ``(len(Y), num_classes)`` with a single 1 per row.

        Raises:
            ValueError: If ``Y`` is empty and ``num_classes`` is not given, or
                if a label falls outside ``[0, num_classes)``.
        """
        labels = np.asarray(Y).ravel()
        if not np.issubdtype(labels.dtype, np.integer):
            labels = labels.astype(np.intp)
        if num_classes is None:
            if labels.size == 0:
                raise ValueError("cannot infer the number of classes from empty labels")
            num_classes = int(labels.max()) + 1
        num_classes = int(num_classes)
        # Negative indices would silently wrap around, so reject them explicitly.
        if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
            raise ValueError("labels must lie in [0, num_classes)")
        one_hot_Y = np.zeros((labels.size, num_classes))
        one_hot_Y[np.arange(labels.size), labels] = 1
        return one_hot_Y

    def backward_prop(self, a, z_list, Y=None):
        """Compute gradients of the mean softmax cross-entropy loss.

        Args:
            a: Activations as returned by ``forward_prop``.
            z_list: Pre-activations as returned by ``forward_prop``.
            Y: Labels matching the rows of ``a[0]``; defaults to ``self.Y``.

        Returns:
            ``(grads_W, grads_b)``, lists ordered like ``self.W`` / ``self.b``
            with matching shapes.

        Raises:
            ValueError: If the number of labels differs from the number of
                rows in the network output.
        """
        labels = self.Y if Y is None else Y
        m = a[-1].shape[0]
        # Size the targets from the output layer, not from the largest label
        # present, so batches missing the top class still line up.
        one_hot_Y = self.one_hot(labels, self.W[-1].shape[1])
        if one_hot_Y.shape[0] != m:
            raise ValueError("number of labels does not match number of samples")

        grads_W = [None] * self.layers
        grads_b = [None] * self.layers

        # Gradient w.r.t. the output pre-activations (softmax + cross-entropy).
        dz = a[-1] - one_hot_Y
        for i in range(self.layers - 1, -1, -1):
            if i < self.layers - 1:
                # Push the error back through layer i+1's weights and layer i's ReLU.
                dz = np.dot(dz, self.W[i + 1].T) * self.ReLU_derivative(z_list[i])
            grads_W[i] = np.dot(a[i].T, dz) / m
            grads_b[i] = np.sum(dz, axis=0, keepdims=True).T / m

        return grads_W, grads_b

    def update_params(self, grads_W, grads_b, learning_rate):
        """Apply one in-place gradient-descent step to every layer."""
        for i in range(self.layers):
            self.W[i] -= learning_rate * grads_W[i]
            self.b[i] -= learning_rate * grads_b[i]




32970000