In [137]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
## Those are the libraries that we will use to create our model and train it.


In [138]:
##First, we will import the dataset.
data = pd.read_csv('train.csv')

#Must convert pandas dataframe to numpy array for matrix algebra.

data = data.to_numpy()
m,n = data.shape
print(m,n)

42000 785


In [139]:
#Now that we have the data, we will split it into the training and testing data.
np.random.shuffle(data)
train_data = data[:int(0.95*m),:]
test_data = data[int(0.95*m):,:]
# Using 95% of the data in training and 5% in testing. skewed split towards training since this is a well known dataset.

test_data.shape
# Gives us 2100 samples to test performance of our model.

(2100, 785)

In [141]:
#Now we will split the data into the input and output.
X_train = train_data[:,1:].T
Y_train = train_data[:,0].T
X_test = test_data[:,1:].T
Y_test = test_data[:,0].T

print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)
print(Y_test)


(784, 39900) (39900,)
(784, 2100) (2100,)
[6 8 8 ... 1 2 3]


In [142]:
#One hot encoding the output.

def one_hot(Y):
    Y = Y.astype(int)
    Y_one_hot = np.zeros((Y.shape[0],10))
    for i in range(Y.shape[0]):
        Y_one_hot[i,Y[i]] = 1
    return Y_one_hot.T
try_Y = one_hot(Y_train)

try_Y = try_Y.T
print(try_Y.shape)
print(try_Y[0])

(39900, 10)
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]


In [155]:
#  Model architecture
##  Input layer: 784 neurons (28x28 pixels) (784 inputs)
##  Hidden layer: 100 neurons (ReLU activation)
##  Output layer: 10 neurons (0-9) (Softmax activation) 



# Implementation:
#  step 1: Initialize weights and biases
#  step 2: Forward propagation
#  step 3: Backpropagation
#  step 4: Update weights and biases
 
 
def init_params():   # step 1  
    W1 = np.random.rand(10,784)
    b1 = np.zeros((10,1))
    W2 = np.random.rand(10,10)
    b2 = np.zeros((10,1))
    return W1,b1,W2,b2

def forward_prop(X_train, W1, b1, W2, b2): #step 2
    Z1 = np.dot(W1,X_train) + b1
    A1 = np.maximum(0,Z1) #ReLU activation: if Z1 is positive, else 0.
    Z2 = W2.dot(A1) + b2
    A2 = np.exp(Z2)/np.sum(np.exp(Z2),axis=0,keepdims=True) #Softmax activation
    
    return Z1,A1, Z2, A2

def backprop(X_train, Y_train, Z1,Z2,A1,A2, W2): #step 3
    one_hot_y = one_hot(Y_train)
    dZ2 = A2 - one_hot_y
    dW2 = 1/Y_train.size * np.dot(dZ2,A1.T)
    db2 = 1/Y_train.size * np.sum(dZ2,axis=1,keepdims=True)
    dZ1 = np.dot(W2.T,dZ2) * (A1 > 0) #ReLU derivative: if A1 is positive, else 0.
    dW1 = 1/Y_train.size * np.dot(dZ1,X_train.T)
    db1 = 1/Y_train.size * np.sum(dZ1,axis=1,keepdims=True)
    return dW1,db1,dW2,db2

def update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha): #step 4
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1,b1,W2,b2
    


In [156]:
  #now implementing gradient descent.
def gradient_descent(X_train, Y_train, alpha, num_iter):
    W1,b1,W2,b2 = init_params()
    for i in range(num_iter):
        Z1,A1,Z2,A2 = forward_prop(X_train, W1, b1, W2, b2)
        dW1,db1,dW2,db2 = backprop(X_train, Y_train, Z1,Z2,A1,A2, W2)
        W1,b1,W2,b2 = update_params(W1,b1,W2,b2,dW1,db1,dW2,db2,alpha)
        ## Output logging to help debug and see the training progress.  
        if i % 5 == 0:
          print("Iteration: ",i) # prints iteration number every 10 iterations.
          print('prediction: ', np.argmax(A2,0) # prints prediction every 10 iterations.
    return W1,b1,W2,b2


SyntaxError: invalid syntax (2837858648.py, line 12)

In [157]:
W1,b1, W2,b2 = gradient_descent(X_train, Y_train, .1, 200)

  A2 = np.exp(Z2)/np.sum(np.exp(Z2),axis=0,keepdims=True) #Softmax activation
  A2 = np.exp(Z2)/np.sum(np.exp(Z2),axis=0,keepdims=True) #Softmax activation


Iteration:  0
prediction:  [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Iteration:  5
prediction:  [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Iteration:  10
prediction:  [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Iteration:  15
prediction:  [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
Iteration:  20
prediction:  [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 

KeyboardInterrupt: 