# Coding a NN from Scratch

## 1. Import MNIST dataset from PyTorch

In [1]:
import torch
import torchvision
import torchvision.datasets as datasets

In [2]:
transform = torchvision.transforms.ToTensor()
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
mnist_testset = datasets.MNIST(root='./data/', train=False, download=True, transform=transform)

In [3]:
print(mnist_trainset,'\n\n', mnist_testset)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor() 

 Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test
    StandardTransform
Transform: ToTensor()


In [4]:
# I just want the whole dataset in a np array, I do not want to use PyTorch for anything else than downloading MNIST

batch_size_train = 60000
batch_size_test = 10000

train_loader = torch.utils.data.DataLoader(mnist_trainset, batch_size=batch_size_train, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_testset, batch_size=batch_size_test, shuffle=True)

train_examples = enumerate(train_loader) #returns an iterable object that will spit the samples by batch
test_examples = enumerate(test_loader)

# Batch size is equal to the m examples in both sets (train and test), so we only need to iterate them one time
# Both batch indexes = 0 for the same reason stated before
train_batch_idx, (X_train_tensor, y_train_tensor) = next(train_examples)
test_batch_idx, (X_test_tensor, y_test_tensor) = next(test_examples)


In [5]:
print(f'X_train_tensor.shape: {X_train_tensor.shape}, y_train_tensor.shape: {y_train_tensor.shape}')
print(f'X_test_tensor.shape: {X_test_tensor.shape}, y_test.shape: {y_test_tensor.shape}')

X_train_tensor.shape: torch.Size([60000, 1, 28, 28]), y_train_tensor.shape: torch.Size([60000])
X_test_tensor.shape: torch.Size([10000, 1, 28, 28]), y_test.shape: torch.Size([10000])


In [6]:
import numpy as np

# Let's convert those tensors to np arrays only with rows and columns

def tensor_to_vector(tensor):
    vector_nd = np.array(tensor)
    vector = vector_nd.reshape(len(tensor),-1)
    return vector

X_train = tensor_to_vector(X_train_tensor)
X_test = tensor_to_vector(X_test_tensor)
y_train = np.array(y_train_tensor)
y_test = np.array(y_test_tensor)

In [7]:
print(f'X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}')
print(f'X_test.shape: {X_test.shape}, y_test.shape: {y_test.shape}')

X_train.shape: (60000, 784), y_train.shape: (60000,)
X_test.shape: (10000, 784), y_test.shape: (10000,)


In [8]:
# We want examples by columns so let's transpose the Xs
X_train = X_train.T
X_test = X_test.T

print(f'X_train.shape: {X_train.shape}, y_train.shape: {y_train.shape}')
print(f'X_test.shape: {X_test.shape}, y_test.shape: {y_test.shape}')

X_train.shape: (784, 60000), y_train.shape: (60000,)
X_test.shape: (784, 10000), y_test.shape: (10000,)


In [34]:
# The NN is going to have: Input Layer (784) - HL1(10) - HL2(10)
# Let's initalize all our parameters
def init_parameters():
    W1 = np.random.randn(10, 784) 
    b1 = np.random.randn(10, 1)
    W2 = np.random.randn(10, 10)
    b2 = np.random.randn(10, 1)
    return W1, b1, W2, b2
    
def ReLU(Z):
    a = np.maximum(0,Z) #this will return Z if the value is positive, otherwise 0
    return a

def SoftMax(Z):
    summation_of_exps = np.sum(np.exp(Z))
    return np.exp(Z)/summation_of_exps

def fw_prop(A0, W1, b1, W2, b2):
    #A0 = X
    print(b1)
    Z1 = W1.dot(A0) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = SoftMax(Z2)
    return Z1, A1, Z2, A2

def one_hot_encoding(y):
    n_categories = y.max() + 1     # 0-9, 10 categories
    one_hot_y = np.zeros((y.size, n_categories))
    one_hot_y[np.arange(y.size), y] = 1
    one_hot_y = one_hot_y.T # We want the examples to be arranged by columns
    return one_hot_y

def d_ReLU(Z):
    return Z > 0

def b_prop(A0, Z1, A1, W2, Z2, A2, y):
    m = y.size
    one_hot_y = one_hot_encoding(y)
    
    dZ2 = (A2-one_hot_y)
    dW2 = 1/m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * d_ReLU(Z1)
    dW1 = 1/m * dZ1.dot(A0.T)
    db1 = 1/m * np.sum(dZ1)
    return dW1, db1, dW2, db2

def update_W(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2
    

In [35]:
def pred(A2):
    y_pred = np.argmax(A2,0)

def accuracy(y_pred, y):
    accuracy = np.sum(y_pred == y) / y.size * 100
    return accuracy

def grad_desc(A0, y, iterations, alpha):
    # Initialize Parmeters
    W1, b1, W2, b2 = init_parameters()
    
    # Train Loop
    for i in range(iterations):
        # Forward Propagation
        Z1, A1, Z2, A2 = fw_prop(A0, W1, b1, W2, b2)
        # Back Propagation
        dW1, db1, dW2, db2 = b_prop(A0, Z1, A1, W2, Z2, A2, y)
        # Update Parameters
        W1, b1, W2, b2 = update_W(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        
        if i%20== 0:
            print(f'Iteration: {i}')
            y_pred = pred(A2)
            score = accuracy(y_pred,y)
            print(f'Accuracy: {score}')
            
    return W1, b1, W2, b2
            
    

In [None]:
W1, b1, W2, b2 = grad_desc(X_train, y_train, 1000, 0.001)

[[-0.72393295]
 [-0.22421751]
 [ 1.2425673 ]
 [ 2.07014478]
 [ 0.25719683]
 [-1.18059311]
 [-1.1879094 ]
 [-0.969995  ]
 [ 0.34099808]
 [-0.02063908]]
Iteration: 0
Accuracy: 0.0
[[-0.72391209]
 [-0.22419665]
 [ 1.24258816]
 [ 2.07016564]
 [ 0.25721769]
 [-1.18057226]
 [-1.18788854]
 [-0.96997414]
 [ 0.34101894]
 [-0.02061822]]
[[-0.72388634]
 [-0.22417091]
 [ 1.2426139 ]
 [ 2.07019138]
 [ 0.25724343]
 [-1.18054651]
 [-1.18786279]
 [-0.9699484 ]
 [ 0.34104468]
 [-0.02059248]]
[[-0.72385549]
 [-0.22414005]
 [ 1.24264476]
 [ 2.07022224]
 [ 0.25727429]
 [-1.18051566]
 [-1.18783194]
 [-0.96991754]
 [ 0.34107553]
 [-0.02056163]]
[[-0.72381911]
 [-0.22410368]
 [ 1.24268113]
 [ 2.07025861]
 [ 0.25731066]
 [-1.18047928]
 [-1.18779556]
 [-0.96988117]
 [ 0.34111191]
 [-0.02052525]]
[[-0.72377721]
 [-0.22406178]
 [ 1.24272303]
 [ 2.07030051]
 [ 0.25735256]
 [-1.18043738]
 [-1.18775366]
 [-0.96983927]
 [ 0.34115381]
 [-0.02048335]]
[[-0.72372975]
 [-0.22401431]
 [ 1.2427705 ]
 [ 2.07034798]
 [ 0.25

[[-0.71532723]
 [-0.21561179]
 [ 1.25117302]
 [ 2.07875049]
 [ 0.26580255]
 [-1.1719874 ]
 [-1.17930368]
 [-0.96138929]
 [ 0.34960379]
 [-0.01203337]]
[[-0.71502541]
 [-0.21530997]
 [ 1.25147484]
 [ 2.07905231]
 [ 0.26610437]
 [-1.17168558]
 [-1.17900186]
 [-0.96108747]
 [ 0.34990561]
 [-0.01173155]]
[[-0.71471853]
 [-0.21500309]
 [ 1.25178172]
 [ 2.0793592 ]
 [ 0.26641125]
 [-1.17137869]
 [-1.17869497]
 [-0.96078058]
 [ 0.3502125 ]
 [-0.01142466]]
[[-0.71440639]
 [-0.21469095]
 [ 1.25209386]
 [ 2.07967133]
 [ 0.26672339]
 [-1.17106656]
 [-1.17838284]
 [-0.96046845]
 [ 0.35052463]
 [-0.01111253]]
[[-0.7140887 ]
 [-0.21437327]
 [ 1.25241154]
 [ 2.07998902]
 [ 0.26704107]
 [-1.17074887]
 [-1.17806515]
 [-0.96015076]
 [ 0.35084232]
 [-0.01079484]]
[[-0.71376587]
 [-0.21405043]
 [ 1.25273438]
 [ 2.08031186]
 [ 0.26736391]
 [-1.17042604]
 [-1.17774232]
 [-0.95982792]
 [ 0.35116515]
 [-0.01047201]]
[[-0.71343812]
 [-0.21372268]
 [ 1.25306213]
 [ 2.08063961]
 [ 0.26769166]
 [-1.17009829]
 [-1

[[-0.69276065]
 [-0.19304521]
 [ 1.2737396 ]
 [ 2.10131708]
 [ 0.28836913]
 [-1.14942082]
 [-1.1567371 ]
 [-0.9388227 ]
 [ 0.37217037]
 [ 0.01053321]]
[[-0.69218998]
 [-0.19247454]
 [ 1.27431027]
 [ 2.10188774]
 [ 0.2889398 ]
 [-1.14885015]
 [-1.15616643]
 [-0.93825204]
 [ 0.37274104]
 [ 0.01110388]]
[[-0.69161403]
 [-0.19189859]
 [ 1.27488622]
 [ 2.1024637 ]
 [ 0.28951575]
 [-1.1482742 ]
 [-1.15559048]
 [-0.93767608]
 [ 0.37331699]
 [ 0.01167983]]
[[-0.6910329 ]
 [-0.19131746]
 [ 1.27546734]
 [ 2.10304482]
 [ 0.29009688]
 [-1.14769307]
 [-1.15500935]
 [-0.93709496]
 [ 0.37389812]
 [ 0.01226096]]
[[-0.69044665]
 [-0.19073121]
 [ 1.2760536 ]
 [ 2.10363108]
 [ 0.29068313]
 [-1.14710682]
 [-1.1544231 ]
 [-0.9365087 ]
 [ 0.37448437]
 [ 0.01284721]]
[[-0.6898552 ]
 [-0.19013976]
 [ 1.27664505]
 [ 2.10422253]
 [ 0.29127458]
 [-1.14651536]
 [-1.15383165]
 [-0.93591725]
 [ 0.37507583]
 [ 0.01343867]]
[[-0.68925853]
 [-0.18954309]
 [ 1.27724172]
 [ 2.1048192 ]
 [ 0.29187125]
 [-1.14591869]
 [-1

In [33]:
print(b1)

NameError: name 'b1' is not defined