# The following Notebook is about Neural Networks Implemented from scratch! 

In [20]:
# import relevant libraries
import bonnerlib2D as bl2d
import numpy as np
import pickle as pickle
import matplotlib.pyplot as plt
import sklearn.linear_model as lin
import sklearn.neural_network as nn
import sklearn.utils

In [21]:
# In this tutorial, we work with the MNIST data.
# The pickle is unpacked into six objects, named here as the train / validation /
# test inputs (X*) and their labels (T*).
# NOTE(review): the path is an absolute, machine-specific location -- point it at
# your own copy of the dataset before running.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open("/home/tahir/Desktop/Datasets/mnistTVT.pickle", "rb") as f:
    Xtrain, Ttrain, Xval, Tval, Xtest, Ttest = pickle.load(f)


# Neural Networks with sklearn

We start with a neural net that has a single hidden layer containing 1 hidden unit, trained for a maximum of 1000 epochs. <br>


In [23]:
# Seed NumPy's global random number generator for reproducibility
np.random.seed(0)  # fixed seed so repeated runs draw the same random numbers

def MLP_Classifier(Xtrain, Ttrain, Xtest, Ttest, hidden_layers, initial_lr, max_iters):
    """
    Fit a scikit-learn MLP classifier on the training data and report its
    accuracy on the test data.

    The network uses logistic hidden activations and is trained with
    stochastic gradient descent (tolerance 1e-5).

    Parameters
    ----------
    Xtrain : Inputs of the training set.
    Ttrain : Labels of the training set.
    Xtest : Inputs of the testing set.
    Ttest : Labels of the testing set.
    hidden_layers : Forwarded as `hidden_layer_sizes`; a tuple with one entry per
        hidden layer giving that layer's number of units, e.g. (9,) is a single
        hidden layer with 9 units.
    initial_lr : The initial learning rate.
    max_iters : The maximum number of training iterations.

    Returns
    -------
    accuracy : Mean accuracy of the fitted classifier on (Xtest, Ttest).
        The value is also printed.

    """
    model = nn.MLPClassifier(
        hidden_layer_sizes=hidden_layers,
        activation="logistic",
        solver="sgd",
        learning_rate_init=initial_lr,
        max_iter=max_iters,
        tol=1e-5,  # same value the notebook previously spelled as 10e-6
    )

    # fit() returns the estimator itself, so scoring can be chained onto it.
    accuracy = model.fit(Xtrain, Ttrain).score(Xtest, Ttest)
    print("The Test Accuracy is: ", accuracy)

    return accuracy

# hidden_layers=(1,) means ONE hidden layer containing a single unit, so the
# printed label talks about units rather than layers.
MLP1 = MLP_Classifier(Xtrain, Ttrain, Xtest, Ttest, hidden_layers=(1,), initial_lr=0.01, max_iters=1000)
print("The Test Accuracy with 1 hidden unit is: ", MLP1)

The Test Accuracy is:  0.4456
The Test Accuracy with 1 Hidden layer is:  0.4456


<Figure size 432x288 with 0 Axes>

In [24]:
# Let's try a few different hidden-layer widths.

# One hidden layer with 2 units: hidden_layers=(2,) sets the width of a single
# layer, not the number of layers.
# (The previous plt.suptitle call was dropped: nothing is plotted in this cell,
# so it only produced an empty figure.)
MLP2 = MLP_Classifier(Xtrain, Ttrain, Xtest, Ttest, hidden_layers=(2,), initial_lr=0.01, max_iters=1000)
print("The Test Accuracy with 2 hidden units is: ", MLP2)



The Test Accuracy is:  0.6979
The Test Accuracy with 2 Hidden layer is:  0.6979


<Figure size 432x288 with 0 Axes>

In [25]:
# One hidden layer with 9 units: hidden_layers=(9,) sets the width of a single
# layer, not the number of layers.
# (The previous plt.suptitle call was dropped: nothing is plotted in this cell,
# so it only produced an empty figure.)
MLP3 = MLP_Classifier(Xtrain, Ttrain, Xtest, Ttest, hidden_layers=(9,), initial_lr=0.01, max_iters=1000)
print("The Test Accuracy with 9 hidden units is: ", MLP3)

The Test Accuracy is:  0.93
The Test Accuracy with 9 Hidden layer is:  0.93




<Figure size 432x288 with 0 Axes>

In [None]:
# Compute Accuracy for a Binary Output - Uses a logistic Activation

# Helper Function
def sigmoid(z):
    """
    Logistic (sigmoid) function 1 / (1 + e^(-z)), applied element-wise to z.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

def BinaryNN(clf,X,T):
    """
    Compute and return the accuracy of classifier clf on data X,T by running
    the forward pass by hand, where clf is a fitted neural network with one
    hidden layer and logistic hidden activation.

    Parameters
    ----------
    clf : The fitted MLP classification object; its coefs_ and intercepts_
        (and classes_, when present) are read.
    X : Input features, one row per sample.
    T : The corresponding true labels of the data.

    Returns
    -------
    The fraction of samples whose predicted label equals the true label.

    """

    # Forward propagation up to the output-layer pre-activations
    z1 = np.dot(X,clf.coefs_[0]) + clf.intercepts_[0]   # weighted sum of the inputs
    h1 = sigmoid(z1)                                    # hidden layer activations
    z2 = np.dot(h1, clf.coefs_[1]) + clf.intercepts_[1] # output-layer pre-activations

    if z2.shape[1] == 1:
        # Binary problem: there is a single logistic output unit, so argmax over
        # that one column would always be 0. Threshold instead:
        # sigmoid(z) > 0.5  <=>  z > 0.
        class_index = (z2[:, 0] > 0).astype(int)
    else:
        # Multiclass: the largest output-layer score wins.
        class_index = np.argmax(z2, axis=1)

    # Translate column indices into actual label values. For labels 0..K-1 this
    # is the identity; it matters when the classifier was trained on other
    # label values (e.g. only the digits 5 and 6).
    classes = getattr(clf, "classes_", None)
    y = class_index if classes is None else np.asarray(classes)[class_index]

    return np.mean(y==T)


In [27]:
# Compute Accuracy for a Multiclass Neural Network - Uses Softmax Activation
# The loss function in this case is the cross entropy loss

def softmax(z):
    """
    The softmax activation function, applied row-wise.

    z holds one row of class scores per sample (2-D). Returns an array of the
    same shape in which every row contains the class probabilities and sums
    to 1.

    The row maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps np.exp from overflowing to inf (which
    would turn the row into NaN) when the scores are large.
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)          # computed once, reused for the denominator
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)


def ceNN(clf, X, T):
    """
    Compute and return the mean cross entropy of the MLP classifier in two ways.

    Parameters
    ----------
    clf : The fitted MLP classifier (one hidden layer, logistic hidden
        activation). NOTE(review): the hand-written forward pass applies a
        softmax over the output layer, i.e. it assumes a multiclass classifier.
    X : The X testing points, one row per sample.
    T : The corresponding true labels, used as integer column indices
        (0 .. K-1) into the per-class log-probabilities.

    Returns
    -------
    CE_1 : Cross Entropy Loss computed from the sklearn built-in
        predict_log_proba.
    CE_2 : Cross Entropy Loss computed from scratch.
    """
    # Method 1
    # Get the logarithm of the probabilities for each class.
    logProbabilities = clf.predict_log_proba(X)

    # Method 2
    # Compute the forward propagation
    z1 = np.dot(X,clf.coefs_[0]) + clf.intercepts_[0]   # weighted sum of the inputs
    h1 = sigmoid(z1)                                    # First hidden layer
    z2 = np.dot(h1, clf.coefs_[1]) + clf.intercepts_[1] # output-layer scores

    # Row-wise log-softmax, computed directly rather than as log(softmax(z2)):
    #  * it cannot produce log(0) = -inf or overflow in exp, and
    #  * it does not depend on the global name `softmax`, which this notebook
    #    redefines further down with a different normalisation axis.
    z2_shifted = z2 - np.max(z2, axis=1, keepdims=True)
    log_y = z2_shifted - np.log(np.sum(np.exp(z2_shifted), axis=1, keepdims=True))

    # Cross entropy = mean negative log-probability of the true class.
    # Picking the true-class entry by index replaces the one-hot multiplication:
    # it avoids 0 * -inf = NaN, and it does not need the number of distinct
    # labels in T to match the number of classifier outputs.
    rows = np.arange(len(T))
    CE1 = -np.mean(logProbabilities[rows, T])
    CE2 = -np.mean(log_y[rows, T])

    return CE1, CE2


## Implementing Neural Network from Scratch -- Completely! 

Do NOT try this at home. <br>

P.S. If you do try it, feel free to reach out to me if you need help :)

In [29]:
# Read in the data file (the same pickle that was loaded near the top of the notebook).
# NOTE(review): absolute, machine-specific path -- adjust it to your own copy of the data.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open("/home/tahir/Desktop/Datasets/mnistTVT.pickle", "rb") as f:
    Xtrain, Ttrain, Xval, Tval, Xtest, Ttest = pickle.load(f)

def get_digits(a,b,TrainData,TestData):
    """
    Reduce a dataset to the samples whose label is either a or b.

    Despite the parameter names, the two arrays are a features/labels pair of
    the SAME split (the notebook calls get_digits(5, 6, Xtrain, Ttrain)).

    Parameters
    ----------
    a : First label value to keep (for MNIST, a digit 0-9).
    b : Second label value to keep.
    TrainData : The feature array, indexed along its first axis.
    TestData : The label array; the selection mask is built from it.

    Returns
    -------
    The rows of TrainData and the entries of TestData at the positions where
    the label equals a or b, in their original order.
    """
    # Build the boolean selection mask once and reuse it for both arrays.
    keep = np.logical_or(TestData == a, TestData == b)
    return TrainData[keep], TestData[keep]

# Get digits 5 and 6 from the MNIST dataset (subset of the data).
# The trailing shape comments are the sizes noted by the author.
sub_Xtrain, sub_Ttrain = get_digits(5,6,Xtrain,Ttrain)  # (9444, 784) & (9444,)
sub_Xtest, sub_Ttest = get_digits(5,6,Xtest,Ttest)      # (1850, 784) & (1850,)

# Encode the labels as 1s and 0s: digit 5 -> 1, everything else (digit 6) -> 0.
# NOTE: a later cell calls get_digits again and rebuilds these variables, so
# this in-place re-encoding is overwritten there.
sub_Ttrain = np.where(sub_Ttrain == 5,1,0)
sub_Ttest = np.where(sub_Ttest == 5, 1, 0)


In [30]:
# Helper Functions

def sigmoid(z):
    """
    Sigmoid / logistic activation function, 1 / (1 + e^(-z)), element-wise.
    (Same definition as the sigmoid helper defined earlier in the notebook.)
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def cross_entropy(y,t):
    """
    Return the element-wise Cross Entropy loss between predicted probability y
    and target t (binary classification, t in {0, 1}):

        -t*log(y) - (1-t)*log(1-y)

    y is clipped to [eps, 1-eps] first. A saturated sigmoid can output exactly
    0 or 1, and log(0) would otherwise give inf, or NaN via 0 * -inf.
    Predictions strictly inside that interval are unaffected.
    """
    eps = 1e-15
    y = np.clip(y, eps, 1 - eps)
    return -t*np.log(y) - (1-t)*np.log(1-y)

def tanh(z):
    """
    The tanh activation function, applied element-wise.

    Delegates to np.tanh. The explicit formula
    (e^z - e^-z) / (e^z + e^-z) overflows to inf / inf = NaN once |z| is large
    (around 710 for float64), whereas np.tanh saturates cleanly at -1 / +1.
    """
    return np.tanh(z)

def softmax(z):
    """
    The softmax activation function.
    Return the probability of each class.

    Normalisation runs over the LAST axis, so a 1-D score vector sums to 1 and,
    for a 2-D array with one row per sample, every row sums to 1. This matches
    the softmax defined earlier in the notebook, which this definition
    replaces; normalising over axis 0 would mix different samples together.

    The maximum is subtracted before exponentiating so that large scores do not
    overflow to inf / NaN; the result is mathematically unchanged.
    """
    shifted = z - np.max(z, axis=-1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z, axis=-1, keepdims=True)

def get_Acc(o,labels):
    """
    Accuracy of the network's sigmoid outputs o against the 0/1 labels.

    Outputs strictly greater than 0.5 are predicted as class 1, everything else
    as class 0; the result is the number of matching predictions divided by
    len(labels).
    """
    # Drop singleton axes first (e.g. (N, 1) -> (N,)), then threshold.
    predictions = np.where(np.squeeze(o) > 0.5, 1, 0)
    hits = np.equal(predictions, labels)
    return np.sum(hits) / len(labels)

def get_CE(o, T):
    """
    Get the mean binary Cross Entropy loss of the network's sigmoid outputs o
    against the 0/1 targets T.

    o is squeezed (e.g. (N, 1) -> (N,)) and clipped to [eps, 1-eps] before the
    logarithms. A saturated sigmoid can return exactly 0 or 1, which would make
    the loss inf, or NaN via 0 * -inf. Outputs strictly inside that interval
    are unaffected.
    """
    eps = 1e-15
    probs = np.clip(np.squeeze(o), eps, 1 - eps)
    # Per-sample loss; named so that it does not shadow the cross_entropy helper.
    losses = -T*np.log(probs) - (1-T)*np.log(1-probs)
    return np.mean(losses)


In [31]:
# Get digits 5 and 6 from the MNIST dataset (subset of the data).
# The trailing shape comments are the sizes noted by the author.
sub_Xtrain, sub_Ttrain = get_digits(5,6,Xtrain,Ttrain)  # (9444, 784) & (9444,)
sub_Xtest, sub_Ttest = get_digits(5,6,Xtest,Ttest)      # (1850, 784) & (1850,)

# Name the reduced dataset and encode the labels as 1s and 0s
# (digit 5 -> 1, everything else, i.e. digit 6 -> 0).
X = sub_Xtrain 
T = np.where(sub_Ttrain == 5,1,0)
Test_X = sub_Xtest
Test_T = np.where(sub_Ttest == 5, 1, 0)

# Set seed so the random weight initialisation below is reproducible.
# The order of the np.random.normal calls matters for the values drawn.
np.random.seed(0)

# Initialize the weights from a standard normal distribution (mean 0, std 1)
W = np.random.normal(0, 1, (sub_Xtrain.shape[1], 100))  # input -> hidden layer 1: (input features, 100)
V = np.random.normal(0, 1, (100, 100))  # hidden layer 1 -> hidden layer 2: (100, 100)
U = np.random.normal(0, 1, (100, 1))    # hidden layer 2 -> single sigmoid output: (100, 1)

# Initialize the bias terms to zero, one entry per unit of the layer they feed
# (they broadcast across the rows of the matching matrix product).
w0 = np.zeros(100)
v0 = np.zeros(100)
u0 = np.zeros(1)

# Initialize the learning rate
learning_rate = 0.1

# Main Gradient Descent Loop: full-batch gradient descent on a
# tanh -> tanh -> sigmoid network. range(0,11) runs 11 iterations (0..10).
for iteration in range(0,11): 

    # Compute the Forward Pass for Training Data
    # Naming: a "_t" suffix stands for the tilde (~) on top of a variable,
    # i.e. the pre-activation (weighted sum) of a layer.
    x_t = np.matmul(X,W) + w0   # pre-activation of hidden layer 1: (n_train, 100), bias broadcast over rows
    h = tanh(x_t)               # hidden layer 1 output, tanh activation: (n_train, 100)
    h_t = np.matmul(h,V) + v0   # pre-activation of hidden layer 2: (n_train, 100)
    g = tanh(h_t)               # hidden layer 2 output, tanh activation
    g_t = np.matmul(g,U) + u0   # pre-activation of the output unit: (n_train, 1)
    o = sigmoid(g_t)            # sigmoid output for binary classification

    # Compute the Forward Pass for the TEST data (evaluation only; it is not
    # used in the gradients below)
    x_t_test = np.matmul(Test_X,W) + w0    # pre-activation of hidden layer 1: (n_test, 100)
    h_test = tanh(x_t_test)                # hidden layer 1 output, tanh activation
    h_t_test = np.matmul(h_test,V) + v0    # pre-activation of hidden layer 2: (n_test, 100)
    g_test = tanh(h_t_test)                # hidden layer 2 output, tanh activation
    g_t_test = np.matmul(g_test,U) + u0    # pre-activation of the output unit: (n_test, 1)
    o_test = np.squeeze(sigmoid(g_t_test)) # sigmoid output, squeezed to shape (n_test,)


    # Reported BEFORE this iteration's update, i.e. for the weights produced by
    # the previous iteration (iteration 0 reports the random initialisation).
    print(f"Iteration:{ iteration }, Test accuracy: { get_Acc(o_test,Test_T) }")

    # Compute the Backward pass (gradients of the cross entropy summed over the
    # training samples; the division by the sample count happens in the update)
    DC_DGtilda = o - T.reshape(len(T),1)   # sigmoid + cross entropy: dC/dg_t = o - t
    DC_DU = np.matmul(g.T,DC_DGtilda)  
    DC_DG = np.matmul(DC_DGtilda,U.T)
    DC_DHtilda = (1 - g**2) * DC_DG        # tanh'(h_t) = 1 - tanh(h_t)^2 = 1 - g^2
    DC_DV = np.matmul(h.T,DC_DHtilda) 
    DC_DH = np.matmul(DC_DHtilda, V.T)
    DC_DXtilda = (1-h**2)*DC_DH            # tanh'(x_t) = 1 - h^2
    DC_DW = np.matmul(X.T, DC_DXtilda)

    # Gradients with respect to the biases: sum each pre-activation gradient
    # over the samples (axis 0)
    DC_du0 = DC_DGtilda.sum(axis=0)
    DC_dv0 = DC_DHtilda.sum(axis=0)
    DC_dw0 = DC_DXtilda.sum(axis=0) 


    # Perform the weight updates in place, using the average gradient
    # (summed gradient divided by the number of training samples)
    W -= DC_DW * learning_rate / sub_Xtrain.shape[0]
    V -= DC_DV * learning_rate / sub_Xtrain.shape[0]
    U -= DC_DU * learning_rate / sub_Xtrain.shape[0]

    # Update the bias terms the same way
    w0 -= DC_dw0 * learning_rate / sub_Xtrain.shape[0]
    v0 -= DC_dv0 * learning_rate / sub_Xtrain.shape[0]
    u0 -= DC_du0 * learning_rate / sub_Xtrain.shape[0] 

# NOTE(review): o_test was computed at the top of the last iteration, before
# that iteration's weight update, so the "final" metrics below describe the
# weights prior to the last update, not the final weights.
print(" ")
print(f"The final Test Accuracy is: { get_Acc(o_test,Test_T) }") 
print(f"The final Cross Entropy loss is: {get_CE(o_test,Test_T)}")


Iteration:0, Test accuracy: 0.6302702702702703
Iteration:1, Test accuracy: 0.6491891891891892
Iteration:2, Test accuracy: 0.6616216216216216
Iteration:3, Test accuracy: 0.6767567567567567
Iteration:4, Test accuracy: 0.6897297297297297
Iteration:5, Test accuracy: 0.7037837837837838
Iteration:6, Test accuracy: 0.7113513513513513
Iteration:7, Test accuracy: 0.72
Iteration:8, Test accuracy: 0.7297297297297297
Iteration:9, Test accuracy: 0.7367567567567568
Iteration:10, Test accuracy: 0.7427027027027027
 
The final Test Accuracy is: 0.7427027027027027
The final Cross Entropy loss is: 1.1803255469619718
