In [15]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import imgaug.augmenters as iaa
import random
from sklearn.metrics import classification_report

In [16]:
data_train = pd.read_csv('/home/siddharth/MRM_SiddharthReddy/DL_Task/mnist_train.csv')
data_test = pd.read_csv('/home/siddharth/MRM_SiddharthReddy/DL_Task/mnist_test.csv')
data_test = np.array(data_test)
data_train = np.array(data_train)
np.random.shuffle(data_train)
np.random.shuffle(data_test)
m1,n1 = data_test.shape
m2,n2 = data_train.shape
data_test = data_test[0:m1].T
Y_test = data_test[0]
X_test = data_test[1:n1]
X_test = X_test / 255.

data_train = data_train[0:m2].T
split_ratio = 0.8 

split_index = int(m2 * split_ratio)

X_train = data_train[1:n2, :split_index]
Y_train = data_train[0, :split_index]

X_val = data_train[1:n2, split_index:]
Y_val = data_train[0, split_index:]

X_train = X_train / 255.
X_val = X_val / 255.

_, m_train = X_train.shape
_, m_val = X_val.shape

# def apply_augmentations(images):
#     seq = iaa.Sequential([
#         iaa.GaussianBlur(sigma=(0, 1.0)),
#         iaa.Affine(rotate=(-10, 10)),
#         iaa.Fliplr(0.5),
#     ])
    
#     augmented_images = seq.augment_images(images.reshape(-1, 28, 28, 1))
#     return augmented_images.reshape(-1, images.shape[1])  


# X_train = apply_augmentations(X_train)
# X_test = apply_augmentations(X_test)
# example_index = random.randint(0, X_train_augmented.shape[0] - 1)


# current_image = (X_train[example_index] * 255)

# plt.gray()
# plt.imshow(current_image, interpolation='nearest')
# plt.show()

In [17]:
def init_params():
    W1 = np.random.rand(20, 784) - 0.5  
    b1 = np.random.rand(20, 1) - 0.5
    W2 = np.random.rand(15, 20) - 0.5 
    b2 = np.random.rand(15, 1) - 0.5
    W3 = np.random.rand(10, 15) - 0.5  
    b3 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2, W3, b3
    
def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A       

In [18]:
def forward_prop(W1, b1, W2, b2, W3, b3, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = ReLU(Z2)
    Z3 = W3.dot(A2) + b3
    A3 = softmax(Z3)
    return Z1, A1, Z2, A2, Z3, A3

In [19]:
def ReLU_deriv(Z):
    return Z > 0

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [20]:
def backward_prop(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y):
    one_hot_Y = one_hot(Y)
    dZ3 = A3 - one_hot_Y
    dW3 = 1 / m1 * dZ3.dot(A2.T)
    db3 = 1 / m1 * np.sum(dZ3, axis=1, keepdims=True)
    dZ2 = W3.T.dot(dZ3) * ReLU_deriv(Z2)
    dW2 = 1 / m1 * dZ2.dot(A1.T)
    db2 = 1 / m1 * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m1 * dZ1.dot(X.T)
    db1 = 1 / m1 * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2, dW3, db3

In [21]:

def update_params(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    W3 = W3 - alpha * dW3
    b3 = b3 - alpha * db3
    return W1, b1, W2, b2, W3, b3

In [22]:
def get_predictions(A3):
    return np.argmax(A3, axis=0)

def get_accuracy(predictions, Y):
    accuracy = accuracy_score(Y, predictions)
    return accuracy

def cost_fn(A3, Y):
    m = Y.shape[0]  
    epsilon = 1e-15  
    cost = -1/m * np.sum(Y * np.log(A3 + epsilon))
    return cost

In [23]:
def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2, W3, b3 = init_params()
    costs = []
    
    for i in range(iterations):
        Z1, A1, Z2, A2, Z3, A3 = forward_prop(W1, b1, W2, b2, W3, b3, X)
        cost = cost_fn(A3, one_hot(Y))
        costs.append(cost)

        dW1, db1, dW2, db2, dW3, db3 = backward_prop(Z1, A1, Z2, A2, Z3, A3, W1, W2, W3, X, Y)
        W1, b1, W2, b2, W3, b3 = update_params(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha)

        if i % 10 == 0:
            print("Cost:", cost)

    return W1, b1, W2, b2, W3, b3, costs
W1, b1, W2, b2, W3, b3, costs = gradient_descent(X_train, Y_train, 0.10, 400)

Cost: 16987.670265293258


Cost: 8126.191562204309
Cost: 6927.6631576632535
Cost: 5469.506052057505
Cost: 4792.497015316516
Cost: 4444.498044200555
Cost: 3447.12260374717
Cost: 3196.8079512105564
Cost: 3017.878694079264
Cost: 2817.3285905178113
Cost: 2604.7556969418606
Cost: 2500.559021927913
Cost: 2669.3556389710934
Cost: 2195.2229775245223
Cost: 2113.716783250102
Cost: 2318.407680316063
Cost: 1987.3441483563965
Cost: 2030.7363806821368
Cost: 1941.6779374903163
Cost: 1845.8901144572392
Cost: 1827.2268310056254
Cost: 1869.9757824574217
Cost: 1909.7600804376898
Cost: 1684.3855066306999
Cost: 1650.4866177038211
Cost: 1668.123022703584
Cost: 1818.3891352306591
Cost: 1586.7153552635336
Cost: 1547.9504783973757
Cost: 1663.3889904375865
Cost: 1551.6249601927186
Cost: 1489.956470858993
Cost: 1466.5984505259441
Cost: 1447.033762909821
Cost: 1429.6919570566452
Cost: 1422.577190421152
Cost: 1593.1188226831564
Cost: 1432.3085294688624
Cost: 1376.559953345178
Cost: 1363.1747304583168


In [24]:
def make_predictions(X, W1, b1, W2, b2, W3, b3):
    _, _, _, _, _, A3 = forward_prop(W1, b1, W2, b2, W3, b3, X)
    predictions = get_predictions(A3)
    return predictions
    
def test_prediction(index, W1, b1, W2, b2,W3,b3):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2,W3,b3)

In [25]:

train_predictions = make_predictions(X_train, W1, b1, W2, b2, W3, b3)
train_report = classification_report(Y_train, train_predictions)
print("train Set Classification Report:")
print(train_report)

test_predictions = make_predictions(X_test, W1, b1, W2, b2, W3, b3)
test_report = classification_report(Y_test, test_predictions)
print("Test Set Classification Report:")
print(test_report)

val_predictions = make_predictions(X_val, W1, b1, W2, b2, W3, b3)
val_report = classification_report(Y_val, val_predictions)
print("val Set Classification Report:")
print(val_report)

train Set Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.95      4753
           1       0.96      0.96      0.96      5362
           2       0.93      0.88      0.90      4845
           3       0.88      0.88      0.88      4830
           4       0.92      0.91      0.91      4632
           5       0.90      0.85      0.87      4379
           6       0.94      0.96      0.95      4745
           7       0.95      0.92      0.93      5038
           8       0.85      0.91      0.88      4646
           9       0.86      0.91      0.89      4770

    accuracy                           0.91     48000
   macro avg       0.91      0.91      0.91     48000
weighted avg       0.91      0.91      0.91     48000

Test Set Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.97      0.95       980
           1       0.97      0.96      0.97      1135
           2 