In [1129]:
import pandas as pd
import numpy as np
import matplotlib as plt
import tensorflow as tf
import random

In [1130]:
df = pd.read_csv('/Users/sohamnivargi/Desktop/Code/week34/train.csv')

X = df[df.columns[1:len(df.columns)]]
Y = df.label

X_dev = X[:1000]
Y_dev = Y[:1000]

X_train = X[1000:]
Y_train = Y[1000:]

X_dev = X_dev / 255.0
X_train = X_train / 255.0

In [1131]:
def init_params():
    units = [784, 120, 45, 10]
    W_params = [[], [], []]
    b_params = [[], [], []]
    
    for i in range(3):
        W_params[i] = np.random.rand(units[i], units[i+1])
        b_params[i] = np.random.rand(units[i+1], 1)
    return W_params, b_params
w,b = init_params()


In [1132]:
def relu(inp):
    return np.maximum(0,inp)
    
def softmax(inp):
    max_val = np.max(inp, axis=0)
    shifted_inp = inp - max_val
    exp_val = np.exp(shifted_inp)
    soft_max = exp_val / np.sum(exp_val, axis=0, keepdims=True)
    return soft_max


def one_hot(y):
    num_classes = 10
    num_examples = len(y)
    one_hot = np.zeros((num_classes, num_examples))
    
    for i in range(num_examples):
        class_label = y[i]
        one_hot[class_label, i] = 1
    return one_hot.T


In [1133]:
def forward_propagation(weights, biases, X):

    Z = []  
    A = []  

    num_layers = len(weights)
    A_prev = X.T

    for i in range(num_layers):
        Z_current = (np.dot(weights[i].T, A_prev) + biases[i]).T
        
        Z.append(Z_current)

        if i == num_layers - 1:
            A_current = softmax(Z_current)
        else:
            A_current = relu(Z_current)
        
        A.append(A_current)
        A_prev = A_current.T
        
    return Z, A


In [1134]:
def back_prop(Z1,Z2,Z3,W1,W2,W3,A1,A2,A3,x,y):
    m = x.shape[1]
    
    dZ3 = A3-y
    
    dW3 = (1 / m) * np.dot(dZ3.T, A2)
    
    db3 = (1 / m) * np.sum(dZ3.T, axis=1, keepdims=True)
    
    
    dA2 = (np.dot(W3, dZ3.T)).T
    
    
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    
    dW2 = (1 / m) * np.dot(dZ2.T, A1)
    db2 = (1 / m) * np.sum(dZ2.T, axis=1, keepdims=True)
    
    dA1 = (np.dot(W2, dZ2.T)).T
    
    dZ1 = np.multiply(dA1, np.int64(A1 > 0)) 
    dW1 = (1 / m) * np.dot(dZ1.T, x)
    db1 = (1 / m) * np.sum(dZ1.T, axis=1, keepdims=True)
   
    
    return dW1.T, db1, dW2.T, db2, dW3.T, db3

    

In [1135]:
def update_params(W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3, alpha):
    
    W1 = np.subtract(W1, alpha * dW1)
    b1 = np.subtract(b1, alpha * db1)
    W2 = np.subtract(W2, alpha * dW2)
    b2 = np.subtract(b2, alpha * db2)
    W3 = np.subtract(W3, alpha * dW3)
    b3 = np.subtract(b3, alpha * db3)

    return W1, b1, W2, b2, W3, b3


In [1136]:
def get_prediction(A3):
    
    predicted_labels = np.argmax(A3, axis=1)
    return predicted_labels

def get_accuracy(predicted_labels, true_labels):
    
    num_correct = np.sum(predicted_labels == true_labels)
    accuracy = num_correct / true_labels.shape[0]
    return accuracy

In [1137]:
def gradient_descent(X_train, Y_train, alpha=0.1, num_iterations=1000):

    W,b = init_params()
    
    y = one_hot(Y_train)
    
    for i in range(num_iterations):
        for j in range(len(y)):
        
            Z, A = forward_propagation(W,b,X_train)
            
            dW1, db1, dW2, db2, dW3, db3 = back_prop(Z[0], Z[1],Z[2], W[0], W[1], W[2], A[0], A[1], A[2], 
                                                    X_train, y)
            
            W[0], b[0], W[1], b[1], W[2], b[2] = update_params(W[0], b[0], W[1], b[1], W[2], b[2], 
                                                               dW1, db1, dW2, db2, dW3, db3, alpha)
        #xx
        
            if j % 10 == 0:
                predicted_labels = get_prediction(A[2])
                accuracy = get_accuracy(predicted_labels, Y_train)
                print("Iteration:", j, "Accuracy:", accuracy)
        
        
        if i % 10 == 0:
            predicted_labels = get_prediction(A[2])
            accuracy = get_accuracy(predicted_labels, Y_train)
            print(" 10th Iteration:", i, "Accuracy:", accuracy)

    
    return W, b
Y_train = Y_train.reset_index(drop=True)
X_train = X_train.reset_index(drop=True)

W_f, b_f = gradient_descent(X_train,Y_train,0.1,1000)



Iteration: 0 Accuracy: 0.09817073170731708


  shifted_inp = inp - max_val


Iteration: 10 Accuracy: 0.09817073170731708
Iteration: 20 Accuracy: 0.09817073170731708
Iteration: 30 Accuracy: 0.09817073170731708
Iteration: 40 Accuracy: 0.09817073170731708
Iteration: 50 Accuracy: 0.09817073170731708


KeyboardInterrupt: 

In [None]:
def make_predictions(X, W, b):
    
    Zx,Ax = forward_propagation(W,b,X)
    return Ax[2]

def test_prediction(index, W, b):
    
    X = X_train[index]
    true_label = Y_train[index]
    prediction = make_predictions(X, W, b)

    plt.imshow(X.reshape((28, 28)), cmap='gray')
    plt.axis('off')
    plt.title(f"Prediction: {prediction}, True Label: {true_label}")
    plt.show()
