In [62]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Mount Google Drive into the Colab filesystem so files under
# /content/drive/MyDrive become readable by the cells below.
from google.colab import drive
drive.mount('/content/drive')

# Read train.csv from Drive into a pandas DataFrame.
# NOTE(review): `data` is not referenced by any of the cells visible here —
# confirm it is still needed before keeping this load.
data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/train.csv")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [63]:
def fashion_mnist_data(data_dir="/content/drive/MyDrive/Colab Notebooks/datasets",
                       train_fraction=0.8):
    """Load Fashion-MNIST ``.npy`` files and split them into train/validation/test sets.

    Every returned array is laid out column-per-sample: images have one sample
    per column and labels are one-hot encoded with one column per sample.
    The test labels are returned in that same layout, so they line up with the
    transposed test images exactly like the train and validation pairs do.

    Assumes the label files hold 1-D integer class ids (they are used directly
    as row indices into an identity matrix) -- TODO confirm against the files.

    Args:
        data_dir: Directory containing the four ``fashion_mnist_*.npy`` files.
        train_fraction: Fraction of the training file (taken from the start,
            without shuffling) used for training; the remainder is validation.

    Returns:
        Tuple ``(Xtr, yhtr_lb, Xv, yhv_lb, Xte, yhte_lb)`` of train images,
        train one-hot labels, validation images, validation one-hot labels,
        test images and test one-hot labels.
    """
    # Load data
    X_tr = np.load(f"{data_dir}/fashion_mnist_train_images.npy")
    y_trlb = np.load(f"{data_dir}/fashion_mnist_train_labels.npy")
    X_te = np.load(f"{data_dir}/fashion_mnist_test_images.npy")
    y_telb = np.load(f"{data_dir}/fashion_mnist_test_labels.npy")

    # One-hot encode: row k of the identity matrix is the encoding of class k.
    classes = int(np.max(y_trlb)) + 1
    identity = np.eye(classes)
    yh_train = identity[y_trlb]
    yh_test = identity[y_telb]

    # Split the training file into training and validation parts
    # (both images and labels), keeping the original order.
    N = X_tr.shape[0]
    split = int(train_fraction * N)
    Xtr = X_tr[:split].T
    Xv = X_tr[split:].T
    yhtr_lb = yh_train[:split].T
    yhv_lb = yh_train[split:].T

    # Transpose the test set the same way so all six arrays share one layout.
    Xte = X_te.T
    yhte_lb = yh_test.T

    return Xtr, yhtr_lb, Xv, yhv_lb, Xte, yhte_lb

In [65]:
# Load the six splits, then rescale the images: divide by 255 and shift by -0.5
# (assumes raw pixel values lie in [0, 255], giving a range of [-0.5, 0.5]).
# NOTE: re-running this cell reloads the data first, so the rescaling is not
# applied twice.
X_train, y_train, X_val, y_val, X_test, y_test = fashion_mnist_data()
X_train = X_train / 255.0 - 0.5
X_val = X_val / 255.0 - 0.5
X_test = X_test / 255.0 - 0.5

# Hyper-parameter candidates, each kept as a single-element list.
learning_rates = [0.01]
mini_batch_size = [128]
epochs = [100]
alpha = [0.01]

# Network layout constants.
NUM_INPUT = 784
NUM_OUTPUT = 10
NUM_HIDDEN_LAYERS = 3
NUM_HIDDEN = [64] * NUM_HIDDEN_LAYERS

# NOTE(review): initWeightsandBiases is not defined in the cells shown here --
# confirm it is defined before this cell on a fresh kernel.
Ws, bs = initWeightsandBiases()

# Pack every weight matrix, then every bias, into one flat parameter vector.
weights = np.concatenate([param.ravel() for param in (*Ws, *bs)])

weights.shape

(59210,)

In [117]:
def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and ``z``."""
    rectified = np.maximum(0, z)
    return rectified

def grad_relu(z):
    """Derivative of ReLU: 1 where ``z`` is strictly positive, 0 elsewhere (integer array)."""
    is_positive = z > 0
    return is_positive.astype(int)

def softmax(z):
    """Softmax along axis 0, so every column of the result sums to 1.

    Each column's maximum is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing on large inputs without changing the result.
    """
    column_max = np.max(z, axis=0, keepdims=True)
    exp_z = np.exp(z - column_max)
    normaliser = np.sum(exp_z, axis=0, keepdims=True)
    return exp_z / normaliser

def forward_pass(x, Ws, bs):
    """Run a batch through the network and keep every intermediate value.

    The depth is taken from ``len(Ws)`` rather than from a module-level
    constant, so the function works for any number of layers (including a
    single layer, where no ReLU is applied and ``h`` is empty).

    Args:
        x: Input batch with one sample per column.
        Ws: List of weight matrices, one per layer; layer ``k`` computes
            ``Ws[k].dot(previous_activation)``.
        bs: List of biases, one per layer; each is reshaped to a column so it
            broadcasts across the batch.

    Returns:
        Tuple ``(y_hat, z, h)`` where ``y_hat`` is the softmax output, ``z``
        is the list of pre-activations for every layer (output layer last)
        and ``h`` is the list of ReLU activations of the non-output layers.
    """
    z = []
    h = []

    # Every layer except the last one is affine followed by ReLU.
    activation = x
    for W, b in zip(Ws[:-1], bs[:-1]):
        pre_activation = W.dot(activation) + b.reshape(-1, 1)
        activation = relu(pre_activation)
        z.append(pre_activation)
        h.append(activation)

    # Output layer: affine followed by softmax over the class axis.
    logits = Ws[-1].dot(activation) + bs[-1].reshape(-1, 1)
    z.append(logits)
    y_hat = softmax(logits)

    return y_hat, z, h

def ce_loss(y_hat, y):
    """Cross-entropy between ``y`` and ``y_hat``, averaged over ``y.shape[1]``.

    A small constant (1e-8) is added inside the logarithm so that a predicted
    value of exactly 0 does not produce ``-inf``.
    """
    batch_size = y.shape[1]
    log_probs = np.log(y_hat + 1e-8)
    total = np.sum(y * log_probs)
    return -total / batch_size

def backward_pass(x, y, y_hat, Ws, bs, z, h):
    """Back-propagate the softmax cross-entropy loss through the network.

    The output error is averaged over the batch size ``y.shape[1]`` -- the same
    normaliser ``ce_loss`` uses -- so the returned values are the gradients of
    that loss. (Dividing by ``y.shape[0]``, the number of classes, would scale
    every gradient by batch_size / num_classes.)

    Args:
        x: Input batch with one sample per column.
        y: One-hot targets with one sample per column.
        y_hat: Network output for ``x`` (same shape as ``y``).
        Ws: List of weight matrices, one per layer.
        bs: List of biases; unused here, kept for a uniform call signature.
        z: Pre-activations of every layer, as returned by the forward pass.
        h: ReLU activations of the non-output layers.

    Returns:
        Tuple ``(grad_Ws, grad_bs)``: lists with one entry per layer. Each
        weight gradient has the shape of its weight matrix; each bias
        gradient is a column vector (``keepdims=True``).
    """
    num_layers = len(Ws)
    batch_size = y.shape[1]

    delta = [None] * num_layers
    grad_Ws = [None] * num_layers
    grad_bs = [None] * num_layers

    # Softmax + cross-entropy gives the simple output error (y_hat - y).
    delta[-1] = (y_hat - y) / batch_size

    # Walk from the output layer back to the first layer.
    for k in range(num_layers - 1, -1, -1):
        if k < num_layers - 1:
            # Pull the error back through layer k+1 and the ReLU of layer k.
            delta[k] = Ws[k + 1].T.dot(delta[k + 1]) * grad_relu(z[k])
        # Layer 0 is fed by the raw input; deeper layers by the previous activation.
        layer_input = x if k == 0 else h[k - 1]
        grad_Ws[k] = delta[k].dot(layer_input.T)
        grad_bs[k] = np.sum(delta[k], axis=1, keepdims=True)

    return grad_Ws, grad_bs

def weights_update(Ws, bs, grad_Ws, grad_bs, epsilon):
    """Apply one gradient-descent step to every layer's weights and biases.

    The number of layers is taken from ``len(Ws)`` instead of a module-level
    constant. Each bias gradient is reshaped to the shape of the bias it
    updates: gradients may arrive as column vectors, and subtracting a
    ``(k, 1)`` array from a 1-D ``(k,)`` bias would otherwise broadcast the
    bias into a ``(k, k)`` matrix.

    Args:
        Ws: List of weight matrices; entries are replaced in place.
        bs: List of biases; entries are replaced in place.
        grad_Ws: Gradients matching ``Ws`` entry by entry.
        grad_bs: Gradients matching ``bs`` entry by entry (any shape with the
            same number of elements as the corresponding bias).
        epsilon: Learning rate.

    Returns:
        The same ``(Ws, bs)`` lists, now holding the updated parameters.
    """
    for i in range(len(Ws)):
        Ws[i] = Ws[i] - epsilon * grad_Ws[i]
        bias_grad = np.reshape(grad_bs[i], np.shape(bs[i]))
        bs[i] = bs[i] - epsilon * bias_grad

    return Ws, bs

def get_predictions(z):
    """Return, for each column of ``z``, the row index holding its largest value."""
    winners = np.argmax(z, axis=0)
    return winners

def get_accuracy(predictions, Y):
    """Fraction of samples whose predicted class equals the true class.

    ``Y`` may be given either as class indices or as a one-hot matrix with one
    sample per column. A one-hot matrix is first reduced to class indices;
    comparing indices directly against the 0/1 entries would broadcast and
    produce a meaningless ratio.

    Args:
        predictions: 1-D array of predicted class indices.
        Y: True labels -- class indices (any shape that broadcasts against
            ``predictions``) or a ``(num_classes, num_samples)`` one-hot matrix.

    Returns:
        Accuracy as a float in ``[0, 1]``.
    """
    predictions = np.asarray(predictions)
    labels = np.asarray(Y)

    looks_one_hot = (
        labels.ndim == 2
        and predictions.ndim == 1
        and labels.shape[0] > 1
        and labels.shape[1] == predictions.shape[0]
    )
    if looks_one_hot:
        labels = np.argmax(labels, axis=0)

    return np.sum(predictions == labels) / labels.size

def train(X_train, y_train, X_test, y_test, weights, epsilon, e, b):
    """Train the network with mini-batch stochastic gradient descent.

    Samples are stored one per column, so the number of samples is
    ``X_train.shape[1]``; shuffling and batching both run over that axis.
    (Using ``X_train.shape[0]``, the feature count, would shuffle only the
    first few hundred samples and then iterate over empty batches.)

    Args:
        X_train: Training inputs, one sample per column.
        y_train: One-hot training targets, one sample per column.
        X_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        weights: Flat parameter vector, expanded into per-layer lists by
            ``unpack``.
        epsilon: Learning rate.
        e: Number of epochs.
        b: Mini-batch size; clamped to ``[1, number of samples]``.

    Returns:
        Tuple ``(Ws, bs)`` with the trained per-layer weights and biases.
    """
    num_samples = X_train.shape[1]
    Ws, bs = unpack(weights)

    # Keep the batch size usable: at least 1, at most the whole data set.
    b = max(1, min(b, num_samples))

    for i in range(e):
        # Reshuffle the sample columns at the start of every epoch.
        ids = np.random.permutation(num_samples)
        x_shuffle = X_train[:, ids]
        y_shuffle = y_train[:, ids]

        for j in range(0, num_samples, b):
            # Creating a mini batch for input x and output y
            xtr = x_shuffle[:, j:j + b]
            ytr = y_shuffle[:, j:j + b]

            y_hat, z, h = forward_pass(xtr, Ws, bs)
            grad_Ws, grad_bs = backward_pass(xtr, ytr, y_hat, Ws, bs, z, h)
            Ws, bs = weights_update(Ws, bs, grad_Ws, grad_bs, epsilon)

        # Report once per logged epoch (not once per mini-batch), using the
        # last mini-batch's forward pass from before its weight update.
        if i % 10 == 0 and num_samples > 0:
            print(f"Iteration: {i}")
            print(f"Loss: {ce_loss(y_hat, ytr)}")
            # Compare class indices with class indices, not with one-hot columns.
            print(get_accuracy(get_predictions(y_hat), get_predictions(ytr)))
    return Ws, bs