In [None]:
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist

# Load the Fashion-MNIST train and test splits through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Human-readable name for each integer class id; the list position is the id.
class_labels = dict(enumerate([
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]))



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
# Hold out 20% of the training data as a validation set (fixed seed so the
# split is reproducible).
# NOTE: this cell rebinds X_train / y_train from their own previous values, so
# re-running it without re-running the loading cell splits the already-split
# (and already flattened / normalised) data again.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Flatten the image data: one row per image, all pixels in a single axis
X_train = X_train.reshape(X_train.shape[0], -1)
X_valid = X_valid.reshape(X_valid.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Normalize the image data by dividing every pixel value by 255
X_train = X_train / 255.0
X_valid = X_valid / 255.0
X_test = X_test / 255.0

# Convert labels to one-hot encoding
Y_train = to_categorical(y_train)
Y_valid = to_categorical(y_valid)
Y_test = to_categorical(y_test)

# Report the shape of every split; the validation lines are labelled as such
# (they were previously printed under a "Training" label).
print("Training images shape:", X_train.shape)
print("Training labels shape:", Y_train.shape)
print("Validation images shape:", X_valid.shape)
print("Validation labels shape:", Y_valid.shape)
print("Testing images shape:", X_test.shape)
print("Testing labels shape:", Y_test.shape)


Training images shape: (48000, 784)
Training labels shape: (48000, 10)
Training images shape: (12000, 784)
Training labels shape: (12000, 10)
Testing images shape: (10000, 784)
Testing labels shape: (10000, 10)


In [None]:
import numpy as np
def init_network( num_hidden_layer, num_nodes_hidden_layers, weight, input_size, output_size ):
    """Create the initial weights and biases of a fully connected network.

    Parameters
    ----------
    num_hidden_layer : int
        Number of hidden layers.
    num_nodes_hidden_layers : sequence of int
        Width of each hidden layer; only the first ``num_hidden_layer``
        entries are used.
    weight : str
        Initialisation scheme. ``'random'`` draws every entry from a standard
        normal distribution; ``'xavier'`` draws from a standard normal and
        scales by ``sqrt(2 / fan_in)``, where ``fan_in`` is the width of the
        previous layer.
    input_size : int
        Number of input features.
    output_size : int
        Number of output units.

    Returns
    -------
    dict
        Keys ``'W1'..'W{L+1}'`` and ``'b1'..'b{L+1}'`` (``L`` = number of
        hidden layers). ``W{i}`` has shape ``(size[i], size[i-1])`` and
        ``b{i}`` has shape ``(size[i], 1)``.

    Raises
    ------
    ValueError
        If ``weight`` is not one of the supported schemes. Previously an
        unknown scheme silently produced an empty dict, which only failed
        later with a ``KeyError`` during the forward pass.
    """
    if weight not in ('random', 'xavier'):
        raise ValueError(
            "Unknown weight initialisation %r; expected 'random' or 'xavier'" % (weight,)
        )

    # Layer widths from input to output: [input, hidden_1, ..., hidden_L, output].
    size = [input_size] + list(num_nodes_hidden_layers[:num_hidden_layer]) + [output_size]

    theta0 = {}
    for i in range(1, num_hidden_layer + 2):
        # 'random' keeps the raw standard-normal draw (scale of 1).
        scale = np.sqrt(2 / size[i - 1]) if weight == 'xavier' else 1.0
        # Draw W before b so the random stream is consumed in a fixed order.
        theta0['W' + str(i)] = np.random.randn(size[i], size[i - 1]) * scale
        theta0['b' + str(i)] = np.random.randn(size[i], 1) * scale

    return theta0

def sigmoid(x):
  """Element-wise logistic sigmoid, 1 / (1 + exp(-x)).

  Evaluated in a numerically stable way: the exponential is only ever taken
  of a non-positive number, so large negative inputs no longer overflow
  inside ``np.exp``.

  Parameters
  ----------
  x : scalar or array-like of real numbers

  Returns
  -------
  NumPy float for scalar input, otherwise an ndarray with the shape of ``x``.
  """
  x = np.asarray(x, dtype=float)
  e = np.exp(-np.abs(x))  # exp of a value <= 0, so it lies in [0, 1]
  # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
  out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
  # Indexing with () turns a 0-d result back into a scalar and leaves
  # arrays of one or more dimensions unchanged.
  return out[()]

def der_sigmoid(x):
  """Derivative of the logistic sigmoid at ``x``: s(x) * (1 - s(x))."""
  s = sigmoid(x)
  return s * (1 - s)

def softmax(x):
  """Softmax along axis 0 (each column is normalised independently).

  The maximum of each column is subtracted before exponentiating. Using the
  per-column maximum (rather than the maximum of the whole array) guarantees
  that every column has at least one entry equal to ``exp(0) = 1``, so no
  column can underflow to an all-zero vector and produce ``0 / 0 = NaN``.

  Parameters
  ----------
  x : ndarray
      1-D vector of scores, or a 2-D array with one column per sample.

  Returns
  -------
  ndarray of the same shape as ``x`` whose entries sum to 1 along axis 0.
  """
  shifted = x - np.max(x, axis=0, keepdims=True)
  exps = np.exp(shifted)  # computed once and reused for the normaliser
  return exps / np.sum(exps, axis=0)


def forword_prop(x,theta, num_hidden_layer,input_size):
  """Run a forward pass through the network.

  Hidden layers use the sigmoid activation and the output layer uses softmax.
  Each layer computes ``np.dot(W, h_prev) + b``, so ``x`` is presumably laid
  out with features along axis 0 and one column per sample -- confirm against
  the caller.

  Returns a tuple ``(a, h, y_pred)``:
    a      -- dict of pre-activations 'a0'..'a{L+1}'; 'a0' is a zero
              placeholder of shape (input_size, 1)
    h      -- dict of activations 'h0'..'h{L}', with 'h0' being ``x`` itself
    y_pred -- softmax of the output-layer pre-activation
  """
  out_layer = num_hidden_layer + 1
  a = {'a0': np.zeros((input_size, 1))}
  h = {'h0': x}

  # Hidden layers: affine transform followed by sigmoid.
  for layer in range(1, out_layer):
    pre_act = np.dot(theta[f'W{layer}'], h[f'h{layer - 1}']) + theta[f'b{layer}']
    a[f'a{layer}'] = pre_act
    h[f'h{layer}'] = sigmoid(pre_act)

  # Output layer: affine transform followed by softmax.
  logits = np.dot(theta[f'W{out_layer}'], h[f'h{num_hidden_layer}']) + theta[f'b{out_layer}']
  a[f'a{out_layer}'] = logits

  return a, h, softmax(logits)

'''
def backprop(x, y_actual, num_hidden_layer, theta,input_size):
  a,h,y_pred = forword_prop(x,theta, num_hidden_layer,input_size)
  grad_W_b = {}
  grad_h_a = {}
  grad_h_a['a'+str(num_hidden_layer+1)] = -1*(y_actual.T - y_pred)
  for i in range(num_hidden_layer+1, 0, -1):
    grad_W_b['W'+str(i)] = np.dot(grad_h_a['a'+str(i)],h['h'+str(i-1)].T)
    grad_W_b['b'+str(i)] = np.mean(grad_h_a['a'+str(i)],axis = 0, keepdims=True)
    grad_h_a['h'+str(i-1)] = np.dot(theta['W'+str(i)].T,grad_h_a['a'+str(i)] )
    grad_h_a['a'+str(i-1)] = grad_h_a['h'+str(i-1)]*(der_sigmoid(a['a'+str(i-1)]))

  return grad_W_b

'''
def backprop(x, y_actual, num_hidden_layer, theta, input_size):
    """Compute batch-averaged gradients of the loss w.r.t. every W and b.

    The output-layer error is ``y_pred - y_actual.T``, the form that arises
    for a softmax output combined with cross-entropy loss; it is then
    propagated backwards through the sigmoid hidden layers.

    Parameters
    ----------
    x : ndarray
        Inputs with one column per sample, i.e. shape (input_size, m). This
        layout is what the forward pass requires, since it evaluates
        ``np.dot(W, x)`` with ``W`` of shape (n_out, n_in).
    y_actual : ndarray
        Targets; transposed before being compared with the prediction, so
        presumably shape (m, n_classes) -- confirm against the caller.
    num_hidden_layer : int
        Number of hidden layers.
    theta : dict
        Parameters 'W1'.., 'b1'.. as used by ``forword_prop``.
    input_size : int
        Number of input features (forwarded to ``forword_prop``).

    Returns
    -------
    dict
        Gradients under the same keys as ``theta``. 'W{i}' has the shape of
        ``theta['W{i}']``; 'b{i}' has shape (n_i, 1).
    """
    # Number of samples is the number of COLUMNS of x; x.shape[0] is the
    # feature count and would scale the gradients by 1/input_size instead.
    m = x.shape[1]
    a, h, y_pred = forword_prop(x, theta, num_hidden_layer, input_size)
    grad_W_b = {}

    # Error at the output pre-activation.
    delta = y_pred - y_actual.T
    for i in range(num_hidden_layer + 1, 0, -1):
        grad_W_b['W' + str(i)] = (1/m)*np.dot(delta, h['h' + str(i - 1)].T)
        # The mean over the sample axis already divides by m; an additional
        # 1/m factor would normalise the bias gradient twice.
        grad_W_b['b' + str(i)] = np.mean(delta, axis=1, keepdims=True)
        if i > 1:
            # Propagate the error to the previous hidden layer. Skipped for
            # the input layer, whose gradient is never used.
            grad_h = np.dot(theta['W' + str(i)].T, delta)
            delta = grad_h * der_sigmoid(a['a' + str(i - 1)])

    return grad_W_b


def gradient_decent(lr, theta, x, y_actual, num_hidden_layer,input_size, epochs=100):
  """Train the network with full-batch gradient descent.

  Parameters
  ----------
  lr : float
      Learning rate (step size).
  theta : dict
      Parameters 'W1'.., 'b1'..; updated IN PLACE and also returned.
  x, y_actual :
      Training inputs and targets, passed unchanged to ``backprop``.
  num_hidden_layer : int
      Number of hidden layers.
  input_size : int
      Number of input features (forwarded to ``backprop``).
  epochs : int, optional
      Number of gradient steps over the whole batch. Defaults to 100, the
      value that used to be hard-coded.

  Returns
  -------
  dict
      The same ``theta`` object after ``epochs`` updates.
  """
  for _ in range(epochs):
    grad = backprop(x, y_actual, num_hidden_layer, theta,input_size)
    for i in range(1, num_hidden_layer+2):
      theta['W' + str(i)] -= lr*grad['W' + str(i)]
      theta['b' + str(i)] -= lr*grad['b' +str(i)]
  return theta


def calculate_accuracy(X_test,y_test,theta_new, num_hidden_layer,input_size):
  """Return the fraction of samples whose predicted class matches the target.

  Runs a forward pass with ``theta_new`` and compares the arg-max along
  axis 0 of the prediction with the arg-max along axis 0 of ``y_test``.
  ``y_test`` must have exactly the same shape as the network output (this is
  asserted), so it is presumably one-hot with one column per sample --
  confirm against the caller.
  """
  _, _, probs = forword_prop(X_test, theta_new, num_hidden_layer, input_size)

  assert y_test.shape == probs.shape
  true_cls = np.argmax(y_test, axis=0)
  pred_cls = np.argmax(probs, axis=0)

  total = true_cls.shape[0]  # total_instances
  # Count matching positions in one vectorised comparison; int() keeps the
  # result a plain Python float after the division.
  hits = int(np.count_nonzero(true_cls == pred_cls))

  return hits / total
