<a href="https://colab.research.google.com/github/ShahistaAfreen/DL_A1/blob/main/DL_A1_Q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from keras.datasets import fashion_mnist
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
import wandb
# Authenticate this session with Weights & Biases before any run is created.
# The `True` displayed as this cell's output is the value returned by login().
wandb.login() # Login



True

In [3]:
# Create a new run in a project and keep the handle in `run`.
# NOTE(review): the project name and the notes string look like leftovers from a
# different notebook (this one builds a feed-forward network) -- confirm they are
# the intended values before logging results under them.
# NOTE(review): `run` is not used by any later cell visible here.
run = wandb.init(
    project="Plotting images from each class",
    notes="commit message for the run",
    config={}
)

In [7]:
# Load the Fashion-MNIST images together with their integer class labels.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Normalize: divide every pixel value by 255.0.
X_train = X_train / 255.0
X_test = X_test / 255.0

# Flatten each image into a vector. The training set is kept sample-major
# (samples x features) for now because train_test_split splits along axis 0;
# the test set goes straight to the (features x samples) layout.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1).T

# One-hot encode the labels by selecting rows of the identity matrix.
num_classes = 10
y_train_one_hot = np.eye(num_classes)[y_train]
y_test_one_hot = np.eye(num_classes)[y_test].T

# Hold out 10% of the training samples for validation (fixed seed).
X_train, X_val, y_train_one_hot, y_val_one_hot = train_test_split(
    X_train, y_train_one_hot, test_size=0.1, random_state=42
)

# Switch the split arrays to one-column-per-sample layout.
X_train, X_val = X_train.T, X_val.T
y_train_one_hot, y_val_one_hot = y_train_one_hot.T, y_val_one_hot.T

# NOTE(review): y_train (the integer labels) is not part of the split, so it
# still holds every original label and no longer lines up with X_train's columns.

In [9]:
# Sanity check of the layout after the split: features x samples for the inputs,
# classes x samples for the one-hot labels (one column per training sample).
X_train.shape, y_train_one_hot.shape

((784, 54000), (10, 54000))

Feed Forward Neural Network

In [None]:
# Activation Functions
def relu(Z):
    """
    Rectified linear unit, applied element-wise.

    Z: Array of pre-activation values (any shape)

    Returns: Array of the same shape holding max(0, z) for every entry z
    """
    rectified = np.maximum(0, Z)
    return rectified

def d_relu(Z):
    """
    Element-wise derivative of ReLU.

    Z: Array of pre-activation values (any shape)

    Returns: Float array of the same shape, 1.0 where Z > 0 and 0.0 elsewhere
             (the derivative at exactly 0 is taken to be 0)
    """
    positive_mask = Z > 0
    return positive_mask.astype(float)

def softmax(Z):
    """
    Column-wise softmax.

    Z: Array of scores; normalisation runs along axis 0, so each column is
       treated as one sample

    Returns: Array of the same shape whose entries along axis 0 sum to 1
    """
    # Subtracting the per-column maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large scores.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exps = np.exp(shifted)
    normaliser = exps.sum(axis=0, keepdims=True)
    return exps / normaliser

def sigmoid(x):
    """
    Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    The direct formula overflows in np.exp(-x) for large negative x. This
    version only ever exponentiates non-positive numbers, so it cannot overflow.

    x: Scalar or array-like of real values (any shape)

    Returns: NumPy array of the same shape (a NumPy scalar for scalar input)
             with values in [0, 1]
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp of a non-positive number: always in [0, 1]
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0 the same value is rewritten as
    # exp(x) / (1 + exp(x)) so that no large positive exponent is evaluated.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result into a scalar and is a no-op otherwise.
    return result[()]

def d_sigmoid(x):
    """
    Element-wise derivative of the logistic sigmoid.

    x: Values at which to evaluate the derivative (same inputs sigmoid accepts)

    Returns: (1 - sigmoid(x)) * sigmoid(x), with the same shape as sigmoid(x)
    """
    s = sigmoid(x)  # evaluate once and reuse for both factors
    return (1 - s) * s

In [None]:
#Loss functions
def cross_entropy_loss(y_true, y_pred):
    """
    Mean categorical cross-entropy between one-hot labels and predictions.

    y_true: One-hot encoded true labels (shape: num_classes x num_samples)
    y_pred: Predicted class probabilities (shape: num_classes x num_samples)

    Returns: Loss summed over all entries and divided by the number of
             samples (the size of axis 1 of y_true)
    """
    num_samples = y_true.shape[1]
    # The small constant keeps np.log finite when a predicted probability is 0.
    log_probs = np.log(y_pred + 1e-9)
    total = np.sum(y_true * log_probs)
    return -total / num_samples


def d_cross_entropy_loss(y_true, y_pred):
    """
    Gradient of the cross-entropy loss for a softmax output layer.

    When softmax is followed by cross-entropy, the gradient with respect to
    the pre-softmax scores Z simplifies to (softmax output - one-hot labels).
    The result is per-sample: it is not divided by the number of samples.

    y_true: One-hot encoded true labels (num_classes x num_samples)
    y_pred: Softmax output predictions (num_classes x num_samples)

    Returns: Gradient (num_classes x num_samples)
    """
    gradient = y_pred - y_true
    return gradient


In [None]:
def FFNN(x_train, weights, hidden_layers):
    """
    Forward pass through a feed-forward network.

    No bias term is added here; each layer is a plain matrix product followed
    by its activation (ReLU for hidden layers, softmax for the output layer).

    x_train: Input matrix with one column per sample
    weights: Indexable collection of weight matrices; weights[i] feeds hidden
             layer i and weights[-1] feeds the output layer
    hidden_layers: Number of hidden (ReLU) layers to apply

    Returns: Softmax output of the final layer, one column per sample
    """
    signal = x_train

    # Hidden layers: linear transformation followed by ReLU.
    for layer_idx in range(hidden_layers):
        signal = relu(np.dot(weights[layer_idx], signal))

    # Output layer: linear transformation followed by softmax.
    logits = np.dot(weights[-1], signal)
    return softmax(logits)

In [None]:
# Layer Class
class Layer:
    """
    Fully connected layer computing A = activation(W . A_prev + b).

    Activations are stored with one column per sample. feedforward() caches
    its input and pre-activation so that backprop() can compute gradients and
    immediately apply one optimizer step to W and b.
    """

    # Maps an activation name to (function, element-wise derivative).
    activationFunctions = {
        'relu': (relu, d_relu),
        'sigmoid': (sigmoid, d_sigmoid),
        'softmax': (softmax, None)  # Softmax doesn't have simple element-wise derivative
    }

    def __init__(self, inputs, neurons, activation):
        """
        inputs: Number of input features (columns of W)
        neurons: Number of units in this layer (rows of W)
        activation: Key into activationFunctions ('relu', 'sigmoid' or 'softmax');
                    an unknown name raises KeyError
        """
        self.W = np.random.randn(neurons, inputs) * 0.01  # small random weights
        self.b = np.zeros((neurons, 1))
        self.activation, self.d_activation = self.activationFunctions[activation]

    def feedforward(self, A_prev):
        """
        Compute this layer's activation and cache what backprop() needs.

        A_prev: Output of the previous layer (inputs x num_samples)

        Returns: Activation of this layer (neurons x num_samples)
        """
        self.A_prev = A_prev
        self.Z = np.dot(self.W, self.A_prev) + self.b
        self.A = self.activation(self.Z)
        return self.A

    def _optimizer_state(self, attr):
        """Return the per-layer state dict stored under attr, creating zeros on first use."""
        state = getattr(self, attr, None)
        if state is None:
            state = {'dW': np.zeros_like(self.W), 'db': np.zeros_like(self.b)}
            setattr(self, attr, state)
        return state

    def backprop(self, dA, optimizer, learning_rate=0.1, v=None, s=None, t=1):
        """
        Back-propagate through this layer and apply one optimizer step.

        Must be called after feedforward(), which caches A_prev and Z.

        dA: Gradient of the loss w.r.t. this layer's activation. For a layer
            without an element-wise derivative (softmax) it is used directly as
            the gradient w.r.t. Z, e.g. y_pred - y_true
        optimizer: 'sgd', 'momentum' or 'adam'; any other value leaves W and b
                   unchanged
        learning_rate: Step size
        v: Dict with 'dW'/'db' first-moment (velocity) arrays, updated in place.
           When None, state kept on the layer itself is used
        s: Dict with 'dW'/'db' second-moment arrays for Adam, updated in place.
           When None, state kept on the layer itself is used
        t: 1-based Adam step count used for bias correction; the caller has to
           increase it on every update

        Returns: Gradient of the loss w.r.t. A_prev, to pass to the previous layer
        """
        if self.d_activation is not None:
            dZ = dA * self.d_activation(self.Z)
        else:
            dZ = dA  # Softmax handled separately in output layer

        batch_size = dZ.shape[1]
        dW = np.dot(dZ, self.A_prev.T) / batch_size
        db = np.sum(dZ, axis=1, keepdims=True) / batch_size
        # Computed before the update below so it uses the weights that
        # produced the forward pass.
        dA_prev = np.dot(self.W.T, dZ)

        # Optimization Step
        if optimizer == 'sgd':
            self.W -= learning_rate * dW
            self.b -= learning_rate * db

        elif optimizer == 'momentum':
            # Fall back to layer-owned state instead of failing on v=None.
            if v is None:
                v = self._optimizer_state('_v')
            v['dW'] = 0.9 * v['dW'] + learning_rate * dW
            v['db'] = 0.9 * v['db'] + learning_rate * db
            self.W -= v['dW']
            self.b -= v['db']

        elif optimizer == 'adam':
            # Fall back to layer-owned state instead of failing on v=None / s=None.
            if v is None:
                v = self._optimizer_state('_v')
            if s is None:
                s = self._optimizer_state('_s')
            beta1, beta2, eps = 0.9, 0.999, 1e-8
            v['dW'] = beta1 * v['dW'] + (1 - beta1) * dW
            v['db'] = beta1 * v['db'] + (1 - beta1) * db
            s['dW'] = beta2 * s['dW'] + (1 - beta2) * (dW**2)
            s['db'] = beta2 * s['db'] + (1 - beta2) * (db**2)
            # Bias correction compensates for the zero-initialised moments.
            v_corrected_dW = v['dW'] / (1 - beta1**t)
            v_corrected_db = v['db'] / (1 - beta1**t)
            s_corrected_dW = s['dW'] / (1 - beta2**t)
            s_corrected_db = s['db'] / (1 - beta2**t)
            self.W -= learning_rate * v_corrected_dW / (np.sqrt(s_corrected_dW) + eps)
            self.b -= learning_rate * v_corrected_db / (np.sqrt(s_corrected_db) + eps)

        return dA_prev

In [None]:


# Run forward pass
# NOTE(review): x_train_with_bias, weights and hidden_layers are not defined in any
# cell visible in this notebook, so this cell raises NameError as written. FFNN adds
# no bias itself; the name x_train_with_bias suggests a bias row is meant to be
# appended to the inputs first -- confirm and define all three before running.
output = FFNN(x_train_with_bias, weights, hidden_layers)
print("Output shape:", output.shape)  # Expected: (10, number of samples)
