# Question 2: Feedforward Neural Network on Fashion-MNIST (NumPy)

In [1]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist

In [2]:
# Load the Fashion-MNIST train/test splits through Keras.
# Each split unpacks into an (images, labels) pair; the next cell prints the four shapes.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [3]:
# Shapes, one per line: train images, train labels, test images, test labels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [4]:
# Peek at the raw image array before any preprocessing (NumPy abbreviates the repr with "...").
x_train

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [5]:
# Peek at the raw labels before one-hot encoding: integer class ids (the repr below reports uint8).
y_train

array([9, 0, 0, ..., 3, 0, 5], dtype=uint8)

In [6]:
# Flatten each image into a single row vector and scale pixel values by 1/255
# (i.e. into [0, 1], assuming 8-bit pixel intensities).
# The ndim guard makes this cell safe to re-run: once an array is 2-D it has
# already been flattened and scaled, so it is left untouched instead of being
# divided by 255 a second time.
if x_train.ndim == 3:
    x_train = x_train.reshape(x_train.shape[0], -1) / 255.0  # (60000, 28, 28) -> (60000, 784)
if x_test.ndim == 3:
    x_test = x_test.reshape(x_test.shape[0], -1) / 255.0  # (10000, 28, 28) -> (10000, 784)

In [7]:
# One-hot encoding
def one_hot_encode(y, num_classes=10):
    """Convert integer class labels into one-hot rows.

    Args:
        y: Integer class label(s), typically a 1-D array of ids in
            ``[0, num_classes)``.
        num_classes: Width of each one-hot row.

    Returns:
        Float array with one extra trailing axis of length ``num_classes``;
        each row is all zeros except for a 1.0 at the label's index.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because NumPy indexing would
            otherwise wrap them around to a valid (but wrong) class.
    """
    labels = np.asarray(y)
    # Validate on a converted copy only; index with the caller's original
    # object so list/array inputs behave exactly as before for valid labels.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise IndexError(
            f"labels must lie in [0, {num_classes}); "
            f"got values in [{labels.min()}, {labels.max()}]"
        )
    return np.eye(num_classes)[y]

# Labels start out as 1-D arrays of class ids. The ndim guard skips arrays that
# are already one-hot (2-D), so re-running this cell no longer fails by trying
# to index with a float matrix.
if y_train.ndim == 1:
    y_train = one_hot_encode(y_train)
if y_test.ndim == 1:
    y_test = one_hot_encode(y_test)

In [8]:
# Activation Functions (ReLU and Softmax)
def relu(Z):
    """Element-wise rectified linear unit: the larger of 0 and each entry of Z."""
    rectified = np.maximum(0, Z)
    return rectified

def softmax(Z):
    """Row-wise softmax over axis 1.

    Each row has its maximum subtracted before exponentiation so that large
    logits do not overflow; the shift cancels out in the final ratio.
    """
    row_max = np.max(Z, axis=1, keepdims=True)
    exponentials = np.exp(Z - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals

In [9]:
# Derivatives
def relu_derivative(Z):
    """Gradient of ReLU with respect to its input: 1.0 where Z > 0, otherwise 0.0."""
    is_positive = Z > 0
    return is_positive.astype(float)

In [10]:
# Loss Function (Cross-Entropy)
def cross_entropy_loss(y_true, y_pred, eps=1e-8):
    """Mean categorical cross-entropy over a batch.

    Args:
        y_true: Target distribution per sample (one row per sample, one
            column per class), e.g. one-hot labels.
        y_pred: Predicted probabilities with the same layout as ``y_true``.
        eps: Small constant added to ``y_pred`` before the logarithm so that
            a predicted probability of exactly 0 does not produce ``-inf``.
            Defaults to the previously hard-coded ``1e-8``.

    Returns:
        Scalar: the per-sample cross-entropy (summed over axis 1), averaged
        over all samples.
    """
    per_sample = np.sum(y_true * np.log(y_pred + eps), axis=1)
    return -np.mean(per_sample)

In [11]:
# Neural Network Class (forward and backward propagation)
class FeedforwardNN:
    """Fully connected network with ReLU hidden layers and a softmax output.

    Training uses plain mini-batch gradient descent on the cross-entropy loss.
    The class relies on the module-level helpers ``relu``, ``softmax``,
    ``relu_derivative`` and ``cross_entropy_loss``.

    Attributes set by the constructor:
        lr: Learning rate applied to every parameter update.
        layers: Layer widths, ``[input_size, *hidden_layers, output_size]``.
        params: Dict holding ``"W{i}"`` of shape ``(layers[i-1], layers[i])``
            and ``"b{i}"`` of shape ``(1, layers[i])`` for ``i = 1..len(layers)-1``.
    ``forward`` additionally stores ``cache`` with the pre-activations
    ``"Z{i}"`` and activations ``"A{i}"`` of the most recent pass.
    """

    def __init__(self, input_size=784, hidden_layers=(256, 128, 64), output_size=10, lr=0.05, seed=None):
        """Build the layer list and initialise weights and biases.

        Args:
            input_size: Number of input features.
            hidden_layers: Sequence (list or tuple) of hidden-layer widths.
            output_size: Number of output classes.
            lr: Learning rate.
            seed: Optional seed for weight initialisation. When ``None`` the
                weights are drawn from NumPy's global ``np.random`` state, as
                before; otherwise a dedicated ``np.random.default_rng(seed)``
                is used so that the same seed always gives the same weights.
        """
        self.lr = lr
        # list(...) copies the caller's sequence and accepts tuples as well as lists.
        self.layers = [input_size] + list(hidden_layers) + [output_size]

        rng = None if seed is None else np.random.default_rng(seed)

        # Initialize weights and biases
        self.params = {}
        for i in range(1, len(self.layers)):
            fan_in, fan_out = self.layers[i-1], self.layers[i]
            if rng is None:
                weights = np.random.randn(fan_in, fan_out)
            else:
                weights = rng.standard_normal((fan_in, fan_out))
            # Scale standard-normal draws by sqrt(2 / fan_in) (He-style scaling for ReLU).
            self.params[f"W{i}"] = weights * np.sqrt(2 / fan_in)
            self.params[f"b{i}"] = np.zeros((1, fan_out))

    def forward(self, X):
        """Run a forward pass and cache every layer's Z and A.

        Args:
            X: Input batch with one sample per row.

        Returns:
            Softmax probabilities of the output layer, one row per sample.
        """
        self.cache = {"A0": X}  # (Input layer)
        for i in range(1, len(self.layers) - 1):
            self.cache[f"Z{i}"] = np.dot(self.cache[f"A{i-1}"], self.params[f"W{i}"]) + self.params[f"b{i}"]
            self.cache[f"A{i}"] = relu(self.cache[f"Z{i}"])  # ReLU (for hidden layers)

        # Output Layer (Softmax)
        L = len(self.layers) - 1
        self.cache[f"Z{L}"] = np.dot(self.cache[f"A{L-1}"], self.params[f"W{L}"]) + self.params[f"b{L}"]
        self.cache[f"A{L}"] = softmax(self.cache[f"Z{L}"])

        return self.cache[f"A{L}"]

    def backward(self, X, y_true):
        """Backpropagate through the cached forward pass and update the parameters in place.

        Must be called after ``forward`` on the same batch, because all
        activations are read from ``self.cache``.

        Args:
            X: The batch that was fed to ``forward``; only its row count is
                used here (to average the gradients).
            y_true: Target rows matching the cached output activations.
        """
        grads = {}
        L = len(self.layers) - 1
        m = X.shape[0]

        # Output layer error (Softmax derivative with Cross-Entropy)
        dZ = self.cache[f"A{L}"] - y_true
        grads[f"W{L}"] = (1 / m) * np.dot(self.cache[f"A{L-1}"].T, dZ)
        grads[f"b{L}"] = (1 / m) * np.sum(dZ, axis=0, keepdims=True)

        # Backpropagation through hidden layers
        for i in range(L-1, 0, -1):
            dA = np.dot(dZ, self.params[f"W{i+1}"].T)
            dZ = dA * relu_derivative(self.cache[f"Z{i}"])
            grads[f"W{i}"] = (1 / m) * np.dot(self.cache[f"A{i-1}"].T, dZ)
            grads[f"b{i}"] = (1 / m) * np.sum(dZ, axis=0, keepdims=True)

        # Update weights only after all gradients are computed, so every
        # gradient is based on the pre-update parameters.
        for i in range(1, len(self.layers)):
            self.params[f"W{i}"] -= self.lr * grads[f"W{i}"]
            self.params[f"b{i}"] -= self.lr * grads[f"b{i}"]

    def train(self, X, y, epochs=10, batch_size=64):
        """Train with mini-batch gradient descent and print the loss after each epoch.

        Batches are consecutive slices of ``X``/``y`` taken in the given order
        (no shuffling); the final batch may be smaller than ``batch_size``.

        Args:
            X: Training inputs, one sample per row.
            y: Training targets aligned with ``X``.
            epochs: Number of passes over the data.
            batch_size: Number of samples per update step.
        """
        for epoch in range(epochs):
            for i in range(0, X.shape[0], batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                self.forward(X_batch)
                self.backward(X_batch, y_batch)

            # loss for every epoch, measured on the full data set passed in
            loss = cross_entropy_loss(y, self.forward(X))
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the index of the highest-probability class for each row of X."""
        probabilities = self.forward(X)
        return np.argmax(probabilities, axis=1)

    def evaluate(self, X, y, label="Test"):
        """Print and return classification accuracy on ``(X, y)``.

        Args:
            X: Inputs, one sample per row.
            y: Targets with one column per class (the true class is taken as
                the arg-max of each row).
            label: Name of the data split used in the printed message, e.g.
                ``"Train"``. Defaults to ``"Test"``, which reproduces the
                previously hard-coded message.

        Returns:
            Fraction of rows whose predicted class matches the true class.
        """
        predictions = self.predict(X)
        true_labels = np.argmax(y, axis=1)
        accuracy = np.mean(predictions == true_labels)
        print(f"{label} Accuracy: {accuracy * 100:.2f}%")
        return accuracy

In [12]:
# Train the Model
# Seed NumPy's global generator first: FeedforwardNN draws its initial weights
# from np.random, so without a fixed seed every run yields different losses
# and accuracies.
np.random.seed(42)
nn = FeedforwardNN(hidden_layers=[256,128, 64])
nn.train(x_train, y_train, epochs=20, batch_size=64)

Epoch 1/20, Loss: 0.4464
Epoch 2/20, Loss: 0.4167
Epoch 3/20, Loss: 0.3961
Epoch 4/20, Loss: 0.3525
Epoch 5/20, Loss: 0.3380
Epoch 6/20, Loss: 0.3006
Epoch 7/20, Loss: 0.2984
Epoch 8/20, Loss: 0.2939
Epoch 9/20, Loss: 0.2716
Epoch 10/20, Loss: 0.2828
Epoch 11/20, Loss: 0.2686
Epoch 12/20, Loss: 0.2504
Epoch 13/20, Loss: 0.2420
Epoch 14/20, Loss: 0.2372
Epoch 15/20, Loss: 0.2357
Epoch 16/20, Loss: 0.2206
Epoch 17/20, Loss: 0.2210
Epoch 18/20, Loss: 0.2141
Epoch 19/20, Loss: 0.2121
Epoch 20/20, Loss: 0.2019


In [13]:
# Accuracy on the training split.
# NOTE: evaluate() prints a "Test Accuracy" label by default, so the figure
# reported below is training accuracy despite the wording.
nn.evaluate(X=x_train, y=y_train)

Test Accuracy: 92.31%


np.float64(0.9230666666666667)

In [14]:
# Accuracy on the held-out test split.
nn.evaluate(X=x_test, y=y_test)

Test Accuracy: 87.95%


np.float64(0.8795)