In [1]:
from keras.datasets import fashion_mnist
import numpy as np
import matplotlib.pyplot as plt
import wandb

In [7]:
# Initialize WandB
wandb.init(project="assignment1", entity="da6401-assignments")

In [8]:
# Define hyperparameters
config = wandb.config
config.learning_rate = 0.01
config.epochs = 10
config.batch_size = 64
config.hidden_layers = [256,128, 64]  # Two hidden layers with 128 and 64 neurons
config.input_size = 784  # 28x28 images
config.output_size = 10  # 10 classes for Fashion-MNIST

In [9]:
# Load the Fashion-MNIST dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Normalize the data
x_train = x_train.reshape(-1, config.input_size) / 255.0
x_test = x_test.reshape(-1, config.input_size) / 255.0

In [10]:
class FeedforwardNeuralNetwork:
    def __init__(self, input_size, hidden_layers, output_size):
        """
        Initialize the neural network.
        :param input_size: Number of input features (e.g., 784 for 28x28 images).
        :param hidden_layers: List containing the number of neurons in each hidden layer.
        :param output_size: Number of output classes (e.g., 10 for Fashion-MNIST).
        """
        self.layers = []
        self.weights = []
        self.biases = []

        # Input layer to first hidden layer
        prev_size = input_size
        for layer_size in hidden_layers:
            self.weights.append(np.random.randn(prev_size, layer_size) * 0.01)
            self.biases.append(np.zeros((1, layer_size)))
            prev_size = layer_size

        # Hidden layers to output layer
        self.weights.append(np.random.randn(prev_size, output_size) * 0.01)
        self.biases.append(np.zeros((1, output_size)))

    def sigmoid(self, x):
        """Sigmoid activation function."""
        return 1 / (1 + np.exp(-x))

    def softmax(self, x):
        """Softmax activation function."""
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, x):
        """
        Perform a forward pass through the network.
        :param x: Input data.
        :return: Output probabilities.
        """
        self.layers = [x]
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            x = self.sigmoid(np.dot(x, w) + b)
            self.layers.append(x)

        # Output layer with softmax
        output = self.softmax(np.dot(x, self.weights[-1]) + self.biases[-1])
        self.layers.append(output)
        return output

    def train(self, x_train, y_train, epochs=10, batch_size=64, learning_rate=0.01):
        """
        Train the network.
        :param x_train: Training data.
        :param y_train: Training labels.
        :param epochs: Number of training epochs.
        :param batch_size: Batch size for mini-batch gradient descent.
        :param learning_rate: Learning rate for weight updates.
        """
        # One-hot encode the labels
        y_train_onehot = np.eye(10)[y_train]

        for epoch in range(epochs):
            # Shuffle the data
            indices = np.arange(x_train.shape[0])
            np.random.shuffle(indices)
            x_train = x_train[indices]
            y_train_onehot = y_train_onehot[indices]

            # Mini-batch gradient descent
            for i in range(0, x_train.shape[0], batch_size):
                x_batch = x_train[i:i + batch_size]
                y_batch = y_train_onehot[i:i + batch_size]

                # Forward pass
                predictions = self.forward(x_batch)

                # Compute loss (categorical cross-entropy)
                loss = -np.mean(np.sum(y_batch * np.log(predictions + 1e-8), axis=1))

                # Backward pass (gradient computation)
                gradients = []
                delta = predictions - y_batch
                for l in range(len(self.weights) - 1, -1, -1):
                    grad_w = np.dot(self.layers[l].T, delta) / batch_size
                    grad_b = np.sum(delta, axis=0, keepdims=True) / batch_size
                    gradients.insert(0, (grad_w, grad_b))

                    if l > 0:
                        delta = np.dot(delta, self.weights[l].T) * self.layers[l] * (1 - self.layers[l])

                # Update weights and biases
                for l in range(len(self.weights)):
                    self.weights[l] -= learning_rate * gradients[l][0]
                    self.biases[l] -= learning_rate * gradients[l][1]

            # Log metrics to WandB
            wandb.log({"epoch": epoch + 1, "loss": loss})

            # Print loss for the epoch
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")
network = FeedforwardNeuralNetwork(config.input_size, config.hidden_layers, config.output_size)
network.train(x_train, y_train, epochs=config.epochs, batch_size=config.batch_size, learning_rate=config.learning_rate)
wandb.finish()


Epoch 1/10, Loss: 2.3080
Epoch 2/10, Loss: 2.3109
Epoch 3/10, Loss: 2.3008
Epoch 4/10, Loss: 2.3058
Epoch 5/10, Loss: 2.3015
Epoch 6/10, Loss: 2.3029
Epoch 7/10, Loss: 2.3058
Epoch 8/10, Loss: 2.3043
Epoch 9/10, Loss: 2.3005
Epoch 10/10, Loss: 2.3043


0,1
epoch,▁▂▃▃▄▅▆▆▇█
loss,▆█▁▅▂▃▅▄▁▄

0,1
epoch,10.0
loss,2.30433
