<a href="https://colab.research.google.com/github/RajuGuguloth/DL_Assignment/blob/main/Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import wandb
from keras.datasets import fashion_mnist
from keras.datasets import mnist
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math

In [21]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33mrajuguguloth7670[0m ([33mrajuguguloth7670-iitm-india[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [22]:

# For seeing examples of images
(X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

wandb.init(project="Assignment 1")
run_name = "images_generating"
# Set the run name
wandb.run.name = run_name
wandb.run.save()

# this array keeps track for ind of each class
images_ind = []
# classes which are present
class_names = ["T-shirt/Top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle Boot"]

for i in range(10):
  for ind in range(X_train.shape[0]):
    if Y_train[ind] == i:
      images_ind.append(ind)
      break

images = []

for i in range(10):
  ind = images_ind[i]
  img = wandb.Image(X_train[ind], caption=[class_names[i]])
  images.append(img)

wandb.log({"Question 1": images})
wandb.finish()

plt.tight_layout()
plt.show()  # Display the plot

<Figure size 640x480 with 0 Axes>

In [23]:
import numpy as np
import wandb
from tensorflow.keras.datasets import fashion_mnist, mnist

Activation Functions

In [24]:
def sigmoid(x):
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    return np.tanh(x)

def tanh_grad(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(0, x)

def relu_grad(x):
    return (x > 0).astype(float)

def softmax(x):
    x = np.clip(x, -500, 500)
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

Weight Initialization Functions

In [25]:
def xavier_init(size):
    """ Xavier Initialization """
    fan_in, fan_out = size
    stddev = np.sqrt(2 / (fan_in + fan_out))  # sqrt(2 / (fan_in + fan_out))
    return np.random.randn(*size) * stddev

def random_init(size):
    """ Random initialization """
    return np.random.randn(*size)

Loss functions

In [26]:
def cross_entropy_loss(y_true, y_pred):
    return -np.mean(np.sum(y_true * np.log(y_pred + 1e-9), axis=1))

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

Optimizers

In [27]:
class Optimizer:
    def __init__(self, optimizer='adam', learning_rate=0.01, beta=0.9, beta1=0.9, beta2=0.999, epsilon=1e-8, weight_decay=0.0):
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.beta = beta          # for momentum, nesterov, rmsprop
        self.beta1 = beta1        # for adam, nadam
        self.beta2 = beta2        # for adam, nadam
        self.epsilon = epsilon
        self.weight_decay = weight_decay
        self.m = {}               # momentum or first moment estimates
        self.v = {}               # second moment estimates (for adam/nadam, rmsprop)
        self.t = 0                # global timestep

    def update(self, keys, params, grads):
        """
        Update parameters given a list of unique keys, parameters, and their corresponding gradients.
        """
        self.t += 1  # update iteration count for bias correction

        if self.optimizer == "sgd":
            for key, param, grad in zip(keys, params, grads):
                if self.weight_decay > 0:
                    grad += self.weight_decay * param  # L2 regularization
                param -= self.learning_rate * grad

        elif self.optimizer == "momentum":
            for key, param, grad in zip(keys, params, grads):
                if key not in self.m:
                    self.m[key] = np.zeros_like(grad)
                if self.weight_decay > 0:
                    grad += self.weight_decay * param
                self.m[key] = self.beta * self.m[key] + (1 - self.beta) * grad
                param -= self.learning_rate * self.m[key]

        elif self.optimizer == "nesterov":
            for key, param, grad in zip(keys, params, grads):
                if key not in self.m:
                    self.m[key] = np.zeros_like(grad)
                prev_m = self.m[key].copy()
                if self.weight_decay > 0:
                    grad += self.weight_decay * param
                self.m[key] = self.beta * self.m[key] + (1 - self.beta) * grad
                # Nesterov update: lookahead using prev_m
                param -= self.learning_rate * (self.beta * prev_m + (1 - self.beta) * grad)

        elif self.optimizer == "rmsprop":
            for key, param, grad in zip(keys, params, grads):
                if key not in self.v:
                    self.v[key] = np.zeros_like(grad)
                if self.weight_decay > 0:
                    grad += self.weight_decay * param
                self.v[key] = self.beta * self.v[key] + (1 - self.beta) * (grad ** 2)
                param -= self.learning_rate * grad / (np.sqrt(self.v[key]) + self.epsilon)

        elif self.optimizer == "adam":
            for key, param, grad in zip(keys, params, grads):
                if key not in self.m:
                    self.m[key] = np.zeros_like(grad)
                    self.v[key] = np.zeros_like(grad)
                if self.weight_decay > 0:
                    grad += self.weight_decay * param
                self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grad
                self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grad ** 2)
                m_hat = self.m[key] / (1 - self.beta1 ** self.t)
                v_hat = self.v[key] / (1 - self.beta2 ** self.t)
                param -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

        elif self.optimizer == "nadam":
            for key, param, grad in zip(keys, params, grads):
                if key not in self.m:
                    self.m[key] = np.zeros_like(grad)
                    self.v[key] = np.zeros_like(grad)
                if self.weight_decay > 0:
                    grad += self.weight_decay * param
                self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grad
                self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grad ** 2)
                m_hat = self.m[key] / (1 - self.beta1 ** self.t)
                v_hat = self.v[key] / (1 - self.beta2 ** self.t)
                # Nadam update: combines Nesterov momentum with Adam
                param -= self.learning_rate * ((self.beta1 * m_hat + (1 - self.beta1) * grad / (1 - self.beta1 ** self.t)) / (np.sqrt(v_hat) + self.epsilon))

Neural network class with forward and backward propogation

In [28]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, num_hidden_layers, output_size,
                 activation='relu', weight_init_type='Xavier', optimizer='adam',
                 learning_rate=0.01, batch_size=64, loss_type='cross_entropy',
                 beta=0.9, beta1=0.9, beta2=0.999, weight_decay=0.0005, iswandb=False,dataset="fashionmnist"):

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.output_size = output_size
        self.activation = activation
        self.loss_type = loss_type
        self.batch_size = batch_size
        self.iswandb = iswandb
        self.dataset = dataset

        self.optimizer = Optimizer(optimizer, learning_rate, beta, beta1, beta2, weight_decay)
        self.initialize_weights(weight_init_type)
        self.load_dataset(input_size)

    def initialize_weights(self, weight_init_type):
        """Initialize weights and biases."""
        self.weights, self.biases = [], []
        layer_sizes = [self.input_size] + [self.hidden_size] * self.num_hidden_layers + [self.output_size]
        for i in range(len(layer_sizes) - 1):
            if weight_init_type == 'Xavier':
                init_func = xavier_init
            else:
                init_func = random_init
            self.weights.append(init_func((layer_sizes[i], layer_sizes[i+1])))
            self.biases.append(np.zeros((1, layer_sizes[i+1])))

    def load_dataset(self, input_size):
        """Load and preprocess the dataset."""
        (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data() if self.dataset=="fashionmnist" else mnist.load_data()
        X_train, X_test = X_train / 255.0, X_test / 255.0
        X_train, X_test = X_train.reshape(-1, 784), X_test.reshape(-1, 784)
        y_train, y_test = np.eye(10)[y_train], np.eye(10)[y_test]

        split_idx = int(0.9 * len(X_train))
        self.X_train, self.y_train = X_train[:split_idx], y_train[:split_idx]
        self.X_val, self.y_val = X_train[split_idx:], y_train[split_idx:]
        self.X_test, self.y_test = X_test, y_test

    def activation_function(self, x):
        """Apply activation function based on the choice."""
        if self.activation == 'relu':
            return relu(x)
        elif self.activation == 'sigmoid':
            return sigmoid(x)
        elif self.activation == 'tanh':
            return tanh(x)
        return x  # Default: Linear activation

    def activation_gradient(self, x):
        """Compute gradient of activation function."""
        if self.activation == 'relu':
            return relu_grad(x)
        elif self.activation == 'sigmoid':
            return sigmoid_grad(x)
        elif self.activation == 'tanh':
            return tanh_grad(x)
        return np.ones_like(x)  # Default: Linear activation gradient

    def loss_function(self, y_true, y_pred):
        return cross_entropy_loss(y_true, y_pred) if self.loss_type == 'cross_entropy' else mse_loss(y_true, y_pred)

    def compute_accuracy(self, y_true, y_pred):
        return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

    def forward(self, X):
        """Perform forward propagation."""
        self.activations, self.pre_activations = [X], []
        for i in range(len(self.weights) - 1):
            z = self.activations[-1] @ self.weights[i] + self.biases[i]
            self.pre_activations.append(z)
            self.activations.append(self.activation_function(z))
        z_out = self.activations[-1] @ self.weights[-1] + self.biases[-1]
        self.pre_activations.append(z_out)
        self.activations.append(softmax(z_out))
        return self.activations[-1]

    def backward(self, X, y_true):
        """Perform backward propagation with weight decay (L2 Regularization)."""
        grads = []
        loss_grad = self.activations[-1] - y_true
        grads.append(loss_grad)

        for i in range(len(self.weights) - 1, 0, -1):
            loss_grad = loss_grad @ self.weights[i].T * self.activation_gradient(self.pre_activations[i-1])
            grads.insert(0, loss_grad)

        weight_grads = [(self.activations[i].T @ grads[i]) + self.optimizer.weight_decay * self.weights[i] for i in range(len(self.weights))]
        bias_grads = [np.sum(grads[i], axis=0, keepdims=True) for i in range(len(self.weights))]

        # In the backward method, use separate keys for weights and biases:
        weight_keys = [f"w{i}" for i in range(len(self.weights))]
        bias_keys = [f"b{i}" for i in range(len(self.biases))]

        self.optimizer.update(weight_keys, self.weights, weight_grads)
        self.optimizer.update(bias_keys, self.biases, bias_grads)


    def fit(self, epochs):
        """Train the neural network with mini-batch gradient descent."""
        num_samples = self.X_train.shape[0]
        num_batches = num_samples // self.batch_size

        for epoch in range(epochs):
            indices = np.arange(num_samples)
            np.random.shuffle(indices)

            for batch in range(num_batches):
                batch_indices = indices[batch * self.batch_size : (batch + 1) * self.batch_size]
                X_batch, y_batch = self.X_train[batch_indices], self.y_train[batch_indices]

                y_pred_train = self.forward(X_batch)
                self.backward(X_batch, y_batch)

            train_loss = self.loss_function(self.y_train, self.forward(self.X_train))
            val_loss = self.loss_function(self.y_val, self.forward(self.X_val))
            train_acc = self.compute_accuracy(self.y_train, self.forward(self.X_train))
            val_acc = self.compute_accuracy(self.y_val, self.forward(self.X_val))

            if self.iswandb:
                wandb.log({
                    "epoch": epoch,
                    "train_loss": train_loss,
                    "val_loss": val_loss,
                    "train_accuracy": train_acc,
                    "val_accuracy": val_acc
                })

            print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")


In [29]:
import wandb

# Initialize WandB (optional)
# wandb.init(project="Assignment 1")

nn = NeuralNetwork(
    input_size=784,
    hidden_size=128,
    num_hidden_layers=2,
    output_size=10,
    activation='relu',
    weight_init_type='Xavier',
    optimizer='adam',
    learning_rate=0.01,
    batch_size=64,
    loss_type='cross_entropy',
    beta=0.9,
    beta1=0.9,
    beta2=0.999,
    weight_decay=0.0005,
    iswandb=False
)

#nn.fit(epochs=10)


# Evaluate on Test Data
y_pred_test = nn.forward(nn.X_test)
test_accuracy = nn.compute_accuracy(nn.y_test, y_pred_test)

print(f"Test Accuracy: {test_accuracy:.4f}")
# wandb.log({"Test Accuracy": test_accuracy})  # Log final test accuracy

# Finish WandB
# wandb.finish()


Test Accuracy: 0.1030


In [31]:
# Define the main training function
def main():
    # Initialize a new wandb run under the "Assignment 1" project
    wandb.init(project="Assignment 1")
    config = wandb.config

    # Construct a run name containing all hyperparameters
    run_name = (
        f"{config.optimiser}_"
        f"{config.activation}_"
        f"hl{config.hidden_layers}_"
        f"hs{config.hidden_layer_size}_"
        f"bs{config.batch_size}_"
        f"lr{config.learning_rate}_"
        f"wd{config.weight_decay}_"
        f"init{config.weights_initialisation}_"
        f"epochs{config.epochs}"
    )

    # Set and save the run name
    wandb.run.name = run_name
    wandb.run.save()

    # Create and train your neural network model.
    # Ensure your NeuralNetwork class is imported or defined before this.
    model = NeuralNetwork(
        input_size=784,
        hidden_size=config.hidden_layer_size,
        num_hidden_layers=config.hidden_layers,
        output_size=10,
        activation=config.activation,
        weight_init_type=config.weights_initialisation,
        optimizer=config.optimiser,
        learning_rate=config.learning_rate,
        batch_size=config.batch_size,
        loss_type='cross_entropy',
        weight_decay=config.weight_decay,
        iswandb=True
        # beta, beta1, beta2 use default values if not provided
    )


    model.fit(config.epochs)  # Train the model

    # Optionally, you can log additional metrics here if needed.
    wandb.finish()

# Define the sweep configuration
sweep_config = {
    'method': 'bayes',
    'name': 'sweep cross entropy',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 10]
        },
        'hidden_layers': {
            'values': [3, 4, 5]
        },
        'hidden_layer_size': {
            'values': [32, 64, 128]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'activation': {
            'values': ['sigmoid', 'relu', 'tanh']
        },
        'optimiser': {
            'values': ['sgd', 'momentum', 'nestrov', 'rmsprop', 'adam', 'nadam']
        },
        'weights_initialisation': {
            'values': ['random', 'Xavier']
        },
        'learning_rate': {
            'values': [1e-2, 1e-3, 1e-4]
        },
    }
}







Confusion Matrix

In [32]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

# Example class names for Fashion-MNIST:
class_names = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"
]

def train():
    wandb.init(project="Assignment 1")  # Set your project name
    config = wandb.config

    # Construct a descriptive run name using the hyperparameters
    run_name = (
        f"{config.optimiser}_"
        f"{config.activation}_"
        f"{config.hidden_layers}_"
        f"{config.hidden_layer_size}_"
        f"{config.batch_size}_"
        f"{config.learning_rate}_"
        f"{config.weight_decay}_"
        f"{config.weights_initialisation}"
    )
    wandb.run.name = run_name
    wandb.run.save()


    model = NeuralNetwork(
        input_size=784,                     # e.g., 28x28 images flattened
        hidden_size=128,
        num_hidden_layers=5,
        output_size=10,                     # 10 classes in Fashion-MNIST
        activation='relu',
        weight_init_type='Xavier',
        optimizer='sgd',
        learning_rate=0.001,
        batch_size=32,
        loss_type='cross_entropy',          # Or 'mse' if you prefer
        weight_decay=0,
        iswandb=True  # so that your NN can log to W&B if it does so internally
    )


    # 3. Train your model

    model.fit(epochs=config.epochs)


    # 4. Evaluate on Validation

    y_pred_val_prob = model.forward(model.X_val)
    y_pred_val = np.argmax(y_pred_val_prob, axis=1)
    y_true_val = np.argmax(model.y_val, axis=1)
    val_accuracy = accuracy_score(y_true_val, y_pred_val)
    wandb.log({"val_accuracy": val_accuracy})


    # 5. Evaluate on Test set

    y_pred_test_prob = model.forward(model.X_test)
    y_pred_test = np.argmax(y_pred_test_prob, axis=1)
    y_true_test = np.argmax(model.y_test, axis=1)
    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    wandb.log({"test_accuracy": test_accuracy})


    # 6. Confusion Matrix

    cm = confusion_matrix(y_true_test, y_pred_test)

    plt.figure(figsize=(8, 6))
    ax = sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Reds",
        xticklabels=class_names,
        yticklabels=class_names,
        cbar=False
    )
    # Optionally highlight diagonal
    for i in range(cm.shape[0]):
        ax.add_patch(plt.Rectangle((i, i), 1, 1, fill=False, edgecolor='green', lw=3))

    plt.title("Confusion Matrix - Fashion MNIST (Test Set)")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.tight_layout()

    # Log the figure to W&B
    wandb.log({"confusion_matrix": wandb.Image(plt)})
    plt.close()


    # 7. Finish

    wandb.finish()


In [33]:
def run_sweep():
    # Create the sweep
    sweep_id = wandb.sweep(sweep_config, project="Assignment 1")
    # Run the sweep agent
    wandb.agent(sweep_id, function=train, count=1)


run_sweep()


Create sweep with ID: 7fuw0kd7
Sweep URL: https://wandb.ai/rajuguguloth7670-iitm-india/Assignment%201/sweeps/7fuw0kd7


[34m[1mwandb[0m: Agent Starting Run: hadde5oj with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 32
[34m[1mwandb[0m: 	hidden_layers: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	optimiser: adam
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initialisation: Xavier


Epoch 1/5 - Train Loss: 0.4694, Val Loss: 0.4886, Train Acc: 0.8287, Val Acc: 0.8237
Epoch 2/5 - Train Loss: 0.3734, Val Loss: 0.3884, Train Acc: 0.8630, Val Acc: 0.8557
Epoch 3/5 - Train Loss: 0.3583, Val Loss: 0.3831, Train Acc: 0.8679, Val Acc: 0.8583
Epoch 4/5 - Train Loss: 0.3180, Val Loss: 0.3587, Train Acc: 0.8830, Val Acc: 0.8732
Epoch 5/5 - Train Loss: 0.3422, Val Loss: 0.3818, Train Acc: 0.8676, Val Acc: 0.8588


0,1
epoch,▁▃▅▆█
test_accuracy,▁
train_accuracy,▁▅▆█▆
train_loss,█▄▃▁▂
val_accuracy,▁▆▆█▆▆
val_loss,█▃▂▁▂

0,1
epoch,4.0
test_accuracy,0.8453
train_accuracy,0.86763
train_loss,0.34224
val_accuracy,0.85883
val_loss,0.38184


8th question


In [34]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

sweep_config = {
    'method': 'bayes',
    'name': 'sweep cross entropy',
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'epochs': {
            'values': [5, 10]
        },
        'hidden_layers': {
            'values': [3, 4, 5]
        },
        'hidden_layer_size': {
            'values': [32, 64, 128]
        },
        'weight_decay': {
            'values': [0, 0.0005, 0.5]
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'activation': {
            'values': ['sigmoid', 'relu', 'tanh']
        },
        'optimiser': {
            'values': ['sgd', 'momentum', 'nestrov', 'rmsprop', 'adam', 'nadam']
        },
        'weights_initialisation': {
            'values': ['random', 'Xavier']
        },
        'learning_rate': {
            'values': [1e-2, 1e-3, 1e-4]
        },
    }
}


# Example class names for Fashion-MNIST:
class_names = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"
]

def train():
    wandb.init(project="Assignment 1")  # Set your project name
    config = wandb.config

    # Construct a descriptive run name using the hyperparameters
    run_name = (
        f"{config.optimiser}_"
        f"{config.activation}_"
        f"{config.hidden_layers}_"
        f"{config.hidden_layer_size}_"
        f"{config.batch_size}_"
        f"{config.learning_rate}_"
        f"{config.weight_decay}_"
        f"{config.weights_initialisation}"
    )
    wandb.run.name = run_name
    wandb.run.save()


    model = NeuralNetwork(
        input_size=784,                     # e.g., 28x28 images flattened
        hidden_size=128,
        num_hidden_layers=5,
        output_size=10,                     # 10 classes in Fashion-MNIST
        activation='relu',
        weight_init_type='Xavier',
        optimizer='sgd',
        learning_rate=0.001,
        batch_size=32,
        loss_type='mse',          # Or 'mse' if you prefer
        weight_decay=0,
        iswandb=True  # so that your NN can log to W&B if it does so internally
    )


    # 3. Train your model

    model.fit(epochs=config.epochs)

    # 4. Evaluate on Validation

    y_pred_val_prob = model.forward(model.X_val)
    y_pred_val = np.argmax(y_pred_val_prob, axis=1)
    y_true_val = np.argmax(model.y_val, axis=1)
    val_accuracy = accuracy_score(y_true_val, y_pred_val)
    wandb.log({"val_accuracy": val_accuracy})


    # 5. Evaluate on Test set

    y_pred_test_prob = model.forward(model.X_test)
    y_pred_test = np.argmax(y_pred_test_prob, axis=1)
    y_true_test = np.argmax(model.y_test, axis=1)
    test_accuracy = accuracy_score(y_true_test, y_pred_test)
    wandb.log({"test_accuracy": test_accuracy})


    # 7. Finish

    wandb.finish()


In [None]:
def run_sweep():
    # Create the sweep
    sweep_id = wandb.sweep(sweep_config, project="Assignment 1")
    # Run the sweep agent
    wandb.agent(sweep_id, function=train, count=5)


run_sweep()


Create sweep with ID: frl1rndv
Sweep URL: https://wandb.ai/rajuguguloth7670-iitm-india/Assignment%201/sweeps/frl1rndv


[34m[1mwandb[0m: Agent Starting Run: ycwl1j81 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_layer_size: 128
[34m[1mwandb[0m: 	hidden_layers: 4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	optimiser: momentum
[34m[1mwandb[0m: 	weight_decay: 0.5
[34m[1mwandb[0m: 	weights_initialisation: Xavier


Epoch 1/10 - Train Loss: 0.0225, Val Loss: 0.0228, Train Acc: 0.8417, Val Acc: 0.8328
Epoch 2/10 - Train Loss: 0.0191, Val Loss: 0.0201, Train Acc: 0.8651, Val Acc: 0.8593


In [28]:

# for printing the confusion matrix
import wandb

# Define the sweep configuration
sweep_config = {
    'method': 'grid',
    'name' : 'mnist data',
    'metric': {
      'name': 'accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'elem': {
            'values': [1, 2, 3]
        },
        # Define other parameters here
    }
}

def main():
    wandb.init()
    config = wandb.config

    if config.elem == 1:
      # Set the run name
      # Configuration for model with Momentum optimizer and Tanh activation function
      model = NeuralNetwork(
        input_size=784,                     # e.g., 28x28 images flattened
        hidden_size=64,
        num_hidden_layers=4,
        output_size=10,                     # 10 classes in Fashion-MNIST
        activation='tanh',
        weight_init_type='Xavier',
        optimizer='nadam',
        learning_rate=0.0001,
        batch_size=64,
        loss_type='cross_entropy',          # Or 'mse' if you prefer
        weight_decay=0.0005,
        iswandb=True,
        dataset = "mnist"
        )
    elif config.elem == 2:
      # Configuration for model with NAdam optimizer and ReLU activation function
      model = NeuralNetwork(
        input_size=784,                     # e.g., 28x28 images flattened
        hidden_size=128,
        num_hidden_layers=5,
        output_size=10,                     # 10 classes in Fashion-MNIST
        activation='relu',
        weight_init_type='Xavier',
        optimizer='sgd',
        learning_rate=0.001,
        batch_size=32,
        loss_type='cross_entropy',          # Or 'mse' if you prefer
        weight_decay=0,
        iswandb=True,
        dataset = "mnist"
        )

    elif config.elem == 3:
      # Configuration for model with RMSprop optimizer and ReLU activation function
      model =NeuralNetwork(
        input_size=784,                     # e.g., 28x28 images flattened
        hidden_size=64,
        num_hidden_layers=3,
        output_size=10,                     # 10 classes in Fashion-MNIST
        activation='tanh',
        weight_init_type='Xavier',
        optimizer='nadam',
        learning_rate=0.0001,
        batch_size=32,
        loss_type='cross_entropy',          # Or 'mse' if you prefer
        weight_decay=0.0005,
        iswandb=True,
        dataset = "mnist"
        )


    model.fit(epochs=10)

    wandb.finish()

sweep_id = wandb.sweep(sweep=sweep_config, project='Assignment 1')
wandb.agent(sweep_id, main, count=3)

Create sweep with ID: uwhczwbk
Sweep URL: https://wandb.ai/rajuguguloth7670-iitm-india/Assignment%201/sweeps/uwhczwbk


[34m[1mwandb[0m: Agent Starting Run: etwhopje with config:
[34m[1mwandb[0m: 	elem: 1


Epoch 1/10 - Train Loss: 0.3645, Val Loss: 0.3136, Train Acc: 0.8993, Val Acc: 0.9157
Epoch 2/10 - Train Loss: 0.2723, Val Loss: 0.2314, Train Acc: 0.9221, Val Acc: 0.9318
Epoch 3/10 - Train Loss: 0.2318, Val Loss: 0.1990, Train Acc: 0.9326, Val Acc: 0.9435
Epoch 4/10 - Train Loss: 0.2013, Val Loss: 0.1744, Train Acc: 0.9419, Val Acc: 0.9492
Epoch 5/10 - Train Loss: 0.1768, Val Loss: 0.1542, Train Acc: 0.9489, Val Acc: 0.9568
Epoch 6/10 - Train Loss: 0.1586, Val Loss: 0.1416, Train Acc: 0.9544, Val Acc: 0.9603
Epoch 7/10 - Train Loss: 0.1438, Val Loss: 0.1318, Train Acc: 0.9582, Val Acc: 0.9613
Epoch 8/10 - Train Loss: 0.1310, Val Loss: 0.1228, Train Acc: 0.9616, Val Acc: 0.9635
Epoch 9/10 - Train Loss: 0.1190, Val Loss: 0.1182, Train Acc: 0.9659, Val Acc: 0.9653
Epoch 10/10 - Train Loss: 0.1097, Val Loss: 0.1115, Train Acc: 0.9689, Val Acc: 0.9667


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▇▇▇██
train_loss,█▅▄▄▃▂▂▂▁▁
val_accuracy,▁▃▅▆▇▇▇███
val_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,9.0
train_accuracy,0.96894
train_loss,0.10967
val_accuracy,0.96667
val_loss,0.11154


[34m[1mwandb[0m: Agent Starting Run: zja5k4qq with config:
[34m[1mwandb[0m: 	elem: 2


Epoch 1/10 - Train Loss: 0.1818, Val Loss: 0.1553, Train Acc: 0.9444, Val Acc: 0.9555
Epoch 2/10 - Train Loss: 0.1194, Val Loss: 0.1206, Train Acc: 0.9642, Val Acc: 0.9658
Epoch 3/10 - Train Loss: 0.0804, Val Loss: 0.0920, Train Acc: 0.9761, Val Acc: 0.9760
Epoch 4/10 - Train Loss: 0.0646, Val Loss: 0.0896, Train Acc: 0.9805, Val Acc: 0.9728
Epoch 5/10 - Train Loss: 0.0480, Val Loss: 0.0892, Train Acc: 0.9862, Val Acc: 0.9752
Epoch 6/10 - Train Loss: 0.0373, Val Loss: 0.0808, Train Acc: 0.9889, Val Acc: 0.9778
Epoch 7/10 - Train Loss: 0.0403, Val Loss: 0.0908, Train Acc: 0.9877, Val Acc: 0.9742
Epoch 8/10 - Train Loss: 0.0316, Val Loss: 0.0897, Train Acc: 0.9896, Val Acc: 0.9757
Epoch 9/10 - Train Loss: 0.0204, Val Loss: 0.0753, Train Acc: 0.9941, Val Acc: 0.9800
Epoch 10/10 - Train Loss: 0.0222, Val Loss: 0.0903, Train Acc: 0.9930, Val Acc: 0.9768


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▄▅▆▇▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
val_accuracy,▁▄▇▆▇▇▆▇█▇
val_loss,█▅▂▂▂▁▂▂▁▂

0,1
epoch,9.0
train_accuracy,0.993
train_loss,0.02219
val_accuracy,0.97683
val_loss,0.09033


[34m[1mwandb[0m: Agent Starting Run: eq5dwton with config:
[34m[1mwandb[0m: 	elem: 3


Epoch 1/10 - Train Loss: 0.3196, Val Loss: 0.2677, Train Acc: 0.9122, Val Acc: 0.9290
Epoch 2/10 - Train Loss: 0.2489, Val Loss: 0.2120, Train Acc: 0.9290, Val Acc: 0.9397
Epoch 3/10 - Train Loss: 0.2133, Val Loss: 0.1815, Train Acc: 0.9387, Val Acc: 0.9488
Epoch 4/10 - Train Loss: 0.1826, Val Loss: 0.1576, Train Acc: 0.9480, Val Acc: 0.9560
Epoch 5/10 - Train Loss: 0.1622, Val Loss: 0.1456, Train Acc: 0.9536, Val Acc: 0.9575
Epoch 6/10 - Train Loss: 0.1435, Val Loss: 0.1336, Train Acc: 0.9593, Val Acc: 0.9625
Epoch 7/10 - Train Loss: 0.1282, Val Loss: 0.1209, Train Acc: 0.9635, Val Acc: 0.9650
Epoch 8/10 - Train Loss: 0.1132, Val Loss: 0.1128, Train Acc: 0.9682, Val Acc: 0.9665
Epoch 9/10 - Train Loss: 0.1028, Val Loss: 0.1067, Train Acc: 0.9712, Val Acc: 0.9678
Epoch 10/10 - Train Loss: 0.0953, Val Loss: 0.1005, Train Acc: 0.9736, Val Acc: 0.9705


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_accuracy,▁▃▄▅▆▆▇▇██
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▃▄▆▆▇▇▇██
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,9.0
train_accuracy,0.97361
train_loss,0.09533
val_accuracy,0.9705
val_loss,0.10046
