In [1]:
import numpy as np
import wandb
from keras.datasets import fashion_mnist
import matplotlib.pyplot as plt

# Sweep definition: exhaustive grid search over the hyperparameters below,
# ranked by the `val_accuracy` value that each run logs.
sweep_config = dict(
    method='grid',  # enumerate every combination of the listed values
    metric=dict(name='val_accuracy', goal='maximize'),
    parameters=dict(
        learning_rate=dict(values=[0.1, 0.01, 0.001]),
        batch_size=dict(values=[16, 32, 64]),
        optimizer=dict(values=['sgd', 'momentum', 'adam']),
        num_hidden_layers=dict(values=[2, 3, 4]),
        activation=dict(values=['relu', 'tanh']),
    ),
)

# Register the sweep with the wandb backend; the returned id is what the
# agent uses to request configurations.
sweep_id = wandb.sweep(sweep_config, project="assignment 1")

class NeuralNetwork:
    """Fully connected feed-forward classifier with a softmax output layer.

    Weights and biases live in ``self.parameters`` under the keys ``W{l}`` and
    ``b{l}`` for ``l = 1 .. num_layers``. ``W{l}`` has shape
    ``(layer_dims[l], layer_dims[l-1])`` and ``b{l}`` has shape
    ``(layer_dims[l], 1)``, so activations are stored column-wise
    (features x samples).

    Only the forward pass is implemented here; the activation derivatives are
    collected for use by a backward pass that is not part of this class yet.
    """

    def __init__(self, input_size, hidden_layers, output_size, activation_functions, weight_init="random"):
        """Build the network and initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_layers: Sequence with the width of each hidden layer.
            output_size: Number of output classes.
            activation_functions: Activation names (case-insensitive), one per
                hidden layer followed by the output activation. Supported
                names: 'sigmoid', 'tanh', 'relu', 'softmax'.
            weight_init: Either "random" or "xavier".

        Raises:
            KeyError: If an activation name is not supported.
            ValueError: If ``weight_init`` is not a supported scheme.
        """
        self.input_size = input_size
        # Normalise to a list so tuples (or other sequences) can be
        # concatenated with the input/output sizes later on.
        self.hidden_layers = list(hidden_layers)
        self.output_size = output_size
        self.num_layers = len(self.hidden_layers) + 1  # Hidden + Output layer

        self.activation_map = {
            'sigmoid': self.sigmoid,
            'tanh': self.tanh,
            'relu': self.relu,
            'softmax': self.softmax
        }
        derivative_map = {
            'sigmoid': self.sigmoid_derivative,
            'tanh': self.tanh_derivative,
            'relu': self.relu_derivative,
        }

        # Lower-case once so the function lookup and the derivative lookup
        # agree on how a name such as 'ReLU' is interpreted.
        names = [fn.lower() for fn in activation_functions]
        self.activation_functions = [self.activation_map[name] for name in names]
        # The last entry is the output activation and gets no derivative here;
        # names without a registered derivative map to None.
        self.activation_derivatives = [derivative_map.get(name) for name in names[:-1]]

        self.parameters = self._initialize_parameters(weight_init)

    def _initialize_parameters(self, weight_init):
        """Create the ``W{l}`` / ``b{l}`` arrays for every layer.

        "random" scales standard-normal weights by 0.01; "xavier" scales them
        by ``sqrt(1 / fan_in)``. Biases start at zero.

        Raises:
            ValueError: If ``weight_init`` is neither "random" nor "xavier".
        """
        if weight_init not in ("random", "xavier"):
            # Fail here rather than returning a dict with biases only, which
            # would surface later as a confusing KeyError on 'W1'.
            raise ValueError(
                f"Unknown weight_init {weight_init!r}; expected 'random' or 'xavier'"
            )

        parameters = {}
        layer_dims = [self.input_size] + list(self.hidden_layers) + [self.output_size]

        for l in range(1, len(layer_dims)):
            fan_in = layer_dims[l - 1]
            scale = 0.01 if weight_init == "random" else np.sqrt(1. / fan_in)
            parameters[f'W{l}'] = np.random.randn(layer_dims[l], fan_in) * scale
            parameters[f'b{l}'] = np.zeros((layer_dims[l], 1))

        return parameters

    def sigmoid(self, Z):
        """Element-wise logistic function of the pre-activation ``Z``."""
        return 1 / (1 + np.exp(-Z))

    def sigmoid_derivative(self, A):
        """Sigmoid derivative expressed via the activation output ``A``."""
        return A * (1 - A)

    def tanh(self, Z):
        """Element-wise hyperbolic tangent of the pre-activation ``Z``."""
        return np.tanh(Z)

    def tanh_derivative(self, A):
        """Tanh derivative expressed via the activation output ``A``."""
        return 1 - np.square(A)

    def relu(self, Z):
        """Element-wise ``max(0, Z)``."""
        return np.maximum(0, Z)

    def relu_derivative(self, A):
        """ReLU derivative: 1.0 where ``A`` is positive, else 0.0."""
        return (A > 0).astype(float)

    def softmax(self, Z):
        """Column-wise softmax (normalises over axis 0).

        The column maximum is subtracted before exponentiating to avoid
        overflow; this does not change the result.
        """
        exps = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        return exps / np.sum(exps, axis=0, keepdims=True)

    def forward_propagation(self, X):
        """Run the forward pass and return the activations of every layer.

        Args:
            X: Array whose rows are samples; it is transposed internally so
                that each column is one sample.

        Returns:
            List ``[A0, A1, ..., A_out]`` where ``A0`` is ``X.T`` and
            ``A_out`` is the softmax output. The output layer always uses
            softmax, regardless of the last entry of the configured
            activation names.
        """
        A = X.T
        activations = [A]

        for l in range(1, self.num_layers):
            Z = np.dot(self.parameters[f'W{l}'], A) + self.parameters[f'b{l}']
            A = self.activation_functions[l - 1](Z)
            activations.append(A)

        Z_out = np.dot(self.parameters[f'W{self.num_layers}'], A) + self.parameters[f'b{self.num_layers}']
        A_out = self.softmax(Z_out)
        activations.append(A_out)

        return activations

    def predict(self, X):
        """Return the index of the highest-probability class for each row of ``X``."""
        A_out = self.forward_propagation(X)[-1]
        return np.argmax(A_out, axis=0)

def train_model():
    """Run one wandb run: load Fashion-MNIST, build the network, log metrics.

    Intended to be used as the callback of a wandb sweep agent, in which case
    the sweep supplies the hyperparameters. When called outside a sweep the
    defaults below are used instead.

    NOTE: the epoch loop still logs random placeholder values for ``loss`` and
    ``val_accuracy``; no training takes place yet, so the sweep metric is
    noise until a real training step is implemented.
    """
    # Concrete fallback hyperparameters. The run config must hold scalar
    # values: passing the sweep's search-space spec here would make e.g.
    # `config.num_hidden_layers` a dict whenever this runs outside a sweep.
    default_config = {
        'learning_rate': 0.01,
        'batch_size': 32,
        'optimizer': 'adam',
        'num_hidden_layers': 2,
        'activation': 'relu',
    }
    wandb.init(project="assignment 1", config=default_config)

    try:
        config = wandb.config

        # Load data and flatten each image into a single feature vector.
        (X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
        X_train_flat = X_train.reshape(X_train.shape[0], -1).astype(np.float64)
        X_test_flat = X_test.reshape(X_test.shape[0], -1).astype(np.float64)

        # Hold out the first 10% of the training data for validation.
        val_size = int(X_train_flat.shape[0] * 0.1)
        X_val, y_val = X_train_flat[:val_size], y_train[:val_size]
        X_train_final, y_train_final = X_train_flat[val_size:], y_train[val_size:]

        # Standardise every split with statistics from the training split only,
        # so validation and test data are scaled exactly like the data the
        # model is fitted on and do not leak into the preprocessing.
        train_mean = np.mean(X_train_final)
        train_std = np.std(X_train_final)
        X_train_final = (X_train_final - train_mean) / train_std
        X_val = (X_val - train_mean) / train_std
        X_test_flat = (X_test_flat - train_mean) / train_std

        # Configure model
        nn = NeuralNetwork(
            input_size=X_train_final.shape[1],
            hidden_layers=[128] * config.num_hidden_layers,
            output_size=10,
            activation_functions=[config.activation] * config.num_hidden_layers + ['softmax'],
            weight_init="xavier"
        )

        # Training loop (dummy implementation for now)
        # TODO: replace the random placeholders with a real optimisation step
        # on (X_train_final, y_train_final) and an evaluation on (X_val, y_val).
        for epoch in range(5):  # Use 5 epochs as an example
            loss = np.random.rand()  # Placeholder for actual loss computation
            val_accuracy = np.random.rand()  # Placeholder for validation accuracy

            wandb.log({"epoch": epoch, "loss": loss, "val_accuracy": val_accuracy})
    finally:
        # Always close the run, even if data loading or model setup fails,
        # so a failed run does not stay open and block the next sweep job.
        wandb.finish()

# Start a sweep agent that repeatedly fetches a configuration for `sweep_id`
# and calls `train_model` once per configuration.
wandb.agent(sweep_id, function=train_model)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: zejoqqq9
Sweep URL: https://wandb.ai/da24m015-iitm/assignment%201/sweeps/zejoqqq9


[34m[1mwandb[0m: Agent Starting Run: 5rpk61lq with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	num_hidden_layers: 2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: Currently logged in as: [33mda24m015[0m ([33mda24m015-iitm[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


0,1
epoch,▁▃▅▆█
loss,▁▃▆█▄
val_accuracy,▅▂▁▄█

0,1
epoch,4.0
loss,0.46089
val_accuracy,0.85929


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rmt9odqz with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	num_hidden_layers: 2
[34m[1mwandb[0m: 	optimizer: momentum


0,1
epoch,▁▃▅▆█
loss,▁▂█▄▂
val_accuracy,█▃▃▂▁

0,1
epoch,4.0
loss,0.13052
val_accuracy,0.02615


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
