In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist, fashion_mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings("ignore")
import wandb
import seaborn as sns

In [2]:
from keras.datasets import fashion_mnist


In [3]:
(X,Y),(X_test,Y_test) = fashion_mnist.load_data()

In [4]:
wandb.init(entity = 'cs22m031',project = 'CS6910_DL_assignment_1',name = 'examples of classes in fashion_mnist dataset')
visited_label = []
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
class_images = []
for x in range(len(X_test)):
    if(Y_test[x] not in visited_label):
        visited_label.append(Y_test[x])
        image = wandb.Image(X_test[x],caption = class_names[Y_test[x]])
        class_images.append(image)
wandb.log({"classes_examples": class_images})

[34m[1mwandb[0m: Currently logged in as: [33mcs22m031[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
X = X.reshape(len(X),784,1)
X[0].shape
X_test = X_test.reshape(len(X_test),784,1)



In [None]:
X = X/255.0
X_test = X_test/255.0

In [None]:
X_train,X_val,Y_train,Y_val = train_test_split(X,Y,test_size=0.1,random_state=42)

In [257]:
class NeuralNetwork:
    def __init__(self,optimizer='nadam',batchsize=32,no_of_features = 784,no_of_classes = 10,no_of_layers=5,no_of_neurons_in_each_layer = [128,128,128],max_epochs = 10,eta = 0.001,initialization_method = 'he',activation_method = 'relu',loss = 'cross',weight_decay = 0.001):
        self.optimizer  = optimizer
        self.batchsize = batchsize
        self.no_of_features = no_of_features
        self.no_of_classes = no_of_classes
        self.no_of_layers = no_of_layers
        self.no_of_neurons_in_each_layer = no_of_neurons_in_each_layer
        self.max_epochs = max_epochs
        self.eta = eta
        self.initialization = initialization_method
        self.initialization_list = {'xavier':self.xavier_initialization,'uniform':self.uniform_initialization,'normal':self.normal_initialization,'he':self.he_initialization}
        self.activation = activation_method
        self.activation_list = {'sigmoid':self.sigmoid,'relu':self.Relu,'tanh':self.tanh}
        self.activation_derivative = {'sigmoid':self.sigmoid_derivative,'relu':self.Relu_derivative,'tanh':self.tanh_derivative}
        self.loss = loss
        self.thetas = {}
        self.loss_list = []
        self.weight_decay = weight_decay
    def one_hot(self,l,no_of_classes):
        temp = np.array([0]*no_of_classes)
        temp[l] = 1
        return temp
    def sigmoid(self,x):
        return 1. / (1.+np.exp(-x))

    def sigmoid_derivative(self,x):
        return self.sigmoid(x) * (np.ones_like(x)-self.sigmoid(x))

    def Relu(self,x):
        for i in range(len(x)):
            x[i] = x[i] / max(x[i])
        return np.maximum(0,x)

    def Relu_derivative(self,x):
        # for i in range(len(x)):
        #     x[i] = x[i] / max(x[i])
        return 1*(x>0) 

    def tanh(self,x):
        # for i in range(len(x)):
        #     x[i] = x[i] / max(x[i])
        return np.tanh(x)

    def tanh_derivative(self,x):
        # for i in range(len(x)):
        #     x[i] = x[i] / max(x[i])
        return (1 - (np.tanh(x)**2))

    def softmax(self,x):
        # for i in range(len(x)):
        #     x[i] = x[i] / max(x[i])
        l = []
        for i in range(len(x)):
            l.append(np.exp(x[i])/np.sum(np.exp(x[i]),axis=0))
        return np.array(l)
        
    def softmax_derivative(self,x):
        # for i in range(len(x)):
        #     x[i] = x[i] / max(x)
        return self.softmax(x.reshape(1,x.shape[0],x.shape[1])) * (1-self.softmax(x.reshape(1,x.shape[0],x.shape[1])))



    def he_initialization(self):
        np.random.seed(42)
        thetas = {}
        for layer in range(1,self.no_of_layers):
            if(layer == 1):
                thetas['W'+str(layer)] = np.random.normal(0,1,size = (self.no_of_neurons_in_each_layer[layer-1],self.no_of_features)) * np.sqrt(2/(self.no_of_neurons_in_each_layer[layer-1]))
                thetas['b'+str(layer)] = np.random.normal(0,1,size = (self.no_of_neurons_in_each_layer[layer-1],1))*np.sqrt(2/self.no_of_neurons_in_each_layer[layer-1])
            elif(layer == self.no_of_layers-1):
                thetas['W'+str(layer)] = np.random.normal(0,1,size = (self.no_of_classes,self.no_of_neurons_in_each_layer[layer-2])) * np.sqrt(2/(self.no_of_classes))
                thetas['b'+str(layer)] = np.random.normal(0,1,size = (self.no_of_classes,1)) * np.sqrt(2/(self.no_of_classes))
            else:
                thetas['W'+str(layer)] = np.random.normal(0,1,size = (self.no_of_neurons_in_each_layer[layer-1],self.no_of_neurons_in_each_layer[layer-2])) * np.sqrt(2/(self.no_of_neurons_in_each_layer[layer-1]))
                thetas['b'+str(layer)] = np.random.normal(0,1,size = (self.no_of_neurons_in_each_layer[layer-1],1)) * np.sqrt(2/self.no_of_neurons_in_each_layer[layer-1])
        return thetas

    def xavier_initialization(self):
        np.random.seed(42)
        thetas = {}
        for layer in range(1,self.no_of_layers):
            if(layer == 1):
                thetas['W'+str(layer)] = np.random.randn(self.no_of_neurons_in_each_layer[layer-1],self.no_of_features) * np.sqrt(2/(self.no_of_neurons_in_each_layer[layer-1]+self.no_of_features))
                thetas['b'+str(layer)] = np.zeros((self.no_of_neurons_in_each_layer[layer-1],1))
            elif(layer == self.no_of_layers-1):
                thetas['W'+str(layer)] = np.random.randn(self.no_of_classes,self.no_of_neurons_in_each_layer[layer-2]) * np.sqrt(2/(self.no_of_classes + self.no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.zeros((self.no_of_classes,1))
            else:
                # print(layer)
                # print(self.no_of_neurons_in_each_layer[layer-1])
                # print(self.no_of_neurons_in_each_layer[layer-2])
                thetas['W'+str(layer)] = np.random.randn(self.no_of_neurons_in_each_layer[layer-1],self.no_of_neurons_in_each_layer[layer-2]) * np.sqrt(2/(self.no_of_neurons_in_each_layer[layer-1]+self.no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.zeros((self.no_of_neurons_in_each_layer[layer-1],1))
        return thetas

    def uniform_initialization(self):
        thetas = {}
        np.random.seed(42)
        for layer in range(1,self.no_of_layers):
            if(layer == 1):
                thetas['W'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size = (self.no_of_neurons_in_each_layer[layer-1],self.no_of_features)) #* np.sqrt(2/(no_of_neurons_in_each_layer[layer-1]+no_of_features))
                thetas['b'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size = (self.no_of_neurons_in_each_layer[layer-1],1))
            elif(layer == self.no_of_layers-1):
                thetas['W'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size = (self.no_of_classes,self.no_of_neurons_in_each_layer[layer-2])) #* np.sqrt(2/(no_of_classes + no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size = (self.no_of_classes,1))
            else:
                thetas['W'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size =(self.no_of_neurons_in_each_layer[layer-1],self.no_of_neurons_in_each_layer[layer-2])) #*  np.sqrt(2/(no_of_neurons_in_each_layer[layer-1]+no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.random.default_rng().uniform(low = -0.7,high =0.7,size = (self.no_of_neurons_in_each_layer[layer-1],1))
        return thetas

    def normal_initialization(self):
        thetas = {}
        np.random.seed(42)
        for layer in range(1,self.no_of_layers):
            if(layer == 1):
                thetas['W'+str(layer)] = np.random.uniform(low = -0.7,high =0.7,size = (self.no_of_neurons_in_each_layer[layer-1],self.no_of_features)) #* np.sqrt(2/(no_of_neurons_in_each_layer[layer-1]+no_of_features))
                thetas['b'+str(layer)] = np.zeros((self.no_of_neurons_in_each_layer[layer-1],1))
            elif(layer == self.no_of_layers-1):
                thetas['W'+str(layer)] = np.random.uniform(low = -0.7,high =0.7,size = (self.no_of_classes,self.no_of_neurons_in_each_layer[layer-2])) #* np.sqrt(2/(no_of_classes + no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.zeros((self.no_of_classes,1))
            else:
                thetas['W'+str(layer)] = np.random.uniform(low = -0.7,high =0.7,size =(self.no_of_neurons_in_each_layer[layer-1],self.no_of_neurons_in_each_layer[layer-2])) #*  np.sqrt(2/(no_of_neurons_in_each_layer[layer-1]+no_of_neurons_in_each_layer[layer-2]))
                thetas['b'+str(layer)] = np.zeros((self.no_of_neurons_in_each_layer[layer-1],1))
        return thetas

    def feed_forward(self,data,thetas,layers):
        pre_activation = [1]*(layers)
        activation  = [1]*(layers)
        activation[0] = data
        for layer_no in range(1,layers):
            W = 'W' + str(layer_no)
            b = 'b' + str(layer_no)
            pre_activation[layer_no] = np.add(np.matmul(thetas[W],activation[layer_no - 1]),thetas[b])
            if(layer_no == layers-1):
                activation[layer_no] = self.softmax(pre_activation[layer_no])
            else:
                activation[layer_no] = self.activation_list[self.activation](pre_activation[layer_no])
        return activation,pre_activation

    def back_propagate(self,h,a,thetas,Y):
        grads = {}
        for x in thetas.keys():
            grads[x] = 0
        for x in range(len(Y)):
            temp = h[-1][x] - self.one_hot(Y[x],self.no_of_classes).reshape(self.no_of_classes,1)
            if(self.loss == 'mse'):
                temp = (temp*self.softmax_derivative(a[-1][x])).reshape(self.no_of_classes,1)
            for k in range(self.no_of_layers-1,0,-1):
                W = 'W' + str(k)
                b = 'b' + str(k)
                grads[W] += np.matmul(temp,h[k-1][x].T)/self.batchsize
                grads[b] += temp/self.batchsize
                if(k == 1):
                    break
                temp = np.matmul(thetas[W].T,temp)
                temp = np.multiply(temp,self.activation_derivative[self.activation](a[k-1][x]))
        return grads

    def momentumUpdate(self,t,maxm=.999):
        x=np.log(np.floor(t/250)+1)/np.log(2)
        x=1-2**(-1-x)
        return min(x,maxm)

    def getGamma(self,epoch):
        x=np.log((epoch/250)+1)
        x=-1-1*(x)
        x=2**x
        x=1-x
        return min(x,.9)

    def fit(self,X_train,Y_train):
        self.thetas = self.initialization_list[self.initialization]()
        delta = 1e-9
        grads = {}
        for i in self.thetas.keys():
            grads[i] = 0
        for t in range(self.max_epochs):
            #previous_update
            ut = {}
            vt = {}
            gamma = self.getGamma(t+1)
            beta = self.momentumUpdate(t+1)
            for i in self.thetas.keys():
                ut[i] = 0
                vt[i] = 0
            params_look_ahead = {}
            step = 1
            for x in range(0,X_train.shape[0],self.batchsize):
                beta1 = 0.9#self.momentumUpdate(step)
                beta2 = 0.99#self.momentumUpdate(step)
                if(self.optimizer == 'nesterov'):
                    for i in self.thetas.keys():
                        params_look_ahead[i] = self.thetas[i] - beta1*ut[i]
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,params_look_ahead,Y_train[x:x+self.batchsize])
                    for i in self.thetas.keys():
                        ut[i] = beta1*ut[i] + (1-beta1)*grads[i]
                        self.thetas[i] = self.thetas[i] - self.eta*ut[i] - self.eta*self.weight_decay*self.thetas[i]
                elif(self.optimizer == 'mgd'):
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,self.thetas,Y_train[x:x+self.batchsize])     
                    for i in self.thetas.keys():
                        ut[i] = gamma*ut[i] + grads[i]
                        self.thetas[i] = self.thetas[i] - self.eta*ut[i] - self.eta*self.weight_decay*self.thetas[i]
                elif(self.optimizer == 'sgd'):
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,self.thetas,Y_train[x:x+self.batchsize])
                    for i in self.thetas.keys():
                        self.thetas[i] = self.thetas[i] - self.eta*grads[i] - self.eta*self.weight_decay*self.thetas[i]
                elif(self.optimizer == 'RMSprop'):
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,self.thetas,Y_train[x:x+self.batchsize])
                    for i in self.thetas.keys():
                        ut[i] = beta*ut[i] + (1-beta)*np.multiply(grads[i],grads[i])
                        self.thetas[i] = self.thetas[i] - self.eta*grads[i]/((np.sqrt(ut[i])+delta)) - self.eta*self.weight_decay*self.thetas[i]
                elif(self.optimizer == 'adam'):
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,self.thetas,Y_train[x:x+self.batchsize])
                    for i in self.thetas.keys():
                        ut[i] = beta1*ut[i] + (1-beta1)*grads[i]
                        uthat = ut[i]/(1 - pow(beta1,t+1))
                        vt[i] = beta2*vt[i] + (1-beta2)*np.multiply(grads[i],grads[i])
                        vthat = vt[i]/(1 - pow(beta2,t+1))
                        self.thetas[i] = self.thetas[i] - self.eta*uthat/((np.sqrt(vthat) + delta)) - self.eta*self.weight_decay*self.thetas[i]
                elif(self.optimizer == 'nadam'):
                    activation,preactivation = self.feed_forward(X_train[x:x+self.batchsize],self.thetas,self.no_of_layers)
                    grads = self.back_propagate(activation,preactivation,self.thetas,Y_train[x:x+self.batchsize])
                    for i in self.thetas.keys():
                        ut[i] = beta1*ut[i] + (1-beta1)*grads[i]
                        uthat = ut[i]/(1 - pow(beta1,t+1))
                        vt[i] = beta2*vt[i] + (1-beta2)*np.multiply(grads[i],grads[i])
                        vthat = vt[i]/(1 - pow(beta2,t+1))
                        self.thetas[i] = self.thetas[i] - (self.eta*(beta1*uthat + (1-beta1)*grads[i]/(1-pow(beta1,t+1))))/(np.sqrt(vthat) + delta) - self.eta*self.weight_decay*self.thetas[i]
                step+=1
            yhat = self.predict(X_train)
            training_accuracy = self.accuracy_score(Y_train,yhat)
            # print(training_accuracy)
            training_loss = self.calculateLoss(yhat,Y_train)
            # print(training_loss)
            self.loss_list.append(training_loss)
            yhat = self.predict(X_val)
            validation_accuracy = self.accuracy_score(Y_val,yhat)
            # print(validation_accuracy)
            validation_loss = self.calculateLoss(yhat,Y_val)
            # print(validation_loss)
            wandb.log({'training_accuracy' : training_accuracy, 'validation_accuracy' : validation_accuracy,'training_loss' : training_loss, 'validation_loss' : validation_loss,'epoch':t+1})
                
    def predict(self,X):
            activation,preactivation = self.feed_forward(X[:],self.thetas,self.no_of_layers)
            return activation[-1]

    def accuracy_score(self,Y,yhat):
        correct = 0
        for x in range(len(yhat)):
            if(np.argmax(yhat[x]) == Y[x]):
                correct+=1
        return (correct/len(Y)*100)
    def calculateLoss(self,yHat,yBatch):
        loss=0
        l2=0
        if(self.loss == 'cross'):
            for x in range(len(yHat)):
                loss += (-1)*np.log(yHat[x][yBatch[x]] + 1e-9)
            for x in self.thetas.keys():
                l2 += np.linalg.norm(self.thetas[x])
            l2 = (self.weight_decay*l2)/2
            return (loss + l2)/len(yHat)
        if(self.loss == 'mse'):
                # error = (yHat-yBatch)
                # error=error**2
                # loss = np.sum(error,axis=0) 
                # loss = np.sum(error)  
                # loss = loss/2
            for x in range(len(yHat)):
                loss += np.sum((self.one_hot(yBatch[x],self.no_of_classes).reshape(NN.no_of_classes,1) - yHat[x])**2,axis = 0)
            for x in self.thetas.keys():
                l2 += np.linalg.norm(self.thetas[x])
            l2 = (self.weight_decay*l2)/2
            return (loss[0] + l2)/len(yHat)
        return loss

        
        
        

    
    

In [None]:
def main():
    wandb.init(project='CS6910_DL_assignment_1')
    config = wandb.config
    wandb.run.name = "op_{}_lr_{}_batch_{}_act_{}_layer_{}_neuron_{}".format(config.optimizer ,config.eta,config.batchsize,config.activation,config.no_of_layers-2,config.no_of_neurons)
    NN = NeuralNetwork(optimizer=config['optimizer'],batchsize=config['batchsize'],no_of_features=config['no_of_features'],no_of_classes=config['no_of_classes'],no_of_layers= config['no_of_layers'],no_of_neurons_in_each_layer = (config['no_of_layers']-2)*[config['no_of_neurons']],max_epochs=config['max_epochs'],eta = config['eta'],initialization_method=config['initialization'],activation_method=config['activation'],loss=config['loss'],weight_decay=config['weight_decay'])
    NN.fit(X_train,Y_train)

In [258]:
sweep_configuration = {
    'method' : 'bayes',
    'metric' : { 'goal' : 'maximize',
    'name' : 'validation_accuracy'},
    'parameters':{
        'optimizer' : { 'values' : ['sgd','mgd','nesterov','RMSprop','adam','nadam']},
        'batchsize' : { 'values' : [16,32,64,128]},
        'no_of_features' : {'values' : [784]},
        'no_of_classes' : {'values' : [10]},
        'no_of_layers' : { 'values' : [3,4,5,6]},
        'no_of_neurons' : {'values' : [32,64,128]},
        'max_epochs' : {'values' : [5,10]},
        'eta' : { 'values' : [1e-1,1e-3,1e-4]},
        'initialization' : { 'values' :['xavier','he','normal','uniform']},
        'activation' : { 'values' : ['sigmoid','relu','tanh']},
        'loss' : { 'values' : ['mse']},
        'weight_decay'  : { 'values' : [0,0.0005,0.001]}
    }
}
sweep_id = wandb.sweep(sweep = sweep_configuration,project = 'CS6910_DL_assignment_1')
wandb.agent(sweep_id,function=main,count = 5)

Create sweep with ID: bim7ctyk
Sweep URL: https://wandb.ai/cs22m031/CS6910_DL_assignment_1/sweeps/bim7ctyk


[34m[1mwandb[0m: Agent Starting Run: g241ryop with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batchsize: 32
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	initialization: xavier
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	no_of_classes: 10
[34m[1mwandb[0m: 	no_of_features: 784
[34m[1mwandb[0m: 	no_of_layers: 4
[34m[1mwandb[0m: 	no_of_neurons: 64
[34m[1mwandb[0m: 	optimizer: mgd
[34m[1mwandb[0m: 	weight_decay: 0.0005


0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▅▆▇▇███
training_loss,█▆▄▄▃▂▂▂▁▁
validation_accuracy,▁▄▅▅▆▇▇███
validation_loss,█▆▄▄▃▂▂▂▁▁

0,1
epoch,10.0
training_accuracy,76.66481
training_loss,0.36311
validation_accuracy,76.31667
validation_loss,0.36929


[34m[1mwandb[0m: Agent Starting Run: 1f5z2eq8 with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	no_of_classes: 10
[34m[1mwandb[0m: 	no_of_features: 784
[34m[1mwandb[0m: 	no_of_layers: 3
[34m[1mwandb[0m: 	no_of_neurons: 64
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	weight_decay: 0


0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▃▅▆▆▇▇███
training_loss,█▅▄▃▃▂▂▂▁▁
validation_accuracy,▁▄▅▆▆▇▇▇██
validation_loss,█▅▄▃▂▂▂▁▁▁

0,1
epoch,10.0
training_accuracy,88.85741
training_loss,0.16596
validation_accuracy,87.15
validation_loss,0.18655


[34m[1mwandb[0m: Agent Starting Run: 13jjsliu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batchsize: 64
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	initialization: xavier
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	no_of_classes: 10
[34m[1mwandb[0m: 	no_of_features: 784
[34m[1mwandb[0m: 	no_of_layers: 5
[34m[1mwandb[0m: 	no_of_neurons: 64
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	weight_decay: 0


0,1
epoch,▁▃▅▆█
training_accuracy,▆█▇▄▁
training_loss,▁▄▆▇█
validation_accuracy,▇█▇▄▁
validation_loss,▁▄▆▇█

0,1
epoch,5.0
training_accuracy,41.38889
training_loss,0.75485
validation_accuracy,40.25
validation_loss,0.75675


[34m[1mwandb[0m: Agent Starting Run: 6x4i3iqi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	batchsize: 32
[34m[1mwandb[0m: 	eta: 0.0001
[34m[1mwandb[0m: 	initialization: normal
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	max_epochs: 10
[34m[1mwandb[0m: 	no_of_classes: 10
[34m[1mwandb[0m: 	no_of_features: 784
[34m[1mwandb[0m: 	no_of_layers: 5
[34m[1mwandb[0m: 	no_of_neurons: 64
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	weight_decay: 0.001


0,1
epoch,▁▂▃▃▄▅▆▆▇█
training_accuracy,▁▄▅▆▆▇▇▇██
training_loss,█▅▃▃▂▂▂▁▁▁
validation_accuracy,▁▄▅▆▇▇▇███
validation_loss,█▅▃▂▂▂▂▁▁▁

0,1
epoch,10.0
training_accuracy,83.22593
training_loss,0.24477
validation_accuracy,82.15
validation_loss,0.25465


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4p788qvg with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batchsize: 32
[34m[1mwandb[0m: 	eta: 0.001
[34m[1mwandb[0m: 	initialization: he
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	max_epochs: 5
[34m[1mwandb[0m: 	no_of_classes: 10
[34m[1mwandb[0m: 	no_of_features: 784
[34m[1mwandb[0m: 	no_of_layers: 3
[34m[1mwandb[0m: 	no_of_neurons: 128
[34m[1mwandb[0m: 	optimizer: mgd
[34m[1mwandb[0m: 	weight_decay: 0.001


0,1
epoch,▁▃▅▆█
training_accuracy,▁▅▆▇█
training_loss,█▄▃▂▁
validation_accuracy,▁▅▆▇█
validation_loss,█▄▃▂▁

0,1
epoch,5.0
training_accuracy,71.32222
training_loss,0.39759
validation_accuracy,70.4
validation_loss,0.40807


In [None]:
sweep_configuration = {
    'method' : 'random',
    'metric' : { 'goal' : 'maximize',
    'name' : 'validation_accuracy'},
    'parameters':{
        'optimizer' : { 'values' : ['nadam']},
        'batchsize' : { 'values' : [128]},
        'no_of_features' : {'values' : [784]},
        'no_of_classes' : {'values' : [10]},
        'no_of_layers' : { 'values' : [5]},
        'no_of_neurons' : {'values' : [256]},
        'max_epochs' : {'values' : [5]},
        'eta' : { 'values' : [0.001]},
        'initialization' : { 'values' :['he']},
        'activation' : { 'values' : ['relu']},
        'loss' : { 'values' : ['mse']},
        'weight_decay'  : { 'values' : [0,0.005,0.01]}
    }
}
sweep_id = wandb.sweep(sweep = sweep_configuration,project = 'CS6910_DL_assignment_1')
wandb.agent(sweep_id,function=main,count = 1)