In [None]:
import numpy as np
class Layer:
    def __init__(self, hidden_units: int, activation:str=None):
        self.hidden_units = hidden_units
        self.activation = activation
        self.W = None
        self.b = None
        
    def initialize_params(self, n_in, hidden_units,init_type):
        np.random.seed(2)
        if init_type=="Random":
            self.W = 0.01*np.random.randn(n_in, hidden_units)
            self.b = 0.01*np.random.randn(1,hidden_units)

        elif init_type=="Xavier":
            self.W = np.random.randn(n_in, hidden_units) * np.sqrt(2/n_in) 
            self.b = np.zeros((1, hidden_units))

    def activation_fn(self, z, derivative=False):
        if self.activation == 'relu':
            if derivative:
                return np.where(z<=0,0,1)
            return np.maximum(0, z)
        if self.activation == 'sigmoid':
            if derivative:
                return (1 / (1 + np.exp(-z))) * (1-(1 / (1 + np.exp(-z))))
            return (1 / (1 + np.exp(-z)))
        if self.activation == 'tanh':
            t=(np.exp(z)-np.exp(-z))/(np.exp(z)+np.exp(-z))
            if derivative:
                return (1-t**2)
            return t

        if self.activation == 'softmax':
            if derivative: 
                exp = np.exp(z - np.max(z, axis=1, keepdims=True))
                return exp / np.sum(exp, axis=0) * (1 - exp / np.sum(exp, axis=0))
            exp = np.exp(z - np.max(z, axis=1, keepdims=True))
            return exp / np.sum(exp, axis=1, keepdims=True)

    def __repr__(self):
        return str(f'''Hidden Units={self.hidden_units}; Activation={self.activation}''')

In [None]:
import numpy as np
class Helper:
    def accuracy(self,y,y_hat):
        c = np.argmax(y_hat, axis=1) == np.argmax(y, axis=1)
        acc = list(c).count(True) / len(c) * 100
        return acc

    def compute_loss(self,Y, Y_hat,layers,loss_type="CrossEntropy",reg=0):
        if loss_type=="CrossEntropy":
            m = Y.shape[0]
            L = -1./m * np.sum(Y * np.log(Y_hat+0.0000000001))
        elif loss_type=="SquaredError":
            L = np.mean((Y- Y_hat)**2)

        if reg!=0:
            reg_error = 0.0                                                                       
            for idx in layers.keys() :
              reg_error += (reg/2)*(np.sum(np.square(layers[idx].W))) 
            L = L + reg_error

        return L
    
    def create_batches(self,x, y, batch_size):
        m = x.shape[0]
        num_batches = m / batch_size
        batches = []
        for i in range(int(num_batches+1)):
            batch_x = x[i*batch_size:(i+1)*batch_size]
            batch_y = y[i*batch_size:(i+1)*batch_size]
            batches.append((batch_x, batch_y))
        
        if m % batch_size == 0:
            batches.pop(-1)

        return batches
    

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
class Neural_Network:
    def __init__(self):
        self.layers = dict()
        self.cache = dict()
        self.grads = dict()
        
    def add(self, layer):
        self.layers[len(self.layers)+1] = layer

    def forward(self, x, init_type="Xavier"):
        for idx, layer in self.layers.items():

            layer.input = np.array(x, copy=True)
            if layer.W is None:
                layer.initialize_params(layer.input.shape[-1], layer.hidden_units,init_type)

            layer.Z = x @ layer.W + layer.b
        
            if layer.activation is not None:
                layer.A = layer.activation_fn(layer.Z)
                x = layer.A
            else:
                x = layer.Z
            #x = layer.forward(x)
            self.cache[f'W{idx}'] = layer.W
            self.cache[f'Z{idx}'] = layer.Z
            self.cache[f'A{idx}'] = layer.A
        return x

    def backward(self, y, loss_type,reg=0):
        last_layer_idx = max(self.layers.keys())
        m = y.shape[0]
        for idx in reversed(range(1, last_layer_idx+1)):
            if idx == last_layer_idx:
                if loss_type=="CrossEntropy":
                    self.grads[f'dZ{idx}'] = self.cache[f'A{idx}'] - y
                elif loss_type=="SquaredError":
                    self.grads[f'dZ{idx}'] = (self.cache[f'A{idx}'] - y) * self.layers[idx].activation_fn(self.cache[f'Z{idx}'], derivative=True)
            else:
                self.grads[f'dZ{idx}'] = self.grads[f'dZ{idx+1}'] @ self.cache[f'W{idx+1}'].T *\
                                        self.layers[idx].activation_fn(self.cache[f'Z{idx}'], derivative=True)


            self.grads[f'dW{idx}'] = 1 / m * self.layers[idx].input.T @ self.grads[f'dZ{idx}'] + reg*self.layers[idx].W
            self.grads[f'db{idx}'] = 1 / m * np.sum(self.grads[f'dZ{idx}'], axis=0, keepdims=True)
            
            assert self.grads[f'dW{idx}'].shape == self.cache[f'W{idx}'].shape

    def GDoptimize(self, idx, epoch_num, steps, learning_rate=1e-3):
        
        self.layers[idx].W -= learning_rate * self.grads[f'dW{idx}']
        self.layers[idx].b -= learning_rate * self.grads[f'db{idx}']

    
    def SGDMoptimize(self, idx, epoch_num, steps, learning_rate=1e-3, mu=0.99):
        m = dict()
        for i in self.layers.keys():
            m[f'W{i}'] = 0
            m[f'b{i}'] = 0

        m[f'W{idx}'] = m[f'W{idx}'] * mu - learning_rate * self.grads[f'dW{idx}']
        m[f'b{idx}'] = m[f'b{idx}'] * mu - learning_rate * self.grads[f'db{idx}']

        self.layers[idx].W += m[f'W{idx}']
        self.layers[idx].b += m[f'b{idx}']

    def Nesterovoptimize(self, idx, epoch_num, steps, learning_rate=1e-3, mu=0.99):
        m = dict()
        for i in self.layers.keys():
            m[f'W{i}'] = 0
            m[f'b{i}'] = 0

        mW_prev =  np.array(m[f'W{idx}'], copy=True)
        mb_prev = np.array(m[f'b{idx}'], copy=True)

        m[f'W{idx}'] = m[f'W{idx}'] * mu - learning_rate * self.grads[f'dW{idx}']
        m[f'b{idx}'] = m[f'b{idx}'] * mu - learning_rate * self.grads[f'db{idx}']
    
        w_update = -mu * mW_prev + (1 + mu) * m[f'W{idx}']
        b_update = -mu * mb_prev + (1 + mu) * m[f'b{idx}']

        self.layers[idx].W += w_update
        self.layers[idx].b += b_update

    def RMSpropoptimize(self, idx, epoch_num, steps,learning_rate=1e-3,decay_rate=0.99, epsilon=1e-8):
        v = dict()
        for i in self.layers.keys():
            v[f'W{i}'] = 0
            v[f'b{i}'] = 0
        v[f'W{idx}'] = decay_rate * v[f'W{idx}'] + (1 - decay_rate) * self.grads[f'dW{idx}'] **2 
        v[f'b{idx}'] = decay_rate * v[f'b{idx}'] + (1 - decay_rate) * self.grads[f'db{idx}'] **2
            
        w_update = -learning_rate * self.grads[f'dW{idx}'] / (np.sqrt(v[f'W{idx}'] + epsilon))
        b_update = -learning_rate * self.grads[f'db{idx}'] / (np.sqrt(v[f'b{idx}']+ epsilon))

        self.layers[idx].W += w_update
        self.layers[idx].b += b_update

    def Adamoptimize(self, idx, epoch_num, steps,learning_rate=1e-3, beta1=0.99, beta2=0.999, epsilon=1e-8): 
        m = dict()
        v = dict()

        for i in self.layers.keys():
            m[f'W{i}'] = 0
            m[f'b{i}'] = 0
            v[f'W{i}'] = 0
            v[f'b{i}'] = 0

        dW = self.grads[f'dW{idx}']
        db = self.grads[f'db{idx}']

        # weights
        m[f'W{idx}'] = beta1 * m[f'W{idx}'] + (1 - beta1) * dW
        v[f'W{idx}'] = beta2 * v[f'W{idx}'] + (1 - beta2) * dW ** 2 
        
        # biases
        m[f'b{idx}'] = beta1 * m[f'b{idx}'] + (1 - beta1) * db
        v[f'b{idx}'] = beta2 * v[f'b{idx}'] + (1 - beta2) * db ** 2 

        # take timestep into account
        mt_w  = m[f'W{idx}'] / (1 - beta1 ** steps)
        vt_w = v[f'W{idx}'] / (1 - beta2 ** steps)

        mt_b  = m[f'b{idx}'] / (1 - beta1 ** steps)
        vt_b = v[f'b{idx}'] / (1 - beta2 ** steps)

        w_update = - learning_rate * mt_w / (np.sqrt(vt_w) + epsilon)
        b_update = - learning_rate * mt_b / (np.sqrt(vt_b) + epsilon)

        self.layers[idx].W += w_update
        self.layers[idx].b += b_update

    def Nadamoptimize(self, idx, epoch_num, steps,learning_rate=1e-3, beta1=0.99, beta2=0.999, epsilon=1e-8): 
        m = dict()
        v = dict()

        for i in self.layers.keys():
            m[f'W{i}'] = 0
            m[f'b{i}'] = 0
            v[f'W{i}'] = 0
            v[f'b{i}'] = 0
        dW = self.grads[f'dW{idx}']
        db = self.grads[f'db{idx}']
            # weights
        m[f'W{idx}'] = beta1 * m[f'W{idx}'] + (1 - beta1) * dW
        v[f'W{idx}'] = beta2 * v[f'W{idx}'] + (1 - beta2) * dW ** 2 
            
            # biases
        m[f'b{idx}'] = beta1 * m[f'b{idx}'] + (1 - beta1) * db
        v[f'b{idx}'] = beta2 * v[f'b{idx}'] + (1 - beta2) * db ** 2 

            # take timestep into account
        mt_w  = m[f'W{idx}'] / (1 - beta1 ** steps)
        vt_w = v[f'W{idx}'] / (1 - beta2 ** steps)

        mt_b  = m[f'b{idx}'] / (1 - beta1 ** steps)
        vt_b = v[f'b{idx}'] / (1 - beta2 ** steps)

        w_update = - learning_rate / (np.sqrt(vt_w) + epsilon) * (beta1 * mt_w + (1 - beta1) *  dW / (1 - beta1 ** steps))
        b_update = - learning_rate / (np.sqrt(vt_b) + epsilon) * (beta1 * mt_b + (1 - beta1) *  db / (1 - beta1 ** steps))

        self.layers[idx].W += w_update
        self.layers[idx].b += b_update
            
    def fit(self, x_train, y_train,batch_size=32,epochs=500, learning_rate=1e-3, optimizer="GD",val_split=0.1,init_type="Xavier",loss_type="CrossEntropy",reg=0):
        '''Training cycle of the model object'''
        losses = []
        train_accs = []
        val_accs = []
        help=Helper()
        
        self.epochs = epochs
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.init_type=init_type
        self.reg=reg
        self.loss_type=loss_type

        x_train,x_val,y_train,y_val=train_test_split(x_train,y_train,test_size=val_split,stratify=y_train,random_state=42)

        for i in range(1, self.epochs+1):
            print(f'Epoch {i}')
            batches = help.create_batches(x_train, y_train, batch_size)
            epoch_loss = []
            steps = 0
            
            for x, y in batches:
                steps += 1
                preds = self.forward(x,self.init_type)
                #loss = help.compute_loss(y, preds,self.layers,self.loss_type,self.reg)
                #epoch_loss.append(loss)

                # Backward propagation - calculation of gradients 
                self.backward(y,self.loss_type,self.reg)
                
                # update weights and biases of each layer
                for idx in self.layers.keys():
                    if self.optimizer =="GD":
                        self.GDoptimize(idx, i, steps,learning_rate=self.learning_rate)
                    elif self.optimizer=="SGDM":
                        self.SGDMoptimize(idx, i, steps,learning_rate=self.learning_rate)
                    elif self.optimizer=="Nesterov":
                        self.Nesterovoptimize(idx, i, steps,learning_rate=self.learning_rate)
                    elif self.optimizer=="RMSprop":
                        self.RMSpropoptimize(idx, i, steps,learning_rate=self.learning_rate)
                    elif self.optimizer=="Adam":
                        self.Adamoptimize(idx, i, steps,learning_rate=self.learning_rate)
                    elif self.optimizer=="Nadam":
                        self.Nadamoptimize(idx, i, steps,learning_rate=self.learning_rate)
                
            #loss = sum(epoch_loss) / len(epoch_loss)
            #losses.append(loss)

            # Predict with network on x_train
            train_preds = self.forward(x_train)
            train_loss = help.compute_loss(y, preds,self.layers,self.loss_type,self.reg)
            train_acc=help.accuracy(train_preds,y_train)
            train_accs.append(train_acc)
            
            # Predict with network on x_val

            val_preds = self.forward(x_val)
            val_acc=help.accuracy(val_preds,y_val)
            val_accs.append(val_acc)
            val_loss = help.compute_loss(y_val, val_preds,self.layers,self.loss_type,self.reg)

            print(f'Train Loss:{train_loss} Train Acc: {train_acc} Val Acc: {val_acc} Val Loss: {val_loss}')   
            wandb.log(
        {"Train/Loss": train_loss, "Train/Accuracy": train_acc, "Val/Accuracy": val_acc, "Val/Loss":val_loss,"Epoch":i})
                
        self.history = {'train_loss': losses, 'train_acc': train_accs, 'val_acc': val_accs}
        
        


    def predict(self,x):
        preds=self.forward(x)
        return preds

In [None]:
! pip install wandb
! wandb login

[34m[1mwandb[0m: Currently logged in as: [33mcs21m013[0m (use `wandb login --relogin` to force relogin)


In [None]:
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import wandb

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)
print(x_train.shape, x_test.shape)
x_train = np.array(x_train/255., dtype=np.float32)
x_test = np.array(x_test/255., dtype=np.float32)

(60000, 784) (10000, 784)


In [None]:
def one_hot(Y):
    num_labels = len(set(Y))
    new_Y = []
    for label in Y:
        encoding = np.zeros(num_labels)
        encoding[label] = 1.
        new_Y.append(encoding)
    return np.array(new_Y)

In [None]:
y_train = one_hot(y_train)
y_test = one_hot(y_test)
y_train.shape, y_test.shape

((60000, 10), (10000, 10))

In [None]:
def train():
    steps = 0
    # Default values for hyper-parameters we're going to sweep over
    config_defaults = {
        'epochs': 10,
        'no_hidden_layer':4,
        'learning_rate': 1e-3,
        'opt':'adam',
        'activation':'tanh',
        'batch_size':64,
        'size_hidden':128,
        'reg':0,
        'init_type':'Xavier'
    }

    # Initialize a new wandb run
    wandb.init(project='Sweep_test', entity='cs21m007_cs21m013',config=config_defaults)
    
    
    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    lr = config.learning_rate
    epochs = config.epochs
    opt = config.opt
    acti=config.activation
    batch_size = config.batch_size
    hidden_size=config.size_hidden
    reg=config.reg
    init_type=config.init_type
    no_hidden_layer=config.no_hidden_layer
    if opt=="gd":
        opt="GD"
    elif opt=='adam':
      opt="Adam"
    elif opt=='rmsprop':
      opt="RMSprop"
    elif opt=='sgdm':
      opt='SGDM'
    elif opt=='nadam':
      opt="Nadam"
    elif opt=='nesterov':
      opt="Nesterov"
    # Model training here
    model = Neural_Network()
    for i in range(no_hidden_layer):
        model.add(Layer(hidden_size, activation=acti))

    model.add(Layer(10, activation='softmax'))
    print(model.layers)
    model.fit(x_train, y_train,batch_size=batch_size,epochs=epochs, learning_rate=lr, optimizer=opt,val_split=0.1,init_type=init_type,loss_type="CrossEntropy",reg=reg)

**Config 1**

In [None]:
sweep_config = {
    'method': 'grid', #grid, random
    'metric': {
      'name': 'Val/Accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'epochs': {
            'values': [10]
        },
        'no_hidden_layer':{
            'values': [4]  
        },
        'learning_rate': {
            'values': [1e-4]
        },
        'opt': {
            'values': ['rmsprop']
        },
        'activation': {
            'values': ['tanh']
        },
        'batch_size':{
            'values':[64]
        },
        'size_hidden':{
            'values':[64]
        },
        'reg':{
            'values': [0.0005]
        },
        'init_type':{
            'values': ['Xavier']  
        }
    }
}

In [None]:
sweep_id = wandb.sweep(sweep_config, entity="cs21m007_cs21m013", project="Mnist_best_parameter")

Create sweep with ID: hkx3zi7c
Sweep URL: https://wandb.ai/cs21m007_cs21m013/Mnist_best_parameter/sweeps/hkx3zi7c


In [None]:
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: h9qycewq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_type: Xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	no_hidden_layer: 4
[34m[1mwandb[0m: 	opt: rmsprop
[34m[1mwandb[0m: 	reg: 0.0005
[34m[1mwandb[0m: 	size_hidden: 64


{1: Hidden Units=64; Activation=tanh, 2: Hidden Units=64; Activation=tanh, 3: Hidden Units=64; Activation=tanh, 4: Hidden Units=64; Activation=tanh, 5: Hidden Units=10; Activation=softmax}
Epoch 1
Train Loss:0.40491474885877843 Train Acc: 94.19074074074074 Val Acc: 93.7 Val Loss: 0.34083419429348866
Epoch 2
Train Loss:0.3329108902064813 Train Acc: 95.70185185185184 Val Acc: 95.16666666666667 Val Loss: 0.28635896482110546
Epoch 3
Train Loss:0.262573483766684 Train Acc: 96.38518518518518 Val Acc: 95.73333333333333 Val Loss: 0.26307236844631676
Epoch 4
Train Loss:0.23724152907440865 Train Acc: 96.71851851851852 Val Acc: 95.93333333333334 Val Loss: 0.24700502341062186
Epoch 5
Train Loss:0.23461938563248658 Train Acc: 96.96296296296296 Val Acc: 96.11666666666666 Val Loss: 0.235549181096617
Epoch 6
Train Loss:0.24926687944105697 Train Acc: 97.11296296296297 Val Acc: 96.26666666666667 Val Loss: 0.22625598166417304
Epoch 7
Train Loss:0.24742585613979057 Train Acc: 97.12962962962963 Val Acc: 96

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train/Accuracy,▁▄▆▇▇█████
Train/Loss,█▅▂▁▁▂▂▁▁▁
Val/Accuracy,▁▅▆▇▇███▇▇
Val/Loss,█▅▄▃▂▂▁▁▁▁

0,1
Epoch,10.0
Train/Accuracy,97.25556
Train/Loss,0.23516
Val/Accuracy,96.08333
Val/Loss,0.21341


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


**Config 2**

In [None]:
sweep_config = {
    'method': 'grid', #grid, random
    'metric': {
      'name': 'Val/Accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'epochs': {
            'values': [10]
        },
        'no_hidden_layer':{
            'values': [4]  
        },
        'learning_rate': {
            'values': [1e-3]
        },
        'opt': {
            'values': ['Nadam']
        },
        'activation': {
            'values': ['tanh']
        },
        'batch_size':{
            'values':[64]
        },
        'size_hidden':{
            'values':[64]
        },
        'reg':{
            'values': [0]
        },
        'init_type':{
            'values': ['Xavier']  
        }
    }
}

In [None]:
sweep_id = wandb.sweep(sweep_config, entity="cs21m007_cs21m013", project="Mnist_best_parameter")

Create sweep with ID: lz3rrdst
Sweep URL: https://wandb.ai/cs21m007_cs21m013/Mnist_best_parameter/sweeps/lz3rrdst


In [None]:
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: adusk65i with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_type: Xavier
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	no_hidden_layer: 4
[34m[1mwandb[0m: 	opt: Nadam
[34m[1mwandb[0m: 	reg: 0
[34m[1mwandb[0m: 	size_hidden: 64


{1: Hidden Units=64; Activation=tanh, 2: Hidden Units=64; Activation=tanh, 3: Hidden Units=64; Activation=tanh, 4: Hidden Units=64; Activation=tanh, 5: Hidden Units=10; Activation=softmax}
Epoch 1
Train Loss:0.3029054194641873 Train Acc: 92.46296296296296 Val Acc: 91.83333333333333 Val Loss: 0.27681843793485766
Epoch 2
Train Loss:0.21140556147806633 Train Acc: 94.15925925925926 Val Acc: 93.61666666666667 Val Loss: 0.22430180105950084
Epoch 3
Train Loss:0.16558585407425075 Train Acc: 95.07962962962962 Val Acc: 94.33333333333334 Val Loss: 0.20161405895429288
Epoch 4
Train Loss:0.14588760709665236 Train Acc: 95.71481481481482 Val Acc: 94.73333333333333 Val Loss: 0.18677609371274673
Epoch 5
Train Loss:0.13015181748730303 Train Acc: 96.12962962962963 Val Acc: 94.91666666666667 Val Loss: 0.17824551764709504
Epoch 6
Train Loss:0.1260047255737307 Train Acc: 96.39259259259259 Val Acc: 95.06666666666666 Val Loss: 0.1731738161477775
Epoch 7
Train Loss:0.11063099072621288 Train Acc: 96.62222222222

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train/Accuracy,▁▄▅▆▇▇▇███
Train/Loss,█▅▃▃▂▂▁▁▁▁
Val/Accuracy,▁▄▆▆▇▇▇███
Val/Loss,█▅▃▂▂▁▁▁▁▁

0,1
Epoch,10.0
Train/Accuracy,97.1037
Train/Loss,0.10138
Val/Accuracy,95.68333
Val/Loss,0.16586


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


**Config 3**

In [None]:
sweep_config = {
    'method': 'grid', #grid, random
    'metric': {
      'name': 'Val/Accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'epochs': {
            'values': [10]
        },
        'no_hidden_layer':{
            'values': [5]  
        },
        'learning_rate': {
            'values': [1e-4]
        },
        'opt': {
            'values': ['Nadam']
        },
        'activation': {
            'values': ['tanh']
        },
        'batch_size':{
            'values':[64]
        },
        'size_hidden':{
            'values':[128]
        },
        'reg':{
            'values': [0]
        },
        'init_type':{
            'values': ['Xavier']  
        }
    }
}

In [None]:
sweep_id = wandb.sweep(sweep_config, entity="cs21m007_cs21m013", project="Mnist_best_parameter")

Create sweep with ID: qijk41bi
Sweep URL: https://wandb.ai/cs21m007_cs21m013/Mnist_best_parameter/sweeps/qijk41bi


In [None]:
wandb.agent(sweep_id, train)

[34m[1mwandb[0m: Agent Starting Run: nbp2o23u with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	init_type: Xavier
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	no_hidden_layer: 5
[34m[1mwandb[0m: 	opt: Nadam
[34m[1mwandb[0m: 	reg: 0
[34m[1mwandb[0m: 	size_hidden: 128


{1: Hidden Units=128; Activation=tanh, 2: Hidden Units=128; Activation=tanh, 3: Hidden Units=128; Activation=tanh, 4: Hidden Units=128; Activation=tanh, 5: Hidden Units=128; Activation=tanh, 6: Hidden Units=10; Activation=softmax}
Epoch 1
Train Loss:0.49467192619126515 Train Acc: 89.41481481481482 Val Acc: 88.78333333333333 Val Loss: 0.4016527746938117
Epoch 2
Train Loss:0.3624323886449217 Train Acc: 91.57407407407408 Val Acc: 90.91666666666667 Val Loss: 0.31146546897989497
Epoch 3
Train Loss:0.3032881822289193 Train Acc: 92.56296296296296 Val Acc: 92.03333333333333 Val Loss: 0.27515609450279216
Epoch 4
Train Loss:0.2663310770174474 Train Acc: 93.22777777777777 Val Acc: 92.41666666666667 Val Loss: 0.2524265536143191
Epoch 5
Train Loss:0.24289303349517727 Train Acc: 93.79074074074074 Val Acc: 93.05 Val Loss: 0.23571806313770396
Epoch 6
Train Loss:0.22683450945836228 Train Acc: 94.19444444444444 Val Acc: 93.35 Val Loss: 0.22230850029118562
Epoch 7
Train Loss:0.2150261876057701 Train Acc:

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Train/Accuracy,▁▄▅▅▆▇▇▇██
Train/Loss,█▅▄▃▂▂▂▁▁▁
Val/Accuracy,▁▄▅▆▆▇▇███
Val/Loss,█▅▄▃▃▂▂▂▁▁

0,1
Epoch,10.0
Train/Accuracy,95.4463
Train/Loss,0.19055
Val/Accuracy,94.3
Val/Loss,0.18565


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Test and Confusion Matrix

In [None]:
wandb.init(project="Mnist_best_parameter", entity="cs21m007_cs21m013")

[34m[1mwandb[0m: Currently logged in as: [33mcs21m013[0m (use `wandb login --relogin` to force relogin)


In [None]:
epochs = 10
acti='tanh'
lr = 1e-3
batch_size = 64
optimizer="Nadam"
init_type="Xavier"
loss_type="CrossEntropy"
reg=0
hidden_size=64
no_hidden_layer=4



model = Neural_Network()

for i in range(no_hidden_layer):
        model.add(Layer(hidden_size, activation=acti))

model.add(Layer(10, activation='softmax'))
print(model.layers)
model.fit(x_train, y_train,batch_size=batch_size,epochs=epochs, learning_rate=lr, optimizer=optimizer,val_split=0.1,init_type=init_type,loss_type=loss_type,reg=reg)
y_prob=model.predict(x_test)

{1: Hidden Units=64; Activation=tanh, 2: Hidden Units=64; Activation=tanh, 3: Hidden Units=64; Activation=tanh, 4: Hidden Units=64; Activation=tanh, 5: Hidden Units=10; Activation=softmax}
Epoch 1
Train Loss:0.3029054194641873 Train Acc: 92.46296296296296 Val Acc: 91.83333333333333 Val Loss: 0.27681843793485766
Epoch 2
Train Loss:0.21140556147806633 Train Acc: 94.15925925925926 Val Acc: 93.61666666666667 Val Loss: 0.22430180105950084
Epoch 3
Train Loss:0.16558585407425075 Train Acc: 95.07962962962962 Val Acc: 94.33333333333334 Val Loss: 0.20161405895429288
Epoch 4
Train Loss:0.14588760709665236 Train Acc: 95.71481481481482 Val Acc: 94.73333333333333 Val Loss: 0.18677609371274673
Epoch 5
Train Loss:0.13015181748730303 Train Acc: 96.12962962962963 Val Acc: 94.91666666666667 Val Loss: 0.17824551764709504
Epoch 6
Train Loss:0.1260047255737307 Train Acc: 96.39259259259259 Val Acc: 95.06666666666666 Val Loss: 0.1731738161477775
Epoch 7
Train Loss:0.11063099072621288 Train Acc: 96.62222222222

In [None]:
help=Helper()
accuracy=help.accuracy(y_test,y_prob)

In [None]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
class_type = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] 

In [None]:
class_list=[]
for i in range(10):
    for j in range(len(y_train)):
        if y_train[j] == i :
            class_list.append(class_type[y_train[j]])
            break

In [None]:
x_test = x_test.reshape(x_test.shape[0], -1)
x_test = np.array(x_test/255., dtype=np.float32)

In [None]:
y_prob=np.empty(np.shape(y_test))
#finding y predicted
for i,x in enumerate(x_test):
    y_prob[i]= (model.predict(x)[0]).argmax()

In [None]:
y_test,y_prob.shape

(array([7, 2, 1, ..., 4, 5, 6], dtype=uint8), (10000,))

In [None]:
wandb.log({"conf_mat" : wandb.plot.confusion_matrix(preds=y_prob, y_true=y_test, class_names=class_list),"Test Accuracy": accuracy })