In [None]:
'''
Set the wandb credentials.
'''
# wandb project name; passed as `project=` to every wandb.init call in this notebook.
project="Fashion_MNIST_best_parameter"
# wandb entity (account/team) name; passed as `entity=` to every wandb.init call.
entity="cs21m007_cs21m013"

In [None]:
'''
Class: Layer
Definations: Initialize_params, activation_fn
'''

import numpy as np  # numpy for implementing array operations inside the functions.
class Layer:
    '''
    A fully connected layer: stores its width, activation name and parameters.

    Attributes:
        hidden_units: number of neurons in the layer.
        activation: 'relu', 'sigmoid', 'tanh', 'softmax', or None for a linear layer.
        W: weight matrix of shape (n_in, hidden_units); None until initialize_params runs.
        b: bias row of shape (1, hidden_units); None until initialize_params runs.
    '''
    def __init__(self, hidden_units: int, activation:str=None):
        '''
        Input: number of hidden units of the layer, activation name for the layer
        Output: None
        '''
        self.hidden_units = hidden_units
        self.activation = activation
        self.W = None
        self.b = None

    def initialize_params(self, n_in, hidden_units,init_type):
        '''
        Initialise the weights and biases of the layer.

        Input: dimension of the input to the layer, number of neurons in the layer,
               the initialization type ("Random" or "Xavier")
        Output: None (sets self.W and self.b)
        Raises: ValueError for an unknown init_type (previously W/b silently stayed None).
        '''
        # The global NumPy seed is reset on every call, which keeps runs reproducible.
        # NOTE(review): as a consequence, layers with identical shapes start from
        # identical weights.
        np.random.seed(2)
        if init_type=="Random":
            self.W = 0.01*np.random.randn(n_in, hidden_units)
            self.b = 0.01*np.random.randn(1,hidden_units)
        elif init_type=="Xavier":
            # Standard normal draws scaled by sqrt(2 / n_in); biases start at zero.
            self.W = np.random.randn(n_in, hidden_units) * np.sqrt(2/n_in)
            self.b = np.zeros((1, hidden_units))
        else:
            raise ValueError(f"Unknown init_type {init_type!r}; expected 'Random' or 'Xavier'")

    def activation_fn(self, z, derivative=False):
        '''
        Apply the layer's activation, or its element-wise derivative, to z.

        Input: the pre-activation z of the layer; derivative=True selects the derivative
               (used by backward propagation) instead of the activation value.
        Output: array with the same shape as z.
        Raises: ValueError for an unknown activation name.
        '''
        if self.activation is None:
            # Linear layer: identity activation with unit derivative.
            return np.ones_like(z) if derivative else z

        if self.activation == 'relu':
            if derivative:
                return np.where(z<=0,0,1)
            return np.maximum(0, z)

        if self.activation == 'sigmoid':
            s = 1 / (1 + np.exp(-z))
            return s * (1 - s) if derivative else s

        if self.activation == 'tanh':
            # np.tanh saturates cleanly; the exp-based formula gives inf/inf = nan for large |z|.
            t = np.tanh(z)
            return 1 - t**2 if derivative else t

        if self.activation == 'softmax':
            # Row-wise softmax (z is expected to be 2-D: samples x classes); the row
            # maximum is subtracted before exponentiating for numerical stability.
            exp = np.exp(z - np.max(z, axis=1, keepdims=True))
            s = exp / np.sum(exp, axis=1, keepdims=True)
            # The "derivative" is the diagonal of the softmax Jacobian, s * (1 - s).
            # It must use the same per-row normalisation as the forward pass.
            return s * (1 - s) if derivative else s

        raise ValueError(f"Unknown activation {self.activation!r}")

In [None]:
'''
Class: Helper
Definations: Accruacy function, compute_loss function, create batches function.
'''
import numpy as np  # numpy for performing array operations.
class Helper:
    '''
    Stateless utilities used during training: accuracy, loss and mini-batching.
    '''
    def accuracy(self,y,y_hat):
        '''
        Input: the true labels (one-hot) and the predicted probabilities, both 2-D
               with one row per sample.
        Output: percentage (0-100) of rows whose argmax matches.
        '''
        hits = np.argmax(y_hat, axis=1) == np.argmax(y, axis=1)
        # count_nonzero avoids materialising a Python list just to count matches.
        return np.count_nonzero(hits) / len(hits) * 100

    def compute_loss(self,Y, Y_hat,layers,loss_type="CrossEntropy",reg=0):
        '''
        Input: true labels, predicted probabilities, the dict of layers (only read when
               reg != 0; each value must expose a weight matrix `W`),
               loss_type ("CrossEntropy" or "SquaredError") and the L2 coefficient reg.
        Output: the data loss plus (reg / 2) * sum of squared weights over all layers.
        Raises: ValueError for an unknown loss_type (previously an UnboundLocalError).
        '''
        if loss_type=="CrossEntropy":
            m = Y.shape[0]
            # The small constant keeps log() finite when a predicted probability is 0.
            L = -1./m * np.sum(Y * np.log(Y_hat+0.0000000001))
        elif loss_type=="SquaredError":
            L = np.mean((Y- Y_hat)**2)
        else:
            raise ValueError(f"Unknown loss_type {loss_type!r}; expected 'CrossEntropy' or 'SquaredError'")

        if reg!=0:
            reg_error = 0.0
            for layer in layers.values():
                reg_error += (reg/2)*(np.sum(np.square(layer.W)))
            L = L + reg_error

        return L

    def create_batches(self,x, y, batch_size):
        '''
        Input: the training data (x, y) and the batch size
        Output: list of (batch_x, batch_y) slices in order; the last batch holds the
                remainder when the sample count is not a multiple of batch_size.
        Raises: ValueError if batch_size is not positive.
        '''
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        # Stepping the start index by batch_size never produces an empty trailing batch.
        return [(x[start:start+batch_size], y[start:start+batch_size])
                for start in range(0, x.shape[0], batch_size)]
    

In [None]:
'''
Clss: Neural_Network
Definitions: Constructor __init__, add , forward, backward, GDoptimize, 
              SGDMoptimize, Nesterovoptimize, RMSpropoptimize, Adamoptimize, 
              Nadamoptimize, fit, predict
'''
import numpy as np  # numpy to tackle all array related operations
from sklearn.model_selection import train_test_split  # train test split for splitting the train data into further train and validation.
class Neural_Network:
    '''
    Sequential feed-forward network trained with mini-batch gradient methods.

    Attributes:
        layers: dict mapping a 1-based index to a layer object, in insertion order.
        cache: weights (W{idx}), pre-activations (Z{idx}) and activations (A{idx})
               recorded by the most recent forward pass.
        grads: gradients (dZ{idx}, dW{idx}, db{idx}) from the most recent backward pass.
        opt_state: optimizer accumulators that persist across update calls
                   (momentum under 'm', squared-gradient averages under 'v').
    '''
    def __init__(self):
        '''
        Input: None
        Output: None
        '''
        self.layers = dict()
        self.cache = dict()
        self.grads = dict()
        self.opt_state = dict()

    def add(self, layer):
        '''
        Append a layer object to the model.
        Input: layer object
        Output: None
        '''
        self.layers[len(self.layers)+1] = layer

    def forward(self, x, init_type="Xavier"):
        '''
        Forward propagation through every layer in order.

        Input: input data and the initialization type used for layers whose W is still None
        Output: the output of the last layer
        '''
        for idx, layer in self.layers.items():
            layer.input = np.array(x, copy=True)
            if layer.W is None:
                # Lazy initialisation: the input width is only known at the first pass.
                layer.initialize_params(layer.input.shape[-1], layer.hidden_units,init_type)

            layer.Z = x @ layer.W + layer.b  # linear pre-activation

            if layer.activation is not None:
                layer.A = layer.activation_fn(layer.Z)
            else:
                # Linear layer: the activation equals the pre-activation. Setting A here
                # keeps the cache write below from failing on a missing attribute.
                layer.A = layer.Z
            x = layer.A

            self.cache[f'W{idx}'] = layer.W
            self.cache[f'Z{idx}'] = layer.Z
            self.cache[f'A{idx}'] = layer.A
        return x

    def _activation_grad(self, idx):
        '''Element-wise activation derivative of layer idx at its cached pre-activation (1 for a linear layer).'''
        layer = self.layers[idx]
        if layer.activation is None:
            return 1
        return layer.activation_fn(self.cache[f'Z{idx}'], derivative=True)

    def backward(self, y, loss_type,reg=0):
        '''
        Backward propagation; must follow a forward pass on the matching inputs.

        Input: true labels, loss_type ("CrossEntropy" or "SquaredError"), L2 coefficient
        Output: None, but stores dZ/dW/db for every layer in self.grads
        '''
        last_layer_idx = max(self.layers.keys())
        m = y.shape[0]
        for idx in reversed(range(1, last_layer_idx+1)):  # move from output to inputs
            if idx == last_layer_idx:
                if loss_type=="CrossEntropy":
                    # A - y is the gradient w.r.t. the pre-activation when the output
                    # activation is softmax combined with cross-entropy.
                    self.grads[f'dZ{idx}'] = self.cache[f'A{idx}'] - y
                elif loss_type=="SquaredError":
                    self.grads[f'dZ{idx}'] = (self.cache[f'A{idx}'] - y) * self._activation_grad(idx)
            else:
                self.grads[f'dZ{idx}'] = self.grads[f'dZ{idx+1}'] @ self.cache[f'W{idx+1}'].T *\
                                        self._activation_grad(idx)

            # Batch-averaged gradients; the L2 penalty contributes reg * W to dW only.
            self.grads[f'dW{idx}'] = 1 / m * self.layers[idx].input.T @ self.grads[f'dZ{idx}'] + reg*self.layers[idx].W
            self.grads[f'db{idx}'] = 1 / m * np.sum(self.grads[f'dZ{idx}'], axis=0, keepdims=True)

            assert self.grads[f'dW{idx}'].shape == self.cache[f'W{idx}'].shape

    def _state(self, name):
        '''Return (creating on first use) the persistent accumulator dict called `name`.'''
        store = self.__dict__.setdefault('opt_state', {})
        return store.setdefault(name, {})

    def _apply_update(self, idx, w_update, b_update):
        '''Add the given increments to the weights and biases of layer idx.'''
        self.layers[idx].W += w_update
        self.layers[idx].b += b_update

    def GDoptimize(self, idx, learning_rate=1e-3):
        '''
        Vanilla gradient descent step for layer idx.
        Input: idx of the layer, learning_rate
        Output: None, but updates W and b
        '''
        self.layers[idx].W -= learning_rate * self.grads[f'dW{idx}']
        self.layers[idx].b -= learning_rate * self.grads[f'db{idx}']

    def SGDMoptimize(self, idx, learning_rate=1e-3, mu=0.99):
        '''
        Momentum-based gradient descent step for layer idx.
        Input: idx of the layer, learning_rate, mu - momentum coefficient
        Output: None, but updates W and b
        '''
        # The velocity must survive between calls; re-zeroing it on every call
        # (as before) reduces momentum to plain gradient descent.
        m = self._state('m')
        for p in ('W', 'b'):
            key = f'{p}{idx}'
            m[key] = mu * m.get(key, 0) - learning_rate * self.grads[f'd{key}']
        self._apply_update(idx, m[f'W{idx}'], m[f'b{idx}'])

    def Nesterovoptimize(self, idx, learning_rate=1e-3, mu=0.99):
        '''
        Nesterov accelerated gradient step for layer idx.
        Input: idx of the layer, learning_rate, mu - momentum coefficient
        Output: None, but updates W and b
        '''
        m = self._state('m')
        step = {}
        for p in ('W', 'b'):
            key = f'{p}{idx}'
            prev = m.get(key, 0)
            m[key] = mu * prev - learning_rate * self.grads[f'd{key}']
            # Look-ahead form: combine the previous and the new velocity.
            step[p] = -mu * prev + (1 + mu) * m[key]
        self._apply_update(idx, step['W'], step['b'])

    def RMSpropoptimize(self, idx, learning_rate=1e-3,decay_rate=0.99, epsilon=1e-8):
        '''
        RMSprop step for layer idx.
        Input: idx of the layer, learning_rate, decay_rate and epsilon
        Output: None, but updates W and b
        '''
        v = self._state('v')
        step = {}
        for p in ('W', 'b'):
            key = f'{p}{idx}'
            grad = self.grads[f'd{key}']
            # Running average of squared gradients, kept between calls.
            v[key] = decay_rate * v.get(key, 0) + (1 - decay_rate) * grad ** 2
            step[p] = -learning_rate * grad / (np.sqrt(v[key] + epsilon))
        self._apply_update(idx, step['W'], step['b'])

    def Adamoptimize(self, idx, steps, learning_rate=1e-3, beta1=0.99, beta2=0.999, epsilon=1e-8):
        '''
        Adam step for layer idx.
        Input: idx of the layer, steps (number of updates performed so far including
               this one, must be >= 1), learning_rate, beta1, beta2 and epsilon
        Output: None, but updates W and b
        '''
        m = self._state('m')
        v = self._state('v')
        step = {}
        for p in ('W', 'b'):
            key = f'{p}{idx}'
            grad = self.grads[f'd{key}']
            m[key] = beta1 * m.get(key, 0) + (1 - beta1) * grad
            v[key] = beta2 * v.get(key, 0) + (1 - beta2) * grad ** 2
            # Bias correction compensates for the zero-initialised moving averages.
            m_hat = m[key] / (1 - beta1 ** steps)
            v_hat = v[key] / (1 - beta2 ** steps)
            step[p] = - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)
        self._apply_update(idx, step['W'], step['b'])

    def Nadamoptimize(self, idx, steps,learning_rate=1e-3, beta1=0.99, beta2=0.999, epsilon=1e-8):
        '''
        Nesterov-accelerated Adam step for layer idx.
        Input: idx of the layer, steps (number of updates performed so far including
               this one, must be >= 1), learning_rate, beta1, beta2, epsilon
        Output: None, but updates W and b
        '''
        m = self._state('m')
        v = self._state('v')
        step = {}
        for p in ('W', 'b'):
            key = f'{p}{idx}'
            grad = self.grads[f'd{key}']
            m[key] = beta1 * m.get(key, 0) + (1 - beta1) * grad
            v[key] = beta2 * v.get(key, 0) + (1 - beta2) * grad ** 2
            m_hat = m[key] / (1 - beta1 ** steps)
            v_hat = v[key] / (1 - beta2 ** steps)
            # Nesterov look-ahead: blend the corrected momentum with the current gradient.
            step[p] = - learning_rate / (np.sqrt(v_hat) + epsilon) * (beta1 * m_hat + (1 - beta1) * grad / (1 - beta1 ** steps))
        self._apply_update(idx, step['W'], step['b'])

    def fit(self, x_train, y_train,batch_size=32,epochs=500, learning_rate=1e-3, optimizer="GD",val_split=0.1,init_type="Xavier",loss_type="CrossEntropy",reg=0):
        '''
        Train the model: forward pass, backward pass and parameter update per mini-batch.

        Input: training data, batch_size, epochs, learning rate, optimizer name
               ("GD", "SGDM", "Nesterov", "RMSprop", "Adam" or "Nadam"), val_split
               fraction, initialization type, loss type and the L2 coefficient.
        Output: None. Prints and logs to wandb the train/validation loss and accuracy
                after every epoch; the per-epoch accuracies are also kept in
                self.train_accs and self.val_accs.
        Raises: ValueError for an unknown optimizer name (previously no update was
                applied and training silently did nothing).
        '''
        updaters = {
            "GD": self.GDoptimize,
            "SGDM": self.SGDMoptimize,
            "Nesterov": self.Nesterovoptimize,
            "RMSprop": self.RMSpropoptimize,
            "Adam": self.Adamoptimize,
            "Nadam": self.Nadamoptimize,
        }
        if optimizer not in updaters:
            raise ValueError(f"Unknown optimizer {optimizer!r}; expected one of {sorted(updaters)}")
        needs_step_count = optimizer in ("Adam", "Nadam")
        update = updaters[optimizer]

        self.train_accs = []  # training accuracy after each epoch
        self.val_accs = []    # validation accuracy after each epoch
        helper = Helper()     # named `helper` so the builtin help() is not shadowed

        '''Initializations'''
        self.epochs = epochs
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.init_type=init_type
        self.reg=reg
        self.loss_type=loss_type

        '''Splitting the training data into train and val data based on the val_split value'''
        x_train,x_val,y_train,y_val=train_test_split(x_train,y_train,test_size=val_split,stratify=y_train,random_state=42)

        # The batches are plain slices of the (unshuffled) training split, so they are
        # identical for every epoch and can be built once.
        batches = helper.create_batches(x_train, y_train, batch_size)

        # Each fit() call is a fresh optimisation run: clear the accumulators and count
        # update steps across ALL epochs so the Adam/Nadam bias correction stays
        # consistent with the accumulated moments.
        self.opt_state = dict()
        steps = 0

        '''Training Cycle'''
        for i in range(1, self.epochs+1):
            print(f'Epoch {i}')

            for x, y in batches:
                steps += 1
                self.forward(x,self.init_type)
                self.backward(y,self.loss_type,self.reg)

                for idx in self.layers.keys():
                    if needs_step_count:
                        update(idx, steps, learning_rate=self.learning_rate)
                    else:
                        update(idx, learning_rate=self.learning_rate)

            '''Predict with network on x_train'''
            train_preds = self.forward(x_train)
            # Loss over the whole training split with the end-of-epoch weights, so it is
            # comparable to the validation loss (the last mini-batch was used before).
            train_loss = helper.compute_loss(y_train, train_preds,self.layers,self.loss_type,self.reg)
            train_acc=helper.accuracy(y_train,train_preds)
            self.train_accs.append(train_acc)

            '''predict with network on validation data'''
            val_preds = self.forward(x_val)
            val_acc=helper.accuracy(y_val,val_preds)
            self.val_accs.append(val_acc)
            val_loss = helper.compute_loss(y_val, val_preds,self.layers,self.loss_type,self.reg)

            print(f'Train Loss:{train_loss} Train Acc: {train_acc} Val Acc: {val_acc} Val Loss: {val_loss}')
            '''Wandb logging values of Train accuracy, Train loss, val accuracy and val loss'''
            wandb.log(
        {"Train/Loss": train_loss, "Train/Accuracy": train_acc, "Val/Accuracy": val_acc, "Val/Loss":val_loss,"Epoch":i})

    def predict(self,x):
        '''
        Input: data to predict on
        Output: the model output (predicted probabilities) for x
        '''
        return self.forward(x)

Wandb credentials and login

In [None]:
'''
Installing wandb and login
'''
# Notebook shell escapes: install the wandb client, then run its login command.
! pip install wandb
! wandb login

Collecting wandb
  Downloading wandb-0.12.10-py2.py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 19.3 MB/s eta 0:00:01[K     |▍                               | 20 kB 11.6 MB/s eta 0:00:01[K     |▋                               | 30 kB 10.0 MB/s eta 0:00:01[K     |▊                               | 40 kB 10.3 MB/s eta 0:00:01[K     |█                               | 51 kB 7.6 MB/s eta 0:00:01[K     |█▏                              | 61 kB 8.8 MB/s eta 0:00:01[K     |█▍                              | 71 kB 9.4 MB/s eta 0:00:01[K     |█▌                              | 81 kB 9.4 MB/s eta 0:00:01[K     |█▊                              | 92 kB 10.3 MB/s eta 0:00:01[K     |██                              | 102 kB 10.2 MB/s eta 0:00:01[K     |██                              | 112 kB 10.2 MB/s eta 0:00:01[K     |██▎                             | 122 kB 10.2 MB/s eta 0:00:01[K     |██▌                             | 133 kB 10.2 MB/s eta 0:0

In [None]:
'''
Essential Imports including the dataset library.
'''
from keras.datasets import fashion_mnist # dataset to work on.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import wandb

In [None]:
''' Datset loading'''
# Unpack the (inputs, labels) pairs for the train and test splits from the keras loader.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
'''Date preprcessing'''
# Flatten every sample to one row, keeping the sample axis first.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)
print(x_train.shape, x_test.shape)
# Divide by 255 and store the result as float32.
x_train = (x_train / 255.).astype(np.float32)
x_test = (x_test / 255.).astype(np.float32)

(60000, 784) (10000, 784)


In [None]:
'''Method to make labels into a onehot vector'''
def one_hot(Y, num_classes=None):
    '''
    Convert a 1-D sequence of non-negative integer labels into one-hot rows.

    Input: Y - integer class labels; num_classes - optional number of columns.
           When omitted it is max(Y) + 1, so the width no longer depends on how many
           distinct labels happen to be present (len(set(Y)) raised IndexError
           whenever the label values were not exactly 0..k-1).
    Output: float array of shape (len(Y), num_classes) with a single 1. per row.
    Raises: ValueError for negative labels or labels >= num_classes.
    '''
    labels = np.asarray(Y, dtype=np.intp)
    if labels.size and labels.min() < 0:
        raise ValueError("labels must be non-negative integers")
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0
    elif labels.size and labels.max() >= num_classes:
        raise ValueError("num_classes must be greater than the largest label")

    encoded = np.zeros((labels.shape[0], num_classes))
    # One fancy-indexed assignment sets the hot entry of every row.
    encoded[np.arange(labels.shape[0]), labels] = 1.
    return encoded

In [None]:
'''Label conversion to onehot vectors'''
# Replace the integer label vectors with their one-hot encodings.
y_train = one_hot(y_train)
y_test = one_hot(y_test)
# Display both shapes as the cell output.
y_train.shape, y_test.shape

((60000, 10), (10000, 10))

In [None]:
'''
Method: train, for sweep in wandb for hyper parameter tuning
'''
def train():
    '''
    Entry point for one run of a wandb hyper-parameter sweep.

    Starts a wandb run, reads the hyper-parameters from wandb.config (falling back to
    the defaults below), builds a network with `no_hidden_layer` hidden layers of
    `size_hidden` units plus a 10-unit softmax output layer, and trains it on the
    notebook-level x_train / y_train with cross-entropy loss.

    Raises: ValueError if the configured optimizer name is not recognised (previously
            the name was passed through unchanged and training applied no updates).
    '''
    # Default values for hyper-parameters we're going to sweep over
    config_defaults = {
        'epochs': 10,
        'no_hidden_layer':4,
        'learning_rate': 1e-3,
        'opt':'adam',
        'activation':'tanh',
        'batch_size':64,
        'size_hidden':128,
        'reg':0,
        'init_type':'Xavier'
    }

    # Initialize a new wandb run
    wandb.init(project=project, entity=entity,config=config_defaults)

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    # Sweep configs use lower-case optimizer names; Neural_Network.fit expects these
    # spellings. Matching is case-insensitive, so already-canonical names still work.
    optimizer_names = {
        'gd': 'GD',
        'adam': 'Adam',
        'rmsprop': 'RMSprop',
        'sgdm': 'SGDM',
        'nadam': 'Nadam',
        'nesterov': 'Nesterov',
    }
    opt_key = str(config.opt).lower()
    if opt_key not in optimizer_names:
        raise ValueError(f"Unknown optimizer {config.opt!r}; expected one of {sorted(optimizer_names)}")
    opt = optimizer_names[opt_key]

    # Model training here and sweeping the values.
    model = Neural_Network()
    for _ in range(config.no_hidden_layer):
        model.add(Layer(config.size_hidden, activation=config.activation))

    model.add(Layer(10, activation='softmax'))
    print(model.layers)
    model.fit(x_train, y_train,batch_size=config.batch_size,epochs=config.epochs, learning_rate=config.learning_rate, optimizer=opt,val_split=0.1,init_type=config.init_type,loss_type="CrossEntropy",reg=config.reg)

**Training and testing on the best parameter set and generating the confusion matrix**

In [None]:
'''
Project initialization
'''
# Start a wandb run using the notebook-level `project` and `entity` names.
wandb.init(project=project, entity=entity)



In [None]:
'''
Testing the models on Fashion_MNIST data for the best configuration.
'''
'''
Best Hyper parameter set
'''
# Hyper-parameters of the chosen configuration; all of them are passed to model.fit below.
epochs = 10
acti='tanh'           # activation used by every hidden layer
lr = 1e-4
batch_size = 64
optimizer="RMSprop"
init_type="Xavier"
loss_type="CrossEntropy"
reg=0.0005            # L2 regularization coefficient
hidden_size=64        # units per hidden layer
no_hidden_layer=4



model = Neural_Network()

# Stack the hidden layers, then a 10-unit softmax output layer.
for i in range(no_hidden_layer):
        model.add(Layer(hidden_size, activation=acti))

model.add(Layer(10, activation='softmax'))
print(model.layers)
# fit() holds out 10% of the training data for validation (val_split=0.1).
model.fit(x_train, y_train,batch_size=batch_size,epochs=epochs, learning_rate=lr, optimizer=optimizer,val_split=0.1,init_type=init_type,loss_type=loss_type,reg=reg)
# Model outputs for the whole test set, one row per sample.
y_prob=model.predict(x_test)

{1: <__main__.Layer object at 0x7f086845bbd0>, 2: <__main__.Layer object at 0x7f086845bc90>, 3: <__main__.Layer object at 0x7f086845bcd0>, 4: <__main__.Layer object at 0x7f086845bd10>, 5: <__main__.Layer object at 0x7f086845bc50>}
Epoch 1
Train Loss:0.5043917651151516 Train Acc: 83.8537037037037 Val Acc: 83.28333333333333 Val Loss: 0.5913077578863284
Epoch 2
Train Loss:0.498067102118496 Train Acc: 85.65 Val Acc: 84.8 Val Loss: 0.5479379289976873
Epoch 3
Train Loss:0.48675478880167533 Train Acc: 86.52222222222223 Val Acc: 85.5 Val Loss: 0.5174020936693813
Epoch 4
Train Loss:0.46506470564577196 Train Acc: 87.62037037037037 Val Acc: 86.55000000000001 Val Loss: 0.4951242561134436
Epoch 5
Train Loss:0.4672634232792543 Train Acc: 87.97962962962963 Val Acc: 87.01666666666667 Val Loss: 0.48363019308616223
Epoch 6
Train Loss:0.4499980528204986 Train Acc: 88.1962962962963 Val Acc: 87.55 Val Loss: 0.47249849818380407
Epoch 7
Train Loss:0.429947433281647 Train Acc: 87.93148148148148 Val Acc: 87.16

In [None]:
# NOTE(review): the name `help` shadows Python's built-in help() for the rest of the session.
help=Helper()
# Accuracy (in percent) of the model outputs in y_prob against the labels in y_test.
accuracy=help.accuracy(y_test,y_prob)

In [None]:
# Reload the dataset: this overwrites the one-hot labels and the flattened, scaled
# inputs created earlier with the loader's original arrays.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
# Display names of the classes; the list is indexed by integer label below.
class_type = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat','Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] 

In [None]:
# Collect, in label order, the display name of every label value 0-9 that occurs
# at least once in y_train (expects the integer label vector, not one-hot rows).
class_list = []
for i in range(10):
    if np.any(y_train == i):
        class_list.append(class_type[i])

In [None]:
# Flatten each test sample to one row, divide by 255 and store as float32.
x_test = (x_test.reshape(len(x_test), -1) / 255.).astype(np.float32)

In [None]:
# Predicted class index for every test sample. One batched forward pass replaces
# the 10,000 single-sample predict() calls; the result is cast to float to keep the
# dtype the element-wise fill of an np.empty array used to produce.
# Despite its name, y_prob now holds class indices, not probabilities.
y_prob = model.predict(x_test).argmax(axis=1).astype(np.float64)

In [None]:
# Display the true test labels and the shape of the predicted-label array.
y_test,y_prob.shape

(array([9, 2, 1, ..., 8, 1, 5], dtype=uint8), (10000,))

In [None]:
'''
confusion amtrix logging
'''
# Log a confusion-matrix plot (predicted vs. true labels, named via class_list)
# together with the previously computed `accuracy` value.
wandb.log({"conf_mat" : wandb.plot.confusion_matrix(preds=y_prob, y_true=y_test, class_names=class_list),"Test Accuracy": accuracy })