In [46]:
import numpy as np
import random
from matplotlib import pyplot as plt
from keras.datasets import fashion_mnist
from tqdm import tqdm_notebook as tqdm
import math
import cv2 as cv
import wandb

# --- Sweep configuration -------------------------------------------------
# Objective the sweep optimises: the logged 'valAcc' value, to be maximised.
metric = {'name': 'valAcc', 'goal': 'maximize'}

# Hyper-parameter search space; every entry lists the discrete values to try.
parameters_dict = {
    'optimizer': {'values': ['nadam', 'momentum', 'nag', 'rmsprop', 'adam', 'sgd']},
    'fc_layer_size': {'values': [32, 64, 128]},
    'number_of_layers': {'values': [3, 4, 5]},
    'epochs': {'values': [5, 10]},
    'decay': {'values': [0, 0.0005, 0.5]},
    'learningRate': {'values': [1e-1, 1e-3, 1e-4]},
    'batchSize': {'values': [16, 32, 64]},
    'initialization': {'values': ['random', 'xavier']},
    'activation': {'values': ['sigmoid', 'tanh', 'relu']},
}

# NOTE: 'grid' enumerates the full cartesian product of the value lists above:
# 6 * 3 * 3 * 2 * 3 * 3 * 3 * 2 * 3 = 17,496 configurations.
sweep_config = {
    'method': 'grid',
    'metric': metric,
    'parameters': parameters_dict,
}

# --- Sweep registration --------------------------------------------------
# Registers the sweep with Weights & Biases and keeps the returned id.
sweep_id = wandb.sweep(sweep_config, project="cs6910.cs22m028.q1")

Create sweep with ID: 01d1hhx5
Sweep URL: https://wandb.ai/cs22m028/cs6910.cs22m028.q1/sweeps/01d1hhx5


In [55]:
class PreProc:
    '''Loads the Fashion-MNIST dataset and prepares it for training.

    Constructing an instance immediately loads the dataset via
    fashion_mnist.load_data().

    visualize(n) logs to wandb one image for each distinct class found among
    the first n training images.

    flattenAndCentralize() returns mean-centred, flattened copies of the
    training and test images.

    getLabels() returns the training and test labels.

    getInputSize() returns len(self.trainx[0]), i.e. the length of the first
    axis of a single training image.
    '''

    # Human-readable class names, indexed by the integer label.
    _LABEL_NAMES = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
                    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

    def __init__(self):
        '''Load the train/test split into self.trainx, self.trainy,
        self.testx and self.testy.
        '''
        (self.trainx, self.trainy), (self.testx, self.testy) = fashion_mnist.load_data()

    def visualize(self, n):
        ''' args -> n :: The number of leading training images to scan
            returns-> None

            Starts (or attaches to) a wandb run and logs, under the key
            "Fashion_mnist_dataset", the first image of every distinct class
            encountered among the first n training images.
        '''
        wandb.init()
        labels = self._LABEL_NAMES
        images = []
        seen = set()
        # Clamp so that asking for more images than exist cannot raise IndexError.
        limit = min(n, len(self.trainy))
        for i in range(limit):
            label = int(self.trainy[i])
            if label in seen:
                continue
            images.append(wandb.Image(self.trainx[i], caption=labels[label]))
            seen.add(label)
            # Once every class has an example nothing more can be added.
            if len(seen) == len(labels):
                break

        wandb.log({"Fashion_mnist_dataset": images})

    def flattenAndCentralize(self):
        ''' args -> none
            returns -> trainx_flattened :: float64 array of shape
                        (num_train_images, pixels_per_image): the training
                        images with the per-pixel training mean subtracted
                    -> testx_flattened :: float64 array of shape
                        (num_test_images, pixels_per_image): the test images
                        with the same per-pixel training mean subtracted

            self.trainx and self.testx are left untouched.
        '''
        # np.array(...) always copies, so the in-place subtraction below can
        # never modify the stored dataset.
        trainx_flattened = np.array(self.trainx, dtype='float64')
        testx_flattened = np.array(self.testx, dtype='float64')

        # Centre both splits with the statistics of the training split so the
        # test data goes through exactly the same transform as the data the
        # model is fitted on.
        train_mean = np.mean(trainx_flattened, axis=0)
        trainx_flattened -= train_mean
        testx_flattened -= train_mean

        # NOTE: no blur is applied. An earlier per-image cv.GaussianBlur loop
        # discarded its result (it only rebound the loop variable), so the
        # returned arrays were never blurred; it was removed to avoid the
        # wasted work.

        # Derive the flattened width from the data instead of hard-coding the
        # sample count and 784, so any split size works.
        pixels = int(np.prod(trainx_flattened.shape[1:]))
        trainx_flattened = trainx_flattened.reshape(len(trainx_flattened), pixels)
        testx_flattened = testx_flattened.reshape(len(testx_flattened), pixels)
        return trainx_flattened, testx_flattened

    def getLabels(self):
        ''' args -> none
            returns -> self.trainy :: The labels of the training data
                    -> self.testy :: The labels of the testing data
        '''
        return self.trainy, self.testy

    def getInputSize(self):
        ''' args -> none
            returns -> len(self.trainx[0]) :: length of the first axis of the
                        first training image

            NOTE(review): this is neither the number of training images
            (len(self.trainx)) nor the flattened feature count produced by
            flattenAndCentralize() - confirm which one callers expect.
        '''
        return len(self.trainx[0])

In [51]:
if __name__ == '__main__':
    # Load the dataset and log one sample per class found in the first 35 images.
    data = PreProc()
    data.visualize(35)

    # Mean-centred, flattened images together with their labels.
    train_x, test_x = data.flattenAndCentralize()
    train_y, test_y = data.getLabels()

    # Rescale the centred pixel values by the 8-bit maximum.
    trainx, testx = train_x / 255.0, test_x / 255.0

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016751356250218428, max=1.0…