Q1. Build a small CNN model consisting of 5 convolution layers, where each convolution layer is followed by a ReLU activation and a max-pooling layer.

In [None]:
#Necessary Libraries
import numpy as np
import tensorflow
from keras.utils import np_utils
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Conv2D, BatchNormalization, Dropout, MaxPooling2D, Activation


In [None]:
# Mount Google Drive at /content/drive; the dataset archive is read from there
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
#Unzipping the nature_12K.zip dataset
zip_path = "/content/drive/MyDrive/Dataset /nature_12K.zip"
!cp "{zip_path}" .
!unzip -q nature_12K.zip
!rm nature_12K.zip

In [None]:
#Installing the wandb library

!pip install wandb -qqq
import wandb
from wandb.keras import WandbCallback

In [None]:
# Log in to Weights & Biases (needed before the sweep / runs below are created)
wandb.login()

In [None]:
# Class labels plus the folders the training and test images are read from
import os

Tags = [
    'Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi',
    'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia',
]
no_class = len(Tags)  # one class per label above
train_dir = 'inaturalist_12K/train/'
test_dir = 'inaturalist_12K/val/'  # the `val/` folder is used as the test set

In [None]:
#Visualizing one sample image of each class

import matplotlib.pyplot as plt
import cv2 
import matplotlib.image as mpimg

n_rows = 2
n_cols = (len(Tags) + n_rows - 1) // n_rows  # enough columns to fit every class (5 for 10 classes)
fig = plt.figure(figsize=(20,8))
for position, tag in enumerate(Tags, start=1):
    # `class_dir` rather than `dir`, so the builtin dir() is not shadowed for the rest of the session
    class_dir = os.path.join(train_dir, tag)
    # os.listdir returns entries in arbitrary order; sorting makes the displayed sample reproducible
    image_name = sorted(os.listdir(class_dir))[0]
    image_path = os.path.join(class_dir, image_name)
    img = mpimg.imread(image_path)
    img_req = cv2.resize(img, (128, 128))
    ax = fig.add_subplot(n_rows, n_cols, position)
    ax.imshow(img_req)
    ax.axis('off')
    ax.set_title(tag)



In [None]:
# Generating the dataset for training, validation and testing
def train_val__test_data_generation(batch_size=256,augment_data=False,image_size=128,validation_split=0.1):
    """Create directory-backed batch generators for training, validation and testing.

    Args:
        batch_size: number of images per batch for all three generators.
        augment_data: when True, random rotation / zoom / shear / horizontal flip
            are applied to the *training* images only.
        image_size: images are resized to (image_size, image_size).
        validation_split: fraction of the images under `train_dir` held out
            for validation.

    Returns:
        (train_generator, val_generator, test_generator). The first two read from
        `train_dir`; the last reads from `test_dir`.
    """
    #Augmenting the training data to avoid overfitting
    if augment_data:
        train_datagen = ImageDataGenerator(rescale=1./255,
                                          rotation_range=45,
                                          zoom_range=0.2,
                                          shear_range=0.2,
                                          validation_split=validation_split,
                                          horizontal_flip=True,
                                          vertical_flip=False)
    else:
        train_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)

    # Validation and test images must only be rescaled. Drawing the validation
    # subset from the augmenting generator would randomly distort the images the
    # model is evaluated on. Both generators share the same validation_split and
    # Keras selects subset files by their position in the directory listing, so
    # the training and validation subsets remain disjoint.
    val_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
    test_datagen = ImageDataGenerator(rescale=1./255)

    target_size = (image_size, image_size)

    #Generating data batch by batch so the whole dataset never has to sit in memory
    train_generator = train_datagen.flow_from_directory(train_dir, target_size=target_size, batch_size=batch_size, subset="training")
    val_generator = val_datagen.flow_from_directory(train_dir, target_size=target_size, batch_size=batch_size, subset="validation")
    test_generator = test_datagen.flow_from_directory(test_dir, target_size=target_size, batch_size=batch_size)

    return train_generator, val_generator, test_generator

In [None]:
# Build the three generators once with batch size 64 and augmentation enabled.
# Note: train() below builds its own generators from the sweep configuration.
train_generator, val_generator, test_generator = train_val__test_data_generation(batch_size=64,augment_data=True)

In [None]:
#Building a CNN model with one convolution block per entry of `filters` (5 in this assignment).

def CNN(filters,filter_size, image_size=128,
              dropout=0.2,batch_norm=False, dense_size=64, 
              regpara=0, no_of_classes=10, activation='relu'):
    """Build (but do not compile) a Sequential CNN classifier.

    Each convolution block is Conv2D -> activation -> optional BatchNormalization
    -> 2x2 MaxPooling2D. The blocks are followed by Flatten, one Dense layer,
    Dropout, BatchNormalization and a softmax output layer.

    Args:
        filters: number of filters for each convolution block, one entry per block.
        filter_size: kernel size for each convolution block; same length as `filters`.
        image_size: inputs are expected to be (image_size, image_size, 3).
        dropout: dropout rate applied after the dense layer.
        batch_norm: whether to add BatchNormalization inside each convolution block.
        dense_size: number of units in the dense layer.
        regpara: L2 regularisation factor for the convolution and dense kernels.
        no_of_classes: number of units in the softmax output layer.
        activation: activation used after each convolution.

    Returns:
        The uncompiled Sequential model.

    Raises:
        ValueError: if `filters` is empty or its length differs from `filter_size`.
    """
    # Validate up front so a bad configuration fails with a clear message
    # instead of an IndexError halfway through building the model.
    if len(filters) == 0:
        raise ValueError("filters must contain at least one entry")
    if len(filters) != len(filter_size):
        raise ValueError(
            "filters and filter_size must have the same length, got %d and %d"
            % (len(filters), len(filter_size)))

    model = Sequential()
    for layer_idx, (n_filters, kernel) in enumerate(zip(filters, filter_size)):
        conv_kwargs = dict(filters=n_filters, kernel_size=kernel, padding='same',
                           kernel_regularizer=regularizers.l2(regpara))
        if layer_idx == 0:
            #Input Layer needs the image shape
            conv_kwargs['input_shape'] = (image_size, image_size, 3)
        model.add(Conv2D(**conv_kwargs))
        #Adding the Activation function
        model.add(Activation(activation))
        #Batch normalization after each activation
        if batch_norm:
            model.add(BatchNormalization())
        #Max Pooling after each layer
        model.add(MaxPooling2D(pool_size=(2,2)))

    # FullyConnected layer
    model.add(Flatten()) #flatten to get the final feature vector
    model.add(Dense(dense_size, activation="relu",kernel_regularizer= regularizers.l2(regpara)))
    model.add(Dropout(dropout)) #Adding dropout for better regularization
    # NOTE(review): this BatchNormalization is added even when batch_norm=False;
    # confirm whether the flag is meant to cover the dense head as well.
    model.add(BatchNormalization())

    #Output Layer
    model.add(Dense(no_of_classes, activation = "softmax")) #using softmax since we are building a classifier  

    return model

In [None]:
#Search space for the sweep: Bayesian search that maximizes `val_accuracy`
sweep_config = dict(
    method='bayes',
    metric=dict(name='val_accuracy', goal='maximize'),
    parameters=dict(
        # one kernel size per convolution layer
        kernel_size=dict(values=[
            [(2,2),(2,2),(2,2),(2,2),(2,2)],
            [(3,3),(3,3),(3,3),(3,3),(3,3)],
            [(6,6),(5,5),(4,4),(3,3),(2,2)],
            [(2,2),(3,3),(4,4),(5,5),(6,6)],
        ]),
        weight_decay=dict(values=[0, 0.0005, 0.005]),
        dropout=dict(values=[0, 0.2, 0.3, 0.4]),
        learning_rate=dict(values=[1e-3, 1e-4]),
        activation=dict(values=['relu','selu','elu']),
        batch_norm=dict(values=[True,False]),
        # one filter count per convolution layer
        filters=dict(values=[
            [32,32,32,32,32],
            [32,64,128,256,512],
            [32,16,8,4,2],
            [512,256,128,64,32],
        ]),
        augment_data=dict(values=[True,False]),
        batch_size=dict(values=[32, 64, 128, 256]),
        dense_size=dict(values=[64, 128, 256, 512]),
    ),
)

In [None]:
#Training the CNN model using the sweep configurations
def train():
    """Run one training job for the wandb sweep.

    Starts a wandb run (sweep values override the defaults below), builds and
    compiles the CNN, trains it on the generated data while logging through
    WandbCallback, and finally logs the run's key hyperparameters together
    with the best validation accuracy.
    """
    #default configurations
    config_defaults = {
        'kernel_size': [(3,3),(3,3),(3,3),(3,3),(3,3)],
        'weight_decay': 0.005,
        'dropout': 0.2,
        'learning_rate': 1e-3,
        'activation': 'relu',
        'batch_size': 64,
        'epochs': 10,
        'batch_norm': True,
        'filters' : [32,32,32,32,32],
        'augment_data': True,
        'dense_size': 256,
        'seed': 1234,
        'no_of_classes': 10
    }

    # The agent calls train() many times in the same process; release the Keras
    # state left over from the previous run so memory does not keep growing.
    tensorflow.keras.backend.clear_session()

    # Initialize a new wandb run
    wandb.init(config=config_defaults)
    #config will store the hyperparameters
    config = wandb.config
    wandb.run.name = 'dense_size_'+ str(config.dense_size)+'_bs_'+str(config.batch_size)+'_ac_'+ config.activation

    # Apply the configured seed (it was previously stored in the config but never
    # used) so that runs with the same hyperparameters are comparable.
    np.random.seed(config.seed)
    tensorflow.random.set_seed(config.seed)

    #calling the CNN model with the sweep configurations to build the model
    model=CNN(filters=config.filters,filter_size=config.kernel_size, image_size=128,
              dropout=config.dropout,batch_norm=config.batch_norm, dense_size=config.dense_size, 
              regpara=config.weight_decay, no_of_classes=config.no_of_classes, activation=config.activation )
    #using the Adam optimizer
    optimizer = Adam(learning_rate=config.learning_rate, beta_1=0.9, beta_2=0.999)

    model.compile(loss = "categorical_crossentropy", optimizer = optimizer, metrics=['accuracy'])

    # The test generator is not needed during the sweep, so it is discarded.
    train_generator, val_generator, _ = train_val__test_data_generation(batch_size=config.batch_size,augment_data=config.augment_data)
    #To train the model and store the best validation accuracy
    hist=model.fit(train_generator, epochs=config.epochs, validation_data=val_generator, callbacks=[WandbCallback()])
    val_acc=max(hist.history['val_accuracy'])
    params={'batch_norm':config.batch_norm,'augmentation':config.augment_data,'dropout':config.dropout,
            'filter_architecture':config.filters,'kernel_size':config.kernel_size,'val_acc':val_acc}
    wandb.log(params)

In [None]:
# Create the sweep from sweep_config in the given project / entity and keep its ID
sweep_id = wandb.sweep(sweep_config, project="CS6910_DL_Assignment2", entity="nomads")

In [None]:
# Run up to 50 trials of train() on the sweep created in the previous cell.
# A hard-coded ID from an earlier session would leave that new sweep unused and
# breaks on a fresh run; to resume an existing sweep, pass its ID here instead.
wandb.agent(sweep_id, function=train, entity="nomads", project="CS6910_DL_Assignment2", count=50)