## See the ReadME for a description of the project


### Importing dependencies

In [1]:
import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
import GPyOpt

### Load Data

In [2]:
# define number of classes
n_classes = 10

# load train and test sets
train = pd.read_csv('Data/fashion-mnist_train.csv')
test = pd.read_csv('Data/fashion-mnist_test.csv')

train = train.to_numpy()
test = test.to_numpy()

X_train = train[:,1:]
y_train = train[:,0]
X_test = test[:,1:]
y_test = test[:,0]
print("Train data shape:", X_train.shape, "Test data shape:", X_test.shape)
print("Train labels shape:", y_train.shape,"  Test labels shape:", y_test.shape)

Train data shape: (60000, 784) Test data shape: (10000, 784)
Train labels shape: (60000,)   Test labels shape: (10000,)


In [3]:
# this section is inspired by code from the article:
# "Build an Image Classification Model using Convolutional Neural Networks in PyTorch"
# Author: Pulkit Sharma
# https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/

# change training and test set into tensors
X_train = X_train.reshape(len(X_train),1,28,28)
X_train = torch.from_numpy(X_train)
X_train = X_train.type(torch.FloatTensor)

y_train = y_train.astype(int)
y_train = torch.from_numpy(y_train)
y_train = y_train.type(torch.LongTensor)

X_test = X_test.reshape(len(X_test),1,28,28)
X_test = torch.from_numpy(X_test)
X_test = X_test.type(torch.FloatTensor)

y_test = y_test.astype(int)
y_test = torch.from_numpy(y_test)
y_test = y_test.type(torch.LongTensor)

# verify shape of training data
print("Train data shape:", X_train.shape, "Test data shape:", X_test.shape)
print("Train labels shape:", y_train.shape,"  Test labels shape:", y_test.shape)




Train data shape: torch.Size([60000, 1, 28, 28]) Test data shape: torch.Size([10000, 1, 28, 28])
Train labels shape: torch.Size([60000])   Test labels shape: torch.Size([10000])


## Implementing the CNN model

In [4]:
# this section is inspired by code from the article:
# "Build an Image Classification Model using Convolutional Neural Networks in PyTorch"
# Author: Pulkit Sharma
# https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/


class Net(Module):
    def __init__(self, kernel_size = 2, stride = 1, padding = 1):
        super(Net,self).__init__()
        
        self.cnn_layers = Sequential(
            # Define convolutional layers
            # input dimensionality before each layer is added
            
            # 1st layer
            # 1x28x28
            Conv2d(1,4,kernel_size=kernel_size,stride=stride,padding=padding),
            # apply batch normalization
            #BatchNorm2d(4),
            # activation function ReLU
            ReLU(),
            # apply pooling
            MaxPool2d(kernel_size=kernel_size,stride=stride),
            
            # second layer
            # 4x14x14
            Conv2d(4,4,kernel_size=kernel_size,stride=stride,padding=padding),
            #BatchNorm2d(4),
            ReLU(),
            MaxPool2d(kernel_size=kernel_size,stride=stride),
            
            # 4x7x7
        )
        
        self.linear_layers = Sequential(
            Linear(4*7*7,n_classes)
        )
    
    def update_linear_layers(self,input_size):
        self.linear_layers = Sequential(
            Linear(input_size,n_classes)
        )
    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0),-1)
        self.update_linear_layers(x.size()[1])
        x = self.linear_layers(x)
        return x

In [5]:
def train(epoch, model, optimizer, train_losses, val_losses, criterion):
    """Train model

    Args:
        epoch (int): Current epoch (index in loop)
        model (obj): Model object
        optimizer (obj): optimizer function
        train_losses (list): List of train losses
        val_losses (list): List of test losses
        criterion (func): Criterion function

    Returns:
        list, list, obj, obj: train losses, test losses, optimizer, model
    """
    model.train()
    tr_loss = 0
    # getting the training set
    x_train_t, y_train_t = Variable(X_train), Variable(y_train)
    # getting the validation set
    x_val_t, y_val_t = Variable(X_test), Variable(y_test)
    # converting the data into GPU format
    if torch.cuda.is_available():
        x_train_t = x_train_t.cuda()
        y_train_t = y_train_t.cuda()
        x_val_t = x_val_t.cuda()
        y_val_t = y_val_t.cuda()

    # clearing the Gradients of the model parameters
    optimizer.zero_grad()
    
    # prediction for training and validation set
    output_train = model(x_train_t)
    output_val = model(x_val_t)

    # computing the training and validation loss
    loss_train = criterion(output_train, y_train_t)
    loss_val = criterion(output_val, y_val_t)
    train_losses.append(loss_train)
    val_losses.append(loss_val)

    # computing the updated weights of all the model parameters
    loss_train.backward()
    optimizer.step()
    tr_loss = loss_train.item()
    if epoch%2 == 0:
        # printing the validation loss
        print('Epoch : ',epoch+1, '\t', 'loss :', loss_val)

    return train_losses, val_losses, optimizer, model

In [6]:

def CNN_model_train(kernel_size = 2, stride = 1, padding = 1, learning_rate = 0.07, n_epochs = 5):
    """Define network and train model with given hyperparameters

    Args:
        kernel_size (int, optional): Kernel size. Defaults to 2.
        stride (int, optional): Stride. Defaults to 1.
        padding (int, optional): Padding. Defaults to 1.
        learning_rate (float, optional): Learning rate. Defaults to 0.07.
        n_epochs (int, optional): Number of epochs. Defaults to 5.
    """
    # define model
    model = Net(kernel_size, stride, padding)
    # defining the optimizer
    optimizer = Adam(model.parameters(), lr=0.07)
    # defining the loss function
    criterion = CrossEntropyLoss()
    # checking if GPU is available
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        
    # defining the number of epochs
    # n_epochs = 5
    # empty list to store training losses
    train_losses = []
    # empty list to store validation losses
    val_losses = []
    # training the model
    for epoch in range(n_epochs):
        train_losses, val_losses, optimizer, model = train(epoch, model, optimizer, train_losses, val_losses, criterion)
    print(model)
    # return final loss value
    return val_losses[-1]

In [7]:
# Run model and print the final loss
final_loss = CNN_model_train(kernel_size = 2, stride = 1, padding = 1, learning_rate = 0.07, n_epochs = 1)
print(float(final_loss))

Epoch :  1 	 loss : tensor(23.1502, grad_fn=<NllLossBackward>)
Net(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 4, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=3136, out_features=10, bias=True)
  )
)
tensor(23.1502, grad_fn=<NllLossBackward>)


## 

In [10]:
# This code is inspired by code from Exercise 4 in course 02463

kernel_sizes = tuple(np.arange(1,11,1, dtype= np.int))
stride = tuple(np.arange(1,11,1, dtype= np.int))
padding = tuple(np.arange(1,5,1, dtype= np.int))
learning_rate = tuple(np.arange(0.01,0.11, 0.01, dtype= np.int))
n_epochs = tuple(np.arange(10,201, 20, dtype= np.int))

# # For testing
# kernel_sizes = tuple(np.arange(1,2,1, dtype= np.int))
# stride = tuple(np.arange(1,2,1, dtype= np.int))
# padding = tuple(np.arange(1,2,1, dtype= np.int))
# learning_rate = tuple(np.arange(0.01,0.02, 0.01, dtype= np.int))
# n_epochs = tuple(np.arange(1,2, 1, dtype= np.int))

# define the dictionary for GPyOpt
domain = [{'name': 'kernel_sizes', 'type': 'discrete', 'domain': kernel_sizes},
            {'name': 'stride', 'type': 'discrete', 'domain': stride},
            {'name': 'padding', 'type': 'discrete', 'domain': padding},
            {'name': 'learning_rate', 'type': 'discrete', 'domain': learning_rate},
            {'name': 'n_epochs', 'type': 'discrete', 'domain': n_epochs},
            ]

# TODO Figure out how to represent the learning_rate as a float. Not sure the type should be discrete

def objective_function(x): 
    # print(x)
    param = x[0]
        
    loss = CNN_model_train(kernel_size = int(param[0]), 
                            stride = int(param[1]), padding = int(param[2]), 
                            learning_rate = param[3], n_epochs = int(param[4]))
    
    return - float(loss)


opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                              domain = domain,         # box-constrains of the problem
                                              acquisition_type = "EI"      # Select acquisition function MPI, EI, LCB
                                             )
opt.acquisition.exploration_weight=.1

opt.run_optimization(max_iter = 15) 

x_best = opt.X[np.argmin(opt.Y)]
print("The best parameters obtained: " + x_best)


[[1. 1. 1. 0. 1.]]
Epoch :  1 	 loss : tensor(6.6814, grad_fn=<NllLossBackward>)
Net(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 4, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=4096, out_features=10, bias=True)
  )
)
[[1. 1. 1. 0. 1.]]
Epoch :  1 	 loss : tensor(7.3935, grad_fn=<NllLossBackward>)
Net(
  (cnn_layers): Sequential(
    (0): Conv2d(1, 4, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(4, 4, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=1, stride=1, padding=0, dilati