## See the ReadME for a description of the project


### Importing dependencies

In [1]:
import numpy as np
import pandas as pd
import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
import GPyOpt
from timeit import default_timer as timer

## Implementing the CNN model

In [7]:
# this section is inspired by code from the article:
# "Build an Image Classification Model using Convolutional Neural Networks in PyTorch"
# Author: Pulkit Sharma
# https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/


class Net(Module):
    def __init__(self, kernel_size = 2, stride = 1, padding = 1):
        super(Net,self).__init__()
        
        self.n_classes = 10
        
        self.cnn_layers = Sequential(
            # Define convolutional layers
            # input dimensionality before each layer is added
            
            # 1st layer
            # 1x28x28
            Conv2d(1,4,kernel_size=kernel_size,stride=stride,padding=padding),
            # apply batch normalization
            #BatchNorm2d(4),
            # activation function ReLU
            ReLU(),
            # apply pooling
            MaxPool2d(kernel_size=kernel_size,stride=stride),
            
            # second layer
            # 4x14x14
            Conv2d(4,4,kernel_size=kernel_size,stride=stride,padding=padding),
            BatchNorm2d(4),
            ReLU(),
            MaxPool2d(kernel_size=kernel_size,stride=stride),
            
            # 4x7x7
        )
        
        self.linear_layers = Sequential(
            Linear(4*7*7,self.n_classes)
        )
    
    def update_linear_layers(self,input_size):
        self.linear_layers = Sequential(
            Linear(input_size,self.n_classes)
        )
    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.contiguous().view(x.size(0),-1)
        self.update_linear_layers(x.size()[1])
        x = self.linear_layers(x)
        return x

### Load Data

In [19]:
class Model:
    # number of classes
    n_classes = 10
    
    # load dataset
    # 60000 training points
    train_data = (pd.read_csv('Data/fashion-mnist_train.csv')).to_numpy()
    # 10000 training points
    test_data = (pd.read_csv('Data/fashion-mnist_test.csv')).to_numpy()
    
    def __init__(self,train_size=60000,test_size=10000):
        self.X_train = self.train_data[:,1:]
        self.y_train = self.train_data[:,0]
        self.X_test  = self.test_data[:,1:]
        self.y_test  = self.test_data[:,0]
        self.init_data(train_size,test_size)
    
    def get_shape(self):
        print("Train data shape:", self.X_train.shape, "Test data shape:", self.X_test.shape)
        print("Train labels shape:", self.y_train.shape,"  Test labels shape:", self.y_test.shape)
    
    def to_tensor(self):
        # this section is inspired by code from the article:
        # "Build an Image Classification Model using Convolutional Neural Networks in PyTorch"
        # Author: Pulkit Sharma
        # https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/

        # change training and test set into tensors
        self.X_train = self.X_train.reshape(len(self.X_train),1,28,28)
        self.X_train = torch.from_numpy(self.X_train)
        self.X_train = self.X_train.type(torch.FloatTensor)

        self.y_train = self.y_train.astype(int)
        self.y_train = torch.from_numpy(self.y_train)
        self.y_train = self.y_train.type(torch.LongTensor)

        self.X_test = self.X_test.reshape(len(self.X_test),1,28,28)
        self.X_test = torch.from_numpy(self.X_test)
        self.X_test = self.X_test.type(torch.FloatTensor)

        self.y_test = self.y_test.astype(int)
        self.y_test = torch.from_numpy(self.y_test)
        self.y_test = self.y_test.type(torch.LongTensor)
        
    def init_data(self,train_size,test_size):
        # create training and test set based
        # on train and test size
        # equal amount of each class in both training and test sets
        where_train = []
        where_test = []

        for label in range(self.n_classes):
            where_train.append(np.where(self.y_train == label)[0][:round(train_size/self.n_classes)])
            where_test.append(np.where(self.y_test == label)[0][:round(test_size/self.n_classes)])

        where_train = np.array(where_train).flatten()
        where_test = np.array(where_test).flatten()

        self.X_train = self.X_train[where_train]
        self.y_train = self.y_train[where_train]

        self.X_test = self.X_test[where_test]
        self.y_test = self.y_test[where_test]

        # change to tensors
        self.to_tensor()
        
    def train(self,epoch,model,optimizer,train_losses,val_losses,criterion):
        """Train model

        Args:
            epoch (int): Current epoch (index in loop)
            model (obj): Model object
            optimizer (obj): optimizer function
            train_losses (list): List of train losses
            val_losses (list): List of test losses
            criterion (func): Criterion function

        Returns:
            list, list, obj, obj: train losses, test losses, optimizer, model
        """
        model.train()
        tr_loss = 0
        # getting the training set
        x_train_t, y_train_t = Variable(self.X_train), Variable(self.y_train)
        # getting the validation set
        x_val_t, y_val_t = Variable(self.X_test), Variable(self.y_test)
        # converting the data into GPU format
        if torch.cuda.is_available():
            x_train_t = x_train_t.cuda()
            y_train_t = y_train_t.cuda()
            x_val_t = x_val_t.cuda()
            y_val_t = y_val_t.cuda()

        # clearing the Gradients of the model parameters
        optimizer.zero_grad()

        # prediction for training and validation set
        output_train = model(x_train_t)
        output_val = model(x_val_t)

        # computing the training and validation loss
        loss_train = criterion(output_train, y_train_t)
        loss_val = criterion(output_val, y_val_t)
        train_losses.append(loss_train)
        val_losses.append(loss_val)
        
        # computing the updated weights of all the model parameters
        loss_train.backward()
        optimizer.step()
        tr_loss = loss_train.item()
        #if epoch%2 == 0:
        #    # printing the validation loss
        #   print('Epoch : ',epoch+1, '\t', 'loss :', loss_val)

        return train_losses, val_losses, optimizer, model
    
    def CNN_model_train(self,kernel_size = 2, stride = 1, padding = 1, learning_rate = 0.07, n_epochs = 5):
        """Define network and train model with given hyperparameters

        Args:
            kernel_size (int, optional): Kernel size. Defaults to 2.
            stride (int, optional): Stride. Defaults to 1.
            padding (int, optional): Padding. Defaults to 1.
            learning_rate (float, optional): Learning rate. Defaults to 0.07.
            n_epochs (int, optional): Number of epochs. Defaults to 5.
        """
        # define model
        model = Net(kernel_size, stride, padding)
        # defining the optimizer
        optimizer = Adam(model.parameters(), lr=0.07)
        # defining the loss function
        criterion = CrossEntropyLoss()
        # checking if GPU is available
        if torch.cuda.is_available():
            model = model.cuda()
            criterion = criterion.cuda()

        # defining the number of epochs
        # n_epochs = 5
        # empty list to store training losses
        train_losses = []
        # empty list to store validation losses
        val_losses = []
        # training the model
        for epoch in range(n_epochs):
            train_losses, val_losses, optimizer, model = self.train(epoch, model, optimizer, train_losses, val_losses, criterion)
        #print(model)
        # computing the training and validation loss
        x_train_t, y_train_t = Variable(self.X_train), Variable(self.y_train)
        # getting the validation set
        x_val_t, y_val_t = Variable(self.X_test), Variable(self.y_test)
        
        #output_train = model(x_train_t)
        output_val = model(x_val_t)
        _, predicted = torch.max(output_val.data, 1)
        #acc_train = criterion(output_train,y_train_t)
        print(y_val_t)
        for t in predicted:
            print(t)
        acc_val = torch.sum(predicted==y_val_t)
        #print(acc_train)
        print((acc_val / predicted.size(0)).item())
        output_train = model(x_train_t)
        _, predicted = torch.max(output_train.data, 1)
        #acc_train = criterion(output_train,y_train_t)
        acc_train = torch.sum(predicted==y_train_t)
        #print(acc_train)
        print((acc_train / predicted.size(0)).item())
        # return final loss value
        #print(val_losses[-1])
        return val_losses[-1]

In [23]:
m = Model(train_size=60000,test_size=10000)

final_loss = m.CNN_model_train(kernel_size = 3, stride = 3, padding = 5, learning_rate = 0.05, n_epochs = 15)
print(float(final_loss))


tensor([0, 0, 0,  ..., 9, 9, 9])
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(

tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)
tensor(8)


In [24]:
# define number of classes
n_classes = 10
train_size = 6000
test_size = 1000

# load train and test sets
train = pd.read_csv('Data/fashion-mnist_train.csv')
test = pd.read_csv('Data/fashion-mnist_test.csv')

train = train.to_numpy()
test = test.to_numpy()
print(train)
X_train = train[:,1:]
y_train = train[:,0]
X_test = test[:,1:]
y_test = test[:,0]

where_train = []
where_test = []

for label in range(n_classes):
    where_train.append(np.where(y_train == label)[0][:round(train_size/10)])
    where_test.append(np.where(y_test == label)[0][:round(test_size/10)])

where_train = np.array(where_train).flatten()
where_test = np.array(where_test).flatten()


X_train = X_train[where_train]
y_train = y_train[where_train]

X_test = X_test[where_test]
y_test = y_test[where_test]
    
print("Train data shape:", X_train.shape, "Test data shape:", X_test.shape)
print("Train labels shape:", y_train.shape,"  Test labels shape:", y_test.shape)

[[2 0 0 ... 0 0 0]
 [9 0 0 ... 0 0 0]
 [6 0 0 ... 0 0 0]
 ...
 [8 0 0 ... 0 0 0]
 [8 0 0 ... 0 0 0]
 [7 0 0 ... 0 0 0]]
Train data shape: (6000, 784) Test data shape: (1000, 784)
Train labels shape: (6000,)   Test labels shape: (1000,)


In [25]:
# this section is inspired by code from the article:
# "Build an Image Classification Model using Convolutional Neural Networks in PyTorch"
# Author: Pulkit Sharma
# https://www.analyticsvidhya.com/blog/2019/10/building-image-classification-models-cnn-pytorch/

# change training and test set into tensors
X_train = X_train.reshape(len(X_train),1,28,28)
X_train = torch.from_numpy(X_train)
X_train = X_train.type(torch.FloatTensor)

y_train = y_train.astype(int)
y_train = torch.from_numpy(y_train)
y_train = y_train.type(torch.LongTensor)

X_test = X_test.reshape(len(X_test),1,28,28)
X_test = torch.from_numpy(X_test)
X_test = X_test.type(torch.FloatTensor)

y_test = y_test.astype(int)
y_test = torch.from_numpy(y_test)
y_test = y_test.type(torch.LongTensor)

# verify shape of training data
print("Train data shape:", X_train.shape, "Test data shape:", X_test.shape)
print("Train labels shape:", y_train.shape,"  Test labels shape:", y_test.shape)




Train data shape: torch.Size([6000, 1, 28, 28]) Test data shape: torch.Size([1000, 1, 28, 28])
Train labels shape: torch.Size([6000])   Test labels shape: torch.Size([1000])


In [26]:
def train(epoch, model, optimizer, train_losses, val_losses, criterion):
    """Train model

    Args:
        epoch (int): Current epoch (index in loop)
        model (obj): Model object
        optimizer (obj): optimizer function
        train_losses (list): List of train losses
        val_losses (list): List of test losses
        criterion (func): Criterion function

    Returns:
        list, list, obj, obj: train losses, test losses, optimizer, model
    """
    model.train()
    tr_loss = 0
    # getting the training set
    x_train_t, y_train_t = Variable(X_train), Variable(y_train)
    # getting the validation set
    x_val_t, y_val_t = Variable(X_test), Variable(y_test)
    # converting the data into GPU format
    if torch.cuda.is_available():
        x_train_t = x_train_t.cuda()
        y_train_t = y_train_t.cuda()
        x_val_t = x_val_t.cuda()
        y_val_t = y_val_t.cuda()

    # clearing the Gradients of the model parameters
    optimizer.zero_grad()
    
    # prediction for training and validation set
    output_train = model(x_train_t)
    output_val = model(x_val_t)

    # computing the training and validation loss
    loss_train = criterion(output_train, y_train_t)
    loss_val = criterion(output_val, y_val_t)
    train_losses.append(loss_train)
    val_losses.append(loss_val)

    # computing the updated weights of all the model parameters
    loss_train.backward()
    optimizer.step()
    tr_loss = loss_train.item()
    #if epoch%2 == 0:
    #    # printing the validation loss
    #   print('Epoch : ',epoch+1, '\t', 'loss :', loss_val)

    return train_losses, val_losses, optimizer, model

In [27]:

def CNN_model_train(kernel_size = 2, stride = 1, padding = 1, learning_rate = 0.07, n_epochs = 5):
    """Define network and train model with given hyperparameters

    Args:
        kernel_size (int, optional): Kernel size. Defaults to 2.
        stride (int, optional): Stride. Defaults to 1.
        padding (int, optional): Padding. Defaults to 1.
        learning_rate (float, optional): Learning rate. Defaults to 0.07.
        n_epochs (int, optional): Number of epochs. Defaults to 5.
    """
    # define model
    model = Net(kernel_size, stride, padding)
    # defining the optimizer
    optimizer = Adam(model.parameters(), lr=0.07)
    # defining the loss function
    criterion = CrossEntropyLoss()
    # checking if GPU is available
    if torch.cuda.is_available():
        model = model.cuda()
        criterion = criterion.cuda()
        
    # defining the number of epochs
    # n_epochs = 5
    # empty list to store training losses
    train_losses = []
    # empty list to store validation losses
    val_losses = []
    # training the model
    for epoch in range(n_epochs):
        train_losses, val_losses, optimizer, model = train(epoch, model, optimizer, train_losses, val_losses, criterion)
    #print(model)
    # return final loss value
    #print(val_losses[-1])
    return val_losses[-1]

In [28]:
# Run model and print the final loss
final_loss = CNN_model_train(kernel_size = 2, stride = 1, padding = 1, learning_rate = 0.07, n_epochs = 2)
print(float(final_loss))

2.3995399475097656


## 

In [None]:
# This code is inspired by code from Exercise 4 in course 02463


start = timer()

kernel_sizes = tuple(np.arange(1,8,1, dtype= np.int))
stride = tuple(np.arange(1,5,1, dtype= np.int))
padding = tuple(np.arange(1,3,1, dtype= np.int))
learning_rate = tuple(np.arange(0.01,0.5, 0.01, dtype= np.float))
n_epochs = tuple(np.arange(2,10, 2, dtype= np.int))

# # For testing
# kernel_sizes = tuple(np.arange(1,2,1, dtype= np.int))
# stride = tuple(np.arange(1,2,1, dtype= np.int))
# padding = tuple(np.arange(1,2,1, dtype= np.int))
# learning_rate = tuple(np.arange(0.01,0.02, 0.01, dtype= np.int))
# n_epochs = tuple(np.arange(1,2, 1, dtype= np.int))

# define the dictionary for GPyOpt
domain = [{'name': 'kernel_sizes', 'type': 'discrete', 'domain': kernel_sizes},
            {'name': 'stride', 'type': 'discrete', 'domain': stride},
            {'name': 'padding', 'type': 'discrete', 'domain': padding},
            {'name': 'learning_rate', 'type': 'discrete', 'domain': learning_rate},
            {'name': 'n_epochs', 'type': 'discrete', 'domain': n_epochs},
            ]

#domain = [{'name': 'kernel_sizes', 'type': 'discrete', 'domain': kernel_sizes},
#            {'name': 'learning_rate', 'type': 'discrete', 'domain': learning_rate},
#            ]

# TODO Figure out how to represent the learning_rate as a float. Not sure the type should be discrete

def objective_function(x): 
    # print(x)
    param = x[0]
        
    #loss = CNN_model_train(kernel_size = int(param[0]), 
    #                        stride = int(param[1]), padding = int(param[2]), 
    #                        learning_rate = param[3], n_epochs = int(param[4]))
    
    loss = CNN_model_train(kernel_size = int(param[0]), 
                            learning_rate = param[1])
    
    return float(loss)




### EI acquisition function ###
acquisition_func = "EI"

opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                              domain = domain,         # box-constrains of the problem
                                              acquisition_type = acquisition_func      # Select acquisition function MPI, EI, LCB
                                             )
opt.acquisition.exploration_weight=.1

opt.run_optimization(max_iter = 10) 

x_best = opt.X[np.argmin(opt.Y)]

end = timer()
print("For acquisition_type: {0}".format(acquisition_func))
print("time: {0}".format(end - start))
print("The best parameters obtained: {0}".format(x_best))
print("The loss was: {0}".format(min(opt.Y)))






### MPI acquisition function ###
acquisition_func = "MPI"

opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                              domain = domain,         # box-constrains of the problem
                                              acquisition_type = acquisition_func      # Select acquisition function MPI, EI, LCB
                                             )
opt.acquisition.exploration_weight=.1

opt.run_optimization(max_iter = 10) 

x_best = opt.X[np.argmin(opt.Y)]

end = timer()
print("For acquisition_type: {0}".format(acquisition_func))
print("time: {0}".format(end - start))
print("The best parameters obtained: {0}".format(x_best))
print("The loss was: {0}".format(min(opt.Y)))










### MPI acquisition function ###
acquisition_func = "LCB"

opt = GPyOpt.methods.BayesianOptimization(f = objective_function,   # function to optimize
                                              domain = domain,         # box-constrains of the problem
                                              acquisition_type = acquisition_func      # Select acquisition function MPI, EI, LCB
                                             )
opt.acquisition.exploration_weight=.1

opt.run_optimization(max_iter = 10) 

x_best = opt.X[np.argmin(opt.Y)]

end = timer()
print("For acquisition_type: {0}".format(acquisition_func))
print("time: {0}".format(end - start))
print("The best parameters obtained: {0}".format(x_best))
print("The loss was: {0}".format(min(opt.Y)))


In [None]:
opt.Y

### Plotting of acquisition function

In [None]:
opt.plot_acquisition()

### Implementing exhaustive search

In [None]:
start = timer()

kernel_sizes = tuple(np.arange(1,5,1, dtype= np.int))
stride = tuple(np.arange(1,3,1, dtype= np.int))
padding = tuple(np.arange(1,5,1, dtype= np.int))
learning_rate = tuple(np.arange(0.01,0.11, 0.01, dtype= np.int))
n_epochs = tuple(np.arange(10,50, 20, dtype= np.int))

# Specifying hyperparameters for grid search CV
domain = {
          'kernel size': kernel_sizes,
          'stride': stride,
          'padding': padding,
          'learning_rate' : learning_rate,
          'n_epochs': n_epochs}

# K = 2

#Manual exhaustive search (grid search)
GS_errors = []
best_loss = 100000

for k in kernel_sizes:
    for s in stride:
        for p in padding:
            for l in learning_rate:
                for e in n_epochs:
                    new_loss = CNN_model_train(kernel_size = k, stride = s, padding = p, learning_rate = l, n_epochs = e)
                    if new_loss < best_loss:
                        best_loss = new_loss
                        opt_params = np.array([k, s, p, l, e])
                        
                        
end = timer()
print("time: {0}".format(end - start))

In [None]:
print("best loss for GridSearch: ",best_loss)
print("best hyperparameters:", opt_params)