In [None]:
import torch
import os
import pandas as pd
import numpy as np
import time
import random
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Seed passed to `random.seed` and `torch.manual_seed` further down the notebook.
RANDOM_SEED = 123
# Device that mini-batches and model_1 are moved to; this notebook runs on the CPU.
DEVICE = torch.device('cpu')

In [None]:
# Colab only: mount Google Drive so the files under /content/drive/MyDrive are readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import shutil

# Extract the PNG archive stored on Drive into the local `png-files` directory.
shutil.unpack_archive(
    filename='/content/drive/MyDrive/DS405B/data/png-files.zip',
    extract_dir='png-files',
)

# Loading the dataset

The dataset consists of 10 classes, like the original MNIST dataset. It also shares MNIST's overall structure: there are 55k training images, 5k validation images and 10k test images, and all images are black & white images of size 28x28.

In [None]:
class FashionMNISTDataset(Dataset):
    """Custom Dataset for loading FashionMNIST images.

    The CSV file must provide an ``image_name`` column (file names relative to
    ``img_dir``) and a ``class_label`` column.

    Args:
        csv_path: Path of the CSV file listing image names and labels.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to every loaded PIL image.
            ``None`` (the default) returns the image untouched. Boolean values
            are accepted for backward compatibility with the former
            ``transform=True`` default and also mean "no transform".

    Raises:
        TypeError: If ``transform`` is neither ``None``, a bool, nor callable.
    """

    def __init__(self, csv_path, img_dir, transform=None):
        # The previous default (`True`) was not callable, so indexing a dataset
        # built without an explicit transform raised a TypeError.
        if isinstance(transform, bool):
            transform = None
        elif transform is not None and not callable(transform):
            raise TypeError(
                f'transform must be callable or None, got {type(transform).__name__}')

        df = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.img_names = df['image_name'].values
        self.y = df['class_label'].values
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``."""
        img = Image.open(os.path.join(self.img_dir,
                                      self.img_names[index]))
        if self.transform is not None:
            img = self.transform(img)

        label = self.y[index]
        return img, label

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return self.y.shape[0]

# 2) Data augmentation

In [None]:
  ############################################################
# THIS CELL CAN BE MODIFIED
############################################################

# Training-time pipeline: tensor conversion, random horizontal flip, random
# rotation (degrees=30), then a random 24x24 crop resized back to 28x28.
custom_train_transform_1 = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=30),
    transforms.RandomCrop(size=(24, 24)),
    transforms.Resize(size=(28, 28)),
])

In [None]:
####################################################################
# THIS CELL CAN BE MODIFIED BUT THERE SHOULD NOT BE ANY RANDOMNESS
####################################################################

# Evaluation-time pipeline: only deterministic steps (tensor conversion and a
# resize to 28x28), no random augmentation.
custom_test_transform_1 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=(28, 28)),
])

In [None]:
# Mini-batch size used by the train/valid/test `*_loader_1` DataLoaders built next.
BATCH_SIZE = 60

In [None]:
# --- Datasets: one per split, all reading the PNGs extracted to `png-files/` ---
train_dataset_1 = FashionMNISTDataset(
    csv_path='/content/drive/MyDrive/DS405B/data/train.csv',
    img_dir='png-files/',
    transform=custom_train_transform_1,
)
valid_dataset_1 = FashionMNISTDataset(
    csv_path='/content/drive/MyDrive/DS405B/data/valid.csv',
    img_dir='png-files/',
    transform=custom_test_transform_1,
)
test_dataset_1 = FashionMNISTDataset(
    csv_path='/content/drive/MyDrive/DS405B/data/test.csv',
    img_dir='png-files/',
    transform=custom_test_transform_1,
)

# --- Loaders: only the training split is shuffled and drops its last partial batch ---
train_loader_1 = DataLoader(
    train_dataset_1,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)
valid_loader_1 = DataLoader(
    valid_dataset_1,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)
test_loader_1 = DataLoader(
    test_dataset_1,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2,
)

# Multilayer Perceptron Model

The cell below contains the multi-layer perceptron model. This is the section to edit if you want to change the architecture.

In [None]:
from math import tanh 

class MLP(torch.nn.Module):
    """Multi-layer perceptron with configurable depth, width and activation.

    The network is a stack of ``num_layers`` linear layers. Every layer except
    the last is followed by the chosen activation and a dropout layer; the last
    layer maps to ``nb_class`` raw scores (logits).

    Args:
        num_layers: Total number of linear layers (>= 1), output layer included.
        nb_features: Size of each flattened input vector.
        nb_class: Number of output classes.
        size_layers: Width of every hidden layer.
        func_name: Hidden activation, one of ``'Relu'``, ``'Tanh'``, ``'Sigmoid'``.
        dropout_probas: Dropout probability applied after each hidden activation.

    Raises:
        ValueError: If ``func_name`` is not supported or ``num_layers`` < 1.
    """

    # Supported `func_name` values and the activation class each one selects.
    _ACTIVATIONS = {
        'Relu': torch.nn.ReLU,
        'Tanh': torch.nn.Tanh,
        'Sigmoid': torch.nn.Sigmoid,
    }

    def __init__(self, num_layers, nb_features, nb_class, size_layers, func_name, dropout_probas):
        super(MLP, self).__init__()

        # Fail early instead of leaving `self.FF` undefined for an unknown name.
        if func_name not in self._ACTIVATIONS:
            raise ValueError(
                f'Unknown activation {func_name!r}; expected one of {sorted(self._ACTIVATIONS)}')
        if num_layers < 1:
            raise ValueError(f'num_layers must be >= 1, got {num_layers}')

        # The activation is stateless, so one instance is shared by all hidden layers.
        self.FF = self._ACTIVATIONS[func_name]()

        modules = []
        in_features = nb_features
        # Hidden blocks: Linear -> activation -> dropout.
        for _ in range(num_layers - 1):
            modules.append(torch.nn.Linear(in_features, size_layers))
            modules.append(self.FF)
            modules.append(torch.nn.Dropout(dropout_probas))
            in_features = size_layers
        # Output layer: always present, so a 1-layer model also emits `nb_class` scores.
        modules.append(torch.nn.Linear(in_features, nb_class))
        self.network = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Return ``(logits, probas)`` for a batch of flattened inputs.

        ``probas`` is the softmax of the logits over the class dimension, so
        each row is non-negative and sums to 1.
        """
        logits = self.network(x)
        # Softmax, not the hidden activation: e.g. ReLU would zero out every
        # negative logit and make the arg-max meaningless.
        probas = torch.softmax(logits, dim=-1)
        return logits, probas


#################################
### Model Initialization
#################################


# Seed Python's `random` module and PyTorch's global generator so that what
# is drawn from them afterwards (e.g. the random weight initialization of
# the models created below) is the same on every run.
# In practice some initializations work poorly and one may want to try
# several seeds; that is not necessary for this homework.
random.seed(RANDOM_SEED)
# Kept as the cell's last expression: the notebook echoes the returned Generator.
torch.manual_seed(RANDOM_SEED)



<torch._C.Generator at 0x7f4cf0986770>

In [None]:
# Example instantiation: 8 linear layers of width 25, sigmoid activations and
# dropout p=0.5, moved to DEVICE; the bare name at the end displays its structure.
model_1 = MLP(
    num_layers=8,
    nb_features=28 * 28,
    nb_class=10,
    size_layers=25,
    func_name='Sigmoid',
    dropout_probas=0.5,
).to(DEVICE)
model_1

MLP(
  (FF): Sigmoid()
  (network): Sequential(
    (0): Linear(in_features=784, out_features=25, bias=True)
    (1): Sigmoid()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=25, out_features=25, bias=True)
    (4): Sigmoid()
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=25, out_features=25, bias=True)
    (7): Sigmoid()
    (8): Dropout(p=0.5, inplace=False)
    (9): Linear(in_features=25, out_features=25, bias=True)
    (10): Sigmoid()
    (11): Dropout(p=0.5, inplace=False)
    (12): Linear(in_features=25, out_features=25, bias=True)
    (13): Sigmoid()
    (14): Dropout(p=0.5, inplace=False)
    (15): Linear(in_features=25, out_features=25, bias=True)
    (16): Sigmoid()
    (17): Dropout(p=0.5, inplace=False)
    (18): Linear(in_features=25, out_features=25, bias=True)
    (19): Sigmoid()
    (20): Dropout(p=0.5, inplace=False)
    (21): Linear(in_features=25, out_features=10, bias=True)
  )
)

### Searching for the best Hyper-Parameters

The strategy consists of 4 stages. In the first stage, the hyper-parameter ranges are deliberately very wide, which makes it possible to find the order of magnitude in which each hyper-parameter should lie. We will use 30 different random hyper-parameter combinations, each trained for a single epoch. At the end of this stage, the values that seem to prevent the networks from learning will be eliminated, so that less time is wasted in the following stages.

In the second stage, we will draw 70 different random combinations of hyper-parameters and train each for 3 epochs. The accuracy should become more informative here, and we can eliminate some more parameter values. Finally, we will keep refining the search space in stages 3 and 4, which use 40 and 60 draws trained for 7 and 10 epochs respectively.
At the end of all these stages, the most accurate combination of hyper-parameters will be trained and then evaluated on the test dataset to assess the effectiveness of the model.

In [None]:
%%capture
# Install (or upgrade) the Weights & Biases client, then authenticate this session.
# The version is not pinned, so the installed release may differ between runs.
!pip install wandb --upgrade
import wandb
wandb.login()

··········


### Parameters used in the first stage

In [None]:
# Stage-1 search space: deliberately wide ranges, one epoch per run.
parameters_dict = {
    # Augmentation switches (1 = enabled, 0 = disabled).
    'Horrizontal_Flip': {'values': [0, 1]},
    'Rotation': {'values': [0, 1]},
    'Crop': {'values': [0, 1]},
    # Optimisation and architecture.
    'learning_rate': {'distribution': 'uniform', 'min': 0.05, 'max': 0.6},
    'Activation_function': {'values': ['Tanh', 'Sigmoid', 'Relu']},
    'dropout_proba': {'distribution': 'uniform', 'min': 0.1, 'max': 0.6},
    'size_layers': {'min': 10, 'max': 1050},
    'num_layers': {'values': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]},
    'batch_size': {
        'distribution': 'q_log_uniform_values',
        'q': 8,
        'min': 10,
        'max': 120,
    },
    # Fixed for the whole stage.
    'Num_Epoch': {'value': 1},
}

# Metric the sweep tries to maximise; the name must match the key logged during training.
metric = {'name': 'accuracy valid dataset', 'goal': 'maximize'}

# Random search; `parameters` references `parameters_dict` itself, so later
# in-place updates of that dict are visible through `sweep_config`.
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}

### Update of parameters for second stage

In [None]:
# Stage 2: overwrite the listed entries of `parameters_dict` in place with
# narrower ranges and 3 epochs per run; entries not listed keep their values.
parameters_dict.update(
    learning_rate=dict(distribution='uniform', min=0.05, max=0.3),
    num_layers=dict(values=[2, 3, 4, 5, 6, 7, 8]),
    size_layers=dict(min=300, max=1050),
    dropout_proba=dict(distribution='uniform', min=0.1, max=0.4),
    Num_Epoch=dict(value=3),
    batch_size=dict(distribution='q_log_uniform_values', q=8, min=30, max=120),
)

### Update of parameters for third stage

In [None]:
# Stage 3: overwrite the listed entries of `parameters_dict` in place with
# tighter ranges and 7 epochs per run; entries not listed keep their values.
parameters_dict.update(
    learning_rate=dict(distribution='uniform', min=0.15, max=0.24),
    num_layers=dict(values=[2, 3, 4, 5]),
    size_layers=dict(min=300, max=1050),
    dropout_proba=dict(distribution='uniform', min=0.1, max=0.26),
    Num_Epoch=dict(value=7),
    batch_size=dict(distribution='q_log_uniform_values', q=8, min=30, max=120),
)

### Update of parameters for last stage

In [None]:
# Last stage: overwrite the listed entries of `parameters_dict` in place with
# the final ranges and 10 epochs per run; entries not listed keep their values.
parameters_dict.update(
    learning_rate=dict(distribution='uniform', min=0.15, max=0.20),
    num_layers=dict(values=[2, 3, 4]),
    size_layers=dict(min=450, max=1050),
    dropout_proba=dict(distribution='uniform', min=0.1, max=0.18),
    Num_Epoch=dict(value=10),
    batch_size=dict(distribution='q_log_uniform_values', q=8, min=30, max=120),
)

In [None]:
# Register the sweep described by `sweep_config` under the W&B project "AS2_CNN"
# and keep the value returned by wandb.sweep (the sweep ID, per the output below).
sweep_id = wandb.sweep(sweep_config, project="AS2_CNN")

Create sweep with ID: j0yr777s
Sweep URL: https://wandb.ai/ojlt/AS2_CNN/sweeps/j0yr777s


In [None]:
# Pretty-print the sweep configuration as it stands after the stage updates above.
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'accuracy valid dataset'},
 'parameters': {'Activation_function': {'values': ['Tanh', 'Sigmoid', 'Relu']},
                'Crop': {'values': [0, 1]},
                'Horrizontal_Flip': {'values': [0, 1]},
                'Num_Epoch': {'value': 10},
                'Rotation': {'values': [0, 1]},
                'batch_size': {'distribution': 'q_log_uniform_values',
                               'max': 120,
                               'min': 30,
                               'q': 8},
                'dropout_proba': {'distribution': 'uniform',
                                  'max': 0.18,
                                  'min': 0.1},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.2,
                                  'min': 0.15},
                'num_layers': {'values': [2, 3, 4]},
                'size_layers': {'max': 1050, 'min': 450}}}


In [None]:
############################################################
# THIS CELL CAN BE MODIFIED
############################################################
# Notebook-level epoch count. wandb_train does not read it: it assigns its own
# local NUM_EPOCHS from config.Num_Epoch.
NUM_EPOCHS = 1 # Please feel free to change
############################################################

In [None]:
def compute_accuracy_and_loss(model, data_loader, device):
    """Compute classification accuracy and mean cross-entropy over a data loader.

    Args:
        model: Callable returning ``(logits, probas)`` for a batch of flattened
            inputs; only the logits are used here.
        data_loader: Iterable of ``(features, targets)`` mini-batches.
        device: Device the mini-batches are moved to before the forward pass.

    Returns:
        Tuple ``(accuracy, loss)``: ``accuracy`` is a 0-dim float tensor holding
        the percentage of correctly classified examples, ``loss`` is a Python
        float holding the cross-entropy averaged over all examples.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    correct_pred, num_examples = 0, 0
    cross_entropy = 0.
    # Evaluation only: never build the autograd graph, whatever the caller's context.
    with torch.no_grad():
        for features, targets in data_loader:
            # Flatten each example to a vector while keeping the batch dimension.
            features = features.reshape(features.size(0), -1).to(device)
            targets = targets.to(device)

            logits, _ = model(features)
            # Sum (not mean) per batch so the final division by the number of
            # examples gives the true per-example average, even when the last
            # batch is smaller than the others.
            cross_entropy += F.cross_entropy(logits, targets, reduction='sum').item()
            # Arg-max of the logits equals the arg-max of their softmax and does
            # not depend on how the model derives `probas`.
            predicted_labels = torch.argmax(logits, dim=1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError('data_loader yielded no examples')
    return correct_pred.float()/num_examples * 100, cross_entropy/num_examples
    

In [None]:
def wandb_train(config=None):
    """Train and evaluate one MLP for a single W&B sweep run.

    Opens a W&B run, reads the hyper-parameters from ``wandb.config``, builds
    the augmentation pipeline, data loaders, model and SGD optimizer from them,
    trains for ``Num_Epoch`` epochs and logs metrics after every epoch.

    Config keys read: ``Horrizontal_Flip``, ``Rotation``, ``Crop`` (1 enables
    the augmentation), ``batch_size``, ``num_layers``, ``size_layers``,
    ``Activation_function``, ``dropout_proba``, ``learning_rate``, ``Num_Epoch``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        config = wandb.config

        # Training augmentations are switched on/off by the sweep.
        trans = [transforms.ToTensor()]
        if config.Horrizontal_Flip == 1:
            trans.append(transforms.RandomHorizontalFlip())
        if config.Rotation == 1:
            trans.append(transforms.RandomRotation(degrees=30))
        if config.Crop == 1:
            # Crop to 24x24, then scale back so the model always sees 28x28 inputs.
            trans.append(transforms.RandomCrop(size=(24, 24)))
            trans.append(transforms.Resize(size=(28, 28)))

        custom_train_transform = transforms.Compose(trans)
        custom_test_transform = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Resize(size=(28, 28))])

        # Defensive cast: DataLoader needs an integer batch size
        # (NOTE(review): confirm whether the quantized distribution can yield floats).
        batch_size = int(config.batch_size)

        train_dataset = FashionMNISTDataset(csv_path='/content/drive/MyDrive/DS405B/data/train.csv',
                                            img_dir='png-files/',
                                            transform=custom_train_transform)
        train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                                  shuffle=True, drop_last=True, num_workers=2)
        valid_dataset = FashionMNISTDataset(csv_path='/content/drive/MyDrive/DS405B/data/valid.csv',
                                            img_dir='png-files/',
                                            transform=custom_test_transform)
        valid_loader = DataLoader(dataset=valid_dataset, batch_size=batch_size,
                                  shuffle=False, num_workers=2)

        model = MLP(num_layers=config.num_layers,
                    nb_features=28*28,
                    nb_class=10,
                    size_layers=config.size_layers,
                    func_name=config.Activation_function,
                    dropout_probas=config.dropout_proba)
        # Keep the parameters on the same device as the mini-batches.
        model = model.to(DEVICE)

        optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)

        NUM_EPOCHS = config.Num_Epoch
        for epoch in range(NUM_EPOCHS):

            model.train()
            for features, targets in train_loader:
                ### PREPARE MINIBATCH
                features = features.view(-1, 28*28).to(DEVICE)
                targets = targets.to(DEVICE)
                ### FORWARD AND BACK PROP
                logits, _ = model(features)
                cost = F.cross_entropy(logits, targets)
                optimizer.zero_grad()
                cost.backward()
                ### UPDATE MODEL PARAMETERS
                optimizer.step()

            # Evaluation: dropout off, no autograd graph.
            model.eval()
            with torch.no_grad():
                train_acc, train_loss = compute_accuracy_and_loss(model, train_loader, device=DEVICE)
                valid_acc, valid_loss = compute_accuracy_and_loss(model, valid_loader, device=DEVICE)

            wandb.log({"epoch": epoch,
                       "accuracy valid dataset": valid_acc,
                       "train accuracy": train_acc,
                       "valid loss": valid_loss,
                       "train loss": train_loss})

In [None]:
start_time = time.time()
# Run the agent on the sweep registered above rather than on a hard-coded ID
# left over from an earlier session, so that a fresh "Run All" works.
wandb.agent(sweep_id, wandb_train, count=1)
elapsed = (time.time() - start_time)/60
print(f'Total Time: {elapsed:.2f} min')

[34m[1mwandb[0m: Agent Starting Run: ehmxcl3o with config:
[34m[1mwandb[0m: 	Activation_function: Relu
[34m[1mwandb[0m: 	Crop: 1
[34m[1mwandb[0m: 	Horrizontal_Flip: 1
[34m[1mwandb[0m: 	Num_Epoch: 10
[34m[1mwandb[0m: 	Rotation: 1
[34m[1mwandb[0m: 	batch_size: 48
[34m[1mwandb[0m: 	dropout_proba: 0.17157416375249115
[34m[1mwandb[0m: 	learning_rate: 0.1819621782858984
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	size_layers: 487


torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 784])
torch.Size([48, 1, 28, 28]) torch.Size([48])
torch.Size([48, 7

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Total Time: 1.08 min


# Report for result analysis

### Analysis of first stage

https://wandb.ai/ojlt/AS2_CNN/reports/Results-first-stage--VmlldzoyMjU0NTQw/edit?firstReport&runsetFilter

### Analysis of the last stage
https://wandb.ai/ojlt/AS2_CNN/reports/Results-last-stage--VmlldzoyMjU0NjA5

# Final Evaluation of the Model

In [None]:
#### Parameters chosen for the final run on the test set ####

num_layers = 4
size_layers = 1005
dropout_probas=0.1033
act_func = 'Relu'
BATCH_SIZE = 96

# Only the horizontal flip is kept as training augmentation; rotation and
# cropping are left out of the final pipeline.
custom_train_transform = transforms.Compose([
                                             transforms.ToTensor(),
                                             transforms.RandomHorizontalFlip(),
                                                ])
custom_test_transform = transforms.Compose([
                                             transforms.ToTensor(),])

train_dataset2 = FashionMNISTDataset(csv_path='/content/drive/MyDrive/DS405B/data/train.csv',img_dir='png-files/',transform=custom_train_transform)
train_loader2 =  DataLoader(dataset=train_dataset2, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=2)


valid_dataset2 = FashionMNISTDataset(csv_path='/content/drive/MyDrive/DS405B/data/valid.csv',img_dir='png-files/',transform=custom_test_transform)

valid_loader2 = DataLoader(dataset=valid_dataset2,batch_size=BATCH_SIZE,shuffle=False,num_workers=2)


test_dataset2 = FashionMNISTDataset(csv_path='/content/drive/MyDrive/DS405B/data/test.csv',img_dir='png-files/',transform=custom_test_transform)

test_loader2 = DataLoader(dataset=test_dataset2,batch_size=BATCH_SIZE,shuffle=False,num_workers=2)

model2=MLP(num_layers=num_layers,
        nb_features = 28*28,
        nb_class = 10,
      size_layers=size_layers,
      func_name = act_func,
      dropout_probas=dropout_probas)
# Move the parameters to the device the mini-batches are sent to (as done for
# model_1); this must happen before the optimizer captures them.
model2 = model2.to(DEVICE)

optimizer2 = torch.optim.SGD(model2.parameters(), lr=0.1502)

In [None]:
NUM_EPOCHS = 20

for epoch in range(NUM_EPOCHS):

    # ---- optimisation pass over the training set (dropout active) ----
    model2.train()
    for batch_idx, (features, targets) in enumerate(train_loader2):
        # Flatten each image and move the mini-batch to the target device.
        features = features.view(-1, 28*28).to(DEVICE)
        targets = targets.to(DEVICE)

        # Clear stale gradients, then forward, backward and parameter update.
        optimizer2.zero_grad()
        logits, probas = model2(features)
        cost = F.cross_entropy(logits, targets)
        cost.backward()
        optimizer2.step()

    # ---- evaluation (dropout disabled, no autograd graph needed) ----
    model2.eval()
    with torch.no_grad():
        train_acc, train_loss = compute_accuracy_and_loss(model2, train_loader2, device=DEVICE)
        valid_acc, valid_loss = compute_accuracy_and_loss(model2, valid_loader2, device=DEVICE)
    print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}% Valid Acc.: {valid_acc:.2f}%')

Epoch: 001/020 Train Acc.: 76.27% Valid Acc.: 76.16%
Epoch: 002/020 Train Acc.: 84.52% Valid Acc.: 84.78%
Epoch: 003/020 Train Acc.: 86.60% Valid Acc.: 86.70%
Epoch: 004/020 Train Acc.: 85.76% Valid Acc.: 85.66%
Epoch: 005/020 Train Acc.: 87.37% Valid Acc.: 86.88%
Epoch: 006/020 Train Acc.: 87.46% Valid Acc.: 87.44%
Epoch: 007/020 Train Acc.: 88.71% Valid Acc.: 87.84%
Epoch: 008/020 Train Acc.: 88.48% Valid Acc.: 87.34%
Epoch: 009/020 Train Acc.: 88.53% Valid Acc.: 87.28%
Epoch: 010/020 Train Acc.: 89.89% Valid Acc.: 88.90%
Epoch: 011/020 Train Acc.: 90.30% Valid Acc.: 88.82%
Epoch: 012/020 Train Acc.: 90.18% Valid Acc.: 88.58%
Epoch: 013/020 Train Acc.: 90.69% Valid Acc.: 89.28%
Epoch: 014/020 Train Acc.: 90.95% Valid Acc.: 88.74%
Epoch: 015/020 Train Acc.: 90.82% Valid Acc.: 88.48%
Epoch: 016/020 Train Acc.: 90.78% Valid Acc.: 88.78%
Epoch: 017/020 Train Acc.: 91.33% Valid Acc.: 89.06%
Epoch: 018/020 Train Acc.: 91.99% Valid Acc.: 89.76%
Epoch: 019/020 Train Acc.: 91.34% Valid Acc.: 

In [None]:
# Final held-out evaluation: dropout disabled and gradients switched off.
model2.eval()
with torch.no_grad():
    test_acc, test_loss = compute_accuracy_and_loss(model2, test_loader2, DEVICE)
print(f'Test accuracy: {test_acc:.2f}%')

Test accuracy: 88.58%
