In [1]:
# First we need to mount the Google drive
import os
from google.colab import drive
drive.mount('/content/Hadrive')

Mounted at /content/Hadrive


In [None]:
#!mkdir /content/Hadrive/MyDrive/Test1
#!mkdir /content/Hadrive/MyDrive/Test1/Tutorial1/

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt

In [3]:
data_path = '/content/Hadrive/MyDrive/Test1/Tutorial1/'

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4915, 0.4823, 0.4468), (0.2470, 0.2435, 0.2616))
])

# cifar10 = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
# cifar10_val = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)

number_samples = 100

#https://stackoverflow.com/questions/47432168/taking-subsets-of-a-pytorch-dataset
cifar10_org = datasets.CIFAR10(data_path, train=True, download=False, transform=transform)
#cifar10 = torch.utils.data.Subset(cifar10_org, list(range(0, number_samples)))
cifar10 = cifar10_org

cifar10_val_org = datasets.CIFAR10(data_path, train=False, download=False, transform=transform)
#cifar10_val = torch.utils.data.Subset(cifar10_val_org, list(range(0, number_samples)))
cifar10_val = cifar10_val_org

print(f"lencifar10: {len(cifar10)}")
print(f"lencifar10_val: {len(cifar10_val)}")

lencifar10: 50000
lencifar10_val: 10000


In [4]:
# model
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        # bài toán phân loại 10 lớp nên output ra 10 nodes
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        # flatten về dạng vector để cho vào neural network
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

#https://www.kaggle.com/code/grayphantom/cnn-on-cifar10-using-pytorch
#https://tek4.vn/batch-norm-trong-pytorch-lap-trinh-neural-network-voi-pytorch
#https://viblo.asia/p/3-cap-do-hieu-ve-batch-normalization-bai-dich-johann-huber-Yym40mRmJ91

class NetBNL(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        # bài toán phân loại 10 lớp nên output ra 10 nodes
        self.fc2 = nn.Linear(32, 10)
        self.BatchNorm1d_32 = nn.BatchNorm1d(32)
        self.BatchNorm2d_8 = nn.BatchNorm2d(8)
        self.BatchNorm2d_16 = nn.BatchNorm2d(16)

    def forward(self, x):
        out = self.BatchNorm2d_16(F.max_pool2d(torch.tanh(self.conv1(x)), 2))
        out = self.BatchNorm2d_8(F.max_pool2d(torch.tanh(self.conv2(out)), 2))
        # flatten về dạng vector để cho vào neural network
        out = out.view(-1, 8 * 8 * 8)
        out = torch.tanh(self.fc1(out))
        out = self.BatchNorm1d_32(out)
        out = self.fc2(out)
        return out

    # , nn.BatchNorm2d(6)
    # , nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    # , nn.ReLU()
    # , nn.MaxPool2d(kernel_size=2, stride=2)
    # , nn.Flatten(start_dim=1)
    # , nn.Linear(in_features=12*4*4, out_features=120)
    # , nn.ReLU()
    # , nn.BatchNorm1d(120)

class CNNModel_hasBNLhasDropOut(nn.Module):
    def __init__(self):
        super().__init__()
        self.network=nn.Sequential(
            nn.Conv2d(3,32,3,padding=1),
            nn.BatchNorm2d(32),#2D Batch Normalization since our inputs are 4D.
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Conv2d(32,64,3,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Conv2d(64,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Flatten(),
            nn.Linear(128*4*4,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,10))
    def forward(self,x):
        return self.network(x)

class CNNModel_noBNLhasDropOut(nn.Module):
    def __init__(self):
        super().__init__()
        self.network=nn.Sequential(
            nn.Conv2d(3,32,3,padding=1),
            #nn.BatchNorm2d(32),#2D Batch Normalization since our inputs are 4D.
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Conv2d(32,64,3,padding=1),
            #nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Conv2d(64,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Dropout(0.25),

            nn.Flatten(),
            nn.Linear(128*4*4,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,10))
    def forward(self,x):
        return self.network(x)


class CNNModel_noBNLnoDropOut(nn.Module):
    def __init__(self):
        super().__init__()
        self.network=nn.Sequential(
            nn.Conv2d(3,32,3,padding=1),
            #nn.BatchNorm2d(32),#2D Batch Normalization since our inputs are 4D.
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Conv2d(32,64,3,padding=1),
            #nn.BatchNorm2d(64),
            nn.ReLU(),
            #nn.Dropout(0.25),

            nn.Conv2d(64,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            #nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            #nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Flatten(),
            nn.Linear(128*4*4,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,10))
    def forward(self,x):
        return self.network(x)

class CNNModel_hasBNLnoDropOut(nn.Module):
    def __init__(self):
        super().__init__()
        self.network=nn.Sequential(
            nn.Conv2d(3,32,3,padding=1),
            nn.BatchNorm2d(32),#2D Batch Normalization since our inputs are 4D.
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Conv2d(32,64,3,padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            #nn.Dropout(0.25),

            nn.Conv2d(64,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            #nn.Dropout(0.25),

            nn.Conv2d(128,128,3,padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            #nn.Dropout(0.25),

            nn.Flatten(),
            nn.Linear(128*4*4,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,10))
    def forward(self,x):
        return self.network(x)


class BasicBlock_hasBNLnoDropOut(nn.Module):
    expansion = 1


    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock_hasBNLnoDropOut, self).__init__()

        DROPOUT = 0.0  #zeros probability of DROPOUT -- noDropOut == 0.0

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(DROPOUT)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(DROPOUT)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes),
                nn.Dropout(DROPOUT)
            )

    def forward(self, x):
        out = F.relu(self.dropout(self.bn1(self.conv1(x))))
        out = self.dropout(self.bn2(self.conv2(out)))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet_hasBNLnoDropOut(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_hasBNLnoDropOut, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return F.log_softmax(out, dim=-1)


def ResNet18_hasBNLnoDropOut():
    return ResNet_hasBNLnoDropOut(BasicBlock_hasBNLnoDropOut, [2, 2, 2, 2])


###
class BasicBlock_noBNLnoDropOut(nn.Module):
    expansion = 1


    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock_noBNLnoDropOut, self).__init__()

        DROPOUT = 0.0  #zeros probability of DROPOUT -- noDropOut == 0.0

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(DROPOUT)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(DROPOUT)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False)
            )

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = self.conv2(out)
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet_noBNLnoDropOut(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet_noBNLnoDropOut, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return F.log_softmax(out, dim=-1)


def ResNet18_noBNLnoDropOut():
    return ResNet_noBNLnoDropOut(BasicBlock_noBNLnoDropOut, [2, 2, 2, 2])

In [5]:
import pickle

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, val_loader, data_path):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:

            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
        correct = 0
        with torch.no_grad():
            for data in val_loader:
                images, labels = data
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                c = (predicted == labels).squeeze()
                correct += c.sum()
        if epoch == 1 or epoch % 1 == 0:
            print('Epoch {}, Training loss {}, Val accuracy {}'.format(
                epoch,
                loss_train / len(train_loader),
                correct / len(cifar10_val)))

        scheduler.step()
        # before_lr = optimizer.param_groups[0]["lr"]
        # scheduler.step()
        # after_lr = optimizer.param_groups[0]["lr"]
        # print("Epoch %d: SGD lr %.4f -> %.4f" % (epoch, before_lr, after_lr))


        #save the model config
        model_state = model.state_dict()
        optimizer_state = optimizer.state_dict()
        scheduler_state = scheduler.state_dict()
        dict_state = dict()
        dict_state["epoch"] = epoch
        dict_state["model_state"] = model_state
        dict_state["optimizer_state"] = optimizer_state
        dict_state["scheduler_state"] = scheduler_state
        dict_state["train_loss"] = loss_train / len(train_loader)
        dict_state["val_acc"] = correct / len(cifar10_val)

        try:
            geeky_file = open(data_path + "epoch_" + str(epoch), 'wb')
            pickle.dump(dict_state, geeky_file)
            geeky_file.close()

        except:
            print("Something went wrong")

        #print(f"scheduler state: {scheduler_state}")

In [None]:
configs = dict({
#"1": { "model_type": Net(), "n_epochs": 20, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
#"2": { "model_type": NetBNL(), "n_epochs": 20, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
#"3": { "model_type": CNNModel_hasBNLhasDropOut(), "n_epochs": 20, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
#"4": { "model_type": CNNModel_noBNLhasDropOut(), "n_epochs": 10, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
#"5": { "model_type": CNNModel_noBNLnoDropOut(), "n_epochs": 10, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
#"6": { "model_type": CNNModel_hasBNLnoDropOut(), "n_epochs": 10, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
"7": { "model_type": ResNet18_hasBNLnoDropOut(),"n_epochs": 10, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64},
"8": { "model_type": ResNet18_noBNLnoDropOut(),"n_epochs": 10, "lr_initial": 0.1, "decay": 0.9, "batch_size": 64}
})


for index, config in configs.items():
  print(f"index: {index}")
  print(f"config: {config}")
  for key,value in config.items():
    print(f"key: {key}, value: {value}")

In [None]:
for index, config in configs.items():
  print(f"index: {index}")
  data_path_index = data_path + "config_" + str(index) + "_"
  model = config["model_type"]
  optimizer = optim.SGD(model.parameters(), lr=config["lr_initial"])
  loss_fn = nn.CrossEntropyLoss()

  train_loader = torch.utils.data.DataLoader(cifar10, batch_size=config["batch_size"], shuffle=True)
  val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=config["batch_size"], shuffle=True)

  scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=config["decay"])

  training_loop(
      n_epochs = config["n_epochs"],
      optimizer = optimizer,
      model = model,
      loss_fn = loss_fn,
      train_loader = train_loader,
      val_loader = val_loader,
      data_path = data_path_index
  )

index: 7
Epoch 1, Training loss 1.470175827631865, Val accuracy 0.5629000067710876
Epoch 2, Training loss 0.8520316731975511, Val accuracy 0.6366999745368958


In [None]:


  '''
    LinearLR =>> new LR = initial LR - nb_epochs*(start_factor-end_factor)/total_iters
    example, initialLR = 0.1, start = 1.0, end_factor = 0.5, total_iters = 20
    (start_factor-end_factor)/total_iters = 0.025.
    ===> epoch 1: 0.1 - 1*0.025 = 0.0975
    ===> epoch 2: 0.1 - 2*0.025 = 0.0950....
  '''
  #scheduler = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.5, total_iters=20)
  '''
   StepLR =>>> new LR = old LR * gamma
  '''
  scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
  '''
    LambdaLR: new LR = initialLR * f(epoch)
    For example: f(epoch) = 1/t
  '''
  # lambda1 = lambda epoch: 1/(epoch+1)
  # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1])

In [None]:
import pandas as pd
epoch = 1
path = data_path + "epoch_" + str(epoch)
obj = pd.read_pickle(path)
print(obj.keys())

dict_keys(['epoch', 'model_state', 'optimizer_state'])
