# 0. Imports

In [1]:
# imports

import torch
from torch import nn
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import random_split, DataLoader
from torchsummary import summary
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, RMSprop, SGD
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

In [2]:
# for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x2860a4c6950>

In [3]:
# setup device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)
device

device(type='cuda')

# 1. Dataset and Dataloader

In [4]:
# data transforms (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_tfms = transforms.Compose([transforms.RandomCrop(32, padding=4, padding_mode='reflect'), 
                         transforms.RandomHorizontalFlip(), 
                         transforms.ToTensor(), 
                         transforms.Normalize(*stats,inplace=True)])
valid_tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(*stats)])

In [5]:
# get cifar10
train_dataset = CIFAR10(root='data', transform=train_tfms, download=True, train=True)
val_dataset = CIFAR10(root='data', transform=valid_tfms, download=True, train=False)
len(train_dataset), len(val_dataset)

Files already downloaded and verified
Files already downloaded and verified


(50000, 10000)

In [6]:
# create dataloaders
batch_size = 256
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# 2. Create a CNN

In [7]:
class CifarClassifier(nn.Module):
    
    def __init__(self, activation: str):
        super().__init__()
        assert activation in ["relu", "sigmoid", "tanh"] , "select activation from relu, sigmoid, tanh"
        
        self.activation_layer = None
        if activation == "relu":
            self.activation_layer = nn.ReLU()
        elif activation == "sigmoid":
            self.activation_layer = nn.Sigmoid()
        elif activation == "tanh":
            self.activation_layer = nn.Tanh()
            
        self.model = nn.Sequential(
            
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding="same"),
            self.activation_layer, 
            nn.BatchNorm2d(32),
            nn.MaxPool2d(2),
            nn.Dropout(0.16),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding="same"),
            self.activation_layer, 
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2),
            nn.Dropout(0.16),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding="same"),
            self.activation_layer, 
            nn.BatchNorm2d(128),
            nn.MaxPool2d(2),
            nn.Dropout(0.16),

            
            
            
            nn.Flatten(),
            nn.Linear(in_features=2048, out_features=256),
            self.activation_layer,
            nn.Linear(in_features=256, out_features=10)
        )
        
    def forward(self, x):
        return self.model(x)

In [8]:
model = CifarClassifier("relu").to(device)
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
              ReLU-2           [-1, 32, 32, 32]               0
              ReLU-3           [-1, 32, 32, 32]               0
       BatchNorm2d-4           [-1, 32, 32, 32]              64
         MaxPool2d-5           [-1, 32, 16, 16]               0
           Dropout-6           [-1, 32, 16, 16]               0
            Conv2d-7           [-1, 64, 16, 16]          18,496
              ReLU-8           [-1, 64, 16, 16]               0
              ReLU-9           [-1, 64, 16, 16]               0
      BatchNorm2d-10           [-1, 64, 16, 16]             128
        MaxPool2d-11             [-1, 64, 8, 8]               0
          Dropout-12             [-1, 64, 8, 8]               0
           Conv2d-13            [-1, 128, 8, 8]          73,856
             ReLU-14            [-1, 12

# 3. Training Loop

In [9]:
# create loss_fn
loss_fn = CrossEntropyLoss()

In [10]:
# create optimizer

lr = 1.6 * (10**-3)
optimizer = Adam(model.parameters(), lr=lr)

In [11]:
abl_lis = []

In [12]:
# run the training loop
from utils import train_step

epochs = 32
for epoch in tqdm(range(epochs)):
        tres = train_step(model, train_dataloader, loss_fn, optimizer, device)
        print(f"epoch: {epoch}")
        print(f"avg_batch_loss: {tres['avg_batch_loss']}")
        print(f"time: {tres['time']}")   
        print("")
        abl_lis.append(tres['avg_batch_loss'].item())

  0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
# validation
from utils import valid_step

vres = valid_step(model, val_dataloader, device)
print(f"accuracy: {vres['accuracy']}")
print(f"confusion_matrix: \n{vres['confusion_matrix']}")

In [None]:
# avg batch loss line chart
fig, ax = plt.subplots()
ax.plot([i+1 for i in range(len(abl_lis))], abl_lis)
# Set labels and title
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Avg Batch Loss')
fig.savefig("relu_adam.jpg")

# 4. Generalize for All Results

In [None]:
# generalize optimizers
opt_lis = ["adam", "rms", "sgd"]

def get_optimizer(model, opt_type):
    if(opt_type == "adam"):
        return Adam(model.parameters(), 1.6 * (10**-3))
    elif(opt_type == "rms"):
        return RMSprop(model.parameters(), 1.6 * (10**-3))
    elif(opt_type == "sgd"):
        return SGD(model.parameters(), 1.6 * (10**-3))

In [None]:
import os
os.makedirs("avg_batch_loss", exist_ok=True)
os.makedirs("training_results", exist_ok=True)

In [None]:
act_lis = ["relu", "tanh", "sigmoid"]

In [None]:
# build generalized function

def generalize(opt_type, act_type):
    model = CifarClassifier(activation=act_type).to(device)
    loss_fn = CrossEntropyLoss()
    optimizer = get_optimizer(model, opt_type)
    epochs = 32
    file_str = ""
    abl_lis = []
    for epoch in tqdm(range(epochs)):
        tres = train_step(model, train_dataloader, loss_fn, optimizer, device)
        print(f"epoch: {epoch}")
        print(f"avg_batch_loss: {tres['avg_batch_loss']}")
        print(f"time: {tres['time']}")   
        print("")
        file_str += f"epoch: {epoch}\n"
        file_str += f"avg_batch_loss: {tres['avg_batch_loss']}\n"
        file_str += f"time: {tres['time']}\n\n"
        abl_lis.append(tres['avg_batch_loss'].item())

    # avg batch loss line chart
    fig, ax = plt.subplots()
    ax.plot([i+1 for i in range(len(abl_lis))], abl_lis)
    # Set labels and title
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Avg Batch Loss')
    fig.savefig(f"avg_batch_loss/{opt_type}_{act_type}.jpg")
    
    vres = valid_step(model, val_dataloader, device)
    print(f"accuracy: {vres['accuracy']}")
    print(f"confusion_matrix: \n{vres['confusion_matrix']}")
    file_str += f"accuracy: {vres['accuracy']}\n"
    file_str += f"confusion_matrix: \n{vres['confusion_matrix']}\n"
    
    with open(f"training_results/{opt_type}_{act_type}.txt", "w") as f:
        f.write(file_str)




In [None]:
# generalize("adam", "relu")

In [None]:
for opt_type in opt_lis:
    for act_type in act_lis:
        generalize(opt_type, act_type)