<a href="https://colab.research.google.com/github/JHyunjun/SNU/blob/main/Torch_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [None]:
# Standard library
import os
import random
import sys
import time

# Other dependencies
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import tqdm
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import trange

# Report interpreter / library versions so the run can be reproduced.
print('Python version: ', sys.version)
print('Pytorch version: ', torch.__version__)

# Only query device details when CUDA is usable: on a CPU-only runtime
# torch.cuda.current_device() / get_device_name() raise instead of returning.
gpu_available = torch.cuda.is_available()
print("GPU available: {}".format(gpu_available))
if gpu_available:
    gpu_index = torch.cuda.current_device()
    print("current GPU index: {}".format(gpu_index))
    # Name the device that is actually current rather than a hard-coded index 0.
    print("current GPU card name: {}".format(torch.cuda.get_device_name(gpu_index)))

Python version:  3.7.13 (default, Apr 24 2022, 01:04:09) 
[GCC 7.5.0]
Pytorch version:  1.12.0+cu113
GPU available: True
current GPU index: 0
current GPU card name: Tesla T4


#### Model definition


> Our model follows an architecture with 3 modules, each consisting of a 3 × 3 convolution with 64 filters,
followed by batch normalization (Ioffe & Szegedy, 2015), a ReLU nonlinearity, and 2 × 2 max-pooling.

In [None]:
class convblock(nn.Module):
    """One convolutional module: 3x3 conv -> batch norm -> ReLU -> 2x2 max-pool.

    Args:
        in_c: number of input channels.
        out_c: number of output channels (convolution filters).
    """

    def __init__(self, in_c, out_c):
        # nn.Module must be initialised before any sub-module is assigned;
        # without this call the attribute assignments below raise.
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, out_c, 3)
        self.bn1 = nn.BatchNorm2d(out_c)
        self.relu1 = nn.ReLU()
        self.max1 = nn.MaxPool2d(2)

    def forward(self, img):
        """Apply conv, batch norm, ReLU and max-pooling to ``img`` in that order."""
        x = self.conv1(img)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.max1(x)
        return x
# TODO 1. build your model following the instruction above.
# Note that, the size of inputs will be (32, 32)
class Net(nn.Module):
  """Small CNN: three (3x3 conv, 64 filters -> batch norm -> ReLU -> 2x2 max-pool)
  stages followed by a linear classifier.

  The classifier expects 256 flattened features, which is what a 32x32 input
  yields (32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2 per side, times 64 channels).

  Args:
    nclasses: number of output logits.
  """

  def __init__(self, nclasses):
    super().__init__()
    # Register conv{i}/bn{i}/relu{i}/max{i} for i = 1..3, stage by stage.
    for stage, in_channels in enumerate((3, 64, 64), start=1):
      setattr(self, f"conv{stage}", nn.Conv2d(in_channels, 64, kernel_size=3))
      setattr(self, f"bn{stage}", nn.BatchNorm2d(64))
      setattr(self, f"relu{stage}", nn.ReLU())
      setattr(self, f"max{stage}", nn.MaxPool2d(2))

    self.flatten = nn.Flatten()
    self.fc = nn.Linear(256, nclasses)

  def forward(self, img):
    """Return the class logits for a batch of images."""
    feats = self.max1(self.relu1(self.bn1(self.conv1(img))))
    feats = self.max2(self.relu2(self.bn2(self.conv2(feats))))
    feats = self.max3(self.relu3(self.bn3(self.conv3(feats))))
    return self.fc(self.flatten(feats))

In [None]:
# Smoke test: push one random 32x32 RGB image through a 5-class network
# and display the resulting logits.
net = Net(nclasses=5)
img = torch.randn(1, 3, 32, 32)
net(img)

tensor([[-0.5744,  0.8703, -0.9981, -0.2039, -1.3783]],
       grad_fn=<AddmmBackward0>)

In [None]:
# Prepare everything needed for training the CNN model on CIFAR10.

# Use the GPU when one is present; otherwise stay on the CPU instead of
# failing on a hard-coded "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training images are randomly flipped horizontally; test images are only
# converted to tensors.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
])

# load the CIFAR10 dataset from the torchvision package
train_dataset = torchvision.datasets.CIFAR10(root='dataset/cifar10', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR10(root='dataset/cifar10', train=False, download=True, transform=test_transform)

# get the dataloader
train_dataloader = DataLoader(train_dataset, batch_size=256, drop_last=True, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, drop_last=False, shuffle=False)

# instantiate the model & move it to the selected device
model = Net(nclasses=10)
model.to(device)

criterion = nn.CrossEntropyLoss() # instantiate the loss (criterion)
criterion.to(device) # keep the loss module on the same device as the model

# get the optimizer to train the model; the learning rate is multiplied by
# 0.1 at epochs 150, 180 and 190
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 180, 190], gamma=0.1)
#=================================================== 

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to dataset/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting dataset/cifar10/cifar-10-python.tar.gz to dataset/cifar10
Files already downloaded and verified


In [None]:
def train(dataloader, model, criterion, optimizer):
  """Run one optimisation pass over ``dataloader`` and report epoch metrics.

  Args:
    dataloader: iterable yielding ``(inputs, labels)`` tensor batches.
    model: network to optimise; it is switched to training mode.
    criterion: loss callable invoked as ``criterion(logits, labels)``.
    optimizer: optimizer whose ``zero_grad``/``step`` are called once per batch.

  Returns:
    ``(loss, accuracy)``: the per-batch loss values weighted by batch size and
    divided by the number of samples, and the fraction of samples whose
    arg-max prediction equals the label.

  Raises:
    ZeroDivisionError: if ``dataloader`` yields no samples.
  """
  model.train()

  # Send batches to wherever the model lives instead of a hard-coded .cuda():
  # this works on CPU-only hosts and when the model sits on a non-default GPU.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else None

  total_loss = 0
  total_acc = 0
  n = 0
  for x, y in dataloader:
    optimizer.zero_grad()
    if device is not None:
      x, y = x.to(device), y.to(device)
    logits = model(x)
    loss = criterion(logits, y)
    loss.backward()
    optimizer.step()

    pred = torch.argmax(logits, dim=1) # logits: (batch_size, class_num)
    acc = torch.sum(pred == y)

    batch_size = x.shape[0]
    total_loss += loss.item() * batch_size
    total_acc += acc.item()
    n += batch_size

  return total_loss / n, total_acc / n

In [None]:
@torch.no_grad()
def test(dataloader, model, criterion):
  model.eval()
  total_loss = 0
  total_acc = 0
  n = 0
  for x, y in dataloader:#tqdm.notebook.tqdm(dataloader, desc='test'):
    x, y = x.cuda(), y.cuda()
    logits = model(x)
    loss = criterion(logits, y)

    pred = torch.argmax(logits, dim=1)
    acc = torch.sum(pred == y)

    total_loss += loss.item() * x.shape[0]
    total_acc += acc.item()
    n += x.shape[0]

  return total_loss / n, total_acc / n

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on the
# `runs` directory, under which the SummaryWriter logs of this notebook are written.
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
# Number of passes over the training set.
EPOCHS = 200

# The context manager closes the writer even if the run is interrupted
# (e.g. by a KeyboardInterrupt), so the event file is always finalised.
with SummaryWriter('runs/cifar10') as writer:
  for epc in trange(EPOCHS, desc='EPOCH'):
    tr_loss, tr_acc = train(train_dataloader, model, criterion, optimizer)
    te_loss, te_acc = test(test_dataloader, model, criterion)

    # Log per-epoch metrics and flush so TensorBoard can show them live.
    writer.add_scalar('train/loss', tr_loss, epc)
    writer.add_scalar('train/acc', tr_acc, epc)
    writer.add_scalar('test/loss', te_loss, epc)
    writer.add_scalar('test/acc', te_acc, epc)
    writer.flush()

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

EPOCH:   0%|          | 0/200 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# train CIFAR100 with the ImageNet pretrained resnet34 model provided by PyTorch
#================ YOUR CODE HERE ===================
# Use the GPU when one is present; otherwise stay on the CPU instead of
# failing on a hard-coded "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The pretrained weights expect inputs normalised with the ImageNet channel
# statistics, so apply the same normalisation to the CIFAR100 images.
cifar100_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# load the CIFAR100 dataset from the torchvision package
train_dataset = torchvision.datasets.CIFAR100(root='dataset/cifar100', train=True, download=True, transform=cifar100_transform)
test_dataset = torchvision.datasets.CIFAR100(root='dataset/cifar100', train=False, download=True, transform=cifar100_transform)

# get the dataloader
train_dataloader = DataLoader(train_dataset, batch_size=256, num_workers=1, drop_last=True, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, num_workers=1, drop_last=False, shuffle=False)

# instantiate the model & move it to the selected device
# `weights=` replaces the `pretrained=True` flag deprecated since torchvision 0.13.
model = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)
# Swap the 1000-way ImageNet head for a 100-way CIFAR100 classifier.
model.fc = nn.Linear(model.fc.in_features, 100)
model.to(device)

criterion = nn.CrossEntropyLoss() # instantiate the loss (criterion)
criterion.to(device) # keep the loss module on the same device as the model

# get the optimizer to train the model; the learning rate is multiplied by
# 0.1 at epochs 150, 180 and 190
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 180, 190], gamma=0.1)

# SET EPOCHS
EPOCHS = 200

# The context manager closes the writer even if the run is interrupted
# (e.g. by a KeyboardInterrupt), so the event file is always finalised.
with SummaryWriter('runs/cifar100') as writer:
  for epc in trange(EPOCHS, desc='EPOCH'):
    tr_loss, tr_acc = train(train_dataloader, model, criterion, optimizer)
    te_loss, te_acc = test(test_dataloader, model, criterion)

    # Log per-epoch metrics and flush so TensorBoard can show them live.
    writer.add_scalar('train/loss', tr_loss, epc)
    writer.add_scalar('train/acc', tr_acc, epc)
    writer.add_scalar('test/loss', te_loss, epc)
    writer.add_scalar('test/acc', te_acc, epc)
    writer.flush()

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

#=================================================== 

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to dataset/cifar100/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting dataset/cifar100/cifar-100-python.tar.gz to dataset/cifar100
Files already downloaded and verified


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

EPOCH:   0%|          | 0/200 [00:00<?, ?it/s]

KeyboardInterrupt: ignored