In [None]:
from google.colab import files
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch import Tensor

import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime 
from tqdm.notebook import tqdm 
import statistics

# parameters
RANDOM_SEED = 42   # seed handed to torch.manual_seed
BATCH_SIZE = 100   # mini-batch size used by both data loaders
N_EPOCHS = 475     # upper bound on the number of training epochs
IMG_SIZE = 32      # side length (pixels) of the random training crop
N_CLASSES = 100    # number of CIFAR-100 target classes

# SGD hyper-parameters
LEARNING_RATE = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-3


# Per-channel (R, G, B) statistics used by transforms.Normalize.
# NOTE(review): the std triple looks like the commonly quoted CIFAR-10 values
# rather than CIFAR-100 ones -- confirm before changing it, because any saved
# checkpoint was trained with exactly this normalization.
norm_mean, norm_std = (0.5071, 0.4867, 0.4408), (0.2023, 0.1994, 0.2010)


# Training pipeline: random crop / horizontal flip augmentation, then
# tensor conversion and normalization.
transform = transforms.Compose([
    transforms.RandomCrop(IMG_SIZE, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)])

# Evaluation pipeline: no random augmentation, so the reported validation
# accuracy is deterministic and measured on the unmodified test images.
valid_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)])

train_dataset = datasets.CIFAR100(root='cifar100_data', train=True, transform=transform, download=True)
valid_dataset = datasets.CIFAR100(root='cifar100_data', train=False, transform=valid_transform, download=True)

# Only the training data is shuffled; validation order does not matter.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=False)


def conv3x3(in_planes, out_planes, stride=1, bias=False):
  """Build a 3x3 ``nn.Conv2d`` with one pixel of padding on every side.

  Args:
    in_planes: number of input channels.
    out_planes: number of output channels.
    stride: convolution stride (default 1).
    bias: whether the layer gets a learnable bias (default False).

  Returns:
    The freshly constructed ``nn.Conv2d`` module.
  """
  conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=bias)
  return nn.Conv2d(in_planes, out_planes, **conv_kwargs)

def variable_init(m, neg_slope=0.0):
  """Initialise one module in place; meant to be used with ``Module.apply``.

  * ``nn.Linear`` / ``nn.Conv2d``: Kaiming-uniform weights (``neg_slope`` is
    forwarded as the leaky-ReLU negative slope ``a``) and zero bias.
  * ``nn.BatchNorm2d``: weight 1, bias 0, running mean 0, running variance 1.
  * Any other module type is left untouched.

  Args:
    m: the module to initialise.
    neg_slope: negative slope passed to ``nn.init.kaiming_uniform_``.
  """
  if isinstance(m, (nn.Linear, nn.Conv2d)):
    nn.init.kaiming_uniform_(m.weight, a=neg_slope)
    if m.bias is not None:
      nn.init.zeros_(m.bias)
  elif isinstance(m, nn.BatchNorm2d):
    # weight / bias are None when the layer was built with affine=False.
    if m.weight is not None:
      nn.init.ones_(m.weight)
    if m.bias is not None:
      nn.init.zeros_(m.bias)
    # The running buffers are None when track_running_stats=False.
    if m.running_mean is not None:
      m.running_mean.zero_()
    if m.running_var is not None:
      # The variance estimate must start at 1, not 0: a zero variance makes
      # eval-mode normalisation divide by sqrt(eps) until enough training
      # batches have been seen.
      m.running_var.fill_(1)
def _down_sample(x):
  """Halve both spatial dimensions of ``x`` with 2x2 average pooling, stride 2."""
  return F.avg_pool2d(x, kernel_size=2, stride=2)



def _increase_planes(x, n_out_planes):
  """Zero-pad ``x`` along the channel dimension up to ``n_out_planes`` channels.

  Args:
    x: 4-D tensor laid out as (samples, planes, height, width).
    n_out_planes: number of channels of the result; the code relies on it
      being at least the current channel count.

  Returns:
    ``x`` followed by all-zero channels, concatenated on dimension 1. The
    padding shares the dtype and device of ``x``.
  """
  # Read height and width separately so non-square feature maps work too.
  n_samples, n_planes, height, width = x.size()
  x_zeros = torch.zeros(
    n_samples, n_out_planes - n_planes, height, width,
    dtype=x.dtype, device=x.device)
  return torch.cat([x, x_zeros], 1)

def _downsample_and_increase_planes(x, n_out_planes):
  """Apply ``_down_sample`` to ``x`` and then ``_increase_planes`` up to ``n_out_planes``."""
  return _increase_planes(_down_sample(x), n_out_planes)

def identity_func(n_in_planes, n_out_planes, stride):
  """Choose the parameter-free shortcut transform for a residual block.

  Args:
    n_in_planes: channel count entering the block.
    n_out_planes: channel count leaving the block.
    stride: stride of the block; only the value 2 triggers down-sampling.

  Returns:
    A callable taking a tensor ``x``:
      * stride 2 and differing channel counts: down-sample, then zero-pad
        the channels;
      * stride 2 only: down-sample;
      * differing channel counts only: zero-pad the channels;
      * otherwise: return ``x`` unchanged.
  """
  halves_resolution = stride == 2
  changes_width = n_in_planes != n_out_planes

  if halves_resolution and changes_width:
    def shortcut(x):
      return _downsample_and_increase_planes(x, n_out_planes)
    return shortcut

  if halves_resolution:
    return _down_sample

  if changes_width:
    def shortcut(x):
      return _increase_planes(x, n_out_planes)
    return shortcut

  return lambda x: x

class BasicBlock(nn.Module):
  """Residual block made of two 3x3 convolutions, each followed by batch norm.

  The main path is conv -> BN -> ReLU -> conv -> BN. Its output is added to
  the shortcut chosen by ``identity_func`` and passed through a final ReLU.
  """

  # Output channels = n_out_planes * expansion (read by ResNet).
  expansion = 1

  def __init__(self, n_in_planes, n_out_planes, stride=1):
    """Create the block.

    Args:
      n_in_planes: number of input channels.
      n_out_planes: number of output channels.
      stride: stride of the first convolution; must be 1 or 2.
    """
    super().__init__()
    assert stride in (1, 2)

    main_path = [
      conv3x3(n_in_planes, n_out_planes, stride),
      nn.BatchNorm2d(n_out_planes),
      nn.ReLU(inplace=True),
      conv3x3(n_out_planes, n_out_planes),
      nn.BatchNorm2d(n_out_planes),
    ]
    self.block = nn.Sequential(*main_path)

    # Plain callable (not a sub-module): the shortcut has no parameters.
    self.identity = identity_func(n_in_planes, n_out_planes, stride)

  def forward(self, x):
    """Return ``relu(block(x) + identity(x))``."""
    residual = self.block(x)
    residual += self.identity(x)
    return nn.functional.relu(residual)

class Bottleneck(nn.Module):
  """Three-convolution residual block: 1x1 -> 3x3 -> 1x1 with 4x channel expansion.

  The 3x3 convolution carries the stride. The two 1x1 convolutions are built
  with ``nn.Conv2d``'s default bias setting, unlike ``conv3x3``.
  """

  # Output channels = n_out_planes * expansion (read by ResNet).
  expansion = 4

  def __init__(self, n_in_planes, n_out_planes, stride=1):
    """Create the block.

    Args:
      n_in_planes: number of input channels.
      n_out_planes: width of the inner convolutions; the block emits
        ``n_out_planes * 4`` channels.
      stride: stride of the 3x3 convolution.
    """
    super().__init__()

    expanded_planes = n_out_planes * 4

    # 1x1 convolution from the input width to the inner width.
    self.conv1 = nn.Conv2d(n_in_planes, n_out_planes, kernel_size=1)
    self.bn1 = nn.BatchNorm2d(n_out_planes)

    # 3x3 convolution at the inner width (applies the stride).
    self.conv2 = conv3x3(n_out_planes, n_out_planes, stride)
    self.bn2 = nn.BatchNorm2d(n_out_planes)

    # 1x1 convolution up to the expanded output width.
    self.conv3 = nn.Conv2d(n_out_planes, expanded_planes, kernel_size=1)
    self.bn3 = nn.BatchNorm2d(expanded_planes)

    self.relu = nn.ReLU(inplace=True)
    self.identity = identity_func(n_in_planes, expanded_planes, stride)

  def forward(self, x):
    """Run the three conv/BN stages, add the shortcut, and apply the final ReLU."""
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))

    out += self.identity(x)
    return self.relu(out)

class ResNet(nn.Module):
  """Residual network: a 3x3 stem, four stages of residual blocks, and a linear head.

  Stage 1 keeps the resolution; stages 2-4 each start with a stride-2 block.
  All sub-modules are initialised through ``variable_init``.
  """

  def __init__(self, block,
                     n_blocks,
                     n_output_planes,
                     n_classes):
    """Build the network.

    Args:
      block: residual block class (e.g. ``BasicBlock`` or ``Bottleneck``); it
        must expose an ``expansion`` attribute and accept
        ``(n_in_planes, n_out_planes, stride)``.
      n_blocks: sequence of 4 ints, the number of blocks per stage.
      n_output_planes: sequence of 4 ints, the ``n_out_planes`` per stage.
      n_classes: number of outputs of the final linear layer.
    """
    super().__init__()
    assert len(n_blocks) == 4
    assert len(n_output_planes) == 4

    # Running channel count; _make_layer updates it after every stage.
    self.n_in_planes = n_output_planes[0]

    # Stem: 3 input channels (RGB) -> first stage width.
    self.layer0 = nn.Sequential(
      conv3x3(3, self.n_in_planes),
      nn.BatchNorm2d(self.n_in_planes),
      nn.ReLU(inplace=True)
    )
    self.layer1 = self._make_layer(block, n_blocks[0], n_output_planes[0])
    self.layer2 = self._make_layer(block, n_blocks[1], n_output_planes[1], 2)
    self.layer3 = self._make_layer(block, n_blocks[2], n_output_planes[2], 2)
    self.layer4 = self._make_layer(block, n_blocks[3], n_output_planes[3], 2)
    # Classifier head without a bias term.
    self.fc = nn.Linear(n_output_planes[3] * block.expansion, n_classes, False)

    self.apply(variable_init)

  def _make_layer(self, block, n_blocks, n_out_planes, stride=1):
    """Stack ``n_blocks`` blocks into one stage.

    Only the first block receives ``stride`` and the incoming channel count.
    The remaining blocks use the default stride and the stage's output width.
    Updates ``self.n_in_planes`` to the stage's output width.
    """
    layers = [block(self.n_in_planes, n_out_planes, stride)]
    self.n_in_planes = n_out_planes * block.expansion
    layers.extend(
      block(self.n_in_planes, n_out_planes) for _ in range(1, n_blocks))

    return nn.Sequential(*layers)

  def features(self, x):
    """Return the flattened, globally average-pooled output of the last stage.

    The result has shape ``(batch, channels)``.
    """
    x = self.layer0(x)
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    # Global average pooling: the kernel spans the full (height, width) of the
    # feature map, so non-square maps are also reduced to 1x1.
    x = nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), 1)
    x = x.view(x.size(0), -1)
    return x

  def forward(self, x):
    """Return the class logits for the input batch ``x``."""
    x = self.features(x)
    x = self.fc(x)
    return x



# Network layout: four stages of two BasicBlocks each, with these stage widths.
n_blocks = [2, 2, 2, 2]
n_output_planes = [64, 128, 256, 512]
n_classes = N_CLASSES

# Seed the global RNG BEFORE the model is built; seeding afterwards leaves
# the random weight initialisation unseeded and therefore not reproducible.
torch.manual_seed(RANDOM_SEED)

model_exact = ResNet(BasicBlock, n_blocks, n_output_planes, n_classes)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Show which device was selected.
print(device)
# Move the model parameters and buffers to the selected device.
model_exact.to(device)

optimizer = torch.optim.SGD(model_exact.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
criterion = nn.CrossEntropyLoss()


#Testing Accuracy
def get_accuracy(model, data_loader, device):
  """Return the top-1 accuracy of ``model`` over ``data_loader`` as a fraction.

  The model is switched to eval mode for the duration of the call, so layers
  such as batch norm use their running statistics and do not update them. The
  previous train/eval mode is restored afterwards.

  Args:
    model: module mapping an image batch to per-class scores.
    data_loader: iterable of batches; element 0 holds the images and
      element 1 the integer labels.
    device: device the batches are moved to before the forward pass.

  Returns:
    ``correct / total`` as a float in [0, 1], or 0.0 when the loader yields
    no samples.
  """
  correct = 0
  total = 0
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for data in data_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        # Index of the highest score along the class dimension.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
  finally:
    # Hand the model back in the mode the caller left it in.
    model.train(was_training)

  if total == 0:
    return 0.0
  return correct / total

def lr_schedule(epoch):
  """Return the step-decayed learning rate for the given epoch index.

  ``LEARNING_RATE`` is returned unchanged up to and including epoch 80. After
  that it is scaled by 1e-1, 1e-2, 1e-3, 0.5e-3 and 1e-4 once ``epoch``
  exceeds 80, 120, 160, 180 and 200 respectively.
  """
  # (exclusive epoch threshold, multiplier), checked from the latest stage down.
  decay_steps = (
    (200, 1e-4),
    (180, 0.5e-3),
    (160, 1e-3),
    (120, 1e-2),
    (80, 1e-1),
  )
  for threshold, factor in decay_steps:
    if epoch > threshold:
      return LEARNING_RATE * factor
  return LEARNING_RATE


# Train for up to N_EPOCHS epochs; every `print_every` epochs report the mean
# training loss and the validation accuracy, stopping early at >= 70 %.
print_every = 10
for epoch in range(N_EPOCHS):  # loop over the dataset multiple times
    running_loss = 0.0

    # Apply the scheduled learning rate to the EXISTING optimizer. Building a
    # new SGD instance every epoch would ignore the schedule and throw away
    # the accumulated momentum buffers.
    lr = lr_schedule(epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    # Make sure training-time behaviour is active (batch norm uses batch
    # statistics), whatever mode a previous evaluation left the model in.
    model_exact.train()

    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        output = model_exact(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch-mean loss by the batch size so the epoch figure
        # is a per-sample average.
        running_loss += loss.item() * inputs.size(0)

    # print statistics
    if epoch % print_every == (print_every - 1):
        epoch_loss = running_loss / len(train_loader.dataset)
        valid_acc = get_accuracy(model_exact, valid_loader, device)
        print(f'Epoch: {epoch+1} --- '
              f'Learning rate: {lr}\t'
              f'Train loss: {epoch_loss:.4f}\t'
              f'Valid accuracy: {100 * valid_acc:.2f}')
        # Early stop once the validation accuracy target is reached.
        if valid_acc >= 0.7:
            break


print('Finished Training of Resnet')

# Persist only the weights (state_dict).
# NOTE(review): the '../dataset' directory presumably has to exist already --
# confirm, since a failed save would lose the whole training run.
filename = '../dataset/Cifar100.pth'
torch.save(model_exact.state_dict(), filename)



Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to cifar100_data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting cifar100_data/cifar-100-python.tar.gz to cifar100_data
Files already downloaded and verified
cuda:0
Epoch: 10 --- Learning rate: 0.001	Train loss: 2.2220	Valid accuracy: 40.23
Epoch: 20 --- Learning rate: 0.001	Train loss: 1.5891	Valid accuracy: 50.02
Epoch: 30 --- Learning rate: 0.001	Train loss: 1.2105	Valid accuracy: 55.99
Epoch: 40 --- Learning rate: 0.001	Train loss: 0.9307	Valid accuracy: 59.15
Epoch: 50 --- Learning rate: 0.001	Train loss: 0.7092	Valid accuracy: 60.79
Epoch: 60 --- Learning rate: 0.001	Train loss: 0.5222	Valid accuracy: 62.17
Epoch: 70 --- Learning rate: 0.001	Train loss: 0.3730	Valid accuracy: 62.91
Epoch: 80 --- Learning rate: 0.001	Train loss: 0.2639	Valid accuracy: 62.82
Epoch: 90 --- Learning rate: 0.0001	Train loss: 0.1862	Valid accuracy: 64.20
Epoch: 100 --- Learning rate: 0.0001	Train loss: 0.1428	Valid accuracy: 63.78
Finished Training of Resnet
