In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt

from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.optim import Adam

RANDOM_SEED = 1
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7f7caca6b650>

In [4]:
n_classes = 10
n_epochs = 50
batch_size = 256
learning_rate = 1e-4

In [10]:
class CNNModel(nn.Module):
  def __init__(self, n_classes):
    super(CNNModel, self).__init__()
    self.conv_layer1 = nn.Sequential(
        nn.Conv2d(3, 64, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(64)
    )
    self.conv_layer2 = nn.Sequential(
        nn.Conv2d(64, 64, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(64)
    )
    self.conv_layer3 = nn.Sequential(
        nn.Conv2d(64, 64, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(64), 
        nn.MaxPool2d(2, 2)
    )
    self.conv_layer4 = nn.Sequential(
        nn.Conv2d(64, 128, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(128)
    )
    self.conv_layer5 = nn.Sequential(
        nn.Conv2d(128, 128, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(128)
    )
    self.conv_layer6 = nn.Sequential(
        nn.Conv2d(128, 128, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(128), 
        nn.MaxPool2d(2, 2)
    )
    self.conv_layer7 = nn.Sequential(
        nn.Conv2d(128, 256, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(256)
    )
    self.conv_layer8 = nn.Sequential(
        nn.Conv2d(256, 256, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(256)
    )
    self.conv_layer9 = nn.Sequential(
        nn.Conv2d(256, 256, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(256), 
        nn.MaxPool2d(2, 2)
    )
    self.conv_layer10 = nn.Sequential(
        nn.Conv2d(256, 512, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(512)
    )
    self.conv_layer11 = nn.Sequential(
        nn.Conv2d(512, 512, 3, stride=1, padding='same'),
        nn.ReLU(),
        nn.BatchNorm2d(512)
    )
    self.conv_layer12 = nn.Sequential(
        nn.Conv2d(512, 512, 3, stride=1, padding='same'), 
        nn.ReLU(), 
        nn.BatchNorm2d(512), 
        nn.MaxPool2d(2, 2)
    )

    self.flatten = nn.Flatten()
    self.fc_layer1 = nn.Sequential(
        nn.Linear(512 * 2 * 2, 512), 
        nn.ReLU()
    )
    self.fc_layer2 = nn.Linear(512, n_classes)

  def forward(self, x):
    x = self.conv_layer1(x)
    x = self.conv_layer2(x)
    x = self.conv_layer3(x)
    x = self.conv_layer4(x)
    x = self.conv_layer5(x)
    x = self.conv_layer6(x)
    x = self.conv_layer7(x)
    x = self.conv_layer8(x)
    x = self.conv_layer9(x)
    x = self.conv_layer10(x)
    x = self.conv_layer11(x)
    x = self.conv_layer12(x)
    x = self.flatten(x)
    x = self.fc_layer1(x)
    outputs = self.fc_layer2(x)

    return outputs


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using device: {}'.format(device))
model = CNNModel(n_classes)
model.to(device)

Using device: cuda


CNNModel(
  (conv_layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_layer2): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_layer3): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_layer4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_layer5): Sequential(
    (0): Conv2d(128, 128, kernel

In [7]:
transform = transforms.Compose([
    transforms.ToTensor()
])

train_set = CIFAR10(root='./data', train=True, download=True, transform=transform)
val_set = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_dataloader = DataLoader(train_set, batch_size=batch_size,
                              shuffle=True, num_workers=2)
val_dataloader = DataLoader(val_set, batch_size=batch_size, 
                            shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate)

In [11]:
training_losses = []
val_losses = []
val_accs = []

training_steps = len(train_dataloader)
val_steps = len(val_dataloader)

In [12]:
print('START TRAINING .... \n')

for epoch in range(n_epochs):
  total_training_loss = 0

  # TRAINING
  for idx, (X_train, y_train) in enumerate(train_dataloader):
    X_train = X_train.to(device)
    y_train = y_train.to(device)

    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    total_training_loss += loss.item()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  training_loss = total_training_loss/training_steps
  training_losses.append(training_loss)
  print('EPOCH: [{}/{}], TRAINING LOSS: {}'.format(epoch + 1, n_epochs, training_loss))

  # VALIDATING
  with torch.no_grad():
    model.eval()
    n_correct_preds = 0
    n_val_samples = 0
    total_val_loss = 0
    for idx, (X_val, y_val) in enumerate(val_dataloader):
      X_val = X_val.to(device)
      y_val = y_val.to(device)

      y_pred = model(X_val)
      loss = criterion(y_pred, y_val)
      total_val_loss += loss.item()

      y_pred_max, y_pred_max_idx = torch.max(y_pred, 1)
      n_val_samples += y_pred.size(0)
      n_correct_preds += (y_val == y_pred_max_idx).sum().item()

    val_acc = n_correct_preds / n_val_samples
    val_loss = total_val_loss / val_steps
    val_accs.append(val_acc)
    val_losses.append(val_loss)
    print('VAL LOSS: {}, VAL ACCURACY: {}'.format(val_loss, val_acc))

print('DONE TRAINING!!!!!')

START TRAINING .... 

EPOCH: [1/50], TRAINING LOSS: 2.387894652327713
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [2/50], TRAINING LOSS: 2.3893058275689882
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [3/50], TRAINING LOSS: 2.3894030938343125
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [4/50], TRAINING LOSS: 2.38919982496573
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [5/50], TRAINING LOSS: 2.389516195472406
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [6/50], TRAINING LOSS: 2.3895125364770693
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [7/50], TRAINING LOSS: 2.389408620036378
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [8/50], TRAINING LOSS: 2.38928038611704
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [9/50], TRAINING LOSS: 2.3894984478853187
VAL LOSS: 2.3847280740737915, VAL ACCURACY: 0.0998
EPOCH: [10/50], TRAINING LOSS: 2.389378676609117
VAL LOSS: 2.3847280740737915, VAL AC