In [92]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
import torch
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from google.colab import drive
warnings.filterwarnings("ignore")
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [93]:
# number of subprocesses to use for data loading
NUM_WORKERS = 1
BATCH_SIZE =  10
EPOCHS = 10

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_data = CIFAR10(root='./data', train=True, download=True, transform=transform)
test_data = CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

Files already downloaded and verified
Files already downloaded and verified


In [94]:
image_batch, label_batch = next(iter(train_loader))
image_batch.shape, label_batch.shape

(torch.Size([10, 3, 32, 32]), torch.Size([10]))

# Output volume can be calculated with below formula:

- Input: n X n X nc
- Filter: f X f X nc
- Padding: p
- Stride: s
- Output: [((n+2p-f)/s)+1] X [((n+2p-f)/s)+1] X nc’ (height X width X no of output channels)
nc is the number of channels in the input and filter, while nc’ is the number of filters.

From the above structure you can see that height/width is getting reduced and number of channels are getting incresed.

Example calulating the output of first convolution + pooling layer operation -

Input image shape - 32(n) X 32(n) X 3(nc)

# 1. ConVNet filter operation - self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True)

Filter shape - 3 (f) X 3 (f) X 3(nc) Padding : P = 1 Stride : s = 1 (default value) output channels - 16 (kernel_size)

putting it in the formula given above -

[((n+2p-f)/s)+1] X [((n+2p-f)/s)+1] X nc’

[((32 + 2X1 - 3) / 1) + 1)] X [((32 + 2X1 - 3) / 1)) + 1)] X 16

output shape -> 32 X 32 X 16

# 2. output of conv1 is passed through max pooling layer.
self.pool = nn.MaxPool2d(2, 2) -> filter of 2 X 2.

this will shrink the height & width by half , however no of channels will remain same.

input to the pooling layer - 32 X 32 X 16

output of the pooing layer - 32/2 X 32/2 X 16 -> 16 X 16 X 16

In [95]:
class Model(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
    self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
    self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
    self.pool = nn.MaxPool2d(2, 2)
    self.fc1 = nn.Linear(64 * 4 * 4, 500)
    self.fc2 = nn.Linear(500, 10)
    self.dropout = nn.Dropout(0.25)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool(F.relu(self.conv3(x)))

    x = x.view(-1, 64 * 4 * 4)

    x = F.relu(self.fc1(x))
    x = self.dropout(x)
    x = self.fc2(x)

    return x

model = Model()
print(model)

Model(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)


In [96]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [97]:
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

In [98]:
print(torch.cuda.is_available())

True


In [99]:
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [100]:
model.to(device)

Model(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)

In [101]:
for epoch in range(EPOCHS):
  train_loss = []

  model.train()

  scheduler.step()

  for i,(data, target) in enumerate(train_loader):

    data, target = data.to(device), target.to(device)

    optimizer.zero_grad()

    output = model(data)

    loss_value = loss(output, target)

    loss_value.backward()

    optimizer.step()

    if (i+1) % 2000 == 0:
      print (f'Epoch {epoch+1}, Step {i+1}, Loss: {loss_value.item():.4f}')

Epoch 1, Step 2000, Loss: 0.9437
Epoch 1, Step 4000, Loss: 0.8935
Epoch 2, Step 2000, Loss: 1.2074
Epoch 2, Step 4000, Loss: 1.1237
Epoch 3, Step 2000, Loss: 0.8088
Epoch 3, Step 4000, Loss: 0.9106
Epoch 4, Step 2000, Loss: 0.8739
Epoch 4, Step 4000, Loss: 0.4180
Epoch 5, Step 2000, Loss: 0.9002
Epoch 5, Step 4000, Loss: 1.4210
Epoch 6, Step 2000, Loss: 0.5169
Epoch 6, Step 4000, Loss: 0.5073
Epoch 7, Step 2000, Loss: 0.6669
Epoch 7, Step 4000, Loss: 0.3486
Epoch 8, Step 2000, Loss: 0.7935
Epoch 8, Step 4000, Loss: 0.9906
Epoch 9, Step 2000, Loss: 0.7701
Epoch 9, Step 4000, Loss: 0.5769
Epoch 10, Step 2000, Loss: 1.6942
Epoch 10, Step 4000, Loss: 0.3762


In [102]:
model.eval()

with torch.no_grad():
  correct = 0
  total = 0

  for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    _, pred = torch.max(output.data, 1)
    total += target.size(0)
    correct += (pred == target).sum().item()

  print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 69 %
