## Transfer Learning ResNet

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torchsummary import summary

In [None]:
#parameters
batch_size = 128
num_classes = 10  #CIFAR-10 has 10 classes
epochs = 5
learning_rate = 0.001

In [None]:
#Data Transformation Pipeline for CIFAR-10
transform = transforms.Compose([
    transforms.Resize((224, 224)),     #Resnet expect 224*224 Image
    transforms.ToTensor(),      #Convert PIL Image to input tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])      #Normalization for pre-trained models
])

In [None]:
#Loading CIFAR-10 dataset
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

#DataLoader for Batch Processing
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:08<00:00, 20.6MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
#Load a pretrained ResNet18 model
model = models.resnet18(pretrained=True)
print(model)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 103MB/s] 


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
print(model.parameters())

<generator object Module.parameters at 0x7aafd18b5460>


In [None]:
#Freeze the early layers (feature extractor)
for param in model.parameters():
  print(param)
  param.requires_grad = False

Parameter containing:
tensor([[[[-1.0419e-02, -6.1356e-03, -1.8098e-03,  ...,  5.6615e-02,
            1.7083e-02, -1.2694e-02],
          [ 1.1083e-02,  9.5276e-03, -1.0993e-01,  ..., -2.7124e-01,
           -1.2907e-01,  3.7424e-03],
          [-6.9434e-03,  5.9089e-02,  2.9548e-01,  ...,  5.1972e-01,
            2.5632e-01,  6.3573e-02],
          ...,
          [-2.7535e-02,  1.6045e-02,  7.2595e-02,  ..., -3.3285e-01,
           -4.2058e-01, -2.5781e-01],
          [ 3.0613e-02,  4.0960e-02,  6.2850e-02,  ...,  4.1384e-01,
            3.9359e-01,  1.6606e-01],
          [-1.3736e-02, -3.6746e-03, -2.4084e-02,  ..., -1.5070e-01,
           -8.2230e-02, -5.7828e-03]],

         [[-1.1397e-02, -2.6619e-02, -3.4641e-02,  ...,  3.2521e-02,
            6.6221e-04, -2.5743e-02],
          [ 4.5687e-02,  3.3603e-02, -1.0453e-01,  ..., -3.1253e-01,
           -1.6051e-01, -1.2826e-03],
          [-8.3730e-04,  9.8420e-02,  4.0210e-01,  ...,  7.0789e-01,
            3.6887e-01,  1.2455e-01]

In [None]:
#Replace the classifier layer (fully connected layer)
num_ftrs = model.fc.in_features
print(f'Total Input Feature in Fully Connected Layer: {num_ftrs}')

model.fc = nn.Linear(num_ftrs, num_classes)       #Replace output feature 1000 to 10, cause CIFAR10 has 10 classes
print(model.fc)

Total Input Feature in Fully Connected Layer: 512
Linear(in_features=512, out_features=10, bias=True)


In [None]:
#Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
device

device(type='cuda')

In [None]:
#Print Model summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
#Optimizer and Loss Function
optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)     #Only train the classifier layer
criterion = nn.CrossEntropyLoss()

In [None]:
#Training Function
def train_model(model, train_loader, epochs):
  model.train()   #Set the model to training mode
  for epoch in range(epochs):
    total_loss = 0
    correct = 0
    for data, target in train_loader:
      data, target = data.to(device), target.to(device)     #Move the data and target to cuda enabled GPU
      optimizer.zero_grad()     #Reset Gradients
      output = model(data)
      loss = criterion(output, target)
      loss.backward()     #Backpropagation(Calculate derivative of loss with respect to parameters)
      optimizer.step()    #Update weight
      print(f'Loss of Item: {loss.item()}')     # retrieve the scalar value from a loss tensor
      print(f'Batch Size of Tensor Data: {data.size(0)}')   #batch size of a tensor data
      total_loss += loss.item() * data.size(0)      #Accumulates total loss (not per batch).
      pred = output.argmax(dim=1)                   #Gets predicted class from Output(logits).
      correct += pred.eq(target).sum().item()       #Counts correct predictions.
    print(f'Total Loss: {total_loss}')
    print(f'Correct Predictions: {correct}')
    avg_loss = total_loss / len(train_loader.dataset)
    accuracy = correct / len(train_loader.dataset)

    print(f'Epoch: [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')


In [None]:
# Train and evaluate the model
train_model(model, train_loader, epochs)

Loss of Item: 2.401623249053955
Batch Size of Tensor Data: 128
Loss of Item: 2.3376846313476562
Batch Size of Tensor Data: 128
Loss of Item: 2.2183098793029785
Batch Size of Tensor Data: 128
Loss of Item: 2.330643653869629
Batch Size of Tensor Data: 128
Loss of Item: 2.1595098972320557
Batch Size of Tensor Data: 128
Loss of Item: 2.2020909786224365
Batch Size of Tensor Data: 128
Loss of Item: 2.0914981365203857
Batch Size of Tensor Data: 128
Loss of Item: 2.1416847705841064
Batch Size of Tensor Data: 128
Loss of Item: 2.0309975147247314
Batch Size of Tensor Data: 128
Loss of Item: 2.048609495162964
Batch Size of Tensor Data: 128
Loss of Item: 2.0119271278381348
Batch Size of Tensor Data: 128
Loss of Item: 1.8622747659683228
Batch Size of Tensor Data: 128
Loss of Item: 1.8672726154327393
Batch Size of Tensor Data: 128
Loss of Item: 1.8489923477172852
Batch Size of Tensor Data: 128
Loss of Item: 1.890444040298462
Batch Size of Tensor Data: 128
Loss of Item: 1.8699382543563843
Batch Size 

In [None]:
#Evaluation Function
def evaluate_model(model, test_loader):
  model.eval()      #Set Model to evaluation mode
  test_loss = 0
  correct = 0
  with torch.no_grad():           #used to disable gradient computation within a block of code
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      print(f'Output: {output}')
      loss = criterion(output, target)
      test_loss += loss.item() * data.size(0)
      pred = output.argmax(dim=1)       #retrieve the indices of the maximum values along a specified dimension in a tensor.
      print(f'Prediction: {pred}')
      print(f'Target: {target}')
      correct += pred.eq(target).sum().item()

  avg_loss = test_loss / len(test_loader.dataset)
  accuracy = correct / len(test_loader.dataset)

  print(f'Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}')

In [None]:
#evaluate the model
evaluate_model(model, test_loader)

Output: tensor([[-4.8580, -5.4461, -3.3562,  ..., -5.2471, -6.4713, -5.7082],
        [ 0.0781, -0.9834, -1.7896,  ..., -3.8417,  3.2115, -7.1185],
        [-0.8739, -2.5350, -5.0243,  ..., -4.3409,  4.3292, -0.0714],
        ...,
        [-1.0129, -5.3064,  1.5976,  ..., -3.5764, -3.4406, -7.2658],
        [-1.9168, -5.9966, -3.3778,  ..., -3.4628,  6.7709, -3.8583],
        [-4.7395, -4.5417, -1.6218,  ..., -4.6037, -4.9550, -6.5695]],
       device='cuda:0')
Prediction: tensor([3, 8, 8, 0, 6, 6, 9, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8, 6, 7, 0, 4, 9,
        5, 3, 3, 0, 9, 6, 6, 5, 4, 5, 9, 2, 4, 9, 9, 5, 0, 6, 5, 3, 0, 9, 3, 9,
        7, 6, 9, 8, 0, 3, 8, 8, 7, 3, 5, 3, 7, 3, 6, 3, 6, 2, 1, 0, 3, 7, 2, 3,
        8, 8, 9, 2, 9, 3, 5, 0, 8, 1, 1, 7, 3, 7, 2, 7, 8, 9, 0, 3, 8, 6, 4, 6,
        6, 0, 0, 7, 4, 5, 6, 3, 1, 1, 3, 6, 0, 7, 4, 0, 6, 2, 1, 3, 0, 7, 3, 3,
        8, 3, 1, 2, 8, 2, 8, 3], device='cuda:0')
Target: tensor([3, 8, 8, 0, 6, 6, 1, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8,

## Further Exploring the CIFAR10 Dataset with different dimension

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torchsummary import summary

In [None]:
#Parameters
batch_size = 128
num_classes = 10      #CIFAR10 has 10 class
epochs = 5
learning_rate = 0.001

In [None]:
#Data Transformation Pipeline for CIFAR-10
transform = transforms.Compose([
    transforms.Resize((224, 224)),    #Resnet expect 224 images
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])   #Normalization for pre-trained models
])

In [None]:
#Loading CIFAR-10 Datasets
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform
)
#DataLoader for batch processing
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
#Load a pre-trained resnet18 model
model = models.resnet18(pretrained=True)
model



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
#Freeze All Layers
for param in model.parameters():
  print(param)
  param.requires_grad=False

Parameter containing:
tensor([[[[-1.0419e-02, -6.1356e-03, -1.8098e-03,  ...,  5.6615e-02,
            1.7083e-02, -1.2694e-02],
          [ 1.1083e-02,  9.5276e-03, -1.0993e-01,  ..., -2.7124e-01,
           -1.2907e-01,  3.7424e-03],
          [-6.9434e-03,  5.9089e-02,  2.9548e-01,  ...,  5.1972e-01,
            2.5632e-01,  6.3573e-02],
          ...,
          [-2.7535e-02,  1.6045e-02,  7.2595e-02,  ..., -3.3285e-01,
           -4.2058e-01, -2.5781e-01],
          [ 3.0613e-02,  4.0960e-02,  6.2850e-02,  ...,  4.1384e-01,
            3.9359e-01,  1.6606e-01],
          [-1.3736e-02, -3.6746e-03, -2.4084e-02,  ..., -1.5070e-01,
           -8.2230e-02, -5.7828e-03]],

         [[-1.1397e-02, -2.6619e-02, -3.4641e-02,  ...,  3.2521e-02,
            6.6221e-04, -2.5743e-02],
          [ 4.5687e-02,  3.3603e-02, -1.0453e-01,  ..., -3.1253e-01,
           -1.6051e-01, -1.2826e-03],
          [-8.3730e-04,  9.8420e-02,  4.0210e-01,  ...,  7.0789e-01,
            3.6887e-01,  1.2455e-01]

In [None]:
#Unfreeze the last two layers(layer 4)
for param in model.layer4.parameters():
  print(param)
  param.requires_grad=True

Parameter containing:
tensor([[[[-1.1645e-02, -1.9010e-02, -2.1876e-02],
          [ 2.0482e-02,  2.3962e-02,  2.9161e-02],
          [ 4.3672e-02,  3.3278e-02,  4.9908e-02]],

         [[-7.4040e-03,  2.8083e-03, -4.7339e-03],
          [ 6.9030e-03,  1.4271e-02, -3.6954e-03],
          [-3.1341e-03,  1.3736e-02,  1.6127e-03]],

         [[ 1.8676e-02, -1.0553e-02, -1.4233e-02],
          [ 8.9944e-03, -2.5068e-03, -1.2145e-02],
          [-4.9455e-03, -2.9206e-02, -9.6385e-03]],

         ...,

         [[-1.2655e-02,  1.7691e-02,  9.8264e-04],
          [ 7.4271e-03,  7.6115e-03,  1.1135e-02],
          [ 2.3242e-02,  1.1058e-02,  4.0498e-03]],

         [[ 1.8557e-02,  1.2472e-02,  1.7220e-02],
          [-4.8544e-03,  8.3627e-03,  2.2811e-02],
          [-5.1675e-03,  2.3264e-02,  3.4068e-02]],

         [[ 2.4934e-02,  2.2373e-02,  4.2614e-02],
          [ 1.3486e-02,  1.6760e-03,  1.3019e-02],
          [-6.2821e-03, -1.5112e-03, -8.9229e-03]]],


        [[[-9.8089e-04, -6.3011

In [None]:
#Requires the classifier layer (fully connected layer)
num_in_features = model.fc.in_features
print(num_in_features)
model.fc = nn.Linear(num_in_features, num_classes)
print(model.fc)

512
Linear(in_features=512, out_features=10, bias=True)


In [None]:
#Move Model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [None]:
#Print the model summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
#Optimizer and Loss Function
#Include parameters of both the classifier layer(Fully Connected Layer) and unfrozen layer(layer4)
optimizer = optim.Adam([
    {'params': model.fc.parameters()},
    {'params': model.layer4.parameters()}
], lr=learning_rate)

criterion = nn.CrossEntropyLoss()

In [None]:
#Training Function (Same as before)
def train_model(model, train_loader, epochs):
  model.train()     #Set the model into train mode
  for epoch in range(epochs):
    total_loss = 0
    correct = 0
    for data, target in train_loader:
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()   #Reset the gradients
      output = model(data)
      loss = criterion(output, target)
      loss.backward()     #Backpropagation
      optimizer.step()    #Update weights
      print(f'Loss of Item: {loss.item()}')
      print(f'Batch Size of Tensor Data: {data.size(0)}')
      total_loss += loss.item() * data.size(0)
      pred = output.argmax(dim=1)     #Retrieve the indices of the maximum value
      #Count Correct Prediction
      correct += pred.eq(target).sum().item()     #eq() equalize the value of prediction and target, item() converts the value into python integer
    print(f'Total Loss: {total_loss}')
    print(f'Correct Predictions: {correct}')
    avg_loss = total_loss / len(train_loader.dataset)
    accuracy = correct / len(train_loader.dataset)
    print(f'Epoch: [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

In [None]:
train_model(model, train_loader, epochs)

Loss of Item: 2.6202597618103027
Batch Size of Tensor Data: 128
Loss of Item: 1.5329654216766357
Batch Size of Tensor Data: 128
Loss of Item: 1.0704424381256104
Batch Size of Tensor Data: 128
Loss of Item: 1.0701459646224976
Batch Size of Tensor Data: 128
Loss of Item: 0.7780848145484924
Batch Size of Tensor Data: 128
Loss of Item: 0.7508509159088135
Batch Size of Tensor Data: 128
Loss of Item: 0.650879442691803
Batch Size of Tensor Data: 128
Loss of Item: 0.9061073660850525
Batch Size of Tensor Data: 128
Loss of Item: 1.0104131698608398
Batch Size of Tensor Data: 128
Loss of Item: 0.5890881419181824
Batch Size of Tensor Data: 128
Loss of Item: 0.7249146699905396
Batch Size of Tensor Data: 128
Loss of Item: 0.8134014010429382
Batch Size of Tensor Data: 128
Loss of Item: 0.7557589411735535
Batch Size of Tensor Data: 128
Loss of Item: 0.6513674855232239
Batch Size of Tensor Data: 128
Loss of Item: 0.5578280687332153
Batch Size of Tensor Data: 128
Loss of Item: 0.5508901476860046
Batch Si

In [None]:
#Model Evaluation
def evaluate_model(model, test_loader):
  model.eval()    #Set the model into evaluation model
  test_loss = 0
  correct = 0
  with torch.no_grad():       #Used to disable gradient computation within a block of code
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      print(f'Output: {output}')
      loss = criterion(output, target)
      test_loss += loss.item() * data.size(0)
      pred = output.argmax(dim=1)
      correct += pred.eq(target).sum().item()
      print(f'Prediction: {pred}')
      print(f'Target: {target}')
  avg_loss = test_loss / len(test_loader.dataset)
  accuracy = correct / len(test_loader.dataset)
  print(f'Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}')


In [None]:
#Evaluate the model
evaluate_model(model, test_loader)

Output: tensor([[ -4.6545,  -6.5580,  -3.4353,  ..., -10.3584,  -4.8588,  -7.2316],
        [  5.1671,  -3.1794,  -6.2857,  ...,  -8.1622,  15.0544,  -7.3549],
        [ -0.9702,   2.6790,  -6.9634,  ...,  -5.2340,   8.1056,   1.8617],
        ...,
        [  2.7993,  -6.5426,   9.3534,  ..., -11.7028,  -1.5186,  -8.6289],
        [ -3.9793,  -6.0363,  -0.7699,  ...,  -6.5423,  12.9062,  -1.7020],
        [ -5.6009,  -5.3988,  -5.9996,  ...,  -7.0084,  -6.6051,  -7.0479]],
       device='cuda:0')
Prediction: tensor([3, 8, 8, 0, 6, 6, 9, 6, 3, 1, 0, 9, 5, 7, 9, 8, 5, 7, 8, 6, 0, 0, 4, 9,
        5, 2, 4, 0, 9, 6, 2, 5, 4, 5, 9, 2, 4, 9, 9, 5, 4, 6, 5, 6, 0, 9, 3, 9,
        7, 6, 9, 8, 0, 5, 8, 8, 7, 7, 5, 3, 7, 5, 6, 3, 6, 2, 1, 2, 6, 7, 2, 6,
        8, 8, 0, 2, 9, 3, 3, 8, 8, 1, 1, 7, 2, 5, 2, 7, 8, 9, 0, 3, 8, 6, 4, 6,
        6, 2, 0, 7, 4, 5, 6, 3, 1, 1, 3, 6, 8, 7, 4, 0, 6, 2, 1, 3, 0, 4, 2, 0,
        8, 0, 1, 2, 8, 2, 8, 3], device='cuda:0')
Target: tensor([3, 8, 8, 0, 6, 6, 1,

## More Modification on ResNet Structure and Evaluate Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torchsummary import summary

In [None]:
#parameters
batch_size = 128
num_classes = 10  #CIFAR-10 has 10 classes
epochs = 5
learning_rate = 0.001

In [None]:
#Data Transformation Pipeline for CIFAR-10
transform = transforms.Compose([
    transforms.Resize((224, 224)),     #Resnet expect 224*224 Image
    transforms.ToTensor(),      #Convert PIL Image to input tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])      #Normalization for pre-trained models
])

In [None]:
#Loading CIFAR-10 dataset
train_dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

#DataLoader for Batch Processing
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
#Load a pretrained ResNet18 model
model = models.resnet18(pretrained=True)
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
print(model.parameters())

<generator object Module.parameters at 0x7e1e2e44b140>


In [None]:
for param in model.parameters():
  print(param)

Parameter containing:
tensor([[[[-1.0419e-02, -6.1356e-03, -1.8098e-03,  ...,  5.6615e-02,
            1.7083e-02, -1.2694e-02],
          [ 1.1083e-02,  9.5276e-03, -1.0993e-01,  ..., -2.7124e-01,
           -1.2907e-01,  3.7424e-03],
          [-6.9434e-03,  5.9089e-02,  2.9548e-01,  ...,  5.1972e-01,
            2.5632e-01,  6.3573e-02],
          ...,
          [-2.7535e-02,  1.6045e-02,  7.2595e-02,  ..., -3.3285e-01,
           -4.2058e-01, -2.5781e-01],
          [ 3.0613e-02,  4.0960e-02,  6.2850e-02,  ...,  4.1384e-01,
            3.9359e-01,  1.6606e-01],
          [-1.3736e-02, -3.6746e-03, -2.4084e-02,  ..., -1.5070e-01,
           -8.2230e-02, -5.7828e-03]],

         [[-1.1397e-02, -2.6619e-02, -3.4641e-02,  ...,  3.2521e-02,
            6.6221e-04, -2.5743e-02],
          [ 4.5687e-02,  3.3603e-02, -1.0453e-01,  ..., -3.1253e-01,
           -1.6051e-01, -1.2826e-03],
          [-8.3730e-04,  9.8420e-02,  4.0210e-01,  ...,  7.0789e-01,
            3.6887e-01,  1.2455e-01]

In [None]:
#Replace the classifier layer (fully connected layer)
num_ftrs = model.fc.in_features
print(f'Total Input Feature in Fully Connected Layer: {num_ftrs}')

model.fc = nn.Linear(num_ftrs, num_classes)       #Replace output feature 1000 to 10, cause CIFAR10 has 10 classes
print(model.fc)

Total Input Feature in Fully Connected Layer: 512
Linear(in_features=512, out_features=10, bias=True)


In [None]:
#Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
device

device(type='cpu')

In [None]:
#Print Model summary
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
#Optimizer and Loss Function
optimizer = optim.Adam(model.parameters(), lr=learning_rate)     #Only train the classifier layer
criterion = nn.CrossEntropyLoss()

In [None]:
#Training Function
def train_model(model, train_loader, epochs):
  model.train()   #Set the model to training mode
  for epoch in range(epochs):
    total_loss = 0
    correct = 0
    for data, target in train_loader:
      data, target = data.to(device), target.to(device)     #Move the data and target to cuda enabled GPU
      optimizer.zero_grad()     #Reset Gradients
      output = model(data)
      loss = criterion(output, target)
      loss.backward()     #Backpropagation(Calculate derivative of loss with respect to parameters)
      optimizer.step()    #Update weight
      print(f'Loss of Item: {loss.item()}')     # retrieve the scalar value from a loss tensor
      print(f'Batch Size of Tensor Data: {data.size(0)}')   #batch size of a tensor data
      total_loss += loss.item() * data.size(0)      #Accumulates total loss (not per batch).
      pred = output.argmax(dim=1)                   #Gets predicted class from Output(logits).
      correct += pred.eq(target).sum().item()       #Counts correct predictions.
    print(f'Total Loss: {total_loss}')
    print(f'Correct Predictions: {correct}')
    avg_loss = total_loss / len(train_loader.dataset)
    accuracy = correct / len(train_loader.dataset)

    print(f'Epoch: [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')


In [None]:
# Train and evaluate the model
train_model(model, train_loader, epochs)

KeyboardInterrupt: 

In [None]:
#Evaluation Function
def evaluate_model(model, test_loader):
  model.eval()      #Set Model to evaluation mode
  test_loss = 0
  correct = 0
  with torch.no_grad():           #used to disable gradient computation within a block of code
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      print(f'Output: {output}')
      loss = criterion(output, target)
      test_loss += loss.item() * data.size(0)
      pred = output.argmax(dim=1)       #retrieve the indices of the maximum values along a specified dimension in a tensor.
      print(f'Prediction: {pred}')
      print(f'Target: {target}')
      correct += pred.eq(target).sum().item()

  avg_loss = test_loss / len(test_loader.dataset)
  accuracy = correct / len(test_loader.dataset)

  print(f'Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}')


#evaluate the model
evaluate_model(model, test_loader)