<a href="https://colab.research.google.com/github/NoobCoder-dweeb/AI-HandsOn-Journey/blob/main/notes/Fine_Tuning_Pre_Trained_Model_using_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# load pretrained resnet
model = models.resnet18(pretrained=True)

# modify the last layer to match MNIST classes
model.fc = nn.Linear(model.fc.in_features, 10)

# set the model to training mode and use GPU if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 186MB/s]


In [10]:
# transform images to 224x224 and normalise
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3,1,1)),
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)),
])

# load the MNIST dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

In [9]:
print(trainset.data.shape)
print(trainset.targets.shape)

torch.Size([60000, 28, 28])
torch.Size([60000])


In [11]:
# define loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# learning rate scheduler to adjust the learning rate
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [12]:
#fine tune the model
num_epochs = 1
#set num_epochs to a smaller number like 1 and use T4 GPU, wait for 4 minute may be, if training is taking too long in colab.

for epoch in range(num_epochs):
  running_loss = 0.
  for images, labels in trainloader:
    images, labels = images.to(device), labels.to(device)

    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  # step the scheduler after each epoch
  scheduler.step()
  print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}")

print('Fine-tuning complete!')

Epoch [1/1], Loss: 0.0671
Fine-tuning complete!


In [13]:
# save the fine-tuned model
torch.save(model.state_dict(), 'finetuned_resnet18_mnist.pth')
print('Model saved!')

Model saved!


In [19]:
# set the model to evaluation mode
model.eval()

correct = 0
total = 0

with torch.no_grad():
  for images,labels in testloader:
    images, labels = images.to(device), labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f"Accuracy of the fine-tuned model on the test images: {100 * correct/total:.2f}%")

Accuracy of the fine-tuned model on the test images: 99.17%


In [20]:
# load the model for inference
model1 = models.resnet18()
model1.fc = nn.Linear(model1.fc.in_features, 10)
model.load_state_dict(torch.load('finetuned_resnet18_mnist.pth', weights_only=True))
model.eval()
model = model.to(device)


# make a prediction on a single image from the test set
test_image, _ = testset[0]
test_image = test_image.unsqueeze(0).to(device) # add a batch dimension and move to device

with torch.no_grad():
    output = model(test_image)
    _, predicted = torch.max(output.data, 1)
    print(f"Output of torch.max(outputs.data,1): {torch.max(outputs.data, 1)}")
print('Predicted:', predicted.item())

Output of torch.max(outputs.data,1): torch.return_types.max(
values=tensor([14.9963,  8.2570, 13.8354, 12.2047, 13.6529, 13.3548, 10.5428,  9.0486,
        14.9766,  9.0856, 13.7272, 17.0738, 12.1201, 11.5918, 12.6828, 15.8459],
       device='cuda:0'),
indices=tensor([3, 0, 1, 9, 5, 9, 9, 3, 2, 6, 9, 4, 6, 5, 9, 7], device='cuda:0'))
Predicted: 7
