In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [6]:
# %pip install torchvision

In [7]:
from torchvision import models

In [8]:
# Step 1: build a ResNet-18 and initialise it from the pre-trained checkpoint.
# Passing weights='IMAGENET1K_V1' selects the ImageNet-trained weights
# (downloaded to the local torch hub cache on first use).
model_ft = models.resnet18(weights='IMAGENET1K_V1')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\sande/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:04<00:00, 9.39MB/s]


In [17]:
# Requires the torchinfo package:
# %pip install torchinfo
from torchinfo import summary

# Show a per-layer table (output shape and parameter count) for a single
# 3-channel 224x224 input; the bare call is the cell's displayed result.
summary(model_ft, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [1, 1000]                 --
├─Conv2d: 1-1                            [1, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [1, 64, 112, 112]         128
├─ReLU: 1-3                              [1, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [1, 64, 56, 56]           --
├─Sequential: 1-5                        [1, 64, 56, 56]           --
│    └─BasicBlock: 2-1                   [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-1                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-2             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-3                    [1, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                  [1, 64, 56, 56]           36,864
│    │    └─BatchNorm2d: 3-5             [1, 64, 56, 56]           128
│    │    └─ReLU: 3-6                    [1, 64, 56, 56]           --
│

In [18]:
from torch.utils.tensorboard import SummaryWriter
import torch

# Export the model's computation graph to TensorBoard using SummaryWriter's
# default log directory.
#
# add_graph() traces the model by running a forward pass on the example input.
# If that pass ran in training mode, the random dummy batch would be folded
# into the pre-trained BatchNorm running statistics, so the model is switched
# to eval mode for the trace and its previous mode is restored afterwards.
was_training = model_ft.training

# Create the example batch (1 image, 3 channels, 224x224) on the same device
# as the model's weights so the trace also works after the model has been
# moved to a GPU.
dummy_input = torch.randn(
    1, 3, 224, 224, device=next(model_ft.parameters()).device
)

model_ft.eval()
try:
    # The context manager closes the writer even if tracing raises.
    with SummaryWriter() as writer:
        writer.add_graph(model_ft, dummy_input)
finally:
    # Put the model back into whichever mode it was in before the export.
    model_ft.train(was_training)


In [13]:
# Print the module tree of model_ft (each submodule with its constructor arguments).
print(model_ft)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
# Step 2: read how many input features the final fully connected layer
# (model_ft.fc) expects; a replacement head must accept this many inputs.
model_ftrc = model_ft.fc.in_features

# Display the value as the cell output.
model_ftrc

512

In [27]:
# Select the compute device: the first CUDA GPU when CUDA is available,
# otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Display the chosen device as the cell output.
device

device(type='cpu')

In [20]:
# Choose the target device (first CUDA GPU if available, else CPU) and place
# the model on it.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model_ft = model_ft.to(device)

# NOTE(review): the message below says the head was modified, but no visible
# cell replaces model_ft.fc -- confirm the head-replacement step exists.
print("Model loaded and head modified.")

Model loaded and head modified.


In [26]:
import torch.optim as optim

# Choose which parameters the optimizer will update.
# Here every parameter of model_ft is handed to the optimizer (full
# fine-tuning); nothing is frozen in this cell, and the low learning rate
# below is what keeps the pre-trained weights from changing too quickly.
#
# Alternative (feature extraction): freeze everything, then re-enable
# gradients only for the final fc layer:
# for param in model_ft.parameters():
#     param.requires_grad = False
# model_ft.fc.weight.requires_grad = True
# model_ft.fc.bias.requires_grad = True

# model_ft.parameters() returns a one-shot generator. Materialise it into a
# list so params_to_update can still be iterated or inspected after the
# optimizer has consumed it (a spent generator would silently yield nothing).
params_to_update = list(model_ft.parameters())

# 4. Define the optimizer with a very low learning rate, as is usual when
# fine-tuning pre-trained weights.
fine_tune_lr = 1e-5
optimizer_ft = optim.SGD(params_to_update, lr=fine_tune_lr, momentum=0.9)

# 5. Define the loss function (cross-entropy for classification).
criterion = nn.CrossEntropyLoss()

print(f"Optimizer configured with LR: {fine_tune_lr}")

Optimizer configured with LR: 1e-05
