In [None]:
import torch
import torchvision.models as models

# Load ResNet18 with its ImageNet-pretrained weights.
# The boolean `pretrained=True` flag has been deprecated since torchvision 0.13
# in favour of the explicit `weights=` enum and emits a warning when used.
# IMAGENET1K_V1 is the checkpoint the legacy flag resolves to, so the loaded
# weights are the same either way.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    # Older torchvision releases only understand the legacy flag.
    model = models.resnet18(pretrained=True)

In [None]:
# Freeze the whole network.
# With `requires_grad` switched off, autograd computes no gradients for these
# tensors, so an optimizer step cannot change the pre-trained weights. This is
# the usual starting point for transfer learning on a new, smaller dataset.
for param in model.parameters():
    param.requires_grad_(False)

In [None]:
# Leave only the classifier head trainable.
# `model.fc` is the final fully-connected layer of the ResNet; every other
# parameter is frozen so the feature-extraction layers keep their pre-trained
# weights while the head adapts to the new task.
#
# The head's parameters are collected by identity first, then a single pass
# over the model sets each flag: True for head parameters, False for the rest.
head_param_ids = {id(p) for p in model.fc.parameters()}
for param in model.parameters():
    param.requires_grad_(id(param) in head_param_ids)

In [None]:
# Count the scalar weights that will actually be trained.
# Each parameter tensor with `requires_grad` set contributes its element count
# (`numel()`); the total is a quick check that the intended layers are
# unfrozen and gives a rough idea of how much of the model is being trained.
trainable_sizes = [p.numel() for p in model.parameters() if p.requires_grad]
trainable_params = sum(trainable_sizes)
# Report the total.
print(f"Number of trainable parameters: {trainable_params}")

In [None]:
import torch.optim as optim # torch.optim provides optimization algorithms (SGD, Adam, RMSprop, etc.)
# that update the weights of the neural network during training to minimize the loss function.
import torch.nn as nn

# Loss function: cross-entropy, the standard choice for multi-class
# classification.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over the trainable parameters only, so frozen
# layers are never handed to the optimizer.
# `lr` is the learning rate; `momentum` helps accelerate SGD along consistent
# gradient directions.
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable, lr=0.001, momentum=0.9)

# Number of full passes over the training data.
NUM_EPOCHS = 10

# `train_loader` is assumed to be a DataLoader defined elsewhere that yields
# (inputs, labels) batches.
for epoch in range(NUM_EPOCHS):
    # Put the model in training mode explicitly; if an earlier cell called
    # model.eval(), BatchNorm/Dropout layers would otherwise stay in
    # inference mode for the whole run.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        # Clear gradients left over from the previous step; PyTorch
        # accumulates them by default.
        optimizer.zero_grad()

        outputs = model(inputs)            # forward pass
        loss = criterion(outputs, labels)  # compare predictions with targets
        loss.backward()                    # backward pass: compute gradients
        optimizer.step()                   # apply the parameter update

        # `.item()` extracts a plain Python float so no graph is kept alive.
        running_loss += loss.item()
    # Report the mean per-batch loss for this epoch.
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

In [None]:
# Switch from head-only training to full fine-tuning.
# `Module.requires_grad_(True)` turns gradient tracking back on for every
# parameter of the model, so all layers become trainable again and the whole
# pre-trained network can adapt to the new dataset.
model.requires_grad_(True)

# Rebuild the optimizer over all parameters with a smaller learning rate than
# the first training phase used. A small step size keeps updates to the
# pre-trained weights gentle and helps convergence stay stable.
optimizer = optim.SGD(params=model.parameters(), lr=1e-4, momentum=0.9)