In [1]:
from google.colab import drive
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,random_split
from matplotlib import pyplot as plt
import torchvision
import numpy as np

In [2]:
drive.mount('/content/drive')

Mounted at /content/drive


### 1. Load Pre-trained ResNet50 and Modify the Head

We'll load a pre-trained ResNet50 model from `torchvision.models`. By default, ResNet50 is trained on ImageNet, which has 1000 classes. We need to replace its final fully connected layer (the 'head') with a new one that outputs the correct number of classes for your specific problem. For demonstration, I'll assume 10 classes, but you should adjust `num_classes` to reflect your actual dataset.

In [3]:
import torch.nn as nn
import torchvision.models as models

# Load a pre-trained ResNet50 model
# pretrained=True downloads the weights trained on ImageNet
model = models.resnet50(pretrained=True)

# Freeze all parameters in the network
# for param in model.parameters():
#     param.requires_grad = False

# Get the number of features in the last layer of the original model
num_ftrs = model.fc.in_features

# Define the number of classes in your dataset
# IMPORTANT: Change this to the actual number of classes in your data
num_classes = 10

# Replace the final fully connected layer with a new one for your classes
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the model to the appropriate device (GPU if available, else CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print(f"Model loaded and moved to {device}")
print(model)




Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 168MB/s]


Model loaded and moved to cpu
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, k

### 2. Prepare Your Data

Next, you need to prepare your dataset. This involves defining transformations to preprocess your images (e.g., resizing, cropping, normalization) and then loading them using `torchvision.datasets` and `DataLoader`.

For demonstration, I'll use the CIFAR-10 dataset. **You will need to replace this section with code to load and preprocess your own dataset.** If your data is in folders, `datasets.ImageFolder` is a common choice.

In [4]:
# Define transformations for the training and validation sets
# ImageNet models expect input images to be normalized in a specific way
# mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224), # ResNet50 expects 224x224 input
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

transform_val = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load a sample dataset (e.g., CIFAR-10) for demonstration
# IMPORTANT: Replace these with your actual dataset loading code
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
val_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)

# Create data loaders
batch_size = 32 # You can adjust this
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

print(f"Training data size: {len(train_dataset)}")
print(f"Validation data size: {len(val_dataset)}")


100%|██████████| 170M/170M [00:05<00:00, 31.6MB/s]


Training data size: 50000
Validation data size: 10000


### 3. Define Loss Function and Optimizer

We need to specify a loss function (e.g., Cross-Entropy Loss for classification) and an optimizer (e.g., Adam or SGD) to update the model's weights during training.

In [5]:
import torch.optim as optim

# Define loss function
criterion = nn.CrossEntropyLoss()

# Define optimizer
# Only optimize the parameters of the newly added final layer if you froze the base model
# If you unfroze the base model (as commented out above), then optimize all parameters
optimizer = optim.Adam(model.parameters(), lr=0.0001) # Reduced learning rate

print("Loss function and optimizer defined.")

Loss function and optimizer defined.


### 4. Training Loop

Finally, here's a basic training loop. You'll iterate over your data, perform forward and backward passes, and update the model's weights. You might want to add validation steps, learning rate scheduling, and early stopping for more robust training.

In [8]:
num_epochs = 3 # Adjusted to 3 epochs as requested
print("Starting the epoch.")
for epoch in range(num_epochs):
    model.train() # Set the model to training mode
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0

    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct_predictions / total_predictions

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    # --- Optional: Validation step after each epoch ---
    model.eval() # Set the model to evaluation mode
    val_loss = 0.0
    val_correct_predictions = 0
    val_total_predictions = 0
    with torch.no_grad(): # No need to compute gradients during validation
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            val_total_predictions += labels.size(0)
            val_correct_predictions += (predicted == labels).sum().item()

    val_epoch_loss = val_loss / len(val_loader)
    val_epoch_accuracy = 100 * val_correct_predictions / val_total_predictions
    print(f'Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_accuracy:.2f}%')

print('\nTraining complete!')

Starting the epoch.


KeyboardInterrupt: 