First, we import all the modules required. For this workshop, we will be using PyTorch (together with torchvision for the dataset and model, and matplotlib for plotting).


In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt

The cell below detects whether CUDA is available. If you have an NVIDIA GPU, it will be used for model training instead of relying entirely on the CPU.

In [7]:
# Select the compute device: prefer the CUDA GPU when PyTorch can see one,
# otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device_name = "cuda" if cuda_available else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


Now, the model we will be focusing on is ResNet-18. ResNet is designed for inputs of size 224 x 224 x 3, but the images in the dataset are 28 x 28 x 1. So we first resize each image to 224 x 224. The images also have only 1 channel while the model takes 3 channels, so we use the `Grayscale` transform with `num_output_channels=3` to replicate the single channel three times. Finally, we convert the images to tensors and normalize them.

In [10]:
# Preprocessing pipeline applied to every image as it is loaded:
#   1. resize to the 224x224 resolution fed to the network
#   2. replicate the single grey channel into 3 identical channels
#   3. convert the PIL image to a float tensor
#   4. normalize each channel with the ImageNet mean / std
channel_mean = (0.485, 0.456, 0.406)
channel_std = (0.229, 0.224, 0.225)

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform = transforms.Compose(preprocessing_steps)


This loads the MNIST dataset, a digit-classification dataset made up of images of handwritten digits from 0 to 9, and wraps it in a `DataLoader` that serves shuffled batches of 64 images.

In [None]:
# MNIST training split; downloaded into ./data on first use, with the
# preprocessing pipeline applied to each image on access.
mnist_options = dict(root='./data', train=True, download=True, transform=transform)
train_dataset = datasets.MNIST(**mnist_options)

# Serve the training set in shuffled mini-batches of 64 images.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

This loads the ResNet-18 model (without pretrained weights). By default its final layer outputs 1000 classes, but we only need 10 (the digits 0-9), so we replace the final fully connected layer with one that outputs 10 classes.

In [None]:
# Build an untrained ResNet-18 and swap its classification head for a
# 10-way linear layer (one output per digit), then move it to the device.
backbone = models.resnet18(weights=None)
head_inputs = backbone.fc.in_features
backbone.fc = nn.Linear(head_inputs, 10)
model = backbone.to(device)

This initializes the loss function (cross-entropy) and the optimizer (Adam) used for training.


In [None]:
# Loss function: cross-entropy between the model's class scores and the labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with a learning rate of 1e-4.
trainable_params = model.parameters()
optimizer = torch.optim.Adam(trainable_params, lr=1e-4)

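This sets the number of epochs (the number of full passes the model makes over the training dataset), and records the number of batches per epoch and the total number of images for progress logging.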

In [None]:
# Number of full passes over the training set.
num_epochs = 2

# Sizes used only for progress reporting during training.
total_images = len(train_dataset)  # images in the training split
total_steps = len(train_loader)    # mini-batches per epoch


Train the model, printing progress (step, images processed, and loss) as training runs.


In [None]:
# Train the model for `num_epochs` passes over `train_loader`, printing a
# single-line progress readout per step and a summary line per epoch.

# Explicitly put the model in training mode so layers such as BatchNorm use
# batch statistics, even if an earlier cell left the model in eval mode.
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0  # sum of the per-batch mean losses in this epoch
    processed = 0       # number of images seen so far in this epoch

    for batch_idx, (imgs, labels) in enumerate(train_loader):
        imgs, labels = imgs.to(device), labels.to(device)

        # Standard optimization step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once; each .item() call forces a device sync.
        batch_loss = loss.item()
        running_loss += batch_loss

        # Accumulate the real batch sizes. Multiplying the step index by the
        # current batch size under-reports on the final, smaller batch
        # (e.g. 938 * 32 = 30016 instead of 60000 for MNIST with batch 64).
        processed += imgs.size(0)

        # Print progress (carriage return keeps it on one updating line)
        print(f"Epoch [{epoch+1}/{num_epochs}] | "
              f"Step [{batch_idx+1}/{total_steps}] | "
              f"Images: {processed}/{total_images} | "
              f"Loss: {batch_loss:.4f}", end="\r")

    print(f"\n✅ Epoch [{epoch+1}/{num_epochs}] completed. "
          f"Average Loss: {running_loss / total_steps:.4f}\n")

print("🎉 Training complete!")

Visualize the model's predictions for 15 images from the last training batch, using matplotlib.

In [None]:
# Plot up to 15 images from the last training batch with the model's
# predicted digit above each one. `imgs` is the final batch left over from
# the training loop.

# Re-run the batch through the trained model in eval mode instead of reusing
# the training-time `outputs`: those were computed in train mode and before
# the last optimizer step, so they do not reflect the final model.
was_training = model.training
model.eval()
with torch.no_grad():
    predicted = model(imgs).argmax(dim=1)  # predicted class per image
model.train(was_training)  # restore whatever mode the model was in

# Denormalize images (the preprocessing used ImageNet normalization)
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Move tensors to CPU for plotting
imgs_cpu = imgs.detach().cpu()
predicted_cpu = predicted.detach().cpu()

# Undo the normalization and keep the pixel range valid for imshow
images_denorm = torch.clamp(imgs_cpu * std + mean, 0, 1)

# The last batch of an epoch can be smaller than the others, so never index
# past the number of images actually available.
num_to_show = min(15, images_denorm.size(0))

# squeeze=False keeps `axes` two-dimensional even when only one image is shown
fig, axes = plt.subplots(1, num_to_show, figsize=(num_to_show, 3), squeeze=False)
for i, ax in enumerate(axes[0]):
    ax.imshow(images_denorm[i].permute(1, 2, 0))  # C×H×W → H×W×C
    ax.set_title(f"Pred: {predicted_cpu[i].item()}", fontsize=8)
    ax.axis('off')

plt.tight_layout()
plt.show()