### Import Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.models as models
from torch.cuda.amp import GradScaler, autocast
import matplotlib.pyplot as plt

### Transform Images Function

In [None]:
# Transform: Resize images and normalize
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize images to 128x128
    transforms.ToTensor(),         # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize for faster training
])


### Loading datasets

In [None]:
# Load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:06<00:00, 25.3MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [None]:
# Load CIFAR-10 dataset
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

Files already downloaded and verified


### Display the data and example classes

In [None]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

print("Dataset loaded. Example classes:", train_dataset.classes)

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Dataset loaded. Example classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


### Creating a subset of the data to avoid large run-time

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Get the CIFAR-10 dataset labels
labels = np.array(train_dataset.targets)

# Split the dataset using stratified sampling
# Limiting to, say, 20% of the original dataset
train_indices, _ = train_test_split(np.arange(len(train_dataset)),
                                    test_size=0.8,  # Keep 20% of the data
                                    stratify=labels)  # Ensure the class distribution is maintained

# Create a subset of the dataset with the selected indices
subset_train_dataset = torch.utils.data.Subset(train_dataset, train_indices)

# Create a DataLoader for the subset of the training data
subset_train_loader = DataLoader(subset_train_dataset, batch_size=64, shuffle=True)


### Building the weakly supervised model

### Training the model

#### Defining the loss function and optimizer

#### Actual training

In [None]:
# Step 1: Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Step 2: Define image transformations (resize to smaller images, normalize)
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize to smaller size
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


### Initializing the model

In [None]:
# Step 4: Load a pretrained ResNet18 model and modify it for CIFAR-10 (10 classes)
model = models.resnet18(pretrained=True)  # Using a pretrained model
model.fc = nn.Linear(model.fc.in_features, 10)  # Modify final layer for CIFAR-10 classes
model.to(device)  # Move model to device (GPU or CPU)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 123MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

### Defining the loss function and optimizer

In [None]:
# Step 5: Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

### Training

In [None]:
from torch.cuda.amp import autocast, GradScaler
import torch

# Initialize GradScaler for mixed precision
scaler = GradScaler()

# Training Loop
epochs = 5  # Set number of epochs
for epoch in range(epochs):
    running_loss = 0.0
    correct = 0
    total = 0
    model.train()  # Set model to training mode

    for images, labels in subset_train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to device

        optimizer.zero_grad()  # Zero the gradients

        # Mixed precision training
        with autocast():  # Enable mixed precision
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Calculate loss

        # Backward pass
        scaler.scale(loss).backward()  # Backpropagation with scaled loss
        scaler.step(optimizer)  # Update weights
        scaler.update()  # Update the scaler

        running_loss += loss.item()

        # Accuracy calculation
        _, predicted = torch.max(outputs, 1)  # Get the predicted class
        total += labels.size(0)  # Total number of samples
        correct += (predicted == labels).sum().item()  # Correct predictions

    # Calculate accuracy
    accuracy = 100 * correct / total

    # Print loss and accuracy for the epoch
    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(subset_train_loader):.4f}, Accuracy: {accuracy:.2f}%")

    # Optionally, save the model after each epoch
    torch.save(model.state_dict(), f"model_epoch_{epoch + 1}.pth")


  scaler = GradScaler()
  with autocast():  # Enable mixed precision


Epoch [1/5], Loss: 0.8283, Accuracy: 72.20%
Epoch [2/5], Loss: 0.4757, Accuracy: 84.10%
Epoch [3/5], Loss: 0.3091, Accuracy: 89.53%
Epoch [4/5], Loss: 0.2617, Accuracy: 91.08%
Epoch [5/5], Loss: 0.1657, Accuracy: 94.51%


### Saving the model

In [None]:
# Save the model state
torch.save(model.state_dict(), 'cifar10_model.pth')

# Optionally, save the optimizer state
torch.save(optimizer.state_dict(), 'optimizer.pth')

# To load the model later:
model.load_state_dict(torch.load('cifar10_model.pth'))


  model.load_state_dict(torch.load('cifar10_model.pth'))


<All keys matched successfully>

### Model Evaluation

In [None]:
def evaluate(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    running_loss = 0.0
    criterion = torch.nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification

    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device

            # Mixed precision evaluation
            with autocast():  # Enable mixed precision
                outputs = model(images)  # Forward pass
                loss = criterion(outputs, labels)  # Calculate loss

            running_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)  # Get the predicted class
            total += labels.size(0)  # Total number of samples
            correct += (predicted == labels).sum().item()  # Correct predictions

    # Calculate and print the evaluation accuracy and average loss
    accuracy = 100 * correct / total
    avg_loss = running_loss / len(test_loader)
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")


#### Defining the test data subset

In [None]:
from torch.utils.data import Subset
import torch
import numpy as np

# Define the fraction of the dataset you want to use for the subset
subset_fraction = 0.2  # 20% of the test data

# Generate random indices for the subset
total_test_samples = len(test_dataset)
subset_size = int(subset_fraction * total_test_samples)

# Randomly sample indices without replacement
subset_indices = np.random.choice(total_test_samples, subset_size, replace=False)

# Create a Subset of the test dataset
subset_test_dataset = Subset(test_dataset, subset_indices)

# Create a DataLoader for the subset
subset_test_loader = DataLoader(subset_test_dataset, batch_size=64, shuffle=False)


In [None]:
evaluate(model, subset_test_loader, device)


  with autocast():  # Enable mixed precision


Test Loss: 0.8753, Test Accuracy: 77.80%


Model 2

Initalizing model with Dropout

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Define a modified ResNet18 model with Dropout
class ModifiedResNet(nn.Module):
    def __init__(self):
        super(ModifiedResNet, self).__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Sequential(
            nn.Linear(self.model.fc.in_features, 256),  # Add intermediate FC layer
            nn.ReLU(),
            nn.Dropout(0.5),  # Dropout with p=0.5
            nn.Linear(256, 10)  # Final output layer for CIFAR-10
        )

    def forward(self, x):
        return self.model(x)

# Instantiate the model
model2 = ModifiedResNet().to(device)


Defining optimizer

In [None]:
optimizer2 = optim.Adam(model2.parameters(), lr=0.001, weight_decay=1e-4)

Train model 2

In [None]:
from torch.cuda.amp import autocast, GradScaler
import torch

# Initialize GradScaler for mixed precision
scaler = GradScaler()
epochs = 5
for epoch in range(epochs):
    model2.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer2.zero_grad()
        outputs = model2(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer2.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")

  scaler = GradScaler()


Epoch [1/5], Loss: 0.8820, Train Accuracy: 71.63%
Epoch [2/5], Loss: 0.6037, Train Accuracy: 81.02%
Epoch [3/5], Loss: 0.5103, Train Accuracy: 83.90%
Epoch [4/5], Loss: 0.4328, Train Accuracy: 86.30%
Epoch [5/5], Loss: 0.3793, Train Accuracy: 87.98%


In [None]:
# Save the model state
torch.save(model2.state_dict(), 'cifar10_model2.pth')

# Optionally, save the optimizer state
torch.save(optimizer.state_dict(), 'optimizer2.pth')

# To load the model later:
model2.load_state_dict(torch.load('cifar10_model2.pth'))

  model2.load_state_dict(torch.load('cifar10_model2.pth'))


<All keys matched successfully>

Evaluate Model 2

In [None]:
def evaluate(model2, test_loader, device):
    model2.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    running_loss = 0.0
    criterion = torch.nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification

    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device

            # Mixed precision evaluation
            with autocast():  # Enable mixed precision
                outputs = model(images)  # Forward pass
                loss = criterion(outputs, labels)  # Calculate loss

            running_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)  # Get the predicted class
            total += labels.size(0)  # Total number of samples
            correct += (predicted == labels).sum().item()  # Correct predictions

    # Calculate and print the evaluation accuracy and average loss
    accuracy = 100 * correct / total
    avg_loss = running_loss / len(test_loader)
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")


In [None]:
# Evaluate the new model
evaluate(model2, subset_test_loader, device)


  with autocast():  # Enable mixed precision


Test Loss: 0.8753, Test Accuracy: 77.80%


Model 3

Initialize model 3 with ResNet50 a bit more complex

In [None]:
class ModifiedResNet50(nn.Module):
    def __init__(self):
        super(ModifiedResNet50, self).__init__()
        self.model = models.resnet50(pretrained=True)
        self.model.fc = nn.Sequential(
            nn.Linear(self.model.fc.in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        return self.model(x)

# Instantiate the larger model
model3 = ModifiedResNet50().to(device)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 179MB/s]


Training of Model 3 including the optimizer3

In [None]:
from torch.optim.lr_scheduler import CosineAnnealingLR

# Define optimizer and scheduler
optimizer3 = optim.Adam(model3.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = CosineAnnealingLR(optimizer3, T_max=10, eta_min=0.00001)  # Cosine annealing over 10 epochs

# Training loop with scheduler
epochs = 10
for epoch in range(epochs):
    model3.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer3.zero_grad()
        outputs = model3(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer3.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    scheduler.step()  # Update learning rate using scheduler

    train_accuracy = 100 * correct / total
    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")


Epoch [1/10], Loss: 1.4573, Train Accuracy: 45.67%
Epoch [2/10], Loss: 1.0140, Train Accuracy: 65.03%
Epoch [3/10], Loss: 0.8032, Train Accuracy: 73.09%
Epoch [4/10], Loss: 0.6559, Train Accuracy: 78.55%
Epoch [5/10], Loss: 0.5486, Train Accuracy: 82.00%
Epoch [6/10], Loss: 0.4352, Train Accuracy: 85.90%
Epoch [7/10], Loss: 0.3329, Train Accuracy: 89.15%
Epoch [8/10], Loss: 0.2299, Train Accuracy: 92.59%
Epoch [9/10], Loss: 0.1511, Train Accuracy: 95.17%
Epoch [10/10], Loss: 0.1032, Train Accuracy: 96.72%


In [None]:
# Save the model state
torch.save(model3.state_dict(), 'cifar10_model3.pth')

# Optionally, save the optimizer state
torch.save(optimizer.state_dict(), 'optimizer3.pth')

# To load the model later:
model3.load_state_dict(torch.load('cifar10_model3.pth'))

  model3.load_state_dict(torch.load('cifar10_model3.pth'))


<All keys matched successfully>

Evaluate the model

In [None]:
def evaluate(model3, test_loader, device):
    model3.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    running_loss = 0.0
    criterion = torch.nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification

    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to device

            # Mixed precision evaluation
            with autocast():  # Enable mixed precision
                outputs = model(images)  # Forward pass
                loss = criterion(outputs, labels)  # Calculate loss

            running_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)  # Get the predicted class
            total += labels.size(0)  # Total number of samples
            correct += (predicted == labels).sum().item()  # Correct predictions

    # Calculate and print the evaluation accuracy and average loss
    accuracy = 100 * correct / total
    avg_loss = running_loss / len(test_loader)
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")

In [None]:
# Evaluate the new model
evaluate(model3, subset_test_loader, device)

  with autocast():  # Enable mixed precision


Test Loss: 0.8753, Test Accuracy: 77.80%


Load the models

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import os

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the saved models
model1 = model.to(device)
model1.load_state_dict(torch.load('/content/cifar10_model.pth'))

model2 = ModifiedResNet().to(device)
model2.load_state_dict(torch.load('/content/cifar10_model2.pth'))

model3 = ModifiedResNet50().to(device)
model3.load_state_dict(torch.load('/content/cifar10_model3.pth'))

# Set all models to evaluation mode
model1.eval()
model2.eval()
model3.eval()


  model1.load_state_dict(torch.load('/content/cifar10_model.pth'))
  model2.load_state_dict(torch.load('/content/cifar10_model2.pth'))
  model3.load_state_dict(torch.load('/content/cifar10_model3.pth'))


ModifiedResNet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
   

Define Image Transformation

In [None]:
# Define transformations (same as used during training)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize to the input size of the model
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to match training
])


 Load Provided Images

In [None]:
# Directory containing test images
image_dir = './test_images'
image_paths = [os.path.join(image_dir, img) for img in os.listdir(image_dir) if img.endswith(('.png', '.jpg', '.jpeg'))]

# Load and preprocess images
def load_images(image_paths):
    images = []
    for path in image_paths:
        image = Image.open(path).convert('RGB')  # Convert to RGB
        image = transform(image)  # Apply transformations
        images.append(image)
    return torch.stack(images)  # Stack into a batch

# Prepare batch of images
images = load_images(image_paths).to(device)


Test Images on Models

In [None]:
# Define a function to get predictions
def predict(model, images, classes):
    outputs = model(images)  # Forward pass
    _, predicted = torch.max(outputs, 1)  # Get class indices
    return [classes[p] for p in predicted]  # Map indices to class names

# CIFAR-10 class names
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Test all models
print("Testing Model 1...")
predictions1 = predict(model1, images, classes)

print("Testing Model 2...")
predictions2 = predict(model2, images, classes)

print("Testing Model 3...")
predictions3 = predict(model3, images, classes)

# Print predictions for each image
for idx, img_path in enumerate(image_paths):
    print(f"\nImage: {os.path.basename(img_path)}")
    print(f"Model 1 Prediction: {predictions1[idx]}")
    print(f"Model 2 Prediction: {predictions2[idx]}")
    print(f"Model 3 Prediction: {predictions3[idx]}")


Testing Model 1...
Testing Model 2...
Testing Model 3...

Image: img3.jpg
Model 1 Prediction: frog
Model 2 Prediction: bird
Model 3 Prediction: automobile

Image: img1.jpg
Model 1 Prediction: airplane
Model 2 Prediction: airplane
Model 3 Prediction: automobile

Image: img5.jpg
Model 1 Prediction: truck
Model 2 Prediction: truck
Model 3 Prediction: automobile

Image: img2.jpg
Model 1 Prediction: truck
Model 2 Prediction: automobile
Model 3 Prediction: automobile

Image: img4.jpg
Model 1 Prediction: dog
Model 2 Prediction: dog
Model 3 Prediction: automobile
