Arjun Bhan (AB5666) and Leah Uzzan (lu2166)

In [None]:
# Function to load a single batch of CIFAR-10 data
def load_cifar10_batch(file):
    """Unpickle one CIFAR-10 batch file and return its contents.

    Args:
        file: Path of the pickled batch file to read.

    Returns:
        The unpickled object. The file is decoded with ``encoding='bytes'``,
        so Python-2 string keys/values come back as ``bytes``.
    """
    # Imported locally: none of the visible notebook cells imports pickle.
    import pickle

    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this on batch files from a trusted source.
    with open(file, 'rb') as fo:
        # Named `batch` rather than `dict` so the builtin is not shadowed.
        batch = pickle.load(fo, encoding='bytes')
    return batch

In [None]:
# Location of the raw CIFAR-10 python batch files
data_folder = '/kaggle/input/Cifar10-ADL/cifar-10-batches-py/'  # Replace with your path
# Five numbered training batches followed by the single test batch
batch_files = [f'data_batch_{idx}' for idx in range(1, 6)] + ['test_batch']
#data_batches = [load_cifar10_batch(os.path.join(data_folder, f'{file}')) for file in batch_files]

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

In [None]:
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import StepLR

# Resize + normalize pipeline. No dataset in the cells shown here uses it; it is
# kept so that any later cell referring to `transform` keeps working.
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Data augmentation (training only): random horizontal flip and a rotation of
# up to 10 degrees, then conversion to a tensor and per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Deterministic pipeline for evaluation data: tensor conversion + normalization only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10. The training images are opened twice: once with augmentation
# (for training) and once with the evaluation transform (for validation).
# random_split only wraps indices around a shared dataset object, so without the
# second view the validation samples would also be randomly flipped/rotated.
full_train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
# download=False: the line above has already fetched the archive into ./data.
full_train_eval_dataset = datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_test)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Splitting full training dataset into training (80%) and validation (20%) sets.
# A seeded generator makes the split identical on every Restart & Run All.
train_size = int(0.8 * len(full_train_dataset))
validation_size = len(full_train_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, validation_split = random_split(
    full_train_dataset, [train_size, validation_size], generator=split_generator
)
# Same held-out indices, but read through the non-augmenting view.
validation_dataset = torch.utils.data.Subset(full_train_eval_dataset, validation_split.indices)

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43228490.08it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Traditional VGGNet

In [None]:
class VGG16Features(nn.Module):
    """Convolutional part of VGG16: five stages of 3x3 conv + ReLU, each closed by a 2x2 max-pool.

    The stage widths are 64, 128, 256, 512 and 512 channels with 2, 2, 3, 3 and
    3 convolutions respectively. All layers live in ``self.features`` (an
    ``nn.Sequential``) in execution order.
    """

    def __init__(self):
        super().__init__()
        # (output channels, number of conv layers) for each stage
        stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
        stack = []
        in_ch = 3  # the first conv takes 3 input channels
        for width, depth in stage_plan:
            for _ in range(depth):
                stack.append(nn.Conv2d(in_ch, width, kernel_size=3, padding=1))
                stack.append(nn.ReLU(inplace=True))
                in_ch = width
            # Each stage ends by halving the spatial resolution
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the resulting feature map."""
        return self.features(x)


In [None]:
class VGG16Classifier(nn.Module):
    """Fully connected head: 512 -> 4096 -> 4096 -> ``num_classes`` with ReLU and dropout in between.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The input feature size is 512 (from the conv layers) * 1 * 1
        flat_features = 512 * 1 * 1
        hidden_units = 4096
        head_layers = [
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map already-flattened features ``x`` to class logits."""
        logits = self.classifier(x)
        return logits

In [None]:
class ModifiedVGG16(nn.Module):
    """Plain VGG16: the convolutional feature extractor followed by the fully connected head.

    Args:
        num_classes: Number of logits produced by the classifier head.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.features = VGG16Features()
        self.classifier = VGG16Classifier(num_classes)

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        x = self.features(x)
        # flatten (not view) so a non-contiguous feature map, e.g. channels-last,
        # is handled instead of raising.
        x = torch.flatten(x, start_dim=1)
        output = self.classifier(x)
        return output

In [None]:
from tqdm import tqdm

# Function to train and validate the model
def train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and print train/validation loss and accuracy after every epoch.

    Args:
        model: Network to optimize; batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(inputs, labels)`` batches used for the
            gradient updates.
        validation_loader: Iterable of ``(inputs, labels)`` batches evaluated
            with gradients disabled after each epoch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Called as ``scheduler.step(val_loss)`` once per epoch, so it
            must accept a metric (e.g. ``ReduceLROnPlateau``).
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The metrics are only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        train_correct = 0
        train_loss = 0.0
        train_total = 0
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes the criterion returns a batch mean).
            train_loss += loss.item() * inputs.size(0)
            # detach() replaces the discouraged `.data` access
            predicted = outputs.detach().argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            loss.backward()
            optimizer.step()

        train_loss /= train_total
        train_accuracy = 100 * train_correct / train_total

        # Validation loop
        model.eval()
        val_correct = 0
        val_loss = 0.0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= val_total
        val_accuracy = 100 * val_correct / val_total

        print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # The scheduler monitors the mean validation loss of this epoch
        scheduler.step(val_loss)

In [None]:
# Evaluation Function
def evaluate_model(model, test_loader):
    """Print the top-1 accuracy (in percent) of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and batches are
            moved to the device its parameters live on.
        test_loader: Iterable of batches whose element 0 holds the inputs and
            element 1 the integer labels.

    Returns:
        None. The accuracy is only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(run_device), data[1].to(run_device)
            outputs = model(images)
            # argmax over the class dimension; no `.data` needed under no_grad
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total}%')


In [None]:
# Model, Loss Function, and Optimizer
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ModifiedVGG16(num_classes=10)
model = model.to(device)
# AdamW over all model parameters
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
# ReduceLROnPlateau: halve the learning rate once the monitored loss stops improving (patience of 5 epochs)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Cross-entropy loss computed on the model's output logits
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model built in the previous cell for 20 epochs; train_model prints the metrics after each epoch
train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=20)

100%|██████████| 625/625 [00:27<00:00, 22.45it/s]


Epoch 1, Training Loss: 2.0716, Training Accuracy: 17.53%, Validation Loss: 1.9309, Validation Accuracy: 23.60%


100%|██████████| 625/625 [00:27<00:00, 23.14it/s]


Epoch 2, Training Loss: 1.7874, Training Accuracy: 29.07%, Validation Loss: 1.6865, Validation Accuracy: 35.51%


100%|██████████| 625/625 [00:26<00:00, 23.66it/s]


Epoch 3, Training Loss: 1.5392, Training Accuracy: 40.95%, Validation Loss: 1.4256, Validation Accuracy: 44.59%


100%|██████████| 625/625 [00:26<00:00, 23.41it/s]


Epoch 4, Training Loss: 1.3448, Training Accuracy: 48.95%, Validation Loss: 1.2618, Validation Accuracy: 52.86%


100%|██████████| 625/625 [00:26<00:00, 23.50it/s]


Epoch 5, Training Loss: 1.1998, Training Accuracy: 55.51%, Validation Loss: 1.1706, Validation Accuracy: 57.19%


100%|██████████| 625/625 [00:27<00:00, 23.12it/s]


Epoch 6, Training Loss: 1.0845, Training Accuracy: 60.87%, Validation Loss: 1.0619, Validation Accuracy: 62.39%


100%|██████████| 625/625 [00:26<00:00, 23.37it/s]


Epoch 7, Training Loss: 0.9775, Training Accuracy: 65.28%, Validation Loss: 1.0785, Validation Accuracy: 62.90%


100%|██████████| 625/625 [00:26<00:00, 23.47it/s]


Epoch 8, Training Loss: 0.9059, Training Accuracy: 68.11%, Validation Loss: 0.9320, Validation Accuracy: 67.82%


100%|██████████| 625/625 [00:26<00:00, 23.30it/s]


Epoch 9, Training Loss: 0.8208, Training Accuracy: 71.18%, Validation Loss: 0.8371, Validation Accuracy: 70.97%


100%|██████████| 625/625 [00:27<00:00, 23.14it/s]


Epoch 10, Training Loss: 0.7724, Training Accuracy: 72.98%, Validation Loss: 0.8307, Validation Accuracy: 71.28%


100%|██████████| 625/625 [00:26<00:00, 23.36it/s]


Epoch 11, Training Loss: 0.7124, Training Accuracy: 75.26%, Validation Loss: 0.7975, Validation Accuracy: 72.46%


100%|██████████| 625/625 [00:26<00:00, 23.31it/s]


Epoch 12, Training Loss: 0.6664, Training Accuracy: 76.92%, Validation Loss: 0.8151, Validation Accuracy: 71.97%


100%|██████████| 625/625 [00:27<00:00, 23.08it/s]


Epoch 13, Training Loss: 0.6361, Training Accuracy: 78.23%, Validation Loss: 0.7226, Validation Accuracy: 75.88%


100%|██████████| 625/625 [00:26<00:00, 23.16it/s]


Epoch 14, Training Loss: 0.5870, Training Accuracy: 79.78%, Validation Loss: 0.7241, Validation Accuracy: 75.60%


100%|██████████| 625/625 [00:26<00:00, 23.49it/s]


Epoch 15, Training Loss: 0.5561, Training Accuracy: 80.92%, Validation Loss: 0.6921, Validation Accuracy: 77.10%


100%|██████████| 625/625 [00:26<00:00, 23.25it/s]


Epoch 16, Training Loss: 0.5248, Training Accuracy: 82.02%, Validation Loss: 0.7286, Validation Accuracy: 76.97%


100%|██████████| 625/625 [00:26<00:00, 23.19it/s]


Epoch 17, Training Loss: 0.4968, Training Accuracy: 82.98%, Validation Loss: 0.7318, Validation Accuracy: 76.44%


100%|██████████| 625/625 [00:26<00:00, 23.23it/s]


Epoch 18, Training Loss: 0.4608, Training Accuracy: 84.48%, Validation Loss: 0.6579, Validation Accuracy: 78.47%


100%|██████████| 625/625 [00:27<00:00, 22.61it/s]


Epoch 19, Training Loss: 0.4359, Training Accuracy: 85.41%, Validation Loss: 0.6679, Validation Accuracy: 78.38%


100%|██████████| 625/625 [00:26<00:00, 23.35it/s]


Epoch 20, Training Loss: 0.4098, Training Accuracy: 86.25%, Validation Loss: 0.6872, Validation Accuracy: 78.67%


In [None]:
# Evaluate the trained model on the CIFAR-10 test loader (prints the accuracy; no training happens here)
evaluate_model(model, test_loader)

Accuracy: 80.02%


VGGNet with PNet

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PNet(nn.Module):
    """Small convolutional branch that turns a feature map into a one-channel attention map in (0, 1).

    Args:
        input_channels: Channel count of the incoming feature map.
        attention_channels: Channel count used by the two hidden conv layers.
    """

    def __init__(self, input_channels, attention_channels):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size of the input
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(input_channels, attention_channels, **conv_kwargs)
        self.conv2 = nn.Conv2d(attention_channels, attention_channels, **conv_kwargs)
        # Final conv collapses the hidden channels into a single attention channel
        self.output_layer = nn.Conv2d(attention_channels, 1, **conv_kwargs)

    def forward(self, x):
        """Return the attention map for feature map ``x`` (one channel, values squashed by a sigmoid)."""
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden))
        # Sigmoid bounds every attention value between 0 and 1
        return torch.sigmoid(self.output_layer(hidden))


In [None]:
class VGG16Features(nn.Module):
    """Convolutional part of VGG16: five stages of 3x3 conv + ReLU, each closed by a 2x2 max-pool.

    Same layer stack as the ``VGG16Features`` defined earlier in this notebook;
    running this cell rebinds the name. Stage widths are 64, 128, 256, 512 and
    512 channels with 2, 2, 3, 3 and 3 convolutions respectively.
    """

    def __init__(self):
        super().__init__()
        # (output channels, number of conv layers) for each stage
        stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
        stack = []
        in_ch = 3  # the first conv takes 3 input channels
        for width, depth in stage_plan:
            for _ in range(depth):
                stack.append(nn.Conv2d(in_ch, width, kernel_size=3, padding=1))
                stack.append(nn.ReLU(inplace=True))
                in_ch = width
            # Each stage ends by halving the spatial resolution
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.features = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the resulting feature map."""
        return self.features(x)


In [None]:
class VGG16Classifier(nn.Module):
    """Fully connected head: 512 -> 4096 -> 4096 -> ``num_classes`` with ReLU and dropout in between.

    Same layers as the ``VGG16Classifier`` defined earlier in this notebook;
    running this cell rebinds the name.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The input feature size is 512 (from the conv layers) * 1 * 1
        flat_features = 512 * 1 * 1
        hidden_units = 4096
        head_layers = [
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map already-flattened features ``x`` to class logits."""
        logits = self.classifier(x)
        return logits

In [None]:
class ModifiedVGG16(nn.Module):
    """VGG16 whose final feature map is gated by a PNet attention map before classification.

    Args:
        num_classes: Number of logits produced by the classifier head.
        attention_channels: Hidden channel count passed to ``PNet``.
    """

    def __init__(self, num_classes=10, attention_channels=64):
        super().__init__()
        self.features = VGG16Features()
        input_channels = 512  # Output channels of last conv layer in VGG16Features
        self.pnet = PNet(input_channels, attention_channels)
        self.classifier = VGG16Classifier(num_classes)

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        features = self.features(x)
        attention_maps = self.pnet(features)
        # The attention map is multiplied element-wise into the features
        # (a single-channel map broadcasts over the channel dimension).
        attended_features = features * attention_maps

        # Flatten the attended features to match the classifier's input size.
        # flatten (not view) also handles a non-contiguous product, e.g. when
        # the tensors are in channels-last layout, instead of raising.
        attended_features = torch.flatten(attended_features, start_dim=1)

        output = self.classifier(attended_features)
        return output

In [None]:
from tqdm import tqdm

# Function to train and validate the model
def train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and print train/validation loss and accuracy after every epoch.

    Args:
        model: Network to optimize; batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(inputs, labels)`` batches used for the
            gradient updates.
        validation_loader: Iterable of ``(inputs, labels)`` batches evaluated
            with gradients disabled after each epoch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Called as ``scheduler.step(val_loss)`` once per epoch, so it
            must accept a metric (e.g. ``ReduceLROnPlateau``).
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The metrics are only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        train_correct = 0
        train_loss = 0.0
        train_total = 0
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes the criterion returns a batch mean).
            train_loss += loss.item() * inputs.size(0)
            # detach() replaces the discouraged `.data` access
            predicted = outputs.detach().argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            loss.backward()
            optimizer.step()

        train_loss /= train_total
        train_accuracy = 100 * train_correct / train_total

        # Validation loop
        model.eval()
        val_correct = 0
        val_loss = 0.0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= val_total
        val_accuracy = 100 * val_correct / val_total

        print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # The scheduler monitors the mean validation loss of this epoch
        scheduler.step(val_loss)

In [None]:
# Evaluation Function
def evaluate_model(model, test_loader):
    """Print the top-1 accuracy (in percent) of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and batches are
            moved to the device its parameters live on.
        test_loader: Iterable of batches whose element 0 holds the inputs and
            element 1 the integer labels.

    Returns:
        None. The accuracy is only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(run_device), data[1].to(run_device)
            outputs = model(images)
            # argmax over the class dimension; no `.data` needed under no_grad
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total}%')


In [None]:
# Model, Loss Function, and Optimizer
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ModifiedVGG16(num_classes=10)
model = model.to(device)
# AdamW over all model parameters
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
# ReduceLROnPlateau: halve the learning rate once the monitored loss stops improving (patience of 5 epochs)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Cross-entropy loss computed on the model's output logits
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model built in the previous cell for 20 epochs; train_model prints the metrics after each epoch
train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=20)

100%|██████████| 625/625 [00:28<00:00, 21.80it/s]


Epoch 1, Training Loss: 2.1158, Training Accuracy: 16.39%, Validation Loss: 1.8863, Validation Accuracy: 24.55%


100%|██████████| 625/625 [00:26<00:00, 23.42it/s]


Epoch 2, Training Loss: 1.7879, Training Accuracy: 29.90%, Validation Loss: 1.6566, Validation Accuracy: 36.72%


100%|██████████| 625/625 [00:27<00:00, 23.13it/s]


Epoch 3, Training Loss: 1.5491, Training Accuracy: 40.42%, Validation Loss: 1.4346, Validation Accuracy: 45.33%


100%|██████████| 625/625 [00:27<00:00, 22.47it/s]


Epoch 4, Training Loss: 1.3636, Training Accuracy: 48.92%, Validation Loss: 1.2683, Validation Accuracy: 51.97%


100%|██████████| 625/625 [00:27<00:00, 22.71it/s]


Epoch 5, Training Loss: 1.2124, Training Accuracy: 55.75%, Validation Loss: 1.1672, Validation Accuracy: 57.40%


100%|██████████| 625/625 [00:26<00:00, 23.35it/s]


Epoch 6, Training Loss: 1.0860, Training Accuracy: 61.41%, Validation Loss: 1.0711, Validation Accuracy: 61.95%


100%|██████████| 625/625 [00:26<00:00, 23.34it/s]


Epoch 7, Training Loss: 0.9890, Training Accuracy: 65.13%, Validation Loss: 0.9820, Validation Accuracy: 65.39%


100%|██████████| 625/625 [00:27<00:00, 22.86it/s]


Epoch 8, Training Loss: 0.9036, Training Accuracy: 68.32%, Validation Loss: 0.9510, Validation Accuracy: 65.85%


100%|██████████| 625/625 [00:26<00:00, 23.43it/s]


Epoch 9, Training Loss: 0.8369, Training Accuracy: 70.55%, Validation Loss: 0.8440, Validation Accuracy: 69.85%


100%|██████████| 625/625 [00:26<00:00, 23.56it/s]


Epoch 10, Training Loss: 0.7728, Training Accuracy: 72.73%, Validation Loss: 0.8404, Validation Accuracy: 69.73%


100%|██████████| 625/625 [00:27<00:00, 23.06it/s]


Epoch 11, Training Loss: 0.7270, Training Accuracy: 74.94%, Validation Loss: 0.7639, Validation Accuracy: 73.75%


100%|██████████| 625/625 [00:27<00:00, 22.85it/s]


Epoch 12, Training Loss: 0.6794, Training Accuracy: 76.37%, Validation Loss: 0.7393, Validation Accuracy: 73.82%


100%|██████████| 625/625 [00:27<00:00, 22.54it/s]


Epoch 13, Training Loss: 0.6376, Training Accuracy: 77.94%, Validation Loss: 0.7797, Validation Accuracy: 73.39%


100%|██████████| 625/625 [00:26<00:00, 23.35it/s]


Epoch 14, Training Loss: 0.5997, Training Accuracy: 79.47%, Validation Loss: 0.7035, Validation Accuracy: 75.68%


100%|██████████| 625/625 [00:27<00:00, 23.13it/s]


Epoch 15, Training Loss: 0.5550, Training Accuracy: 80.90%, Validation Loss: 0.7155, Validation Accuracy: 76.16%


100%|██████████| 625/625 [00:26<00:00, 23.54it/s]


Epoch 16, Training Loss: 0.5237, Training Accuracy: 82.01%, Validation Loss: 0.6702, Validation Accuracy: 77.52%


100%|██████████| 625/625 [00:26<00:00, 23.57it/s]


Epoch 17, Training Loss: 0.4977, Training Accuracy: 83.25%, Validation Loss: 0.6912, Validation Accuracy: 77.37%


100%|██████████| 625/625 [00:26<00:00, 23.44it/s]


Epoch 18, Training Loss: 0.4681, Training Accuracy: 83.90%, Validation Loss: 0.6621, Validation Accuracy: 78.57%


100%|██████████| 625/625 [00:26<00:00, 23.38it/s]


Epoch 19, Training Loss: 0.4367, Training Accuracy: 85.17%, Validation Loss: 0.6559, Validation Accuracy: 78.76%


100%|██████████| 625/625 [00:26<00:00, 23.74it/s]


Epoch 20, Training Loss: 0.4100, Training Accuracy: 86.11%, Validation Loss: 0.7205, Validation Accuracy: 76.90%


In [None]:
# Evaluate the trained model on the CIFAR-10 test loader (prints the accuracy; no training happens here)
evaluate_model(model, test_loader)

Accuracy: 78.53%


VGGNet with PNet and updates (BatchNorm and Dropout added to the feature extractor; LeakyReLU activations throughout)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PNet(nn.Module):
    """Small convolutional branch that turns a feature map into a one-channel attention map in (0, 1).

    This variant uses leaky ReLU (negative slope 0.01) after the two hidden convs.

    Args:
        input_channels: Channel count of the incoming feature map.
        attention_channels: Channel count used by the two hidden conv layers.
    """

    def __init__(self, input_channels, attention_channels):
        super().__init__()
        # 3x3 kernels with padding 1 keep the spatial size of the input
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(input_channels, attention_channels, **conv_kwargs)
        self.conv2 = nn.Conv2d(attention_channels, attention_channels, **conv_kwargs)
        # Final conv collapses the hidden channels into a single attention channel
        self.output_layer = nn.Conv2d(attention_channels, 1, **conv_kwargs)

    def forward(self, x):
        """Return the attention map for feature map ``x`` (one channel, values squashed by a sigmoid)."""
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = F.leaky_relu(conv(hidden), negative_slope=0.01)
        # Sigmoid bounds every attention value between 0 and 1
        return torch.sigmoid(self.output_layer(hidden))


In [None]:
class VGG16Features(nn.Module):
    """VGG16 conv stack with BatchNorm, LeakyReLU and per-stage dropout.

    Five stages of 64, 128, 256, 512 and 512 channels with 2, 2, 3, 3 and 3
    convolutions. Every conv is followed by ``BatchNorm2d`` and ``LeakyReLU``;
    every stage ends with a 2x2 max-pool and ``Dropout(0.3)``. All layers live
    in ``self.features`` in execution order.
    """

    def __init__(self):
        super().__init__()
        # (output channels, number of conv layers) for each stage
        stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
        stack = []
        in_ch = 3  # the first conv takes 3 input channels
        for width, depth in stage_plan:
            for _ in range(depth):
                stack += [
                    nn.Conv2d(in_ch, width, kernel_size=3, padding=1),
                    nn.BatchNorm2d(width),
                    nn.LeakyReLU(inplace=True),
                ]
                in_ch = width
            # Close the stage: halve the resolution, then apply dropout
            stack += [nn.MaxPool2d(kernel_size=2, stride=2), nn.Dropout(0.3)]
        self.features = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the resulting feature map."""
        return self.features(x)


In [None]:
class VGG16Classifier(nn.Module):
    """Fully connected head: 512 -> 4096 -> 4096 -> ``num_classes`` with LeakyReLU and dropout in between.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # The input feature size is 512 (from the conv layers) * 1 * 1
        flat_features = 512 * 1 * 1
        hidden_units = 4096
        head_layers = [
            nn.Linear(flat_features, hidden_units),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.LeakyReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map already-flattened features ``x`` to class logits."""
        logits = self.classifier(x)
        return logits

In [None]:
class ModifiedVGG16(nn.Module):
    """VGG16 whose final feature map is gated by a PNet attention map before classification.

    Args:
        num_classes: Number of logits produced by the classifier head.
        attention_channels: Hidden channel count passed to ``PNet``.
    """

    def __init__(self, num_classes=10, attention_channels=64):
        super().__init__()
        self.features = VGG16Features()
        input_channels = 512  # Output channels of last conv layer in VGG16Features
        self.pnet = PNet(input_channels, attention_channels)
        self.classifier = VGG16Classifier(num_classes)

    def forward(self, x):
        """Return class logits for the batch ``x``."""
        features = self.features(x)
        attention_maps = self.pnet(features)
        # The attention map is multiplied element-wise into the features
        # (a single-channel map broadcasts over the channel dimension).
        attended_features = features * attention_maps

        # Flatten the attended features to match the classifier's input size.
        # flatten (not view) also handles a non-contiguous product, e.g. when
        # the tensors are in channels-last layout, instead of raising.
        attended_features = torch.flatten(attended_features, start_dim=1)

        output = self.classifier(attended_features)
        return output

In [None]:
from tqdm import tqdm

# Function to train and validate the model
def train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and print train/validation loss and accuracy after every epoch.

    Args:
        model: Network to optimize; batches are moved to the device its
            parameters live on.
        train_loader: Iterable of ``(inputs, labels)`` batches used for the
            gradient updates.
        validation_loader: Iterable of ``(inputs, labels)`` batches evaluated
            with gradients disabled after each epoch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Called as ``scheduler.step(val_loss)`` once per epoch, so it
            must accept a metric (e.g. ``ReduceLROnPlateau``).
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The metrics are only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        train_correct = 0
        train_loss = 0.0
        train_total = 0
        for inputs, labels in tqdm(train_loader):
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Weight the batch loss by the batch size so the epoch figure is a
            # per-sample average (assumes the criterion returns a batch mean).
            train_loss += loss.item() * inputs.size(0)
            # detach() replaces the discouraged `.data` access
            predicted = outputs.detach().argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            loss.backward()
            optimizer.step()

        train_loss /= train_total
        train_accuracy = 100 * train_correct / train_total

        # Validation loop
        model.eval()
        val_correct = 0
        val_loss = 0.0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                predicted = outputs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= val_total
        val_accuracy = 100 * val_correct / val_total

        print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # The scheduler monitors the mean validation loss of this epoch
        scheduler.step(val_loss)

In [None]:
# Evaluation Function
def evaluate_model(model, test_loader):
    """Print the top-1 accuracy (in percent) of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; it is switched to eval mode and batches are
            moved to the device its parameters live on.
        test_loader: Iterable of batches whose element 0 holds the inputs and
            element 1 the integer labels.

    Returns:
        None. The accuracy is only printed.
    """
    # Take the device from the model itself rather than from a notebook-global
    # `device`, so the function does not depend on cell execution order.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(run_device), data[1].to(run_device)
            outputs = model(images)
            # argmax over the class dimension; no `.data` needed under no_grad
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total}%')


In [None]:
# Model, Loss Function, and Optimizer
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ModifiedVGG16(num_classes=10)
model = model.to(device)
# AdamW over all model parameters
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
# ReduceLROnPlateau: halve the learning rate once the monitored loss stops improving (patience of 5 epochs)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Cross-entropy loss computed on the model's output logits
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model built in the previous cell for 20 epochs; train_model prints the metrics after each epoch
train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=20)

100%|██████████| 625/625 [00:28<00:00, 22.09it/s]


Epoch 1, Training Loss: 1.6963, Training Accuracy: 34.71%, Validation Loss: 1.5421, Validation Accuracy: 45.56%


100%|██████████| 625/625 [00:28<00:00, 22.07it/s]


Epoch 2, Training Loss: 1.2788, Training Accuracy: 54.02%, Validation Loss: 1.6311, Validation Accuracy: 48.18%


100%|██████████| 625/625 [00:28<00:00, 22.31it/s]


Epoch 3, Training Loss: 1.0922, Training Accuracy: 61.13%, Validation Loss: 1.0831, Validation Accuracy: 61.36%


100%|██████████| 625/625 [00:28<00:00, 22.09it/s]


Epoch 4, Training Loss: 0.9652, Training Accuracy: 66.49%, Validation Loss: 0.8993, Validation Accuracy: 68.22%


100%|██████████| 625/625 [00:29<00:00, 21.39it/s]


Epoch 5, Training Loss: 0.8898, Training Accuracy: 69.20%, Validation Loss: 0.9435, Validation Accuracy: 67.69%


100%|██████████| 625/625 [00:28<00:00, 21.59it/s]


Epoch 6, Training Loss: 0.8183, Training Accuracy: 71.65%, Validation Loss: 0.8342, Validation Accuracy: 71.86%


100%|██████████| 625/625 [00:28<00:00, 21.80it/s]


Epoch 7, Training Loss: 0.7581, Training Accuracy: 74.01%, Validation Loss: 0.7640, Validation Accuracy: 73.61%


100%|██████████| 625/625 [00:28<00:00, 21.67it/s]


Epoch 8, Training Loss: 0.7177, Training Accuracy: 75.45%, Validation Loss: 0.7575, Validation Accuracy: 74.12%


100%|██████████| 625/625 [00:28<00:00, 21.78it/s]


Epoch 9, Training Loss: 0.6801, Training Accuracy: 76.92%, Validation Loss: 0.6771, Validation Accuracy: 77.25%


100%|██████████| 625/625 [00:28<00:00, 21.93it/s]


Epoch 10, Training Loss: 0.6449, Training Accuracy: 78.30%, Validation Loss: 0.6313, Validation Accuracy: 78.16%


100%|██████████| 625/625 [00:28<00:00, 21.82it/s]


Epoch 11, Training Loss: 0.6155, Training Accuracy: 79.13%, Validation Loss: 0.6077, Validation Accuracy: 78.72%


100%|██████████| 625/625 [00:28<00:00, 21.85it/s]


Epoch 12, Training Loss: 0.5937, Training Accuracy: 79.80%, Validation Loss: 0.6032, Validation Accuracy: 79.91%


100%|██████████| 625/625 [00:29<00:00, 21.37it/s]


Epoch 13, Training Loss: 0.5710, Training Accuracy: 80.67%, Validation Loss: 0.5406, Validation Accuracy: 81.77%


100%|██████████| 625/625 [00:29<00:00, 21.05it/s]


Epoch 14, Training Loss: 0.5482, Training Accuracy: 81.38%, Validation Loss: 0.5646, Validation Accuracy: 80.88%


100%|██████████| 625/625 [00:29<00:00, 21.49it/s]


Epoch 15, Training Loss: 0.5262, Training Accuracy: 82.20%, Validation Loss: 0.5895, Validation Accuracy: 80.40%


100%|██████████| 625/625 [00:28<00:00, 21.65it/s]


Epoch 16, Training Loss: 0.5057, Training Accuracy: 82.83%, Validation Loss: 0.5486, Validation Accuracy: 81.86%


100%|██████████| 625/625 [00:28<00:00, 21.78it/s]


Epoch 17, Training Loss: 0.4872, Training Accuracy: 83.56%, Validation Loss: 0.5472, Validation Accuracy: 81.74%


100%|██████████| 625/625 [00:28<00:00, 21.78it/s]


Epoch 18, Training Loss: 0.4723, Training Accuracy: 83.91%, Validation Loss: 0.5241, Validation Accuracy: 82.09%


100%|██████████| 625/625 [00:28<00:00, 21.78it/s]


Epoch 19, Training Loss: 0.4597, Training Accuracy: 84.43%, Validation Loss: 0.4939, Validation Accuracy: 83.81%


100%|██████████| 625/625 [00:28<00:00, 21.79it/s]


Epoch 20, Training Loss: 0.4378, Training Accuracy: 85.15%, Validation Loss: 0.4953, Validation Accuracy: 82.63%


In [None]:
# Evaluate the trained model on the CIFAR-10 test loader (prints the accuracy; no training happens here)
evaluate_model(model, test_loader)

Accuracy: 84.43%


Traditional CNN

In [None]:
class TradCNN(nn.Module):
    """Plain three-stage convolutional classifier.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool with stride 2; channel widths go 3 -> 30 -> 60 -> 90. The pooled
    feature maps are flattened and fed through two fully connected layers
    (90 * 4 * 4 -> 480 -> 10), and the raw 10-way scores are returned.

    The first linear layer expects 90 * 4 * 4 features per sample, which is
    what three spatial halvings of a 32x32 input produce.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (spatial size preserved by padding=1)
        self.cnn1 = nn.Conv2d(in_channels=3, out_channels=30, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv2d(in_channels=30, out_channels=60, kernel_size=3, padding=1)
        self.cnn3 = nn.Conv2d(in_channels=60, out_channels=90, kernel_size=3, padding=1)
        # Fully connected classifier head
        self.Linear1 = nn.Linear(in_features=90 * 4 * 4, out_features=480)
        self.Linear2 = nn.Linear(in_features=480, out_features=10)
        # Parameter-free layers shared by every stage
        self.maxPool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images."""
        # conv -> ReLU -> pool, repeated for each of the three stages
        for conv in (self.cnn1, self.cnn2, self.cnn3):
            x = self.maxPool(self.relu(conv(x)))

        # Collapse everything except the batch dimension
        features = x.flatten(start_dim=1)

        hidden = self.relu(self.Linear1(features))
        return self.Linear2(hidden)


In [None]:
from tqdm import tqdm

# Shared per-epoch loop used by train_model for both training and validation
def _run_epoch(model, loader, criterion, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy_pct)``.

    Args:
        model: network to run; switched to train mode when ``optimizer`` is
            given and to eval mode otherwise.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: when provided, gradients are computed and one optimizer
            step is taken per batch; when ``None`` the pass runs with
            gradients disabled and no parameters are updated.

    Returns:
        Tuple of the sample-weighted mean loss and the accuracy in percent.

    Raises:
        ValueError: if ``loader`` yields no samples (the averages would
            otherwise divide by zero).
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    correct = 0
    total = 0
    # Only the training pass shows a progress bar
    batches = tqdm(loader) if training else loader
    with torch.set_grad_enabled(training):
        for inputs, labels in batches:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            # Weight by batch size so a smaller final batch does not skew the
            # epoch average (assumes criterion returns a per-batch mean)
            loss_sum += loss.item() * inputs.size(0)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        phase = 'training' if training else 'validation'
        raise ValueError(f'{phase} loader yielded no samples')
    return loss_sum / total, 100 * correct / total


# Function to train and validate the model
def train_model(model, train_loader, validation_loader, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Every epoch runs a training pass over ``train_loader`` and a gradient-free
    pass over ``validation_loader``, prints one summary line with loss and
    accuracy for both, and then advances the learning-rate scheduler.
    Batches are moved to the module-level ``device``.

    Args:
        model: network to optimise (updated in place).
        train_loader: iterable of ``(inputs, labels)`` training batches.
        validation_loader: iterable of ``(inputs, labels)`` validation batches.
        criterion: loss callable, e.g. ``nn.CrossEntropyLoss()``.
        optimizer: optimizer bound to ``model``'s parameters.
        scheduler: learning-rate scheduler, or ``None`` to keep the rate
            fixed. ``ReduceLROnPlateau`` is stepped with the validation loss;
            any other scheduler is stepped without arguments.
        num_epochs: number of epochs to run.

    Raises:
        ValueError: if either loader yields no samples.
    """
    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_accuracy = _run_epoch(model, validation_loader, criterion)

        print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        if scheduler is None:
            continue
        # Only ReduceLROnPlateau takes a metric; epoch-based schedulers such as
        # StepLR would misread the loss as an epoch index
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)
        else:
            scheduler.step()

In [None]:
# Evaluation Function
def evaluate_model(model, test_loader):
    """Print the model's top-1 accuracy (in percent) over ``test_loader``.

    The model is switched to eval mode and the pass runs with gradients
    disabled. Each batch is indexed as ``batch[0]`` (inputs) and ``batch[1]``
    (labels), and both are moved to the module-level ``device`` before the
    forward pass. Nothing is returned; the result is only printed.
    """
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for batch in test_loader:
            images = batch[0].to(device)
            labels = batch[1].to(device)
            # Index of the highest score per row is the predicted class
            predicted = model(images).max(dim=1).indices
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * hits / seen}%')


In [None]:
# Device, model, optimizer, LR scheduler and loss for the TradCNN run
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = TradCNN().to(device)
# AdamW optimizer with a starting learning rate of 1e-4
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
# ReduceLROnPlateau (not StepLR): halve the learning rate when the monitored value stops decreasing, with a patience of 5
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Cross-entropy loss on the model's raw class scores
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the TradCNN model for 20 epochs
train_model(
    model,
    train_loader,
    validation_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs=20,
)

100%|██████████| 625/625 [00:20<00:00, 31.04it/s]


Epoch 1, Training Loss: 1.8036, Training Accuracy: 35.24%, Validation Loss: 1.5287, Validation Accuracy: 44.82%


100%|██████████| 625/625 [00:21<00:00, 29.68it/s]


Epoch 2, Training Loss: 1.4722, Training Accuracy: 46.84%, Validation Loss: 1.4209, Validation Accuracy: 48.59%


100%|██████████| 625/625 [00:20<00:00, 30.65it/s]


Epoch 3, Training Loss: 1.3797, Training Accuracy: 50.47%, Validation Loss: 1.3506, Validation Accuracy: 51.29%


100%|██████████| 625/625 [00:20<00:00, 30.40it/s]


Epoch 4, Training Loss: 1.3098, Training Accuracy: 53.48%, Validation Loss: 1.2768, Validation Accuracy: 54.40%


100%|██████████| 625/625 [00:20<00:00, 30.68it/s]


Epoch 5, Training Loss: 1.2500, Training Accuracy: 55.42%, Validation Loss: 1.2386, Validation Accuracy: 56.13%


100%|██████████| 625/625 [00:20<00:00, 30.32it/s]


Epoch 6, Training Loss: 1.2032, Training Accuracy: 57.45%, Validation Loss: 1.2139, Validation Accuracy: 56.49%


100%|██████████| 625/625 [00:20<00:00, 30.46it/s]


Epoch 7, Training Loss: 1.1598, Training Accuracy: 59.05%, Validation Loss: 1.1451, Validation Accuracy: 59.04%


100%|██████████| 625/625 [00:20<00:00, 30.57it/s]


Epoch 8, Training Loss: 1.1187, Training Accuracy: 60.44%, Validation Loss: 1.1244, Validation Accuracy: 59.48%


100%|██████████| 625/625 [00:20<00:00, 30.18it/s]


Epoch 9, Training Loss: 1.0811, Training Accuracy: 61.95%, Validation Loss: 1.0798, Validation Accuracy: 61.42%


100%|██████████| 625/625 [00:20<00:00, 30.63it/s]


Epoch 10, Training Loss: 1.0549, Training Accuracy: 62.90%, Validation Loss: 1.0757, Validation Accuracy: 62.04%


100%|██████████| 625/625 [00:19<00:00, 31.30it/s]


Epoch 11, Training Loss: 1.0271, Training Accuracy: 63.87%, Validation Loss: 1.0386, Validation Accuracy: 62.94%


100%|██████████| 625/625 [00:19<00:00, 31.33it/s]


Epoch 12, Training Loss: 1.0001, Training Accuracy: 65.02%, Validation Loss: 1.0190, Validation Accuracy: 63.52%


100%|██████████| 625/625 [00:19<00:00, 31.46it/s]


Epoch 13, Training Loss: 0.9734, Training Accuracy: 66.02%, Validation Loss: 1.0180, Validation Accuracy: 64.36%


100%|██████████| 625/625 [00:19<00:00, 31.53it/s]


Epoch 14, Training Loss: 0.9485, Training Accuracy: 66.78%, Validation Loss: 0.9682, Validation Accuracy: 65.56%


100%|██████████| 625/625 [00:19<00:00, 31.39it/s]


Epoch 15, Training Loss: 0.9273, Training Accuracy: 67.67%, Validation Loss: 0.9657, Validation Accuracy: 66.29%


100%|██████████| 625/625 [00:19<00:00, 31.55it/s]


Epoch 16, Training Loss: 0.9108, Training Accuracy: 68.23%, Validation Loss: 0.9543, Validation Accuracy: 66.94%


100%|██████████| 625/625 [00:20<00:00, 31.17it/s]


Epoch 17, Training Loss: 0.8881, Training Accuracy: 69.03%, Validation Loss: 0.9396, Validation Accuracy: 67.25%


100%|██████████| 625/625 [00:20<00:00, 30.85it/s]


Epoch 18, Training Loss: 0.8701, Training Accuracy: 69.67%, Validation Loss: 0.9535, Validation Accuracy: 67.01%


100%|██████████| 625/625 [00:20<00:00, 30.52it/s]


Epoch 19, Training Loss: 0.8511, Training Accuracy: 70.51%, Validation Loss: 0.9036, Validation Accuracy: 68.13%


100%|██████████| 625/625 [00:20<00:00, 30.61it/s]


Epoch 20, Training Loss: 0.8380, Training Accuracy: 70.97%, Validation Loss: 0.8863, Validation Accuracy: 68.75%


In [None]:
# Report the trained TradCNN's accuracy on test_loader
evaluate_model(model=model, test_loader=test_loader)

Accuracy: 69.72%
