In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader, Subset
import torchvision.datasets as datasets

from torch.cuda.amp import autocast, GradScaler

import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# Assignment-specified VGG11 architecture (one line per stage):
# Conv(001, 064, 3, 1, 1) - BatchNorm(064) - ReLU - MaxPool(2, 2)
# Conv(064, 128, 3, 1, 1) - BatchNorm(128) - ReLU - MaxPool(2, 2)
# Conv(128, 256, 3, 1, 1) - BatchNorm(256) - ReLU
# Conv(256, 256, 3, 1, 1) - BatchNorm(256) - ReLU - MaxPool(2, 2)
# Conv(256, 512, 3, 1, 1) - BatchNorm(512) - ReLU
# Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU - MaxPool(2, 2)
# Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU
# Conv(512, 512, 3, 1, 1) - BatchNorm(512) - ReLU - MaxPool(2, 2)
# Linear(0512, 4096) - ReLU - Dropout(0.5)
# Linear(4096, 4096) - ReLU - Dropout(0.5)
# Linear(4096, 10)

In [3]:
# The assignment lists the first layer as Conv(001, 064, 3, 1, 1), where "001"
# is the number of input channels (a single channel, i.e. grayscale input).
# That does not match this project: the images we train and test on are all
# RGB, so the first convolution in the model below takes 3 input channels.

In [4]:
#indexing for classes
def select_class_index(labels, n, num_classes=10):
    """Pick the positions of the first ``n`` samples of every class.

    Args:
        labels: Sequence of integer class labels, one per sample.
        n: Maximum number of samples to keep per class. A class with fewer
            than ``n`` samples contributes all of its samples.
        num_classes: Number of classes; labels ``0 .. num_classes - 1`` are
            scanned. Defaults to 10.

    Returns:
        A list of plain ``int`` positions into ``labels``, grouped by class
        (all picks for class 0 first, then class 1, ...) and in ascending
        order within each class.

    Raises:
        ValueError: If ``n`` is negative. A negative slice bound would
            silently drop the *last* ``|n|`` samples of each class instead
            of limiting the count.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    labels = np.asarray(labels)
    selected = []

    for c in range(num_classes):
        # np.where returns positions in ascending order, so the slice keeps
        # the earliest n occurrences of class c.
        idx = np.where(labels == c)[0][:n]
        # tolist() yields built-in ints rather than numpy scalars.
        selected.extend(idx.tolist())

    return selected

In [5]:
# Per-channel CIFAR-10 mean and standard deviation, passed to Normalize below.
mean = [0.4914, 0.4822, 0.4465]
std = [0.2470, 0.2435, 0.2616]

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the statistics above.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [6]:
# Dataset location and sampling settings, named once so that every dataset
# and loader below stays in sync when one of them is changed.
DATA_ROOT = "F:/CIFAR10_Project/data"
TRAIN_PER_CLASS = 500
TEST_PER_CLASS = 100
BATCH_SIZE = 64
NUM_WORKERS = 2

# Load the full datasets without a transform; only their targets are needed
# here, to choose which samples to keep. download=True fetches the files
# into DATA_ROOT if they are not there yet.
full_train_set = datasets.CIFAR10(root=DATA_ROOT, train=True, download=True)
full_test_set = datasets.CIFAR10(root=DATA_ROOT, train=False, download=True)

# Positions of the first TRAIN_PER_CLASS / TEST_PER_CLASS samples of each class.
train_indices = select_class_index(full_train_set.targets, TRAIN_PER_CLASS)
test_indices = select_class_index(full_test_set.targets, TEST_PER_CLASS)

# Fail early if a class had fewer samples than requested; a smaller subset
# would otherwise go unnoticed.
assert len(train_indices) == len(full_train_set.classes) * TRAIN_PER_CLASS
assert len(test_indices) == len(full_test_set.classes) * TEST_PER_CLASS

# Same files again, this time with the preprocessing transform attached.
train_set = datasets.CIFAR10(root=DATA_ROOT, train=True, transform=transform)
test_set = datasets.CIFAR10(root=DATA_ROOT, train=False, transform=transform)

# Wrap in Subset to expose only the selected indices.
train_subset = Subset(train_set, train_indices)
test_subset = Subset(test_set, test_indices)

# Loaders: training batches are reshuffled every epoch, test batches keep
# a fixed order.
train_loader = DataLoader(
    train_subset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)
test_loader = DataLoader(
    test_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

In [7]:
class VGG11(nn.Module):
    """VGG11-style CNN with batch normalization and a 3-layer classifier.

    The feature extractor is eight ``Conv(3x3, stride 1, pad 1) -> BatchNorm
    -> ReLU`` stages, five of which are followed by a 2x2 max-pool. An
    adaptive average pool then reduces the feature map to 1x1, so the
    classifier always receives 512 features, whatever the input resolution
    (inputs must be at least 32x32 to survive the five halvings).

    Args:
        num_classes: Size of the output layer. Defaults to 10.
        in_channels: Number of channels in the input images. Defaults to 3
            (RGB); pass 1 for single-channel input such as the
            ``Conv(001, ...)`` first layer in the assignment text.
    """

    # One entry per conv stage: (output channels, followed by MaxPool2d?).
    # The stages are emitted in this order into a single nn.Sequential, so
    # module indices (and therefore state_dict keys such as
    # "features.0.weight") match the hand-written layer list this replaces.
    _CONV_PLAN = (
        (64, True),
        (128, True),
        (256, False),
        (256, True),
        (512, False),
        (512, True),
        (512, False),
        (512, True),
    )

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()

        layers = []
        channels = in_channels
        for out_channels, pool_after in self._CONV_PLAN:
            layers += [
                nn.Conv2d(channels, out_channels, 3, 1, 1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
            if pool_after:
                layers.append(nn.MaxPool2d(2, 2))
            channels = out_channels
        self.features = nn.Sequential(*layers)

        # Collapse whatever spatial size is left to 1x1 so the first Linear
        # layer sees exactly `channels` (512) features.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully-connected layers: Linear(input features, output features).
        self.classifier = nn.Sequential(
            nn.Linear(channels, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # (batch, 512, 1, 1) -> (batch, 512)
        x = self.classifier(x)
        return x

In [9]:
# Seed PyTorch's global RNG before the model is created, so that weight
# initialization and the DataLoader shuffle order start from the same state
# on every "Restart & Run All". (Some GPU kernels can still be
# non-deterministic; this only fixes the random number streams.)
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11().to(device)

# Loss and optimizer used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [10]:
def train(model, loader, device=None):
    """Run one training epoch and return the mean per-batch loss.

    Uses the notebook-level ``optimizer`` and ``criterion`` objects to
    compute the loss and update ``model``.

    Args:
        model: The ``nn.Module`` to train; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches. It does not
            need to support ``len()``.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none), so
            batches always land where the model lives.

    Returns:
        The average of the per-batch loss values as a ``float``.

    Raises:
        ValueError: If ``loader`` yields no batches, since the mean loss is
            undefined in that case.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    total_loss = 0.0
    num_batches = 0

    for X, y in loader:
        # non_blocking only has an effect for pinned host memory copied to a
        # CUDA device; everywhere else it behaves like a normal copy.
        X = X.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        out = model(X)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train() received an empty loader: no batches to average over")

    return total_loss / num_batches

In [11]:
def evaluate(model, loader, device=None):
    """Return the classification accuracy of ``model`` over ``loader``.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU if the model has none).

    Returns:
        Fraction of samples whose arg-max prediction equals the target,
        as a ``float`` in ``[0, 1]``.

    Raises:
        ValueError: If ``loader`` yields no samples, since accuracy is
            undefined in that case.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():  # no gradients are needed for scoring
        for X, y in loader:
            # non_blocking only has an effect for pinned host memory copied
            # to a CUDA device; everywhere else it is a normal copy.
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            out = model(X)
            preds = torch.argmax(out, dim=1)

            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        raise ValueError("evaluate() received an empty loader: accuracy is undefined")

    return correct / total

In [12]:
def evaluation_loop_full(model, loader, device):
    """Collect every target and arg-max prediction produced over ``loader``.

    Puts ``model`` in eval mode, runs it without gradient tracking on each
    batch (moved to ``device``), and gathers the results on the CPU.

    Returns:
        ``(labels, predictions)``: two lists of per-sample values, in the
        order the loader yielded them.
    """
    model.eval()
    labels_seen, predictions = [], []

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Index of the largest logit per sample is the predicted class.
            batch_preds = model(inputs).argmax(dim=1)

            predictions += list(batch_preds.cpu().numpy())
            labels_seen += list(targets.cpu().numpy())

    return labels_seen, predictions

In [13]:
# Training: run num_epochs passes over train_loader. After each pass, print
# the value returned by train() (mean per-batch training loss) and the
# accuracy that evaluate() measures on test_loader.

num_epochs = 20
for epoch in range(num_epochs):
    train_loss = train(model, train_loader)
    test_acc = evaluate(model, test_loader)
    print(f"Epoch {epoch+1:02d} | Loss: {train_loss:.4f} | Test Accuracy: {test_acc*100:.2f}%")

# After training, get full predictions and true labels for detailed evaluation
true_labels, pred_labels = evaluation_loop_full(model, test_loader, device)
# The detailed metrics report is produced in a later cell, after
# evaluate_model has been defined:
#evaluate_model(true_labels, pred_labels)

Epoch 01 | Loss: 1.9846 | Test Accuracy: 25.50%
Epoch 02 | Loss: 1.7823 | Test Accuracy: 36.80%
Epoch 03 | Loss: 1.5969 | Test Accuracy: 38.10%
Epoch 04 | Loss: 1.4893 | Test Accuracy: 36.70%
Epoch 05 | Loss: 1.4368 | Test Accuracy: 39.00%
Epoch 06 | Loss: 1.3267 | Test Accuracy: 37.50%
Epoch 07 | Loss: 1.2602 | Test Accuracy: 40.20%
Epoch 08 | Loss: 1.1924 | Test Accuracy: 43.30%
Epoch 09 | Loss: 1.1332 | Test Accuracy: 48.90%
Epoch 10 | Loss: 1.1065 | Test Accuracy: 51.00%
Epoch 11 | Loss: 1.0312 | Test Accuracy: 51.20%
Epoch 12 | Loss: 1.0326 | Test Accuracy: 47.50%
Epoch 13 | Loss: 0.9292 | Test Accuracy: 51.90%
Epoch 14 | Loss: 0.9065 | Test Accuracy: 30.40%
Epoch 15 | Loss: 0.9035 | Test Accuracy: 55.70%
Epoch 16 | Loss: 0.8406 | Test Accuracy: 43.10%
Epoch 17 | Loss: 0.8045 | Test Accuracy: 54.30%
Epoch 18 | Loss: 0.7280 | Test Accuracy: 43.90%
Epoch 19 | Loss: 0.6948 | Test Accuracy: 52.50%
Epoch 20 | Loss: 0.6694 | Test Accuracy: 53.80%


In [14]:
# Persist only the learned parameters (the state_dict), not the whole module.
torch.save(obj=model.state_dict(), f="vgg11_1.pth")

In [15]:
def evaluate_model(y_true, y_pred):
    """Print accuracy, macro precision/recall/F1 and the confusion matrix.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
    """
    # Metrics that are averaged over classes with average='macro',
    # listed in the order they are printed.
    macro_metrics = (
        ("Precision:", precision_score),
        ("Recall :", recall_score),
        ("F1-score :", f1_score),
    )

    print("Accuracy:", accuracy_score(y_true, y_pred))
    for caption, metric in macro_metrics:
        print(caption, metric(y_true, y_pred, average='macro'))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))

# Report the metrics for the test-set labels and predictions collected
# after the last training epoch.
evaluate_model(true_labels, pred_labels)

Accuracy: 0.538
Precision: 0.6142523448649987
Recall : 0.538
F1-score : 0.5307327059627751
Confusion Matrix:
 [[43  3  7  7  3  5  1  4 10 17]
 [ 0 36  1  0  0  0  0  2  0 61]
 [ 6  0 31 15 16 20  5  6  0  1]
 [ 0  0  4 39  4 44  1  4  0  4]
 [ 0  0  2  7 45 13  0 30  0  3]
 [ 0  0  2 13  2 74  1  8  0  0]
 [ 0  0  2 18 19 16 36  7  0  2]
 [ 0  0  0  1  0 16  0 82  0  1]
 [ 5  3  0  6  3  1  0  0 62 20]
 [ 0  2  0  2  0  1  0  1  4 90]]
