## **VGG-Implementation**

Small introduction about the implementation

#### **Importing required libraries**

In [1]:
import gzip
import os
import random
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score,recall_score
from torch.utils.data import DataLoader, Dataset, Subset, random_split
from torchvision import transforms

### **Data Loading**

In [2]:
urls = {
    "train_images": "https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz",
    "train_labels": "https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz",
    "test_images": "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz",
    "test_labels": "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz"
}

NOTE: Official URL dataset extract fails - http://yann.lecun.com/exdb/mnist/

In [3]:
# Function to download and extract files
def download_and_extract(url, path, is_label=False):
    if not os.path.exists(path):
        print(f"Downloading {url}...")
        urllib.request.urlretrieve(url, path)
        print(f"Downloaded {path}")
    else:
        print(f"{path} already exists, skipping download.")

    offset = 8 if is_label else 16 # Handle labels separately as their header is 8 bytes instead of 16
    with gzip.open(path, 'rb') as f:
        return np.frombuffer(f.read(), np.uint8, offset=offset)

In [4]:
def load_mnist_data():
    os.makedirs('./data', exist_ok=True)

    train_images = download_and_extract(urls['train_images'], './data/train-images.gz')
    train_labels = download_and_extract(urls['train_labels'], './data/train-labels.gz', is_label=True)
    test_images = download_and_extract(urls['test_images'], './data/test-images.gz')
    test_labels = download_and_extract(urls['test_labels'], './data/test-labels.gz', is_label=True)

    # Reshape and normalize the images
    train_images = train_images.reshape(-1, 28, 28) / 255.0
    test_images = test_images.reshape(-1, 28, 28) / 255.0

    # Combine train and test datasets for custom split
    images = np.concatenate((train_images, test_images), axis=0)
    labels = np.concatenate((train_labels, test_labels), axis=0)

    return images, labels

images, labels = load_mnist_data()
print("MNIST data downloaded and loaded.")

./data/train-images.gz already exists, skipping download.
./data/train-labels.gz already exists, skipping download.
./data/test-images.gz already exists, skipping download.
./data/test-labels.gz already exists, skipping download.
MNIST data downloaded and loaded.


### **1.2 Data Preprocessing**

In [5]:
def split_data(images, labels):
    total_size = len(images)

    # Sizes for each split
    train_size = int(0.6 * total_size)
    val_size = int(0.2 * total_size)
    test_size = total_size - train_size - val_size

    train_images, train_labels = images[:train_size], labels[:train_size]
    val_images, val_labels = images[train_size:train_size+val_size], labels[train_size:train_size+val_size]
    test_images, test_labels = images[train_size+val_size:], labels[train_size+val_size:]

    return (train_images, train_labels), (val_images, val_labels), (test_images, test_labels)

# Split into training, validation, and testing sets (60%, 20%, 20%)
(train_images, train_labels), (val_images, val_labels), (test_images, test_labels) = split_data(images, labels)

print(f"Training set size: {len(train_images)}")
print(f"Validation set size: {len(val_images)}")
print(f"Test set size: {len(test_images)}")

Training set size: 42000
Validation set size: 14000
Test set size: 14000


Convert Numpy arrays into tensor format

In [9]:
class MNISTDataset(Dataset):
    def __init__(self, images, labels):
        # Convert numpy arrays to torch tensors
        self.images = torch.tensor(images, dtype=torch.float32).unsqueeze(1)
        self.labels = torch.tensor(labels, dtype=torch.long).clone().detach()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

    def get_images(self):
        return self.images

In [10]:
# Create dataset objects
train_dataset = MNISTDataset(train_images, train_labels)
val_dataset = MNISTDataset(val_images, val_labels)
test_dataset = MNISTDataset(test_images, test_labels)

# Create DataLoader objects
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

train_iter = iter(train_loader)
images, labels = next(train_iter)
print(f"Batch image shape: {images.shape}, Batch label shape: {labels.shape}")

Batch image shape: torch.Size([64, 1, 28, 28]), Batch label shape: torch.Size([64])


In [None]:
print(f"Number of rows in train_dataset: {len(train_dataset)}")
print(f"Number of rows in test_dataset: {len(test_dataset)}")
print(f"Number of rows in val_dataset: {len(val_dataset)}")

Number of rows in train_dataset: 42000
Number of rows in test_dataset: 14000
Number of rows in val_dataset: 14000


### **2. Model Building**

### 2.1 Model Creation ###

In [16]:
from torchvision import datasets, transforms

# Preprocessing for grayscale images
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convert 1-channel to 3-channel
    transforms.Resize((224, 224)),                # Resize to VGG16 input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Update dataset
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Define constants
BATCH_SIZE = 100
LEARNING_RATE = 0.1
N_CLASSES = 10
EPOCH = 10

def conv_layer(channel_in, channel_out, k_size, p_size):
    layer = nn.Sequential(
        nn.Conv2d(channel_in, channel_out, kernel_size=k_size, padding=p_size),
        nn.BatchNorm2d(channel_out),
        nn.ReLU()
    )
    return layer

def vgg_conv_block(in_List, out_list, k_list, p_list, pooling_k, pooling_s):
    layers = [conv_layer(in_List[i], out_list[i], k_list[i], p_list[i]) for i in range(len(in_List))]
    layers += [nn.MaxPool2d(kernel_size=pooling_k, stride=pooling_s)]
    return nn.Sequential(*layers)

def vgg_fc_layer(size_in, size_out):
    layer = nn.Sequential(
        nn.Linear(size_in, size_out),
        nn.BatchNorm1d(size_out),  # Changed from BatchNorm2d to BatchNorm1d
        nn.ReLU()
    )
    return layer

class VGG16(nn.Module):
    def __init__(self, n_classes=1000):
        super(VGG16, self).__init__()

        # Conv blocks (BatchNorm + ReLU activation added in each block)
        self.layer1 = vgg_conv_block([3, 64], [64, 64], [3, 3], [1, 1], 2, 2)
        self.layer2 = vgg_conv_block([64, 128], [128, 128], [3, 3], [1, 1], 2, 2)
        self.layer3 = vgg_conv_block([128, 256, 256], [256, 256, 256], [3, 3, 3], [1, 1, 1], 2, 2)
        self.layer4 = vgg_conv_block([256, 512, 512], [512, 512, 512], [3, 3, 3], [1, 1, 1], 2, 2)
        self.layer5 = vgg_conv_block([512, 512, 512], [512, 512, 512], [3, 3, 3], [1, 1, 1], 2, 2)

        # FC layers
        self.layer6 = vgg_fc_layer(7 * 7 * 512, 4096)
        self.layer7 = vgg_fc_layer(4096, 4096)

        # Final layer
        self.layer8 = nn.Linear(4096, n_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        vgg16_features = self.layer5(out)
        out = vgg16_features.view(out.size(0), -1)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)

        return vgg16_features, out

# Setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
vgg16 = VGG16(n_classes=N_CLASSES).to(device)

# Loss, Optimizer & Scheduler
cost = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# Training
for epoch in range(EPOCH):
    avg_loss = 0.0
    cnt = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        _, outputs = vgg16(images)
        loss = cost(outputs, labels)
        avg_loss += loss.item()
        cnt += 1
        print("[E: %d] loss: %f, avg_loss: %f" % (epoch, loss.item(), avg_loss / cnt))
        loss.backward()
        optimizer.step()
    scheduler.step(avg_loss / cnt)

# Testing
vgg16.eval()
correct = 0
total = 0
for images, labels in test_loader:
    images, labels = images.to(device), labels.to(device)
    _, outputs = vgg16(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    print(predicted, labels, correct, total)
print("Average Accuracy: %f" % (100 * correct / total))


[E: 0] loss: 2.322901, avg_loss: 2.322901
[E: 0] loss: 77.279266, avg_loss: 39.801083
[E: 0] loss: 138.544617, avg_loss: 72.715595
[E: 0] loss: 114.744499, avg_loss: 83.222821
[E: 0] loss: 74.535858, avg_loss: 81.485428
[E: 0] loss: 65.646431, avg_loss: 78.845595
[E: 0] loss: 63.372414, avg_loss: 76.635141
[E: 0] loss: 50.825844, avg_loss: 73.408979
[E: 0] loss: 39.005375, avg_loss: 69.586356
[E: 0] loss: 26.294472, avg_loss: 65.257168
[E: 0] loss: 21.978323, avg_loss: 61.322727
[E: 0] loss: 12.326973, avg_loss: 57.239748
[E: 0] loss: 6.511987, avg_loss: 53.337612
[E: 0] loss: 19.193344, avg_loss: 50.898736
[E: 0] loss: 4.866952, avg_loss: 47.829950
[E: 0] loss: 7.894782, avg_loss: 45.334002
[E: 0] loss: 4.171123, avg_loss: 42.912656
[E: 0] loss: 4.377792, avg_loss: 40.771831
[E: 0] loss: 2.648678, avg_loss: 38.765349
[E: 0] loss: 3.017067, avg_loss: 36.977935
[E: 0] loss: 2.350429, avg_loss: 35.329006
[E: 0] loss: 3.500649, avg_loss: 33.882262
[E: 0] loss: 2.214882, avg_loss: 32.50542