In [16]:
import torch
import torchvision
import os

from torchsummary import summary
from tqdm import tqdm
from torch.utils.data import random_split, DataLoader

In [46]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and prep it for CIFAR
model = torchvision.models.vgg11(pretrained=True)
model.classifier[6] = torch.nn.Linear(4096, 10)
for module in model.modules():
    if isinstance(module, torch.nn.Dropout):
        module.p = 0.1  # Reduce Dropout rate
model = model.to(device)

# Data Transform
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]) # Standard for CIFAR10
])

# Load dataset
dataset_path = os.path.join(os.path.dirname(os.getcwd()), 'datasets/')
dataset = torchvision.datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)

train_val_test_split = [.7,.15,.15]
# Seed split so that it is consistent across multiple runs
train_dataset, val_dataset, test_dataset = random_split(dataset, train_val_test_split, generator=torch.Generator().manual_seed(42))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=8)



In [45]:
print(summary(model, (3, 32, 32)))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 1, 1]           --
|    └─Conv2d: 2-1                       [-1, 64, 32, 32]          1,792
|    └─ReLU: 2-2                         [-1, 64, 32, 32]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 16, 16]          --
|    └─Conv2d: 2-4                       [-1, 128, 16, 16]         73,856
|    └─ReLU: 2-5                         [-1, 128, 16, 16]         --
|    └─MaxPool2d: 2-6                    [-1, 128, 8, 8]           --
|    └─Conv2d: 2-7                       [-1, 256, 8, 8]           295,168
|    └─ReLU: 2-8                         [-1, 256, 8, 8]           --
|    └─Conv2d: 2-9                       [-1, 256, 8, 8]           590,080
|    └─ReLU: 2-10                        [-1, 256, 8, 8]           --
|    └─MaxPool2d: 2-11                   [-1, 256, 4, 4]           --
|    └─Conv2d: 2-12                      [-1, 512, 4, 4]           1

In [44]:
#print(summary(model, (3, 32, 32)))
for name, param in model.named_parameters():
    if param.requires_grad and param.grad is not None:
        print(name, param.grad.mean())

features.0.weight tensor(0., device='cuda:0')
features.0.bias tensor(0., device='cuda:0')
features.3.weight tensor(0., device='cuda:0')
features.3.bias tensor(0., device='cuda:0')
features.6.weight tensor(0., device='cuda:0')
features.6.bias tensor(0., device='cuda:0')
features.8.weight tensor(0., device='cuda:0')
features.8.bias tensor(0., device='cuda:0')
features.11.weight tensor(0., device='cuda:0')
features.11.bias tensor(0., device='cuda:0')
features.13.weight tensor(0., device='cuda:0')
features.13.bias tensor(0., device='cuda:0')
features.16.weight tensor(0., device='cuda:0')
features.16.bias tensor(7.6204e-05, device='cuda:0')
features.18.weight tensor(3.5673e-08, device='cuda:0')
features.18.bias tensor(4.4045e-06, device='cuda:0')
classifier.0.weight tensor(3.3780e-09, device='cuda:0')
classifier.0.bias tensor(1.4323e-07, device='cuda:0')
classifier.3.weight tensor(1.0081e-07, device='cuda:0')
classifier.3.bias tensor(6.1836e-07, device='cuda:0')
classifier.6.weight tensor(5

In [47]:
epochs = 10
lr = 1e-2
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    model.train()
    train_running_loss = val_running_loss = 0.0
    train_corr = val_corr = 0
    train_total = val_total = 0

    for batch in tqdm(train_loader):

        x, y = batch
        x = x.to(device)
        y = y.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(x)
        loss = criterion(outputs, y)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate loss and accuracy
        train_running_loss += loss.item()
        predicted = torch.argmax(outputs, axis=1)
        train_total += y.size(0)
        train_corr += predicted.eq(y).sum().item()

    print(f"Epoch [{epoch+1}/{epochs}], Train_Loss: {train_running_loss/len(train_loader):.4f}, Train_Accuracy: {100. * train_corr / train_total:.2f}%")

    for batch in tqdm(val_loader):

        x, y = batch
        x = x.to(device)
        y = y.to(device)

        # Forward pass
        outputs = model(x)
        loss = criterion(outputs, y)

        # Accumulate loss and accuracy
        val_running_loss += loss.item()
        predicted = torch.argmax(outputs, axis=1)
        val_total += y.size(0)
        val_corr += predicted.eq(y).sum().item()

    print(f"Epoch [{epoch+1}/{epochs}], Val_Loss: {val_running_loss/len(train_loader):.4f}, Val_Accuracy: {100. * val_corr / val_total:.2f}%")



100%|██████████| 547/547 [00:06<00:00, 78.44it/s]


Epoch [1/10], Train_Loss: 0.8252, Train_Accuracy: 71.51%


100%|██████████| 118/118 [00:00<00:00, 146.44it/s]


Epoch [1/10], Val_Loss: 0.1353, Val_Accuracy: 78.27%


100%|██████████| 547/547 [00:06<00:00, 78.81it/s]


Epoch [2/10], Train_Loss: 0.4874, Train_Accuracy: 83.08%


100%|██████████| 118/118 [00:00<00:00, 149.37it/s]


Epoch [2/10], Val_Loss: 0.1264, Val_Accuracy: 79.68%


100%|██████████| 547/547 [00:06<00:00, 79.58it/s]


Epoch [3/10], Train_Loss: 0.3387, Train_Accuracy: 88.22%


100%|██████████| 118/118 [00:00<00:00, 150.70it/s]


Epoch [3/10], Val_Loss: 0.1278, Val_Accuracy: 81.04%


100%|██████████| 547/547 [00:07<00:00, 77.86it/s]


Epoch [4/10], Train_Loss: 0.2326, Train_Accuracy: 91.81%


100%|██████████| 118/118 [00:00<00:00, 143.76it/s]


Epoch [4/10], Val_Loss: 0.1216, Val_Accuracy: 83.07%


100%|██████████| 547/547 [00:07<00:00, 74.28it/s]


Epoch [5/10], Train_Loss: 0.1489, Train_Accuracy: 94.93%


100%|██████████| 118/118 [00:00<00:00, 147.08it/s]


Epoch [5/10], Val_Loss: 0.1289, Val_Accuracy: 83.11%


100%|██████████| 547/547 [00:07<00:00, 77.85it/s]


Epoch [6/10], Train_Loss: 0.0901, Train_Accuracy: 96.98%


100%|██████████| 118/118 [00:00<00:00, 143.37it/s]


Epoch [6/10], Val_Loss: 0.1439, Val_Accuracy: 83.37%


100%|██████████| 547/547 [00:06<00:00, 78.49it/s]


Epoch [7/10], Train_Loss: 0.0566, Train_Accuracy: 98.17%


100%|██████████| 118/118 [00:00<00:00, 146.56it/s]


Epoch [7/10], Val_Loss: 0.1622, Val_Accuracy: 83.47%


100%|██████████| 547/547 [00:07<00:00, 77.58it/s]


Epoch [8/10], Train_Loss: 0.0340, Train_Accuracy: 98.95%


100%|██████████| 118/118 [00:00<00:00, 144.81it/s]


Epoch [8/10], Val_Loss: 0.1734, Val_Accuracy: 83.84%


100%|██████████| 547/547 [00:07<00:00, 77.11it/s]


Epoch [9/10], Train_Loss: 0.0163, Train_Accuracy: 99.59%


100%|██████████| 118/118 [00:00<00:00, 144.70it/s]


Epoch [9/10], Val_Loss: 0.1870, Val_Accuracy: 84.56%


100%|██████████| 547/547 [00:07<00:00, 77.36it/s]


Epoch [10/10], Train_Loss: 0.0213, Train_Accuracy: 99.45%


100%|██████████| 118/118 [00:00<00:00, 145.44it/s]

Epoch [10/10], Val_Loss: 0.1813, Val_Accuracy: 84.47%





In [50]:
# Save model
path = os.path.join(os.path.join(os.path.dirname(os.getcwd()), "models"), "vgg11_cifar10.pth")
torch.save(model.state_dict(), path)