In [6]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# ------------------------------------------------------------
# 2. Data transforms and loaders
# ------------------------------------------------------------
# Per-channel mean / std used to normalise images in both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2470, 0.2435, 0.2616)

# Training pipeline: random horizontal flip and padded random crop act as
# data augmentation before tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: no random augmentation, so the test metric is
# computed on the unmodified images and is reproducible between runs.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Backward-compatible alias: the original code exposed one `transform`
# (the augmented pipeline) under this name.
transform = train_transform

train_dataset = datasets.CIFAR10(root="./data", train=True,
                                 transform=train_transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False,
                                transform=test_transform, download=True)

# Only the training loader is shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=2)

Using device: cuda
Files already downloaded and verified
Files already downloaded and verified


In [7]:
# ============================================================
# Basic CNN on CIFAR-10 (PyTorch + CUDA)
# ============================================================

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
import time

# ------------------------------------------------------------
# 1. Check device (GPU/CPU)
# ------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ------------------------------------------------------------
# 2. Data transforms and loaders
# ------------------------------------------------------------
# Per-channel mean / std used to normalise images in both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2470, 0.2435, 0.2616)

# Training pipeline: random horizontal flip and padded random crop act as
# data augmentation before tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: no random augmentation, so the test metric is
# computed on the unmodified images and is reproducible between runs.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Backward-compatible alias: the original code exposed one `transform`
# (the augmented pipeline) under this name.
transform = train_transform

train_dataset = datasets.CIFAR10(root="./data", train=True,
                                 transform=train_transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False,
                                transform=test_transform, download=True)

# Only the training loader is shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=2)

# ------------------------------------------------------------
# 3. Define CNN model
# ------------------------------------------------------------
class BasicCNN(nn.Module):
    """Two-convolution baseline classifier that outputs 10 logits.

    Each 3x3 convolution (padding 1) is followed by ReLU and a 2x2 max-pool,
    so the two pooling steps divide each spatial side by four. ``fc1``
    expects ``64 * 8 * 8`` features, which corresponds to 32x32 inputs.
    Dropout (p=0.5) is applied after the first fully-connected layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels; 32x32 spatial size yields the
                ``64 * 8 * 8`` features that ``fc1`` expects.

        Returns:
            Tensor with one row of 10 logits per input sample.

        Raises:
            RuntimeError: If the flattened feature count per sample does not
                match ``fc1`` (e.g. the input is not 32x32).
        """
        # Apply pooling after the first convolution block
        x = self.pool(F.relu(self.conv1(x)))

        # Apply pooling after the second convolution block
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 64 * 8 * 8)) means a wrongly sized input fails in fc1
        # rather than being silently reshaped into extra batch rows.
        x = x.view(x.size(0), -1)

        # Apply activation and dropout after the first fully-connected layer
        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        # Final output layer
        x = self.fc2(x)
        return x

model = BasicCNN().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified
BasicCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)
Epoch [1/20] | Loss: 1.6629 | Train Acc: 39.22%
Epoch [2/20] | Loss: 1.3505 | Train Acc: 50.84%
Epoch [3/20] | Loss: 1.2128 | Train Acc: 56.64%
Epoch [4/20] | Loss: 1.1372 | Train Acc: 59.74%
Epoch [5/20] | Loss: 1.0961 | Train Acc: 61.14%
Epoch [6/20] | Loss: 1.0551 | Train Acc: 62.70%
Epoch [7/20] | Loss: 1.0247 | Train Acc: 63.77%
Epoch [8/20] | Loss: 0.9961 | Train Acc: 64.95%
Epoch [9/20] | Loss: 0.9761 | Train Acc: 65.99%
Epoch [10/20] | Loss: 0.9594 | Train Acc: 66.31%
Epoch [

In [8]:
#One More Convolutional Layer
class CNN_MoreConv(nn.Module):
    """Variant with a third 3x3 convolution (3 -> 32 -> 64 -> 128 channels).

    Max-pooling follows the second and third convolutions only, so ``fc1``
    receives ``128 * 8 * 8`` features (an 8x8 map for 32x32 inputs).
    The same dropout module (p=0.5) is applied to the pooled feature map
    and again after ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=128 * 8 * 8, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # First convolution keeps the spatial size (no pooling).
        features = F.relu(self.conv1(x))

        # Second and third convolutions are each followed by a 2x2 max-pool.
        features = F.relu(self.conv2(features))
        features = self.pool(features)
        features = F.relu(self.conv3(features))
        features = self.pool(features)

        # Dropout on the feature map, then flatten per sample.
        features = self.dropout(features)
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return logits
model = CNN_MoreConv().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")



CNN_MoreConv(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=8192, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.6600 | Train Acc: 39.00%
Epoch [2/20] | Loss: 1.3109 | Train Acc: 52.83%
Epoch [3/20] | Loss: 1.1571 | Train Acc: 58.94%
Epoch [4/20] | Loss: 1.0698 | Train Acc: 62.23%
Epoch [5/20] | Loss: 1.0120 | Train Acc: 64.35%
Epoch [6/20] | Loss: 0.9697 | Train Acc: 66.07%
Epoch [7/20] | Loss: 0.9345 | Train Acc: 67.24%
Epoch [8/20] | Loss: 0.9103 | Train Acc: 67.95%
Epoch [9/20] | Loss: 0.8903 | Train Acc: 68.74%
Epoch [10/20] | Loss: 0.8706 | Train Acc: 69.45%
Epoch [11/20] | Loss

In [9]:
class CNN_LessConv(nn.Module):
    """Variant with a single 3x3 convolution (3 -> 32 channels).

    One 2x2 max-pool follows the convolution, so ``fc1`` receives
    ``32 * 16 * 16`` features (a 16x16 map for 32x32 inputs). The same
    dropout module (p=0.5) is applied to the pooled feature map and again
    after ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=32 * 16 * 16, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # Convolution -> ReLU -> 2x2 max-pool.
        features = self.conv1(x)
        features = F.relu(features)
        features = self.pool(features)

        # Dropout on the feature map, then flatten per sample.
        features = self.dropout(features)
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return logits
    
model = CNN_LessConv().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


CNN_LessConv(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=8192, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.7924 | Train Acc: 34.05%
Epoch [2/20] | Loss: 1.6076 | Train Acc: 40.92%
Epoch [3/20] | Loss: 1.5496 | Train Acc: 43.51%
Epoch [4/20] | Loss: 1.5157 | Train Acc: 44.68%
Epoch [5/20] | Loss: 1.4826 | Train Acc: 46.11%
Epoch [6/20] | Loss: 1.4597 | Train Acc: 47.11%
Epoch [7/20] | Loss: 1.4386 | Train Acc: 47.86%
Epoch [8/20] | Loss: 1.4298 | Train Acc: 48.30%
Epoch [9/20] | Loss: 1.4154 | Train Acc: 48.82%
Epoch [10/20] | Loss: 1.3973 | Train Acc: 49.70%
Epoch [11/20] | Loss: 1.3844 | Train Acc: 49.99%
Epoch [12/20] | Loss: 1.3748 | Train Acc: 50.63%
Epoch [13/20] | Loss: 1.3748 | Train Acc: 50.40%
Epoch [14/20] | Loss: 1.3549

In [10]:
class CNN_BiggerKernel(nn.Module):
    """Variant using 5x5 convolutions (padding 2) instead of 3x3.

    Only the second convolution is followed by a 2x2 max-pool, so ``fc1``
    receives ``64 * 16 * 16`` features (a 16x16 map for 32x32 inputs).
    The same dropout module (p=0.5) is applied to the pooled feature map
    and again after ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=64 * 16 * 16, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # First convolution keeps the spatial size (no pooling).
        features = F.relu(self.conv1(x))

        # Second convolution is followed by the single 2x2 max-pool.
        features = F.relu(self.conv2(features))
        features = self.pool(features)

        # Dropout on the feature map, then flatten per sample.
        features = self.dropout(features)
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return logits
model = CNN_BiggerKernel().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


CNN_BiggerKernel(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.6756 | Train Acc: 38.75%
Epoch [2/20] | Loss: 1.4044 | Train Acc: 49.19%
Epoch [3/20] | Loss: 1.3103 | Train Acc: 53.11%
Epoch [4/20] | Loss: 1.2583 | Train Acc: 54.90%
Epoch [5/20] | Loss: 1.2217 | Train Acc: 56.29%
Epoch [6/20] | Loss: 1.1891 | Train Acc: 57.75%
Epoch [7/20] | Loss: 1.1633 | Train Acc: 58.73%
Epoch [8/20] | Loss: 1.1418 | Train Acc: 59.69%
Epoch [9/20] | Loss: 1.1256 | Train Acc: 60.21%
Epoch [10/20] | Loss: 1.1055 | Train Acc: 60.91%
Epoch [11/20] | Loss: 1.0906 | Train Acc: 61.53%
Epoch [12/20] | Loss: 1.0777 | Train Acc: 62

In [11]:
class CNN_NoDropout(nn.Module):
    """Two-convolution variant without any dropout layer.

    Only the second 3x3 convolution is followed by a 2x2 max-pool, so
    ``fc1`` receives ``64 * 16 * 16`` features (a 16x16 map for 32x32
    inputs).
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 16 * 16, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # First convolution keeps the spatial size (no pooling).
        features = F.relu(self.conv1(x))

        # Second convolution is followed by the single 2x2 max-pool.
        features = F.relu(self.conv2(features))
        features = self.pool(features)

        # Flatten per sample and classify; no dropout anywhere.
        flat = features.view(features.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return logits
model = CNN_NoDropout().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


CNN_NoDropout(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.4885 | Train Acc: 46.00%
Epoch [2/20] | Loss: 1.1341 | Train Acc: 59.51%
Epoch [3/20] | Loss: 1.0170 | Train Acc: 63.76%
Epoch [4/20] | Loss: 0.9430 | Train Acc: 66.76%
Epoch [5/20] | Loss: 0.8878 | Train Acc: 68.63%
Epoch [6/20] | Loss: 0.8450 | Train Acc: 70.20%
Epoch [7/20] | Loss: 0.8065 | Train Acc: 71.62%
Epoch [8/20] | Loss: 0.7705 | Train Acc: 72.62%
Epoch [9/20] | Loss: 0.7465 | Train Acc: 73.89%
Epoch [10/20] | Loss: 0.7288 | Train Acc: 74.48%
Epoch [11/20] | Loss: 0.7093 | Train Acc: 75.33%
Epoch [12/20] | Loss: 0.6944 | Train Acc: 75.67%
Epoch [13/20] | Loss: 0.6751 | Train Acc:

In [12]:
class CNN_AvgPool(nn.Module):
    """Two-convolution variant that pools with 2x2 average pooling.

    Only the second 3x3 convolution is followed by the pooling layer, so
    ``fc1`` receives ``64 * 16 * 16`` features (a 16x16 map for 32x32
    inputs). The same dropout module (p=0.5) is applied to the pooled
    feature map and again after ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)  # average instead of max pooling
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(in_features=64 * 16 * 16, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # First convolution keeps the spatial size (no pooling).
        features = F.relu(self.conv1(x))

        # Second convolution is followed by the single 2x2 average pool.
        features = F.relu(self.conv2(features))
        features = self.pool(features)

        # Dropout on the feature map, then flatten per sample.
        features = self.dropout(features)
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return logits
model = CNN_AvgPool().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


CNN_AvgPool(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=16384, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.6631 | Train Acc: 39.19%
Epoch [2/20] | Loss: 1.4022 | Train Acc: 48.99%
Epoch [3/20] | Loss: 1.2795 | Train Acc: 53.96%
Epoch [4/20] | Loss: 1.2123 | Train Acc: 56.74%
Epoch [5/20] | Loss: 1.1611 | Train Acc: 58.65%
Epoch [6/20] | Loss: 1.1231 | Train Acc: 60.43%
Epoch [7/20] | Loss: 1.0994 | Train Acc: 60.91%
Epoch [8/20] | Loss: 1.0717 | Train Acc: 62.17%
Epoch [9/20] | Loss: 1.0521 | Train Acc: 62.64%
Epoch [10/20] | Loss: 1.0370 | Train Acc: 63.40%
Epoch [11/20] | Loss: 1.0262 | Train Acc: 63.77%
Epoch [12/20] | Loss: 1.0076 | Train Acc: 64.41%
Epoch [13/20] | Loss: 0.9935 

In [13]:
class CNN_MorePooling(nn.Module):
    """Two-convolution variant with a 2x2 max-pool after each convolution.

    Pooling twice leaves ``64 * 8 * 8`` features for ``fc1`` (an 8x8 map
    for 32x32 inputs). The same dropout module (p=0.5) is applied to the
    pooled feature map and again after ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        # Smaller spatial size after the extra pooling step.
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # First convolution block, followed by the first pooling.
        features = F.relu(self.conv1(x))
        features = self.pool(features)

        # Second convolution block, followed by the second pooling.
        features = F.relu(self.conv2(features))
        features = self.pool(features)

        # Dropout on the feature map, then flatten per sample.
        features = self.dropout(features)
        flat = features.view(features.shape[0], -1)

        hidden = self.dropout(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return logits
model = CNN_MorePooling().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")


CNN_MorePooling(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=4096, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Epoch [1/20] | Loss: 1.6657 | Train Acc: 38.76%
Epoch [2/20] | Loss: 1.4051 | Train Acc: 49.20%
Epoch [3/20] | Loss: 1.2895 | Train Acc: 53.54%
Epoch [4/20] | Loss: 1.2132 | Train Acc: 56.72%
Epoch [5/20] | Loss: 1.1719 | Train Acc: 58.13%
Epoch [6/20] | Loss: 1.1354 | Train Acc: 59.71%
Epoch [7/20] | Loss: 1.1038 | Train Acc: 60.88%
Epoch [8/20] | Loss: 1.0794 | Train Acc: 61.70%
Epoch [9/20] | Loss: 1.0565 | Train Acc: 62.78%
Epoch [10/20] | Loss: 1.0504 | Train Acc: 63.13%
Epoch [11/20] | Loss: 1.0270 | Train Acc: 63.55%
Epoch [12/20] | Loss: 1.0145 | Train Acc: 64.3

In [None]:
class CNN_MorePooling(nn.Module):
    """Two-convolution variant with a 2x2 max-pool after each convolution.

    Pooling twice leaves ``64 * 8 * 8`` features for ``fc1`` (an 8x8 map
    for 32x32 inputs). The same dropout module (p=0.5) is applied to the
    pooled feature map and again after ``fc1``.

    NOTE(review): a class with this name and the same layers is already
    defined in the preceding cell; running this cell rebinds the name.
    """

    def __init__(self):
        super().__init__()
        # Creation order is unchanged so the printed summary and the
        # parameter initialisation order stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(p=0.5)
        # Smaller spatial size after the extra pooling step.
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return one row of 10 logits per input sample."""
        # conv -> ReLU -> pool, applied twice.
        pooled_once = self.pool(F.relu(self.conv1(x)))
        pooled_twice = self.pool(F.relu(self.conv2(pooled_once)))

        # Dropout on the feature map, then flatten per sample.
        regularised = self.dropout(pooled_twice)
        flat = regularised.view(regularised.shape[0], -1)

        hidden = F.relu(self.fc1(flat))
        hidden = self.dropout(hidden)
        return self.fc2(hidden)
# NOTE(review): this cell repeats the CNN_MorePooling experiment from the
# preceding cell with the same settings.
model = CNN_MorePooling().to(device)
print(model)

# ------------------------------------------------------------
# 4. Define loss and optimizer
# ------------------------------------------------------------
# Cross-entropy on the raw logits; Adam with a fixed learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ------------------------------------------------------------
# 5. Training loop
# ------------------------------------------------------------
epochs = 20
start_time = time.time()

for epoch in range(epochs):
    model.train()  # training mode for the optimisation pass
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss and the number of correct predictions.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2f}%")

# Stop the clock here so the reported training time excludes the evaluation
# pass below (previously the elapsed time was taken after evaluation).
training_minutes = (time.time() - start_time) / 60

# ------------------------------------------------------------
# 6. Evaluation on test data
# ------------------------------------------------------------
model.eval()  # evaluation mode; gradients are not needed below
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

test_acc = 100 * correct / total
print(f"\n✅ Test Accuracy: {test_acc:.2f}%")
print(f"⏱ Total Training Time: {training_minutes:.2f} mins")
