In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

print("--- [START] Phase 1 & 2: Loading Data and Initializing Baseline Model ---")

# 1. Data Preparation
# Normalizing data to have a mean and std of 0.5
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

print("Downloading and loading Fashion-MNIST dataset...")
trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)
print(f"Data Loaded: {len(trainset)} training images and {len(testset)} test images.")

# 2. Architecture Definition
print("\nDefining the Baseline CNN architecture (2 Conv layers + Dropout)...")
class BaselineCNN(nn.Module):
    def __init__(self):
        super(BaselineCNN, self).__init__()
        # Feature Extraction: Convolution -> ReLU -> Convolution -> ReLU -> MaxPool -> Dropout
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25)
        )
        # Classification: Fully Connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 12 * 12, 128), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

model = BaselineCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(model.parameters(), lr=1.0)
print("Model initialized. Optimizer: Adadelta | Loss: CrossEntropy.")

# 3. Training Loop
print("\n--- Phase 3: Training Phase (5 Epochs) ---")
for epoch in range(5):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        optimizer.zero_grad() # Reset gradients
        outputs = model(inputs) # Forward pass
        loss = criterion(outputs, labels) # Calculate loss
        loss.backward() # Backward pass (Backpropagation)
        optimizer.step() # Update weights
        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/5 finished. Avg Loss: {running_loss/len(trainloader):.4f}")

# 4. Evaluation
print("\n--- Evaluation: Final Testing ---")
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Final Test Accuracy of Model 1: {100 * correct / total:.2f}%")

--- [START] Phase 1 & 2: Loading Data and Initializing Baseline Model ---
Downloading and loading Fashion-MNIST dataset...


100%|██████████| 26.4M/26.4M [00:01<00:00, 16.8MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 306kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 5.57MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 23.4MB/s]


Data Loaded: 60000 training images and 10000 test images.

Defining the Baseline CNN architecture (2 Conv layers + Dropout)...
Model initialized. Optimizer: Adadelta | Loss: CrossEntropy.

--- Phase 3: Training Phase (5 Epochs) ---
Epoch 1/5 finished. Avg Loss: 0.5330
Epoch 2/5 finished. Avg Loss: 0.3361
Epoch 3/5 finished. Avg Loss: 0.2885
Epoch 4/5 finished. Avg Loss: 0.2652
Epoch 5/5 finished. Avg Loss: 0.2459

--- Evaluation: Final Testing ---
Final Test Accuracy of Model 1: 89.98%
