In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torch.autograd import Variable

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.block1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.fc1 = nn.Linear(in_features=32 * 7 * 7, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.out = nn.Linear(in_features=84, out_features=10)
        self.dropout = nn.Dropout(0.5)  # Add dropout for regularization

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = x.view(x.size(0), -1)
        x = nn.ReLU()(self.fc1(x))
        x = self.dropout(x)  # Apply dropout after ReLU
        x = nn.ReLU()(self.fc2(x))
        x = self.dropout(x)  # Apply dropout after ReLU
        return self.out(x)

# Hyperparameters
num_epochs = 20  # Increased epochs to show early stopping effect
batch_size = 64
learning_rate = 0.001
weight_decay = 1e-5  # L2 regularization

# Data loading and preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

full_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

loaders = {"train": train_loader, "val": val_loader, "test": test_loader}

# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cnn = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Early stopping parameters
patience = 5
best_val_loss = float('inf')
counter = 0

# Training loop
for epoch in range(num_epochs):
    cnn.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(loaders["train"]):
        b_x = Variable(images).to(device)
        b_y = Variable(labels).to(device)

        output = cnn(b_x)
        loss = loss_func(output, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    train_loss = running_loss / len(loaders["train"])

    # Validation
    cnn.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in loaders["val"]:
            images = images.to(device)
            labels = labels.to(device)
            outputs = cnn(images)
            loss = loss_func(outputs, labels)
            val_loss += loss.item()

    val_loss /= len(loaders["val"])

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(cnn.state_dict(), 'best_cnn_model.pth')
    else:
        counter += 1
        if counter >= patience:
            print(f'Early stopping triggered after epoch {epoch+1}')
            break

# Load best model for evaluation
cnn.load_state_dict(torch.load('best_cnn_model.pth'))

# Evaluation
cnn.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in loaders["test"]:
        images = images.to(device)
        labels = labels.to(device)
        outputs = cnn(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:12<00:00, 825531.20it/s] 


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 347911.12it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3187560.85it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2283688.42it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch [1/20], Train Loss: 0.4246, Val Loss: 0.0754
Epoch [2/20], Train Loss: 0.1430, Val Loss: 0.0546
Epoch [3/20], Train Loss: 0.1010, Val Loss: 0.0466
Epoch [4/20], Train Loss: 0.0830, Val Loss: 0.0458
Epoch [5/20], Train Loss: 0.0716, Val Loss: 0.0402
Epoch [6/20], Train Loss: 0.0587, Val Loss: 0.0438
Epoch [7/20], Train Loss: 0.0553, Val Loss: 0.0433
Epoch [8/20], Train Loss: 0.0508, Val Loss: 0.0356
Epoch [9/20], Train Loss: 0.0444, Val Loss: 0.0402
Epoch [10/20], Train Loss: 0.0385, Val Loss: 0.0358
Epoch [11/20], Train Loss: 0.0353, Val Loss: 0.0397
Epoch [12/20], Train Loss: 0.0361, Val Loss: 0.0413
Epoch [13/20], Train Loss: 0.0327, Val Loss: 0.0408
Early stopping triggered after epoch 13


  cnn.load_state_dict(torch.load('best_cnn_model.pth'))


Test Accuracy: 99.10%
