In [None]:
!pip install torch torchvision matplotlib seaborn scikit-learn

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support, accuracy_score

# --- Runtime configuration ---------------------------------------------------
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Training hyperparameters
batch_size, learning_rate, num_epochs = 100, 0.001, 15

# --- Task 1: Data Loading and Exploration -------------------------------------
# (a) Load the MNIST dataset using torchvision.datasets.MNIST
# Each image becomes a tensor and is then standardised with the MNIST mean/std.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

# Training and test splits, stored under ./data (download requested for both)
train_dataset = torchvision.datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST('./data', train=False, transform=transform, download=True)

# Mini-batch iterators; only the training split is shuffled
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# (b) Perform exploratory data analysis (EDA)
# Display sample images from the dataset
def show_sample_images(dataset, num_samples=5):
    """Save a row of randomly chosen samples to ``sample_images.png``.

    Args:
        dataset: Indexable, sized collection whose items are
            ``(image, label)`` pairs. Each image must support
            ``.squeeze().numpy()`` and yield something ``imshow`` can draw.
        num_samples: Number of images to draw. Indices are sampled
            independently, so the same sample may appear more than once.
    """
    # squeeze=False keeps `axes` a 2-D array even when there is a single
    # column, so a request for exactly one sample no longer fails on indexing.
    fig, axes = plt.subplots(1, num_samples, figsize=(12, 2), squeeze=False)
    for ax in axes[0]:
        idx = np.random.randint(0, len(dataset))
        img, label = dataset[idx]
        ax.imshow(img.squeeze().numpy(), cmap='gray')
        ax.set_title(f'Label: {label}')
        ax.axis('off')
    fig.tight_layout()
    fig.savefig('sample_images.png')
    # Close this specific figure so repeated calls do not accumulate figures.
    plt.close(fig)

# Calculate and report class distribution
def plot_class_distribution(dataset):
    """Plot and print how many samples each class has.

    The bar chart is written to ``class_distribution.png``; the per-class
    counts and percentages are printed to stdout.

    Args:
        dataset: Sized collection of ``(image, label)`` pairs. When it exposes
            a ``targets`` sequence of matching length and has no
            ``target_transform``, the labels are read from ``targets``
            directly; otherwise the dataset is iterated item by item.
    """
    # Iterating the dataset loads and transforms every image just to discard
    # it. Reading the stored targets gives the same labels without that work,
    # but only when no target_transform would alter them on access.
    stored_targets = getattr(dataset, 'targets', None)
    can_use_stored = (
        stored_targets is not None
        and getattr(dataset, 'target_transform', None) is None
        and len(stored_targets) == len(dataset)
    )
    if can_use_stored:
        labels = np.asarray(stored_targets)
    else:
        labels = np.asarray([label for _, label in dataset])

    unique_labels, counts = np.unique(labels, return_counts=True)

    plt.figure(figsize=(10, 6))
    plt.bar(unique_labels, counts)
    plt.xticks(unique_labels)
    plt.xlabel('Digit Classes')
    plt.ylabel('Frequency')
    plt.title('Class Distribution in MNIST Dataset')
    plt.savefig('class_distribution.png')
    plt.close()

    print("Class Distribution:")
    for label, count in zip(unique_labels, counts):
        print(f"Class {label}: {count} samples ({count/len(dataset)*100:.2f}%)")

# Perform EDA on the training split.
# Both helpers save their figures to PNG files ('sample_images.png' and
# 'class_distribution.png') and close them; the class counts are also printed.
print("Performing Exploratory Data Analysis...")
show_sample_images(train_dataset)
plot_class_distribution(train_dataset)

# Task 2: CNN Implementation
# (a) Design and implement a CNN model
class CNNModel(nn.Module):
    """Small CNN mapping ``[batch, 1, 28, 28]`` images to 10 class logits.

    Architecture: two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``
    stages (32 then 64 channels), followed by a 128-unit fully connected
    layer with dropout and a 10-way linear output. The output is raw logits
    (no softmax), which is what ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self):
        super().__init__()
        # First convolutional stage: 1 -> 32 channels, spatial size halved by pooling
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # Second convolutional stage: 32 -> 64 channels, spatial size halved again
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier head; 64 * 7 * 7 is the flattened feature size for 28x28 inputs
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.relu3 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)  # 10 output classes for digits 0-9

    def forward(self, x):
        """Return class logits of shape ``[batch, 10]`` for ``x``.

        Args:
            x: Float tensor of shape ``[batch, 1, 28, 28]``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                ``64 * 7 * 7`` features per sample.
        """
        x = self.pool1(self.relu1(self.conv1(x)))  # -> [batch, 32, 14, 14]
        x = self.pool2(self.relu2(self.conv2(x)))  # -> [batch, 64, 7, 7]
        # Flatten per sample and keep the batch dimension fixed. Reshaping with
        # -1 on the batch axis would let a wrong input size be absorbed into the
        # batch dimension and silently return the wrong number of rows.
        x = torch.flatten(x, start_dim=1)          # -> [batch, 64*7*7]
        x = self.relu3(self.fc1(x))                # -> [batch, 128]
        x = self.dropout(x)
        x = self.fc2(x)                            # -> [batch, 10]
        return x

# Initialize the model and move its parameters to the selected device
model = CNNModel().to(device)
# Printing the module lists its layers in registration order
print(model)

# Task 3: Model Training
# (a) Split data into training and testing sets (already done with train_dataset and test_dataset)

# (b) Define a loss function and optimizer
# CrossEntropyLoss takes the raw logits returned by the model together with
# integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, using the learning rate set in the hyperparameters
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# (c) Implement a training loop for at least 15 epochs
def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs):
    """Fit ``model`` for ``num_epochs`` epochs and save the learning curves.

    Each epoch performs one optimisation pass over ``train_loader`` and then a
    gradient-free pass over ``test_loader``. For both passes the epoch loss is
    the mean of the per-batch losses and the accuracy is a percentage of the
    samples seen. The figures are printed per epoch and, once training ends,
    plotted side by side into ``training_curves.png``.

    Batches are moved to the module-level ``device`` before use.

    Args:
        model: Network to optimise; it is updated in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` batches used for the
            per-epoch validation figures.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The same ``model`` object that was passed in.
    """
    loss_train, acc_train = [], []
    loss_val, acc_val = [], []

    for epoch_idx in range(num_epochs):
        # ---- optimisation pass (dropout active) ----
        model.train()
        loss_sum, n_hits, n_seen = 0.0, 0, 0

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Clear stale gradients, backpropagate, then update the weights
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()

            # Predicted class = index of the largest logit in each row
            guesses = logits.data.max(1)[1]
            n_seen += batch_y.size(0)
            n_hits += (guesses == batch_y).sum().item()

        mean_train_loss = loss_sum / len(train_loader)
        train_pct = 100 * n_hits / n_seen
        loss_train.append(mean_train_loss)
        acc_train.append(train_pct)

        # ---- validation pass (no gradients, dropout disabled) ----
        model.eval()
        held_loss_sum, held_hits, held_seen = 0.0, 0, 0

        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                logits = model(batch_x)
                held_loss_sum += criterion(logits, batch_y).item()

                guesses = logits.data.max(1)[1]
                held_seen += batch_y.size(0)
                held_hits += (guesses == batch_y).sum().item()

        mean_val_loss = held_loss_sum / len(test_loader)
        val_pct = 100 * held_hits / held_seen
        loss_val.append(mean_val_loss)
        acc_val.append(val_pct)

        print(f'Epoch [{epoch_idx+1}/{num_epochs}], '
              f'Train Loss: {mean_train_loss:.4f}, Train Acc: {train_pct:.2f}%, '
              f'Val Loss: {mean_val_loss:.4f}, Val Acc: {val_pct:.2f}%')

    # ---- learning curves: loss on the left panel, accuracy on the right ----
    epoch_axis = range(1, num_epochs + 1)
    panels = (
        (1, loss_train, 'Training Loss', loss_val, 'Validation Loss',
         'Loss', 'Training and Validation Loss'),
        (2, acc_train, 'Training Accuracy', acc_val, 'Validation Accuracy',
         'Accuracy (%)', 'Training and Validation Accuracy'),
    )

    plt.figure(figsize=(12, 5))
    for slot, train_curve, train_name, val_curve, val_name, y_name, heading in panels:
        plt.subplot(1, 2, slot)
        plt.plot(epoch_axis, train_curve, label=train_name)
        plt.plot(epoch_axis, val_curve, label=val_name)
        plt.xlabel('Epochs')
        plt.ylabel(y_name)
        plt.title(heading)
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.close()

    return model

# Train the model
# train_model returns the same model object it was given, so `trained_model`
# and `model` refer to one network. The test loader supplies the per-epoch
# validation figures.
print("Starting model training...")
trained_model = train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs)

# Task 4: Evaluation and Analysis
def evaluate_model(model, test_loader, num_classes=10):
    """Score ``model`` on ``test_loader`` and report where it goes wrong.

    Prints accuracy and weighted precision/recall/F1, saves a confusion-matrix
    heatmap to ``confusion_matrix.png``, and prints the three classes with the
    highest misclassification rate plus the five most frequent
    (true, predicted) confusions.

    Batches are moved to the module-level ``device`` before inference.

    Args:
        model: Trained network returning one row of class scores per sample.
        test_loader: Iterable of ``(images, labels)`` batches.
        num_classes: Number of classes; labels are expected to be the integers
            ``0 .. num_classes - 1``. Defaults to 10.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, cm)`` where ``cm`` is the
        ``num_classes x num_classes`` confusion matrix (rows = true labels).
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            # Predicted class = index of the largest score in each row
            predicted = outputs.argmax(dim=1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

    print("\nModel Evaluation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    # Fix the label set explicitly so the matrix is always
    # num_classes x num_classes. Without it, a class missing from both labels
    # and predictions shrinks the matrix and breaks the indexing below.
    class_ids = list(range(num_classes))
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_ids, yticklabels=class_ids)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.savefig('confusion_matrix.png')
    plt.close()

    # Per-class misclassification rate = 1 - (correct / true samples).
    # Classes with no true samples keep a rate of 0 instead of dividing by
    # zero and producing NaN.
    support = cm.sum(axis=1)
    present = support > 0
    misclassification_rates = np.zeros(num_classes, dtype=float)
    misclassification_rates[present] = 1 - np.diag(cm)[present] / support[present]
    most_misclassified = np.argsort(misclassification_rates)[-3:][::-1]

    print("\nMost commonly misclassified digits:")
    for digit in most_misclassified:
        print(f"Digit {digit}: Misclassification rate of {misclassification_rates[digit]:.4f}")

    # Collect every non-zero off-diagonal cell as (true, predicted, count)
    common_confusions = []
    for true_label in class_ids:
        for pred_label in class_ids:
            if true_label != pred_label and cm[true_label, pred_label] > 0:
                common_confusions.append((true_label, pred_label, cm[true_label, pred_label]))

    # Stable sort: ties keep their row-major (true, predicted) order
    common_confusions.sort(key=lambda x: x[2], reverse=True)
    print("\nMost common confusions:")
    for true_label, pred_label, count in common_confusions[:5]:
        print(f"True digit {true_label} predicted as {pred_label}: {count} times")

    return accuracy, precision, recall, f1, cm


# Evaluate the model
print("Evaluating the model...")
# The matrix is bound to `conf_matrix` rather than `confusion_matrix`: reusing
# that name would overwrite the function imported from sklearn.metrics, and
# any later call to evaluate_model would fail because the name would then
# refer to an array instead of a callable.
accuracy, precision, recall, f1, conf_matrix = evaluate_model(trained_model, test_loader)


# Save the trained model (parameters only, via its state_dict)
torch.save(trained_model.state_dict(), 'mnist_cnn_model.pth')
print("\nModel saved successfully as 'mnist_cnn_model.pth'")

print("\nEvaluation report completed. Check the generated images for visualizations.")

Using device: cpu


100%|██████████| 9.91M/9.91M [00:14<00:00, 690kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 116kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 848kB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 3.80MB/s]


Performing Exploratory Data Analysis...
Class Distribution:
Class 0: 5923 samples (9.87%)
Class 1: 6742 samples (11.24%)
Class 2: 5958 samples (9.93%)
Class 3: 6131 samples (10.22%)
Class 4: 5842 samples (9.74%)
Class 5: 5421 samples (9.04%)
Class 6: 5918 samples (9.86%)
Class 7: 6265 samples (10.44%)
Class 8: 5851 samples (9.75%)
Class 9: 5949 samples (9.92%)
CNNModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (relu3): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)
Starting model training...
Epoch [1/15], Train Loss: 0.2160, Train Acc: 93.39%,

The model can be improved by the following techniques:

1. Increase model complexity by adding more convolutional layers
2. Apply data augmentation techniques (rotation, shifting) to improve generalization
3. Implement batch normalization to stabilize and accelerate training
4. Experiment with different optimizers like SGD with momentum
5. Use learning rate scheduling to fine-tune the training process