In [None]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Select the compute device as a real torch.device (not a bare bool) so the
# value can be handed straight to `.to(device)` on tensors and modules.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Echo the current value of `device` as this cell's output.
device

In [None]:
# Preprocessing pipeline applied to every MNIST image: convert the image to a
# tensor, then normalise its single channel with mean 0.5 and std 0.5.
#
# The pipeline is built through the fully qualified `torchvision.transforms`
# module because this assignment rebinds the name `transforms` (the dataset
# cell passes `transform=transforms`, so the name is kept). Going through
# `torchvision` keeps the cell safe to re-run: previously a second execution
# raised AttributeError because `transforms` no longer referred to the module.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])

In [None]:
# Build the two official MNIST partitions (train first, then test). Both are
# stored under ./data, downloaded when missing, and share the preprocessing
# pipeline bound to `transforms`.
mnist_full, mnist_test = (
    MNIST(root='./data', train=is_train, download=True, transform=transforms)
    for is_train in (True, False)
)

In [None]:
# Hold out 30% of the official training set for validation.
# The split is driven by an explicitly seeded generator so that a fresh
# "Restart & Run All" produces the same train/validation partition every time
# (an unseeded random_split gives a different partition on each kernel start).
SPLIT_SEED = 42
train_size = int(0.7 * len(mnist_full))
val_size = len(mnist_full) - train_size  # remainder, so the two sizes always sum to the full set
train_dataset, val_dataset = torch.utils.data.random_split(
    mnist_full,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# Mini-batch loaders for the three splits. Only the training loader shuffles;
# shuffling brings no benefit when evaluating, so the validation and test
# loaders iterate in a fixed order.
BATCH_SIZE = 64
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(mnist_test, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class CNN(nn.Module):
    """Three-stage convolutional classifier followed by a small dense head.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    with 32, 64 and 128 output channels respectively. The first linear layer is
    sized for a 128 x 3 x 3 feature map, which is what 28x28 inputs produce
    after the three 2x2 poolings (28 -> 14 -> 7 -> 3).

    Args:
        hidden_units: Width of the hidden fully connected layer.
        num_classes: Number of output logits.
        in_channels: Number of channels in the input images.

    The defaults reproduce the previously hard-coded architecture, so ``CNN()``
    builds the same layers in the same order (identical ``state_dict`` keys).
    """

    def __init__(self, hidden_units: int = 64, num_classes: int = 10, in_channels: int = 1):
        super().__init__()
        self.model = nn.Sequential(
            # Stage 1: in_channels -> 32 feature maps
            nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: 32 -> 64 feature maps
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 3: 64 -> 128 feature maps
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Classifier head with dropout before each linear layer
            nn.Flatten(),
            nn.Dropout(0.6),
            nn.Linear(128 * 3 * 3, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the raw logits.

        No softmax is applied here; the output of the final linear layer is
        returned as-is.
        """
        return self.model(x)



In [None]:
# Use the GPU when CUDA reports one, otherwise fall back to the CPU, and
# place a freshly constructed network on that device.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = CNN().to(device=device)

In [None]:
# Training configuration: number of passes over the training data, the
# classification loss, and an Adam optimiser over all model parameters.
epochs = 10
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train for `epochs` passes over the training set, validating after each one.
# `epochs`, `loss_fn` and `optimizer` come from the configuration cell directly
# above; they are deliberately not re-created here so the hyper-parameters are
# defined in exactly one place.
for epoch in range(epochs):
    # ---- Training pass -----------------------------------------------------
    model.train()  # enable dropout and batch-norm statistic updates
    train_loss_sum = 0.0
    train_seen = 0

    for features, labels in train_loader:
        features = features.to(device)
        labels = labels.to(device)

        # Forward pass and loss
        logits = model(features)
        loss = loss_fn(logits, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is a per-batch mean; weight it by the batch size so the
        # (usually shorter) final batch is not over-represented in the average.
        n_batch = labels.size(0)
        train_loss_sum += loss.item() * n_batch
        train_seen += n_batch

    # ---- Validation pass ---------------------------------------------------
    model.eval()  # disable dropout, use running batch-norm statistics
    val_loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for val_features, val_labels in val_loader:
            val_features = val_features.to(device)
            val_labels = val_labels.to(device)

            val_logits = model(val_features)
            n_batch = val_labels.size(0)
            val_loss_sum += loss_fn(val_logits, val_labels).item() * n_batch

            # Predicted class = index of the largest logit
            predicted = val_logits.argmax(dim=1)
            total += n_batch
            correct += (predicted == val_labels).sum().item()

    train_loss = train_loss_sum / train_seen
    val_loss = val_loss_sum / total
    val_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

In [None]:
# Evaluate the trained model on the MNIST test loader: collect predictions and
# ground truth for the confusion matrix and report the overall test accuracy.
model.eval()
accuracy = []  # per-batch accuracy (fraction correct in each batch)
predicted_labels = []
true_labels = []
test_correct = 0

with torch.no_grad():  # no gradients are needed for evaluation
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)

        outputs = model(features)  # raw logits
        predicted = outputs.argmax(dim=1)  # index of the largest logit per sample

        hits = predicted == labels
        accuracy.append(hits.float().mean().item())
        test_correct += hits.sum().item()

        # Move to CPU and store as plain Python ints for scikit-learn
        predicted_labels.extend(predicted.cpu().tolist())
        true_labels.extend(labels.cpu().tolist())

# Overall accuracy is computed from sample counts rather than by averaging the
# per-batch values, so a shorter final batch does not skew the result.
test_accuracy = 100 * test_correct / len(true_labels)
print(f"Test Accuracy: {test_accuracy:.2f}%")

conf_matrix = confusion_matrix(true_labels, predicted_labels)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Plot the test-set confusion matrix as an annotated heatmap.
# The matrix is built from `true_labels` / `predicted_labels`, the full lists
# collected by the evaluation cell. The earlier call used `labels` (only the
# last test batch) and `preds`, which is not defined anywhere in the visible
# notebook, so the cell failed with NameError.
conf_matrix = confusion_matrix(true_labels, predicted_labels)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=True, ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix ')

# Show the plot
plt.show()