In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define a CNN model
class CNNModel(nn.Module):
    """Small two-block CNN classifier for single-channel images.

    Layout: conv(1->32) -> ReLU -> 2x2 max-pool -> conv(32->64) -> ReLU ->
    2x2 max-pool -> linear(64*7*7 -> 128) -> ReLU -> linear(128 -> 10).

    Both convolutions use a 3x3 kernel with padding 1, so they keep the
    spatial size; each pool halves it. The first linear layer expects a
    64 x 7 x 7 feature map, i.e. a 28x28 input (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)  # 1 input channel (grayscale), 32 output channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # Fully connected layer
        self.fc2 = nn.Linear(128, 10)  # 10 classes for digits (0-9)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: Tensor of shape (N, 1, H, W). A single unbatched image of
                shape (1, H, W) is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape (N, 10) holding ``log_softmax`` scores over dim 1.

        Raises:
            RuntimeError: If the pooled feature map does not contain exactly
                64 * 7 * 7 values per sample (raised by the first linear layer).
        """
        # Keep supporting a lone (C, H, W) image, which the previous
        # ``view(-1, ...)`` flattening implicitly turned into a batch of one.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten per sample. ``view(-1, 64 * 7 * 7)`` would let the batch
        # dimension absorb extra elements when the spatial size is wrong,
        # silently returning more rows than there are input samples.
        x = torch.flatten(x, start_dim=1)

        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

# Build the MNIST input pipeline: tensor conversion followed by
# Normalize(mean=(0.1307,), std=(0.3081,)) on the single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training and held-out splits share the same preprocessing and cache directory.
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

# Shuffle only the training stream; evaluation uses larger, ordered batches.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Network, objective and optimiser used by the training loop.
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training the model
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode, then for every batch moves the tensors
    to ``device``, computes the loss with the module-level ``criterion``,
    back-propagates and applies one ``optimizer`` step. A progress line is
    printed for every 100th batch (starting with batch 0).

    Args:
        model: Network to optimise (updated in place).
        device: Device the batch tensors are moved to.
        train_loader: Iterable of ``(inputs, labels)`` batches exposing ``.dataset``.
        optimizer: Optimiser bound to ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard update: clear stale gradients, forward, backward, step.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only report on batches 0, 100, 200, ...
        if step % 100:
            continue
        seen = step * len(inputs)
        print(f'Train Epoch: {epoch} [{seen}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}')

# Testing the model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # Sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n')

# Pick the compute device: CUDA when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Alternate one training pass and one evaluation pass per epoch
num_epochs = 10
for epoch in range(1, 1 + num_epochs):
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)

# Persist the learned parameters (state_dict only) to disk
torch.save(model.state_dict(), "mnist_cnn.pth")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 16109833.76it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 498355.19it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3833211.34it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2192992.84it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Train Epoch: 1 [0/60000] Loss: 2.300517
Train Epoch: 1 [6400/60000] Loss: 0.177034
Train Epoch: 1 [12800/60000] Loss: 0.045098
Train Epoch: 1 [19200/60000] Loss: 0.113862
Train Epoch: 1 [25600/60000] Loss: 0.235293
Train Epoch: 1 [32000/60000] Loss: 0.011197
Train Epoch: 1 [38400/60000] Loss: 0.035591
Train Epoch: 1 [44800/60000] Loss: 0.075982
Train Epoch: 1 [51200/60000] Loss: 0.033336
Train Epoch: 1 [57600/60000] Loss: 0.019702

Test set: Average loss: 0.0000, Accuracy: 9854/10000 (99%)

Train Epoch: 2 [0/60000] Loss: 0.078447
Train Epoch: 2 [6400/60000] Loss: 0.066185
Train Epoch: 2 [12800/60000] Loss: 0.049212
Train Epoch: 2 [19200/60000] Loss: 0.031648
Train Epoch: 2 [25600/60000] Loss: 0.106962
Train Epoch: 2 [32000/60000] Loss: 0.018657
Train Epoch: 2 [38400/60000] Loss: 0.043766
Train Epoch: 2 [44800/60000] Loss: 0.011998
Train Epoch: 2 [51200/60000] Loss: 0.167713
Train Epoch: 2 [57600/60000] Loss: 0.0

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Function to test a random image from the test dataset
def test_random_images(model, test_loader, device, num_images=2):
    # Set the model to evaluation mode
    model.eval()

    # Fetch a batch of test images
    dataiter = iter(test_loader)
    images, labels = next(dataiter)  # Correct way to fetch next batch

    # Select random indices for testing
    random_indices = np.random.choice(len(images), num_images, replace=False)
    random_images = images[random_indices]
    random_labels = labels[random_indices]

    # Move images to the device (GPU or CPU)
    random_images = random_images.to(device)

    # Make predictions
    with torch.no_grad():
        outputs = model(random_images)
        _, predicted = torch.max(outputs, 1)

    # Plot the images with their predicted and true labels
    for i in range(num_images):
        plt.figure(figsize=(2, 2))
        plt.imshow(random_images[i].cpu().numpy().squeeze(), cmap='gray')
        plt.title(f"Predicted: {predicted[i].item()}, True: {random_labels[i].item()}")
        plt.axis('off')
        plt.show()

# Visual spot-check: plot two randomly drawn test samples with their predictions
test_random_images(model=model, test_loader=test_loader, device=device, num_images=2)
