# Import Required Libraries
Import PyTorch and torchvision modules for neural networks, optimization, and data handling, plus matplotlib for visualization and tqdm for progress bars.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm

# Define the Neural Network Model
Define a SimpleNN class with fully connected layers for MNIST classification.

In [2]:
class SimpleNN(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    The defaults reproduce the MNIST configuration used in this notebook
    (784 -> 128 -> 64 -> 10), so ``SimpleNN()`` builds the same architecture
    and exposes the same ``fc1``/``fc2``/``fc3`` parameters as before.

    Args:
        input_size: Number of features per sample once the input is flattened.
        hidden_sizes: Pair ``(first, second)`` giving the hidden layer widths.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=28 * 28, hidden_sizes=(128, 64), num_classes=10):
        super().__init__()
        first_hidden, second_hidden = hidden_sizes
        self.fc1 = nn.Linear(input_size, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_size)`` and return the raw logits.

        No softmax is applied; the notebook pairs this model with
        ``nn.CrossEntropyLoss``, which consumes unnormalised logits.
        """
        # reshape (unlike view) also works when x is not contiguous in memory,
        # e.g. after a transpose/permute; it only copies when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Load and Preprocess MNIST Dataset
Download and transform the MNIST dataset using torchvision.

In [3]:
# Convert each image to a tensor, then shift/scale it with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Build the training split first, then the test split, sharing the same
# root directory and preprocessing pipeline.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 506kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.61MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.48MB/s]


# Set Up Data Loaders
Create DataLoader objects for training and testing with batching and shuffling.

In [4]:
# Training batches are shuffled; the test loader keeps the dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)

# Initialize Model, Loss, and Optimizer
Instantiate the model, CrossEntropyLoss, and Adam optimizer.

In [5]:
# Seed PyTorch's global RNG before any weights are created so that weight
# initialisation (and the DataLoader shuffling that draws from the same RNG)
# is repeatable across Restart & Run All.
# NOTE: some GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

model = SimpleNN()
# CrossEntropyLoss expects raw logits, which is what SimpleNN.forward returns.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Configure GPU Usage
Check for CUDA availability and move the model to the GPU if one is available; input batches are moved to the same device inside the training and evaluation loops.

In [7]:
# Report the CUDA hardware PyTorch can see, or say that there is none.
if not torch.cuda.is_available():
    print("No GPU available")
else:
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Count: {torch.cuda.device_count()}")

GPU Name: Tesla T4
GPU Count: 1


In [6]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Move the model's parameters onto the chosen device.
model = model.to(device)
print(f'Using device: {device}')

Using device: cuda


# Train the Model
Run the training loop for a specified number of epochs, computing loss and updating weights.

In [11]:
num_epochs = 5

# Explicitly enter training mode: the evaluation cell calls model.eval(), so
# re-running this cell afterwards would otherwise keep training in eval mode.
model.train()
for epoch in range(num_epochs):
    total_loss = 0.0
    batch_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for images, labels in batch_bar:
        # Batches must live on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once; each .item() call copies the value to the host.
        batch_loss = loss.item()
        total_loss += batch_loss
        batch_bar.set_postfix(loss=batch_loss)

    # Mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Avg Loss: {avg_loss:.4f}')

                                                                          

Epoch 1/5, Avg Loss: 0.1242


                                                                         

Epoch 2/5, Avg Loss: 0.1033


                                                                          

Epoch 3/5, Avg Loss: 0.0872


                                                                          

Epoch 4/5, Avg Loss: 0.0772


                                                                          

Epoch 5/5, Avg Loss: 0.0689




# Evaluate the Model
Evaluate the trained model on the test set and calculate accuracy.

In [16]:
# Switch to inference mode and disable gradient tracking for evaluation.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    eval_bar = tqdm(test_loader, desc="Evaluating", leave=False)
    for images, labels in eval_bar:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit per row is the predicted class. The
        # legacy `.data` attribute is unnecessary inside torch.no_grad().
        predicted = outputs.argmax(dim=1)
        # Compare once and reuse for both the running total and the bar.
        matches = predicted == labels
        total += labels.size(0)
        correct += matches.sum().item()
        eval_bar.set_postfix(batch_acc=matches.float().mean().item())
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

                                                                              

Accuracy: 96.81%




## Save the Trained Model

In [26]:
import base64
from IPython.display import HTML

MODEL_PATH = 'mnist_model.pth'

# Write the trained weights to disk first: no other cell creates this file,
# so reading it on a fresh kernel would raise FileNotFoundError. Saving the
# state_dict (rather than the pickled module) keeps the checkpoint loadable
# via SimpleNN().load_state_dict(torch.load(MODEL_PATH)).
torch.save(model.state_dict(), MODEL_PATH)

# Read the model file and encode to base64
with open(MODEL_PATH, 'rb') as f:
    model_data = f.read()
b64 = base64.b64encode(model_data).decode()

# Create download link; the whole checkpoint is embedded in the cell output
# as a data URI, so the link works without a file server.
html = f'<a download="{MODEL_PATH}" href="data:application/octet-stream;base64,{b64}" target="_blank">Download Trained Model</a>'
HTML(html)