In [11]:
# Step 1: Load and Preprocess the Data

# Load the dataset using torchvision.datasets.MNIST
# Apply transformations (ToTensor() and normalization)
# Create DataLoader objects for training and test datasets.

In [12]:
pip install torchvision

Note: you may need to restart the kernel to use updated packages.


In [4]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader



In [5]:
# Preprocessing pipeline applied to every image when it is read from the dataset
transform = transforms.Compose(
    [
        transforms.ToTensor(),  # PIL image -> PyTorch tensor
        transforms.Normalize(mean=(0.5,), std=(0.5,)),  # (x - 0.5) / 0.5 -> [-1, 1] range
    ]
)

# MNIST training split (downloaded into ./data on first use)
train_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)

# MNIST test split, preprocessed the same way
test_dataset = torchvision.datasets.MNIST(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch iterators: training batches are reshuffled, test batches keep their order
batch_size = 64  # Samples per mini-batch
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Report how many samples each split contains
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")

Number of training samples: 60000
Number of test samples: 10000


In [6]:
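# Explanation:
# ToTensor() → Converts images to PyTorch tensors with pixel values scaled to [0, 1].
# Normalize((0.5,), (0.5,)) → Computes (x - 0.5) / 0.5, mapping pixel values from [0, 1] to [-1, 1], which helps stabilize training.
# train=True and train=False → Load the training and test splits separately.
# DataLoader → Batches the data for efficient processing; only the training loader shuffles (shuffle=True), the test loader keeps a fixed order.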

In [7]:
# Step 2: Visualizing and Exploring the Data

# Check the shape and size of the dataset.
# Visualize sample images to understand how the input looks.

In [11]:
# Check dataset properties
# Indexing the dataset yields an (image, label) pair, unpacked below.
# NOTE: train_dataset is created in the data-loading cell, so that cell must
# have been run in the current kernel before this one.
sample_image, sample_label = train_dataset[0]  # Get first sample

print(f"Shape of a single image tensor: {sample_image.shape}")  # Should be [1, 28, 28]
print(f"Label of the first image: {sample_label}")

NameError: name 'train_dataset' is not defined

In [10]:
import matplotlib.pyplot as plt
import numpy as np

# Function to show the first few images of a dataset together with their labels
def show_images(dataset, num_images=6):
    """Plot the first ``num_images`` samples of ``dataset`` in a single row.

    Args:
        dataset: Indexable collection supporting ``len()`` whose items are
            ``(image, label)`` pairs; ``image`` must provide ``.numpy()``
            (for example a CPU tensor).
        num_images: Maximum number of samples to draw; capped at
            ``len(dataset)`` so short datasets do not raise ``IndexError``.

    Raises:
        ValueError: If there is nothing to draw (``num_images`` < 1 or the
            dataset is empty).

    The pixel values are drawn as stored (no de-normalization is applied).
    ``imshow`` rescales the value range of each image to the colormap by
    default, so normalized images still render as ordinary grayscale digits.
    """
    count = min(num_images, len(dataset))
    if count < 1:
        raise ValueError("show_images needs at least one image to display")

    # squeeze=False always returns a 2-D array of Axes; without it a single
    # subplot comes back as a bare Axes object and indexing it fails.
    fig, axes = plt.subplots(1, count, figsize=(10, 3), squeeze=False)
    row = axes[0]

    for i in range(count):
        image, label = dataset[i]

        # Convert tensor to numpy array and drop the singleton channel dimension
        image = image.numpy().squeeze()

        row[i].imshow(image, cmap="gray")
        row[i].set_title(f"Label: {label}")
        row[i].axis("off")

    plt.show()

# Display sample images
# NOTE: train_dataset must already exist in the current kernel (it is created
# in the data-loading cell); otherwise this call raises NameError.
show_images(train_dataset)


NameError: name 'train_dataset' is not defined

In [1]:
import torch
# Hands cached-but-unused GPU memory back to the driver. Memory that is still
# occupied by live tensors is not freed by this call.
torch.cuda.empty_cache()  # Release unused cached GPU memory

In [4]:
# Step 3: Building the Neural Network using PyTorch

# Define a neural network using torch.nn.Module
# Select a loss function and optimizer
# Train the model using the training DataLoader

In [5]:
# Define the Neural Network
# use a simple fully connected feedforward network with:

# Input layer: 784 neurons (flattened 28x28 images)
# Hidden layers: 2 layers with 128 and 64 neurons
# Activation function: ReLU (for non-linearity)
# Output layer: 10 neurons (one for each digit 0-9) producing raw logits; softmax is applied inside the loss function (CrossEntropyLoss), not in the network

In [6]:
import torch.nn as nn
import torch.optim as optim

# Define the neural network
class MNISTClassifier(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The defaults reproduce the MNIST setup (784 -> 128 -> 64 -> 10); the sizes
    are keyword parameters so the same network can be reused for other input
    resolutions or class counts.

    Args:
        input_size: Number of features per sample after flattening
            (28*28 for MNIST images).
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores, one per class.
    """

    def __init__(self, input_size=28 * 28, hidden1=128, hidden2=64, num_classes=10):
        super().__init__()
        # Attribute names and creation order are kept stable so state_dict
        # keys stay the same (fc1.*, fc2.*, output.*).
        self.flatten = nn.Flatten()  # Flatten everything after the batch dimension
        self.fc1 = nn.Linear(input_size, hidden1)  # First hidden layer
        self.fc2 = nn.Linear(hidden1, hidden2)  # Second hidden layer
        self.output = nn.Linear(hidden2, num_classes)  # Output layer

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        No softmax is applied here; it is left to the loss function.
        """
        x = self.flatten(x)
        x = torch.relu(self.fc1(x))  # ReLU activation
        x = torch.relu(self.fc2(x))
        return self.output(x)

# Instantiate the model
# Creates a new, untrained network each time this cell runs.
model = MNISTClassifier()

# Print model architecture
# Printing a module lists its sub-layers (flatten, fc1, fc2, output).
print(model)

MNISTClassifier(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=10, bias=True)
)


In [7]:
# Define loss function and optimizer
# The model's forward pass returns raw scores without softmax; the loss is
# responsible for that step.
criterion = nn.CrossEntropyLoss()  # Suitable for multi-class classification
# The optimizer is bound to the parameters of the current `model` object, so it
# has to be re-created whenever `model` is re-instantiated.
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adaptive learning rate optimizer


In [8]:
# Train the Model

# Iterate through train_loader
# Compute loss
# Backpropagate to update weights
# Print the average loss after every epoch

In [9]:
# Pick the GPU when PyTorch can see one, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# One epoch = one full pass over the training set
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0  # Sum of the batch losses seen in this epoch

    for images, labels in train_loader:
        # Put the batch on the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Standard optimization step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average batch loss for the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training Complete!")

NameError: name 'train_loader' is not defined

In [12]:
# Define loss function and optimizer
# The model returns raw scores without softmax; the loss takes care of that step.
criterion = nn.CrossEntropyLoss()
# Re-creating the optimizer starts Adam from a clean state.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Training the Model
# NOTE: relies on `model`, `device` and `train_loader` from earlier cells.
epochs = 5  # Fewer passes over the data to shorten training time (epoch count does not change per-batch memory use)

# evaluate_model() leaves the model in eval mode, so switch back to training
# mode explicitly in case this cell is re-run after an evaluation.
model.train()

for epoch in range(epochs):
    running_loss = 0.0  # Sum of the batch losses seen in this epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Same device as the model
        optimizer.zero_grad()  # Reset gradients from the previous step
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss.item()
    # Average batch loss for the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training Complete!")

NameError: name 'train_loader' is not defined

In [13]:

# Step 5: Evaluating the Model
def evaluate_model(model, test_loader):
    """Compute and print the classification accuracy of ``model`` on ``test_loader``.

    Args:
        model: ``torch.nn.Module`` mapping a batch of inputs to per-class scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy as a percentage (0-100).

    Raises:
        ValueError: If ``test_loader`` yields no samples.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # `device` global that may not exist in the current kernel.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():  # No gradients are needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(eval_device), labels.to(eval_device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # Index of the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Prints the test accuracy; the returned value is not stored here.
evaluate_model(model, test_loader)

# Step 6: Save the Model
# Only the state_dict (the model's parameter tensors) is written, so reloading
# requires constructing an MNISTClassifier first and loading the weights into it.
torch.save(model.state_dict(), "mnist_model.pth")
print("Model Saved!")

NameError: name 'test_loader' is not defined