![Under Maintenance](https://media0.giphy.com/media/v1.Y2lkPTc5MGI3NjExeHNuZWxoMHZheXZ6NXp0aGRlYnhucGcybXA2NWNlem5iZ3J2a2I1aSZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/7ZoMAYSgQJ8oe5gCYE/giphy.gif)

In [2]:
# First, let's import all the libraries we need

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

# Choose the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Fix the PyTorch and NumPy random seeds so repeated runs start from the
# same random state.
torch.manual_seed(42)
np.random.seed(42)

Using device: cuda


In [3]:
# Data: the MNIST dataset of handwritten digits (0-9), a classic starting
# point for learning deep learning.

# Per-image preprocessing: convert the image to a tensor, then normalize it
# using the MNIST mean (0.1307) and standard deviation (0.3081).
# Normalized inputs help the model learn faster.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training and test splits; both are downloaded into ./datasets when they
# are not already present, and both share the same preprocessing.
train_dataset = datasets.MNIST(
    root='./datasets',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./datasets',
    train=False,
    download=True,
    transform=transform,
)

# Loaders hand the data to the model in mini-batches of 64 images.
# Only the training loader shuffles; the test loader keeps dataset order.
batch_size = 64
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Summary of dataset sizes and the number of training batches per epoch.
print(
    f"Training samples: {len(train_dataset)}",
    f"Test samples: {len(test_dataset)}",
    f"Batches per epoch: {len(train_loader)}",
    sep="\n",
)

Training samples: 60000
Test samples: 10000
Batches per epoch: 938
