<a href="https://colab.research.google.com/github/Krixna-Kant/Infosys-Springboard-AI-ML-Internship/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importing PyTorch for building and training machine learning models
import torch

# Importing nn module to define neural network layers and architectures
import torch.nn as nn

# Importing optim module to adjust weights during training
import torch.optim as optim

# Importing torchvision for working with datasets and applying transformations
from torchvision import datasets, transforms

# Importing DataLoader to handle batching and shuffling of datasets
from torch.utils.data import DataLoader

# Importing matplotlib for plotting and visualizing data
import matplotlib.pyplot as plt


In [2]:
# Preprocessing pipeline shared by both splits:
#   1. ToTensor   - image -> float tensor scaled to [0, 1]
#   2. Normalize  - (x - 0.5) / 0.5, which maps [0, 1] onto [-1, 1]
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST training split, stored under ./data and downloaded on first use
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# MNIST test split, same storage location and preprocessing
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.81MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 153kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.45MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.62MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [3]:
# Training batches: 64 images each, reshuffled on every pass over the data
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Test batches: 64 images each, kept in dataset order (order is irrelevant for evaluation)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [4]:
# Define the LeNet-5 model architecture as a subclass of nn.Module
class LeNet5(nn.Module):
    """LeNet-5 style CNN for single-channel 28x28 images.

    Layer stack (spatial sizes are for a 28x28 input):
        conv1 (1 -> 6, 5x5, padding 2)  -> 6 x 28 x 28 -> ReLU
        avg-pool 2x2                    -> 6 x 14 x 14
        conv2 (6 -> 16, 5x5)            -> 16 x 10 x 10 -> ReLU
        avg-pool 2x2                    -> 16 x 5 x 5
        flatten                         -> 400
        fc1 -> 120 -> ReLU, fc2 -> 84 -> ReLU, fc3 -> num_classes

    Args:
        num_classes: Width of the output layer. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractor
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2)  # padding=2 keeps 28x28 through the 5x5 kernel
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)                  # Shared (parameter-free) 2x2 average pooling

        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)                       # No padding: 14x14 -> 10x10

        # Classifier head
        self.fc1 = nn.Linear(16 * 5 * 5, 120)                              # 16 channels x 5 x 5 after the second pool
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)                              # Output layer (one logit per class)

        self.relu = nn.ReLU()                                              # Shared (parameter-free) activation

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, num_classes) holding raw, un-normalised
            scores; no softmax is applied here.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to
                16 x 5 x 5 features per sample (raised by ``fc1``).
        """
        x = self.pool(self.relu(self.conv1(x)))                           # conv -> ReLU -> pool
        x = self.pool(self.relu(self.conv2(x)))                           # conv -> ReLU -> pool

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this never folds features from wrongly sized
        # inputs into extra batch rows; a size mismatch fails loudly in fc1.
        x = torch.flatten(x, 1)

        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))

        return self.fc3(x)                                                # Raw scores (no softmax)


In [5]:
# Initialize model, loss function, and optimizer based on device availability (GPU or CPU)

# Seed PyTorch's global RNG before any weights are created so that the initial
# parameters are repeatable on a fresh kernel (Restart -> Run All). By default,
# DataLoader shuffling also draws from this global RNG.
# NOTE: some GPU kernels are non-deterministic, so CUDA runs may still differ slightly.
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LeNet5().to(device)

criterion = nn.CrossEntropyLoss()                                     # Loss function combining softmax and negative log likelihood loss
optimizer = optim.Adam(model.parameters(), lr=0.001)                 # Adam optimizer for weight adjustment during training


In [6]:
# Function to train the model over multiple epochs
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place and report the mean batch loss of every epoch.

    Args:
        model: The ``nn.Module`` to optimise.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        list[float]: Mean batch loss for each epoch, in order.

    Raises:
        ValueError: If ``train_loader`` yields no batches during an epoch.
    """
    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-level ``device`` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    epoch_losses = []

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0

        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            optimizer.zero_grad()                                       # Clear gradients left over from the previous step

            outputs = model(images)                                     # Forward pass
            loss = criterion(outputs, labels)

            loss.backward()                                             # Backward pass
            optimizer.step()                                            # Parameter update

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")

        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {mean_loss:.4f}")

    return epoch_losses


In [7]:
# Function to evaluate model performance on test data
def evaluate_model(model, test_loader):
    """Measure and print the classification accuracy of ``model``.

    Puts the model in eval mode, runs every batch without gradient tracking,
    and compares the highest-scoring class per sample against the labels.

    Args:
        model: The ``nn.Module`` to evaluate; outputs are per-class scores
            with classes along dimension 1.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        float: Accuracy as a percentage in [0, 100].

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-level ``device`` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()

    correct = 0                                                        # Correctly classified samples
    total = 0                                                          # Samples seen

    with torch.no_grad():                                              # No gradients needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            predicted = model(images).argmax(dim=1)                    # Index of the highest score per sample

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")                           # Print accuracy percentage
    return accuracy


In [8]:
# Run the full experiment: fit on the training batches for 10 epochs,
# then report accuracy on the held-out test batches.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)
evaluate_model(model=model, test_loader=test_loader);  # trailing ';' keeps the cell from echoing a return value


Epoch 1/10, Loss: 0.3112
Epoch 2/10, Loss: 0.0868
Epoch 3/10, Loss: 0.0617
Epoch 4/10, Loss: 0.0483
Epoch 5/10, Loss: 0.0409
Epoch 6/10, Loss: 0.0351
Epoch 7/10, Loss: 0.0308
Epoch 8/10, Loss: 0.0266
Epoch 9/10, Loss: 0.0239
Epoch 10/10, Loss: 0.0219
Test Accuracy: 99.02%
