# Neural Networks 
- This project will I wil be using neural networks by implementing a multilayer perceptron (MLP) model in PyTorch to classify handwritten digits from the Digits dataset.
- The dataset contains 1,797 images of handwritten digits, each image being an 8x8 pixel grayscale
image of a digit (0-9). Each image is represented as a 64-feature input vector, corresponding to
the grayscale values of the pixels. 

### Import Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
import torch  # Pytorch is imported as torch
from tqdm import tqdm

Looking in indexes: https://download.pytorch.org/whl/cu118


In [15]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### Load & Scale Digits Data

In [18]:
torch.manual_seed(231)

digits = load_digits()
X = digits.data  
y = digits.target  

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


### Prepare Data for PyTorch Training

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=231)

# Converting NumPy arrays to PyTorch tensors for model training
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Creating dataset objects for PyTorch
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Use DataLoader to load data in batches for efficient training
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

### Define Neural Network Model

In [24]:
class DigitClassifier(nn.Module):
    def __init__(self):
        super(DigitClassifier, self).__init__()
        self.fc1 = nn.Linear(64, 128)  
        self.fc2 = nn.Linear(128, 10)  

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x  
model = DigitClassifier()

### Defining Loss Function, Optimizer, and Training the Neural Network

In [27]:
# Defining the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # using Adam for faster convergence

# using a function to train the neural network
def train(model, loader, criterion, optimizer, epochs=15):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for batch_X, batch_y in loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * batch_X.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

        # Computing the average loss and accuracy for the epoch
        avg_loss = running_loss / total
        accuracy = 100 * correct / total
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.3f}, Accuracy = {accuracy:.1f}%")
# Training the model using the DataLoader
train(model, train_loader, criterion, optimizer)

Epoch 1: Loss = 2.072, Accuracy = 58.1%
Epoch 2: Loss = 1.403, Accuracy = 87.0%
Epoch 3: Loss = 0.803, Accuracy = 91.2%
Epoch 4: Loss = 0.511, Accuracy = 92.1%
Epoch 5: Loss = 0.374, Accuracy = 93.5%
Epoch 6: Loss = 0.298, Accuracy = 94.5%
Epoch 7: Loss = 0.249, Accuracy = 95.2%
Epoch 8: Loss = 0.212, Accuracy = 96.0%
Epoch 9: Loss = 0.185, Accuracy = 96.5%
Epoch 10: Loss = 0.167, Accuracy = 96.8%
Epoch 11: Loss = 0.151, Accuracy = 97.1%
Epoch 12: Loss = 0.139, Accuracy = 97.5%
Epoch 13: Loss = 0.127, Accuracy = 98.1%
Epoch 14: Loss = 0.119, Accuracy = 97.6%
Epoch 15: Loss = 0.112, Accuracy = 98.1%


### Evaluating Model Performance on Test Set with Accuracy and Predictions

In [37]:
#Evaluating the model on the test dataset
def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0
    examples = []

    with torch.no_grad():
        for batch_X, batch_y in loader:
            outputs = model(batch_X)
            _, predicted = torch.max(outputs, 1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

            # Collect 5 example predictions for display
            for i in range(len(batch_y)):
                if len(examples) < 5: 
                    examples.append((batch_X[i], predicted[i].item(), batch_y[i].item()))
                else:
                    break
     # Compute accuracy percentage
    accuracy = 100 * correct / total
    print(f"\nTest Accuracy: {accuracy:.1f}%")
    print("Sample Predictions:")
    for i, (img, pred, actual) in enumerate(examples):
        print(f"Image {i+1}: Predicted = {pred}, Actual = {actual}")

evaluate(model, test_loader)


Test Accuracy: 95.3%
Sample Predictions:
Image 1: Predicted = 4, Actual = 4
Image 2: Predicted = 5, Actual = 5
Image 3: Predicted = 1, Actual = 1
Image 4: Predicted = 4, Actual = 4
Image 5: Predicted = 1, Actual = 1


In [30]:
# neural network that allows different activation functions

class AltActivationNet(nn.Module):
    def __init__(self, activation='sigmoid'):
        super(AltActivationNet, self).__init__()
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 10)
        self.activation = activation

    def forward(self, x):
        if self.activation == 'sigmoid':
            x = torch.sigmoid(self.fc1(x))
        elif self.activation == 'tanh':
            x = torch.tanh(self.fc1(x))
        else:
            x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

AltActivationNet()

AltActivationNet(
  (fc1): Linear(in_features=64, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [39]:
def train_and_test_model(activation):
    model = AltActivationNet(activation=activation)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    print(f"\nTraining with activation: {activation}")
    train(model, train_loader, criterion, optimizer)
    evaluate(model, test_loader)

In [41]:
# make loop
for act in ['sigmoid', 'tanh', 'relu']:
    train_and_test_model(act)


Training with activation: sigmoid
Epoch 1: Loss = 2.270, Accuracy = 24.5%
Epoch 2: Loss = 2.137, Accuracy = 49.4%
Epoch 3: Loss = 1.985, Accuracy = 65.9%
Epoch 4: Loss = 1.788, Accuracy = 77.3%
Epoch 5: Loss = 1.558, Accuracy = 83.6%
Epoch 6: Loss = 1.329, Accuracy = 83.1%
Epoch 7: Loss = 1.126, Accuracy = 87.9%
Epoch 8: Loss = 0.956, Accuracy = 87.6%
Epoch 9: Loss = 0.822, Accuracy = 88.8%
Epoch 10: Loss = 0.714, Accuracy = 90.2%
Epoch 11: Loss = 0.628, Accuracy = 91.2%
Epoch 12: Loss = 0.559, Accuracy = 91.0%
Epoch 13: Loss = 0.501, Accuracy = 92.3%
Epoch 14: Loss = 0.456, Accuracy = 92.7%
Epoch 15: Loss = 0.413, Accuracy = 92.8%

Test Accuracy: 93.1%
Sample Predictions:
Image 1: Predicted = 4, Actual = 4
Image 2: Predicted = 5, Actual = 5
Image 3: Predicted = 1, Actual = 1
Image 4: Predicted = 4, Actual = 4
Image 5: Predicted = 2, Actual = 1

Training with activation: tanh
Epoch 1: Loss = 1.984, Accuracy = 55.6%
Epoch 2: Loss = 1.274, Accuracy = 86.3%
Epoch 3: Loss = 0.749, Accurac

#### ReLU achieved the highest test accuracy at 95.8%, outperforming tanh (94.4%) and sigmoid (93.1%). Tanh and ReLU both reached ~98% training accuracy, while sigmoid lagged behind at ~92.8%. This was expected as ReLU is better suited for better suited for neural networks.