# **Import Libraries**

In [None]:
!pip install idx2numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim
import matplotlib.pyplot as plt
import pandas as pd
import idx2numpy




# **Data Preparation**



*  MNIST data is loaded and split into training and validation sets.
* DataLoader objects are created for both sets to facilitate batch processing
during training and validation.



In [None]:
# Load the raw MNIST arrays from their IDX files.
testLabel = idx2numpy.convert_from_file('/content/t10k-labels.idx1-ubyte')
# Stored as `trainData` rather than `train`: the notebook later defines a
# train() function, and re-running this cell must not overwrite it.
trainData = idx2numpy.convert_from_file('/content/train-images.idx3-ubyte')
testData = idx2numpy.convert_from_file('/content/t10k-images.idx3-ubyte')
trainLabel = idx2numpy.convert_from_file('/content/train-labels.idx1-ubyte')

# NOTE(review): `transform` is defined but never applied to the tensors built
# below, so the pixel values keep their raw byte range. It is kept only so the
# name stays available to any cell that may reference it.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

# Flatten every 28x28 image into a 784-element float vector.
data = torch.from_numpy(trainData).view(-1, 28 * 28).float()
# CrossEntropyLoss expects class-index targets as int64 (long); the
# idx1-ubyte label file yields unsigned bytes, so convert once here.
labels = torch.from_numpy(trainLabel).long()

# Hold out 20% of the samples for validation, stratified by label and with a
# fixed seed so the split is reproducible.
train_data, val_data, train_labels, val_labels = train_test_split(
    data, labels, stratify=labels, test_size=0.2, random_state=42
)
train_dataset = TensorDataset(train_data, train_labels)
val_dataset = TensorDataset(val_data, val_labels)

# Batch both splits; only the training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# **Neural Network Architecture**

* A simple neural network with four fully connected layers (fc1, fc2, fc3, fc4).
* ReLU activation functions are used between hidden layers.
* The model is designed for image classification with 10 output classes.
* The network outputs raw logits; `nn.CrossEntropyLoss` applies log-softmax internally when computing the training loss.

In [None]:
# Fully connected classifier for flattened 28x28 inputs
class CustomNet(nn.Module):
    """Four-layer perceptron: 784 -> 256 -> 128 -> 64 -> 10.

    ReLU follows each of the first three linear layers; ``forward`` returns
    the output of the last linear layer unchanged (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        n_inputs = 28 * 28
        self.fc1 = nn.Linear(n_inputs, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of 784-feature rows to 10 output scores per row."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return self.fc4(hidden)


# Network instance used by the training and evaluation cells
model = CustomNet()


# **Training Function**

* The training process involves iterating over batches, performing forward and backward passes, and updating the model parameters.
* Training accuracy and losses are tracked.

In [None]:
# Optimisation setup used by the training and evaluation functions

# Cross-entropy loss computed on the network's raw outputs
criterion = nn.CrossEntropyLoss()
# Plain SGD over every model parameter with a step size of 0.01
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

def train(epoch):
    """Run one optimisation pass over ``train_loader`` and log its metrics.

    Works on the module-level ``model``, ``optimizer`` and ``criterion``.
    Appends the mean per-batch loss to ``train_losses`` and the accuracy in
    percent to ``train_accuracies``.

    Args:
        epoch: Epoch number passed by the caller; the body does not use it.
    """
    model.train()
    loss_sum = 0.0
    n_seen = 0
    n_correct = 0

    for inputs, targets in train_loader:
        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        scores = model(inputs)
        batch_loss = criterion(scores, targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # Predicted class = index of the largest score in each row.
        guesses = scores.max(1).indices
        n_seen += targets.size(0)
        n_correct += guesses.eq(targets).sum().item()

    accuracy_pct = 100. * n_correct / n_seen
    train_losses.append(loss_sum / len(train_loader))
    train_accuracies.append(accuracy_pct)




# **Validation function**

* The model is evaluated on the validation set without updating parameters.
* Validation accuracy and losses are tracked.

In [None]:
def validate():
    """Score the module-level ``model`` on ``val_loader`` without training.

    Appends the mean per-batch loss to ``val_losses`` and the accuracy in
    percent to ``val_accuracies``, then prints both values.
    """
    model.eval()
    loss_sum = 0.0
    n_seen = 0
    n_correct = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, targets in val_loader:
            scores = model(inputs)
            loss_sum += criterion(scores, targets).item()
            guesses = scores.max(1).indices
            n_seen += targets.size(0)
            n_correct += guesses.eq(targets).sum().item()

    val_accuracy = 100. * n_correct / n_seen
    val_losses.append(loss_sum / len(val_loader))
    val_accuracies.append(val_accuracy)

    summary = 'Validation set: Average loss: {:.4f}, Accuracy: {:.2f}%'.format(val_losses[-1], val_accuracy)
    print(summary)

# **Training**

In [None]:
# Fresh metric histories for the baseline run; train() and validate() append
# one entry per call to the corresponding lists.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# Validate after every epoch (not once after the loop) so the validation
# curves have one point per epoch, matching the training curves they are
# plotted against.
for epoch in range(1, 11):
    train(epoch)
    validate()

# **Plotting**

In [None]:
def plot_loss(train_losses, val_losses,title):
    """Plot training and validation loss on the current figure and show it.

    Args:
        train_losses: Sequence of training loss values.
        val_losses: Sequence of validation loss values.
        title: Text placed above the plot.
    """
    curves = (
        (train_losses, 'Training loss'),
        (val_losses, 'Validation loss'),
    )
    for values, legend_name in curves:
        plt.plot(values, label=legend_name)
    plt.legend()
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()

def plot_accuracy(train_accuracies, val_accuracies,title):
    """Plot training and validation accuracy on the current figure and show it.

    Args:
        train_accuracies: Sequence of training accuracy values (percent).
        val_accuracies: Sequence of validation accuracy values (percent).
        title: Text placed above the plot.
    """
    curves = (
        (train_accuracies, 'Training accuracy'),
        (val_accuracies, 'Validation accuracy'),
    )
    for values, legend_name in curves:
        plt.plot(values, label=legend_name)
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.title(title)
    plt.show()
# Loss curves of the baseline run on their own wide figure
plt.figure(figsize=(12, 5))
plot_loss(train_losses, val_losses, title="Loss Plot")

# Accuracy curves of the baseline run on a second figure
plt.figure(figsize=(12, 5))
plot_accuracy(train_accuracies, val_accuracies, title="Accuracy Plot")

# **Analysis**

* Learning rates and batch sizes are varied, one at a time, to find the best hyperparameters.
* A model is trained for each candidate value, and the value with the best final validation accuracy is selected.

# 1-Changing Learning Rate

In [None]:
# Candidate step sizes for the sweep
learning_rates = [0.001, 0.01, 0.1, 0.5, 1.0]

# Best candidate so far and the final-epoch validation accuracy it reached
best_lr, best_val_accuracy = None, 0.0

for lr in learning_rates:
    # Every candidate starts from a freshly initialised network.
    model = CustomNet()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(1, 11):
        train(epoch)
        validate()

    # Keep this candidate if its last-epoch validation accuracy is the best yet.
    if best_val_accuracy < val_accuracies[-1]:
        best_lr, best_val_accuracy = lr, val_accuracies[-1]

# Retrain a fresh network with the winning learning rate so that the metric
# histories left behind belong to that configuration.
model = CustomNet()
optimizer = optim.SGD(model.parameters(), lr=best_lr)
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

for epoch in range(1, 11):
    train(epoch)
    validate()


# Plotting for the best learning rate


In [None]:
# Loss curves of the run retrained with the selected learning rate
plt.figure(figsize=(12, 5))
plot_loss(train_losses, val_losses, title=f"Losses for Learning Rate: {best_lr}")

# Accuracy curves of the same run
plt.figure(figsize=(12, 5))
plot_accuracy(train_accuracies, val_accuracies, title=f"Accuracies for Learning Rate: {best_lr}")

# Report the winner together with the accuracy recorded during the sweep
print(f"Best Learning Rate: {best_lr}, Final Validation Accuracy: {best_val_accuracy:.2f}%")

# 2-Changing Batch Size

In [None]:
# Sweep over batch sizes, then retrain with the best one for plotting
batch_sizes = [32, 64, 128, 256, 512]

# One learning rate for both the sweep and the final retraining, so the batch
# size is retrained under the same conditions it was selected under. (The
# sweep previously used 0.001 while the retraining used 0.01.)
batch_sweep_lr = 0.01

best_batch_size = None
best_val_accuracy_batch = 0.0

for batch_size in batch_sizes:
    # Fresh network, optimizer and loaders for every candidate batch size.
    model = CustomNet()
    optimizer = optim.SGD(model.parameters(), lr=batch_sweep_lr)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    for epoch in range(1, 11):
        train(epoch)
        validate()

    # Keep this batch size if its last-epoch validation accuracy is the best yet.
    if val_accuracies[-1] > best_val_accuracy_batch:
        best_batch_size = batch_size
        best_val_accuracy_batch = val_accuracies[-1]

# Retrain a fresh network with the winning batch size so that the metric
# histories left behind belong to that configuration.
model = CustomNet()
optimizer = optim.SGD(model.parameters(), lr=batch_sweep_lr)
train_loader = DataLoader(train_dataset, batch_size=best_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch_size, shuffle=False)

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch in range(1, 11):
    train(epoch)
    validate()


# Plotting for the best batch size


In [None]:
# Loss curves of the run retrained with the selected batch size
plt.figure(figsize=(12, 5))
plot_loss(train_losses, val_losses, title=f"Losses for Batch Size: {best_batch_size}")

# Accuracy curves of the same run
plt.figure(figsize=(12, 5))
plot_accuracy(train_accuracies, val_accuracies, title=f"Accuracies for Batch Size: {best_batch_size}")

# Report the winner together with the accuracy recorded during the sweep
print(f"Best Batch Size: {best_batch_size}, Final Validation Accuracy: {best_val_accuracy_batch:.2f}%")


# **Insights:**

#1-Best Learning Rate (lr):
The model achieved the highest validation accuracy when trained with a learning rate of 0.01.
A learning rate that is too high can lead to overshooting, while a too-low learning rate may result in slow convergence.
The choice of the learning rate is crucial for achieving optimal model performance.

#2-Best Batch Size (32):
The optimal batch size for training the model was found to be 32.
Batch size affects the stability of the training process and the memory requirements.
Larger batch sizes might lead to faster convergence but require more memory.

#Final Validation Accuracy:
The model trained with the best hyperparameters achieved a final validation accuracy of around 97%.
The final accuracy is a key metric indicating the model's ability to generalize to unseen data.

In [None]:
# Flatten every 28x28 test image into a 784-element float vector.
test_data = torch.from_numpy(testData).float().view(-1, 28 * 28)
# CrossEntropyLoss expects class-index targets as int64 (long), so convert
# the label array once here.
test_labels = torch.from_numpy(testLabel).long()

# Pair images with labels and batch them in their original order.
test_dataset = TensorDataset(test_data, test_labels)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
print(test_labels)

In [None]:
def Test():
    """Score the module-level ``model`` on ``test_loader`` without training.

    Appends the mean per-batch loss to ``test_losses`` and the accuracy in
    percent to ``test_accuracies``, then prints both values.
    """
    model.eval()
    correct_test = 0
    total_test = 0
    test_loss = 0.0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += criterion(output, target).item()
            # Predicted class = index of the largest score in each row.
            _, predicted = output.max(1)
            total_test += target.size(0)
            correct_test += predicted.eq(target).sum().item()

    test_accuracy = 100. * correct_test / total_test
    # Average over the number of test batches; dividing by len(val_loader)
    # gave a wrong mean whenever the two loaders differ in length.
    test_losses.append(test_loss / len(test_loader))
    test_accuracies.append(test_accuracy)

    # Report the test loss just computed, not the last validation loss.
    print('Test set: Average loss: {:.4f}, Accuracy: {:.2f}%'.format(test_losses[-1], test_accuracy))


In [None]:
# Histories that Test() appends to
test_losses, test_accuracies = [], []

# Ten further training epochs on the current model, followed by a single
# evaluation on the test set
for epoch in range(1, 11):
    train(epoch)
Test()

# **Bonus**

* A modified architecture is introduced with dropout layers and layer normalization.
* Dropout helps prevent overfitting, and layer normalization normalizes inputs to hidden layers.
* This modified model is compared with the original in terms of performance.

# Modified Neural Network Architecture






In [None]:
class CustomNetBonus(nn.Module):
    """MLP with dropout and layer normalisation: 784 -> 256 -> 128 -> 64 -> 10.

    The 128- and 64-unit hidden layers are each followed by ReLU, dropout
    (p=0.5) and layer normalisation. ``forward`` returns log-probabilities
    over the 10 outputs.
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(28 * 28, 256)
        self.fc1 = nn.Linear(256, 128)
        self.dropout1 = nn.Dropout(0.5)  # Drop activations with 50% probability
        self.layer_norm1 = nn.LayerNorm(128)  # Normalise the 128 features
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.5)
        self.layer_norm2 = nn.LayerNorm(64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of 784-feature rows to 10 log-probabilities per row."""
        x = F.relu(self.fc(x))
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.layer_norm1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.layer_norm2(x)
        # No ReLU on the output layer: clamping the class scores at zero
        # would make every negative score identical before the softmax.
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

In [None]:
# Dedicated network, loss and optimizer objects for the modified architecture
model_modified = CustomNetBonus()
criterion_modified = nn.CrossEntropyLoss()
optimizer_modified = optim.SGD(params=model_modified.parameters(), lr=0.01)

In [None]:
# train(), validate() and Test() operate on the module-level `model`,
# `optimizer` and `criterion`. Point those names at the modified
# architecture's objects; otherwise the loop below would keep training and
# evaluating the previously used network instead of `model_modified`.
model = model_modified
optimizer = optimizer_modified
criterion = criterion_modified

# Fresh metric histories for this run
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# After every training epoch, evaluate on the validation and test sets and
# print a one-line summary.
for epoch in range(1, 11):
    train(epoch)
    validate()
    Test()
    print(f"Epoch {epoch}: Train Loss {train_losses[-1]}, Val Loss {val_losses[-1]}, Val Acc {val_accuracies[-1]}")


In [None]:
# Loss curves recorded during the modified-architecture run
plt.figure(figsize=(12, 5))
plot_loss(train_losses, val_losses, title="Modified Architecture Evaluation Loss")

# Accuracy curves of the same run
plt.figure(figsize=(12, 5))
plot_accuracy(train_accuracies, val_accuracies, title="Modified Architecture Evaluation Accuracy")

# Last recorded validation accuracy
final_report = "Modified Architecture: Final Validation Accuracy {:.2f}%".format(val_accuracies[-1])
print(final_report)