
#### Program 4:

##### Objective:
Write a program to implement the SGD and Adagrad optimizers using the PyTorch framework, and compare results using the MNIST digit classification dataset. Use a simple CNN to illustrate the difference between the two optimizers.

Perform the following steps:
1. **Preprocess data**
2. **Define SGD and Adagrad optimizers from scratch**
3. **Define a simple CNN model architecture**
4. **Train the CNN model using a suitable criterion with each optimizer**

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from torch.optim import Optimizer

# Data preprocessing: the only transform converts each image to a tensor
transform = transforms.Compose([transforms.ToTensor()])

# Fetch the MNIST train and test splits into ./data (downloaded if missing)
train_dataset = datasets.MNIST("./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST("./data", train=False, download=True, transform=transform)

# Keep the experiment small: first 200 training samples, first 50 test samples
train_subset = Subset(train_dataset, indices=range(200))
test_subset = Subset(test_dataset, indices=range(50))

# Mini-batches of 10; only the training order is shuffled
train_loader = DataLoader(dataset=train_subset, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_subset, batch_size=10, shuffle=False)

# Simple CNN model definition
class SimpleCNN(nn.Module):
    """Small convolutional classifier producing 10 log-probabilities per sample.

    Layout: two 5x5 convolutions (1 -> 10 -> 20 channels), each followed by
    2x2 max-pooling and ReLU, then two linear layers (320 -> 50 -> 10).
    The flatten size of 320 corresponds to 20 channels of 4x4 features,
    which is what a 1x28x28 input yields after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Classifier head
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 classes for the batch ``x``."""
        features = x
        for conv in (self.conv1, self.conv2):
            # conv -> 2x2 max-pool -> ReLU, applied once per conv layer
            features = F.relu(F.max_pool2d(conv(features), 2))
        flat = features.view(-1, 320)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

# SGD update function
def sgd_update(parameters, lr):
    """Apply one plain SGD step in place, then clear the used gradients.

    Args:
        parameters: iterable of tensors; entries whose ``grad`` is ``None``
            are left untouched.
        lr: step size multiplied with each gradient.
    """
    with torch.no_grad():
        for tensor in parameters:
            grad = tensor.grad
            if grad is None:
                continue
            # w <- w - lr * dL/dw
            tensor.data.sub_(lr * grad.data)
            # Reset so the next backward pass does not accumulate onto this one
            grad.zero_()

# Custom Adagrad optimizer
class CustomAdagrad(Optimizer):
    """Hand-written Adagrad built on ``torch.optim.Optimizer``.

    Every parameter keeps a running sum of its squared gradients. Each
    element is then updated by ``lr / (epsilon + sqrt(sum)) * grad``, so
    elements that have seen large gradients take smaller steps.

    Hyperparameters live in ``param_groups`` and the accumulators in
    ``state`` (key ``"sum"``), which is where the base class expects them;
    ``parameters``, ``lr``, ``epsilon`` and ``sum_squared_gradients`` remain
    available as attributes for existing callers.

    Args:
        parameters: iterable of tensors to optimise (param-group dicts, as
            accepted by ``Optimizer``, also work).
        lr: base learning rate.
        epsilon: small constant added to the denominator to avoid division
            by zero.

    Raises:
        ValueError: raised by the base class when ``parameters`` is empty.
    """

    def __init__(self, parameters, lr=0.01, epsilon=1e-10):
        # The base initialiser creates ``param_groups``, ``state`` and
        # ``defaults``. Skipping it leaves the instance without them, which
        # breaks repr(), state_dict() and learning-rate schedulers.
        super().__init__(parameters, {"lr": lr, "epsilon": epsilon})
        # Create the accumulators eagerly so ``sum_squared_gradients`` is
        # populated before the first step.
        for param in self.parameters:
            self._accumulator(param)

    @property
    def parameters(self):
        """Flat list of every tensor managed by this optimizer."""
        return [p for group in self.param_groups for p in group["params"]]

    @property
    def lr(self):
        """Learning rate of the first parameter group."""
        return self.param_groups[0]["lr"]

    @lr.setter
    def lr(self, value):
        for group in self.param_groups:
            group["lr"] = value

    @property
    def epsilon(self):
        """Denominator offset of the first parameter group."""
        return self.param_groups[0]["epsilon"]

    @epsilon.setter
    def epsilon(self, value):
        for group in self.param_groups:
            group["epsilon"] = value

    @property
    def sum_squared_gradients(self):
        """Accumulated squared gradients, ordered like ``parameters``."""
        return [self._accumulator(p) for p in self.parameters]

    def _accumulator(self, param):
        """Return the squared-gradient sum for ``param``, creating it on first use."""
        slot = self.state[param]
        if "sum" not in slot:
            slot["sum"] = torch.zeros_like(param)
        return slot["sum"]

    def step(self, closure=None):
        """Perform one Adagrad update and zero the gradients that were used.

        Args:
            closure: optional callable that re-evaluates the model and
                returns the loss, matching the base-class ``step`` contract.

        Returns:
            The closure's return value, or ``None`` when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        with torch.no_grad():
            for group in self.param_groups:
                lr = group["lr"]
                epsilon = group["epsilon"]
                for param in group["params"]:
                    if param.grad is None:
                        continue
                    grad = param.grad
                    sum_sq_grad = self._accumulator(param)
                    sum_sq_grad += grad ** 2
                    # Per-element step size shrinks as squared gradients pile up
                    adjusted_lr = lr / (epsilon + torch.sqrt(sum_sq_grad))
                    param.sub_(adjusted_lr * grad)
                    grad.zero_()
        return loss

    def zero_grad(self, set_to_none=False):
        """Clear gradients of all managed parameters.

        Args:
            set_to_none: when true, drop the gradient tensors instead of
                zeroing them in place. Defaults to in-place zeroing.
        """
        with torch.no_grad():
            for param in self.parameters:
                if param.grad is None:
                    continue
                if set_to_none:
                    param.grad = None
                else:
                    param.grad.zero_()

# Training setup: everything runs on the CPU
device = torch.device("cpu")
model = SimpleCNN()
model = model.to(device)
# NOTE: CrossEntropyLoss applies log-softmax itself, and SimpleCNN.forward
# already returns log-softmax output, so the normalisation is applied twice.
criterion = nn.CrossEntropyLoss()

# Training function
def train_model(num_epochs, model, optimizer_choice='adagrad', lr=0.01):
    """Train ``model`` and print train/test loss and accuracy after every epoch.

    Uses the module-level ``train_loader``, ``test_loader``, ``criterion``
    and ``device``.

    Args:
        num_epochs: number of full passes over ``train_loader``.
        model: network to train; its parameters are updated in place.
        optimizer_choice: ``'sgd'`` applies ``sgd_update``; any other value
            trains with ``CustomAdagrad``.
        lr: learning rate handed to the chosen optimizer. The default of
            0.01 is the value that was previously hard-coded.
    """
    use_sgd = optimizer_choice == 'sgd'
    optimizer = None if use_sgd else CustomAdagrad(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct_train = 0
        total_train = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            # Clear leftover gradients on both paths so a model that arrives
            # with populated .grad tensors does not skew the first update.
            if use_sgd:
                model.zero_grad()
            else:
                optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            if use_sgd:
                sgd_update(model.parameters(), lr=lr)
            else:
                optimizer.step()
            train_loss += loss.item()
            predicted = torch.argmax(output.data, dim=1)
            total_train += target.size(0)
            correct_train += (predicted == target).sum().item()
        # Loss is averaged over batches, accuracy over individual samples
        avg_train_loss = train_loss / len(train_loader)
        train_acc = 100 * correct_train / total_train

        model.eval()
        test_loss = 0
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for data, target in test_loader:
                # Evaluation batches must live on the same device as the model,
                # exactly like the training batches above.
                data, target = data.to(device), target.to(device)
                output = model(data)
                loss = criterion(output, target)
                test_loss += loss.item()
                predicted = torch.argmax(output.data, dim=1)
                total_test += target.size(0)
                correct_test += (predicted == target).sum().item()
        avg_test_loss = test_loss / len(test_loader)
        test_acc = 100 * correct_test / total_test

        print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_acc:.8f}%, '
              f'Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_acc:.8f}%')



In [5]:
# Adagrad experiment: CPU device, fresh loss object and a newly initialised model
device = torch.device("cpu")
criterion = nn.CrossEntropyLoss()
model1 = SimpleCNN().to(device)

# Five epochs with the custom Adagrad optimizer
train_model(num_epochs=5, model=model1, optimizer_choice="adagrad")

Epoch 1, Train Loss: 2.1702, Train Accuracy: 25.50000000%, Test Loss: 1.6772, Test Accuracy: 48.00000000%
Epoch 2, Train Loss: 1.0823, Train Accuracy: 68.00000000%, Test Loss: 0.7921, Test Accuracy: 82.00000000%
Epoch 3, Train Loss: 0.5414, Train Accuracy: 85.00000000%, Test Loss: 0.5840, Test Accuracy: 82.00000000%
Epoch 4, Train Loss: 0.3642, Train Accuracy: 91.00000000%, Test Loss: 0.5486, Test Accuracy: 84.00000000%
Epoch 5, Train Loss: 0.2654, Train Accuracy: 91.50000000%, Test Loss: 0.3987, Test Accuracy: 88.00000000%


In [6]:
# SGD experiment: CPU device, fresh loss object and a newly initialised model
device = torch.device("cpu")
criterion = nn.CrossEntropyLoss()
model2 = SimpleCNN().to(device)

# Five epochs with the hand-written SGD update
train_model(num_epochs=5, model=model2, optimizer_choice="sgd")


Epoch 1, Train Loss: 2.3045, Train Accuracy: 10.50000000%, Test Loss: 2.2851, Test Accuracy: 12.00000000%
Epoch 2, Train Loss: 2.2999, Train Accuracy: 11.00000000%, Test Loss: 2.2813, Test Accuracy: 14.00000000%
Epoch 3, Train Loss: 2.2958, Train Accuracy: 11.00000000%, Test Loss: 2.2778, Test Accuracy: 14.00000000%
Epoch 4, Train Loss: 2.2922, Train Accuracy: 13.00000000%, Test Loss: 2.2745, Test Accuracy: 18.00000000%
Epoch 5, Train Loss: 2.2893, Train Accuracy: 14.50000000%, Test Loss: 2.2712, Test Accuracy: 24.00000000%
