In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from matplotlib import pyplot as plt

# Define transformations for the test set
test_transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize the tensors
])

# Load the MNIST training and test datasets
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=None)
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=test_transform)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 3363569.58it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 14450160.30it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1469083.21it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5916313.28it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



In [5]:
from sklearn.model_selection import train_test_split
train_set, val_set = train_test_split(trainset, test_size=0.2, random_state=42)

# Augmentation and normalization for training
train_transform = transforms.Compose([
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),  # Apply random rotation, translation, and scaling
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Optional: apply random brightness and contrast adjustment
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize the tensors
])

# Normalization (without augmentation) for validation
val_transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize((0.5,), (0.5,))  # Normalize the tensors
])

# Apply transformations to train and validation sets
train_set = [(train_transform(image), label) for image, label in train_set]
val_set = [(val_transform(image), label) for image, label in val_set]

# Define batch size
batch_size = 64

# Create DataLoader instances for train, validation, and test sets
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)

# Check the sizes of train, validation, and test sets
print(f"Training set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Test set size: {len(testset)}")


Training set size: 48000
Validation set size: 12000
Test set size: 10000


# RELU NETWORK

In [6]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # First convolutional layer: 1 input channel (grayscale image), 32 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        # Second convolutional layer: 32 input channels, 64 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Max pooling with 2x2 kernel
        self.pool = nn.MaxPool2d(2, 2)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 64)  # 7*7 from image dimension reduction after convolutions and pooling
        self.fc2 = nn.Linear(64, 10)  # 10 output classes

    def forward(self, x):
        # Apply convolutions, followed by ReLU, then max pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the tensor for the fully connected layer
        x = torch.flatten(x, 1)
        # Apply the fully connected layers with ReLU and output through fc2
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=10):
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        for images, labels in train_loader:
            # images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        model.eval()
        running_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                # images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        val_loss = running_loss / len(val_loader)
        val_accuracy = correct_val / total_val
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        print(f"Epoch [{epoch + 1}/{num_epochs}], "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

    return train_losses, val_losses, train_accuracies, val_accuracies

# Projected Subgradient Descent

In [8]:
class PSGD(optim.Optimizer):
    def __init__(self, params, lr, projection_fn):
        if lr < 0.0:
            raise ValueError(f"Invalid learning rate: {lr}")
        if projection_fn is None:
            raise ValueError("projection_fn must be specified")

        defaults = dict(lr=lr, projection_fn=projection_fn)
        super(PSGD, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                p.data -= group['lr'] * grad
                p.data = group['projection_fn'](p.data)

        return loss
  
# Define projection function for PSGD
def projection_fn(x):
    return torch.clamp(x, 0, 1)  # Example: projecting onto the [0, 1] interval


# Stochastic Subgradient Descent

In [9]:
class StochasticSGD(optim.Optimizer):
    def __init__(self, params, lr=lr):
        if lr < 0.0:
            raise ValueError(f"Invalid learning rate: {lr}")

        defaults = dict(lr=lr)
        super(StochasticSGD, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                p.data -= group['lr'] * grad

        return loss

NameError: name 'lr' is not defined

# Training & Testing for Projected Subgradient Descent

In [10]:
# Define the criterion (loss function) and the number of epochs
model=CNN()
criterion = nn.CrossEntropyLoss()
num_epochs = 50

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.DataParallel(model).to(device)

# Create optimizers
optimizer1 = PSGD(model.parameters(), lr=0.01, projection_fn=projection_fn)

# Train the model with optimizer1
train_losses, val_losses, train_accuracies, val_accuracies = train_model(model, train_loader, val_loader, optimizer1, criterion, num_epochs=num_epochs)

# Plot the loss vs. epoch curves for both optimizers
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss (Projected Subgradient Descent)')
plt.plot(range(1, num_epochs + 1), val_losses, label='Validation Loss (Projected Subgradient Descent)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epoch')
plt.legend()
plt.show()

# Plot the accuracy vs. epoch curves for both optimizers
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy (Projected Subgradient Descent)')
plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation Accuracy (Projected Subgradient Descent)')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epoch')
plt.legend()
plt.show()

Epoch [1/50], Train Loss: 1.9680, Train Accuracy: 0.3498, Val Loss: 1.0660, Val Accuracy: 0.6633
Epoch [2/50], Train Loss: 1.4530, Train Accuracy: 0.5173, Val Loss: 1.0245, Val Accuracy: 0.6247
Epoch [3/50], Train Loss: 1.3315, Train Accuracy: 0.5597, Val Loss: 0.7887, Val Accuracy: 0.7276
Epoch [4/50], Train Loss: 1.2616, Train Accuracy: 0.5869, Val Loss: 0.7245, Val Accuracy: 0.7535
Epoch [5/50], Train Loss: 1.2029, Train Accuracy: 0.6104, Val Loss: 0.6139, Val Accuracy: 0.8146
Epoch [6/50], Train Loss: 1.1492, Train Accuracy: 0.6289, Val Loss: 0.5717, Val Accuracy: 0.8337
Epoch [7/50], Train Loss: 1.0783, Train Accuracy: 0.6526, Val Loss: 0.4766, Val Accuracy: 0.8666
Epoch [8/50], Train Loss: 0.9813, Train Accuracy: 0.6872, Val Loss: 0.5659, Val Accuracy: 0.8111
Epoch [9/50], Train Loss: 0.8849, Train Accuracy: 0.7207, Val Loss: 0.5686, Val Accuracy: 0.7956
Epoch [10/50], Train Loss: 0.7929, Train Accuracy: 0.7514, Val Loss: 0.3923, Val Accuracy: 0.8663
Epoch [11/50], Train Loss: 0.

KeyboardInterrupt: 

## Test Accuracy

In [None]:
def evaluate_model(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move data to device
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    avg_loss = test_loss / len(test_loader)
    
    print(f'Test Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')


evaluate_model(model, test_loader, criterion)


Test Loss: 0.1513, Accuracy: 95.37%


# Training & Testing for Stochastic Subgradient Descent

In [None]:
lr = 0.01
optimizer2 = StochasticSGD(model.parameters(), lr=lr)

# Train the model with optimizer2
train_losses2, val_losses2, train_accuracies2, val_accuracies2 = train_model(model, train_loader, val_loader, optimizer2, criterion, num_epochs=num_epochs)

# Plot the loss vs. epoch curves for both optimizers
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_losses2, label='Train Loss (Stochastic Subgradient Descent)')
plt.plot(range(1, num_epochs + 1), val_losses2, label='Validation Loss (Stochastic Subgradient Descent)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epoch')
plt.legend()
plt.show()

# Plot the accuracy vs. epoch curves for both optimizers
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs + 1), train_accuracies2, label='Train Accuracy (Stochastic Subgradient Descent)')
plt.plot(range(1, num_epochs + 1), val_accuracies2, label='Validation Accuracy (Stochastic Subgradient Descent)')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epoch')
plt.legend()
plt.show()

Epoch [1/50], Train Loss: 2.3013, Train Accuracy: 0.1129, Val Loss: 2.3015, Val Accuracy: 0.1102
Epoch [2/50], Train Loss: 2.3012, Train Accuracy: 0.1129, Val Loss: 2.3015, Val Accuracy: 0.1102
Epoch [3/50], Train Loss: 2.3012, Train Accuracy: 0.1129, Val Loss: 2.3015, Val Accuracy: 0.1102


KeyboardInterrupt: 

## Test Accuracy

In [None]:
evaluate_model(model, test_loader, criterion)

Test Loss: 2.3026, Accuracy: 9.80%


In [None]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# Load the California housing dataset
data = fetch_california_housing()
X, y = data.data, data.target
