In [1]:
import matplotlib.pyplot as plt
import torch
from torchvision import datasets, transforms

In [2]:
transform = transforms.Compose([transforms.ToTensor()])
mnist_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

batch_size = 512
#split train, val, test using train_test_split
from sklearn.model_selection import train_test_split
train_data, val_data = train_test_split(mnist_data, test_size= 0.3, random_state=42)
val_data, test_data = train_test_split(val_data,test_size=0.5, random_state=42)

train_loader_pytorch = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader_pytorch = torch.utils.data.DataLoader(val_data,batch_size=batch_size, shuffle=True)
test_loader_pytorch = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)



In [3]:
# Baseline model with Convolutional model
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(1,32,3,1)
        self.pool1 = torch.nn.MaxPool2d(2,2)
        self.conv2 = torch.nn.Conv2d(32,64,3,1)
        self.pool2 = torch.nn.MaxPool2d(2,2)
        self.fc1 = torch.nn.Linear(1600,128)
        self.fc2 = torch.nn.Linear(128,10)
        self.relu = torch.nn.ReLU()
    def forward(self,X):
        X = self.conv1(X)
        X = self.relu(X)
        X = self.pool1(X)
        X = self.conv2(X)
        X = self.relu(X)
        X = self.pool2(X)
        X = torch.flatten(X,1)
        X = self.fc1(X)
        X = self.relu(X)
        X = self.fc2(X)
        return X


In [None]:
# Training loop
import tqdm
criterion = torch.nn.CrossEntropyLoss()
from optimizers import StochasticArmijoSGD, StochasticArmijoAdam

num_epochs = 10  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for optimizer_name in ["Armijo", "Adam"," Armijo_adam"]:
    model = ConvNet()
        if optimizer_name == "Adam":
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        if optimizer_name == "Armijo":
            optimizer = StochasticArmijoSGD(model.parameters(), initial_step=1, c=0.000001, tau=0.5, max_backtracks=20)
        if optimizer_name == "Armijo_adam":
            optimizer = StochasticArmijoAdam(model.parameters(), initial_step=1, c=0.000001, tau=0.5, max_backtracks=20)

    for epoch in range(num_epochs):
    
        #reset model weights


        for images, labels in tqdm.tqdm(train_loader_pytorch):
            images, labels = images.to(device), labels.to(device)
            if optimizer_name == "Adam":
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
            if optimizer_name == "Armijo":  # armijo
                def closure(backward=True):
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    if backward:
                        loss.backward()
                    return loss
                optimizer.step(closure)
            if optimizer_name == "Armijo_adam":
                def closure(backward=True):
                    optimizer.zero_grad()
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    if backward:
                        loss.backward()
                    return loss
                optimizer.step(closure)
        # validation
        model.eval()
        val_loss = 0
        correct =0 
        total= 0
        with torch.no_grad():
            for images, labels in val_loader_pytorch:
                outputs = model(images)
                loss = criterion(outputs,labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data,1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            val_loss = val_loss / len(val_loader_pytorch)
            val_accuracy = correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Optimizer: {optimizer_name}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')



100%|██████████| 83/83 [00:33<00:00,  2.46it/s]


Epoch [1/10], Optimizer: Armijo, Val Loss: 2.3077, Val Accuracy: 0.0981


100%|██████████| 83/83 [00:23<00:00,  3.58it/s]


Epoch [2/10], Optimizer: Armijo, Val Loss: 2.3102, Val Accuracy: 0.1131


100%|██████████| 83/83 [00:31<00:00,  2.67it/s]


Epoch [3/10], Optimizer: Armijo, Val Loss: 2.3027, Val Accuracy: 0.1131


100%|██████████| 83/83 [00:25<00:00,  3.20it/s]


Epoch [4/10], Optimizer: Armijo, Val Loss: 2.3076, Val Accuracy: 0.0981


 12%|█▏        | 10/83 [00:03<00:24,  2.96it/s]


KeyboardInterrupt: 