In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Number of independent runs; run i is seeded with the value i.
SEED_NUM = 5
SEEDS = torch.arange(SEED_NUM)

# Number of optimisation steps performed in each run.
EPOCHS = 100

# Display name -> optimizer class.  "AMSGrad" deliberately maps to the Adam
# class as well: the experiment cell enables it by passing ``amsgrad=True``.
OPTIMIZERS = dict(
    SGD=optim.SGD,
    Adam=optim.Adam,
    RMSprop=optim.RMSprop,
    AdaGrad=optim.Adagrad,
    AMSGrad=optim.Adam,
)
# Reduced comparison (Adam vs. AMSGrad only); uncomment to use it instead.
# OPTIMIZERS = dict(
#     Adam=optim.Adam,
#     AMSGrad=optim.Adam,
# )

# Slope of the objective on steps where ``step % 3 == 1``; the experiment cell
# also derives the second Adam beta from it as 1 / (1 + C ** 2).
C = 5

In [3]:
# Synthetic one-parameter objective used to compare optimizers (this is not a
# regression model: there is no data, only the scalar parameter ``x``).
class Model(nn.Module):
    """Step-dependent piecewise-linear objective in a single parameter ``x``.

    On steps where ``step % 3 == 1`` the objective is ``c * x``; on every other
    step it is ``-x``.  A quadratic penalty ``10 * (|x| - 1) ** 2`` is added
    whenever ``x`` lies outside ``[-1, 1]``.

    Args:
        c: Slope used on the ``step % 3 == 1`` steps.  When left as ``None``
            the module-level constant ``C`` is looked up at call time, which
            is what a plain ``Model()`` has always done.
    """

    def __init__(self, c=None):
        super().__init__()
        self.c = c
        # Single trainable scalar, drawn from a standard normal so that the
        # starting point is controlled by the torch seed.
        self.x = nn.Parameter(torch.randn(1))

    def forward(self, input):
        """Return the objective value for step index ``input``.

        Args:
            input: Integer step index; only ``input % 3`` matters.

        Returns:
            A one-element tensor that is differentiable with respect to ``x``.
        """
        slope = C if self.c is None else self.c

        if input % 3 == 1:
            out = slope * self.x
        else:
            out = -self.x

        # Quadratic penalty outside [-1, 1].  Added out of place so the
        # autograd graph never sees an in-place modification.
        if self.x < -1:
            out = out + 10 * (self.x + 1) ** 2
        elif self.x > 1:
            out = out + 10 * (self.x - 1) ** 2
        return out

In [4]:
def trainval(model, optimizer, epochs=10):
    """Run ``epochs`` optimizer steps on ``model`` and record the trajectory.

    The model is called with the integer step index and its output is used
    directly as the objective to minimise.

    Args:
        model: Module exposing a one-element parameter ``x``; ``model(step)``
            must return a tensor that ``backward()`` can be called on.
        optimizer: Optimizer already bound to the model's parameters.
        epochs: Number of steps to perform.

    Returns:
        A pair ``(grads, xs)``.  ``grads`` has one entry per step: the value
        of ``model.x.grad`` produced by that step's backward pass (a gradient,
        not a loss).  ``xs`` has ``epochs + 1`` entries: the initial value of
        ``x`` followed by its value after each optimizer step.
    """
    grad_history = []
    x_history = [model.x.item()]

    for step in range(epochs):
        model.train()

        # Clear the previous gradient, evaluate the objective for this step,
        # backpropagate and apply the update.
        optimizer.zero_grad()
        objective = model(step)
        objective.backward()
        optimizer.step()

        # The gradient is still populated after step(); log it together with
        # the freshly updated parameter value.
        grad_history.append(model.x.grad.item())
        x_history.append(model.x.item())

    return grad_history, x_history

In [5]:
# Everything runs on the CPU.  The automatic accelerator selection below is
# kept for reference but is disabled.
# if torch.cuda.is_available():
#     device = torch.device("cuda")
# elif torch.backends.mps.is_available():
#     device = torch.device("mps")
# else:
#     device = torch.device("cpu")
device = torch.device("cpu")


In [6]:
# Results keyed by optimizer name; each value collects one list per seed.
# Note that ``train_losses`` receives the first list returned by trainval,
# which holds the per-step gradients of x.
train_losses = {name: [] for name in OPTIMIZERS}
x_vals = {name: [] for name in OPTIMIZERS}

for optimizer_name, optimizer_cls in OPTIMIZERS.items():
    print("Running optimizer:", optimizer_name)

    # Every optimizer uses the same learning rate.  The two Adam-based entries
    # additionally get betas = (0, 1 / (1 + C^2)), and "AMSGrad" is the Adam
    # class with its amsgrad flag switched on.
    optimizer_kwargs = {"lr": 0.1}
    if optimizer_name in ("Adam", "AMSGrad"):
        beta = 1 / (1 + C ** 2)
        optimizer_kwargs["betas"] = (0, beta)
    if optimizer_name == "AMSGrad":
        optimizer_kwargs["amsgrad"] = True

    for seed in SEEDS:
        # Seeding right before construction fixes the random initial x, so
        # every optimizer starts from the same point for a given seed.
        torch.manual_seed(seed)

        model = Model()
        model.to(device)

        optimizer = optimizer_cls(model.parameters(), **optimizer_kwargs)

        train_loss, x_val = trainval(model, optimizer, epochs=EPOCHS)
        train_losses[optimizer_name].append(train_loss)
        x_vals[optimizer_name].append(x_val)

Running optimizer: SGD
Running optimizer: Adam
Running optimizer: RMSprop
Running optimizer: AdaGrad
Running optimizer: AMSGrad


In [7]:
# x = np.arange(EPOCHS)
# x = np.tile(x, SEED_NUM)
# for optimizer_name in OPTIMIZERS:
#     y = np.concatenate(train_losses[optimizer_name])
#     sns.lineplot(x=x, y=y, label=optimizer_name)
# plt.title("Gradients")
# plt.xlabel("Epoch")
# plt.ylabel("Gradient")
# plt.savefig(f"../results/special_grad_C{C}.pdf")
# plt.close()

In [8]:
# Each run logs EPOCHS + 1 values of x (the initial value plus one per step).
# The epoch axis is repeated once per seed so it lines up with the
# concatenated per-seed trajectories; seaborn then aggregates the repeated
# epochs into a single line per optimizer.
epoch_axis = np.tile(np.arange(EPOCHS + 1), SEED_NUM)

fig, ax = plt.subplots()
for optimizer_name in OPTIMIZERS:
    x_trajectory = np.concatenate(x_vals[optimizer_name])
    sns.lineplot(x=epoch_axis, y=x_trajectory, label=optimizer_name, ax=ax)

ax.set_title("X value")
ax.set_xlabel("Epoch")
ax.set_ylabel("x")

# The figure is written to disk and closed rather than displayed inline.
fig.savefig(f"../results/special_x_C{C}.pdf")
plt.close(fig)