<img src="../rsag_convex.png" alt="algoconvex" />
<img src="../x_update.png" alt="x_update" />
<img src="../mean.png" alt="mean" />
<img src="../rsag_composite.png" alt="algo" />

__Parameters :__
- $\alpha$: (1-$\alpha$) weight of aggregated x on current state, i.e. momentum
- $\lambda$: learning rate
- $\beta$: change for aggregated x
- $p_k$ termination probability



In [1]:
from  torch.optim import Adam, SGD, RMSprop
import torch
from torch.nn import functional as F
from torch import nn
import numpy as np

In [2]:
import torch.utils.data as data_utils

In [3]:
print('Using PyTorch version:', torch.__version__)
if torch.cuda.is_available():
    print('Using GPU, device name:', torch.cuda.get_device_name(0))
    device = torch.device('cuda')
else:
    print('No GPU found, using CPU instead.') 
    device = torch.device('cpu')

Using PyTorch version: 2.1.2+cu121
Using GPU, device name: NVIDIA GeForce GTX 1660 Ti


In [4]:
import path
import sys
sys.path.append('../')
from models import MLP
from optimizers import RSAG, AccSGD
from util import DataLoader
from util import calc_accuracy, train_model, HPScheduler


### Run MLP:
__TUNE DIFFERENT OPTIMIZERS__:
- Nesterov w/ weight decay w/ Scheduled LR (SGD)
- Momentum w/ weight decay w/ Scheduled LR (SGD)
- Basic SGD
- Adagrad?
- Adam?



In [5]:
data_loader = DataLoader()
loaders = data_loader.get_loaders()
# loss_function = torch.nn.CrossEntropyLoss()
# model = MLP().to(device)
# print(model)

# optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, nesterov=True, momentum=0.9)
# optimizer = RSAG(model.parameters(), lr=1e-4, alpha=.9, beta=9e-5)


#### Nesterov w/ weight decay w/ Scheduled LR

In [6]:
loss_function = torch.nn.CrossEntropyLoss()
model = MLP().to(device)

optimizer = SGD(model.parameters(), lr=0.9, nesterov=True, momentum=0.9)

In [7]:
lr = HPScheduler.lambda_scheduler(optimizer=optimizer)

In [8]:
train_model(model, loss_function, optimizer, loaders, device, n_epochs=100, print_every=2)

Starting Epoch 1


Epoch 1 finished
Epoch 1/100
----------
Loss 0.3942
Accuracy:  88.6958
Validation Loss 0.3559
Validation Accuracy:  89.7500
Starting Epoch 2
Epoch 2 finished
Starting Epoch 3
Epoch 3 finished
Epoch 3/100
----------
Loss 0.3158
Accuracy:  91.0771
Validation Loss 0.3245
Validation Accuracy:  90.6083
Starting Epoch 4
Epoch 4 finished
Starting Epoch 5
Epoch 5 finished
Epoch 5/100
----------
Loss 0.3063
Accuracy:  91.4208
Validation Loss 0.3275
Validation Accuracy:  90.8250
Early stopping at epoch 4
Training has completed


({'loss': [0.3941815403290093,
   0.3248839430200557,
   0.31583735146559777,
   0.3109839874474953,
   0.30630107706723114],
  'accuracy': [88.69583333333334,
   90.82708333333333,
   91.07708333333333,
   91.0875,
   91.42083333333333],
  'v_loss': [0.3559361208230257,
   0.32003985326737167,
   0.32445109051962695,
   0.3205532369514306,
   0.3275000611320138],
  'v_accuracy': [89.75,
   90.91666666666667,
   90.60833333333333,
   90.75833333333334,
   90.825],
  'v_loss_std': [0.11666602211074947,
   0.09884205020363171,
   0.09338897123260617,
   0.11243451601954904,
   0.1067623683076376],
  'v_accuracy_std': [3.3322915038553673,
   2.9988423692410966,
   2.7757756673685923,
   2.807715065473861,
   2.7828717182076503],
  'loss_std': [0.2305265310742368,
   0.10814583791131628,
   0.10320107471420607,
   0.1004741430196087,
   0.09932773567516279],
  'accuracy_std': [7.244767029073851,
   2.7608301577102172,
   2.951746628645864,
   2.8585853873317597,
   2.766236788892488]},
 90

In [None]:
def tune_nesterov(alpha_values, lr_values, save_log=False):
    loss_function = torch.nn.CrossEntropyLoss()
    best_alpha, best_lr = 0.0, 0.0
    best_accuracy = 0.0
    v_accs, acc_std, v_loss, loss_std = [], [], [], []
    acc, loss = [], []
    
    for alpha in alpha_values:
        for lr in lr_values:
            beta = lr * alpha
            
            print(f"----------- Training with alpha={alpha}, lr={lr} -----------------")
            
            model = MLP().to(device)
            optimizer = SGD(model.parameters(), lr=lr, nesterov=True, momentum=0.9)


            log, best_accuracy = train_model(model,loss_function,optimizer,loaders,print_every=5):
            if log['v_accuracy'][-1] > best_accuracy:
                print(f"Found a new best accuracy: {log['v_accuracy'][-1]}")
                print(f"best alpha: {alpha}, best lr: {lr}")
                best_accuracy = log['v_accuracy'][-1]
                best_alpha = alpha
                best_lr = lr
            
            v_accs.append(log['v_accuracy'])
            acc_std.append(log['v_accuracy_std'])
            v_loss.append(log['v_loss'])
            loss_std.append(log['v_loss_std'])
            acc.append(log['accuracy'])
            loss.append(log['loss'])
            

    
    return best_alpha, best_lr, v_accs, acc_std, v_loss, loss_std, acc, loss


In [6]:
model = MLP().to(device)
print(model)

loss_function = torch.nn.CrossEntropyLoss()


log = train_model(model, loss_function, optimizer, loaders, device)


MLP(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=10, bias=True)
  )
)
Starting Epoch 1


NameError: name 'copy' is not defined

In [81]:
def train_with_hyperparameters(alpha_values, lr_values, save_log=False):
    loss_function = torch.nn.CrossEntropyLoss()
    best_alpha, best_lr = 0.0, 0.0
    best_accuracy = 0.0
    v_accs, acc_std, v_loss, loss_std = [], [], [], []
    acc, loss = [], []
    
    for alpha in alpha_values:
        for lr in lr_values:
            beta = lr * alpha
            
            print(f"----------- Training with alpha={alpha}, lr={lr} -----------------")
            
            model = MLP().to(device)
            optimizer = RSAG(model.parameters(), lr=lr, alpha=alpha, beta=beta)
            log = train_model(model, loaders, optimizer, loss_function, device, epochs=20)
            
            if log['v_accuracy'][-1] > best_accuracy:
                print(f"Found a new best accuracy: {log['v_accuracy'][-1]}")
                print(f"best alpha: {alpha}, best lr: {lr}")
                best_accuracy = log['v_accuracy'][-1]
                best_alpha = alpha
                best_lr = lr
            
            v_accs.append(log['v_accuracy'])
            acc_std.append(log['v_accuracy_std'])
            v_loss.append(log['v_loss'])
            loss_std.append(log['v_loss_std'])
            acc.append(log['accuracy'])
            loss.append(log['loss'])
            

    
    return best_alpha, best_lr, v_accs, acc_std, v_loss, loss_std, acc, loss


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 58)

In [46]:

optimizer = RSAG(model.parameters(), lr=1e-4, alpha=.9, beta=9e-5)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, nesterov=True, momentum=0.9)
train_model(model, loaders, optimizer, loss_function, device, epochs=5)

Starting Epoch 1
Epoch 1 finished
loss 4.6072
Accuracy:  7.4900
Starting Epoch 2
Epoch 2 finished
loss 4.6027
Accuracy:  8.8633
Starting Epoch 3
Epoch 3 finished
loss 4.5982
Accuracy:  10.3017
Starting Epoch 4
Epoch 4 finished
loss 4.5937
Accuracy:  11.6917
Starting Epoch 5
Epoch 5 finished
loss 4.5893
Accuracy:  13.1383
Training has completed
