In [1]:
from tqdm.notebook import tqdm  # Import tqdm for Jupyter Notebook

from src.optimizee import *
from src.optimizer import *
from src.initializer import *

from src.train_lstm import *
from src.test_optimizer import *

import shutil
import time
import matplotlib.pyplot as plt

from torch.utils.tensorboard import SummaryWriter

## Quadratic Optimizee

In [18]:
torch.manual_seed(0)  # Set random seed for reproducibility
np.random.seed(0)

n=10
W = torch.randn(n, n)  # Random weights for the linear model
theta0 = torch.ones(n,1)  # Random theta for the linear model

optimizee = QuadraticOptimizee(W, theta0)
optimizee.set_params()
optimizer_cls = optim.Adam
optimizer_kwargs = {"lr":0.01}

writer = SummaryWriter(f"quad_test/Adam")
test_optimzier(optimizer_cls, optimizee,optimizer_kwargs, time_horizon=1000, writer=writer)

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Training Progress:   0%|          | 0/1000 [00:00<?, ?time step/s]

tensor([[1.4532],
        [0.4442],
        [1.6159],
        [0.6725],
        [0.7176],
        [0.9404],
        [0.0115],
        [1.2270],
        [0.9543],
        [0.7636]], requires_grad=True)

In [15]:
shutil.rmtree("quad_train/1_optim", ignore_errors=True)  # Remove the directory if it exists
shutil.rmtree("quad_test/1_optim", ignore_errors=True)  # Remove the directory if it exists

torch.manual_seed(0)  # Set random seed for reproducibility
np.random.seed(0)
n=10
W = torch.randn(n, n)  # Random weights for the linear model
theta0 = torch.ones(n,1)  # Random theta for the linear model

kwargs = {"W": [W], "theta0": [theta0], "noise_std": [0.01]}

initializer = Initializer(QuadraticOptimizee, kwargs)

lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)

t0 = time.time()
writer = SummaryWriter("quad_train/1_optim") 
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=0.9, writer=writer)
t1 = time.time()
writer = SummaryWriter("quad_test/1_optim")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)
t2 = time.time()

print("Training time: ", t1-t0)
print("Testing time: ", t2-t1)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 204141.9531, LR: 1.000e-03
Final parameters: [[-21.200607 -20.023066 -20.559832 -20.607178 -18.638407 -21.253262
  -20.952675 -21.072395 -20.955984 -19.61591 ]]...
Epoch [20/100], Cumulative Loss: 63267.5039, LR: 1.000e-03
Final parameters: [[13.115947 13.976924 15.679885 13.805742 14.063043 15.144784 13.409777
  12.409916 12.021747 12.871058]]...
Epoch [30/100], Cumulative Loss: 331.5034, LR: 1.000e-03
Final parameters: [[2.5255766 2.1779127 2.7346442 1.1947067 1.1588609 1.8090003 1.5469613
  0.9976194 3.0362272 1.2220871]]...
Epoch [40/100], Cumulative Loss: 33.2028, LR: 1.000e-03
Final parameters: [[ 1.0584853   0.47112054  0.09122726  1.3366938   1.8059851  -0.07279566
   1.1780066   0.75298226 -0.5075195   1.1647782 ]]...
Epoch [50/100], Cumulative Loss: 34.3671, LR: 1.000e-03
Final parameters: [[1.073365   0.6386564  1.0903507  0.80861986 0.96155095 0.03038208
  0.8623191  0.28999883 0.5310506  0.91675633]]...
Epoch [60/100], Cumulative Loss: 49.5

In [2]:
for discount in [0, 1e-3, 0.1, 0.9, 1]:
    
    shutil.rmtree(f"quad_train/disc_{discount}", ignore_errors=True)  # Remove the directory if it exists
    shutil.rmtree(f"quad_test/disc_{discount}", ignore_errors=True)  # Remove the directory if it exists

    torch.manual_seed(0)  # Set random seed for reproducibility
    np.random.seed(0)
    n=10
    W = torch.randn(n, n)  # Random weights for the linear model
    theta0 = torch.ones(n,1)  # Random theta for the linear model

    kwargs = {"W": [W], "theta0": [theta0], "noise_std": [0.01]}

    initializer = Initializer(QuadraticOptimizee, kwargs)

    lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
    meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)

    writer = SummaryWriter(f"quad_train/disc_{discount}")
    lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=discount, writer=writer)
    writer = SummaryWriter(f"quad_test/disc_{discount}")
    params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 21098.5938, LR: 1.000e-03
Final parameters: [[-21.203257 -20.025824 -20.56266  -20.610067 -18.64131  -21.255947
  -20.95514  -21.07485  -20.958523 -19.61851 ]]...
Epoch [20/100], Cumulative Loss: 6563.7642, LR: 1.000e-03
Final parameters: [[13.08989  13.950383 15.653728 13.779223 14.03656  15.118164 13.384108
  12.384328 11.995342 12.845782]]...
Epoch [30/100], Cumulative Loss: 28.3870, LR: 1.000e-03
Final parameters: [[2.4676085 2.0858662 2.6382992 1.0905981 1.0507662 1.8148259 1.5754422
  1.0004959 3.014398  1.1786183]]...
Epoch [40/100], Cumulative Loss: 3.1654, LR: 1.000e-03
Final parameters: [[ 1.0787108   0.46757174  0.07779662  1.3304827   1.7932662  -0.04698625
   1.1926962   0.7521064  -0.49394643  1.1703732 ]]...
Epoch [50/100], Cumulative Loss: 3.4609, LR: 1.000e-03
Final parameters: [[1.0640693  0.61482483 1.068398   0.7898686  0.94438845 0.00926477
  0.8425668  0.2714095  0.51407254 0.9004774 ]]...
Epoch [60/100], Cumulative Loss: 4.9118, L

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 21119.6348, LR: 1.000e-03
Final parameters: [[-21.203255 -20.02582  -20.56266  -20.610067 -18.64131  -21.255947
  -20.955133 -21.074848 -20.958523 -19.61851 ]]...
Epoch [20/100], Cumulative Loss: 6571.8018, LR: 1.000e-03
Final parameters: [[13.091261 13.951673 15.654954 13.780412 14.037737 15.11964  13.385599
  12.385804 11.996779 12.847163]]...
Epoch [30/100], Cumulative Loss: 28.0922, LR: 1.000e-03
Final parameters: [[2.4624274 2.0775957 2.6298916 1.0827422 1.0445697 1.8155314 1.5661902
  0.9957029 3.0065703 1.1707017]]...
Epoch [40/100], Cumulative Loss: 3.2111, LR: 1.000e-03
Final parameters: [[ 1.0735769   0.46413773  0.07845154  1.3320149   1.7979472  -0.05035071
   1.1903772   0.751243   -0.50089407  1.1690739 ]]...
Epoch [50/100], Cumulative Loss: 3.4210, LR: 1.000e-03
Final parameters: [[1.0676081  0.620054   1.0742989  0.79549074 0.94846374 0.02000681
  0.8501008  0.27936858 0.52117455 0.9094968 ]]...
Epoch [60/100], Cumulative Loss: 4.9398, L

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 23433.3516, LR: 1.000e-03
Final parameters: [[-21.203226 -20.025795 -20.56263  -20.61003  -18.641277 -21.255915
  -20.955101 -21.07482  -20.958488 -19.618483]]...
Epoch [20/100], Cumulative Loss: 7291.1045, LR: 1.000e-03
Final parameters: [[13.091328 13.951825 15.655134 13.780593 14.0379   15.119729 13.385673
  12.385836 11.996829 12.847211]]...
Epoch [30/100], Cumulative Loss: 31.3381, LR: 1.000e-03
Final parameters: [[2.4669251 2.0809813 2.6336682 1.0876105 1.0499085 1.8058467 1.5692897
  0.997573  3.0142682 1.1806659]]...
Epoch [40/100], Cumulative Loss: 3.5658, LR: 1.000e-03
Final parameters: [[ 1.0690924   0.46440542  0.08146286  1.3360641   1.8001242  -0.04949269
   1.1966345   0.7466923  -0.50184894  1.174934  ]]...
Epoch [50/100], Cumulative Loss: 3.6312, LR: 1.000e-03
Final parameters: [[1.0713183  0.63446105 1.0861561  0.8036357  0.95867175 0.02339666
  0.8628807  0.29122877 0.52550495 0.9064343 ]]...
Epoch [60/100], Cumulative Loss: 5.4375, L

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 204141.9531, LR: 1.000e-03
Final parameters: [[-21.200607 -20.023066 -20.559832 -20.607178 -18.638407 -21.253262
  -20.952675 -21.072395 -20.955984 -19.61591 ]]...
Epoch [20/100], Cumulative Loss: 63267.5039, LR: 1.000e-03
Final parameters: [[13.115947 13.976924 15.679885 13.805742 14.063043 15.144784 13.409777
  12.409916 12.021747 12.871058]]...
Epoch [30/100], Cumulative Loss: 331.5034, LR: 1.000e-03
Final parameters: [[2.5255766 2.1779127 2.7346442 1.1947067 1.1588609 1.8090003 1.5469613
  0.9976194 3.0362272 1.2220871]]...
Epoch [40/100], Cumulative Loss: 33.2028, LR: 1.000e-03
Final parameters: [[ 1.0584853   0.47112054  0.09122726  1.3366938   1.8059851  -0.07279566
   1.1780066   0.75298226 -0.5075195   1.1647782 ]]...
Epoch [50/100], Cumulative Loss: 34.3671, LR: 1.000e-03
Final parameters: [[1.073365   0.6386564  1.0903507  0.80861986 0.96155095 0.03038208
  0.8623191  0.28999883 0.5310506  0.91675633]]...
Epoch [60/100], Cumulative Loss: 49.5

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 3792439.7500, LR: 1.000e-03
Final parameters: [[-21.145998 -19.966682 -20.501846 -20.548136 -18.579493 -21.198912
  -20.902508 -21.02206  -20.904034 -19.56244 ]]...
Epoch [20/100], Cumulative Loss: 1130105.5000, LR: 1.000e-03
Final parameters: [[13.715738  14.514662  16.21979   14.349406  14.617306  15.816805
  14.054712  13.039429  12.626572  13.4777155]]...
Epoch [30/100], Cumulative Loss: 38108.1328, LR: 1.000e-03
Final parameters: [[3.2691176 3.559902  4.121642  2.6199927 2.5886042 1.9399824 1.7853973
  1.3029677 3.4895191 1.8137199]]...
Epoch [40/100], Cumulative Loss: 25490.1133, LR: 1.000e-03
Final parameters: [[ 0.9352377   0.51485217  0.14200492  1.3979598   1.8615947  -0.19777319
   1.1296288   0.7710861  -0.5311663   1.1731731 ]]...
Epoch [50/100], Cumulative Loss: 10127.9844, LR: 1.000e-03
Final parameters: [[1.1394066  0.6038906  1.0644623  0.8351197  0.95856875 0.12291773
  0.87901956 0.34490484 0.51281554 0.94119203]]...
Epoch [60/100], C

In [3]:
torch.manual_seed(0)  # Set random seed for reproducibility
np.random.seed(0)
n=10
W = torch.randn(n, n)  # Random weights for the linear model
theta0 = torch.ones(n,1)  # Random theta for the linear model

kwargs = {"W": [W], "theta0": [theta0], "noise_std": [0.01]}

initializer = Initializer(QuadraticOptimizee, kwargs)

lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)

writer = SummaryWriter(f"quad_train/disc_2")
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=50, discount=2, writer=writer)
writer = SummaryWriter(f"quad_test/disc_2")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 84178197905670144.0000, LR: 1.000e-03
Final parameters: [[-4.006369   -2.7218328  -2.849578   -2.5857615  -0.46286067 -2.2528164
  -1.1990061  -2.118211   -2.6895108  -2.0435967 ]]...
Epoch [20/100], Cumulative Loss: 109254398823104512.0000, LR: 1.000e-03
Final parameters: [[2.976119  2.0218222 4.081002  2.8993735 3.56812   4.7710614 3.6241539
  2.8254986 2.333707  2.967144 ]]...
Epoch [30/100], Cumulative Loss: 89577152185368576.0000, LR: 1.000e-03
Final parameters: [[13.183223  11.1836405 11.593998  10.919093  11.54511   11.975571
  12.938058  10.868667  13.593101  11.783046 ]]...
Epoch [40/100], Cumulative Loss: 170529013607956480.0000, LR: 1.000e-03
Final parameters: [[18.529066 18.495052 16.22962  15.942067 19.819832 18.450245 19.14051
  16.827236 17.731441 17.343493]]...
Epoch [50/100], Cumulative Loss: 69716927055921152.0000, LR: 1.000e-03
Final parameters: [[13.249968  16.56107   16.45122   14.882063  15.832477  14.98335
  16.553722  14.3855095 

In [3]:
times_train, times_test = [], []
for count in range(1, 10):
    # shutil.rmtree(f"quad_train/{count}_optim", ignore_errors=True)  # Remove the directory if it exists
    # shutil.rmtree(f"quad_test/{count}_optim", ignore_errors=True)  # Remove the directory if it exists

    torch.manual_seed(0)  # Set random seed for reproducibility
    np.random.seed(0)

    n=10
    W = torch.randn(n, n)  # Random weights for the linear model
    theta0 = torch.ones(n,1)  # Random theta for the linear model

    kwargs = {"W": [W], "theta0": [theta0], "noise_std": list(np.arange(0.01, (count+1)*0.01, 0.01))}

    initializer = Initializer(QuadraticOptimizee, kwargs)

    lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
    meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)

    t0 = time.time()
    # writer = SummaryWriter(f"quad_train/{count}_optim") 
    lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=0.9)
    t1 = time.time()
    # writer = SummaryWriter(f"quad_test/{count}_optim")
    params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000)
    t2 = time.time()

    times_train.append(t1-t0)
    times_test.append(t2-t1)

print("Times_Train", times_train)
print("Times_Test", times_test)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 204141.9531, LR: 1.000e-03
Final parameters: [[-21.200607 -20.023066 -20.559832 -20.607178 -18.638407 -21.253262
  -20.952675 -21.072395 -20.955984 -19.61591 ]]...
Epoch [20/100], Cumulative Loss: 63267.5039, LR: 1.000e-03
Final parameters: [[13.115947 13.976924 15.679885 13.805742 14.063043 15.144784 13.409777
  12.409916 12.021747 12.871058]]...
Epoch [30/100], Cumulative Loss: 331.5034, LR: 1.000e-03
Final parameters: [[2.5255766 2.1779127 2.7346442 1.1947067 1.1588609 1.8090003 1.5469613
  0.9976194 3.0362272 1.2220871]]...
Epoch [40/100], Cumulative Loss: 33.2028, LR: 1.000e-03
Final parameters: [[ 1.0584853   0.47112054  0.09122726  1.3366938   1.8059851  -0.07279566
   1.1780066   0.75298226 -0.5075195   1.1647782 ]]...
Epoch [50/100], Cumulative Loss: 34.3671, LR: 1.000e-03
Final parameters: [[1.073365   0.6386564  1.0903507  0.80861986 0.96155095 0.03038208
  0.8623191  0.28999883 0.5310506  0.91675633]]...
Epoch [60/100], Cumulative Loss: 49.5

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 690565.6875, LR: 1.000e-03
Final parameters: [[-35.510696 -36.95648  -35.78062  -35.71172  -36.209694 -35.455154
  -40.19661  -40.513863 -40.108624 -37.70981 ]]...
Epoch [20/100], Cumulative Loss: 2.3801, LR: 1.000e-03
Final parameters: [[1.2955385  0.6017042  1.007606   0.8358114  0.88453066 0.7350635
  0.64454585 0.8778035  0.6384119  1.0100452 ]]...
Epoch [30/100], Cumulative Loss: 20.8423, LR: 1.000e-03
Final parameters: [[0.19667731 2.3528948  0.97802436 1.2658257  1.1278677  1.9070171
  1.8521829  1.3594611  2.306548   0.9645561 ]]...
Epoch [40/100], Cumulative Loss: 9242.3330, LR: 1.000e-03
Final parameters: [[ 1.2643993  4.4577723  3.3559437  5.8808756  4.3642607  9.608824
  12.475751   8.718078   8.928857   4.0330205]]...
Epoch [50/100], Cumulative Loss: 69.9667, LR: 1.000e-03
Final parameters: [[-1.552021    3.5208123  -0.78611493  2.2184274   2.6969712   1.5048018
   3.694567    2.0556705   2.4231415   1.3329409 ]]...
Epoch [60/100], Cumulati

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 1.4837, LR: 1.000e-03
Final parameters: [[0.251435   1.9633739  0.15200551 1.5481122  1.4695761  1.2357233
  2.46543    0.8362832  1.2663385  1.347946  ]]...
Epoch [20/100], Cumulative Loss: 0.3386, LR: 1.000e-03
Final parameters: [[0.8527968  1.1933498  0.86091363 1.0874513  1.0565115  1.0052192
  1.2423048  0.9405756  1.0574645  1.0888116 ]]...
Epoch [30/100], Cumulative Loss: 0.3626, LR: 1.000e-03
Final parameters: [[1.2054505 0.7888997 1.2132678 0.8795615 0.9228395 0.9903994 0.692695
  1.0993975 0.9733969 0.9169263]]...
Epoch [40/100], Cumulative Loss: 0.4954, LR: 1.000e-03
Final parameters: [[1.2561276  0.71441823 1.3986927  0.8072749  0.80682164 0.9856966
  0.42166957 1.0878129  1.0372778  0.823684  ]]...
Epoch [50/100], Cumulative Loss: 0.5735, LR: 1.000e-03
Final parameters: [[0.7247144  1.405236   0.8142145  1.1881926  1.1095321  1.1052197
  1.5001669  0.95219487 1.1925559  1.1129807 ]]...
Epoch [60/100], Cumulative Loss: 0.6863, LR: 1.000e-03


Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 1325232.7500, LR: 1.000e-03
Final parameters: [[52.971962 53.60106  54.6115   53.66979  52.700935 57.221287 60.067997
  58.97629  56.57265  55.00469 ]]...
Epoch [20/100], Cumulative Loss: 81.8304, LR: 1.000e-03
Final parameters: [[ 0.00746383  2.1467996  -0.8080026   1.9483395   2.9706497   0.8210478
   2.971868    1.9350698   0.38112378  1.4948225 ]]...
Epoch [30/100], Cumulative Loss: 1239.9465, LR: 1.000e-03
Final parameters: [[ 1.7981057  -2.3523204  -4.147284    0.15388629  1.9801304   1.5460348
   2.0705955   1.8137037  -0.28214744 -1.5228446 ]]...
Epoch [40/100], Cumulative Loss: 217.3334, LR: 1.000e-03
Final parameters: [[ 3.2777438  -1.2662773   2.1980739  -0.8436632  -0.74562824  0.22049603
  -0.42626816 -0.34086412  0.6216042  -0.2279318 ]]...
Epoch [50/100], Cumulative Loss: 26.8955, LR: 1.000e-03
Final parameters: [[ 2.865678   -0.8514421   1.492713    0.4120065   0.847373    0.44545585
  -1.0577203   1.5136322  -0.4700408   0.4805364 ]]...

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 4.4270, LR: 1.000e-03
Final parameters: [[1.664736   0.04881512 1.2610643  0.60886437 0.6660063  0.656706
  0.03427867 0.8592129  0.3234664  0.9128431 ]]...
Epoch [20/100], Cumulative Loss: 0.6183, LR: 1.000e-03
Final parameters: [[0.3734385  1.7936358  0.12160977 1.4420393  1.3960598  1.0923876
  2.4203148  0.69219977 1.0685942  1.3485073 ]]...
Epoch [30/100], Cumulative Loss: 5.7905, LR: 1.000e-03
Final parameters: [[0.29571882 1.6821228  0.7774876  1.0351187  1.1214334  1.1179144
  1.8328946  0.8475578  1.7002941  1.168805  ]]...
Epoch [40/100], Cumulative Loss: 0.4385, LR: 1.000e-03
Final parameters: [[0.9630605  1.0852915  0.83206064 1.0762353  1.0933669  0.9643168
  1.2103008  0.9082111  0.8922623  1.0546865 ]]...
Epoch [50/100], Cumulative Loss: 0.5069, LR: 1.000e-03
Final parameters: [[1.125268   0.7353674  1.1068683  0.86182815 0.91620576 0.92519474
  0.70132446 0.99571943 0.86629224 0.9259975 ]]...
Epoch [60/100], Cumulative Loss: 2.7227, LR: 

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 378870.7188, LR: 1.000e-03
Final parameters: [[-25.947302 -24.837069 -22.63337  -24.601492 -23.794838 -28.94256
  -35.61303  -32.811512 -32.397644 -26.574942]]...
Epoch [20/100], Cumulative Loss: 1.0905, LR: 1.000e-03
Final parameters: [[1.1122539  0.8006468  1.0542152  0.93534505 0.93197423 0.91759497
  0.8454598  0.90281284 0.84454167 0.98721147]]...
Epoch [30/100], Cumulative Loss: 4.1246, LR: 1.000e-03
Final parameters: [[-0.02045661  2.318726   -0.2915365   1.7728301   1.7158645   1.2407796
   3.0014994   0.85074323  1.2121484   1.5028743 ]]...
Epoch [40/100], Cumulative Loss: 10104.7080, LR: 1.000e-03
Final parameters: [[-2.9941118 12.642021  14.196784  13.005314   7.751782   5.44036
   3.1671546  6.9581695  8.209994   2.77736  ]]...
Epoch [50/100], Cumulative Loss: 5.0685, LR: 1.000e-03
Final parameters: [[-0.47228616  2.8497796  -0.88541764  2.1555374   2.0894775   1.4098623
   3.9557772   0.6187732   1.1853906   1.667405  ]]...
Epoch [60/100], 

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 2.6643, LR: 1.000e-03
Final parameters: [[-0.10907411  2.4518516  -0.2896857   1.8426203   1.708436    1.377574
   3.2464387   0.7716345   1.4116678   1.4678583 ]]...
Epoch [20/100], Cumulative Loss: 0.3235, LR: 1.000e-03
Final parameters: [[0.8495149  1.2320554  0.9253528  1.0920693  1.0735232  1.0649972
  1.2643389  0.99800414 1.1475273  1.0532882 ]]...
Epoch [30/100], Cumulative Loss: 0.4294, LR: 1.000e-03
Final parameters: [[0.6493181  1.4571834  0.594379   1.283869   1.2168149  1.0489781
  1.718395   0.85043514 1.1047229  1.1852971 ]]...
Epoch [40/100], Cumulative Loss: 0.2594, LR: 1.000e-03
Final parameters: [[1.1483604  0.82998514 1.1639776  0.91398025 0.87688935 0.952021
  0.70947737 1.054645   0.96698207 0.9495594 ]]...
Epoch [50/100], Cumulative Loss: 0.2779, LR: 1.000e-03
Final parameters: [[0.8078366  1.1619687  0.83075535 1.092417   1.068833   1.0186788
  1.2687432  0.9395051  1.0769093  1.0595633 ]]...
Epoch [60/100], Cumulative Loss: 0.41

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 2.6643, LR: 1.000e-03
Final parameters: [[-0.10907411  2.4518516  -0.2896857   1.8426203   1.708436    1.377574
   3.2464387   0.7716345   1.4116678   1.4678583 ]]...
Epoch [20/100], Cumulative Loss: 0.3235, LR: 1.000e-03
Final parameters: [[0.8495149  1.2320554  0.9253528  1.0920693  1.0735232  1.0649972
  1.2643389  0.99800414 1.1475273  1.0532882 ]]...
Epoch [30/100], Cumulative Loss: 0.4294, LR: 1.000e-03
Final parameters: [[0.6493181  1.4571834  0.594379   1.283869   1.2168149  1.0489781
  1.718395   0.85043514 1.1047229  1.1852971 ]]...
Epoch [40/100], Cumulative Loss: 0.2594, LR: 1.000e-03
Final parameters: [[1.1483604  0.82998514 1.1639776  0.91398025 0.87688935 0.952021
  0.70947737 1.054645   0.96698207 0.9495594 ]]...
Epoch [50/100], Cumulative Loss: 0.2779, LR: 1.000e-03
Final parameters: [[0.8078366  1.1619687  0.83075535 1.092417   1.068833   1.0186788
  1.2687432  0.9395051  1.0769093  1.0595633 ]]...
Epoch [60/100], Cumulative Loss: 0.41

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 55145.5586, LR: 1.000e-03
Final parameters: [[ 7.156487  8.567442  9.605099 10.300525  9.896771 18.116533 20.257769
  19.59545  15.353335  9.653481]]...
Epoch [20/100], Cumulative Loss: 1.8792, LR: 1.000e-03
Final parameters: [[0.25882918 1.9126992  0.27160236 1.5393112  1.4378369  1.1747446
  2.3738606  0.74386406 1.1978915  1.3253478 ]]...
Epoch [30/100], Cumulative Loss: 56.9132, LR: 1.000e-03
Final parameters: [[ 3.8837216  -2.8686438   2.1249998  -0.37240723  0.11634634 -0.02873537
  -2.8324099   0.8790602  -1.5712477   0.4921749 ]]...
Epoch [40/100], Cumulative Loss: 1054.1113, LR: 1.000e-03
Final parameters: [[ 3.028984   -3.0394974  -2.8560052  -0.5447544   1.4817808   1.3222718
   0.65737134  1.6676389  -0.7723293  -1.4507815 ]]...
Epoch [50/100], Cumulative Loss: 3.2930, LR: 1.000e-03
Final parameters: [[ 2.1498463  -0.41490316  2.4454143   0.18995366  0.31746927  0.77361757
  -1.2436024   1.3669012   0.80500656  0.50051355]]...
Epoch [60/100]

In [9]:
torch.manual_seed(0)  # Set random seed for reproducibility
np.random.seed(0)
n=10
W = torch.randn(n, n)  # Random weights for the linear model
theta0 = torch.ones(n,1)  # Random theta for the linear model

kwargs = {"W": [W], "theta0": [theta0], "noise_std": [0.01]}

initializer = Initializer(QuadraticOptimizee, kwargs)

lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.01)

scheduler = optim.lr_scheduler.ExponentialLR(meta_optimizer, gamma=0.9)

writer = SummaryWriter(f"quad_train_lr/exp")
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=0.9, writer=writer, scheduler=scheduler)
writer = SummaryWriter(f"quad_test_lr/exp")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 22.0672, LR: 3.487e-03
Final parameters: [[0.7458178  0.48710084 0.7246131  1.1129686  1.2794619  0.03819936
  0.7985495  0.69870615 0.12967493 1.140651  ]]...
Epoch [20/100], Cumulative Loss: 2202.9973, LR: 1.216e-03
Final parameters: [[ 0.87161595 -2.5762007  -1.0964168  -1.740776   -0.8742697  -3.1788495
  -4.399133   -2.7612875  -2.9782355  -0.340473  ]]...
Epoch [30/100], Cumulative Loss: 230.4241, LR: 4.239e-04
Final parameters: [[ 2.8051443  -0.9810329   0.5666674  -0.59469014 -0.09969285  0.35050195
   0.62905145 -0.07248963  0.1487947  -0.04518276]]...
Epoch [40/100], Cumulative Loss: 1222.1696, LR: 1.478e-04
Final parameters: [[ 2.831877   -3.6281383  -3.741819   -2.1823993   0.93304986 -1.2606273
   0.45565212  0.36349714 -2.3948965  -0.8059255 ]]...
Epoch [50/100], Cumulative Loss: 94.7108, LR: 5.154e-05
Final parameters: [[ 1.0056808   0.29039174  0.8656239   0.585132    1.0019178  -0.60585976
   0.48516583  0.0814171   0.11609113  0.706845

In [None]:
torch.manual_seed(0)  # Set random seed for reproducibility
np.random.seed(0)
n=10
W = torch.randn(n, n)  # Random weights for the linear model
theta0 = torch.ones(n,1)  # Random theta for the linear model

kwargs = {"W": [W], "theta0": [theta0], "noise_std": [0.01]}

initializer = Initializer(QuadraticOptimizee, kwargs)

lstm_optimizer = LSTMConcurrent(num_optims=initializer.get_num_optims(), preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)

writer = SummaryWriter(f"quad_train/lstm_no_preproc")
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=0.9, writer=writer)
writer = SummaryWriter(f"quad_test/lstm_no_preproc")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

  self.W = torch.tensor(W, dtype=torch.float32)
  self.theta0 = torch.tensor(theta0, dtype=torch.float32)


Epoch [10/100], Cumulative Loss: 1566403.2500, LR: 1.000e-03
Final parameters: [[57.911163 64.48485  65.12734  64.53079  66.00644  77.2321   69.50971
  63.033337 59.33736  58.664852]]...
Epoch [20/100], Cumulative Loss: 211578.9844, LR: 1.000e-03
Final parameters: [[19.878113 27.124207 23.42374  25.65271  25.073217 32.0088   28.965328
  25.711296 23.307188 21.79594 ]]...
Epoch [30/100], Cumulative Loss: 720.3181, LR: 1.000e-03
Final parameters: [[-3.5164587  8.447317   1.8405029  4.256819   2.5125027  4.838291
   7.192434   3.2630103  7.986643   1.4101461]]...
Epoch [40/100], Cumulative Loss: 188.4844, LR: 1.000e-03
Final parameters: [[-1.6718128  5.3076997  1.327415   2.630596   1.7788293  3.2326741
   4.4692245  2.07053    4.763102   1.3009313]]...
Epoch [50/100], Cumulative Loss: 109.9922, LR: 1.000e-03
Final parameters: [[-2.0991793   5.4988213   0.11261505  2.8293707   2.2051938   2.698927
   5.504279    1.5587337   4.321713    1.4922891 ]]...
Epoch [60/100], Cumulative Loss: 75.9

## NN Optimizee

In [4]:
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
y = (y-np.mean(y))/np.std(y)
X.shape, y.shape

((442, 10), (442,))

In [5]:
opt = XYNNOptimizee(X, y, num_samples=50)


torch.manual_seed(0)
np.random.seed(0)

indices = torch.randint(0, X.shape[0], (50,))
X_loss = torch.tensor(X[indices], dtype=torch.float32)
y_loss = torch.tensor(y[indices], dtype=torch.float32).squeeze()
print(X_loss.shape, y_loss.shape)

out = opt.forward(X_loss).squeeze()
print(out.shape)

print(nn.MSELoss()(out, y_loss))

torch.manual_seed(0)
np.random.seed(0)
opt.compute_loss(opt.all_parameters(), return_grad=False)

torch.Size([50, 10]) torch.Size([50])
torch.Size([50])
tensor(6.1015, grad_fn=<MseLossBackward0>)


tensor(6.1015, grad_fn=<MseLossBackward0>)

In [6]:
kwargs = {"X": [X], "y": [y], "hidden_size": [20], "num_layers": [2], "num_samples":[100], "loss_fn":[nn.MSELoss()]}

lstm_optimizer = LSTMConcurrent(num_optims=1, preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.001)
initializer = Initializer(XYNNOptimizee, kwargs)

writer = SummaryWriter("diab_train/lr1e-3_MSE_2_layers") 
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=100, time_horizon=500, discount=0.9, writer=writer)
writer = SummaryWriter("diab_test/lr1e-3_MSE_2_layers")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/100 [00:00<?, ?it/s]

Epoch [10/100], Cumulative Loss: 188.5305, LR: 1.000e-03
Final parameters: [[-5.00232   -4.271275  -3.8146198 -5.3463044 -4.31671   -5.051714
  -4.3848834 -5.4401565 -6.6796904 -6.7991896 -4.688251  -4.0982018
  -4.540951  -4.3195634 -6.4075446 -3.155995  -5.8427973 -4.937065
  -4.9463043 -4.72288   -5.063637  -4.9825754 -2.0175254 -5.7878437
  -8.036008  -3.1914062 -3.8180373 -6.362682  -6.5460677 -6.552737
  -3.1508644 -6.081562  -5.2259865 -4.301706  -4.3299723 -5.457913
  -5.337422  -3.3684278 -5.65675   -5.0899405 -5.1289296 -5.2214065
  -6.647633  -4.0242653 -4.4766145 -4.6060386 -5.3128114 -5.258121
  -4.4312263 -4.122672  -4.2232976 -4.2077928 -6.2140527 -5.3323884
  -5.7695274 -5.633792  -6.4506516 -6.982246  -4.9281034 -5.116632
  -3.4738486 -6.2441077 -4.5609994 -3.2823188 -4.0803595 -5.0488086
  -4.0124993 -6.419482  -6.1446214 -5.3685822 -6.0068045 -3.6715684
  -4.8357706 -4.2404127 -5.758171  -4.050371  -6.1975193 -5.9214997
  -4.975137  -5.123497  -5.5436993 -4.9571953 -

In [8]:
kwargs = {"X": [X], "y": [y], "hidden_size": [20], "num_layers": [2], "num_samples":[100], "loss_fn":[nn.MSELoss()]}

lstm_optimizer = LSTMConcurrent(num_optims=1, preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.0005)
initializer = Initializer(XYNNOptimizee, kwargs)

writer = SummaryWriter("diab_train/lr5e-4_MSE_2_layers") 
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=500, time_horizon=500, discount=0.9, writer=writer)
writer = SummaryWriter("diab_test/lr5e-4_MSE_2_layers")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [50/500], Cumulative Loss: 5001.1738, LR: 5.000e-04
Final parameters: [[-23.396877 -24.608862 -23.49851  -23.936354 -26.391235 -23.971785
  -22.233263 -22.525581 -24.933506 -23.00616  -25.495253 -23.448362
  -21.919514 -22.958647 -21.785585 -24.376057 -22.45343  -21.993933
  -21.390047 -22.530174 -21.478031 -22.816158 -22.059082 -21.983599
  -21.396011 -23.81721  -21.901081 -22.355263 -23.101    -22.637474
  -23.580708 -22.207954 -23.177761 -21.942753 -21.46756  -23.402388
  -23.70645  -21.9947   -23.57644  -22.215427 -21.887955 -23.699196
  -24.087643 -21.345852 -22.376211 -21.441242 -22.500937 -22.729412
  -23.451931 -21.952562 -22.06522  -22.2372   -22.513393 -21.229807
  -20.639444 -21.741886 -25.373957 -22.485346 -23.829636 -22.915464
  -22.118513 -23.524437 -22.987751 -21.907087 -21.971085 -22.720703
  -24.189623 -22.173397 -21.19357  -23.48855  -21.814814 -23.417967
  -23.241966 -23.847506 -22.367666 -23.414423 -23.894053 -23.628983
  -20.992355 -22.34199  -23.493004 -23.5

In [9]:
kwargs = {"X": [X], "y": [y], "hidden_size": [20], "num_layers": [2], "num_samples":[100], "loss_fn":[nn.MSELoss()]}

lstm_optimizer = LSTMConcurrent(num_optims=1, preproc=True)
meta_optimizer = optim.Adam(lstm_optimizer.parameters(), lr=0.01)
initializer = Initializer(XYNNOptimizee, kwargs)
scheduler = optim.lr_scheduler.StepLR(meta_optimizer, step_size=150, gamma=0.1)

writer = SummaryWriter("diab_train/lr1e-4_MSE_2_layers_scheduled") 
lstm_optimizer = train_LSTM(lstm_optimizer, meta_optimizer, initializer, num_epochs=500, time_horizon=500, discount=0.9, writer=writer, scheduler=scheduler)    
writer = SummaryWriter("diab_test/lr1e-4_MSE_2_layers_scheduled")
params = test_LSTM(lstm_optimizer, initializer, time_horizon=1000, writer=writer)

Training Progress:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch [50/500], Cumulative Loss: 622416.6250, LR: 1.000e-02
Final parameters: [[-258.9308  -255.476   -250.7169  -247.74266 -244.85626 -242.66255
  -242.75192 -239.99274 -241.9641  -241.69618 -240.84    -242.4618
  -240.18727 -240.99045 -240.35583 -239.26526 -239.0608  -240.35892
  -241.9938  -243.39935 -240.61446 -240.34555 -241.08736 -240.9596
  -240.42476 -241.6978  -241.83907 -240.49252 -240.93233 -241.28664
  -240.46823 -240.14223 -239.9669  -241.83862 -240.32803 -241.61313
  -240.25885 -241.3645  -239.669   -241.53156 -241.24248 -241.8147
  -239.38412 -241.32516 -242.10023 -239.6766  -241.76685 -240.849
  -241.12642 -239.94559 -240.742   -243.29272 -242.24384 -239.00012
  -239.50458 -240.4185  -239.21141 -240.53125 -240.85172 -240.31442
  -241.5686  -241.8736  -240.39442 -241.11095 -241.54852 -241.6148
  -239.00084 -239.41054 -240.25255 -240.20898 -240.6845  -238.85994
  -240.31863 -240.55476 -242.15332 -242.5638  -241.43033 -241.88904
  -240.64154 -241.92067 -242.55489 -241.7148