In [1]:
import gurobipy as gp
from gurobipy import GRB
import numpy as np
import pyepo
from pyepo.model.grb import optGrbModel
import torch
from torch import nn
from torch.utils.data import DataLoader
from gurobipy import Model, GRB, quicksum
from sklearn.preprocessing import StandardScaler
import pandas as pd
import wandb 
wandb.login()
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import time
from tqdm import tqdm
from pyepo.metric.regretParams import regretParams
# train model

#from sklearn_extra.cluster import KMedoids
import copy

Auto-Sklearn cannot be imported.


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33madh[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# load data
red = (0.77, 0, 0.05) # (196, 0, 13)
blue = (0.12, 0.24, 1) # (31, 61, 255)
# green = (0.31, 1, 0.34) # (79, 255, 87)
green = (0.122, 00.816, 0.51) # (31, 208, 130)
navyblue = (0, 0, 0.4) # (0, 0, 102)
black = (0, 0, 0)
white = (1, 1, 1)
cgreen = (0.57254902, 0.7254902 , 0.51372549) # (146, 185, 131)
cblue = (0.70196078, 0.83137255, 1) # (179, 212, 255)

top_domain = 53.32 # 90% quantile



def import_data(negative_prices=False):
    # import data and set constants
    all_data = pd.read_csv("2020_data.csv")
    prices_UP = np.maximum(all_data["UP"].to_numpy(),0)
    prices_DW = np.maximum(all_data["DW"].to_numpy(),0)
    prices_F = np.maximum(all_data["forward_RE"].to_numpy(),0)
    prices_forecast = np.maximum(all_data["forward_FC"].to_numpy(), 0)

    nominal_wind = 10
    features = all_data.loc[:, ["Offshore DK2", "Offshore DK1", "Onshore DK2", "Onshore DK1", "production_FC"]]
    features["forward"] = prices_F
    features_red = all_data.loc[:, ["production_FC"]]
    features_red["forward"] = prices_F
    realized = all_data.loc[:, "production_RE"].to_numpy()
    realized *= nominal_wind

    price_H = 35.2
    penalty = np.quantile(prices_UP, 0.95) # 95% quantile of deficit_settle price over all 2 years
    # penalty = 2 * price_H
    # penalty = np.max(prices_B) # Something HIGHER is needed apparently

    return (
        prices_UP,
        prices_DW,
        prices_F,
        prices_forecast,
        features,
        features_red,
        realized,
        price_H,
        penalty
    )

In [11]:
#Import data
(prices_UP,prices_DW,prices_F,prices_forecast,features,features_red,realized,price_H,penalty) = import_data()

# Change forward prices to forecast prices in features
features["forward"] = prices_forecast

periods = list(range(0, len(prices_F) )) # Total time considered 2020-2021
n_periods = 24 # Number of periods in a day
n_days = 365 # Number of days in training set and test set
n_hours = n_days * n_periods
num_cost = 4 # number of cost parameters
num_feat = n_periods*6 # size of feature
num_feat_rf = 2 # size of feature
num_item = num_cost*n_periods # number of predictions (Forward bid and Hydrogen)
#n_val_days = 10 # number of validation days 
#n_hours_val = n_periods*n_val_days
lambda_H_list = [price_H for i in range(n_periods)]
penalty_list = [-penalty for i in range(n_periods)]

lambda_H_list = [price_H for i in range(n_periods)]
penalty_list = [-penalty for i in range(n_periods)]

def flatten_extend(matrix):
     flat_list = []
     for row in matrix:
         flat_list.extend(row)
     return flat_list

In [12]:
from pyepo.model.grb import optGrbModel

# optimization model
class hydrogenPlanning(optGrbModel):
    def __init__(self, realized, *args, **kwargs):        
        #Fixed parameters
        self.max_elec = 10
        self.max_wind = 10
        self.nominal_wind = 10
        self.min_production = 50
        self.periods = np.arange(len(realized))
        self.E_real = realized
        super().__init__()

    def _getModel(self):

        self.initial_plan = Model("Gurobi.Optimizer")

        # Definition of variables
        self.var = self.initial_plan.addVars((4*len(self.periods)), name="x")
        # 1-24: Hydrogen plan, 25-48: Forward bids, 49-72: Up regulation, 73-96: Down regulation
        # Objective: Maximize profit
        self.initial_plan.modelSense = GRB.MAXIMIZE

        # Constraints
        # Max capacity
        
        self.initial_plan.addConstr(self.min_production <= gp.quicksum(self.var[t] for t in self.periods), name="min_hydrogen_production")
        for t in np.arange(0,len(self.periods)):
            self.initial_plan.addConstr(self.var[t] >= 0, name=f"elec_capacity_lb_{t}")
            self.initial_plan.addConstr(self.var[t] <= self.max_elec, name=f"elec_capacity_ub_{t}")
        for t in np.arange(len(self.periods),2*len(self.periods)):
            self.initial_plan.addConstr(self.var[t] >= -self.max_elec, name=f"wind_capacity_lb_{t}")
            self.initial_plan.addConstr(self.var[t] <= self.max_wind, name=f"wind_capacity_ub_{t}")
        for t in np.arange(2*len(self.periods),3*len(self.periods)):
            self.initial_plan.addConstr(self.var[t] >= 0, name=f"up_regulation_lb_{t}")
            self.initial_plan.addConstr(self.var[t] <= 10*self.max_wind, name=f"up_regulation_ub_{t}")
        for t in np.arange(3*len(self.periods),4*len(self.periods)):
            self.initial_plan.addConstr(self.var[t] >= 0, name=f"dw_regulation_lb_{t}")
            self.initial_plan.addConstr(self.var[t] <= 10*self.max_wind, name=f"dw_regulation_ub_{t}")
        for t in np.arange(0,len(self.periods)):
            self.initial_plan.addConstr(self.E_real[t] - self.var[t] - self.var[t+24] == -self.var[t+48] + self.var[t+72], name=f"balancing_{t}")
            #initial_plan.addConstr(-x[0,t] + self.min_production/len(self.periods) - x[4,t] <= 0, name=f"slack_{t}")
        self.initial_plan.addConstr(gp.quicksum(self.var[t] for t in np.arange(0,len(self.periods))) == self.min_production, name="min_hydrogen_production")
        
        return self.initial_plan, self.var
    
    def setObjective(self, c):
        # Objective: Maximize profit
        self.initial_plan.setObjective(gp.quicksum(self.var[t]*c[t] for t in np.arange(0,4*len(self.periods))), GRB.MAXIMIZE)

    def get_plan(self):
        self.initial_plan.optimize()
        self.initial_plan.update()
        x_values = []
        for var in self.initial_plan.getVars():
            x_values.append(var.x)
        hydrogen = x_values[0:len(self.periods)]
        forward_bids = x_values[len(self.periods):2*len(self.periods)]
        return forward_bids, hydrogen


In [13]:
from matplotlib import pyplot as plt

def visLearningCurve(loss_log, loss_log_regret):
    # create figure and subplots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16,4))

    # draw plot for training loss
    ax1.plot(loss_log, color="c", lw=1)
    ax1.tick_params(axis="both", which="major", labelsize=12)
    ax1.set_xlabel("Iters", fontsize=16)
    ax1.set_ylabel("Loss", fontsize=16)
    ax1.set_title("Learning Curve on Training Set", fontsize=16)

    # draw plot for regret on test
    ax2.plot(loss_log_regret, color="royalblue", ls="--", alpha=0.7, lw=1)
    ax2.set_xticks(range(0, len(loss_log_regret), 2))
    ax2.tick_params(axis="both", which="major", labelsize=12)
    ax2.set_ylim(0, 1)
    ax2.set_xlabel("Epochs", fontsize=16)
    ax2.set_ylabel("Regret", fontsize=16)
    ax2.set_title("Learning Curve on Test Set", fontsize=16)

    plt.show()

In [14]:
num_feat = n_periods*6 # size of feature
num_item = 4*24 # number of predictions (Forward bid and Hydrogen)

wind_train = np.asarray([flatten_extend([realized[d:d+n_periods]]) for d in range(int(n_hours/n_periods))])
#wind_val   = wind_train[-n_val_days:,:]
#wind_train   = wind_train[:(n_days-n_val_days),:]
wind_test = np.asarray([flatten_extend([realized[d:d+n_periods]]) for d in range(int(n_hours/n_periods), int(2*n_hours/n_periods))])
"""
x_train = np.asarray([flatten_extend(features.values[d:d+n_periods]) for d in range(int(n_hours/n_periods))])
x_test = np.asarray([flatten_extend(features.values[d:d+n_periods]) for d in range(int(n_hours/n_periods), int(2*n_hours/n_periods))])
"""


x_train_df = features.iloc[:n_hours]

#x_train_df = features.iloc[:(n_hours-n_hours_val)]
#x_val_df = features.iloc[(n_hours-n_hours_val):n_hours]
x_test_df = features.iloc[n_hours:(n_hours+n_hours)]


# Create a StandardScaler object (fitted on train data)
scaler = StandardScaler()
scaler.fit(x_train_df)

# Standardize train and test dataframes separately
x_train_df = pd.DataFrame(scaler.transform(x_train_df), columns=x_train_df.columns)
#x_val_df = pd.DataFrame(scaler.transform(x_val_df), columns=x_val_df.columns)
x_test_df = pd.DataFrame(scaler.transform(x_test_df), columns=x_test_df.columns)


x_train = []
#x_val = []
#for i in range(0, len(x_val_df), 24):
#    x_val.append((x_val_df.iloc[i:i+24]).values.T.flatten())
x_test = []
for i in range(0, len(x_train_df), 24):
    x_train.append((x_train_df.iloc[i:i+24]).values.T.flatten())  # Extract 24 rows for each day

for i in range(0, len(x_test_df), 24):
    x_test.append((x_test_df.iloc[i:i+24]).values.T.flatten()) 


# Standardize x_train and x_test
#train_mean = np.mean(x_train, axis=0)
#train_std = np.std(x_train, axis=0)
#x_train_stand = (x_train - train_mean) / train_std
#x_test_stand = (x_test - train_mean) / train_std

c_train = np.asarray([flatten_extend([lambda_H_list, prices_F[d: d+n_periods], -prices_UP[d: d+n_periods], prices_DW[d: d+n_periods]]) for d in range(int(n_hours/n_periods))])
#c_val   = c_train[-n_val_days:,:]
#c_train   = c_train[:(n_days-n_val_days),:]
c_test = np.asarray([flatten_extend([lambda_H_list, prices_F[d: d+n_periods], -prices_UP[d: d+n_periods], prices_DW[d: d+n_periods]]) for d in range(int(n_hours/n_periods), int(2*n_hours/n_periods))])


In [15]:
from pyepo.data.datasetParams import optDatasetParams
dataset_train = optDatasetParams(hydrogenPlanning, x_train, c_train, wind_train)
#dataset_val = optDatasetParams(hydrogenPlanning, x_val, c_val, wind_val)
dataset_test = optDatasetParams(hydrogenPlanning, x_test, c_test, wind_test)

batch_size = 1
loader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=False)
#loader_val = DataLoader(dataset_val, batch_size=batch_size, shuffle=False)
loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

Optimizing for optDataset...


100%|██████████| 365/365 [00:01<00:00, 185.25it/s]


Optimizing for optDataset...


100%|██████████| 365/365 [00:02<00:00, 177.07it/s]


In [16]:
# prediction model
class LinearRegression(nn.Module):

    def __init__(self, input_size, output_size,neurons,dropout):
        super(LinearRegression, self).__init__()
        #self.linear = nn.Linear(num_feat, num_item)
        self.linear = nn.Sequential( 
            nn.Linear(input_size, neurons),
            nn.Dropout(dropout),
            nn.ReLU(),
            nn.Linear(neurons, neurons),
            nn.ReLU(),
            nn.Linear(neurons, output_size)
        )

    def forward(self, x):
        out = self.linear(x)
        return out


In [17]:
def trainModel(config=None):#, num_epochs=20, lr=1e-2):
    # set adam optimizer
    with wandb.init(config=config):
        config = wandb.config
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        #pprint(config)
        reg = LinearRegression(num_feat, num_item,config.neurons,config.dropout)
        # cuda
        if torch.cuda.is_available():
            reg = reg.cuda()
        # init SPO+ loss
        spop = pyepo.func.SPOPlus
    
        optimizer = torch.optim.Adam(reg.parameters(), lr=config.lr)
        lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = config.gamma)
        # train mode
        reg.train()
        # init log
        loss_log = []
        # using validation regret instead of test regret
        loss_log_regret = [regretParams(reg, hydrogenPlanning, loader_test, wind_test)]
        # init elpased time
        elapsed = 0
        wandb.watch(reg, log_freq=100)
        for epoch in range(config.num_epochs):
            # start timing
            tick = time.time()
            # load data
            for i, data in enumerate(tqdm(loader_train)):
                wind = wind_train[i]
                opt_model = hydrogenPlanning(wind)
                loss_func = spop(opt_model, processes=1)
                x, c, w, z = data
                # cuda
                if torch.cuda.is_available():
                    x, c, w, z = x.cuda(), c.cuda(), w.cuda(), z.cuda()
                # forward pass
                cp = reg(x)
                if config.method_name == "spo+":
                    loss = loss_func(cp, c, w, z)
                if config.method_name in ["ptb", "pfy", "imle", "nce", "cmap"]:
                    loss = loss_func(cp, w)
                if config.method_name in ["dbb", "nid"]:
                    loss = loss_func(cp, c, z)
                if config.method_name == "ltr":
                    loss = loss_func(cp, c)
                # backward pass
                optimizer.zero_grad()
                loss.backward()
                #for name, param in reg.named_parameters():
                #    wandb.log({f"{name}.grad": param.grad.norm()}, step=epoch)
                optimizer.step()
                # record time
                tock = time.time()
                elapsed += tock - tick
                # log
                loss_log.append(loss.item())
                wandb.log({"Linear loss": loss})
            lr_scheduler.step()
            # validation regret
            regret = regretParams(reg, hydrogenPlanning, loader_test, wind_test)
            loss_log_regret.append(regret)
            wandb.log({"Regret": regret})
            print("Epoch {:2},  Loss: {:9.4f},  Regret: {:7.4f}%".format(epoch+1, loss.item(), regret*100))
      
        print("Total Elapsed Time: {:.2f} Sec.".format(elapsed))
        return reg, loss_log, loss_log_regret

In [10]:
# Hyper parameters

import pprint
sweep_config = {
     'name': 'Basic Model Sweep',
    'method': 'random', # grid, random
    'metric': {
    'name': 'Regret',
      'goal': 'minimize'   
    },
}
parameters_dict =  {
        'lr': {
            'values': [1e-2, 1e-3, 1e-4]
        },
        'gamma': {
            'values': [0.9, 0.95, 0.99]
        },
        'num_epochs': {
            'values': [10,20,30]
        },
        'neurons': {
            'values': [32, 64, 128]
            },
            'dropout': {
                'values': [0.2,0.5,0.7,0.9]
            },
            #"loss_function": {"value":spop},
            "method_name": {"value":"spo+"},
    }
sweep_config['parameters'] = parameters_dict
pprint.pprint(sweep_config)
sweep_id = wandb.sweep(sweep_config, entity="Pyepo_special",project="Sweep Pyepo Basic Model")

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'Regret'},
 'name': 'Basic Model Sweep',
 'parameters': {'dropout': {'values': [0.2, 0.5, 0.7, 0.9]},
                'gamma': {'values': [0.9, 0.95, 0.99]},
                'lr': {'values': [0.01, 0.001, 0.0001]},
                'method_name': {'value': 'spo+'},
                'neurons': {'values': [32, 64, 128]},
                'num_epochs': {'values': [10, 20, 30]}}}
Create sweep with ID: 21l2bgkz
Sweep URL: https://wandb.ai/Pyepo_special/Sweep%20Pyepo%20Basic%20Model/sweeps/21l2bgkz


In [12]:
wandb.agent(sweep_id, function=trainModel,count=30)

[34m[1mwandb[0m: Agent Starting Run: dwk0psgm with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33madh[0m ([33mPyepo_special[0m). Use [1m`wandb login --relogin`[0m to force relogin


100%|██████████| 180/180 [00:01<00:00, 136.31it/s]
100%|██████████| 180/180 [00:02<00:00, 72.17it/s]
100%|██████████| 180/180 [00:01<00:00, 133.68it/s]


Epoch  1,  Loss: 18474.5742,  Regret: 11.1446%


100%|██████████| 180/180 [00:02<00:00, 71.50it/s]
100%|██████████| 180/180 [00:01<00:00, 120.19it/s]


Epoch  2,  Loss: 16107.9199,  Regret: 16.8112%


100%|██████████| 180/180 [00:02<00:00, 73.12it/s]
100%|██████████| 180/180 [00:01<00:00, 121.77it/s]


Epoch  3,  Loss: 15413.8594,  Regret: 14.6075%


100%|██████████| 180/180 [00:03<00:00, 56.06it/s]
100%|██████████| 180/180 [00:01<00:00, 108.94it/s]


Epoch  4,  Loss: 10957.2939,  Regret: 12.0378%


100%|██████████| 180/180 [00:03<00:00, 54.03it/s]
100%|██████████| 180/180 [00:02<00:00, 79.58it/s] 


Epoch  5,  Loss: 14003.2734,  Regret: 15.8918%


100%|██████████| 180/180 [00:04<00:00, 37.42it/s]
100%|██████████| 180/180 [00:04<00:00, 40.16it/s]


Epoch  6,  Loss: 15397.7715,  Regret:  7.7423%


100%|██████████| 180/180 [00:04<00:00, 40.95it/s]
100%|██████████| 180/180 [00:03<00:00, 48.35it/s]


Epoch  7,  Loss: 15819.1162,  Regret: 11.2649%


100%|██████████| 180/180 [00:04<00:00, 40.59it/s]
100%|██████████| 180/180 [00:03<00:00, 50.81it/s]


Epoch  8,  Loss: 10783.7891,  Regret: 11.2553%


100%|██████████| 180/180 [00:04<00:00, 41.95it/s]
100%|██████████| 180/180 [00:03<00:00, 45.39it/s]


Epoch  9,  Loss: 12274.5078,  Regret:  8.9165%


100%|██████████| 180/180 [00:04<00:00, 39.88it/s]
100%|██████████| 180/180 [00:02<00:00, 81.03it/s] 


Epoch 10,  Loss: 12945.9727,  Regret: 11.2500%


100%|██████████| 180/180 [00:03<00:00, 49.83it/s]
100%|██████████| 180/180 [00:01<00:00, 123.47it/s]


Epoch 11,  Loss: 13772.5811,  Regret:  8.3364%


100%|██████████| 180/180 [00:04<00:00, 42.61it/s]
100%|██████████| 180/180 [00:01<00:00, 129.53it/s]


Epoch 12,  Loss: 13742.7324,  Regret:  8.5483%


100%|██████████| 180/180 [00:04<00:00, 41.14it/s]
100%|██████████| 180/180 [00:03<00:00, 46.02it/s]


Epoch 13,  Loss: 14683.6270,  Regret:  8.8675%


100%|██████████| 180/180 [00:04<00:00, 39.38it/s]
100%|██████████| 180/180 [00:04<00:00, 43.77it/s]


Epoch 14,  Loss: 7806.4150,  Regret: 12.8043%


100%|██████████| 180/180 [00:04<00:00, 39.08it/s]
100%|██████████| 180/180 [00:04<00:00, 41.62it/s]


Epoch 15,  Loss: 10160.7021,  Regret:  8.2341%


100%|██████████| 180/180 [00:04<00:00, 40.46it/s]
100%|██████████| 180/180 [00:04<00:00, 43.99it/s]


Epoch 16,  Loss: 14173.9795,  Regret:  8.2334%


100%|██████████| 180/180 [00:04<00:00, 39.36it/s]
100%|██████████| 180/180 [00:04<00:00, 44.17it/s]


Epoch 17,  Loss: 13983.6455,  Regret:  8.6991%


100%|██████████| 180/180 [00:04<00:00, 39.11it/s]
100%|██████████| 180/180 [00:04<00:00, 44.19it/s]


Epoch 18,  Loss: 15821.1387,  Regret:  9.6906%


100%|██████████| 180/180 [00:04<00:00, 38.93it/s]
100%|██████████| 180/180 [00:03<00:00, 45.62it/s]


Epoch 19,  Loss: 9600.0039,  Regret:  8.5677%


100%|██████████| 180/180 [00:04<00:00, 39.54it/s]
100%|██████████| 180/180 [00:04<00:00, 44.72it/s]


Epoch 20,  Loss: 13392.6904,  Regret:  8.8454%


100%|██████████| 180/180 [00:04<00:00, 39.26it/s]
100%|██████████| 180/180 [00:03<00:00, 46.42it/s]


Epoch 21,  Loss: 10059.9141,  Regret: 11.0458%


100%|██████████| 180/180 [00:04<00:00, 39.76it/s]
100%|██████████| 180/180 [00:03<00:00, 47.24it/s]


Epoch 22,  Loss: 13269.8545,  Regret: 10.7902%


100%|██████████| 180/180 [00:04<00:00, 40.84it/s]
100%|██████████| 180/180 [00:03<00:00, 47.79it/s]


Epoch 23,  Loss: 9913.8535,  Regret:  8.0658%


100%|██████████| 180/180 [00:04<00:00, 40.23it/s]
100%|██████████| 180/180 [00:03<00:00, 49.05it/s]


Epoch 24,  Loss: 8634.0400,  Regret:  8.8442%


100%|██████████| 180/180 [00:04<00:00, 40.82it/s]
100%|██████████| 180/180 [00:03<00:00, 47.63it/s]


Epoch 25,  Loss: 7603.0811,  Regret:  8.9904%


100%|██████████| 180/180 [00:04<00:00, 39.46it/s]
100%|██████████| 180/180 [00:03<00:00, 48.40it/s]


Epoch 26,  Loss: 12969.2646,  Regret: 10.2048%


100%|██████████| 180/180 [00:04<00:00, 39.26it/s]
100%|██████████| 180/180 [00:03<00:00, 48.03it/s]


Epoch 27,  Loss: 9476.9434,  Regret:  9.3839%


100%|██████████| 180/180 [00:04<00:00, 40.08it/s]
100%|██████████| 180/180 [00:03<00:00, 45.54it/s]


Epoch 28,  Loss: 12864.5820,  Regret: 10.8120%


100%|██████████| 180/180 [00:04<00:00, 40.35it/s]
100%|██████████| 180/180 [00:03<00:00, 47.07it/s]


Epoch 29,  Loss: 12825.9258,  Regret:  9.6786%


100%|██████████| 180/180 [00:04<00:00, 39.81it/s]
100%|██████████| 180/180 [00:03<00:00, 49.19it/s]


Epoch 30,  Loss: 12572.7363,  Regret:  9.4700%
Total Elapsed Time: 11333.38 Sec.


0,1
Linear loss,▃█▆▂▂▅▄▄▁▃▄▂▂▇▆▁▂▁▄▂▃▅▇▁▂▁▅▃▃█▄▂▂▁█▆▁█▃▄
Regret,▄█▆▄▇▁▄▄▂▄▁▂▂▅▁▁▂▃▂▂▄▃▁▂▂▃▂▃▂▂

0,1
Linear loss,12572.73633
Regret,0.0947


[34m[1mwandb[0m: Agent Starting Run: pz1iixym with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 49.81it/s]
100%|██████████| 180/180 [00:04<00:00, 40.70it/s]
100%|██████████| 180/180 [00:03<00:00, 48.22it/s]


Epoch  1,  Loss: 14786.6006,  Regret:  7.9631%


100%|██████████| 180/180 [00:04<00:00, 40.40it/s]
100%|██████████| 180/180 [00:03<00:00, 47.72it/s]


Epoch  2,  Loss: 13996.5537,  Regret:  8.0546%


100%|██████████| 180/180 [00:04<00:00, 40.96it/s]
100%|██████████| 180/180 [00:03<00:00, 47.80it/s]


Epoch  3,  Loss: 12560.5449,  Regret:  7.7437%


100%|██████████| 180/180 [00:04<00:00, 41.07it/s]
100%|██████████| 180/180 [00:03<00:00, 46.99it/s]


Epoch  4,  Loss: 12306.2949,  Regret:  7.6011%


100%|██████████| 180/180 [00:04<00:00, 40.30it/s]
100%|██████████| 180/180 [00:04<00:00, 44.49it/s]


Epoch  5,  Loss: 11728.0361,  Regret:  7.4841%


100%|██████████| 180/180 [00:04<00:00, 40.27it/s]
100%|██████████| 180/180 [00:04<00:00, 44.75it/s]


Epoch  6,  Loss: 11240.9492,  Regret:  7.4165%


100%|██████████| 180/180 [00:04<00:00, 40.43it/s]
100%|██████████| 180/180 [00:03<00:00, 46.00it/s]


Epoch  7,  Loss: 11008.7793,  Regret:  7.2850%


100%|██████████| 180/180 [00:04<00:00, 40.17it/s]
100%|██████████| 180/180 [00:03<00:00, 46.36it/s]


Epoch  8,  Loss: 10591.1836,  Regret:  7.3660%


100%|██████████| 180/180 [00:04<00:00, 41.09it/s]
100%|██████████| 180/180 [00:03<00:00, 47.01it/s]


Epoch  9,  Loss: 11244.0371,  Regret:  7.2630%


100%|██████████| 180/180 [00:04<00:00, 40.98it/s]
100%|██████████| 180/180 [00:03<00:00, 47.76it/s]


Epoch 10,  Loss: 12452.1836,  Regret:  7.2452%
Total Elapsed Time: 4021.61 Sec.


0,1
Linear loss,█▅▂▅▇▃▂▆█▄▂▆▃▁▂▅▇▂▄▆▇▃▁▄▃▁▅▆▃▁▄▄▁▂▄▅▁▂▅▇
Regret,▇█▅▄▃▂▁▂▁▁

0,1
Linear loss,12452.18359
Regret,0.07245


[34m[1mwandb[0m: Agent Starting Run: 20yfe3dr with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 48.05it/s]
100%|██████████| 180/180 [00:04<00:00, 41.00it/s]
100%|██████████| 180/180 [00:03<00:00, 47.75it/s]


Epoch  1,  Loss: 18986.3750,  Regret: 38.1820%


100%|██████████| 180/180 [00:04<00:00, 39.78it/s]
100%|██████████| 180/180 [00:03<00:00, 46.86it/s]


Epoch  2,  Loss: 18872.7363,  Regret: 12.7386%


100%|██████████| 180/180 [00:04<00:00, 40.52it/s]
100%|██████████| 180/180 [00:03<00:00, 48.18it/s]


Epoch  3,  Loss: 18370.5234,  Regret:  8.9745%


100%|██████████| 180/180 [00:04<00:00, 41.18it/s]
100%|██████████| 180/180 [00:03<00:00, 47.02it/s]


Epoch  4,  Loss: 18111.1719,  Regret:  8.3832%


100%|██████████| 180/180 [00:04<00:00, 40.71it/s]
100%|██████████| 180/180 [00:03<00:00, 48.25it/s]


Epoch  5,  Loss: 17585.5703,  Regret:  8.1882%


100%|██████████| 180/180 [00:04<00:00, 41.09it/s]
100%|██████████| 180/180 [00:03<00:00, 47.33it/s]


Epoch  6,  Loss: 15815.8584,  Regret:  7.9948%


100%|██████████| 180/180 [00:04<00:00, 39.88it/s]
100%|██████████| 180/180 [00:03<00:00, 46.00it/s]


Epoch  7,  Loss: 17834.0312,  Regret:  7.8342%


100%|██████████| 180/180 [00:04<00:00, 40.11it/s]
100%|██████████| 180/180 [00:03<00:00, 46.52it/s]


Epoch  8,  Loss: 18960.6914,  Regret:  7.7044%


100%|██████████| 180/180 [00:04<00:00, 40.71it/s]
100%|██████████| 180/180 [00:03<00:00, 47.45it/s]


Epoch  9,  Loss: 13628.8730,  Regret:  7.6687%


100%|██████████| 180/180 [00:04<00:00, 40.25it/s]
100%|██████████| 180/180 [00:04<00:00, 44.71it/s]


Epoch 10,  Loss: 16440.1992,  Regret:  7.6069%
Total Elapsed Time: 4030.27 Sec.


0,1
Linear loss,█▅▃▅▇▃▄█▆▄▄▇▄▁▂▆▇▂▅▃▇▃▂▄▄▁▇▆▄▁▄▅▂▂▄▃▃▁▇▆
Regret,█▂▁▁▁▁▁▁▁▁

0,1
Linear loss,16440.19922
Regret,0.07607


[34m[1mwandb[0m: Agent Starting Run: u0sd86oc with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 47.48it/s]
100%|██████████| 180/180 [00:04<00:00, 38.07it/s]
100%|██████████| 180/180 [00:03<00:00, 47.41it/s]


Epoch  1,  Loss: 18945.9688,  Regret: 19.5487%


100%|██████████| 180/180 [00:04<00:00, 37.88it/s]
100%|██████████| 180/180 [00:03<00:00, 48.10it/s]


Epoch  2,  Loss: 18561.8203,  Regret:  9.6015%


100%|██████████| 180/180 [00:04<00:00, 39.10it/s]
100%|██████████| 180/180 [00:03<00:00, 47.21it/s]


Epoch  3,  Loss: 17839.2695,  Regret:  8.3619%


100%|██████████| 180/180 [00:04<00:00, 37.82it/s]
100%|██████████| 180/180 [00:03<00:00, 47.96it/s]


Epoch  4,  Loss: 16552.8398,  Regret:  8.3172%


100%|██████████| 180/180 [00:04<00:00, 37.51it/s]
100%|██████████| 180/180 [00:03<00:00, 47.38it/s]


Epoch  5,  Loss: 15822.1143,  Regret:  8.0593%


100%|██████████| 180/180 [00:04<00:00, 38.90it/s]
100%|██████████| 180/180 [00:02<00:00, 63.94it/s]


Epoch  6,  Loss: 12723.0527,  Regret:  7.9012%


100%|██████████| 180/180 [00:04<00:00, 40.44it/s]
100%|██████████| 180/180 [00:03<00:00, 45.92it/s]


Epoch  7,  Loss: 16368.4355,  Regret:  7.8541%


100%|██████████| 180/180 [00:04<00:00, 38.53it/s]
100%|██████████| 180/180 [00:03<00:00, 46.65it/s]


Epoch  8,  Loss: 14500.8477,  Regret:  7.7806%


100%|██████████| 180/180 [00:04<00:00, 38.65it/s]
100%|██████████| 180/180 [00:03<00:00, 46.22it/s]


Epoch  9,  Loss: 14199.0449,  Regret:  7.6960%


100%|██████████| 180/180 [00:04<00:00, 38.19it/s]
100%|██████████| 180/180 [00:03<00:00, 46.54it/s]


Epoch 10,  Loss: 13114.6602,  Regret:  7.6819%


100%|██████████| 180/180 [00:04<00:00, 37.55it/s]
100%|██████████| 180/180 [00:03<00:00, 46.32it/s]


Epoch 11,  Loss: 13013.1006,  Regret:  7.6551%


100%|██████████| 180/180 [00:04<00:00, 37.85it/s]
100%|██████████| 180/180 [00:03<00:00, 46.46it/s]


Epoch 12,  Loss: 13415.8984,  Regret:  7.6293%


100%|██████████| 180/180 [00:04<00:00, 38.03it/s]
100%|██████████| 180/180 [00:03<00:00, 47.81it/s]


Epoch 13,  Loss: 13914.5234,  Regret:  7.6619%


100%|██████████| 180/180 [00:04<00:00, 39.12it/s]
100%|██████████| 180/180 [00:03<00:00, 47.76it/s]


Epoch 14,  Loss: 11935.8096,  Regret:  7.6535%


100%|██████████| 180/180 [00:04<00:00, 39.64it/s]
100%|██████████| 180/180 [00:03<00:00, 46.73it/s]


Epoch 15,  Loss: 15240.4541,  Regret:  7.6048%


100%|██████████| 180/180 [00:04<00:00, 38.55it/s]
100%|██████████| 180/180 [00:03<00:00, 46.99it/s]


Epoch 16,  Loss: 10987.7100,  Regret:  7.6115%


100%|██████████| 180/180 [00:04<00:00, 37.55it/s]
100%|██████████| 180/180 [00:03<00:00, 46.70it/s]


Epoch 17,  Loss: 12147.4141,  Regret:  7.5994%


100%|██████████| 180/180 [00:04<00:00, 37.83it/s]
100%|██████████| 180/180 [00:03<00:00, 46.05it/s]


Epoch 18,  Loss: 13146.1270,  Regret:  7.6310%


100%|██████████| 180/180 [00:04<00:00, 37.64it/s]
100%|██████████| 180/180 [00:04<00:00, 40.62it/s]


Epoch 19,  Loss: 11349.7441,  Regret:  7.6251%


100%|██████████| 180/180 [00:04<00:00, 36.44it/s]
100%|██████████| 180/180 [00:03<00:00, 46.19it/s]


Epoch 20,  Loss: 16479.1465,  Regret:  7.6107%
Total Elapsed Time: 8587.32 Sec.


0,1
Linear loss,▃▂▆▄▃▆▂▂▇▅▂▇▂▇▃▄▂▇▁▆▃▅▁█▂▄▂▅▂▆▂▅▂▇▁▆▃▆▁▄
Regret,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,16479.14648
Regret,0.07611


[34m[1mwandb[0m: Agent Starting Run: x18orxhd with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:06<00:00, 27.61it/s]
100%|██████████| 180/180 [00:04<00:00, 38.91it/s]
100%|██████████| 180/180 [00:05<00:00, 30.06it/s]


Epoch  1,  Loss: 13074.6650,  Regret: 14.4285%


100%|██████████| 180/180 [00:04<00:00, 36.21it/s]
100%|██████████| 180/180 [00:05<00:00, 32.87it/s]


Epoch  2,  Loss: 12907.5225,  Regret:  8.8636%


100%|██████████| 180/180 [00:04<00:00, 36.34it/s]
100%|██████████| 180/180 [00:13<00:00, 13.62it/s]


Epoch  3,  Loss: 13235.5205,  Regret:  9.7789%


100%|██████████| 180/180 [00:05<00:00, 31.69it/s]
100%|██████████| 180/180 [00:06<00:00, 27.66it/s]


Epoch  4,  Loss: 13971.1738,  Regret:  9.2091%


100%|██████████| 180/180 [00:05<00:00, 32.79it/s]
100%|██████████| 180/180 [00:06<00:00, 27.34it/s]


Epoch  5,  Loss: 11476.9863,  Regret:  8.6336%


100%|██████████| 180/180 [00:06<00:00, 26.72it/s]
100%|██████████| 180/180 [00:05<00:00, 35.87it/s]


Epoch  6,  Loss: 12939.6074,  Regret:  8.1572%


100%|██████████| 180/180 [00:07<00:00, 24.83it/s]
100%|██████████| 180/180 [00:04<00:00, 40.71it/s]


Epoch  7,  Loss: 13911.1035,  Regret:  8.0433%


100%|██████████| 180/180 [00:05<00:00, 31.61it/s]
100%|██████████| 180/180 [00:04<00:00, 39.78it/s]


Epoch  8,  Loss: 11072.1963,  Regret:  7.8689%


100%|██████████| 180/180 [00:05<00:00, 33.70it/s]
100%|██████████| 180/180 [00:04<00:00, 41.24it/s]


Epoch  9,  Loss: 10054.9248,  Regret:  9.1638%


100%|██████████| 180/180 [00:04<00:00, 36.67it/s]
100%|██████████| 180/180 [00:03<00:00, 47.86it/s]


Epoch 10,  Loss: 10625.1035,  Regret:  8.0033%


100%|██████████| 180/180 [00:05<00:00, 33.66it/s]
100%|██████████| 180/180 [00:03<00:00, 45.12it/s]


Epoch 11,  Loss: 10551.5469,  Regret:  8.7389%


100%|██████████| 180/180 [00:04<00:00, 36.28it/s]
100%|██████████| 180/180 [00:03<00:00, 45.20it/s]


Epoch 12,  Loss: 13907.9492,  Regret:  9.1425%


100%|██████████| 180/180 [00:04<00:00, 40.31it/s]
100%|██████████| 180/180 [00:04<00:00, 44.87it/s]


Epoch 13,  Loss: 10914.4893,  Regret:  8.2382%


100%|██████████| 180/180 [00:04<00:00, 39.54it/s]
100%|██████████| 180/180 [00:03<00:00, 45.67it/s]


Epoch 14,  Loss: 10085.5811,  Regret:  8.8090%


100%|██████████| 180/180 [00:04<00:00, 39.89it/s]
100%|██████████| 180/180 [00:04<00:00, 40.99it/s]


Epoch 15,  Loss: 11939.3838,  Regret:  8.0705%


100%|██████████| 180/180 [00:04<00:00, 39.93it/s]
100%|██████████| 180/180 [00:03<00:00, 45.86it/s]


Epoch 16,  Loss: 9086.7246,  Regret:  8.4324%


100%|██████████| 180/180 [00:04<00:00, 39.84it/s]
100%|██████████| 180/180 [00:04<00:00, 44.89it/s]


Epoch 17,  Loss: 9418.1377,  Regret:  7.8391%


100%|██████████| 180/180 [00:04<00:00, 39.77it/s]
100%|██████████| 180/180 [00:03<00:00, 45.20it/s]


Epoch 18,  Loss: 8799.9209,  Regret:  9.8124%


100%|██████████| 180/180 [00:04<00:00, 39.47it/s]
100%|██████████| 180/180 [00:03<00:00, 49.71it/s]


Epoch 19,  Loss: 11081.3857,  Regret:  9.0228%


100%|██████████| 180/180 [00:03<00:00, 50.48it/s]
100%|██████████| 180/180 [00:04<00:00, 44.26it/s]


Epoch 20,  Loss: 12880.0176,  Regret:  9.6635%


100%|██████████| 180/180 [00:03<00:00, 52.59it/s]
100%|██████████| 180/180 [00:01<00:00, 130.79it/s]


Epoch 21,  Loss: 11372.1797,  Regret:  9.1713%


100%|██████████| 180/180 [00:03<00:00, 53.02it/s]
100%|██████████| 180/180 [00:01<00:00, 116.00it/s]


Epoch 22,  Loss: 11192.1025,  Regret:  8.8549%


100%|██████████| 180/180 [00:04<00:00, 43.76it/s]
100%|██████████| 180/180 [00:03<00:00, 54.45it/s]


Epoch 23,  Loss: 9396.9834,  Regret:  8.7128%


100%|██████████| 180/180 [00:04<00:00, 40.86it/s]
100%|██████████| 180/180 [00:03<00:00, 45.57it/s]


Epoch 24,  Loss: 7996.1787,  Regret:  9.8893%


100%|██████████| 180/180 [00:04<00:00, 40.66it/s]
100%|██████████| 180/180 [00:03<00:00, 47.31it/s]


Epoch 25,  Loss: 13670.9062,  Regret: 10.5391%


100%|██████████| 180/180 [00:04<00:00, 40.29it/s]
100%|██████████| 180/180 [00:03<00:00, 45.97it/s]


Epoch 26,  Loss: 9626.1875,  Regret: 10.5434%


100%|██████████| 180/180 [00:04<00:00, 40.94it/s]
100%|██████████| 180/180 [00:03<00:00, 47.20it/s]


Epoch 27,  Loss: 11882.6787,  Regret:  9.0332%


100%|██████████| 180/180 [00:04<00:00, 40.56it/s]
100%|██████████| 180/180 [00:03<00:00, 47.75it/s]


Epoch 28,  Loss: 11040.3613,  Regret:  9.4102%


100%|██████████| 180/180 [00:04<00:00, 40.38it/s]
100%|██████████| 180/180 [00:03<00:00, 46.89it/s]


Epoch 29,  Loss: 11082.4629,  Regret: 10.4020%


100%|██████████| 180/180 [00:04<00:00, 40.42it/s]
100%|██████████| 180/180 [00:03<00:00, 46.52it/s]


Epoch 30,  Loss: 9624.1006,  Regret: 11.4069%
Total Elapsed Time: 13270.91 Sec.


0,1
Linear loss,▂█▆▂▂▅▄▃▁▃▅▂▂▇▆▁▂▁▄▁▁▃▇▂▁▁▇▅▃▆▇▂▂▂▇▅▁▆▄▂
Regret,█▂▃▂▂▁▁▁▂▁▂▂▁▂▁▂▁▃▂▃▂▂▂▃▄▄▂▃▄▅

0,1
Linear loss,9624.10059
Regret,0.11407


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hz1uu8q4 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 42.58it/s]
100%|██████████| 180/180 [00:04<00:00, 41.80it/s]
100%|██████████| 180/180 [00:04<00:00, 42.92it/s]


Epoch  1,  Loss: 14649.3086,  Regret: 16.4602%


100%|██████████| 180/180 [00:04<00:00, 40.95it/s]
100%|██████████| 180/180 [00:04<00:00, 41.48it/s]


Epoch  2,  Loss: 16710.9434,  Regret: 12.3331%


100%|██████████| 180/180 [00:04<00:00, 40.80it/s]
100%|██████████| 180/180 [00:04<00:00, 42.85it/s]


Epoch  3,  Loss: 8287.4082,  Regret:  9.6792%


100%|██████████| 180/180 [00:04<00:00, 41.39it/s]
100%|██████████| 180/180 [00:03<00:00, 47.32it/s]


Epoch  4,  Loss: 13632.2305,  Regret:  8.9215%


100%|██████████| 180/180 [00:04<00:00, 41.32it/s]
100%|██████████| 180/180 [00:05<00:00, 34.74it/s]


Epoch  5,  Loss: 12103.7861,  Regret:  8.5672%


100%|██████████| 180/180 [00:05<00:00, 34.58it/s]
100%|██████████| 180/180 [00:07<00:00, 22.97it/s]


Epoch  6,  Loss: 13278.5039,  Regret:  9.2006%


100%|██████████| 180/180 [00:06<00:00, 29.75it/s]
100%|██████████| 180/180 [00:04<00:00, 37.77it/s]


Epoch  7,  Loss: 13342.2461,  Regret:  8.6497%


100%|██████████| 180/180 [00:05<00:00, 30.49it/s]
100%|██████████| 180/180 [00:03<00:00, 47.30it/s]


Epoch  8,  Loss: 11909.0879,  Regret:  8.8080%


100%|██████████| 180/180 [00:05<00:00, 30.99it/s]
100%|██████████| 180/180 [00:04<00:00, 43.74it/s]


Epoch  9,  Loss: 12295.1660,  Regret:  8.9607%


100%|██████████| 180/180 [00:05<00:00, 30.31it/s]
100%|██████████| 180/180 [00:04<00:00, 44.92it/s]


Epoch 10,  Loss: 11726.0918,  Regret:  8.4691%
Total Elapsed Time: 4413.89 Sec.


0,1
Linear loss,█▄▂▆█▃▂▇▇▄▂▆▃▂▂▆█▂▄▅▇▃▂▄▂▁▇▆▃▁▄▅▁▂▄▅▁▂▅▇
Regret,█▄▂▁▁▂▁▁▁▁

0,1
Linear loss,11726.0918
Regret,0.08469


[34m[1mwandb[0m: Agent Starting Run: lfuvqeno with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 45.45it/s]
100%|██████████| 180/180 [00:04<00:00, 40.72it/s]
100%|██████████| 180/180 [00:04<00:00, 42.72it/s]


Epoch  1,  Loss: 12535.8457,  Regret:  8.4438%


100%|██████████| 180/180 [00:04<00:00, 40.04it/s]
100%|██████████| 180/180 [00:04<00:00, 42.61it/s]


Epoch  2,  Loss: 11691.0400,  Regret:  8.2638%


100%|██████████| 180/180 [00:04<00:00, 40.02it/s]
100%|██████████| 180/180 [00:04<00:00, 43.27it/s]


Epoch  3,  Loss: 10839.3574,  Regret:  7.9269%


100%|██████████| 180/180 [00:04<00:00, 40.68it/s]
100%|██████████| 180/180 [00:04<00:00, 40.28it/s]


Epoch  4,  Loss: 10621.7529,  Regret:  7.7224%


100%|██████████| 180/180 [00:04<00:00, 40.61it/s]
100%|██████████| 180/180 [00:04<00:00, 40.70it/s]


Epoch  5,  Loss: 11263.1025,  Regret:  7.6445%


100%|██████████| 180/180 [00:04<00:00, 40.12it/s]
100%|██████████| 180/180 [00:04<00:00, 41.30it/s]


Epoch  6,  Loss: 11140.5879,  Regret:  7.4651%


100%|██████████| 180/180 [00:04<00:00, 40.39it/s]
100%|██████████| 180/180 [00:04<00:00, 43.54it/s]


Epoch  7,  Loss: 13444.4131,  Regret:  7.2543%


100%|██████████| 180/180 [00:04<00:00, 40.68it/s]
100%|██████████| 180/180 [00:04<00:00, 41.44it/s]


Epoch  8,  Loss: 12825.1621,  Regret:  7.5851%


100%|██████████| 180/180 [00:04<00:00, 39.62it/s]
100%|██████████| 180/180 [00:04<00:00, 40.51it/s]


Epoch  9,  Loss: 10221.0225,  Regret:  7.3331%


100%|██████████| 180/180 [00:04<00:00, 40.03it/s]
100%|██████████| 180/180 [00:04<00:00, 42.88it/s]


Epoch 10,  Loss: 11358.4395,  Regret:  7.3547%


100%|██████████| 180/180 [00:04<00:00, 40.69it/s]
100%|██████████| 180/180 [00:04<00:00, 40.82it/s]


Epoch 11,  Loss: 10848.4902,  Regret:  7.3504%


100%|██████████| 180/180 [00:04<00:00, 40.59it/s]
100%|██████████| 180/180 [00:04<00:00, 38.57it/s]


Epoch 12,  Loss: 11077.8975,  Regret:  7.4181%


100%|██████████| 180/180 [00:04<00:00, 40.28it/s]
100%|██████████| 180/180 [00:04<00:00, 40.39it/s]


Epoch 13,  Loss: 11219.5391,  Regret:  7.7283%


100%|██████████| 180/180 [00:04<00:00, 41.52it/s]
100%|██████████| 180/180 [00:03<00:00, 45.15it/s]


Epoch 14,  Loss: 10866.8008,  Regret:  7.9050%


100%|██████████| 180/180 [00:05<00:00, 31.28it/s]
100%|██████████| 180/180 [00:03<00:00, 53.02it/s]


Epoch 15,  Loss: 8785.8262,  Regret:  7.7753%


100%|██████████| 180/180 [00:04<00:00, 40.32it/s]
100%|██████████| 180/180 [00:03<00:00, 53.71it/s]


Epoch 16,  Loss: 9913.8154,  Regret:  7.7825%


100%|██████████| 180/180 [00:04<00:00, 40.42it/s]
100%|██████████| 180/180 [00:03<00:00, 48.13it/s]


Epoch 17,  Loss: 10186.5449,  Regret:  7.4432%


100%|██████████| 180/180 [00:04<00:00, 39.86it/s]
100%|██████████| 180/180 [00:05<00:00, 35.16it/s]


Epoch 18,  Loss: 13429.4492,  Regret:  7.7554%


100%|██████████| 180/180 [00:03<00:00, 49.69it/s]
100%|██████████| 180/180 [00:05<00:00, 31.48it/s]


Epoch 19,  Loss: 10387.8730,  Regret:  7.7166%


100%|██████████| 180/180 [00:02<00:00, 62.38it/s]
100%|██████████| 180/180 [00:01<00:00, 122.81it/s]


Epoch 20,  Loss: 8597.7725,  Regret:  8.4269%
Total Elapsed Time: 7934.33 Sec.


0,1
Linear loss,▄▂█▂▃▆▂▂▇▄▁▅▂▅▃▄▁▇▁▆▃▅▁▅▂▄▃▅▁▅▃▄▁▆▃▇▄▅▁▄
Regret,█▇▅▄▃▂▁▃▁▂▂▂▄▅▄▄▂▄▄█

0,1
Linear loss,8597.77246
Regret,0.08427


[34m[1mwandb[0m: Agent Starting Run: rh9yeggk with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 47.84it/s]
100%|██████████| 180/180 [00:15<00:00, 11.77it/s]
100%|██████████| 180/180 [00:04<00:00, 44.52it/s]


Epoch  1,  Loss: 19537.9180,  Regret: 48.5597%


100%|██████████| 180/180 [00:15<00:00, 11.66it/s]
100%|██████████| 180/180 [00:04<00:00, 39.69it/s]


Epoch  2,  Loss: 19448.4473,  Regret: 29.7112%


100%|██████████| 180/180 [00:14<00:00, 12.29it/s]
100%|██████████| 180/180 [00:05<00:00, 31.47it/s]


Epoch  3,  Loss: 18871.0156,  Regret: 15.9369%


100%|██████████| 180/180 [00:13<00:00, 13.83it/s]
100%|██████████| 180/180 [00:04<00:00, 40.93it/s]


Epoch  4,  Loss: 17863.4121,  Regret: 12.1759%


100%|██████████| 180/180 [00:12<00:00, 13.96it/s]
100%|██████████| 180/180 [00:03<00:00, 50.31it/s]


Epoch  5,  Loss: 18856.9863,  Regret: 10.4984%


100%|██████████| 180/180 [00:11<00:00, 15.51it/s]
100%|██████████| 180/180 [00:03<00:00, 49.97it/s]


Epoch  6,  Loss: 16549.5723,  Regret:  8.8659%


100%|██████████| 180/180 [00:04<00:00, 41.07it/s]
100%|██████████| 180/180 [00:04<00:00, 40.64it/s]


Epoch  7,  Loss: 15826.3848,  Regret:  8.3598%


100%|██████████| 180/180 [00:04<00:00, 41.68it/s]
100%|██████████| 180/180 [00:04<00:00, 40.16it/s]


Epoch  8,  Loss: 18928.1934,  Regret:  8.0541%


100%|██████████| 180/180 [00:04<00:00, 40.88it/s]
100%|██████████| 180/180 [00:04<00:00, 41.17it/s]


Epoch  9,  Loss: 18843.3633,  Regret:  8.1124%


100%|██████████| 180/180 [00:04<00:00, 41.01it/s]
100%|██████████| 180/180 [00:04<00:00, 43.25it/s]


Epoch 10,  Loss: 17378.9512,  Regret:  8.1253%


100%|██████████| 180/180 [00:04<00:00, 41.09it/s]
100%|██████████| 180/180 [00:04<00:00, 42.11it/s]


Epoch 11,  Loss: 14696.5693,  Regret:  8.1807%


100%|██████████| 180/180 [00:04<00:00, 40.99it/s]
100%|██████████| 180/180 [00:04<00:00, 40.41it/s]


Epoch 12,  Loss: 16463.7891,  Regret:  8.0989%


100%|██████████| 180/180 [00:04<00:00, 41.00it/s]
100%|██████████| 180/180 [00:04<00:00, 43.29it/s]


Epoch 13,  Loss: 17177.8887,  Regret:  7.9332%


100%|██████████| 180/180 [00:04<00:00, 41.24it/s]
100%|██████████| 180/180 [00:04<00:00, 43.06it/s]


Epoch 14,  Loss: 18474.2812,  Regret:  7.9499%


100%|██████████| 180/180 [00:04<00:00, 41.40it/s]
100%|██████████| 180/180 [00:04<00:00, 38.15it/s]


Epoch 15,  Loss: 15128.0127,  Regret:  8.0538%


100%|██████████| 180/180 [00:04<00:00, 41.76it/s]
100%|██████████| 180/180 [00:04<00:00, 41.04it/s]


Epoch 16,  Loss: 13488.3984,  Regret:  7.9305%


100%|██████████| 180/180 [00:04<00:00, 40.84it/s]
100%|██████████| 180/180 [00:04<00:00, 40.26it/s]


Epoch 17,  Loss: 13133.1523,  Regret:  7.8855%


100%|██████████| 180/180 [00:04<00:00, 41.04it/s]
100%|██████████| 180/180 [00:04<00:00, 44.37it/s]


Epoch 18,  Loss: 15005.5029,  Regret:  7.7823%


100%|██████████| 180/180 [00:04<00:00, 41.20it/s]
100%|██████████| 180/180 [00:04<00:00, 40.35it/s]


Epoch 19,  Loss: 14277.5205,  Regret:  7.8453%


100%|██████████| 180/180 [00:04<00:00, 40.85it/s]
100%|██████████| 180/180 [00:04<00:00, 41.19it/s]


Epoch 20,  Loss: 12965.8389,  Regret:  7.8996%


100%|██████████| 180/180 [00:04<00:00, 41.29it/s]
100%|██████████| 180/180 [00:04<00:00, 40.75it/s]


Epoch 21,  Loss: 14032.0771,  Regret:  7.9083%


100%|██████████| 180/180 [00:04<00:00, 41.46it/s]
100%|██████████| 180/180 [00:04<00:00, 40.34it/s]


Epoch 22,  Loss: 11904.7148,  Regret:  7.7552%


100%|██████████| 180/180 [00:04<00:00, 40.83it/s]
100%|██████████| 180/180 [00:04<00:00, 40.53it/s]


Epoch 23,  Loss: 11821.5928,  Regret:  7.7906%


100%|██████████| 180/180 [00:04<00:00, 41.31it/s]
100%|██████████| 180/180 [00:04<00:00, 40.52it/s]


Epoch 24,  Loss: 12499.4629,  Regret:  7.8241%


100%|██████████| 180/180 [00:04<00:00, 40.78it/s]
100%|██████████| 180/180 [00:04<00:00, 43.19it/s]


Epoch 25,  Loss: 14437.0664,  Regret:  7.7591%


100%|██████████| 180/180 [00:04<00:00, 40.49it/s]
100%|██████████| 180/180 [00:04<00:00, 41.65it/s]


Epoch 26,  Loss: 13655.8652,  Regret:  7.7449%


100%|██████████| 180/180 [00:04<00:00, 41.07it/s]
100%|██████████| 180/180 [00:04<00:00, 43.74it/s]


Epoch 27,  Loss: 12387.6787,  Regret:  7.7201%


100%|██████████| 180/180 [00:04<00:00, 41.31it/s]
100%|██████████| 180/180 [00:04<00:00, 41.46it/s]


Epoch 28,  Loss: 12853.8623,  Regret:  7.6546%


100%|██████████| 180/180 [00:04<00:00, 40.44it/s]
100%|██████████| 180/180 [00:04<00:00, 42.23it/s]


Epoch 29,  Loss: 11668.8848,  Regret:  7.6881%


100%|██████████| 180/180 [00:04<00:00, 40.70it/s]
100%|██████████| 180/180 [00:04<00:00, 41.40it/s]


Epoch 30,  Loss: 17457.6699,  Regret:  7.5816%
Total Elapsed Time: 16900.46 Sec.


0,1
Linear loss,▃▇█▃▂▆▆▃▁▅▆▂▄▇▇▁▂▂▃▃▂▄█▁▂▃▄▄▃▆█▂▁▄▇▄▁▇▅▄
Regret,█▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,17457.66992
Regret,0.07582


[34m[1mwandb[0m: Agent Starting Run: xna78ja8 with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 42.04it/s]
100%|██████████| 180/180 [00:04<00:00, 41.11it/s]
100%|██████████| 180/180 [00:04<00:00, 43.83it/s]


Epoch  1,  Loss: 19796.7676,  Regret: 43.2914%


100%|██████████| 180/180 [00:04<00:00, 40.14it/s]
100%|██████████| 180/180 [00:04<00:00, 41.24it/s]


Epoch  2,  Loss: 18758.8008,  Regret: 26.8154%


100%|██████████| 180/180 [00:04<00:00, 40.33it/s]
100%|██████████| 180/180 [00:04<00:00, 41.36it/s]


Epoch  3,  Loss: 19614.0820,  Regret: 18.3584%


100%|██████████| 180/180 [00:04<00:00, 40.51it/s]
100%|██████████| 180/180 [00:04<00:00, 40.25it/s]


Epoch  4,  Loss: 18091.3945,  Regret: 13.3244%


100%|██████████| 180/180 [00:04<00:00, 40.78it/s]
100%|██████████| 180/180 [00:04<00:00, 40.41it/s]


Epoch  5,  Loss: 19437.7031,  Regret: 10.3622%


100%|██████████| 180/180 [00:04<00:00, 40.96it/s]
100%|██████████| 180/180 [00:04<00:00, 44.78it/s]


Epoch  6,  Loss: 19404.9277,  Regret:  9.1502%


100%|██████████| 180/180 [00:04<00:00, 40.59it/s]
100%|██████████| 180/180 [00:04<00:00, 42.91it/s]


Epoch  7,  Loss: 19347.2656,  Regret:  8.6565%


100%|██████████| 180/180 [00:04<00:00, 40.12it/s]
100%|██████████| 180/180 [00:04<00:00, 41.93it/s]


Epoch  8,  Loss: 19295.6055,  Regret:  8.5218%


100%|██████████| 180/180 [03:17<00:00,  1.09s/it]
100%|██████████| 180/180 [00:02<00:00, 64.52it/s]


Epoch  9,  Loss: 16126.4189,  Regret:  8.4132%


100%|██████████| 180/180 [00:03<00:00, 45.74it/s]
100%|██████████| 180/180 [00:03<00:00, 48.09it/s]


Epoch 10,  Loss: 16023.8496,  Regret:  8.2481%


100%|██████████| 180/180 [00:04<00:00, 41.74it/s]
100%|██████████| 180/180 [00:04<00:00, 43.12it/s]


Epoch 11,  Loss: 13048.3301,  Regret:  8.2322%


100%|██████████| 180/180 [00:04<00:00, 41.56it/s]
100%|██████████| 180/180 [00:04<00:00, 42.71it/s]


Epoch 12,  Loss: 19110.2734,  Regret:  8.1345%


100%|██████████| 180/180 [00:04<00:00, 41.24it/s]
100%|██████████| 180/180 [00:04<00:00, 39.59it/s]


Epoch 13,  Loss: 15148.2207,  Regret:  8.0467%


100%|██████████| 180/180 [00:04<00:00, 40.86it/s]
100%|██████████| 180/180 [00:04<00:00, 41.30it/s]


Epoch 14,  Loss: 15527.1816,  Regret:  7.9766%


100%|██████████| 180/180 [00:04<00:00, 42.40it/s]
100%|██████████| 180/180 [00:04<00:00, 41.25it/s]


Epoch 15,  Loss: 18997.3477,  Regret:  7.9346%


100%|██████████| 180/180 [00:04<00:00, 40.43it/s]
100%|██████████| 180/180 [00:04<00:00, 42.45it/s]


Epoch 16,  Loss: 16285.7344,  Regret:  7.9090%


100%|██████████| 180/180 [00:04<00:00, 40.98it/s]
100%|██████████| 180/180 [00:04<00:00, 41.29it/s]


Epoch 17,  Loss: 13273.3848,  Regret:  7.9147%


100%|██████████| 180/180 [00:04<00:00, 40.95it/s]
100%|██████████| 180/180 [00:04<00:00, 39.23it/s]


Epoch 18,  Loss: 17208.4375,  Regret:  7.8829%


100%|██████████| 180/180 [00:04<00:00, 40.46it/s]
100%|██████████| 180/180 [00:04<00:00, 38.62it/s]


Epoch 19,  Loss: 13963.2656,  Regret:  7.8898%


100%|██████████| 180/180 [00:04<00:00, 40.36it/s]
100%|██████████| 180/180 [00:04<00:00, 40.46it/s]


Epoch 20,  Loss: 14222.7910,  Regret:  7.8626%
Total Elapsed Time: 16636.03 Sec.


0,1
Linear loss,▃▂▆▃▄▇▃▂▆▅▁▇▁▆▄▅▁▅▃▆▂▇▃█▃▄▃▆▁▆▂▄▂▅▂▅▃▅▁▄
Regret,█▅▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,14222.79102
Regret,0.07863


[34m[1mwandb[0m: Agent Starting Run: e4gmgk9l with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 41.86it/s]
100%|██████████| 180/180 [00:04<00:00, 40.63it/s]
100%|██████████| 180/180 [00:04<00:00, 42.19it/s]


Epoch  1,  Loss: 14290.1523,  Regret:  7.7980%


100%|██████████| 180/180 [00:04<00:00, 40.03it/s]
100%|██████████| 180/180 [00:04<00:00, 41.07it/s]


Epoch  2,  Loss: 13516.0469,  Regret:  7.7883%


100%|██████████| 180/180 [00:04<00:00, 41.14it/s]
100%|██████████| 180/180 [00:04<00:00, 40.64it/s]


Epoch  3,  Loss: 11902.4873,  Regret:  7.9723%


100%|██████████| 180/180 [00:04<00:00, 41.44it/s]
100%|██████████| 180/180 [00:04<00:00, 44.53it/s]


Epoch  4,  Loss: 12759.3926,  Regret:  8.0435%


100%|██████████| 180/180 [00:04<00:00, 41.96it/s]
100%|██████████| 180/180 [00:04<00:00, 41.17it/s]


Epoch  5,  Loss: 10941.8818,  Regret:  7.4578%


100%|██████████| 180/180 [00:04<00:00, 41.66it/s]
100%|██████████| 180/180 [00:04<00:00, 41.81it/s]


Epoch  6,  Loss: 11082.8320,  Regret:  7.5293%


100%|██████████| 180/180 [00:04<00:00, 41.48it/s]
100%|██████████| 180/180 [00:04<00:00, 42.80it/s]


Epoch  7,  Loss: 12943.0205,  Regret:  7.5066%


100%|██████████| 180/180 [00:04<00:00, 40.58it/s]
100%|██████████| 180/180 [00:04<00:00, 43.36it/s]


Epoch  8,  Loss: 11357.9385,  Regret:  7.2346%


100%|██████████| 180/180 [00:04<00:00, 41.13it/s]
100%|██████████| 180/180 [00:04<00:00, 42.46it/s]


Epoch  9,  Loss: 11514.1484,  Regret:  7.2056%


100%|██████████| 180/180 [00:04<00:00, 41.23it/s]
100%|██████████| 180/180 [00:04<00:00, 39.65it/s]


Epoch 10,  Loss: 10982.2061,  Regret:  7.2300%
Total Elapsed Time: 3984.77 Sec.


0,1
Linear loss,█▅▂▅▇▃▂▆█▄▂▆▃▁▃▅▇▂▃▅▇▃▂▄▃▂▆▆▃▁▄▅▁▂▄▅▂▂▅▇
Regret,▆▆▇█▃▄▄▁▁▁

0,1
Linear loss,10982.20605
Regret,0.0723


[34m[1mwandb[0m: Agent Starting Run: v1q5x9g6 with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 38.81it/s]
100%|██████████| 180/180 [00:04<00:00, 41.54it/s]
100%|██████████| 180/180 [00:04<00:00, 41.09it/s]


Epoch  1,  Loss: 15460.0264,  Regret: 10.7922%


100%|██████████| 180/180 [00:04<00:00, 41.19it/s]
100%|██████████| 180/180 [00:04<00:00, 40.55it/s]


Epoch  2,  Loss: 13340.8125,  Regret:  8.5067%


100%|██████████| 180/180 [00:04<00:00, 40.47it/s]
100%|██████████| 180/180 [00:04<00:00, 42.71it/s]


Epoch  3,  Loss: 10409.5000,  Regret:  7.7484%


100%|██████████| 180/180 [00:04<00:00, 41.28it/s]
100%|██████████| 180/180 [00:04<00:00, 42.41it/s]


Epoch  4,  Loss: 11047.4189,  Regret:  7.6597%


100%|██████████| 180/180 [00:04<00:00, 41.35it/s]
100%|██████████| 180/180 [00:04<00:00, 40.46it/s]


Epoch  5,  Loss: 10744.1582,  Regret:  7.7034%


100%|██████████| 180/180 [00:04<00:00, 41.04it/s]
100%|██████████| 180/180 [00:04<00:00, 43.01it/s]


Epoch  6,  Loss: 10876.4551,  Regret:  7.6039%


100%|██████████| 180/180 [00:04<00:00, 41.10it/s]
100%|██████████| 180/180 [00:04<00:00, 39.00it/s]


Epoch  7,  Loss: 12684.0996,  Regret:  7.9949%


100%|██████████| 180/180 [00:04<00:00, 41.58it/s]
100%|██████████| 180/180 [00:04<00:00, 38.52it/s]


Epoch  8,  Loss: 10202.2490,  Regret:  7.8823%


100%|██████████| 180/180 [00:04<00:00, 40.94it/s]
100%|██████████| 180/180 [00:04<00:00, 39.97it/s]


Epoch  9,  Loss: 15507.5898,  Regret:  7.7079%


100%|██████████| 180/180 [00:04<00:00, 41.79it/s]
100%|██████████| 180/180 [00:03<00:00, 48.14it/s]


Epoch 10,  Loss: 15661.9961,  Regret:  7.7065%
Total Elapsed Time: 3988.04 Sec.


0,1
Linear loss,█▃▃▅█▂▂▇▇▄▂▆▃▁▂▆▇▂▄▇▇▂▂▄▃▁▆▆▃▁▄▅▁▂▄▄▁▁▅▇
Regret,█▃▁▁▁▁▂▂▁▁

0,1
Linear loss,15661.99609
Regret,0.07706


[34m[1mwandb[0m: Agent Starting Run: kmtw299q with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 45.90it/s]
100%|██████████| 180/180 [00:04<00:00, 41.05it/s]
100%|██████████| 180/180 [00:04<00:00, 44.29it/s]


Epoch  1,  Loss: 15865.0918,  Regret:  9.7673%


100%|██████████| 180/180 [00:04<00:00, 39.02it/s]
100%|██████████| 180/180 [00:03<00:00, 45.35it/s]


Epoch  2,  Loss: 14002.8301,  Regret: 12.5243%


100%|██████████| 180/180 [00:04<00:00, 42.27it/s]
100%|██████████| 180/180 [00:03<00:00, 45.28it/s]


Epoch  3,  Loss: 16171.6123,  Regret:  9.9519%


100%|██████████| 180/180 [00:04<00:00, 41.80it/s]
100%|██████████| 180/180 [00:03<00:00, 45.64it/s]


Epoch  4,  Loss: 15273.2334,  Regret:  9.1745%


100%|██████████| 180/180 [00:06<00:00, 25.88it/s]
100%|██████████| 180/180 [00:06<00:00, 29.14it/s]


Epoch  5,  Loss: 11579.5117,  Regret:  8.6040%


100%|██████████| 180/180 [00:07<00:00, 23.47it/s]
100%|██████████| 180/180 [00:02<00:00, 88.34it/s] 


Epoch  6,  Loss: 12283.8877,  Regret:  8.7100%


100%|██████████| 180/180 [00:04<00:00, 36.04it/s]
100%|██████████| 180/180 [00:01<00:00, 115.74it/s]


Epoch  7,  Loss: 10418.2412,  Regret:  8.5077%


100%|██████████| 180/180 [00:04<00:00, 38.16it/s]
100%|██████████| 180/180 [00:04<00:00, 44.47it/s]


Epoch  8,  Loss: 10393.4951,  Regret:  9.0797%


100%|██████████| 180/180 [00:06<00:00, 26.25it/s]
100%|██████████| 180/180 [00:04<00:00, 42.60it/s]


Epoch  9,  Loss: 11255.0361,  Regret:  7.5444%


100%|██████████| 180/180 [00:04<00:00, 37.53it/s]
100%|██████████| 180/180 [00:01<00:00, 106.97it/s]


Epoch 10,  Loss: 12294.6523,  Regret: 11.9919%


100%|██████████| 180/180 [00:04<00:00, 36.28it/s]
100%|██████████| 180/180 [00:01<00:00, 102.23it/s]


Epoch 11,  Loss: 11725.3164,  Regret:  8.6641%


100%|██████████| 180/180 [00:05<00:00, 31.45it/s]
100%|██████████| 180/180 [00:05<00:00, 34.32it/s]


Epoch 12,  Loss: 10875.0693,  Regret:  8.3922%


100%|██████████| 180/180 [00:04<00:00, 39.11it/s]
100%|██████████| 180/180 [00:03<00:00, 46.82it/s]


Epoch 13,  Loss: 9982.7812,  Regret:  7.7767%


100%|██████████| 180/180 [00:05<00:00, 34.02it/s]
100%|██████████| 180/180 [00:03<00:00, 46.90it/s]


Epoch 14,  Loss: 10607.2461,  Regret:  7.7856%


100%|██████████| 180/180 [00:05<00:00, 34.28it/s]
100%|██████████| 180/180 [00:05<00:00, 31.89it/s]


Epoch 15,  Loss: 10411.1162,  Regret:  7.8651%


100%|██████████| 180/180 [00:06<00:00, 28.66it/s]
100%|██████████| 180/180 [00:05<00:00, 31.12it/s]


Epoch 16,  Loss: 10000.5205,  Regret:  7.6424%


100%|██████████| 180/180 [00:06<00:00, 29.55it/s]
100%|██████████| 180/180 [00:04<00:00, 37.08it/s]


Epoch 17,  Loss: 10142.0488,  Regret:  8.9516%


100%|██████████| 180/180 [00:05<00:00, 35.38it/s]
100%|██████████| 180/180 [00:02<00:00, 83.80it/s]


Epoch 18,  Loss: 10391.7441,  Regret:  7.7071%


100%|██████████| 180/180 [00:05<00:00, 32.15it/s]
100%|██████████| 180/180 [00:02<00:00, 89.82it/s] 


Epoch 19,  Loss: 10416.5469,  Regret:  8.3246%


100%|██████████| 180/180 [00:06<00:00, 29.12it/s]
100%|██████████| 180/180 [00:02<00:00, 81.43it/s]


Epoch 20,  Loss: 10313.2676,  Regret:  8.0845%


100%|██████████| 180/180 [00:06<00:00, 25.73it/s]
100%|██████████| 180/180 [00:02<00:00, 72.33it/s]


Epoch 21,  Loss: 10457.7764,  Regret:  8.3414%


100%|██████████| 180/180 [00:05<00:00, 30.47it/s]
100%|██████████| 180/180 [00:01<00:00, 95.87it/s] 


Epoch 22,  Loss: 10679.6904,  Regret:  8.1754%


100%|██████████| 180/180 [00:05<00:00, 31.91it/s]
100%|██████████| 180/180 [00:01<00:00, 91.75it/s] 


Epoch 23,  Loss: 10371.9609,  Regret:  7.9146%


100%|██████████| 180/180 [00:05<00:00, 30.79it/s]
100%|██████████| 180/180 [00:01<00:00, 90.26it/s] 


Epoch 24,  Loss: 10692.4414,  Regret:  8.6898%


100%|██████████| 180/180 [00:05<00:00, 31.29it/s]
100%|██████████| 180/180 [00:01<00:00, 97.71it/s] 


Epoch 25,  Loss: 10736.4707,  Regret:  7.6350%


100%|██████████| 180/180 [00:06<00:00, 27.11it/s]
100%|██████████| 180/180 [00:07<00:00, 25.71it/s]


Epoch 26,  Loss: 11017.9336,  Regret:  7.8548%


100%|██████████| 180/180 [00:09<00:00, 18.95it/s]
100%|██████████| 180/180 [00:07<00:00, 24.01it/s]


Epoch 27,  Loss: 10375.4355,  Regret:  8.6377%


100%|██████████| 180/180 [00:07<00:00, 24.61it/s]
100%|██████████| 180/180 [00:04<00:00, 37.97it/s]


Epoch 28,  Loss: 9883.1191,  Regret:  7.6466%


100%|██████████| 180/180 [00:07<00:00, 25.04it/s]
100%|██████████| 180/180 [00:04<00:00, 39.97it/s]


Epoch 29,  Loss: 9825.3223,  Regret:  7.5539%


100%|██████████| 180/180 [00:06<00:00, 28.68it/s]
100%|██████████| 180/180 [00:03<00:00, 45.50it/s]


Epoch 30,  Loss: 9822.5410,  Regret:  7.8054%
Total Elapsed Time: 15812.51 Sec.


0,1
Linear loss,▄▇█▂▄▅▃▃▁▂▅▂▂▆▅▁▂▁▄▁▃▃▆▁▁▁▄▃▂▇▆▂▂▂▆▅▁▆▄▃
Regret,▄█▄▃▂▃▂▃▁▇▃▂▁▁▁▁▃▁▂▂▂▂▂▃▁▁▃▁▁▁

0,1
Linear loss,9822.54102
Regret,0.07805


[34m[1mwandb[0m: Agent Starting Run: eger943f with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:06<00:00, 28.74it/s]
100%|██████████| 180/180 [00:13<00:00, 13.02it/s]
100%|██████████| 180/180 [00:06<00:00, 29.04it/s]


Epoch  1,  Loss: 18938.9004,  Regret: 10.6064%


100%|██████████| 180/180 [00:09<00:00, 18.36it/s]
100%|██████████| 180/180 [00:06<00:00, 28.61it/s]


Epoch  2,  Loss: 17095.6230,  Regret:  7.9557%


100%|██████████| 180/180 [00:11<00:00, 15.94it/s]
100%|██████████| 180/180 [00:07<00:00, 23.86it/s]


Epoch  3,  Loss: 13525.4834,  Regret:  7.9380%


100%|██████████| 180/180 [00:14<00:00, 12.74it/s]
100%|██████████| 180/180 [00:07<00:00, 22.85it/s]


Epoch  4,  Loss: 14080.3740,  Regret:  7.8946%


100%|██████████| 180/180 [00:17<00:00, 10.39it/s]
100%|██████████| 180/180 [00:08<00:00, 21.47it/s]


Epoch  5,  Loss: 13954.8926,  Regret:  7.9353%


100%|██████████| 180/180 [00:12<00:00, 14.00it/s]
100%|██████████| 180/180 [00:05<00:00, 32.91it/s]


Epoch  6,  Loss: 11159.3945,  Regret:  7.9676%


100%|██████████| 180/180 [00:06<00:00, 26.70it/s]
100%|██████████| 180/180 [00:05<00:00, 31.73it/s]


Epoch  7,  Loss: 11881.8271,  Regret:  7.7469%


100%|██████████| 180/180 [00:06<00:00, 29.53it/s]
100%|██████████| 180/180 [00:06<00:00, 26.31it/s]


Epoch  8,  Loss: 11899.8184,  Regret:  7.7284%


100%|██████████| 180/180 [00:14<00:00, 12.01it/s]
100%|██████████| 180/180 [00:07<00:00, 23.62it/s]


Epoch  9,  Loss: 11304.3086,  Regret:  7.6448%


100%|██████████| 180/180 [00:13<00:00, 13.21it/s]
100%|██████████| 180/180 [00:09<00:00, 19.35it/s]


Epoch 10,  Loss: 11057.3066,  Regret:  7.7780%


100%|██████████| 180/180 [00:16<00:00, 10.95it/s]
100%|██████████| 180/180 [00:08<00:00, 20.52it/s]


Epoch 11,  Loss: 10442.8750,  Regret:  7.7480%


100%|██████████| 180/180 [00:15<00:00, 11.70it/s]
100%|██████████| 180/180 [00:07<00:00, 25.24it/s]


Epoch 12,  Loss: 11343.2852,  Regret:  7.6824%


100%|██████████| 180/180 [00:14<00:00, 12.70it/s]
100%|██████████| 180/180 [00:06<00:00, 29.92it/s]


Epoch 13,  Loss: 12864.4766,  Regret:  7.6591%


100%|██████████| 180/180 [00:12<00:00, 14.70it/s]
100%|██████████| 180/180 [00:06<00:00, 26.51it/s]


Epoch 14,  Loss: 11261.8398,  Regret:  7.7805%


100%|██████████| 180/180 [00:16<00:00, 10.99it/s]
100%|██████████| 180/180 [00:08<00:00, 20.84it/s]


Epoch 15,  Loss: 9587.7861,  Regret:  7.5143%


100%|██████████| 180/180 [00:17<00:00, 10.48it/s]
100%|██████████| 180/180 [00:09<00:00, 18.65it/s]


Epoch 16,  Loss: 9126.4395,  Regret:  7.4149%


100%|██████████| 180/180 [00:15<00:00, 11.80it/s]
100%|██████████| 180/180 [00:09<00:00, 18.58it/s]


Epoch 17,  Loss: 10905.4473,  Regret:  7.4538%


100%|██████████| 180/180 [00:11<00:00, 15.86it/s]
100%|██████████| 180/180 [00:09<00:00, 18.89it/s]


Epoch 18,  Loss: 8625.7295,  Regret:  7.4638%


100%|██████████| 180/180 [00:11<00:00, 15.21it/s]
100%|██████████| 180/180 [00:09<00:00, 19.74it/s]


Epoch 19,  Loss: 11367.3877,  Regret:  7.4032%


100%|██████████| 180/180 [00:15<00:00, 11.59it/s]
100%|██████████| 180/180 [00:08<00:00, 21.91it/s]


Epoch 20,  Loss: 9953.6797,  Regret:  7.4114%
Total Elapsed Time: 24287.63 Sec.


0,1
Linear loss,▄▃▇▄▄▆▂▂█▄▁▇▂▇▃▄▁▇▃▆▃▅▁█▂▅▃▅▂▆▃▅▂▆▂▆▃▆▁▅
Regret,█▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁

0,1
Linear loss,9953.67969
Regret,0.07411


[34m[1mwandb[0m: Agent Starting Run: hz93u1yn with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:09<00:00, 18.34it/s]
100%|██████████| 180/180 [00:16<00:00, 10.90it/s]
100%|██████████| 180/180 [00:07<00:00, 23.39it/s]


Epoch  1,  Loss: 14437.6035,  Regret: 21.5167%


100%|██████████| 180/180 [00:10<00:00, 16.41it/s]
100%|██████████| 180/180 [00:05<00:00, 32.81it/s]


Epoch  2,  Loss: 12317.7012,  Regret: 12.1700%


100%|██████████| 180/180 [00:04<00:00, 39.21it/s]
100%|██████████| 180/180 [00:04<00:00, 40.27it/s]


Epoch  3,  Loss: 15915.2715,  Regret:  8.5501%


100%|██████████| 180/180 [00:04<00:00, 39.52it/s]
100%|██████████| 180/180 [00:04<00:00, 43.84it/s]


Epoch  4,  Loss: 13583.1328,  Regret:  8.8510%


100%|██████████| 180/180 [00:04<00:00, 41.14it/s]
100%|██████████| 180/180 [00:04<00:00, 41.99it/s]


Epoch  5,  Loss: 12715.9863,  Regret:  7.8796%


100%|██████████| 180/180 [00:04<00:00, 36.87it/s]
100%|██████████| 180/180 [00:04<00:00, 37.27it/s]


Epoch  6,  Loss: 14597.0361,  Regret:  8.7586%


100%|██████████| 180/180 [00:05<00:00, 35.57it/s]
100%|██████████| 180/180 [00:04<00:00, 40.63it/s]


Epoch  7,  Loss: 12066.0820,  Regret:  8.0745%


100%|██████████| 180/180 [00:04<00:00, 36.98it/s]
100%|██████████| 180/180 [00:04<00:00, 39.35it/s]


Epoch  8,  Loss: 13228.7402,  Regret:  9.0736%


100%|██████████| 180/180 [00:05<00:00, 33.38it/s]
100%|██████████| 180/180 [00:04<00:00, 36.66it/s]


Epoch  9,  Loss: 13086.3809,  Regret:  8.5480%


100%|██████████| 180/180 [00:05<00:00, 34.93it/s]
100%|██████████| 180/180 [00:04<00:00, 38.46it/s]


Epoch 10,  Loss: 10852.6455,  Regret:  7.8026%
Total Elapsed Time: 6000.11 Sec.


0,1
Linear loss,▆▄▁▄▅▂▂▄▅▂▂▅▃▁▂▅▅▂▃█▅▂▁▃▂▁▄▅▂▁▃▄▁▂▃▃▁▁▄▅
Regret,█▃▁▂▁▁▁▂▁▁

0,1
Linear loss,10852.64551
Regret,0.07803


[34m[1mwandb[0m: Agent Starting Run: m5o9d38y with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 42.05it/s]
100%|██████████| 180/180 [02:23<00:00,  1.25it/s]
100%|██████████| 180/180 [00:02<00:00, 61.86it/s]


Epoch  1,  Loss: 19501.2422,  Regret: 46.8705%


100%|██████████| 180/180 [00:04<00:00, 43.16it/s]
100%|██████████| 180/180 [00:03<00:00, 48.70it/s]


Epoch  2,  Loss: 19124.0625,  Regret: 25.3611%


100%|██████████| 180/180 [00:04<00:00, 41.85it/s]
100%|██████████| 180/180 [00:04<00:00, 39.04it/s]


Epoch  3,  Loss: 18858.8184,  Regret: 12.5525%


100%|██████████| 180/180 [00:04<00:00, 41.38it/s]
100%|██████████| 180/180 [00:04<00:00, 43.40it/s]


Epoch  4,  Loss: 17358.3750,  Regret:  9.9638%


100%|██████████| 180/180 [00:04<00:00, 42.20it/s]
100%|██████████| 180/180 [00:03<00:00, 45.88it/s]


Epoch  5,  Loss: 17185.5664,  Regret:  9.1509%


100%|██████████| 180/180 [00:04<00:00, 42.20it/s]
100%|██████████| 180/180 [00:04<00:00, 44.05it/s]


Epoch  6,  Loss: 18082.3672,  Regret:  8.5458%


100%|██████████| 180/180 [00:04<00:00, 42.71it/s]
100%|██████████| 180/180 [00:04<00:00, 43.13it/s]


Epoch  7,  Loss: 15096.9668,  Regret:  8.2898%


100%|██████████| 180/180 [00:04<00:00, 42.62it/s]
100%|██████████| 180/180 [00:04<00:00, 43.36it/s]


Epoch  8,  Loss: 15825.3750,  Regret:  8.2015%


100%|██████████| 180/180 [00:04<00:00, 41.76it/s]
100%|██████████| 180/180 [00:04<00:00, 42.78it/s]


Epoch  9,  Loss: 13714.2500,  Regret:  8.0686%


100%|██████████| 180/180 [00:04<00:00, 42.19it/s]
100%|██████████| 180/180 [00:03<00:00, 45.30it/s]


Epoch 10,  Loss: 13877.7949,  Regret:  7.9561%


100%|██████████| 180/180 [00:04<00:00, 41.70it/s]
100%|██████████| 180/180 [00:04<00:00, 38.19it/s]


Epoch 11,  Loss: 13462.7734,  Regret:  7.8249%


100%|██████████| 180/180 [00:04<00:00, 42.26it/s]
100%|██████████| 180/180 [00:04<00:00, 44.05it/s]


Epoch 12,  Loss: 13641.8867,  Regret:  7.7772%


100%|██████████| 180/180 [00:04<00:00, 42.18it/s]
100%|██████████| 180/180 [00:03<00:00, 45.78it/s]


Epoch 13,  Loss: 14328.1826,  Regret:  7.7963%


100%|██████████| 180/180 [00:04<00:00, 42.65it/s]
100%|██████████| 180/180 [00:04<00:00, 43.49it/s]


Epoch 14,  Loss: 13680.8418,  Regret:  7.6120%


100%|██████████| 180/180 [00:04<00:00, 43.94it/s]
100%|██████████| 180/180 [00:04<00:00, 43.13it/s]


Epoch 15,  Loss: 14104.0957,  Regret:  7.5381%


100%|██████████| 180/180 [00:04<00:00, 43.04it/s]
100%|██████████| 180/180 [00:04<00:00, 44.76it/s]


Epoch 16,  Loss: 12339.9072,  Regret:  7.5588%


100%|██████████| 180/180 [00:04<00:00, 43.15it/s]
100%|██████████| 180/180 [00:03<00:00, 45.84it/s]


Epoch 17,  Loss: 12921.8779,  Regret:  7.5146%


100%|██████████| 180/180 [00:04<00:00, 41.81it/s]
100%|██████████| 180/180 [00:04<00:00, 41.39it/s]


Epoch 18,  Loss: 15230.8828,  Regret:  7.3890%


100%|██████████| 180/180 [00:04<00:00, 42.24it/s]
100%|██████████| 180/180 [00:04<00:00, 42.64it/s]


Epoch 19,  Loss: 11461.5996,  Regret:  7.3423%


100%|██████████| 180/180 [00:05<00:00, 32.92it/s]
100%|██████████| 180/180 [00:04<00:00, 42.64it/s]


Epoch 20,  Loss: 14345.9453,  Regret:  7.2198%
Total Elapsed Time: 22405.54 Sec.


0,1
Linear loss,▃▃▆▄▄█▃▃▆▅▃▇▂▆▄▄▂▆▃▅▃▅▁█▂▄▂▆▂▅▃▅▂▆▁▅▃▆▁▄
Regret,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,14345.94531
Regret,0.0722


[34m[1mwandb[0m: Agent Starting Run: kw41utnk with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 42.51it/s]
100%|██████████| 180/180 [00:06<00:00, 25.89it/s]
100%|██████████| 180/180 [00:04<00:00, 39.52it/s]


Epoch  1,  Loss: 14191.5586,  Regret: 17.6118%


100%|██████████| 180/180 [00:08<00:00, 21.26it/s]
100%|██████████| 180/180 [00:07<00:00, 24.89it/s]


Epoch  2,  Loss: 11071.4141,  Regret: 25.4605%


100%|██████████| 180/180 [00:05<00:00, 32.47it/s]
100%|██████████| 180/180 [00:04<00:00, 38.91it/s]


Epoch  3,  Loss: 17285.6172,  Regret:  8.7431%


100%|██████████| 180/180 [00:04<00:00, 39.37it/s]
100%|██████████| 180/180 [00:04<00:00, 42.84it/s]


Epoch  4,  Loss: 14576.4902,  Regret: 15.6740%


100%|██████████| 180/180 [00:04<00:00, 36.32it/s]
100%|██████████| 180/180 [00:04<00:00, 44.76it/s]


Epoch  5,  Loss: 14311.2148,  Regret:  9.7466%


100%|██████████| 180/180 [00:05<00:00, 31.91it/s]
100%|██████████| 180/180 [00:04<00:00, 37.07it/s]


Epoch  6,  Loss: 19861.7676,  Regret: 18.4132%


100%|██████████| 180/180 [00:05<00:00, 31.91it/s]
100%|██████████| 180/180 [00:04<00:00, 41.28it/s]


Epoch  7,  Loss: 15291.8291,  Regret:  8.0743%


100%|██████████| 180/180 [00:16<00:00, 10.81it/s]
100%|██████████| 180/180 [00:08<00:00, 22.39it/s]


Epoch  8,  Loss: 14803.7539,  Regret:  8.1583%


100%|██████████| 180/180 [00:08<00:00, 22.08it/s]
100%|██████████| 180/180 [00:04<00:00, 40.80it/s]


Epoch  9,  Loss: 15265.7979,  Regret:  8.1783%


100%|██████████| 180/180 [00:08<00:00, 22.38it/s]
100%|██████████| 180/180 [00:04<00:00, 39.61it/s]


Epoch 10,  Loss: 14734.2969,  Regret:  8.3540%


100%|██████████| 180/180 [00:05<00:00, 30.39it/s]
100%|██████████| 180/180 [00:04<00:00, 41.93it/s]


Epoch 11,  Loss: 15106.7051,  Regret:  8.2297%


100%|██████████| 180/180 [00:05<00:00, 30.17it/s]
100%|██████████| 180/180 [00:04<00:00, 40.93it/s]


Epoch 12,  Loss: 14945.8594,  Regret:  8.3750%


100%|██████████| 180/180 [00:07<00:00, 24.16it/s]
100%|██████████| 180/180 [00:04<00:00, 42.39it/s]


Epoch 13,  Loss: 14920.4775,  Regret:  8.4731%


100%|██████████| 180/180 [00:06<00:00, 26.01it/s]
100%|██████████| 180/180 [00:04<00:00, 40.68it/s]


Epoch 14,  Loss: 14843.0176,  Regret:  8.2905%


100%|██████████| 180/180 [00:07<00:00, 24.75it/s]
100%|██████████| 180/180 [00:04<00:00, 39.33it/s]


Epoch 15,  Loss: 14159.4160,  Regret:  8.4824%


100%|██████████| 180/180 [00:07<00:00, 24.80it/s]
100%|██████████| 180/180 [00:04<00:00, 40.80it/s]


Epoch 16,  Loss: 13588.5430,  Regret:  8.1084%


100%|██████████| 180/180 [00:09<00:00, 19.36it/s]
100%|██████████| 180/180 [00:04<00:00, 38.47it/s]


Epoch 17,  Loss: 14551.3564,  Regret:  7.9836%


100%|██████████| 180/180 [00:08<00:00, 20.35it/s]
100%|██████████| 180/180 [00:04<00:00, 40.27it/s]


Epoch 18,  Loss: 12293.7031,  Regret:  8.2798%


100%|██████████| 180/180 [00:07<00:00, 25.15it/s]
100%|██████████| 180/180 [00:04<00:00, 41.06it/s]


Epoch 19,  Loss: 9967.0732,  Regret:  8.4304%


100%|██████████| 180/180 [00:07<00:00, 24.08it/s]
100%|██████████| 180/180 [00:04<00:00, 41.90it/s]


Epoch 20,  Loss: 9599.5771,  Regret:  8.1455%
Total Elapsed Time: 12926.94 Sec.


0,1
Linear loss,▂▂█▂▅▆▃▃█▄▂▆▂▇▄▄▁▇▃▇▃▆▂█▃▅▃▆▂▃▃▁▂▇▂▇▄▅▁▅
Regret,▅█▁▄▂▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,9599.57715
Regret,0.08145


[34m[1mwandb[0m: Agent Starting Run: hbflsvr3 with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 38.49it/s]
100%|██████████| 180/180 [00:04<00:00, 39.50it/s]
100%|██████████| 180/180 [00:04<00:00, 44.21it/s]


Epoch  1,  Loss: 19222.6270,  Regret:  8.8684%


100%|██████████| 180/180 [00:04<00:00, 38.94it/s]
100%|██████████| 180/180 [00:04<00:00, 44.35it/s]


Epoch  2,  Loss: 17453.1133,  Regret:  7.6538%


100%|██████████| 180/180 [00:04<00:00, 38.81it/s]
100%|██████████| 180/180 [00:04<00:00, 43.19it/s]


Epoch  3,  Loss: 15596.3633,  Regret:  7.4829%


100%|██████████| 180/180 [00:04<00:00, 39.25it/s]
100%|██████████| 180/180 [00:04<00:00, 41.74it/s]


Epoch  4,  Loss: 14137.8506,  Regret:  7.8721%


100%|██████████| 180/180 [00:04<00:00, 38.60it/s]
100%|██████████| 180/180 [00:04<00:00, 39.00it/s]


Epoch  5,  Loss: 12715.0693,  Regret:  7.7142%


100%|██████████| 180/180 [00:04<00:00, 39.98it/s]
100%|██████████| 180/180 [00:04<00:00, 39.68it/s]


Epoch  6,  Loss: 12594.5879,  Regret:  7.6872%


100%|██████████| 180/180 [00:04<00:00, 38.53it/s]
100%|██████████| 180/180 [00:04<00:00, 41.67it/s]


Epoch  7,  Loss: 11910.9863,  Regret:  7.8037%


100%|██████████| 180/180 [00:04<00:00, 38.81it/s]
100%|██████████| 180/180 [00:04<00:00, 40.46it/s]


Epoch  8,  Loss: 11672.3301,  Regret:  7.7713%


100%|██████████| 180/180 [00:04<00:00, 39.15it/s]
100%|██████████| 180/180 [00:04<00:00, 41.73it/s]


Epoch  9,  Loss: 11181.2227,  Regret:  7.7115%


100%|██████████| 180/180 [00:04<00:00, 38.89it/s]
100%|██████████| 180/180 [00:04<00:00, 41.47it/s]


Epoch 10,  Loss: 11287.5410,  Regret:  7.7269%
Total Elapsed Time: 4193.99 Sec.


0,1
Linear loss,█▅▃▆▇▄▄█▇▄▂▇▄▁▂▆▇▂▄▄▇▃▂▄▃▁▅▇▃▁▄▅▁▂▄▄▁▂▆▇
Regret,█▂▁▃▂▂▃▂▂▂

0,1
Linear loss,11287.54102
Regret,0.07727


[34m[1mwandb[0m: Agent Starting Run: o7bt3no7 with config:
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 40.68it/s]
100%|██████████| 180/180 [00:04<00:00, 40.83it/s]
100%|██████████| 180/180 [00:04<00:00, 41.44it/s]


Epoch  1,  Loss: 19441.6406,  Regret: 35.4935%


100%|██████████| 180/180 [00:04<00:00, 41.44it/s]
100%|██████████| 180/180 [00:04<00:00, 39.85it/s]


Epoch  2,  Loss: 18766.2461,  Regret: 17.8448%


100%|██████████| 180/180 [00:04<00:00, 40.83it/s]
100%|██████████| 180/180 [00:04<00:00, 41.28it/s]


Epoch  3,  Loss: 18330.3203,  Regret:  9.7776%


100%|██████████| 180/180 [00:04<00:00, 41.42it/s]
100%|██████████| 180/180 [00:04<00:00, 38.11it/s]


Epoch  4,  Loss: 16669.9883,  Regret:  8.2784%


100%|██████████| 180/180 [00:11<00:00, 16.12it/s]
100%|██████████| 180/180 [00:06<00:00, 27.11it/s]


Epoch  5,  Loss: 15954.0762,  Regret:  8.0870%


100%|██████████| 180/180 [00:11<00:00, 15.12it/s]
100%|██████████| 180/180 [00:05<00:00, 35.03it/s]


Epoch  6,  Loss: 15265.6338,  Regret:  8.0164%


100%|██████████| 180/180 [00:09<00:00, 18.82it/s]
100%|██████████| 180/180 [00:04<00:00, 39.40it/s]


Epoch  7,  Loss: 14281.0566,  Regret:  7.8296%


100%|██████████| 180/180 [00:06<00:00, 28.12it/s]
100%|██████████| 180/180 [00:04<00:00, 38.91it/s]


Epoch  8,  Loss: 14831.4463,  Regret:  7.9070%


100%|██████████| 180/180 [00:06<00:00, 26.09it/s]
100%|██████████| 180/180 [00:06<00:00, 28.99it/s]


Epoch  9,  Loss: 15167.2031,  Regret:  8.0157%


100%|██████████| 180/180 [00:08<00:00, 22.11it/s]
100%|██████████| 180/180 [00:04<00:00, 37.04it/s]


Epoch 10,  Loss: 12745.3740,  Regret:  7.8706%


100%|██████████| 180/180 [00:08<00:00, 20.36it/s]
100%|██████████| 180/180 [00:04<00:00, 36.03it/s]


Epoch 11,  Loss: 12559.9746,  Regret:  7.8110%


100%|██████████| 180/180 [00:12<00:00, 14.12it/s]
100%|██████████| 180/180 [00:06<00:00, 26.34it/s]


Epoch 12,  Loss: 16198.3301,  Regret:  7.7726%


100%|██████████| 180/180 [00:12<00:00, 14.12it/s]
100%|██████████| 180/180 [00:05<00:00, 31.93it/s]


Epoch 13,  Loss: 12207.3379,  Regret:  7.6232%


100%|██████████| 180/180 [00:09<00:00, 18.57it/s]
100%|██████████| 180/180 [00:04<00:00, 37.92it/s]


Epoch 14,  Loss: 11903.7539,  Regret:  7.6611%


100%|██████████| 180/180 [00:06<00:00, 26.57it/s]
100%|██████████| 180/180 [00:04<00:00, 39.73it/s]


Epoch 15,  Loss: 11716.7285,  Regret:  7.5974%


100%|██████████| 180/180 [00:06<00:00, 26.64it/s]
100%|██████████| 180/180 [00:05<00:00, 32.33it/s]


Epoch 16,  Loss: 14572.8389,  Regret:  7.5099%


100%|██████████| 180/180 [00:07<00:00, 24.20it/s]
100%|██████████| 180/180 [00:04<00:00, 39.41it/s]


Epoch 17,  Loss: 11954.5996,  Regret:  7.4351%


100%|██████████| 180/180 [00:06<00:00, 29.00it/s]
100%|██████████| 180/180 [00:04<00:00, 41.19it/s]


Epoch 18,  Loss: 11428.8857,  Regret:  7.4258%


100%|██████████| 180/180 [00:05<00:00, 34.69it/s]
100%|██████████| 180/180 [00:04<00:00, 38.06it/s]


Epoch 19,  Loss: 11812.6074,  Regret:  7.3508%


100%|██████████| 180/180 [00:07<00:00, 24.38it/s]
100%|██████████| 180/180 [00:04<00:00, 38.90it/s]


Epoch 20,  Loss: 11973.7334,  Regret:  7.2950%
Total Elapsed Time: 13927.88 Sec.


0,1
Linear loss,▃▂▆▄▄█▂▂▆▄▂▇▂▆▃▄▁▆▃▆▃▅▁█▂▄▄▆▁▅▃▅▁▆▂▅▂▆▁▄
Regret,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,11973.7334
Regret,0.07295


[34m[1mwandb[0m: Agent Starting Run: 507nh422 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 36.31it/s]
100%|██████████| 180/180 [00:06<00:00, 25.76it/s]
100%|██████████| 180/180 [00:05<00:00, 32.23it/s]


Epoch  1,  Loss: 18887.3008,  Regret:  8.4391%


100%|██████████| 180/180 [00:07<00:00, 25.57it/s]
100%|██████████| 180/180 [00:06<00:00, 29.69it/s]


Epoch  2,  Loss: 16275.7441,  Regret:  7.3416%


100%|██████████| 180/180 [00:08<00:00, 20.87it/s]
100%|██████████| 180/180 [00:04<00:00, 40.24it/s]


Epoch  3,  Loss: 13946.6855,  Regret:  7.2424%


100%|██████████| 180/180 [00:06<00:00, 27.43it/s]
100%|██████████| 180/180 [00:01<00:00, 112.04it/s]


Epoch  4,  Loss: 12989.8750,  Regret:  7.2373%


100%|██████████| 180/180 [00:05<00:00, 30.75it/s]
100%|██████████| 180/180 [00:03<00:00, 46.07it/s]


Epoch  5,  Loss: 11822.1436,  Regret:  7.1854%


100%|██████████| 180/180 [00:04<00:00, 40.03it/s]
100%|██████████| 180/180 [00:03<00:00, 49.09it/s]


Epoch  6,  Loss: 12677.0312,  Regret:  7.2690%


100%|██████████| 180/180 [00:04<00:00, 40.41it/s]
100%|██████████| 180/180 [00:03<00:00, 47.18it/s]


Epoch  7,  Loss: 11631.0762,  Regret:  7.3194%


100%|██████████| 180/180 [00:04<00:00, 37.26it/s]
100%|██████████| 180/180 [00:03<00:00, 51.33it/s]


Epoch  8,  Loss: 11060.0986,  Regret:  7.2560%


100%|██████████| 180/180 [00:04<00:00, 40.36it/s]
100%|██████████| 180/180 [00:03<00:00, 50.63it/s]


Epoch  9,  Loss: 11195.8311,  Regret:  7.2850%


100%|██████████| 180/180 [00:04<00:00, 40.09it/s]
100%|██████████| 180/180 [00:03<00:00, 49.73it/s]


Epoch 10,  Loss: 10829.7451,  Regret:  7.4079%


100%|██████████| 180/180 [00:04<00:00, 38.90it/s]
100%|██████████| 180/180 [00:04<00:00, 43.32it/s]


Epoch 11,  Loss: 11016.3320,  Regret:  7.3567%


100%|██████████| 180/180 [00:04<00:00, 38.51it/s]
100%|██████████| 180/180 [00:03<00:00, 51.48it/s]


Epoch 12,  Loss: 9946.9971,  Regret:  7.3192%


100%|██████████| 180/180 [00:04<00:00, 40.22it/s]
100%|██████████| 180/180 [00:03<00:00, 52.08it/s]


Epoch 13,  Loss: 9908.3438,  Regret:  7.2969%


100%|██████████| 180/180 [00:05<00:00, 33.21it/s]
100%|██████████| 180/180 [00:03<00:00, 50.51it/s]


Epoch 14,  Loss: 10070.9541,  Regret:  7.1475%


100%|██████████| 180/180 [00:05<00:00, 35.80it/s]
100%|██████████| 180/180 [00:03<00:00, 45.92it/s]


Epoch 15,  Loss: 10893.8730,  Regret:  7.1952%


100%|██████████| 180/180 [00:04<00:00, 39.71it/s]
100%|██████████| 180/180 [00:04<00:00, 44.75it/s]


Epoch 16,  Loss: 10037.7295,  Regret:  7.2363%


100%|██████████| 180/180 [00:04<00:00, 39.34it/s]
100%|██████████| 180/180 [00:03<00:00, 52.33it/s]


Epoch 17,  Loss: 9450.8721,  Regret:  7.1526%


100%|██████████| 180/180 [00:05<00:00, 35.45it/s]
100%|██████████| 180/180 [00:03<00:00, 51.73it/s]


Epoch 18,  Loss: 9891.8018,  Regret:  7.0894%


100%|██████████| 180/180 [00:06<00:00, 28.66it/s]
100%|██████████| 180/180 [00:05<00:00, 31.93it/s]


Epoch 19,  Loss: 10039.4834,  Regret:  7.1880%


100%|██████████| 180/180 [00:05<00:00, 35.51it/s]
100%|██████████| 180/180 [00:03<00:00, 49.60it/s]


Epoch 20,  Loss: 10041.5186,  Regret:  7.0939%
Total Elapsed Time: 9432.04 Sec.


0,1
Linear loss,▄▃▇▄▃▆▂▂█▄▁▆▂▆▃▄▁▆▂▆▃▅▁█▃▅▃▅▁▆▃▅▂▆▂▆▃▆▁▄
Regret,█▂▂▂▁▂▂▂▂▃▂▂▂▁▂▂▁▁▂▁

0,1
Linear loss,10041.51855
Regret,0.07094


[34m[1mwandb[0m: Agent Starting Run: yd3eevhp with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 41.00it/s]
100%|██████████| 180/180 [00:06<00:00, 26.24it/s]
100%|██████████| 180/180 [00:04<00:00, 43.69it/s]


Epoch  1,  Loss: 20080.1211,  Regret: 49.6902%


100%|██████████| 180/180 [00:06<00:00, 27.95it/s]
100%|██████████| 180/180 [00:03<00:00, 48.28it/s]


Epoch  2,  Loss: 18857.5332,  Regret: 34.2495%


100%|██████████| 180/180 [00:04<00:00, 39.37it/s]
100%|██████████| 180/180 [00:04<00:00, 41.08it/s]


Epoch  3,  Loss: 19049.5684,  Regret: 21.7931%


100%|██████████| 180/180 [00:06<00:00, 29.10it/s]
100%|██████████| 180/180 [00:04<00:00, 38.26it/s]


Epoch  4,  Loss: 19462.8633,  Regret: 15.9131%


100%|██████████| 180/180 [00:05<00:00, 33.97it/s]
100%|██████████| 180/180 [00:03<00:00, 48.92it/s]


Epoch  5,  Loss: 17523.6406,  Regret: 13.2349%


100%|██████████| 180/180 [00:06<00:00, 28.53it/s]
100%|██████████| 180/180 [00:04<00:00, 42.68it/s]


Epoch  6,  Loss: 18420.6816,  Regret: 11.5843%


100%|██████████| 180/180 [00:06<00:00, 26.69it/s]
100%|██████████| 180/180 [00:03<00:00, 49.40it/s]


Epoch  7,  Loss: 16157.8867,  Regret: 10.7433%


100%|██████████| 180/180 [00:07<00:00, 24.41it/s]
100%|██████████| 180/180 [00:11<00:00, 15.66it/s]


Epoch  8,  Loss: 15169.3242,  Regret: 10.1821%


100%|██████████| 180/180 [00:10<00:00, 17.11it/s]
100%|██████████| 180/180 [00:05<00:00, 35.96it/s]


Epoch  9,  Loss: 14175.8066,  Regret:  9.4508%


100%|██████████| 180/180 [00:04<00:00, 39.21it/s]
100%|██████████| 180/180 [00:03<00:00, 50.47it/s]


Epoch 10,  Loss: 16708.4160,  Regret:  9.1446%
Total Elapsed Time: 6227.38 Sec.


0,1
Linear loss,▇▄▂▅▇▃▃█▆▄▃▆▅▁▂▆▇▂▇▂▇▂▂▃▄▂▇▆▄▁▆▅▁▂▄▂▃▁█▇
Regret,█▅▃▂▂▁▁▁▁▁

0,1
Linear loss,16708.41602
Regret,0.09145


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gbewyf53 with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 49.68it/s]
100%|██████████| 180/180 [00:04<00:00, 41.64it/s]
100%|██████████| 180/180 [00:03<00:00, 49.19it/s]


Epoch  1,  Loss: 19299.4395,  Regret: 21.0514%


100%|██████████| 180/180 [00:04<00:00, 41.59it/s]
100%|██████████| 180/180 [00:03<00:00, 51.70it/s]


Epoch  2,  Loss: 18380.2637,  Regret:  8.6305%


100%|██████████| 180/180 [00:04<00:00, 41.36it/s]
100%|██████████| 180/180 [00:04<00:00, 42.00it/s]


Epoch  3,  Loss: 17163.6348,  Regret:  8.1425%


100%|██████████| 180/180 [00:04<00:00, 36.47it/s]
100%|██████████| 180/180 [00:01<00:00, 117.59it/s]


Epoch  4,  Loss: 15576.5840,  Regret:  7.7848%


100%|██████████| 180/180 [00:04<00:00, 36.88it/s]
100%|██████████| 180/180 [00:03<00:00, 50.54it/s]


Epoch  5,  Loss: 15816.4053,  Regret:  7.7379%


100%|██████████| 180/180 [00:04<00:00, 41.61it/s]
100%|██████████| 180/180 [00:03<00:00, 51.11it/s]


Epoch  6,  Loss: 13513.0400,  Regret:  7.7789%


100%|██████████| 180/180 [00:04<00:00, 41.76it/s]
100%|██████████| 180/180 [00:03<00:00, 55.21it/s]


Epoch  7,  Loss: 15571.7822,  Regret:  7.8065%


100%|██████████| 180/180 [00:04<00:00, 42.06it/s]
100%|██████████| 180/180 [00:03<00:00, 51.14it/s]


Epoch  8,  Loss: 13375.8027,  Regret:  7.7219%


100%|██████████| 180/180 [00:04<00:00, 41.82it/s]
100%|██████████| 180/180 [00:03<00:00, 52.85it/s]


Epoch  9,  Loss: 13079.7754,  Regret:  7.7663%


100%|██████████| 180/180 [00:04<00:00, 41.53it/s]
100%|██████████| 180/180 [00:03<00:00, 54.39it/s]


Epoch 10,  Loss: 12929.8145,  Regret:  7.7951%


100%|██████████| 180/180 [00:04<00:00, 41.85it/s]
100%|██████████| 180/180 [00:03<00:00, 53.73it/s]


Epoch 11,  Loss: 13451.9971,  Regret:  7.7320%


100%|██████████| 180/180 [00:04<00:00, 42.29it/s]
100%|██████████| 180/180 [00:03<00:00, 54.42it/s]


Epoch 12,  Loss: 12655.6602,  Regret:  7.7765%


100%|██████████| 180/180 [00:04<00:00, 41.70it/s]
100%|██████████| 180/180 [00:03<00:00, 51.68it/s]


Epoch 13,  Loss: 12936.1934,  Regret:  7.7366%


100%|██████████| 180/180 [00:04<00:00, 41.48it/s]
100%|██████████| 180/180 [00:03<00:00, 47.94it/s]


Epoch 14,  Loss: 12323.6758,  Regret:  7.7411%


100%|██████████| 180/180 [00:04<00:00, 41.50it/s]
100%|██████████| 180/180 [00:03<00:00, 50.44it/s]


Epoch 15,  Loss: 12217.6475,  Regret:  7.7359%


100%|██████████| 180/180 [00:04<00:00, 42.16it/s]
100%|██████████| 180/180 [00:03<00:00, 49.11it/s]


Epoch 16,  Loss: 11961.4785,  Regret:  7.7194%


100%|██████████| 180/180 [00:04<00:00, 41.71it/s]
100%|██████████| 180/180 [00:03<00:00, 50.08it/s]


Epoch 17,  Loss: 12196.4922,  Regret:  7.6855%


100%|██████████| 180/180 [00:04<00:00, 41.46it/s]
100%|██████████| 180/180 [00:03<00:00, 50.48it/s]


Epoch 18,  Loss: 12210.4541,  Regret:  7.6671%


100%|██████████| 180/180 [00:04<00:00, 41.65it/s]
100%|██████████| 180/180 [00:03<00:00, 52.13it/s]


Epoch 19,  Loss: 11610.7646,  Regret:  7.6637%


100%|██████████| 180/180 [00:04<00:00, 41.71it/s]
100%|██████████| 180/180 [00:03<00:00, 51.75it/s]


Epoch 20,  Loss: 11737.9111,  Regret:  7.6632%
Total Elapsed Time: 7889.37 Sec.


0,1
Linear loss,▃▃▇▄▄▇▁▂▇▅▁▇▁▇▃▅▂▆▂▅▃▅▁█▂▄▂▆▁▆▃▅▁▆▁▅▃▆▁▄
Regret,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,11737.91113
Regret,0.07663


[34m[1mwandb[0m: Agent Starting Run: p9x8qptj with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 51.69it/s]
100%|██████████| 180/180 [00:04<00:00, 42.28it/s]
100%|██████████| 180/180 [00:03<00:00, 51.49it/s]


Epoch  1,  Loss: 15139.6152,  Regret:  8.1627%


100%|██████████| 180/180 [00:04<00:00, 42.61it/s]
100%|██████████| 180/180 [00:03<00:00, 52.08it/s]


Epoch  2,  Loss: 12835.8652,  Regret:  8.1863%


100%|██████████| 180/180 [00:04<00:00, 42.53it/s]
100%|██████████| 180/180 [00:03<00:00, 50.09it/s]


Epoch  3,  Loss: 10953.9688,  Regret:  7.8368%


100%|██████████| 180/180 [00:04<00:00, 42.23it/s]
100%|██████████| 180/180 [00:03<00:00, 47.51it/s]


Epoch  4,  Loss: 12089.8184,  Regret:  7.9854%


100%|██████████| 180/180 [00:04<00:00, 41.87it/s]
100%|██████████| 180/180 [00:03<00:00, 48.86it/s]


Epoch  5,  Loss: 12491.6055,  Regret:  7.8052%


100%|██████████| 180/180 [00:04<00:00, 41.92it/s]
100%|██████████| 180/180 [00:03<00:00, 50.63it/s]


Epoch  6,  Loss: 13523.1992,  Regret:  7.6999%


100%|██████████| 180/180 [00:04<00:00, 42.47it/s]
100%|██████████| 180/180 [00:03<00:00, 48.79it/s]


Epoch  7,  Loss: 10649.3193,  Regret:  7.9188%


100%|██████████| 180/180 [00:04<00:00, 42.32it/s]
100%|██████████| 180/180 [00:03<00:00, 48.34it/s]


Epoch  8,  Loss: 9388.1299,  Regret:  8.1333%


100%|██████████| 180/180 [00:04<00:00, 42.03it/s]
100%|██████████| 180/180 [00:03<00:00, 49.31it/s]


Epoch  9,  Loss: 8259.9766,  Regret:  7.8232%


100%|██████████| 180/180 [00:04<00:00, 41.89it/s]
100%|██████████| 180/180 [00:03<00:00, 51.02it/s]


Epoch 10,  Loss: 12924.0996,  Regret:  7.6411%
Total Elapsed Time: 3866.74 Sec.


0,1
Linear loss,█▅▃▅▇▃▂▆█▃▂▆▃▂▂▆▇▂▅▅█▂▁▄▃▁▆▆▄▁▄▅▁▂▄▅▁▂▅▇
Regret,██▄▅▃▂▅▇▃▁

0,1
Linear loss,12924.09961
Regret,0.07641


[34m[1mwandb[0m: Agent Starting Run: cu8xz02k with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:03<00:00, 54.54it/s]
100%|██████████| 180/180 [00:04<00:00, 40.70it/s]
100%|██████████| 180/180 [00:03<00:00, 51.54it/s]


Epoch  1,  Loss: 19078.7969,  Regret:  9.0615%


100%|██████████| 180/180 [00:04<00:00, 40.61it/s]
100%|██████████| 180/180 [00:03<00:00, 49.69it/s]


Epoch  2,  Loss: 16328.0547,  Regret:  7.8843%


100%|██████████| 180/180 [00:04<00:00, 40.00it/s]
100%|██████████| 180/180 [00:03<00:00, 49.71it/s]


Epoch  3,  Loss: 14384.8008,  Regret:  7.7564%


100%|██████████| 180/180 [00:04<00:00, 39.79it/s]
100%|██████████| 180/180 [00:03<00:00, 54.71it/s]


Epoch  4,  Loss: 13168.7002,  Regret:  7.6281%


100%|██████████| 180/180 [00:04<00:00, 39.93it/s]
100%|██████████| 180/180 [00:03<00:00, 53.15it/s]


Epoch  5,  Loss: 12112.6064,  Regret:  7.5481%


100%|██████████| 180/180 [00:04<00:00, 40.77it/s]
100%|██████████| 180/180 [00:03<00:00, 54.60it/s]


Epoch  6,  Loss: 11506.9277,  Regret:  7.4001%


100%|██████████| 180/180 [00:04<00:00, 40.10it/s]
100%|██████████| 180/180 [00:03<00:00, 53.30it/s]


Epoch  7,  Loss: 11561.8838,  Regret:  7.5230%


100%|██████████| 180/180 [00:04<00:00, 39.31it/s]
100%|██████████| 180/180 [00:03<00:00, 51.03it/s]


Epoch  8,  Loss: 11014.8154,  Regret:  7.5064%


100%|██████████| 180/180 [15:07<00:00,  5.04s/it] 
100%|██████████| 180/180 [00:04<00:00, 43.70it/s]


Epoch  9,  Loss: 10633.8154,  Regret:  7.5452%


100%|██████████| 180/180 [00:04<00:00, 41.22it/s]
100%|██████████| 180/180 [00:03<00:00, 50.63it/s]


Epoch 10,  Loss: 10899.0254,  Regret:  7.4918%
Total Elapsed Time: 121452.09 Sec.


0,1
Linear loss,█▅▃▅▇▃▄▇█▃▂▇▄▁▃▆█▂▄▄█▃▂▄▃▁▅▆▃▁▄▅▁▂▄▄▁▂▆▇
Regret,█▃▃▂▂▁▂▁▂▁

0,1
Linear loss,10899.02539
Regret,0.07492


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ftb64mat with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 30
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:04<00:00, 43.79it/s]
100%|██████████| 180/180 [00:04<00:00, 39.11it/s]
100%|██████████| 180/180 [00:04<00:00, 44.47it/s]


Epoch  1,  Loss: 20297.4727,  Regret: 13.9846%


100%|██████████| 180/180 [00:04<00:00, 39.05it/s]
100%|██████████| 180/180 [00:04<00:00, 41.31it/s]


Epoch  2,  Loss: 12058.8906,  Regret: 16.0470%


100%|██████████| 180/180 [00:04<00:00, 39.55it/s]
100%|██████████| 180/180 [00:04<00:00, 42.06it/s]


Epoch  3,  Loss: 17213.9629,  Regret:  8.3892%


100%|██████████| 180/180 [00:04<00:00, 39.14it/s]
100%|██████████| 180/180 [00:04<00:00, 43.72it/s]


Epoch  4,  Loss: 16453.6133,  Regret:  8.5305%


100%|██████████| 180/180 [00:04<00:00, 38.79it/s]
100%|██████████| 180/180 [00:04<00:00, 40.66it/s]


Epoch  5,  Loss: 15047.6377,  Regret:  7.9262%


100%|██████████| 180/180 [00:05<00:00, 35.05it/s]
100%|██████████| 180/180 [00:04<00:00, 43.81it/s]


Epoch  6,  Loss: 10662.2109,  Regret:  7.9172%


100%|██████████| 180/180 [00:05<00:00, 33.39it/s]
100%|██████████| 180/180 [00:04<00:00, 43.30it/s]


Epoch  7,  Loss: 14123.7305,  Regret:  7.7207%


100%|██████████| 180/180 [00:05<00:00, 32.97it/s]
100%|██████████| 180/180 [00:04<00:00, 39.29it/s]


Epoch  8,  Loss: 9924.7812,  Regret:  7.7936%


100%|██████████| 180/180 [00:05<00:00, 32.69it/s]
100%|██████████| 180/180 [00:04<00:00, 41.29it/s]


Epoch  9,  Loss: 14108.9746,  Regret:  7.8439%


100%|██████████| 180/180 [00:05<00:00, 32.10it/s]
100%|██████████| 180/180 [00:04<00:00, 43.90it/s]


Epoch 10,  Loss: 15205.2285,  Regret:  7.8290%


100%|██████████| 180/180 [00:05<00:00, 32.61it/s]
100%|██████████| 180/180 [00:04<00:00, 41.94it/s]


Epoch 11,  Loss: 11784.6592,  Regret:  7.7795%


100%|██████████| 180/180 [00:05<00:00, 32.11it/s]
100%|██████████| 180/180 [00:04<00:00, 42.59it/s]


Epoch 12,  Loss: 15042.9336,  Regret:  7.8397%


100%|██████████| 180/180 [00:05<00:00, 31.49it/s]
100%|██████████| 180/180 [00:04<00:00, 41.39it/s]


Epoch 13,  Loss: 14226.7734,  Regret:  7.8428%


100%|██████████| 180/180 [00:05<00:00, 31.06it/s]
100%|██████████| 180/180 [00:04<00:00, 44.72it/s]


Epoch 14,  Loss: 14869.8184,  Regret:  7.8897%


100%|██████████| 180/180 [00:05<00:00, 30.93it/s]
100%|██████████| 180/180 [00:04<00:00, 44.06it/s]


Epoch 15,  Loss: 14795.4434,  Regret:  7.9639%


100%|██████████| 180/180 [00:05<00:00, 30.73it/s]
100%|██████████| 180/180 [00:04<00:00, 41.90it/s]


Epoch 16,  Loss: 14733.0498,  Regret:  7.9527%


100%|██████████| 180/180 [00:05<00:00, 31.11it/s]
100%|██████████| 180/180 [00:04<00:00, 44.86it/s]


Epoch 17,  Loss: 11895.1611,  Regret:  7.9621%


100%|██████████| 180/180 [00:05<00:00, 31.39it/s]
100%|██████████| 180/180 [00:04<00:00, 39.87it/s]


Epoch 18,  Loss: 14651.4551,  Regret:  7.9683%


100%|██████████| 180/180 [00:05<00:00, 31.14it/s]
100%|██████████| 180/180 [00:04<00:00, 43.77it/s]


Epoch 19,  Loss: 9315.5859,  Regret:  7.9415%


100%|██████████| 180/180 [00:05<00:00, 31.05it/s]
100%|██████████| 180/180 [00:04<00:00, 38.34it/s]


Epoch 20,  Loss: 11429.0879,  Regret:  8.0234%


100%|██████████| 180/180 [00:05<00:00, 31.66it/s]
100%|██████████| 180/180 [00:04<00:00, 40.42it/s]


Epoch 21,  Loss: 12968.8662,  Regret:  7.9802%


100%|██████████| 180/180 [00:05<00:00, 30.89it/s]
100%|██████████| 180/180 [00:04<00:00, 40.33it/s]


Epoch 22,  Loss: 14505.5547,  Regret:  7.9480%


100%|██████████| 180/180 [00:05<00:00, 31.14it/s]
100%|██████████| 180/180 [00:04<00:00, 42.52it/s]


Epoch 23,  Loss: 13222.3447,  Regret:  7.8632%


100%|██████████| 180/180 [00:05<00:00, 31.17it/s]
100%|██████████| 180/180 [00:04<00:00, 42.98it/s]


Epoch 24,  Loss: 12828.9824,  Regret:  7.9129%


100%|██████████| 180/180 [00:05<00:00, 31.01it/s]
100%|██████████| 180/180 [00:04<00:00, 40.43it/s]


Epoch 25,  Loss: 10713.0781,  Regret:  7.9831%


100%|██████████| 180/180 [00:05<00:00, 30.78it/s]
100%|██████████| 180/180 [00:04<00:00, 38.19it/s]


Epoch 26,  Loss: 12412.5166,  Regret:  7.9857%


100%|██████████| 180/180 [00:05<00:00, 30.81it/s]
100%|██████████| 180/180 [00:04<00:00, 40.60it/s]


Epoch 27,  Loss: 14364.0254,  Regret:  8.0215%


100%|██████████| 180/180 [00:05<00:00, 30.79it/s]
100%|██████████| 180/180 [00:04<00:00, 40.80it/s]


Epoch 28,  Loss: 10106.5732,  Regret:  7.9887%


100%|██████████| 180/180 [00:05<00:00, 30.57it/s]
100%|██████████| 180/180 [00:04<00:00, 41.48it/s]


Epoch 29,  Loss: 14322.8496,  Regret:  7.9934%


100%|██████████| 180/180 [00:05<00:00, 31.24it/s]
100%|██████████| 180/180 [00:04<00:00, 43.78it/s]


Epoch 30,  Loss: 8482.0615,  Regret:  8.0759%
Total Elapsed Time: 14985.92 Sec.


0,1
Linear loss,▄█▆▂▃▆▃▃▂▅▅▂▃█▆▁▂▂▄▁▂▅▇▁▂▁▅▅▃█▁▂▂▂█▆▁█▄▄
Regret,▆█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,8482.06152
Regret,0.08076


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kaw8bkqz with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 169.46it/s]
100%|██████████| 180/180 [00:02<00:00, 62.88it/s]
100%|██████████| 180/180 [00:01<00:00, 155.38it/s]


Epoch  1,  Loss: 14941.5645,  Regret: 12.2762%


100%|██████████| 180/180 [00:03<00:00, 54.88it/s]
100%|██████████| 180/180 [00:01<00:00, 131.53it/s]


Epoch  2,  Loss: 22064.2266,  Regret:  7.5242%


100%|██████████| 180/180 [00:03<00:00, 46.90it/s]
100%|██████████| 180/180 [00:01<00:00, 132.49it/s]


Epoch  3,  Loss: 15270.4375,  Regret: 18.8759%


100%|██████████| 180/180 [00:04<00:00, 43.34it/s]
100%|██████████| 180/180 [00:01<00:00, 108.61it/s]


Epoch  4,  Loss: 18255.3379,  Regret:  7.8399%


100%|██████████| 180/180 [00:04<00:00, 38.60it/s]
100%|██████████| 180/180 [00:04<00:00, 42.05it/s]


Epoch  5,  Loss: 10820.1895,  Regret:  7.9926%


100%|██████████| 180/180 [00:04<00:00, 36.65it/s]
100%|██████████| 180/180 [00:04<00:00, 39.33it/s]


Epoch  6,  Loss: 15446.5566,  Regret:  7.8780%


100%|██████████| 180/180 [00:05<00:00, 35.54it/s]
100%|██████████| 180/180 [00:03<00:00, 45.37it/s]


Epoch  7,  Loss: 14684.1426,  Regret:  7.8805%


100%|██████████| 180/180 [00:03<00:00, 46.56it/s]
100%|██████████| 180/180 [00:01<00:00, 164.14it/s]


Epoch  8,  Loss: 15637.5596,  Regret:  7.9360%


100%|██████████| 180/180 [00:03<00:00, 50.10it/s]
100%|██████████| 180/180 [00:01<00:00, 160.46it/s]


Epoch  9,  Loss: 15499.7109,  Regret:  7.8529%


100%|██████████| 180/180 [00:03<00:00, 47.82it/s]
100%|██████████| 180/180 [00:01<00:00, 156.00it/s]


Epoch 10,  Loss: 22306.5371,  Regret:  7.9202%


100%|██████████| 180/180 [00:04<00:00, 44.76it/s]
100%|██████████| 180/180 [00:01<00:00, 156.42it/s]


Epoch 11,  Loss: 17129.0977,  Regret:  7.6177%


100%|██████████| 180/180 [00:04<00:00, 43.68it/s]
100%|██████████| 180/180 [00:01<00:00, 144.93it/s]


Epoch 12,  Loss: 13929.1348,  Regret:  7.6322%


100%|██████████| 180/180 [00:04<00:00, 41.23it/s]
100%|██████████| 180/180 [00:01<00:00, 143.41it/s]


Epoch 13,  Loss: 14727.4043,  Regret:  7.6876%


100%|██████████| 180/180 [00:04<00:00, 39.53it/s]
100%|██████████| 180/180 [00:01<00:00, 143.34it/s]


Epoch 14,  Loss: 14604.4551,  Regret:  7.6382%


100%|██████████| 180/180 [00:04<00:00, 40.74it/s]
100%|██████████| 180/180 [00:01<00:00, 149.65it/s]


Epoch 15,  Loss: 14434.3809,  Regret:  7.6200%


100%|██████████| 180/180 [00:04<00:00, 38.47it/s]
100%|██████████| 180/180 [00:01<00:00, 93.29it/s]


Epoch 16,  Loss: 14226.5645,  Regret:  7.6238%


100%|██████████| 180/180 [00:05<00:00, 34.26it/s]
100%|██████████| 180/180 [00:01<00:00, 144.81it/s]


Epoch 17,  Loss: 11702.1035,  Regret:  7.6381%


100%|██████████| 180/180 [00:04<00:00, 38.21it/s]
100%|██████████| 180/180 [00:01<00:00, 150.66it/s]


Epoch 18,  Loss: 13980.4883,  Regret:  7.6466%


100%|██████████| 180/180 [00:04<00:00, 40.50it/s]
100%|██████████| 180/180 [00:01<00:00, 142.32it/s]


Epoch 19,  Loss: 13864.1484,  Regret:  7.6598%


100%|██████████| 180/180 [00:04<00:00, 41.40it/s]
100%|██████████| 180/180 [00:01<00:00, 141.62it/s]


Epoch 20,  Loss: 13721.4453,  Regret:  7.6590%
Total Elapsed Time: 7657.88 Sec.


0,1
Linear loss,▂▃▆▅▂███▆▅▂▅▃▄▅▄▁▅▁▆▃▅▃█▂▄▃▅▂▆▃▄▂▅▂▆▂▅▁▄
Regret,▄▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Linear loss,13721.44531
Regret,0.07659


[34m[1mwandb[0m: Agent Starting Run: 835yij4n with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 160.23it/s]
100%|██████████| 180/180 [00:02<00:00, 69.68it/s]
100%|██████████| 180/180 [00:01<00:00, 154.20it/s]


Epoch  1,  Loss: 18264.6289,  Regret:  8.3223%


100%|██████████| 180/180 [00:02<00:00, 60.86it/s]
100%|██████████| 180/180 [00:01<00:00, 147.99it/s]


Epoch  2,  Loss: 18383.1465,  Regret:  8.0560%


100%|██████████| 180/180 [00:02<00:00, 60.08it/s]
100%|██████████| 180/180 [00:01<00:00, 147.52it/s]


Epoch  3,  Loss: 15768.7012,  Regret:  7.8277%


100%|██████████| 180/180 [00:03<00:00, 58.00it/s]
100%|██████████| 180/180 [00:01<00:00, 144.84it/s]


Epoch  4,  Loss: 14187.3750,  Regret:  7.8106%


100%|██████████| 180/180 [00:03<00:00, 57.76it/s]
100%|██████████| 180/180 [00:01<00:00, 143.49it/s]


Epoch  5,  Loss: 12593.0137,  Regret:  8.0551%


100%|██████████| 180/180 [00:03<00:00, 55.23it/s]
100%|██████████| 180/180 [00:01<00:00, 140.43it/s]


Epoch  6,  Loss: 15983.0801,  Regret:  8.1015%


100%|██████████| 180/180 [00:03<00:00, 56.57it/s]
100%|██████████| 180/180 [00:01<00:00, 134.08it/s]


Epoch  7,  Loss: 10196.9014,  Regret:  7.8993%


100%|██████████| 180/180 [00:03<00:00, 53.54it/s]
100%|██████████| 180/180 [00:01<00:00, 138.73it/s]


Epoch  8,  Loss: 14175.9473,  Regret:  7.7238%


100%|██████████| 180/180 [00:03<00:00, 55.25it/s]
100%|██████████| 180/180 [00:01<00:00, 139.02it/s]


Epoch  9,  Loss: 12080.1475,  Regret:  7.7515%


100%|██████████| 180/180 [00:03<00:00, 54.73it/s]
100%|██████████| 180/180 [00:01<00:00, 141.70it/s]


Epoch 10,  Loss: 10042.5742,  Regret:  7.7444%
Total Elapsed Time: 2776.88 Sec.


0,1
Linear loss,▇▄▃▅▇▂▂▆█▃▂▆▃▂▂▅▇▃▅▃▇▂▂▄▃▁▄▆▃▁▄▅▁▂▃▆▁▁▆▆
Regret,█▅▂▂▅▅▃▁▁▁

0,1
Linear loss,10042.57422
Regret,0.07744


[34m[1mwandb[0m: Agent Starting Run: hyy0peqr with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 128
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 154.96it/s]
100%|██████████| 180/180 [00:03<00:00, 56.93it/s]
100%|██████████| 180/180 [00:01<00:00, 146.54it/s]


Epoch  1,  Loss: 13584.8398,  Regret:  8.8491%


100%|██████████| 180/180 [00:03<00:00, 49.77it/s]
100%|██████████| 180/180 [00:01<00:00, 145.86it/s]


Epoch  2,  Loss: 11633.7188,  Regret:  8.3197%


100%|██████████| 180/180 [00:03<00:00, 48.63it/s]
100%|██████████| 180/180 [00:01<00:00, 141.39it/s]


Epoch  3,  Loss: 11688.2314,  Regret:  8.0392%


100%|██████████| 180/180 [00:03<00:00, 48.07it/s]
100%|██████████| 180/180 [00:01<00:00, 137.37it/s]


Epoch  4,  Loss: 12407.6299,  Regret:  7.9070%


100%|██████████| 180/180 [00:03<00:00, 48.44it/s]
100%|██████████| 180/180 [00:01<00:00, 140.74it/s]


Epoch  5,  Loss: 12665.1406,  Regret:  7.5597%


100%|██████████| 180/180 [00:03<00:00, 51.71it/s]
100%|██████████| 180/180 [00:01<00:00, 145.11it/s]


Epoch  6,  Loss: 11221.4941,  Regret:  7.8506%


100%|██████████| 180/180 [00:03<00:00, 47.67it/s]
100%|██████████| 180/180 [00:01<00:00, 136.15it/s]


Epoch  7,  Loss: 11237.3779,  Regret:  7.8167%


100%|██████████| 180/180 [00:03<00:00, 47.92it/s]
100%|██████████| 180/180 [00:01<00:00, 141.38it/s]


Epoch  8,  Loss: 9962.3271,  Regret:  7.2883%


100%|██████████| 180/180 [00:03<00:00, 49.31it/s]
100%|██████████| 180/180 [00:01<00:00, 143.26it/s]


Epoch  9,  Loss: 8501.7227,  Regret:  7.0889%


100%|██████████| 180/180 [00:03<00:00, 48.73it/s]
100%|██████████| 180/180 [00:01<00:00, 138.84it/s]


Epoch 10,  Loss: 9286.0283,  Regret:  7.1118%
Total Elapsed Time: 3251.62 Sec.


0,1
Linear loss,▇▄▂▆▇▃▂▅█▃▂▅▃▁▂▅▆▂▄▅▇▄▂▄▂▁▅▅▃▁▄▄▁▂▄▆▂▂▅▇
Regret,█▆▅▄▃▄▄▂▁▁

0,1
Linear loss,9286.02832
Regret,0.07112


[34m[1mwandb[0m: Agent Starting Run: nfrrdgho with config:
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 150.38it/s]
100%|██████████| 180/180 [00:02<00:00, 63.92it/s]
100%|██████████| 180/180 [00:01<00:00, 157.35it/s]


Epoch  1,  Loss: 12557.1230,  Regret:  7.8119%


100%|██████████| 180/180 [00:03<00:00, 59.63it/s]
100%|██████████| 180/180 [00:01<00:00, 152.25it/s]


Epoch  2,  Loss: 13394.6689,  Regret:  7.8394%


100%|██████████| 180/180 [00:03<00:00, 58.34it/s]
100%|██████████| 180/180 [00:01<00:00, 140.27it/s]


Epoch  3,  Loss: 11534.9375,  Regret:  7.6364%


100%|██████████| 180/180 [00:03<00:00, 59.54it/s]
100%|██████████| 180/180 [00:01<00:00, 144.22it/s]


Epoch  4,  Loss: 10602.2236,  Regret:  7.7842%


100%|██████████| 180/180 [00:03<00:00, 59.23it/s]
100%|██████████| 180/180 [00:01<00:00, 144.74it/s]


Epoch  5,  Loss: 12092.3408,  Regret:  7.7687%


100%|██████████| 180/180 [00:03<00:00, 54.86it/s]
100%|██████████| 180/180 [00:01<00:00, 140.77it/s]


Epoch  6,  Loss: 10823.9023,  Regret:  7.6074%


100%|██████████| 180/180 [00:03<00:00, 55.98it/s]
100%|██████████| 180/180 [00:01<00:00, 144.42it/s]


Epoch  7,  Loss: 10069.8252,  Regret:  7.3299%


100%|██████████| 180/180 [00:03<00:00, 53.53it/s]
100%|██████████| 180/180 [00:01<00:00, 126.32it/s]


Epoch  8,  Loss: 9882.2715,  Regret:  7.4396%


100%|██████████| 180/180 [00:03<00:00, 51.22it/s]
100%|██████████| 180/180 [00:01<00:00, 140.78it/s]


Epoch  9,  Loss: 10620.3545,  Regret:  7.4706%


100%|██████████| 180/180 [00:03<00:00, 51.88it/s]
100%|██████████| 180/180 [00:01<00:00, 132.40it/s]


Epoch 10,  Loss: 9674.5645,  Regret:  7.4843%
Total Elapsed Time: 2837.51 Sec.


0,1
Linear loss,▇▄▁▅▇▃▂▆█▃▂▅▃▁▂▅▆▂▃▅▇▃▂▄▃▁▅▅▂▁▃▄▁▂▃▅▁▂▄▆
Regret,██▅▇▇▅▁▃▃▃

0,1
Linear loss,9674.56445
Regret,0.07484


[34m[1mwandb[0m: Agent Starting Run: vyge3oos with config:
[34m[1mwandb[0m: 	dropout: 0.7
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 32
[34m[1mwandb[0m: 	num_epochs: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 160.29it/s]
100%|██████████| 180/180 [00:02<00:00, 69.01it/s]
100%|██████████| 180/180 [00:01<00:00, 152.95it/s]


Epoch  1,  Loss: 16537.1875,  Regret:  7.8329%


100%|██████████| 180/180 [00:03<00:00, 52.96it/s]
100%|██████████| 180/180 [00:01<00:00, 139.99it/s]


Epoch  2,  Loss: 16679.4766,  Regret:  8.0528%


100%|██████████| 180/180 [00:03<00:00, 55.46it/s]
100%|██████████| 180/180 [00:01<00:00, 142.59it/s]


Epoch  3,  Loss: 12740.5703,  Regret:  8.0480%


100%|██████████| 180/180 [00:03<00:00, 56.22it/s]
100%|██████████| 180/180 [00:01<00:00, 139.78it/s]


Epoch  4,  Loss: 11087.7246,  Regret:  7.8831%


100%|██████████| 180/180 [00:03<00:00, 56.75it/s]
100%|██████████| 180/180 [00:01<00:00, 142.52it/s]


Epoch  5,  Loss: 11477.3525,  Regret:  7.7737%


100%|██████████| 180/180 [00:03<00:00, 58.64it/s]
100%|██████████| 180/180 [00:01<00:00, 143.92it/s]


Epoch  6,  Loss: 10260.0498,  Regret:  7.7665%


100%|██████████| 180/180 [00:03<00:00, 59.43it/s]
100%|██████████| 180/180 [00:01<00:00, 146.84it/s]


Epoch  7,  Loss: 10074.1230,  Regret:  7.9967%


100%|██████████| 180/180 [00:02<00:00, 60.95it/s]
100%|██████████| 180/180 [00:01<00:00, 141.46it/s]


Epoch  8,  Loss: 9986.2393,  Regret:  7.8837%


100%|██████████| 180/180 [00:03<00:00, 50.95it/s]
100%|██████████| 180/180 [00:01<00:00, 123.09it/s]


Epoch  9,  Loss: 9521.3457,  Regret:  7.9183%


100%|██████████| 180/180 [00:04<00:00, 44.83it/s]
100%|██████████| 180/180 [00:01<00:00, 133.38it/s]


Epoch 10,  Loss: 16095.0645,  Regret:  7.5582%


100%|██████████| 180/180 [00:04<00:00, 42.53it/s]
100%|██████████| 180/180 [00:01<00:00, 126.08it/s]


Epoch 11,  Loss: 10713.5527,  Regret:  7.6676%


100%|██████████| 180/180 [00:03<00:00, 51.19it/s]
100%|██████████| 180/180 [00:02<00:00, 84.95it/s] 


Epoch 12,  Loss: 13485.2246,  Regret:  7.3893%


100%|██████████| 180/180 [00:03<00:00, 52.69it/s]
100%|██████████| 180/180 [00:01<00:00, 127.45it/s]


Epoch 13,  Loss: 11899.7793,  Regret:  7.3651%


100%|██████████| 180/180 [00:03<00:00, 56.02it/s]
100%|██████████| 180/180 [00:01<00:00, 132.88it/s]


Epoch 14,  Loss: 10753.2275,  Regret:  7.4692%


100%|██████████| 180/180 [00:03<00:00, 54.68it/s]
100%|██████████| 180/180 [00:01<00:00, 130.96it/s]


Epoch 15,  Loss: 11422.8174,  Regret:  7.6034%


100%|██████████| 180/180 [00:03<00:00, 57.56it/s]
100%|██████████| 180/180 [00:01<00:00, 136.58it/s]


Epoch 16,  Loss: 12531.2695,  Regret:  7.6495%


100%|██████████| 180/180 [00:03<00:00, 57.06it/s]
100%|██████████| 180/180 [00:01<00:00, 133.61it/s]


Epoch 17,  Loss: 12340.3496,  Regret:  7.4528%


100%|██████████| 180/180 [00:03<00:00, 57.70it/s]
100%|██████████| 180/180 [00:01<00:00, 133.97it/s]


Epoch 18,  Loss: 11175.6250,  Regret:  7.4104%


100%|██████████| 180/180 [00:03<00:00, 56.18it/s]
100%|██████████| 180/180 [00:01<00:00, 128.27it/s]


Epoch 19,  Loss: 10307.5703,  Regret:  7.3540%


100%|██████████| 180/180 [00:03<00:00, 58.28it/s]
100%|██████████| 180/180 [00:01<00:00, 137.89it/s]


Epoch 20,  Loss: 8712.3857,  Regret:  7.2795%
Total Elapsed Time: 5848.95 Sec.


0,1
Linear loss,▃▂▆▁▃▄▁▃▆▃▁▄▂▄▂▃▁▅▂▅▂▄▁█▃▄▂▅▂▄▂▄▁▅▁▅▂▄▁▄
Regret,▆██▆▅▅▇▆▇▄▅▂▂▃▄▄▃▂▂▁

0,1
Linear loss,8712.38574
Regret,0.07279


[34m[1mwandb[0m: Agent Starting Run: p5o5ky1r with config:
[34m[1mwandb[0m: 	dropout: 0.9
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	method_name: spo+
[34m[1mwandb[0m: 	neurons: 64
[34m[1mwandb[0m: 	num_epochs: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


100%|██████████| 180/180 [00:01<00:00, 138.62it/s]
100%|██████████| 180/180 [00:02<00:00, 62.02it/s]
100%|██████████| 180/180 [00:01<00:00, 133.09it/s]


Epoch  1,  Loss: 14274.8066,  Regret:  8.2001%


100%|██████████| 180/180 [00:03<00:00, 57.96it/s]
100%|██████████| 180/180 [00:01<00:00, 140.42it/s]


Epoch  2,  Loss: 13471.7803,  Regret:  7.7742%


100%|██████████| 180/180 [00:03<00:00, 56.94it/s]
100%|██████████| 180/180 [00:01<00:00, 129.15it/s]


Epoch  3,  Loss: 13509.2598,  Regret:  7.5969%


100%|██████████| 180/180 [00:03<00:00, 56.31it/s]
100%|██████████| 180/180 [00:01<00:00, 135.73it/s]


Epoch  4,  Loss: 16179.7109,  Regret:  7.3977%


100%|██████████| 180/180 [00:03<00:00, 53.50it/s]
100%|██████████| 180/180 [00:01<00:00, 124.21it/s]


Epoch  5,  Loss: 12667.5889,  Regret:  7.6616%


100%|██████████| 180/180 [00:03<00:00, 55.81it/s]
100%|██████████| 180/180 [00:01<00:00, 107.90it/s]


Epoch  6,  Loss: 11817.7969,  Regret:  7.4780%


100%|██████████| 180/180 [00:03<00:00, 50.91it/s]
100%|██████████| 180/180 [00:01<00:00, 113.39it/s]


Epoch  7,  Loss: 12424.9502,  Regret:  7.2898%


100%|██████████| 180/180 [00:03<00:00, 48.90it/s]
100%|██████████| 180/180 [00:01<00:00, 113.18it/s]


Epoch  8,  Loss: 10442.0176,  Regret:  7.2418%


100%|██████████| 180/180 [00:03<00:00, 49.03it/s]
100%|██████████| 180/180 [00:01<00:00, 140.25it/s]


Epoch  9,  Loss: 10571.3916,  Regret:  7.1744%


100%|██████████| 180/180 [00:03<00:00, 52.61it/s]
100%|██████████| 180/180 [00:01<00:00, 123.14it/s]


Epoch 10,  Loss: 9786.9795,  Regret:  7.2747%
Total Elapsed Time: 2948.07 Sec.


0,1
Linear loss,█▅▃▅█▂▁▇▇▄▂▆▂▁▃▆▇▂▅▅█▂▂▄▃▁▄▇▂▁▄▅▁▂▄▅▂▂▅▇
Regret,█▅▄▃▄▃▂▁▁▂

0,1
Linear loss,9786.97949
Regret,0.07275


In [None]:
api = wandb.Api()
sweep = api.sweep("Pyepo_special/Sweep Pyepo Basic Model/<sweep_id>")
best_run = sweep.best_run()

In [18]:
# Redo training with best hyperparameters
best_config={
        'lr': 1e-3,
        'gamma': 0.9,
        'num_epochs': 30,
        'neurons': 128,
        'dropout': 0.7,
        "method_name": "spo+",
    }

reg, loss_log, loss_log_regret = trainModel(best_config)
wandb.finish()

100%|██████████| 365/365 [00:02<00:00, 141.20it/s]
100%|██████████| 365/365 [00:07<00:00, 49.63it/s]
100%|██████████| 365/365 [00:02<00:00, 136.30it/s]


Epoch  1,  Loss: 10869.3262,  Regret: 12.9335%


100%|██████████| 365/365 [00:08<00:00, 45.30it/s]
100%|██████████| 365/365 [00:02<00:00, 134.43it/s]


Epoch  2,  Loss: 12989.1963,  Regret: 13.7724%


100%|██████████| 365/365 [00:07<00:00, 45.84it/s]
100%|██████████| 365/365 [00:02<00:00, 132.72it/s]


Epoch  3,  Loss: 11205.2529,  Regret: 14.8011%


100%|██████████| 365/365 [00:08<00:00, 43.92it/s]
100%|██████████| 365/365 [00:02<00:00, 129.89it/s]


Epoch  4,  Loss: 11532.5010,  Regret: 15.1881%


100%|██████████| 365/365 [00:08<00:00, 42.86it/s]
100%|██████████| 365/365 [00:02<00:00, 133.60it/s]


Epoch  5,  Loss: 12011.2637,  Regret: 15.2285%


100%|██████████| 365/365 [00:08<00:00, 41.52it/s]
100%|██████████| 365/365 [00:02<00:00, 133.10it/s]


Epoch  6,  Loss: 12645.5400,  Regret: 15.0165%


100%|██████████| 365/365 [00:09<00:00, 39.54it/s]
100%|██████████| 365/365 [00:02<00:00, 135.26it/s]


Epoch  7,  Loss: 12187.3516,  Regret: 15.6875%


100%|██████████| 365/365 [00:09<00:00, 40.15it/s]
100%|██████████| 365/365 [00:02<00:00, 135.70it/s]


Epoch  8,  Loss: 11480.4121,  Regret: 15.9656%


100%|██████████| 365/365 [00:09<00:00, 38.25it/s]
100%|██████████| 365/365 [00:09<00:00, 38.61it/s]


Epoch  9,  Loss: 8829.1338,  Regret: 15.9134%


100%|██████████| 365/365 [00:10<00:00, 34.74it/s]
100%|██████████| 365/365 [00:11<00:00, 31.93it/s]


Epoch 10,  Loss: 14217.1084,  Regret: 16.5042%


100%|██████████| 365/365 [00:11<00:00, 32.08it/s]
100%|██████████| 365/365 [00:09<00:00, 37.83it/s]


Epoch 11,  Loss: 10423.0850,  Regret: 16.6399%


100%|██████████| 365/365 [00:09<00:00, 38.51it/s]
100%|██████████| 365/365 [00:09<00:00, 38.29it/s]


Epoch 12,  Loss: 13560.4326,  Regret: 16.6164%


100%|██████████| 365/365 [00:09<00:00, 38.01it/s]
100%|██████████| 365/365 [00:09<00:00, 37.48it/s]


Epoch 13,  Loss: 10760.8916,  Regret: 16.8653%


100%|██████████| 365/365 [00:09<00:00, 37.80it/s]
100%|██████████| 365/365 [00:09<00:00, 37.63it/s]


Epoch 14,  Loss: 12370.3975,  Regret: 16.9791%


100%|██████████| 365/365 [00:09<00:00, 38.49it/s]
100%|██████████| 365/365 [00:02<00:00, 155.64it/s]


Epoch 15,  Loss: 12614.7080,  Regret: 16.8645%


100%|██████████| 365/365 [00:07<00:00, 48.25it/s]
100%|██████████| 365/365 [00:03<00:00, 118.98it/s]


Epoch 16,  Loss: 9824.5088,  Regret: 16.8683%


100%|██████████| 365/365 [00:10<00:00, 35.06it/s]
100%|██████████| 365/365 [00:02<00:00, 123.23it/s]


Epoch 17,  Loss: 9550.0088,  Regret: 16.7526%


100%|██████████| 365/365 [00:10<00:00, 33.46it/s]
100%|██████████| 365/365 [00:02<00:00, 128.44it/s]


Epoch 18,  Loss: 12399.9561,  Regret: 17.7850%


100%|██████████| 365/365 [00:10<00:00, 34.26it/s]
100%|██████████| 365/365 [00:03<00:00, 120.94it/s]


Epoch 19,  Loss: 7329.9824,  Regret: 18.0466%


100%|██████████| 365/365 [00:11<00:00, 32.80it/s]
100%|██████████| 365/365 [00:03<00:00, 116.34it/s]


Epoch 20,  Loss: 11481.3955,  Regret: 17.6925%


100%|██████████| 365/365 [00:11<00:00, 32.98it/s]
100%|██████████| 365/365 [00:03<00:00, 110.73it/s]


Epoch 21,  Loss: 9882.2998,  Regret: 17.8233%


100%|██████████| 365/365 [00:11<00:00, 33.11it/s]
100%|██████████| 365/365 [00:03<00:00, 113.59it/s]


Epoch 22,  Loss: 13232.2617,  Regret: 17.5040%


100%|██████████| 365/365 [00:11<00:00, 31.77it/s]
100%|██████████| 365/365 [00:03<00:00, 114.05it/s]


Epoch 23,  Loss: 10266.8408,  Regret: 17.5084%


100%|██████████| 365/365 [00:12<00:00, 30.13it/s]
100%|██████████| 365/365 [00:03<00:00, 110.75it/s]


Epoch 24,  Loss: 6561.2510,  Regret: 17.4460%


100%|██████████| 365/365 [00:12<00:00, 29.88it/s]
100%|██████████| 365/365 [00:03<00:00, 106.45it/s]


Epoch 25,  Loss: 7024.0654,  Regret: 17.7415%


100%|██████████| 365/365 [00:14<00:00, 24.78it/s]
100%|██████████| 365/365 [00:03<00:00, 95.63it/s] 


Epoch 26,  Loss: 12756.4355,  Regret: 17.5829%


100%|██████████| 365/365 [00:12<00:00, 30.17it/s]
100%|██████████| 365/365 [00:03<00:00, 109.88it/s]


Epoch 27,  Loss: 10269.2812,  Regret: 17.6518%


100%|██████████| 365/365 [00:11<00:00, 31.26it/s]
100%|██████████| 365/365 [00:03<00:00, 112.56it/s]


Epoch 28,  Loss: 9888.4531,  Regret: 17.9752%


100%|██████████| 365/365 [00:11<00:00, 30.73it/s]
100%|██████████| 365/365 [00:03<00:00, 109.39it/s]


Epoch 29,  Loss: 13063.7793,  Regret: 17.8070%


100%|██████████| 365/365 [00:11<00:00, 32.20it/s]
100%|██████████| 365/365 [00:03<00:00, 106.24it/s]


Epoch 30,  Loss: 10326.1895,  Regret: 17.7897%
Total Elapsed Time: 55546.91 Sec.


0,1
Linear loss,█▆▂▇▃▃▃▄▅█▁▇▃▃▂▄▃▅▂▇▃▃▃▅▂▁▅▂▆▂▂▄▁▂▁▂▄▃▂▃
Regret,▁▂▄▄▄▄▅▅▅▆▆▆▆▇▆▆▆████▇▇▇█▇▇███

0,1
Linear loss,10326.18945
Regret,0.1779


In [19]:
forward_bids = []
hydrogen_plan = []
reg.eval()
for i, data in enumerate(loader_test):
    x, c, w, z = data
    if torch.cuda.is_available():
        x, c, w, z = x.cuda(), c.cuda(), w.cuda(), z.cuda()
    predicted_costs = reg(x).detach().numpy()[0]
    model = hydrogenPlanning(realized=wind_test[i])
    model.setObjective(predicted_costs)
    forward, hydrogen = model.get_plan()
    forward_bids.extend(forward)
    hydrogen_plan.extend(hydrogen)


In [20]:
pd.DataFrame({"forward bid" : forward_bids,"hydrogen production" : hydrogen_plan}).to_csv("ILO_base_365_best.csv", index=False)