In [31]:
torch.backends.mps.is_available() 

True

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD, Adam
from pytorch_lightning.loggers import TensorBoardLogger, logger


import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import optuna
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping


  from .autonotebook import tqdm as notebook_tqdm


In [71]:
# load data
df = pd.read_csv('data/insurance.csv')
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [72]:
# convert categorical data 'smoker' and 'region' to numerical data
df['smoker'] = df['smoker'].map({'yes': 1, 'no': 0})
df['region'] = df['region'].map({'southwest': 0, 'southeast': 1, 'northwest': 2, 'northeast': 3})

# normalize data
df['age'] = (df['age'] - df['age'].mean()) / df['age'].std()
df['bmi'] = (df['bmi'] - df['bmi'].mean()) / df['bmi'].std()
df['children'] = (df['children'] - df['children'].mean()) / df['children'].std()
df['charges'] = (df['charges'] - df['charges'].mean()) / df['charges'].std()

# split data into train and validate and test
train_df = df.sample(frac=0.6, random_state=0)
val_df = df.drop(train_df.index).sample(frac=0.5, random_state=0)
test_df = df.drop(train_df.index).drop(val_df.index)

In [73]:
# create Dataset class for train_df and val_df

class trainDataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.X = torch.tensor(df[['age', 'bmi', 'children', 'smoker', 'region']].values, dtype=torch.float32)
        self.y = torch.tensor(df['charges'].values, dtype=torch.float32)
        #self.y = self.y.view(self.y.shape[0], 1)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

class valDataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.X = torch.tensor(df[['age', 'bmi', 'children', 'smoker', 'region']].values, dtype=torch.float32)
        self.y = torch.tensor(df['charges'].values, dtype=torch.float32)
        #self.y = self.y.view(self.y.shape[0], 1)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# create train and val dataloaders
train_dataset = trainDataset(train_df)
val_dataset = valDataset(val_df)

In [75]:
train_dataset[0]

(tensor([ 0.9105, -0.0760, -0.0787,  0.0000,  0.0000]), tensor(-0.2928))

In [132]:
import optuna
import pytorch_lightning as pl
import torch

class RegressionModel(pl.LightningModule):
    def __init__(self, num_neurons, learning_rate, batch_size):
        super().__init__()
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.num_neurons = num_neurons              
        self.linear1 = torch.nn.Linear(5, num_neurons)
        self.linear2 = torch.nn.Linear(num_neurons, num_neurons)
        self.linear3 = torch.nn.Linear(num_neurons, 1)
        self.relu = torch.nn.ReLU()
        self.save_hyperparameters()

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        x = self.relu(x)
        x = self.linear3(x)
        x = F.softmax(x, dim=1)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x).squeeze(dim=1)
        loss = torch.nn.functional.mse_loss(y_hat, y)
        self.log('train_loss', loss)
        return {'loss': loss}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x).squeeze(dim=1)
        loss = torch.nn.functional.mse_loss(y_hat, y)
        self.log('val_loss', loss)
        return loss

    def validation_end(self, validation_step_outputs):
        avg_loss = torch.stack(validation_step_outputs)
        self.log('avg_val_loss', avg_loss)
        return avg_loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def train_dataloader(self):
        return torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, num_workers=0)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, num_workers=0)




In [133]:
logger = TensorBoardLogger('lightning_logs', name='insurance')

model = RegressionModel(num_neurons=32, learning_rate=0.01, batch_size=32)
trainer = pl.Trainer(accelerator='gpu', devices=1, max_epochs=10, logger=logger)
trainer.fit(model)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name    | Type   | Params
-----------------------------------
0 | linear1 | Linear | 192   
1 | linear2 | Linear | 1.1 K 
2 | linear3 | Linear | 33    
3 | relu    | ReLU   | 0     
-----------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


                                                                           

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 9: 100%|██████████| 35/35 [00:00<00:00, 144.23it/s, loss=2.02, v_num=8]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 35/35 [00:00<00:00, 139.55it/s, loss=2.02, v_num=8]


In [134]:
trainer.logged_metrics

{'train_loss': tensor(1.0916), 'val_loss': tensor(1.8803)}

In [135]:
for i, j in trainer.callback_metrics.items():
    print(i, j)

train_loss tensor(1.0916)
val_loss tensor(1.8803)


In [136]:
def optimize_model(trial):
    
    num_neurons = trial.suggest_int('num_neurons', 16, 256)
    batch_size= trial.suggest_int('batch_size', 32, 512)
    learning_rate= trial.suggest_float('learning_rate', 1e-5, 1e-1)

    model = RegressionModel(num_neurons=num_neurons, learning_rate=learning_rate, batch_size=batch_size)
    trainer = pl.Trainer(accelerator='mps')
    trainer.fit(model)
    return trainer.callback_metrics['val_loss']
    
study = optuna.create_study()
study.optimize(optimize_model, n_trials=10)

print(study.best_trial)

[32m[I 2023-01-10 01:16:30,948][0m A new study created in memory with name: no-name-45b6d698-6707-466e-a7df-bebb3935c60c[0m
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name    | Type   | Params
-----------------------------------
0 | linear1 | Linear | 834   
1 | linear2 | Linear | 19.5 K
2 | linear3 | Linear | 140   
3 | relu    | ReLU   | 0     
-----------------------------------
20.4 K    Trainable params
0         Non-trainable params
20.4 K    Total params
0.082     Total estimated model params size (MB)


                                                                           

  rank_zero_warn(


Epoch 999: 100%|██████████| 7/7 [00:00<00:00, 115.49it/s, loss=2.05, v_num=17]

`Trainer.fit` stopped: `max_epochs=1000` reached.


Epoch 999: 100%|██████████| 7/7 [00:00<00:00, 103.34it/s, loss=2.05, v_num=17]


[32m[I 2023-01-10 01:17:41,867][0m Trial 0 finished with value: 1.8802931308746338 and parameters: {'num_neurons': 139, 'batch_size': 167, 'learning_rate': 0.08769622508616076}. Best is trial 0 with value: 1.8802931308746338.[0m
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name    | Type   | Params
-----------------------------------
0 | linear1 | Linear | 816   
1 | linear2 | Linear | 18.6 K
2 | linear3 | Linear | 137   
3 | relu    | ReLU   | 0     
-----------------------------------
19.6 K    Trainable params
0         Non-trainable params
19.6 K    Total params
0.078     Total estimated model params size (MB)


                                                                           

  rank_zero_warn(


Epoch 246:   7%|▋         | 1/15 [00:00<00:00, 119.43it/s, loss=2.03, v_num=18] 

In [46]:
# set up the logger
logger = TensorBoardLogger('lightning_logs', name='insurance')

#??? set up the early stopping callback???
early_stop_callback = EarlyStopping( monitor='val_loss', min_delta=0.00, patience=10, verbose=False, mode='min')

#??? set up the checkpoint callback???
checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath='lightning_logs/insurance/checkpoints', filename='insurance-{epoch:02d}-{val_loss:.2f}', save_top_k=3, mode='min')

# define the model
class InsuranceModel(pl.LightningModule):
    def __init__(self, learning_rate=0.01, weight_decay=0.01):
        super().__init__()
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.linear = nn.Linear(5, 1)
        self.loss = nn.MSELoss()
        
    def forward(self, x):
        return self.linear(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        self.log('val_loss', loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
        return optimizer
    
    def train_dataloader(self):
        return torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X, y), batch_size=32, shuffle=True)
    
    def val_dataloader(self):
        return torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X, y), batch_size=32, shuffle=False)
    
    def test_dataloader(self):
        return torch.utils.data.DataLoader(torch.utils.data.TensorDataset(X, y), batch_size=32, shuffle=False)

# define the objective function
def objective(trial):
    # sample hyperparameters
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-1)
    
    # instantiate the model
    model = InsuranceModel(learning_rate=learning_rate, weight_decay=weight_decay)
    
    # instantiate the trainer
    trainer = pl.Trainer(max_epochs=100, logger=logger, callbacks=[early_stop_callback, checkpoint_callback])
    
    # train the model
    trainer.fit(model)
    
    # return the validation loss
    return trainer.callback_metrics['val_loss'].item()

# instantiate the study
study = optuna.create_study(direction='minimize')

# optimize the objective function
study.optimize(objective, n_trials=100)

# print the best hyperparameters
print(study.best_params)

# print the best validation loss
print(study.best_value)

# print the best model
print(study.best_trial)

# plot the optimization history
optuna.visualization.plot_optimization_history(study)

# plot the hyperparameter relationships
optuna.visualization.plot_slice(study)

# plot the hyperparameter importances
optuna.visualization.plot_param_importances(study)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'])

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='val_loss')

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='train_loss')

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='val_loss', target_is_max=False)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='train_loss', target_is_max=False)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='val_loss', target_is_max=False, n_levels=20)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='train_loss', target_is_max=False, n_levels=20)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='val_loss', target_is_max=False, n_levels=20, sample=100)

# plot the learning rate and weight decay
optuna.visualization.plot_contour(study, params=['learning_rate', 'weight_decay'], target_name='train_loss', target_is_max=False, n_levels=20, sample=100)



[32m[I 2023-01-08 23:11:03,308][0m A new study created in memory with name: no-name-efa712d4-2402-474b-bcf0-8cde9ba049bb[0m
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-1)
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: lightning_logs/insurance

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


                                                                            

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 13:  66%|██████▌   | 45/68 [00:00<00:00, 413.93it/s, loss=1.38, v_num=0]
Validation DataLoader 0:  35%|███▌      | 12/34 [00:00<00:00, 454.33it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 32:  47%|████▋     | 32/68 [00:00<00:00, 447.95it/s, loss=1.01, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 54:  79%|███████▉  | 54/68 [00:00<00:00, 417.97it/s, loss=0.889, v_num=0]
Validation DataLoader 0:  62%|██████▏   | 21/34 [00:00<00:00, 397.25it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 76:  96%|█████████▌| 65/68 [00:00<00:00, 406.37it/s, loss=0.775, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 98:  84%|████████▍ | 57/68 [00:00<00:00, 422.23it/s, loss=0.664, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 9: 100%|██████████| 68/68 [00:00<00:00, 392.90it/s, loss=1.62, v_num=0]


[32m[I 2023-01-08 23:11:21,340][0m Trial 1 finished with value: 1.5269993543624878 and parameters: {'learning_rate': 0.00032289502374522293, 'weight_decay': 0.00012616774111181441}. Best is trial 0 with value: 0.5994266867637634.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 384.64it/s, loss=1.44, v_num=0]


[32m[I 2023-01-08 23:11:21,539][0m Trial 2 finished with value: 1.3961275815963745 and parameters: {'learning_rate': 2.5459387646736242e-05, 'weight_decay': 7.554936858819517e-05}. Best is trial 0 with value: 0.5994266867637634.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 10:  25%|██▌       | 17/68 [00:00<00:00, 358.69it/s, loss=0.255, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 428.43it/s, loss=1.72, v_num=0]


[32m[I 2023-01-08 23:11:24,538][0m Trial 8 finished with value: 1.5785019397735596 and parameters: {'learning_rate': 0.0002701057575618161, 'weight_decay': 5.773654686609404e-05}. Best is trial 3 with value: 0.2505391836166382.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 319.83it/s, loss=1.02, v_num=0] 


[32m[I 2023-01-08 23:11:24,781][0m Trial 9 finished with value: 0.8684954047203064 and parameters: {'learning_rate': 0.0046360008906032755, 'weight_decay': 0.00037775872234153696}. Best is trial 3 with value: 0.2505391836166382.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 390.43it/s, loss=0.264, v_num=0]

[32m[I 2023-01-08 23:11:24,987][0m Trial 10 finished with value: 0.24988143146038055 and parameters: {'learning_rate': 0.09199477357843268, 'weight_decay': 0.002714645956037795}. Best is trial 10 with value: 0.24988143146038055.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs






  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 329.87it/s, loss=0.292, v_num=0]


[32m[I 2023-01-08 23:11:25,221][0m Trial 11 finished with value: 0.2661546468734741 and parameters: {'learning_rate': 0.0802106122982599, 'weight_decay': 0.0018574652863732287}. Best is trial 10 with value: 0.24988143146038055.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 390.74it/s, loss=0.312, v_num=0]

[32m[I 2023-01-08 23:11:25,425][0m Trial 12 finished with value: 0.24191558361053467 and parameters: {'learning_rate': 0.08695790914740267, 'weight_decay': 0.002329114393782327}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 336.37it/s, loss=0.886, v_num=0]


[32m[I 2023-01-08 23:11:25,654][0m Trial 13 finished with value: 0.7491325736045837 and parameters: {'learning_rate': 0.006761209919633058, 'weight_decay': 0.004453489582234519}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 378.94it/s, loss=0.933, v_num=0]

[32m[I 2023-01-08 23:11:25,862][0m Trial 14 finished with value: 0.6813340783119202 and parameters: {'learning_rate': 0.010230887410916776, 'weight_decay': 0.07255649657882056}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs





HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 376.49it/s, loss=1.3, v_num=0] 

[32m[I 2023-01-08 23:11:26,066][0m Trial 15 finished with value: 1.2157574892044067 and parameters: {'learning_rate': 0.002235092948547615, 'weight_decay': 0.0022242661909152644}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 376.20it/s, loss=0.507, v_num=0]

[32m[I 2023-01-08 23:11:26,275][0m Trial 16 finished with value: 0.41210901737213135 and parameters: {'learning_rate': 0.02496178172829638, 'weight_decay': 1.2920754757033912e-05}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 370.66it/s, loss=0.4, v_num=0]  

[32m[I 2023-01-08 23:11:26,492][0m Trial 17 finished with value: 0.2753080725669861 and parameters: {'learning_rate': 0.07974514842194401, 'weight_decay': 0.008297110973863974}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 381.55it/s, loss=0.726, v_num=0]

[32m[I 2023-01-08 23:11:26,696][0m Trial 18 finished with value: 0.5911929607391357 and parameters: {'learning_rate': 0.015932887366599355, 'weight_decay': 0.0005168820843173792}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 370.35it/s, loss=1.85, v_num=0]

[32m[I 2023-01-08 23:11:26,903][0m Trial 19 finished with value: 1.5722709894180298 and parameters: {'learning_rate': 0.003187900858741191, 'weight_decay': 0.04133696472361746}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 407.49it/s, loss=1.72, v_num=0]


[32m[I 2023-01-08 23:11:27,098][0m Trial 20 finished with value: 1.5425899028778076 and parameters: {'learning_rate': 0.0016190992268423137, 'weight_decay': 0.0018270781040655062}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0:  10%|█         | 7/68 [00:00<00:00, 338.49it/s, loss=0.594, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 366.15it/s, loss=0.272, v_num=0]

[32m[I 2023-01-08 23:11:28,158][0m Trial 27 finished with value: 0.24504226446151733 and parameters: {'learning_rate': 0.09470329146222604, 'weight_decay': 0.004187091621106384}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 334.83it/s, loss=1.78, v_num=0]


[32m[I 2023-01-08 23:11:28,393][0m Trial 28 finished with value: 1.7842592000961304 and parameters: {'learning_rate': 8.304146918419725e-05, 'weight_decay': 0.017816773420951313}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 385.08it/s, loss=0.341, v_num=0]

[32m[I 2023-01-08 23:11:28,598][0m Trial 29 finished with value: 0.276746541261673 and parameters: {'learning_rate': 0.09496556500606591, 'weight_decay': 0.003733791563038003}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 378.86it/s, loss=0.976, v_num=0]

[32m[I 2023-01-08 23:11:28,799][0m Trial 30 finished with value: 0.8791000247001648 and parameters: {'learning_rate': 0.006593369462133495, 'weight_decay': 0.01887063229871756}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 397.56it/s, loss=0.492, v_num=0]


[32m[I 2023-01-08 23:11:28,999][0m Trial 31 finished with value: 0.3675293028354645 and parameters: {'learning_rate': 0.029843769182975946, 'weight_decay': 0.0024819433294625765}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 408.69it/s, loss=0.362, v_num=0]


[32m[I 2023-01-08 23:11:29,198][0m Trial 32 finished with value: 0.25957682728767395 and parameters: {'learning_rate': 0.05528316035688753, 'weight_decay': 0.0010810801536838726}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 382.19it/s, loss=0.723, v_num=0]

[32m[I 2023-01-08 23:11:29,413][0m Trial 33 finished with value: 0.5375903248786926 and parameters: {'learning_rate': 0.01657045545332488, 'weight_decay': 0.005519247450987185}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs






  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 401.11it/s, loss=0.545, v_num=0]


[32m[I 2023-01-08 23:11:29,612][0m Trial 34 finished with value: 0.347271591424942 and parameters: {'learning_rate': 0.057971194566389016, 'weight_decay': 0.00023282689910976038}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 372.65it/s, loss=1.47, v_num=0]

[32m[I 2023-01-08 23:11:29,820][0m Trial 35 finished with value: 1.428170919418335 and parameters: {'learning_rate': 0.0009589546005860462, 'weight_decay': 0.0011574307550290665}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 395.12it/s, loss=1.07, v_num=0]

[32m[I 2023-01-08 23:11:30,022][0m Trial 36 finished with value: 1.1219455003738403 and parameters: {'learning_rate': 8.492680504261069e-05, 'weight_decay': 0.012041914151578662}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 336.85it/s, loss=0.446, v_num=0]


[32m[I 2023-01-08 23:11:30,251][0m Trial 37 finished with value: 0.39872610569000244 and parameters: {'learning_rate': 0.02077580624861699, 'weight_decay': 0.0001159427356725279}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 400.00it/s, loss=0.472, v_num=0]


[32m[I 2023-01-08 23:11:30,448][0m Trial 38 finished with value: 0.39554286003112793 and parameters: {'learning_rate': 0.03169841554958815, 'weight_decay': 0.0006100686970768852}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0:  85%|████████▌ | 58/68 [00:00<00:00, 407.75it/s, loss=1.39, v_num=0]
Validation DataLoader 0:  74%|███████▎  | 25/34 [00:00<00:00, 436.55it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 400.98it/s, loss=2.15, v_num=0]


[32m[I 2023-01-08 23:11:31,633][0m Trial 45 finished with value: 2.1580471992492676 and parameters: {'learning_rate': 0.0005114830928836242, 'weight_decay': 0.0029122144518592182}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 371.23it/s, loss=1.1, v_num=0] 

[32m[I 2023-01-08 23:11:31,850][0m Trial 46 finished with value: 0.9502697587013245 and parameters: {'learning_rate': 0.009738512547428059, 'weight_decay': 0.00016505812644835883}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 381.79it/s, loss=0.39, v_num=0] 

[32m[I 2023-01-08 23:11:32,056][0m Trial 47 finished with value: 0.26258283853530884 and parameters: {'learning_rate': 0.05942627030799447, 'weight_decay': 0.0008899566367984021}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 391.67it/s, loss=0.746, v_num=0]

[32m[I 2023-01-08 23:11:32,263][0m Trial 48 finished with value: 0.6781606674194336 and parameters: {'learning_rate': 0.021669693089970386, 'weight_decay': 0.0015697360674879958}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 378.49it/s, loss=0.522, v_num=0]

[32m[I 2023-01-08 23:11:32,467][0m Trial 49 finished with value: 0.3693954646587372 and parameters: {'learning_rate': 0.03713766899855699, 'weight_decay': 0.012585888947312931}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs





HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 388.50it/s, loss=0.622, v_num=0]

[32m[I 2023-01-08 23:11:32,669][0m Trial 50 finished with value: 0.5414841175079346 and parameters: {'learning_rate': 0.014745229642123535, 'weight_decay': 0.0317136671287179}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 393.43it/s, loss=0.333, v_num=0]


[32m[I 2023-01-08 23:11:32,870][0m Trial 51 finished with value: 0.2477877289056778 and parameters: {'learning_rate': 0.0709966196152277, 'weight_decay': 0.0007716614882896781}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 338.30it/s, loss=0.365, v_num=0]


[32m[I 2023-01-08 23:11:33,104][0m Trial 52 finished with value: 0.25585395097732544 and parameters: {'learning_rate': 0.07629657026687044, 'weight_decay': 0.0004950132288258954}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 399.29it/s, loss=0.529, v_num=0]

[32m[I 2023-01-08 23:11:33,311][0m Trial 53 finished with value: 0.3648189902305603 and parameters: {'learning_rate': 0.04409681099321583, 'weight_decay': 0.0017502612315502468}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 390.06it/s, loss=0.323, v_num=0]

[32m[I 2023-01-08 23:11:33,518][0m Trial 54 finished with value: 0.2478480488061905 and parameters: {'learning_rate': 0.09926451381214513, 'weight_decay': 0.0052510996845763665}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 385.25it/s, loss=0.347, v_num=0]

[32m[I 2023-01-08 23:11:33,727][0m Trial 55 finished with value: 0.2573448121547699 and parameters: {'learning_rate': 0.06545045759833731, 'weight_decay': 0.005376565604252919}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 372.23it/s, loss=0.689, v_num=0]

[32m[I 2023-01-08 23:11:33,936][0m Trial 56 finished with value: 0.44532057642936707 and parameters: {'learning_rate': 0.04459709333617457, 'weight_decay': 0.004200923660355229}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 410.23it/s, loss=0.646, v_num=0]


[32m[I 2023-01-08 23:11:34,130][0m Trial 57 finished with value: 0.5164177417755127 and parameters: {'learning_rate': 0.021078118320135918, 'weight_decay': 0.0021808985279637965}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


                                                                            

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 352.49it/s, loss=0.459, v_num=0]

[32m[I 2023-01-08 23:11:35,197][0m Trial 64 finished with value: 0.2911171019077301 and parameters: {'learning_rate': 0.08149976648468153, 'weight_decay': 0.002250973578274163}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 208.89it/s, loss=0.61, v_num=0] 

[32m[I 2023-01-08 23:11:35,551][0m Trial 65 finished with value: 0.48937007784843445 and parameters: {'learning_rate': 0.030823375968562345, 'weight_decay': 0.000626963382702796}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 389.95it/s, loss=0.319, v_num=0]

[32m[I 2023-01-08 23:11:35,765][0m Trial 66 finished with value: 0.2444782853126526 and parameters: {'learning_rate': 0.09622597630395366, 'weight_decay': 0.004998226971108804}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 345.60it/s, loss=0.395, v_num=0]


[32m[I 2023-01-08 23:11:35,996][0m Trial 67 finished with value: 0.3015871047973633 and parameters: {'learning_rate': 0.07476993832289393, 'weight_decay': 0.004532271048136581}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 350.61it/s, loss=0.278, v_num=0]


[32m[I 2023-01-08 23:11:36,218][0m Trial 68 finished with value: 0.2536575496196747 and parameters: {'learning_rate': 0.09692574981441031, 'weight_decay': 0.014510352940613983}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 391.07it/s, loss=0.514, v_num=0]

[32m[I 2023-01-08 23:11:36,421][0m Trial 69 finished with value: 0.34852322936058044 and parameters: {'learning_rate': 0.052826009220753575, 'weight_decay': 0.008734775327146144}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 346.43it/s, loss=0.425, v_num=0]


[32m[I 2023-01-08 23:11:36,658][0m Trial 70 finished with value: 0.3196139335632324 and parameters: {'learning_rate': 0.0435092662778254, 'weight_decay': 3.336718377171069e-05}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 363.78it/s, loss=0.375, v_num=0]

[32m[I 2023-01-08 23:11:36,872][0m Trial 71 finished with value: 0.2611806392669678 and parameters: {'learning_rate': 0.06361204209242728, 'weight_decay': 1.3066543218375387e-05}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 366.62it/s, loss=0.364, v_num=0]

[32m[I 2023-01-08 23:11:37,087][0m Trial 72 finished with value: 0.2762264311313629 and parameters: {'learning_rate': 0.09872926945430814, 'weight_decay': 0.002646726613795081}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 408.85it/s, loss=0.306, v_num=0]


[32m[I 2023-01-08 23:11:37,286][0m Trial 73 finished with value: 0.2582390308380127 and parameters: {'learning_rate': 0.07421261295963015, 'weight_decay': 0.005951605304303238}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 303.80it/s, loss=0.521, v_num=0]


[32m[I 2023-01-08 23:11:37,542][0m Trial 74 finished with value: 0.4182218015193939 and parameters: {'learning_rate': 0.027324086477317305, 'weight_decay': 0.0002937177239137875}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 385.48it/s, loss=0.414, v_num=0]

[32m[I 2023-01-08 23:11:37,750][0m Trial 75 finished with value: 0.31115010380744934 and parameters: {'learning_rate': 0.04783243125802071, 'weight_decay': 0.0019422173231578353}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores





IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0:  96%|█████████▌| 65/68 [00:00<00:00, 375.95it/s, loss=1.31, v_num=0]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 377.51it/s, loss=0.546, v_num=0]

[32m[I 2023-01-08 23:11:38,887][0m Trial 82 finished with value: 0.346096009016037 and parameters: {'learning_rate': 0.05285079746780974, 'weight_decay': 0.0007245881462535157}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 258.14it/s, loss=1.59, v_num=0]


[32m[I 2023-01-08 23:11:39,183][0m Trial 83 finished with value: 1.5954949855804443 and parameters: {'learning_rate': 0.00014529213963662853, 'weight_decay': 0.0015449693194852696}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 198.03it/s, loss=0.329, v_num=0]

[32m[I 2023-01-08 23:11:39,575][0m Trial 84 finished with value: 0.24810254573822021 and parameters: {'learning_rate': 0.07646364276222026, 'weight_decay': 0.00044146764019313426}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 274.77it/s, loss=0.359, v_num=0]


[32m[I 2023-01-08 23:11:39,880][0m Trial 85 finished with value: 0.284080445766449 and parameters: {'learning_rate': 0.03965266777065738, 'weight_decay': 0.00021157094547784685}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 260.43it/s, loss=0.32, v_num=0] 


[32m[I 2023-01-08 23:11:40,184][0m Trial 86 finished with value: 0.24524596333503723 and parameters: {'learning_rate': 0.09825978217590155, 'weight_decay': 0.00044876316703422616}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 337.08it/s, loss=0.298, v_num=0]


[32m[I 2023-01-08 23:11:40,416][0m Trial 87 finished with value: 0.2436017543077469 and parameters: {'learning_rate': 0.08891259505584889, 'weight_decay': 0.0004643759135704179}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 316.98it/s, loss=0.373, v_num=0]


[32m[I 2023-01-08 23:11:40,663][0m Trial 88 finished with value: 0.25992444157600403 and parameters: {'learning_rate': 0.05863319774028265, 'weight_decay': 0.00011151650832510528}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 358.74it/s, loss=0.301, v_num=0]


[32m[I 2023-01-08 23:11:40,902][0m Trial 89 finished with value: 0.2614080309867859 and parameters: {'learning_rate': 0.09980178391684386, 'weight_decay': 0.0004219216742864661}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 350.30it/s, loss=0.333, v_num=0]


[32m[I 2023-01-08 23:11:41,125][0m Trial 90 finished with value: 0.27060097455978394 and parameters: {'learning_rate': 0.08290032403351626, 'weight_decay': 0.00047666619212507317}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 375.72it/s, loss=0.381, v_num=0]

[32m[I 2023-01-08 23:11:41,333][0m Trial 91 finished with value: 0.2469727098941803 and parameters: {'learning_rate': 0.0665392591743621, 'weight_decay': 0.00023831661508799865}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 338.12it/s, loss=0.314, v_num=0]

[32m[I 2023-01-08 23:11:41,571][0m Trial 92 finished with value: 0.2563928961753845 and parameters: {'learning_rate': 0.06652274803825682, 'weight_decay': 0.0001999619119338508}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 364.70it/s, loss=0.28, v_num=0] 

[32m[I 2023-01-08 23:11:41,789][0m Trial 93 finished with value: 0.28474143147468567 and parameters: {'learning_rate': 0.0803625904987773, 'weight_decay': 0.0003250878141098513}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 362.17it/s, loss=0.468, v_num=0]

[32m[I 2023-01-08 23:11:42,007][0m Trial 94 finished with value: 0.3410435616970062 and parameters: {'learning_rate': 0.04982744671783909, 'weight_decay': 7.839948031077568e-05}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 364.37it/s, loss=0.355, v_num=0]

[32m[I 2023-01-08 23:11:42,218][0m Trial 95 finished with value: 0.280737966299057 and parameters: {'learning_rate': 0.06522392086569541, 'weight_decay': 0.00015483879735909205}. Best is trial 12 with value: 0.24191558361053467.[0m





GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 68/68 [00:00<00:00, 353.75it/s, loss=0.442, v_num=0]


[32m[I 2023-01-08 23:11:42,440][0m Trial 96 finished with value: 0.3060876727104187 and parameters: {'learning_rate': 0.03763460446020724, 'weight_decay': 0.0009382235989019492}. Best is trial 12 with value: 0.24191558361053467.[0m
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type    | Params
-----------------------------------
0 | linear | Linear  | 6     
1 | loss   | MSELoss | 0     
-----------------------------------
6         Trainable params
0         Non-trainable params
6         Total params
0.000     Total estimated model params size (MB)


Epoch 0:  85%|████████▌ | 58/68 [00:00<00:00, 327.97it/s, loss=0.279, v_num=0]


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

