# Training a Neural Network with PyTorch

In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn #Base class for all neural network modules.

## Load data

In [2]:
# Mount Google Drive when running on Colab; skip the mount anywhere else
try:
    from google.colab import drive
except ImportError:
    # google.colab is only importable inside a Colab runtime. On any other
    # kernel there is nothing to mount, so files are read from local disk.
    drive = None
else:
    drive.mount('/content/drive')

cdir = '' #path to your directory

Mounted at /content/drive


In [3]:
# Join with os.path.join so `cdir` works with or without a trailing separator
# (an empty `cdir` resolves to a path relative to the working directory).
data = pd.read_csv(os.path.join(cdir, 'california_housing_train.csv'))
# Per-column summary statistics as a quick sanity check of what was loaded
data.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0,17000.0
mean,-119.562108,35.625225,28.589353,2643.664412,539.410824,1429.573941,501.221941,3.883578,207300.912353
std,2.005166,2.13734,12.586937,2179.947071,421.499452,1147.852959,384.520841,1.908157,115983.764387
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.79,33.93,18.0,1462.0,297.0,790.0,282.0,2.566375,119400.0
50%,-118.49,34.25,29.0,2127.0,434.0,1167.0,409.0,3.5446,180400.0
75%,-118.0,37.72,37.0,3151.25,648.25,1721.0,605.25,4.767,265000.0
max,-114.31,41.95,52.0,37937.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [4]:
# Replace missing entries with 0
data = data.fillna(0)
# Standardise every column (features and target alike): subtract the column
# mean and divide by the column standard deviation
col_mean = data.mean()
col_std = data.std()
data = (data - col_mean) / col_std
# The last column is the target ('median_house_value'); all others are features
feature_cols = data.columns[:-1]
target_col = data.columns[-1]
x_df = data[feature_cols]
y_df = data[[target_col]]  # double brackets keep the target as a one-column frame
# Convert both frames to float tensors
x = torch.tensor(x_df.to_numpy(), dtype=torch.float)
y = torch.tensor(y_df.to_numpy(), dtype=torch.float)

print(f"x shape: {x.shape}")
print(f"y shape: {y.shape}")

x shape: torch.Size([17000, 8])
y shape: torch.Size([17000, 1])


## Create a Neural Network

In [5]:
class Net(nn.Module):
    """Fully connected network with three hidden layers.

    Args:
        D_in: Number of input features per sample.
        H1: Width of the first hidden layer.
        H2: Width of the second hidden layer.
        H3: Width of the third hidden layer.
        D_out: Number of output values per sample.
        activation: Optional callable (for example ``nn.ReLU()`` or
            ``torch.tanh``) applied after each of the three hidden layers.
            The default ``None`` applies nothing between layers, so the four
            ``nn.Linear`` layers compose into a single affine map.
    """

    def __init__(self, D_in, H1, H2, H3, D_out, activation=None):
        super().__init__()

        self.linear1 = nn.Linear(D_in, H1)
        self.linear2 = nn.Linear(H1, H2)
        self.linear3 = nn.Linear(H2, H3)
        self.linear4 = nn.Linear(H3, D_out)
        # Stored as None by default so the module repr and state_dict stay
        # identical to a network built without this argument.
        self.activation = activation

    def forward(self, x):
        """Run ``x`` through the hidden layers and return the output layer's result."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = layer(hidden)
            if self.activation is not None:
                hidden = self.activation(hidden)
        # No activation on the output layer: the raw linear output is returned
        return self.linear4(hidden)

In [6]:
#Seed PyTorch's global RNG so weight initialisation and batch shuffling repeat on re-run
torch.manual_seed(0)

#Define layer sizes
D_in = x.shape[1] #size of the input sample
H1 = 128
H2 = 64
H3 = 32
D_out = 1

#Define Hyperparameters
learning_rate = 1e-4 # You can also experiment with different learning rates
batch_size = 64

#Initialise model, loss, optimizer
model = Net(D_in, H1, H2, H3, D_out)
# reduction='sum' adds up the squared errors of a batch instead of averaging them.
# The target is a standardised continuous value, so use regression losses
# (e.g. nn.L1Loss, nn.SmoothL1Loss); the BCE losses expect targets in [0, 1].
loss_func = nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # You can also try Adam and AdamW

#Initialise dataloader
dataset = torch.utils.data.TensorDataset(x, y) #pairs each row of x (input features) with its row of y (target)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [7]:
# Show the module's repr to check the layer sizes
model

Net(
  (linear1): Linear(in_features=8, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=32, bias=True)
  (linear4): Linear(in_features=32, out_features=1, bias=True)
)

## Train Network

In [8]:
for epoch in range(5):
    batch_losses = []

    for x_batch, y_batch in dataloader:
        #Clear the gradients left over from the previous batch
        optimizer.zero_grad()

        #Forward pass and loss for the current batch
        y_pred = model(x_batch)
        loss = loss_func(y_pred, y_batch)

        #Backpropagate the batch loss, then update the weights from the gradients
        loss.backward()
        optimizer.step()

        #Keep the scalar loss of this batch for the epoch summary
        batch_losses.append(loss.item())

    #Report the mean of the per-batch losses for this epoch
    print(f"Epoch {epoch:3}: Loss = {sum(batch_losses)/len(dataloader):.5f}")


Epoch   0: Loss = 29.48857
Epoch   1: Loss = 23.76013
Epoch   2: Loss = 23.27231
Epoch   3: Loss = 23.23699
Epoch   4: Loss = 23.20877


## Optimization

Optuna framework

Optuna Concepts
- Objective Function: This is the function that defines the machine learning task you're trying to optimize. In our case, it trains a neural network model and returns the training loss of its final epoch as the score (a separate validation step would be needed to score the model on unseen data). Optuna will run this function multiple times with different hyperparameter settings.

- Trial: A trial is a single execution of the objective function using a specific set of hyperparameters. Each trial evaluates how well that particular configuration performs.

- Study: A study is a collection of trials. Optuna runs several trials as part of a study to explore different hyperparameter combinations. After all trials are complete, the study helps identify the best-performing configuration—i.e., the one that minimizes (or maximizes) the objective function.

- Parameter: These are the values we are trying to optimize—like learning rate, optimizer type, or layer sizes. Each trial uses a different set of parameters. The goal is to find the combination that results in the best performance of the model.

In [9]:
# !pip install optuna
import optuna

#Optuna sample

def suggest_hyperparameters(trial):
    """Ask the trial which optimizer to use.

    Args:
        trial: Object exposing ``suggest_categorical(name, choices)``,
            such as an Optuna trial.

    Returns:
        The value the trial picked for "optimizer_name", drawn from
        "Adam" and "AdamW".
    """
    optimizer_choices = ["Adam", "AdamW"]
    return trial.suggest_categorical("optimizer_name", optimizer_choices)

#Optuna sample
def objective(trial):
    """Train a fresh ``Net`` with the trial's optimizer and return its final loss.

    Args:
        trial: Optuna trial, forwarded to ``suggest_hyperparameters`` to pick
            the optimizer name.

    Returns:
        The mean of the per-batch losses (each a summed squared error, because
        the loss uses ``reduction='sum'``) over the last training epoch.

    Raises:
        ValueError: If the suggested optimizer name has no entry in the
            optimizer table below.
    """
    optimizer_name = suggest_hyperparameters(trial)

    #Define layer sizes
    D_in = x.shape[1] #size of the input sample
    H1 = 128
    H2 = 64
    H3 = 32
    D_out = 1

    #Define Hyperparameters
    learning_rate = 1e-4 # You can also experiment with different learning rates
    n_epochs = 5

    #Initialise model, loss, optimizer
    model = Net(D_in, H1, H2, H3, D_out)
    loss_func = nn.MSELoss(reduction='sum')

    # Dispatch table instead of an if/elif chain: a name suggested by the trial
    # but missing here fails loudly rather than leaving `optimizer` unbound.
    optimizer_classes = {
        "Adam": torch.optim.Adam,
        "AdamW": torch.optim.AdamW,
        "SGD": torch.optim.SGD,
    }
    if optimizer_name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; "
            f"expected one of {sorted(optimizer_classes)}"
        )
    optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=learning_rate)

    #Initialise dataloader
    dataset = torch.utils.data.TensorDataset(x, y) #pairs each row of x (input features) with its row of y (target)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

    for epoch in range(n_epochs):
        batch_losses = []

        for x_batch, y_batch in dataloader:
            y_pred = model(x_batch)

            loss = loss_func(y_pred, y_batch)
            batch_losses.append(loss.item())
            #Delete previously stored gradients
            optimizer.zero_grad()
            #Perform backpropagation starting from the loss calculated for this batch
            loss.backward()
            #Update model's weights based on the gradients calculated during backprop
            optimizer.step()

        # Computed once per epoch; the value from the last epoch is the trial's score
        epoch_loss = sum(batch_losses)/len(dataloader)
        print(f"Epoch {epoch:3}: Loss = {epoch_loss:.5f}")
    return epoch_loss

# Lower objective values are better, hence direction='minimize'
optuna_sample = optuna.create_study(study_name='lr-minim-sample', direction='minimize')
# Run the objective 50 times; each call is one trial of the study
optuna_sample.optimize(objective, n_trials=50)

# Summarise the study: trial count, best parameter set and its objective value
for label, value in (
    ('numbers of the finished trials:', len(optuna_sample.trials)),
    ('the best params:', optuna_sample.best_trial.params),
    ('the best value:', optuna_sample.best_value),
):
    print(label, value)

# 50 trials gave the best hyperparameters found for this model.
# NOTE(review): the search space holds only two optimizer names, so trials that
# share a name differ only through random initialisation and batch shuffling.

Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.2.1


[I 2025-04-02 06:27:39,012] A new study created in memory with name: lr-minim-sample


Epoch   0: Loss = 31.29959
Epoch   1: Loss = 23.51294
Epoch   2: Loss = 23.21660
Epoch   3: Loss = 23.17496


[I 2025-04-02 06:27:42,065] Trial 0 finished with value: 23.21031472198945 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 0 with value: 23.21031472198945.


Epoch   4: Loss = 23.21031
Epoch   0: Loss = 34.60865
Epoch   1: Loss = 24.01241
Epoch   2: Loss = 23.20606
Epoch   3: Loss = 23.16348


[I 2025-04-02 06:27:44,722] Trial 1 finished with value: 23.13915596868759 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.13916
Epoch   0: Loss = 33.57677
Epoch   1: Loss = 23.59758
Epoch   2: Loss = 23.20317
Epoch   3: Loss = 23.17824


[I 2025-04-02 06:27:47,262] Trial 2 finished with value: 23.154956276255444 and parameters: {'optimizer_name': 'Adam'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.15496
Epoch   0: Loss = 34.69173
Epoch   1: Loss = 23.84621
Epoch   2: Loss = 23.25390
Epoch   3: Loss = 23.17674


[I 2025-04-02 06:27:49,816] Trial 3 finished with value: 23.19330040135778 and parameters: {'optimizer_name': 'Adam'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.19330
Epoch   0: Loss = 33.00095
Epoch   1: Loss = 23.52067
Epoch   2: Loss = 23.23313
Epoch   3: Loss = 23.13299


[I 2025-04-02 06:27:53,354] Trial 4 finished with value: 23.165939768454187 and parameters: {'optimizer_name': 'Adam'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.16594
Epoch   0: Loss = 34.84033
Epoch   1: Loss = 24.02460
Epoch   2: Loss = 23.40069
Epoch   3: Loss = 23.22896


[I 2025-04-02 06:27:56,055] Trial 5 finished with value: 23.16449575137375 and parameters: {'optimizer_name': 'Adam'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.16450
Epoch   0: Loss = 35.76769
Epoch   1: Loss = 23.86411
Epoch   2: Loss = 23.36183
Epoch   3: Loss = 23.16832


[I 2025-04-02 06:27:58,720] Trial 6 finished with value: 23.142048495156423 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.14205
Epoch   0: Loss = 33.49070
Epoch   1: Loss = 23.74216
Epoch   2: Loss = 23.29572
Epoch   3: Loss = 23.21700


[I 2025-04-02 06:28:01,278] Trial 7 finished with value: 23.22355135939175 and parameters: {'optimizer_name': 'Adam'}. Best is trial 1 with value: 23.13915596868759.


Epoch   4: Loss = 23.22355
Epoch   0: Loss = 33.28112
Epoch   1: Loss = 23.90244
Epoch   2: Loss = 23.30817
Epoch   3: Loss = 23.11854


[I 2025-04-02 06:28:03,955] Trial 8 finished with value: 23.124969482421875 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 8 with value: 23.124969482421875.


Epoch   4: Loss = 23.12497
Epoch   0: Loss = 34.03190
Epoch   1: Loss = 23.59471
Epoch   2: Loss = 23.17969
Epoch   3: Loss = 23.12425


[I 2025-04-02 06:28:07,595] Trial 9 finished with value: 23.112174754752253 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 9 with value: 23.112174754752253.


Epoch   4: Loss = 23.11217
Epoch   0: Loss = 31.09470
Epoch   1: Loss = 23.60501
Epoch   2: Loss = 23.21930
Epoch   3: Loss = 23.19040


[I 2025-04-02 06:28:10,166] Trial 10 finished with value: 23.101047476431482 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.10105
Epoch   0: Loss = 35.46032
Epoch   1: Loss = 23.86225
Epoch   2: Loss = 23.24601
Epoch   3: Loss = 23.15635


[I 2025-04-02 06:28:12,750] Trial 11 finished with value: 23.11816352829897 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.11816
Epoch   0: Loss = 33.32556
Epoch   1: Loss = 23.75517
Epoch   2: Loss = 23.22492
Epoch   3: Loss = 23.15171


[I 2025-04-02 06:28:15,311] Trial 12 finished with value: 23.11686464897672 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.11686
Epoch   0: Loss = 33.67721
Epoch   1: Loss = 23.47463
Epoch   2: Loss = 23.15713
Epoch   3: Loss = 23.12756


[I 2025-04-02 06:28:18,196] Trial 13 finished with value: 23.103127099517593 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.10313
Epoch   0: Loss = 35.55361
Epoch   1: Loss = 24.00857
Epoch   2: Loss = 23.24708
Epoch   3: Loss = 23.15202


[I 2025-04-02 06:28:21,623] Trial 14 finished with value: 23.23316358982172 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.23316
Epoch   0: Loss = 33.17685
Epoch   1: Loss = 23.73510
Epoch   2: Loss = 23.17611
Epoch   3: Loss = 23.14761


[I 2025-04-02 06:28:24,257] Trial 15 finished with value: 23.14906140736171 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 10 with value: 23.101047476431482.


Epoch   4: Loss = 23.14906
Epoch   0: Loss = 30.89387
Epoch   1: Loss = 23.44758
Epoch   2: Loss = 23.12487
Epoch   3: Loss = 23.13638


[I 2025-04-02 06:28:27,414] Trial 16 finished with value: 23.040539906437235 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.04054
Epoch   0: Loss = 31.21830
Epoch   1: Loss = 23.40517
Epoch   2: Loss = 23.14080
Epoch   3: Loss = 23.10432


[I 2025-04-02 06:28:30,273] Trial 17 finished with value: 23.08905181669651 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.08905
Epoch   0: Loss = 33.65367
Epoch   1: Loss = 23.49457
Epoch   2: Loss = 23.19788
Epoch   3: Loss = 23.16490


[I 2025-04-02 06:28:33,843] Trial 18 finished with value: 23.117786518613197 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.11779
Epoch   0: Loss = 32.10825
Epoch   1: Loss = 23.37132
Epoch   2: Loss = 23.21047
Epoch   3: Loss = 23.13083


[I 2025-04-02 06:28:36,702] Trial 19 finished with value: 23.154725899373677 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.15473
Epoch   0: Loss = 31.34207
Epoch   1: Loss = 23.65640
Epoch   2: Loss = 23.16744
Epoch   3: Loss = 23.12192


[I 2025-04-02 06:28:39,330] Trial 20 finished with value: 23.1222745780658 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.12227
Epoch   0: Loss = 33.05167
Epoch   1: Loss = 23.44846
Epoch   2: Loss = 23.12669
Epoch   3: Loss = 23.08210


[I 2025-04-02 06:28:41,956] Trial 21 finished with value: 23.055470332167204 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.05547
Epoch   0: Loss = 34.42663
Epoch   1: Loss = 24.23037
Epoch   2: Loss = 23.28181
Epoch   3: Loss = 23.11788


[I 2025-04-02 06:28:44,623] Trial 22 finished with value: 23.128061853853385 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.12806
Epoch   0: Loss = 32.77266
Epoch   1: Loss = 23.68752
Epoch   2: Loss = 23.27705
Epoch   3: Loss = 23.25161


[I 2025-04-02 06:28:48,311] Trial 23 finished with value: 23.221544627856492 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.22154
Epoch   0: Loss = 30.31454
Epoch   1: Loss = 23.29962
Epoch   2: Loss = 23.08519
Epoch   3: Loss = 23.12082


[I 2025-04-02 06:28:50,919] Trial 24 finished with value: 23.145910148333787 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.14591
Epoch   0: Loss = 36.38152
Epoch   1: Loss = 24.06523
Epoch   2: Loss = 23.17763
Epoch   3: Loss = 23.14491


[I 2025-04-02 06:28:53,625] Trial 25 finished with value: 23.086922862475976 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.08692
Epoch   0: Loss = 32.46458
Epoch   1: Loss = 23.50222
Epoch   2: Loss = 23.16698
Epoch   3: Loss = 23.13265


[I 2025-04-02 06:28:56,146] Trial 26 finished with value: 23.083422058507015 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.08342
Epoch   0: Loss = 31.93635
Epoch   1: Loss = 23.92028
Epoch   2: Loss = 23.36039
Epoch   3: Loss = 23.21987


[I 2025-04-02 06:28:58,956] Trial 27 finished with value: 23.14889273249117 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.14889
Epoch   0: Loss = 33.52912
Epoch   1: Loss = 23.85999
Epoch   2: Loss = 23.15931
Epoch   3: Loss = 23.09783


[I 2025-04-02 06:29:02,325] Trial 28 finished with value: 23.05242924224165 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.05243
Epoch   0: Loss = 31.11266
Epoch   1: Loss = 23.64379
Epoch   2: Loss = 23.24553
Epoch   3: Loss = 23.15510


[I 2025-04-02 06:29:04,862] Trial 29 finished with value: 23.18350352739033 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.18350
Epoch   0: Loss = 31.91802
Epoch   1: Loss = 23.43211
Epoch   2: Loss = 23.15399
Epoch   3: Loss = 23.19235


[I 2025-04-02 06:29:07,368] Trial 30 finished with value: 23.100866927240126 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.10087
Epoch   0: Loss = 34.37062
Epoch   1: Loss = 23.57260
Epoch   2: Loss = 23.15532
Epoch   3: Loss = 23.18915


[I 2025-04-02 06:29:09,872] Trial 31 finished with value: 23.228296713721484 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.22830
Epoch   0: Loss = 32.72234
Epoch   1: Loss = 23.56443
Epoch   2: Loss = 23.12470
Epoch   3: Loss = 23.13622


[I 2025-04-02 06:29:12,660] Trial 32 finished with value: 23.11801931194793 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.11802
Epoch   0: Loss = 33.53127
Epoch   1: Loss = 23.91110
Epoch   2: Loss = 23.27688
Epoch   3: Loss = 23.23181


[I 2025-04-02 06:29:15,956] Trial 33 finished with value: 23.144952852923172 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.14495
Epoch   0: Loss = 34.56350
Epoch   1: Loss = 23.99636
Epoch   2: Loss = 23.32643
Epoch   3: Loss = 23.13621


[I 2025-04-02 06:29:18,469] Trial 34 finished with value: 23.145787934611615 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.14579
Epoch   0: Loss = 32.46600
Epoch   1: Loss = 23.64442
Epoch   2: Loss = 23.15010
Epoch   3: Loss = 23.11385


[I 2025-04-02 06:29:20,945] Trial 35 finished with value: 23.093333237153246 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.09333
Epoch   0: Loss = 33.62105
Epoch   1: Loss = 24.13670
Epoch   2: Loss = 23.31429
Epoch   3: Loss = 23.20029


[I 2025-04-02 06:29:23,495] Trial 36 finished with value: 23.186636634339067 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.18664
Epoch   0: Loss = 34.51848
Epoch   1: Loss = 23.77005
Epoch   2: Loss = 23.24316
Epoch   3: Loss = 23.16058


[I 2025-04-02 06:29:26,312] Trial 37 finished with value: 23.081230342836307 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.08123
Epoch   0: Loss = 32.55698
Epoch   1: Loss = 23.79508
Epoch   2: Loss = 23.29917
Epoch   3: Loss = 23.15438


[I 2025-04-02 06:29:29,603] Trial 38 finished with value: 23.114193159834784 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.11419
Epoch   0: Loss = 32.18251
Epoch   1: Loss = 23.63748
Epoch   2: Loss = 23.19127
Epoch   3: Loss = 23.14381


[I 2025-04-02 06:29:32,106] Trial 39 finished with value: 23.084149288951902 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.08415
Epoch   0: Loss = 32.14786
Epoch   1: Loss = 23.88537
Epoch   2: Loss = 23.21603
Epoch   3: Loss = 23.19247


[I 2025-04-02 06:29:34,627] Trial 40 finished with value: 23.134404687953175 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.13440
Epoch   0: Loss = 31.45147
Epoch   1: Loss = 23.64441
Epoch   2: Loss = 23.19004
Epoch   3: Loss = 23.17815


[I 2025-04-02 06:29:37,158] Trial 41 finished with value: 23.172456997677795 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.17246
Epoch   0: Loss = 33.21847
Epoch   1: Loss = 23.81368
Epoch   2: Loss = 23.44036
Epoch   3: Loss = 23.20101


[I 2025-04-02 06:29:40,037] Trial 42 finished with value: 23.204307541811378 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.20431
Epoch   0: Loss = 33.62914
Epoch   1: Loss = 23.55669
Epoch   2: Loss = 23.25975
Epoch   3: Loss = 23.17095


[I 2025-04-02 06:29:43,268] Trial 43 finished with value: 23.136746775835082 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.13675
Epoch   0: Loss = 32.22704
Epoch   1: Loss = 23.71771
Epoch   2: Loss = 23.14250
Epoch   3: Loss = 23.22268


[I 2025-04-02 06:29:45,756] Trial 44 finished with value: 23.13194851767748 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.13195
Epoch   0: Loss = 35.61102
Epoch   1: Loss = 24.10051
Epoch   2: Loss = 23.27134
Epoch   3: Loss = 23.09242


[I 2025-04-02 06:29:48,254] Trial 45 finished with value: 23.113476297909155 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.11348
Epoch   0: Loss = 34.54420
Epoch   1: Loss = 23.88102
Epoch   2: Loss = 23.17306
Epoch   3: Loss = 23.14120


[I 2025-04-02 06:29:50,737] Trial 46 finished with value: 23.110491239934937 and parameters: {'optimizer_name': 'Adam'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.11049
Epoch   0: Loss = 32.46770
Epoch   1: Loss = 23.74761
Epoch   2: Loss = 23.18963
Epoch   3: Loss = 23.07579


[I 2025-04-02 06:29:53,715] Trial 47 finished with value: 23.056398757418297 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.05640
Epoch   0: Loss = 32.43928
Epoch   1: Loss = 23.73180
Epoch   2: Loss = 23.19591
Epoch   3: Loss = 23.14503


[I 2025-04-02 06:29:57,017] Trial 48 finished with value: 23.146095713278406 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.14610
Epoch   0: Loss = 32.32622
Epoch   1: Loss = 23.83744
Epoch   2: Loss = 23.28816
Epoch   3: Loss = 23.17083


[I 2025-04-02 06:29:59,593] Trial 49 finished with value: 23.129662976229103 and parameters: {'optimizer_name': 'AdamW'}. Best is trial 16 with value: 23.040539906437235.


Epoch   4: Loss = 23.12966
numbers of the finished trials: 50
the best params: {'optimizer_name': 'AdamW'}
the best value: 23.040539906437235
