In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import GroupKFold
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import optuna


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class DynamicRegressionModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(DynamicRegressionModel, self).__init__()
        layers = [nn.Linear(input_size, hidden_sizes[0]), nn.ReLU()]
        
        for i in range(1, len(hidden_sizes)):
            layers += [nn.Linear(hidden_sizes[i-1], hidden_sizes[i]), nn.ReLU()]
        
        layers += [nn.Linear(hidden_sizes[-1], output_size)]
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


In [3]:
COLS_TO_DROP = ['prune_name','rgft_name','orig_ds_ll','ll_prune','time_prune',
                'd_ll_prune', 'orig_ds_id','ll_rgft','time_rgft', 'd_ll_merged',                 
                'name2ntaxa_pruned_prune','name2ntaxa_remaining_prune',
                'name2ntaxa_pruned_rgft','name2ntaxa_remaining_rgft']

#COLS_TO_DROP = ['prune_name','rgft_name','orig_ds_ll','ll_prune','time_prune',
#                'd_ll_prune', 'orig_ds_id','ll_rgft','time_rgft', 'd_ll_merged']

TARGET_COL = 'd_ll_merged'

file_path = './data/validation_data/learning_all_moves.csv'
df = pd.read_csv(file_path, index_col='iteration', low_memory=False)


In [4]:
groups = df['orig_ds_id']
print(f'Are present {len(groups.unique())} different datasets')
data = df.drop(COLS_TO_DROP, axis=1)

labels = df[TARGET_COL].values
features = data.values

# Normalize features
scaler = StandardScaler()
#scaler = RobustScaler()
features_scaled = scaler.fit_transform(features)

# Convert to PyTorch tensors
features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.float32)

Are present 266 different datasets


In [9]:
BATCH_SIZE = 64
OUTPUT_SIZE = 1  # predicting only 'd_ll_merged' value
NUM_EPOCHS = 10

def objective(trial):
	# Define the hyperparameters to tune
	lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
	num_layers = trial.suggest_int('num_layers', 1, 5)
	hidden_sizes = [trial.suggest_int(f'n_units_l{i}', 16, 128) for i in range(num_layers)]
	#n_splits = trial.suggest_int('n_splits', 5, len(groups.unique()))

	# Cross-validation
	gkf = GroupKFold(n_splits=10)
	mae_kfold = []
	r2_kfold = []
	for fold, (train_idx, val_idx) in enumerate(gkf.split(features_scaled, labels, groups=groups)):
		print(f"Start Fold {fold+1}")

		X_train_fold, y_train_fold = features_scaled[train_idx], labels[train_idx]
		X_val_fold, y_val_fold = features_scaled[val_idx], labels[val_idx]
		
		train_dataset = TensorDataset(torch.tensor(X_train_fold, dtype=torch.float32), 
									torch.tensor(y_train_fold, dtype=torch.float32))
		val_dataset = TensorDataset(torch.tensor(X_val_fold, dtype=torch.float32), 
									torch.tensor(y_val_fold, dtype=torch.float32))
		
		train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
		val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE)
  
		input_size = X_train_fold.shape[1]  # Number of features
		model = DynamicRegressionModel(input_size, hidden_sizes, OUTPUT_SIZE)
		criterion = nn.MSELoss()
		optimizer = torch.optim.Adam(model.parameters(), lr=lr)		
		
		# Training loop for the current fold
		num_epochs = NUM_EPOCHS
		for epoch in range(num_epochs):
			epoch_losses = []
			for inputs, targets in train_loader:
				optimizer.zero_grad()
				outputs = model(inputs)
				loss = criterion(outputs, targets.view(-1, 1))
				loss.backward()
				optimizer.step()
				
				epoch_losses.append(loss.item())
			
			avg_loss = sum(epoch_losses) / len(epoch_losses)
			print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, AVGLoss: {avg_loss:.4f}')

		model.eval()  # Set the model to evaluation mode
		with torch.no_grad():
			predictions = []
			actuals = []
			for X_batch, y_batch in val_loader:
				y_pred = model(X_batch)
				predictions.append(y_pred.numpy())
				actuals.append(y_batch.numpy())

			# Flatten the list if predictions and actuals are nested lists
			predictions = np.concatenate(predictions, axis=0)
			actuals = np.concatenate(actuals, axis=0)

			mae = mean_absolute_error(actuals, predictions)
			mse = mean_squared_error(actuals, predictions)
			r2 = r2_score(actuals, predictions)
			print(f'Fold {fold+1}, MAE: {mae}, MSE: {mse}, R^2: {r2}')
		mae_kfold.append(mae)
		r2_kfold.append(r2)

	return np.mean(r2_kfold)

study = optuna.create_study(direction= 'maximize')
#study = optuna.create_study(directions=['minimize','maximize'])
study.optimize(objective, n_trials=30)

[I 2024-02-17 00:40:48,288] A new study created in memory with name: no-name-c68298de-dd09-4758-aded-657823b7e7b0


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Start Fold 1
Epoch [1/10], Loss: 7400.6753, AVGLoss: 28014.3609
Epoch [2/10], Loss: 14990.5625, AVGLoss: 21731.5352
Epoch [3/10], Loss: 11205.0771, AVGLoss: 17229.5196
Epoch [4/10], Loss: 14147.7373, AVGLoss: 16079.6509
Epoch [5/10], Loss: 6761.5991, AVGLoss: 15158.7718
Epoch [6/10], Loss: 46965.9609, AVGLoss: 14321.9484
Epoch [7/10], Loss: 11324.2051, AVGLoss: 13579.5589
Epoch [8/10], Loss: 19528.2441, AVGLoss: 12993.6143
Epoch [9/10], Loss: 4097.0659, AVGLoss: 12565.6541
Epoch [10/10], Loss: 21152.4922, AVGLoss: 12208.5294
Fold 1, MAE: 93.67870330810547, MSE: 26042.904296875, R^2: 0.32972757236738404
Start Fold 2
Epoch [1/10], Loss: 26112.5254, AVGLoss: 22308.5251
Epoch [2/10], Loss: 3407.9590, AVGLoss: 16241.8058
Epoch [3/10], Loss: 9133.9580, AVGLoss: 14536.7019
Epoch [4/10], Loss: 10329.0547, AVGLoss: 14112.1211
Epoch [5/10], Loss: 4473.9487, AVGLoss: 13809.7138
Epoch [6/10], Loss: 9549.0742, AVGLoss: 13550.8261
Epoch [7/10], Loss: 17834.3613, AVGLoss: 13363.2113
Epoch [8/10], Los

[I 2024-02-17 01:14:54,668] Trial 0 finished with value: 0.3867656384835983 and parameters: {'lr': 7.047249111653818e-05, 'num_layers': 4, 'n_units_l0': 76, 'n_units_l1': 54, 'n_units_l2': 104, 'n_units_l3': 33}. Best is trial 0 with value: 0.3867656384835983.


Fold 10, MAE: 69.723876953125, MSE: 15845.6806640625, R^2: 0.5613878985633882
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 10865.3936, AVGLoss: 19497.4670
Epoch [2/10], Loss: 55989.8750, AVGLoss: 15376.4636
Epoch [3/10], Loss: 46994.7812, AVGLoss: 14409.1091
Epoch [4/10], Loss: 6961.2827, AVGLoss: 13770.2580
Epoch [5/10], Loss: 25259.2246, AVGLoss: 13260.6888
Epoch [6/10], Loss: 10932.0625, AVGLoss: 12830.5825
Epoch [7/10], Loss: 10512.2285, AVGLoss: 12511.0988
Epoch [8/10], Loss: 5265.6938, AVGLoss: 12242.2121
Epoch [9/10], Loss: 4621.4932, AVGLoss: 12054.6310
Epoch [10/10], Loss: 5703.7012, AVGLoss: 11903.3612
Fold 1, MAE: 93.0851058959961, MSE: 23235.138671875, R^2: 0.4019916814047757
Start Fold 2
Epoch [1/10], Loss: 3875.0852, AVGLoss: 16430.8226
Epoch [2/10], Loss: 6663.2349, AVGLoss: 13874.9418
Epoch [3/10], Loss: 10076.8252, AVGLoss: 13513.4713
Epoch [4/10], Loss: 18037.2578, AVGLoss: 13239.0772
Epoch [5/10], Loss: 80871.1406, AVGLoss: 13029.2191
Epoch [6/10], Loss: 14529.6797, AVGLoss: 12776.5829
Epoch [7/10], Loss: 6363.9561, AVGLoss: 12531.3638
Epoch [8/10], Loss: 16370.9453, 

[I 2024-02-17 01:42:10,999] Trial 1 finished with value: 0.3916948616311412 and parameters: {'lr': 0.000991079707114551, 'num_layers': 2, 'n_units_l0': 118, 'n_units_l1': 114}. Best is trial 1 with value: 0.3916948616311412.


Fold 10, MAE: 67.83736419677734, MSE: 14684.0810546875, R^2: 0.5935412306852665
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 15453.2959, AVGLoss: 33200.8781
Epoch [2/10], Loss: 12580.2979, AVGLoss: 26961.6599
Epoch [3/10], Loss: 15018.8066, AVGLoss: 26465.5865
Epoch [4/10], Loss: 22283.8262, AVGLoss: 26121.6994
Epoch [5/10], Loss: 12884.0928, AVGLoss: 25847.7572
Epoch [6/10], Loss: 33475.8438, AVGLoss: 25608.2408
Epoch [7/10], Loss: 17941.1387, AVGLoss: 25374.6731
Epoch [8/10], Loss: 19242.6328, AVGLoss: 25141.0912
Epoch [9/10], Loss: 13676.0176, AVGLoss: 24905.4170
Epoch [10/10], Loss: 10788.9160, AVGLoss: 24667.6600
Fold 1, MAE: 96.88160705566406, MSE: 27316.310546875, R^2: 0.2969536412131789
Start Fold 2
Epoch [1/10], Loss: 105547.6719, AVGLoss: 27963.2540
Epoch [2/10], Loss: 10471.0000, AVGLoss: 22179.5628
Epoch [3/10], Loss: 31181.8047, AVGLoss: 21928.3235
Epoch [4/10], Loss: 12497.6572, AVGLoss: 21847.6454
Epoch [5/10], Loss: 8812.8828, AVGLoss: 21792.2867
Epoch [6/10], Loss: 6249.5649, AVGLoss: 21744.6422
Epoch [7/10], Loss: 10227.7607, AVGLoss: 21703.6739
Epoch [8/10], Loss: 63307

[I 2024-02-17 02:01:00,567] Trial 2 finished with value: 0.2431155669450163 and parameters: {'lr': 0.0002279391855375882, 'num_layers': 1, 'n_units_l0': 44}. Best is trial 1 with value: 0.3916948616311412.


Fold 10, MAE: 92.24771881103516, MSE: 23860.0546875, R^2: 0.33954826260375814
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 10921.1377, AVGLoss: 26199.1231
Epoch [2/10], Loss: 9251.4941, AVGLoss: 17373.2795
Epoch [3/10], Loss: 66226.9688, AVGLoss: 15325.6003
Epoch [4/10], Loss: 6845.2139, AVGLoss: 14144.0753
Epoch [5/10], Loss: 8579.7285, AVGLoss: 13409.2610
Epoch [6/10], Loss: 9086.0479, AVGLoss: 12991.9128
Epoch [7/10], Loss: 9124.3623, AVGLoss: 12721.9115
Epoch [8/10], Loss: 18923.3809, AVGLoss: 12525.8552
Epoch [9/10], Loss: 9260.4453, AVGLoss: 12323.4886
Epoch [10/10], Loss: 8605.7666, AVGLoss: 12091.8265
Fold 1, MAE: 85.86998748779297, MSE: 21231.587890625, R^2: 0.453557573055114
Start Fold 2
Epoch [1/10], Loss: 22497.7109, AVGLoss: 22433.3913
Epoch [2/10], Loss: 7523.6411, AVGLoss: 16523.3528
Epoch [3/10], Loss: 9125.3340, AVGLoss: 14539.7537
Epoch [4/10], Loss: 5289.6514, AVGLoss: 14075.0380
Epoch [5/10], Loss: 6852.4395, AVGLoss: 13773.0534
Epoch [6/10], Loss: 4241.1982, AVGLoss: 13540.3714
Epoch [7/10], Loss: 7186.8281, AVGLoss: 13360.6086
Epoch [8/10], Loss: 18128.4629, AVGLos

[I 2024-02-17 02:36:48,673] Trial 3 finished with value: 0.40348780795787587 and parameters: {'lr': 0.00010036057886686167, 'num_layers': 5, 'n_units_l0': 66, 'n_units_l1': 53, 'n_units_l2': 111, 'n_units_l3': 21, 'n_units_l4': 19}. Best is trial 3 with value: 0.40348780795787587.


Fold 10, MAE: 68.68785095214844, MSE: 15505.599609375, R^2: 0.5708014284838825
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 6610.4131, AVGLoss: 25275.6351
Epoch [2/10], Loss: 9613.7979, AVGLoss: 18055.3588
Epoch [3/10], Loss: 14977.1016, AVGLoss: 16720.5975
Epoch [4/10], Loss: 11432.6953, AVGLoss: 16127.8260
Epoch [5/10], Loss: 4429.3955, AVGLoss: 15523.3333
Epoch [6/10], Loss: 9374.0635, AVGLoss: 14966.5034
Epoch [7/10], Loss: 7002.0229, AVGLoss: 14578.9422
Epoch [8/10], Loss: 10739.2646, AVGLoss: 14283.4360
Epoch [9/10], Loss: 8868.3330, AVGLoss: 14042.2780
Epoch [10/10], Loss: 10983.9697, AVGLoss: 13824.7831
Fold 1, MAE: 93.4051284790039, MSE: 24234.787109375, R^2: 0.3762634687571518
Start Fold 2
Epoch [1/10], Loss: 7368.0225, AVGLoss: 20619.9407
Epoch [2/10], Loss: 11051.2734, AVGLoss: 15020.9309
Epoch [3/10], Loss: 9411.5566, AVGLoss: 14362.2422
Epoch [4/10], Loss: 8424.8730, AVGLoss: 14076.3673
Epoch [5/10], Loss: 6785.0679, AVGLoss: 13874.1410
Epoch [6/10], Loss: 6643.0845, AVGLoss: 13737.2528
Epoch [7/10], Loss: 9729.8838, AVGLoss: 13616.7672
Epoch [8/10], Loss: 7741.2456, AVGLos

[I 2024-02-17 03:02:02,305] Trial 4 finished with value: 0.39059802823886425 and parameters: {'lr': 0.0004443006758539235, 'num_layers': 2, 'n_units_l0': 128, 'n_units_l1': 31}. Best is trial 3 with value: 0.40348780795787587.


Fold 10, MAE: 73.36746978759766, MSE: 16465.013671875, R^2: 0.5442446650759396
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 7404.7202, AVGLoss: 40924.5419
Epoch [2/10], Loss: 19524.1367, AVGLoss: 28795.5014
Epoch [3/10], Loss: 15272.8125, AVGLoss: 27414.0486
Epoch [4/10], Loss: 18563.7871, AVGLoss: 26977.6211
Epoch [5/10], Loss: 10698.0518, AVGLoss: 26638.5672
Epoch [6/10], Loss: 18291.8730, AVGLoss: 26367.7053
Epoch [7/10], Loss: 11133.0527, AVGLoss: 26132.3172
Epoch [8/10], Loss: 7682.2690, AVGLoss: 25911.8838
Epoch [9/10], Loss: 10045.4365, AVGLoss: 25684.6301
Epoch [10/10], Loss: 20010.3320, AVGLoss: 25440.9070
Fold 1, MAE: 99.62218475341797, MSE: 28210.0546875, R^2: 0.27395104494060696
Start Fold 2
Epoch [1/10], Loss: 32066.9648, AVGLoss: 34012.9687
Epoch [2/10], Loss: 8346.6650, AVGLoss: 23614.2224
Epoch [3/10], Loss: 8757.1221, AVGLoss: 22381.1875
Epoch [4/10], Loss: 12715.6475, AVGLoss: 21968.1330
Epoch [5/10], Loss: 20375.9941, AVGLoss: 21685.9038
Epoch [6/10], Loss: 31111.5156, AVGLoss: 21454.5107
Epoch [7/10], Loss: 8753.4502, AVGLoss: 21240.0175
Epoch [8/10], Loss: 8741.5938,

[I 2024-02-17 03:31:46,147] Trial 5 finished with value: 0.2511438249315127 and parameters: {'lr': 1.367258721438188e-05, 'num_layers': 3, 'n_units_l0': 127, 'n_units_l1': 60, 'n_units_l2': 27}. Best is trial 3 with value: 0.40348780795787587.


Fold 10, MAE: 88.61422729492188, MSE: 22758.509765625, R^2: 0.3700392226425876
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 17952.5781, AVGLoss: 17098.0919
Epoch [2/10], Loss: 4483.4131, AVGLoss: 13894.6624
Epoch [3/10], Loss: 5793.0889, AVGLoss: 13206.1909
Epoch [4/10], Loss: 12512.6660, AVGLoss: 11870.4322
Epoch [5/10], Loss: 3847.0764, AVGLoss: 11031.0603
Epoch [6/10], Loss: 11296.4707, AVGLoss: 11131.6576
Epoch [7/10], Loss: 8068.3154, AVGLoss: 10645.2446
Epoch [8/10], Loss: 13753.7354, AVGLoss: 10486.7595
Epoch [9/10], Loss: 2301.7700, AVGLoss: 10316.5245
Epoch [10/10], Loss: 4579.0298, AVGLoss: 10206.3776
Fold 1, MAE: 97.27667236328125, MSE: 25165.240234375, R^2: 0.352316179808538
Start Fold 2
Epoch [1/10], Loss: 7386.5068, AVGLoss: 14970.7119
Epoch [2/10], Loss: 24622.1855, AVGLoss: 13232.7264
Epoch [3/10], Loss: 4460.2949, AVGLoss: 12832.4206
Epoch [4/10], Loss: 7798.8740, AVGLoss: 12200.3075
Epoch [5/10], Loss: 7294.5811, AVGLoss: 11913.0118
Epoch [6/10], Loss: 6332.1875, AVGLoss: 11335.1184
Epoch [7/10], Loss: 5817.8057, AVGLoss: 11052.2381
Epoch [8/10], Loss: 12739.3623, AVGLo

[I 2024-02-17 04:11:19,082] Trial 6 finished with value: 0.3746414333339966 and parameters: {'lr': 0.021931500746914075, 'num_layers': 5, 'n_units_l0': 92, 'n_units_l1': 83, 'n_units_l2': 18, 'n_units_l3': 116, 'n_units_l4': 20}. Best is trial 3 with value: 0.40348780795787587.


Fold 10, MAE: 82.4711685180664, MSE: 17222.697265625, R^2: 0.5232717930722959
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 11901.2295, AVGLoss: 32729.2336
Epoch [2/10], Loss: 11303.9404, AVGLoss: 26933.2475
Epoch [3/10], Loss: 16145.8223, AVGLoss: 25790.1962
Epoch [4/10], Loss: 16116.3926, AVGLoss: 24447.0944
Epoch [5/10], Loss: 10458.9600, AVGLoss: 22630.3708
Epoch [6/10], Loss: 27555.6738, AVGLoss: 20244.7573
Epoch [7/10], Loss: 10530.1738, AVGLoss: 18359.1746
Epoch [8/10], Loss: 9283.7881, AVGLoss: 17487.1220
Epoch [9/10], Loss: 37101.2500, AVGLoss: 16965.0290
Epoch [10/10], Loss: 7268.7251, AVGLoss: 16568.9116
Fold 1, MAE: 87.5767822265625, MSE: 21769.857421875, R^2: 0.43970396835044134
Start Fold 2
Epoch [1/10], Loss: 7663.7612, AVGLoss: 28206.8106
Epoch [2/10], Loss: 21288.0176, AVGLoss: 21997.3385
Epoch [3/10], Loss: 32326.1953, AVGLoss: 21117.8318
Epoch [4/10], Loss: 15407.9326, AVGLoss: 20167.9045
Epoch [5/10], Loss: 12913.5469, AVGLoss: 18866.9373
Epoch [6/10], Loss: 10014.2812, AVGLoss: 17104.8913
Epoch [7/10], Loss: 11962.1230, AVGLoss: 15600.9070
Epoch [8/10], Loss: 10747.3

[I 2024-02-17 04:47:32,713] Trial 7 finished with value: 0.40851867084895466 and parameters: {'lr': 1.7879740314594983e-05, 'num_layers': 5, 'n_units_l0': 68, 'n_units_l1': 62, 'n_units_l2': 61, 'n_units_l3': 29, 'n_units_l4': 75}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 72.55000305175781, MSE: 16203.701171875, R^2: 0.5514779183388459
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 7885.7344, AVGLoss: 24088.8058
Epoch [2/10], Loss: 6989.3062, AVGLoss: 16918.5776
Epoch [3/10], Loss: 13152.6465, AVGLoss: 15816.5273
Epoch [4/10], Loss: 11626.5869, AVGLoss: 15060.4946
Epoch [5/10], Loss: 4690.1768, AVGLoss: 14377.0347
Epoch [6/10], Loss: 8048.4683, AVGLoss: 13718.4174
Epoch [7/10], Loss: 5860.4497, AVGLoss: 13116.6091
Epoch [8/10], Loss: 12939.4541, AVGLoss: 12664.5358
Epoch [9/10], Loss: 6091.9429, AVGLoss: 12315.6085
Epoch [10/10], Loss: 5766.5532, AVGLoss: 12025.6717
Fold 1, MAE: 89.20728302001953, MSE: 24707.23828125, R^2: 0.36410393989606316
Start Fold 2
Epoch [1/10], Loss: 6964.9688, AVGLoss: 20291.0181
Epoch [2/10], Loss: 14244.0068, AVGLoss: 14664.2789
Epoch [3/10], Loss: 6229.0420, AVGLoss: 13932.3752
Epoch [4/10], Loss: 19574.4883, AVGLoss: 13564.7779
Epoch [5/10], Loss: 12137.1123, AVGLoss: 13300.0637
Epoch [6/10], Loss: 12578.6504, AVGLoss: 13085.3350
Epoch [7/10], Loss: 12389.0674, AVGLoss: 12905.6909
Epoch [8/10], Loss: 26006.6211, A

[I 2024-02-17 05:25:46,561] Trial 8 finished with value: 0.38023543682591876 and parameters: {'lr': 8.528435161303137e-05, 'num_layers': 4, 'n_units_l0': 113, 'n_units_l1': 113, 'n_units_l2': 73, 'n_units_l3': 55}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 67.40550231933594, MSE: 14632.8642578125, R^2: 0.5949589754969016
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 15006.2510, AVGLoss: 16040.4211
Epoch [2/10], Loss: 18986.0352, AVGLoss: 13575.5262
Epoch [3/10], Loss: 5196.5562, AVGLoss: 12552.1540
Epoch [4/10], Loss: 4613.9141, AVGLoss: 12049.2522
Epoch [5/10], Loss: 19023.0254, AVGLoss: 11976.5118
Epoch [6/10], Loss: 14808.8965, AVGLoss: 11633.7491
Epoch [7/10], Loss: 9376.9600, AVGLoss: 11125.8298
Epoch [8/10], Loss: 11742.7178, AVGLoss: 10777.0301
Epoch [9/10], Loss: 4458.5854, AVGLoss: 10918.8122
Epoch [10/10], Loss: 29354.2441, AVGLoss: 10234.7038
Fold 1, MAE: 97.82805633544922, MSE: 27657.89453125, R^2: 0.2881620730260339
Start Fold 2
Epoch [1/10], Loss: 30413.2637, AVGLoss: 14745.2931
Epoch [2/10], Loss: 13103.4385, AVGLoss: 13098.3271
Epoch [3/10], Loss: 6913.5840, AVGLoss: 12244.6197
Epoch [4/10], Loss: 7676.9292, AVGLoss: 11959.4514
Epoch [5/10], Loss: 13136.3975, AVGLoss: 11820.2099
Epoch [6/10], Loss: 4989.2700, AVGLoss: 11347.4249
Epoch [7/10], Loss: 9333.8145, AVGLoss: 11367.7877
Epoch [8/10], Loss: 11019.5596, A

[I 2024-02-17 05:58:37,984] Trial 9 finished with value: 0.32449827597455627 and parameters: {'lr': 0.021626086687883992, 'num_layers': 3, 'n_units_l0': 69, 'n_units_l1': 70, 'n_units_l2': 102}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 75.54235076904297, MSE: 18326.537109375, R^2: 0.4927172957380501
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 49474.3477, AVGLoss: 38986.3711
Epoch [2/10], Loss: 12210.9434, AVGLoss: 28489.4377
Epoch [3/10], Loss: 21658.0977, AVGLoss: 27495.1635
Epoch [4/10], Loss: 14501.0322, AVGLoss: 27122.8849
Epoch [5/10], Loss: 12398.5283, AVGLoss: 26819.7320
Epoch [6/10], Loss: 43268.5312, AVGLoss: 26512.8191
Epoch [7/10], Loss: 21767.0957, AVGLoss: 26163.1290
Epoch [8/10], Loss: 84328.0703, AVGLoss: 25757.7195
Epoch [9/10], Loss: 10480.6416, AVGLoss: 25290.1528
Epoch [10/10], Loss: 9637.0078, AVGLoss: 24748.0829
Fold 1, MAE: 95.8653335571289, MSE: 26916.73046875, R^2: 0.3072376665729204
Start Fold 2
Epoch [1/10], Loss: 7259.5020, AVGLoss: 33900.5467
Epoch [2/10], Loss: 5580.0908, AVGLoss: 23528.6375
Epoch [3/10], Loss: 13131.6680, AVGLoss: 22424.9081
Epoch [4/10], Loss: 23670.5488, AVGLoss: 21988.6925
Epoch [5/10], Loss: 14347.8076, AVGLoss: 21648.5432
Epoch [6/10], Loss: 9861.1309, AVGLoss: 21290.1283
Epoch [7/10], Loss: 20813.7129, AVGLoss: 20901.9300
Epoch [8/10], Loss: 12249.0000

[I 2024-02-17 06:31:43,638] Trial 10 finished with value: 0.2674107901738998 and parameters: {'lr': 1.0215196206727948e-05, 'num_layers': 4, 'n_units_l0': 20, 'n_units_l1': 90, 'n_units_l2': 65, 'n_units_l3': 89}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 87.24059295654297, MSE: 22441.3125, R^2: 0.378819424071712
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 22321.4062, AVGLoss: 28833.3601
Epoch [2/10], Loss: 76043.9375, AVGLoss: 23620.4466
Epoch [3/10], Loss: 21734.2754, AVGLoss: 18562.6960
Epoch [4/10], Loss: 17775.9863, AVGLoss: 16787.1444
Epoch [5/10], Loss: 10155.3174, AVGLoss: 16007.6959
Epoch [6/10], Loss: 12702.3164, AVGLoss: 15359.2163
Epoch [7/10], Loss: 7223.1089, AVGLoss: 14740.4089
Epoch [8/10], Loss: 12329.1553, AVGLoss: 14212.5262
Epoch [9/10], Loss: 11620.2646, AVGLoss: 13783.2543
Epoch [10/10], Loss: 17446.1406, AVGLoss: 13444.7737
Fold 1, MAE: 91.5932388305664, MSE: 24200.59765625, R^2: 0.377143441301387
Start Fold 2
Epoch [1/10], Loss: 7976.5654, AVGLoss: 23757.9902
Epoch [2/10], Loss: 16059.9844, AVGLoss: 19340.5499
Epoch [3/10], Loss: 6926.4673, AVGLoss: 16238.0812
Epoch [4/10], Loss: 4066.7026, AVGLoss: 14935.2360
Epoch [5/10], Loss: 12707.8789, AVGLoss: 14399.6678
Epoch [6/10], Loss: 5132.7881, AVGLoss: 14063.7305
Epoch [7/10], Loss: 6612.4033, AVGLoss: 13830.5616
Epoch [8/10], Loss: 12197.6855, A

[I 2024-02-17 07:07:11,801] Trial 11 finished with value: 0.38406955836353845 and parameters: {'lr': 4.861888468343111e-05, 'num_layers': 5, 'n_units_l0': 56, 'n_units_l1': 34, 'n_units_l2': 126, 'n_units_l3': 17, 'n_units_l4': 83}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 73.35481262207031, MSE: 17834.939453125, R^2: 0.5063247980641092
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 23931.7363, AVGLoss: 14965.9890
Epoch [2/10], Loss: 5199.5190, AVGLoss: 11959.8786
Epoch [3/10], Loss: 10193.6152, AVGLoss: 10669.6527
Epoch [4/10], Loss: 8052.9634, AVGLoss: 9972.4078
Epoch [5/10], Loss: 5686.4326, AVGLoss: 9776.7528
Epoch [6/10], Loss: 9154.4707, AVGLoss: 9407.8039
Epoch [7/10], Loss: 6770.0713, AVGLoss: 8956.4539
Epoch [8/10], Loss: 9421.8447, AVGLoss: 8764.3259
Epoch [9/10], Loss: 8480.5986, AVGLoss: 8570.2243
Epoch [10/10], Loss: 49598.0430, AVGLoss: 8356.8343
Fold 1, MAE: 91.15430450439453, MSE: 22815.94140625, R^2: 0.41278064046709007
Start Fold 2
Epoch [1/10], Loss: 6211.2773, AVGLoss: 14293.3087
Epoch [2/10], Loss: 27260.3945, AVGLoss: 12397.2856
Epoch [3/10], Loss: 9325.6650, AVGLoss: 11998.1117
Epoch [4/10], Loss: 5015.2070, AVGLoss: 11076.9547
Epoch [5/10], Loss: 4906.1729, AVGLoss: 11021.7185
Epoch [6/10], Loss: 8121.5396, AVGLoss: 10263.7701
Epoch [7/10], Loss: 11792.3350, AVGLoss: 9364.7990
Epoch [8/10], Loss: 8110.6533, AVGLoss: 8684

[I 2024-02-17 07:45:07,134] Trial 12 finished with value: 0.21129971434803418 and parameters: {'lr': 0.0037394474284245124, 'num_layers': 5, 'n_units_l0': 89, 'n_units_l1': 46, 'n_units_l2': 58, 'n_units_l3': 46, 'n_units_l4': 46}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 81.48343658447266, MSE: 21050.638671875, R^2: 0.4173134882471383
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 15698.3047, AVGLoss: 30437.5316
Epoch [2/10], Loss: 17829.5332, AVGLoss: 25443.6718
Epoch [3/10], Loss: 12241.6689, AVGLoss: 22528.9214
Epoch [4/10], Loss: 41979.2617, AVGLoss: 18774.5667
Epoch [5/10], Loss: 12491.3369, AVGLoss: 17151.8972
Epoch [6/10], Loss: 4774.8696, AVGLoss: 16409.3755
Epoch [7/10], Loss: 52892.9766, AVGLoss: 15906.7481
Epoch [8/10], Loss: 3530.5051, AVGLoss: 15487.9825
Epoch [9/10], Loss: 119435.8359, AVGLoss: 15118.8018
Epoch [10/10], Loss: 17990.5898, AVGLoss: 14745.4400
Fold 1, MAE: 92.59767150878906, MSE: 25421.88671875, R^2: 0.34571071284686783
Start Fold 2
Epoch [1/10], Loss: 5445.1187, AVGLoss: 25446.9782
Epoch [2/10], Loss: 24482.4883, AVGLoss: 21206.1819
Epoch [3/10], Loss: 23554.1895, AVGLoss: 19780.5090
Epoch [4/10], Loss: 11131.2266, AVGLoss: 17535.9833
Epoch [5/10], Loss: 18253.7285, AVGLoss: 15637.6277
Epoch [6/10], Loss: 23423.5801, AVGLoss: 14939.5869
Epoch [7/10], Loss: 8927.1631, AVGLoss: 14631.3154
Epoch [8/10], Loss: 20380.9

[I 2024-02-17 08:19:52,396] Trial 13 finished with value: 0.4035071900840622 and parameters: {'lr': 3.393398114876199e-05, 'num_layers': 5, 'n_units_l0': 42, 'n_units_l1': 22, 'n_units_l2': 89, 'n_units_l3': 21, 'n_units_l4': 127}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 70.07286834716797, MSE: 16073.314453125, R^2: 0.5550869999390237
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 5589.5845, AVGLoss: 33629.0487
Epoch [2/10], Loss: 14957.2529, AVGLoss: 27291.1264
Epoch [3/10], Loss: 9014.0615, AVGLoss: 26367.1256
Epoch [4/10], Loss: 11403.5996, AVGLoss: 25379.2505
Epoch [5/10], Loss: 21559.6348, AVGLoss: 24070.1168
Epoch [6/10], Loss: 27141.6172, AVGLoss: 22373.6732
Epoch [7/10], Loss: 6494.6099, AVGLoss: 20419.5158
Epoch [8/10], Loss: 11527.2607, AVGLoss: 18824.6707
Epoch [9/10], Loss: 18710.7129, AVGLoss: 17890.6974
Epoch [10/10], Loss: 10785.3809, AVGLoss: 17345.0649
Fold 1, MAE: 87.71614074707031, MSE: 21361.091796875, R^2: 0.450224496372707
Start Fold 2
Epoch [1/10], Loss: 10199.5898, AVGLoss: 28304.0604
Epoch [2/10], Loss: 6762.8203, AVGLoss: 22157.4752
Epoch [3/10], Loss: 49967.7031, AVGLoss: 21347.2791
Epoch [4/10], Loss: 7441.4233, AVGLoss: 20534.6972
Epoch [5/10], Loss: 40329.2148, AVGLoss: 19539.9429
Epoch [6/10], Loss: 21499.2227, AVGLoss: 18255.4668
Epoch [7/10], Loss: 6811.8701, AVGLoss: 16834.0859
Epoch [8/10], Loss: 28742.5605,

[I 2024-02-17 08:51:38,985] Trial 14 finished with value: 0.4045876786676848 and parameters: {'lr': 2.2096787734714057e-05, 'num_layers': 4, 'n_units_l0': 35, 'n_units_l1': 22, 'n_units_l2': 84, 'n_units_l3': 70}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 76.88099670410156, MSE: 17197.841796875, R^2: 0.5239598263144623
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 8781.1885, AVGLoss: 20979.9230
Epoch [2/10], Loss: 30988.6445, AVGLoss: 18648.0695
Epoch [3/10], Loss: 12687.9443, AVGLoss: 18588.4023
Epoch [4/10], Loss: 29159.6113, AVGLoss: 18168.2794
Epoch [5/10], Loss: 11189.3848, AVGLoss: 18262.7123
Epoch [6/10], Loss: 15608.4648, AVGLoss: 17902.1294
Epoch [7/10], Loss: 12975.1357, AVGLoss: 17803.6187
Epoch [8/10], Loss: 29275.1738, AVGLoss: 33959.2176
Epoch [9/10], Loss: 28513.4102, AVGLoss: 34834.0372
Epoch [10/10], Loss: 93970.1016, AVGLoss: 34835.7010
Fold 1, MAE: 128.72218322753906, MSE: 39513.37890625, R^2: -0.016965364820932693
Start Fold 2
Epoch [1/10], Loss: 29968.1934, AVGLoss: 16526.7236
Epoch [2/10], Loss: 62513.1523, AVGLoss: 17706.0883
Epoch [3/10], Loss: 9822.3691, AVGLoss: 26269.0283
Epoch [4/10], Loss: 10750.4287, AVGLoss: 27487.7900
Epoch [5/10], Loss: 43213.9727, AVGLoss: 27347.6365
Epoch [6/10], Loss: 21915.5020, AVGLoss: 27477.8999
Epoch [7/10], Loss: 12471.7393, AVGLoss: 28456.0547
Epoch [8/10], Loss: 156

[I 2024-02-17 09:25:12,333] Trial 15 finished with value: 0.14050917867313523 and parameters: {'lr': 0.0835700603259095, 'num_layers': 4, 'n_units_l0': 16, 'n_units_l1': 18, 'n_units_l2': 41, 'n_units_l3': 77}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 83.81629943847656, MSE: 17086.578125, R^2: 0.5270395490334836
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 25490.9238, AVGLoss: 15046.4187
Epoch [2/10], Loss: 7820.1079, AVGLoss: 11687.6580
Epoch [3/10], Loss: 7721.8730, AVGLoss: 10365.6474
Epoch [4/10], Loss: 3918.8452, AVGLoss: 9489.3248
Epoch [5/10], Loss: 5432.6982, AVGLoss: 8695.4948
Epoch [6/10], Loss: 10396.9512, AVGLoss: 8061.9746
Epoch [7/10], Loss: 6150.2744, AVGLoss: 7572.5723
Epoch [8/10], Loss: 5898.8354, AVGLoss: 7205.2569
Epoch [9/10], Loss: 3961.7375, AVGLoss: 6802.0006
Epoch [10/10], Loss: 3898.9939, AVGLoss: 6440.7253
Fold 1, MAE: 125.96882629394531, MSE: 31105.275390625, R^2: 0.19943616254024377
Start Fold 2
Epoch [1/10], Loss: 5804.6592, AVGLoss: 14564.6392
Epoch [2/10], Loss: 13745.3691, AVGLoss: 12533.3283
Epoch [3/10], Loss: 5524.7354, AVGLoss: 11616.3690
Epoch [4/10], Loss: 3132.1406, AVGLoss: 10701.1467
Epoch [5/10], Loss: 3155.7485, AVGLoss: 9497.5611
Epoch [6/10], Loss: 5483.4307, AVGLoss: 8409.2158
Epoch [7/10], Loss: 4717.2964, AVGLoss: 7445.0649
Epoch [8/10], Loss: 7180.7412, AVGLoss: 6677.2

[I 2024-02-17 10:02:28,724] Trial 16 finished with value: 0.17023625344320217 and parameters: {'lr': 0.0025388202250515722, 'num_layers': 4, 'n_units_l0': 33, 'n_units_l1': 96, 'n_units_l2': 82, 'n_units_l3': 97}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 75.41324615478516, MSE: 17098.552734375, R^2: 0.5267081429225355
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 35297.6992, AVGLoss: 34182.4161
Epoch [2/10], Loss: 27129.3555, AVGLoss: 27273.9203
Epoch [3/10], Loss: 24625.1133, AVGLoss: 26641.1604
Epoch [4/10], Loss: 9628.0488, AVGLoss: 26168.9261
Epoch [5/10], Loss: 24163.8398, AVGLoss: 25688.2044
Epoch [6/10], Loss: 14357.4502, AVGLoss: 25117.5211
Epoch [7/10], Loss: 26099.2168, AVGLoss: 24418.7709
Epoch [8/10], Loss: 18953.5000, AVGLoss: 23543.8009
Epoch [9/10], Loss: 11146.0967, AVGLoss: 22446.7753
Epoch [10/10], Loss: 10953.8076, AVGLoss: 21136.9281
Fold 1, MAE: 91.80814361572266, MSE: 24086.978515625, R^2: 0.38006764966649564
Start Fold 2
Epoch [1/10], Loss: 6477.5996, AVGLoss: 29675.7062
Epoch [2/10], Loss: 4251.4775, AVGLoss: 22457.4908
Epoch [3/10], Loss: 53224.0977, AVGLoss: 21768.1585
Epoch [4/10], Loss: 5222.7344, AVGLoss: 21223.4336
Epoch [5/10], Loss: 10903.1211, AVGLoss: 20660.2979
Epoch [6/10], Loss: 6480.8906, AVGLoss: 20036.7990
Epoch [7/10], Loss: 6875.6094, AVGLoss: 19311.9383
Epoch [8/10], Loss: 6384.9766

[I 2024-02-17 10:33:04,728] Trial 17 finished with value: 0.36393979205017196 and parameters: {'lr': 2.395812659346019e-05, 'num_layers': 3, 'n_units_l0': 88, 'n_units_l1': 38, 'n_units_l2': 49}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 85.07441711425781, MSE: 20996.9609375, R^2: 0.4187993793460101
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 6719.0874, AVGLoss: 24853.4918
Epoch [2/10], Loss: 15049.6904, AVGLoss: 17073.2285
Epoch [3/10], Loss: 6847.3813, AVGLoss: 15551.2577
Epoch [4/10], Loss: 8038.9326, AVGLoss: 14518.8106
Epoch [5/10], Loss: 13444.7256, AVGLoss: 13637.3295
Epoch [6/10], Loss: 10387.2129, AVGLoss: 13086.6702
Epoch [7/10], Loss: 11267.7295, AVGLoss: 12770.6889
Epoch [8/10], Loss: 5512.9141, AVGLoss: 12516.9936
Epoch [9/10], Loss: 5782.0864, AVGLoss: 12303.0036
Epoch [10/10], Loss: 14047.4746, AVGLoss: 12116.0764
Fold 1, MAE: 89.5791015625, MSE: 22596.91015625, R^2: 0.4184179660647551
Start Fold 2
Epoch [1/10], Loss: 7893.4766, AVGLoss: 21080.0434
Epoch [2/10], Loss: 4602.9116, AVGLoss: 15377.6700
Epoch [3/10], Loss: 4645.2446, AVGLoss: 14362.8110
Epoch [4/10], Loss: 20325.5332, AVGLoss: 14024.5657
Epoch [5/10], Loss: 9864.3730, AVGLoss: 13756.2949
Epoch [6/10], Loss: 11865.1621, AVGLoss: 13566.9862
Epoch [7/10], Loss: 15149.9902, AVGLoss: 13409.7375
Epoch [8/10], Loss: 8068.1641, AVGLoss

[I 2024-02-17 11:02:57,872] Trial 18 finished with value: 0.3961858250176387 and parameters: {'lr': 0.0001938678658623871, 'num_layers': 3, 'n_units_l0': 49, 'n_units_l1': 71, 'n_units_l2': 84}. Best is trial 7 with value: 0.40851867084895466.


Fold 10, MAE: 70.13946533203125, MSE: 15570.447265625, R^2: 0.5690064971346865
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 14492.5908, AVGLoss: 33536.2618
Epoch [2/10], Loss: 20185.9668, AVGLoss: 27069.2209
Epoch [3/10], Loss: 18328.3945, AVGLoss: 26148.5010
Epoch [4/10], Loss: 37580.7539, AVGLoss: 25218.1218
Epoch [5/10], Loss: 76011.1484, AVGLoss: 24064.7903
Epoch [6/10], Loss: 10697.8271, AVGLoss: 22374.7740
Epoch [7/10], Loss: 7617.3833, AVGLoss: 20244.0908
Epoch [8/10], Loss: 13709.4209, AVGLoss: 18609.3915
Epoch [9/10], Loss: 7478.3594, AVGLoss: 17787.5950
Epoch [10/10], Loss: 17865.7656, AVGLoss: 17352.5911
Fold 1, MAE: 89.4544906616211, MSE: 21826.16796875, R^2: 0.4382547240297032
Start Fold 2
Epoch [1/10], Loss: 46670.4570, AVGLoss: 28358.8803
Epoch [2/10], Loss: 27458.1387, AVGLoss: 22121.7286
Epoch [3/10], Loss: 34564.3320, AVGLoss: 21303.0276
Epoch [4/10], Loss: 14548.8936, AVGLoss: 20590.3903
Epoch [5/10], Loss: 35232.5000, AVGLoss: 19806.7379
Epoch [6/10], Loss: 14101.3779, AVGLoss: 18686.1061
Epoch [7/10], Loss: 6025.2173, AVGLoss: 17233.8890
Epoch [8/10], Loss: 11033.750

[I 2024-02-17 11:35:51,181] Trial 19 finished with value: 0.41561160115995077 and parameters: {'lr': 2.053456874735523e-05, 'num_layers': 4, 'n_units_l0': 32, 'n_units_l1': 99, 'n_units_l2': 43, 'n_units_l3': 60}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 73.40399169921875, MSE: 15833.6376953125, R^2: 0.5617213432369101
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 24723.3105, AVGLoss: 24903.9844
Epoch [2/10], Loss: 24251.4492, AVGLoss: 17717.2788
Epoch [3/10], Loss: 9008.7764, AVGLoss: 16718.7704
Epoch [4/10], Loss: 7178.8301, AVGLoss: 16024.8263
Epoch [5/10], Loss: 5242.7188, AVGLoss: 15382.2716
Epoch [6/10], Loss: 4647.8364, AVGLoss: 14922.7002
Epoch [7/10], Loss: 36355.4375, AVGLoss: 14582.3933
Epoch [8/10], Loss: 10421.2715, AVGLoss: 14309.8665
Epoch [9/10], Loss: 10850.3955, AVGLoss: 14079.1455
Epoch [10/10], Loss: 10313.9443, AVGLoss: 13878.6320
Fold 1, MAE: 90.83367156982422, MSE: 24242.974609375, R^2: 0.37605278250681273
Start Fold 2
Epoch [1/10], Loss: 35144.3750, AVGLoss: 21046.6403
Epoch [2/10], Loss: 7377.8667, AVGLoss: 15133.9995
Epoch [3/10], Loss: 13099.0918, AVGLoss: 14255.6959
Epoch [4/10], Loss: 15205.7529, AVGLoss: 13989.5784
Epoch [5/10], Loss: 31332.1621, AVGLoss: 13819.4510
Epoch [6/10], Loss: 25963.1562, AVGLoss: 13682.4541
Epoch [7/10], Loss: 39228.4180, AVGLoss: 13574.4564
Epoch [8/10], Loss: 137844.4

[I 2024-02-17 12:01:10,543] Trial 20 finished with value: 0.40316915015154614 and parameters: {'lr': 0.00033792485169583937, 'num_layers': 2, 'n_units_l0': 59, 'n_units_l1': 127}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 73.32038879394531, MSE: 16520.62890625, R^2: 0.5427052609277031
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 16909.9434, AVGLoss: 32894.5157
Epoch [2/10], Loss: 12630.8916, AVGLoss: 27156.8124
Epoch [3/10], Loss: 11704.5625, AVGLoss: 26320.6823
Epoch [4/10], Loss: 8583.5576, AVGLoss: 25364.9826
Epoch [5/10], Loss: 12874.9854, AVGLoss: 24096.7875
Epoch [6/10], Loss: 22145.4531, AVGLoss: 22352.9735
Epoch [7/10], Loss: 8351.4902, AVGLoss: 20336.9421
Epoch [8/10], Loss: 44603.4062, AVGLoss: 18914.7945
Epoch [9/10], Loss: 11354.1934, AVGLoss: 18183.0841
Epoch [10/10], Loss: 17714.2422, AVGLoss: 17757.3532
Fold 1, MAE: 87.29556274414062, MSE: 21224.646484375, R^2: 0.45373620866925257
Start Fold 2
Epoch [1/10], Loss: 13613.7041, AVGLoss: 27724.9863
Epoch [2/10], Loss: 7633.5698, AVGLoss: 22280.9144
Epoch [3/10], Loss: 29330.3691, AVGLoss: 21629.7300
Epoch [4/10], Loss: 21982.8105, AVGLoss: 21068.8995
Epoch [5/10], Loss: 17158.0898, AVGLoss: 20460.9776
Epoch [6/10], Loss: 5326.0723, AVGLoss: 19760.3273
Epoch [7/10], Loss: 30900.5957, AVGLoss: 18873.5680
Epoch [8/10], Loss: 24936.2

[I 2024-02-17 12:34:12,228] Trial 21 finished with value: 0.4126358455599092 and parameters: {'lr': 2.1483321154431334e-05, 'num_layers': 4, 'n_units_l0': 28, 'n_units_l1': 101, 'n_units_l2': 43, 'n_units_l3': 61}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 72.5330810546875, MSE: 15520.7119140625, R^2: 0.5703831603360949
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 14906.6113, AVGLoss: 41035.0890
Epoch [2/10], Loss: 24004.2422, AVGLoss: 29823.8933
Epoch [3/10], Loss: 16929.3652, AVGLoss: 27898.1485
Epoch [4/10], Loss: 9818.6377, AVGLoss: 27336.7321
Epoch [5/10], Loss: 25633.3613, AVGLoss: 26994.7396
Epoch [6/10], Loss: 26458.1406, AVGLoss: 26718.4233
Epoch [7/10], Loss: 12274.2666, AVGLoss: 26465.6880
Epoch [8/10], Loss: 13533.1523, AVGLoss: 26218.5183
Epoch [9/10], Loss: 13741.9453, AVGLoss: 25959.1722
Epoch [10/10], Loss: 11997.3721, AVGLoss: 25670.1459
Fold 1, MAE: 98.08596801757812, MSE: 28306.103515625, R^2: 0.2714790637043728
Start Fold 2
Epoch [1/10], Loss: 11533.2197, AVGLoss: 36248.0260
Epoch [2/10], Loss: 18913.6094, AVGLoss: 24809.2779
Epoch [3/10], Loss: 24732.7578, AVGLoss: 22730.8133
Epoch [4/10], Loss: 7687.9087, AVGLoss: 22152.3694
Epoch [5/10], Loss: 9164.1260, AVGLoss: 21766.6504
Epoch [6/10], Loss: 9312.8379, AVGLoss: 21427.7589
Epoch [7/10], Loss: 17239.4902, AVGLoss: 21106.9338
Epoch [8/10], Loss: 39234.13

[I 2024-02-17 13:06:30,734] Trial 22 finished with value: 0.24470164203333092 and parameters: {'lr': 1.0446298953445264e-05, 'num_layers': 4, 'n_units_l0': 28, 'n_units_l1': 102, 'n_units_l2': 36, 'n_units_l3': 53}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 88.6154556274414, MSE: 22967.2109375, R^2: 0.3642623288272948
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 67253.4922, AVGLoss: 29457.0648
Epoch [2/10], Loss: 15896.8232, AVGLoss: 23910.3563
Epoch [3/10], Loss: 11637.6709, AVGLoss: 19037.8895
Epoch [4/10], Loss: 11784.1934, AVGLoss: 17297.4791
Epoch [5/10], Loss: 13758.3936, AVGLoss: 16641.0858
Epoch [6/10], Loss: 7568.4653, AVGLoss: 16183.6099
Epoch [7/10], Loss: 16721.1289, AVGLoss: 15818.7424
Epoch [8/10], Loss: 8532.6680, AVGLoss: 15465.8660
Epoch [9/10], Loss: 8204.1162, AVGLoss: 15072.3094
Epoch [10/10], Loss: 7556.8833, AVGLoss: 14666.7479
Fold 1, MAE: 89.26873016357422, MSE: 23829.263671875, R^2: 0.3867006187923524
Start Fold 2
Epoch [1/10], Loss: 41661.1797, AVGLoss: 24310.3441
Epoch [2/10], Loss: 13478.9297, AVGLoss: 20020.7726
Epoch [3/10], Loss: 12115.5977, AVGLoss: 16719.3755
Epoch [4/10], Loss: 34306.4805, AVGLoss: 14800.3825
Epoch [5/10], Loss: 6853.7119, AVGLoss: 14268.8902
Epoch [6/10], Loss: 11010.7393, AVGLoss: 13985.0068
Epoch [7/10], Loss: 14048.1934, AVGLoss: 13789.2135
Epoch [8/10], Loss: 7301.6094

[I 2024-02-17 13:43:15,227] Trial 23 finished with value: 0.40141640859194655 and parameters: {'lr': 3.818172011372832e-05, 'num_layers': 5, 'n_units_l0': 26, 'n_units_l1': 81, 'n_units_l2': 49, 'n_units_l3': 66, 'n_units_l4': 85}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 71.81346130371094, MSE: 16794.8984375, R^2: 0.5351134037583347
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 13803.7754, AVGLoss: 23437.6252
Epoch [2/10], Loss: 15037.0059, AVGLoss: 16052.5719
Epoch [3/10], Loss: 52253.3320, AVGLoss: 14427.3607
Epoch [4/10], Loss: 10865.8643, AVGLoss: 13291.0052
Epoch [5/10], Loss: 6751.2866, AVGLoss: 12570.7178
Epoch [6/10], Loss: 8375.0527, AVGLoss: 12107.1494
Epoch [7/10], Loss: 7491.1089, AVGLoss: 11768.1360
Epoch [8/10], Loss: 7448.6421, AVGLoss: 11500.8481
Epoch [9/10], Loss: 3390.7078, AVGLoss: 11269.3186
Epoch [10/10], Loss: 6980.6938, AVGLoss: 11049.8507
Fold 1, MAE: 90.95266723632812, MSE: 21743.54296875, R^2: 0.4403812675856319
Start Fold 2
Epoch [1/10], Loss: 3821.6550, AVGLoss: 19637.5591
Epoch [2/10], Loss: 12275.0957, AVGLoss: 14399.2130
Epoch [3/10], Loss: 29616.8906, AVGLoss: 13747.2101
Epoch [4/10], Loss: 17456.1738, AVGLoss: 13370.2541
Epoch [5/10], Loss: 10961.4375, AVGLoss: 13051.3550
Epoch [6/10], Loss: 9334.8291, AVGLoss: 12704.3442
Epoch [7/10], Loss: 11445.2236, AVGLoss: 12358.6314
Epoch [8/10], Loss: 13898.5811, A

[I 2024-02-17 14:17:58,425] Trial 24 finished with value: 0.3518842942721252 and parameters: {'lr': 0.00012759059081237959, 'num_layers': 4, 'n_units_l0': 100, 'n_units_l1': 105, 'n_units_l2': 56, 'n_units_l3': 37}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 70.70548248291016, MSE: 16419.2578125, R^2: 0.5455112468417478
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 24076.7578, AVGLoss: 29707.7205
Epoch [2/10], Loss: 8652.1084, AVGLoss: 23884.9587
Epoch [3/10], Loss: 28010.5215, AVGLoss: 19089.9205
Epoch [4/10], Loss: 9741.5859, AVGLoss: 16997.6514
Epoch [5/10], Loss: 16036.3691, AVGLoss: 16241.2362
Epoch [6/10], Loss: 10309.4453, AVGLoss: 15765.7961
Epoch [7/10], Loss: 12063.0146, AVGLoss: 15375.6562
Epoch [8/10], Loss: 10946.6943, AVGLoss: 15001.5277
Epoch [9/10], Loss: 9191.7598, AVGLoss: 14649.7042
Epoch [10/10], Loss: 8891.1689, AVGLoss: 14326.1118
Fold 1, MAE: 90.78707885742188, MSE: 24016.98046875, R^2: 0.38186927951112415
Start Fold 2
Epoch [1/10], Loss: 70231.5625, AVGLoss: 24661.2002
Epoch [2/10], Loss: 9163.8184, AVGLoss: 19841.0267
Epoch [3/10], Loss: 26826.4414, AVGLoss: 16810.8478
Epoch [4/10], Loss: 9044.9238, AVGLoss: 15097.4171
Epoch [5/10], Loss: 8220.9668, AVGLoss: 14492.4651
Epoch [6/10], Loss: 10885.8262, AVGLoss: 14197.1780
Epoch [7/10], Loss: 8815.0547, AVGLoss: 14008.9391
Epoch [8/10], Loss: 2886.8066, A

[I 2024-02-17 14:57:52,610] Trial 25 finished with value: 0.392582865811933 and parameters: {'lr': 2.4901720049331996e-05, 'num_layers': 5, 'n_units_l0': 79, 'n_units_l1': 127, 'n_units_l2': 36, 'n_units_l3': 58, 'n_units_l4': 116}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 68.57418823242188, MSE: 15017.7998046875, R^2: 0.5843038731766582
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 17299.8398, AVGLoss: 16654.5039
Epoch [2/10], Loss: 22058.9434, AVGLoss: 12301.4788
Epoch [3/10], Loss: 7731.2329, AVGLoss: 11042.1050
Epoch [4/10], Loss: 19355.0996, AVGLoss: 10194.8166
Epoch [5/10], Loss: 4056.3804, AVGLoss: 9481.2111
Epoch [6/10], Loss: 10752.3486, AVGLoss: 8798.8181
Epoch [7/10], Loss: 6805.0220, AVGLoss: 8186.1032
Epoch [8/10], Loss: 9964.0576, AVGLoss: 7633.0046
Epoch [9/10], Loss: 4394.8525, AVGLoss: 7238.9520
Epoch [10/10], Loss: 5294.0796, AVGLoss: 6844.5106
Fold 1, MAE: 82.50650024414062, MSE: 19534.83203125, R^2: 0.4972273938876852
Start Fold 2
Epoch [1/10], Loss: 11606.8496, AVGLoss: 15220.6350
Epoch [2/10], Loss: 15013.6396, AVGLoss: 13246.5957
Epoch [3/10], Loss: 7875.4033, AVGLoss: 12540.3905
Epoch [4/10], Loss: 1873.0026, AVGLoss: 11535.4072
Epoch [5/10], Loss: 4356.2856, AVGLoss: 10316.2904
Epoch [6/10], Loss: 7727.9033, AVGLoss: 9431.8104
Epoch [7/10], Loss: 3113.0225, AVGLoss: 8590.9870
Epoch [8/10], Loss: 1312.2915, AVGLoss: 7663

[I 2024-02-17 15:33:16,538] Trial 26 finished with value: 0.1002345695623652 and parameters: {'lr': 0.0006829540139294841, 'num_layers': 4, 'n_units_l0': 55, 'n_units_l1': 81, 'n_units_l2': 67, 'n_units_l3': 81}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 73.3755111694336, MSE: 17554.111328125, R^2: 0.5140982152168708
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 5227.9741, AVGLoss: 14973.4256
Epoch [2/10], Loss: 8200.7871, AVGLoss: 11331.8927
Epoch [3/10], Loss: 8431.4053, AVGLoss: 10313.7616
Epoch [4/10], Loss: 11063.4902, AVGLoss: 9560.7615
Epoch [5/10], Loss: 4588.3496, AVGLoss: 8900.1376
Epoch [6/10], Loss: 7055.3823, AVGLoss: 8363.4064
Epoch [7/10], Loss: 7949.6426, AVGLoss: 7924.9776
Epoch [8/10], Loss: 6424.0532, AVGLoss: 7747.6882
Epoch [9/10], Loss: 5811.0239, AVGLoss: 7313.6048
Epoch [10/10], Loss: 6138.5986, AVGLoss: 7534.1626
Fold 1, MAE: 86.59569549560547, MSE: 20371.953125, R^2: 0.475682169259964
Start Fold 2
Epoch [1/10], Loss: 22303.2246, AVGLoss: 14619.2492
Epoch [2/10], Loss: 7901.9951, AVGLoss: 12514.0994
Epoch [3/10], Loss: 12072.8301, AVGLoss: 11132.4900
Epoch [4/10], Loss: 16315.8857, AVGLoss: 9757.4663
Epoch [5/10], Loss: 9721.7031, AVGLoss: 8631.8618
Epoch [6/10], Loss: 7450.0684, AVGLoss: 7701.7283
Epoch [7/10], Loss: 2084.6848, AVGLoss: 6912.0230
Epoch [8/10], Loss: 4433.6343, AVGLoss: 6392.6560
Ep

[I 2024-02-17 16:09:11,298] Trial 27 finished with value: 0.2002333869373254 and parameters: {'lr': 0.001899975871816656, 'num_layers': 5, 'n_units_l0': 40, 'n_units_l1': 65, 'n_units_l2': 24, 'n_units_l3': 42, 'n_units_l4': 58}. Best is trial 19 with value: 0.41561160115995077.


Fold 10, MAE: 73.55255126953125, MSE: 18368.236328125, R^2: 0.49156303728845085
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 8731.0615, AVGLoss: 30719.6775
Epoch [2/10], Loss: 22710.5156, AVGLoss: 26614.6483
Epoch [3/10], Loss: 16483.9570, AVGLoss: 25708.0700
Epoch [4/10], Loss: 37843.0000, AVGLoss: 24627.0094
Epoch [5/10], Loss: 36979.8711, AVGLoss: 23275.3049
Epoch [6/10], Loss: 12910.6572, AVGLoss: 21651.2790
Epoch [7/10], Loss: 6364.6890, AVGLoss: 20209.2302
Epoch [8/10], Loss: 30269.2363, AVGLoss: 19248.0041
Epoch [9/10], Loss: 21473.7871, AVGLoss: 18614.6614
Epoch [10/10], Loss: 5005.9360, AVGLoss: 18161.1642
Fold 1, MAE: 90.52285766601562, MSE: 21725.013671875, R^2: 0.4408581882745316
Start Fold 2
Epoch [1/10], Loss: 127197.2734, AVGLoss: 25515.6170
Epoch [2/10], Loss: 16119.8594, AVGLoss: 21557.8044
Epoch [3/10], Loss: 9929.1973, AVGLoss: 20796.0725
Epoch [4/10], Loss: 21648.4805, AVGLoss: 19788.1088
Epoch [5/10], Loss: 11834.1641, AVGLoss: 18338.0925
Epoch [6/10], Loss: 21361.8086, AVGLoss: 16689.3347
Epoch [7/10], Loss: 16888.3555, AVGLoss: 15549.4800
Epoch [8/10], Loss: 3114.00

[I 2024-02-17 16:38:39,391] Trial 28 finished with value: 0.41893336317808016 and parameters: {'lr': 5.044450031349452e-05, 'num_layers': 3, 'n_units_l0': 25, 'n_units_l1': 90, 'n_units_l2': 56}. Best is trial 28 with value: 0.41893336317808016.


Fold 10, MAE: 72.32888793945312, MSE: 15488.669921875, R^2: 0.5712700146056426
Start Fold 1


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch [1/10], Loss: 19411.2930, AVGLoss: 31536.0268
Epoch [2/10], Loss: 25182.1680, AVGLoss: 26510.2512
Epoch [3/10], Loss: 19318.4141, AVGLoss: 25264.9635
Epoch [4/10], Loss: 21267.2207, AVGLoss: 23960.0873
Epoch [5/10], Loss: 10027.9297, AVGLoss: 22438.4050
Epoch [6/10], Loss: 18341.2441, AVGLoss: 20720.6018
Epoch [7/10], Loss: 8798.7842, AVGLoss: 19095.2771
Epoch [8/10], Loss: 18660.0820, AVGLoss: 17990.5390
Epoch [9/10], Loss: 6937.2905, AVGLoss: 17377.9368
Epoch [10/10], Loss: 11380.5742, AVGLoss: 16975.5280
Fold 1, MAE: 89.35684204101562, MSE: 22223.9609375, R^2: 0.4280165857472584
Start Fold 2
Epoch [1/10], Loss: 48699.8047, AVGLoss: 26178.6820
Epoch [2/10], Loss: 9073.4023, AVGLoss: 21757.9683
Epoch [3/10], Loss: 8561.2090, AVGLoss: 21082.3218
Epoch [4/10], Loss: 13929.4551, AVGLoss: 20346.1652
Epoch [5/10], Loss: 12506.0410, AVGLoss: 19492.2390
Epoch [6/10], Loss: 7982.6455, AVGLoss: 18380.5643
Epoch [7/10], Loss: 11446.7051, AVGLoss: 17062.9651
Epoch [8/10], Loss: 19931.4609,

[I 2024-02-17 17:07:02,921] Trial 29 finished with value: 0.40263376615399044 and parameters: {'lr': 4.902399951952506e-05, 'num_layers': 3, 'n_units_l0': 24, 'n_units_l1': 91, 'n_units_l2': 48}. Best is trial 28 with value: 0.41893336317808016.


Fold 10, MAE: 75.07625579833984, MSE: 16798.365234375, R^2: 0.5350173952940345


In [None]:
# Use the best hyperparameters
best_hyperparams = study.best_trial
print(f"Best hyperparameters: {best_hyperparams.params}")

In [23]:
import pickle

study_name = "valStudy"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)
# Save the sampler with pickle to be loaded later.
with open("sampler.pkl", "wb") as fout:
    pickle.dump(study.sampler, fout)

restored_sampler = pickle.load(open("sampler.pkl", "rb"))
study = optuna.create_study(
    study_name=study_name, storage=storage_name, load_if_exists=True, sampler=restored_sampler
)

[I 2024-02-17 17:49:49,526] Using an existing study with name 'valStudy30Trial' instead of creating a new one.


In [None]:
optuna.visualization.plot_optimization_history(study)

In [None]:
fig = optuna.visualization.plot_param_importances(study)
fig.show()