In [10]:
from collections import defaultdict

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from ydata_profiling import ProfileReport

from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from torchmetrics import MeanSquaredError, MeanAbsoluteError

import optuna

from pytorch_tools import CreateDataset, train_model_cls, train_model_reg, plot_metrics

In [11]:
df = pd.read_csv('data/ConcreteStrengthData.csv')

In [12]:
df

Unnamed: 0,CementComponent,BlastFurnaceSlag,FlyAshComponent,WaterComponent,SuperplasticizerComponent,CoarseAggregateComponent,FineAggregateComponent,AgeInDays,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [13]:
X = df.drop(columns=['Strength'])
y = df['Strength']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((824, 8), (206, 8), (824,), (206,))

In [14]:
train_dataset = CreateDataset(X_train, y_train)
test_dataset = CreateDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset,
                              batch_size=40,
                              num_workers=0
                             )

test_dataloader = DataLoader(test_dataset,
                              batch_size=40,
                              num_workers=0
                             )

In [15]:
def objective(trial):
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    hidden1 = trial.suggest_int("hidden1", 64, 512)
    hidden2 = trial.suggest_int("hidden2", 32, hidden1)
    hidden3 = trial.suggest_int("hidden3", 16, hidden2)

    class LinearModel(nn.Module):
        def __init__(self, in_dim, out_dim=1):
            super().__init__()
            self.features = nn.Sequential(
                nn.Linear(in_dim, hidden1),
                nn.ReLU(),
                nn.Linear(hidden1, hidden2),
                nn.ReLU(),
                nn.Linear(hidden2, hidden3),
                nn.ReLU(),
                nn.Linear(hidden3, out_dim),
            )
        def forward(self, x):
            return self.features(x)

    model = LinearModel(in_dim=X_train.shape[1], out_dim=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    metrics, _ = train_model_reg(
        num_epoch=20,
        train_dataloader=train_dataloader,
        test_dataloader=test_dataloader,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        device=torch.device("cpu")
    )

    return metrics['test_mse'][-1]

In [16]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

[I 2025-04-23 19:43:19,761] A new study created in memory with name: no-name-3c3c22f6-28c1-4aba-8e08-63b5dd98de19


Epoch [10/20] Train Loss: 46.5907 MSE: 46.5907 MAE: 5.2210 RMSE: 6.8257


[I 2025-04-23 19:43:20,290] Trial 0 finished with value: 51.60118058584269 and parameters: {'lr': 0.006289204037020523, 'hidden1': 191, 'hidden2': 121, 'hidden3': 70}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 26.5198 MSE: 26.5198 MAE: 3.9198 RMSE: 5.1497
Epoch [10/20] Train Loss: 99.7098 MSE: 99.7098 MAE: 7.8434 RMSE: 9.9855


[I 2025-04-23 19:43:20,815] Trial 1 finished with value: 57.82598617701854 and parameters: {'lr': 0.0050882860740876674, 'hidden1': 142, 'hidden2': 58, 'hidden3': 33}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 38.1979 MSE: 38.1979 MAE: 4.6494 RMSE: 6.1804
Epoch [10/20] Train Loss: 128.8422 MSE: 128.8422 MAE: 9.1984 RMSE: 11.3509


[I 2025-04-23 19:43:21,353] Trial 2 finished with value: 59.67184833415504 and parameters: {'lr': 0.0026799108087435237, 'hidden1': 112, 'hidden2': 59, 'hidden3': 40}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 48.6618 MSE: 48.6618 MAE: 5.2547 RMSE: 6.9758
Epoch [10/20] Train Loss: 1506.9938 MSE: 1506.9938 MAE: 34.9671 RMSE: 38.8200


[I 2025-04-23 19:43:21,889] Trial 3 finished with value: 1282.9610560148665 and parameters: {'lr': 2.331855073421897e-05, 'hidden1': 224, 'hidden2': 128, 'hidden3': 55}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 1336.3749 MSE: 1336.3749 MAE: 32.4477 RMSE: 36.5565
Epoch [10/20] Train Loss: 349.7418 MSE: 349.7418 MAE: 14.5017 RMSE: 18.7014


[I 2025-04-23 19:43:22,433] Trial 4 finished with value: 171.1981226356284 and parameters: {'lr': 0.00018977872915194914, 'hidden1': 248, 'hidden2': 52, 'hidden3': 37}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 199.3764 MSE: 199.3764 MAE: 11.3991 RMSE: 14.1201
Epoch [10/20] Train Loss: 161.0464 MSE: 161.0464 MAE: 10.3117 RMSE: 12.6904


[I 2025-04-23 19:43:23,005] Trial 5 finished with value: 79.53551179459951 and parameters: {'lr': 0.000755569926485612, 'hidden1': 156, 'hidden2': 124, 'hidden3': 75}. Best is trial 0 with value: 51.60118058584269.


Epoch [20/20] Train Loss: 72.9956 MSE: 72.9956 MAE: 6.4902 RMSE: 8.5437
Epoch [10/20] Train Loss: 44.4546 MSE: 44.4546 MAE: 5.1165 RMSE: 6.6674


[I 2025-04-23 19:43:23,635] Trial 6 finished with value: 50.07806457593603 and parameters: {'lr': 0.004499353405785745, 'hidden1': 449, 'hidden2': 316, 'hidden3': 39}. Best is trial 6 with value: 50.07806457593603.


Epoch [20/20] Train Loss: 25.7695 MSE: 25.7695 MAE: 3.8393 RMSE: 5.0764
Epoch [10/20] Train Loss: 141.7851 MSE: 141.7851 MAE: 9.6718 RMSE: 11.9074


[I 2025-04-23 19:43:24,209] Trial 7 finished with value: 52.85474795739628 and parameters: {'lr': 0.0008714267509666363, 'hidden1': 368, 'hidden2': 97, 'hidden3': 92}. Best is trial 6 with value: 50.07806457593603.


Epoch [20/20] Train Loss: 46.3261 MSE: 46.3261 MAE: 5.1494 RMSE: 6.8063
Epoch [10/20] Train Loss: 920.0948 MSE: 920.0948 MAE: 25.6687 RMSE: 30.3331


[I 2025-04-23 19:43:24,759] Trial 8 finished with value: 208.7791282876024 and parameters: {'lr': 0.00011648378125503171, 'hidden1': 292, 'hidden2': 84, 'hidden3': 46}. Best is trial 6 with value: 50.07806457593603.


Epoch [20/20] Train Loss: 236.9876 MSE: 236.9876 MAE: 12.5108 RMSE: 15.3944
Epoch [10/20] Train Loss: 141.0551 MSE: 141.0551 MAE: 9.6728 RMSE: 11.8767


[I 2025-04-23 19:43:25,412] Trial 9 finished with value: 58.95726731679972 and parameters: {'lr': 0.0005118459252191325, 'hidden1': 315, 'hidden2': 290, 'hidden3': 251}. Best is trial 6 with value: 50.07806457593603.


Epoch [20/20] Train Loss: 45.9102 MSE: 45.9102 MAE: 5.1436 RMSE: 6.7757


In [17]:
print("Best trial:")
print(study.best_trial.params)

Best trial:
{'lr': 0.004499353405785745, 'hidden1': 449, 'hidden2': 316, 'hidden3': 39}


In [18]:
class Model(torch.nn.Module):
    def __init__(self, in_dim, out_dim=1):
        super().__init__()
        
        self.features = torch.nn.Sequential(
            nn.Linear(in_dim, 144),
            torch.nn.ReLU(),
            
            nn.Linear(144, 72),
            torch.nn.ReLU(),
            
            nn.Linear(72, 72),
            torch.nn.ReLU(),
            
            nn.Linear(72, out_dim),
        )
    
        
    def forward(self, x):
        output = self.features(x)
        return output
    

model = Model(in_dim=X_train.shape[1], out_dim=1)
  
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

metrics, model = train_model_reg(num_epoch=20,
                    train_dataloader = train_dataloader,
                    test_dataloader = test_dataloader,
                    model=model,
                    criterion=criterion,
                    optimizer=optimizer,
                    )

Epoch [10/20] Train Loss: 100.0167 MSE: 100.0167 MAE: 7.8522 RMSE: 10.0008
Epoch [20/20] Train Loss: 38.0902 MSE: 38.0902 MAE: 4.6711 RMSE: 6.1717


In [20]:
y_true = []
y_pred = []

with torch.no_grad():
    for X_batch, y_batch in test_dataloader:
        predictions = model(X_batch).squeeze()
        
        y_true.extend(y_batch.numpy())
        y_pred.extend(predictions.numpy())

r2 = r2_score(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"R²: {r2:.4f}")
print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")

R²: 0.8052
MSE: 50.1873
MAE: 5.4734
