# lib

In [1]:
import xgboost as xgb
import pandas as pd
import polars as pl
import numpy as np
import os
from glob import glob 
from hvplot import polars, pandas

from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import optuna

  from .autonotebook import tqdm as notebook_tqdm


# Config

In [2]:
DATA_PATH = "./data/df_add_test_all_torque_estimate.csv"

feature = ['noperation', 'nsampling', 'valve', 'pressure', 'voltage',
       'current', 'temp', 'theta0', 'theta1', 'theta2', 'cos1', 'cos2_inv',
       'cos2_inv2', 'cos2_inv3', 'd_valve', 'd_press', 'dv_sign','status','torque_estimate','bf_right','bf_lft']
target = ['torque']

# Function

In [3]:
scaler = StandardScaler()

In [4]:
data = pd.read_csv(DATA_PATH)
data.head(1)

Unnamed: 0,noperation,nsampling,valve,pressure_1,pressure_in,pressure,voltage,torque,current,temp,...,d_press,dv_sign,bf_right,bf_lft,aft_right,aft_lft,force_above,force_below,status,torque_estimate
0,4935.0,1.0,99.175842,1006.0,4977.0,497.7,14821.0,-2.156,2510.0,40.0,...,0.0,1.0,0.0,0.0,0.0,0.0,115.0,115.0,0.0,-2.871047


In [5]:
X = data[feature]
y = data[target]

In [6]:
X_scaler = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaler, y, test_size= 0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.4)

# Create model

In [9]:
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
evals_result = {} 

def build_xgb(trial):
    params = {
        'booster': 'dart',
        'device':'cuda',
        'max_depth': trial.suggest_int("max_depth", 3, 30),
        'learning_rate': trial.suggest_float("learning_rate", 1e-2, 1),
        'objective': 'reg:squarederror',
        'sample_type': 'uniform',
        'normalize_type': 'tree',
        'eval_metric': 'rmse',
        'random_state': 42,
        'rate_drop': trial.suggest_float("rate_drop", 1e-2, 1),
        'skip_drop': trial.suggest_float("skip_drop", 1e-2, 1)
    }
    num_boost_round = trial.suggest_int("num_boost_round", 100, 1500, step=50) 
    early_stopping_rounds = trial.suggest_int("early_stopping_rounds", 10, 50, step=5)

    xgb_model = xgb.train(
        params,
        dtrain,
        num_boost_round=num_boost_round, # Pass the num_boost_round variable here
        evals=[(dval, 'validation')],
        early_stopping_rounds=early_stopping_rounds, # Pass early_stopping_rounds here
        evals_result=evals_result,
        verbose_eval=False
    )
    return xgb_model

def objective(trial):
    xgb_model = build_xgb(trial)

    best_rmse = evals_result['validation']['rmse'][-1]  
    return best_rmse 

study = optuna.create_study(direction="minimize")  # Minimize RMSE
study.optimize(objective, n_trials=2)  # Adjust n_trials as needed


print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-02-20 14:00:07,998] A new study created in memory with name: no-name-bb0956fc-26ef-49e6-849e-f0552d631064
[I 2025-02-20 14:00:37,265] Trial 0 finished with value: 0.3679203541528326 and parameters: {'max_depth': 10, 'learning_rate': 0.44458597432349617, 'rate_drop': 0.3293368742254536, 'skip_drop': 0.5721931769175302, 'num_boost_round': 850, 'early_stopping_rounds': 15}. Best is trial 0 with value: 0.3679203541528326.
[W 2025-02-20 14:00:37,409] Trial 1 failed with parameters: {'max_depth': 23, 'learning_rate': 0.964839712890937, 'rate_drop': 0.6770552022210584, 'skip_drop': 0.5543470486248528, 'num_boost_round': 1300, 'early_stopping_rounds': 10} because of the following error: XGBoostError('[14:00:37] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [14:00:37] /workspace/src/c_api/../common/device_helpers.cuh:393: Memory allocation error on worker 0: std::bad_alloc: cudaErrorMemoryAllocation: out of memory\n- Free memory: 767426560\n- Requested memory: 1073

XGBoostError: [14:00:37] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [14:00:37] /workspace/src/c_api/../common/device_helpers.cuh:393: Memory allocation error on worker 0: std::bad_alloc: cudaErrorMemoryAllocation: out of memory
- Free memory: 767426560
- Requested memory: 1073741824

Stack trace:
  [bt] (0) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x25c1ac) [0x76a52485c1ac]
  [bt] (1) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x907346) [0x76a524f07346]
  [bt] (2) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x12fabc) [0x76a52472fabc]
  [bt] (3) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe36c48) [0x76a525436c48]
  [bt] (4) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe371a8) [0x76a5254371a8]
  [bt] (5) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe377a6) [0x76a5254377a6]
  [bt] (6) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe3a03e) [0x76a52543a03e]
  [bt] (7) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe3b739) [0x76a52543b739]
  [bt] (8) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x5ad006) [0x76a524bad006]



Stack trace:
  [bt] (0) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x25c1ac) [0x76a52485c1ac]
  [bt] (1) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xe3ba0b) [0x76a52543ba0b]
  [bt] (2) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x5ad006) [0x76a524bad006]
  [bt] (3) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x5ae3d4) [0x76a524bae3d4]
  [bt] (4) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x5f8cd8) [0x76a524bf8cd8]
  [bt] (5) /home/hailt/.local/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x76a524765a1f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x76a5cb0bfe2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x76a5cb0bc493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x76a5cb0e43e9]



In [None]:
'max_depth': 10, 
'learning_rate': 0.44458597432349617, 
'rate_drop': 0.3293368742254536, 
'skip_drop': 0.5721931769175302, 
'num_boost_round': 850, 
'early_stopping_rounds': 15}. 
Best is trial 0 with value: 0.3679203541528326.

In [11]:
best_params = study.best_params
best_params['booster'] = 'dart' # Make sure booster is included
best_params['objective'] = 'reg:squarederror'
best_params['eval_metric'] = 'rmse'
best_params['sample_type'] = 'uniform'
best_params['normalize_type'] = 'tree'
best_params['random_state'] = 42

final_model = xgb.train(
    best_params,
    dtrain,
    num_boost_round=study.best_trial.params['num_boost_round'], # Use the best num_boost_round
    evals=[(dval, 'validation')],
    early_stopping_rounds=study.best_trial.params['early_stopping_rounds'], # Use the best early_stopping_rounds
    verbose_eval=False
)

Parameters: { "early_stopping_rounds", "num_boost_round" } are not used.



In [13]:
y_pred_xgb = final_model.predict(xgb.DMatrix(X_test))

In [15]:
r2_score(y_test, y_pred_xgb)

0.9983132481575012

In [None]:
df_test['torque_estimation_xgb'] = y_pred_xgb

In [None]:
plot = df_test.hvplot(
    width = 1200,
    height = 700 ,
    y=['torque','torque_estimation_xgb']
)