In [1]:
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler

# read datasets
train_df = pd.read_csv('data/train2.csv')
test_df = pd.read_csv('data/test2.csv')

y1_name, y2_name, y3_name = "dir_costs", "traffic_costs_s_r", "lost_trips_costs_s_r"
train_y1, train_y2, train_y3 = train_df[y1_name], train_df[y2_name], train_df[y3_name]
test_y1, test_y2, test_y3 = test_df[y1_name], test_df[y2_name], test_df[y3_name]

# scale features
X_train = train_df.drop(columns=[y1_name, y2_name, y3_name])
scaler = MaxAbsScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

X_test = test_df.drop(columns=[y1_name, y2_name, y3_name])
X_test = scaler.transform(X_test)


<h1> Train GBR model using random hyperparameters </h1>

In [2]:
from metrics import print_metrics
from sklearn.ensemble import GradientBoostingRegressor

# Create an instance of GradientBoostingRegressor
gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

# Fit the regressor to the training data
gb_regressor.fit(X_train, train_y1)

print("------ test metrics ------")
print_metrics(test_y1, gb_regressor.predict(X_test))

print("------ train metrics ------")
print_metrics(train_y1, gb_regressor.predict(X_train))


------ test metrics ------
Mean Squared Error (MSE):              27439980506090.0664062500
Root Mean Squared Error (RMSE):        5238318.4807808381
Mean Absolute Error (MAE):             2876794.1302192868
R-squared (R²):                        0.8626833396
Mean Absolute Percentage Error (MAPE): 0.0690625984
Max Error (ME):                        24963096.2137863524
Median Absolute Error (MedAE):         1357845.8501538225
------ train metrics ------
Mean Squared Error (MSE):              12169729567402.5410156250
Root Mean Squared Error (RMSE):        3488513.9482883741
Mean Absolute Error (MAE):             2048725.9760733228
R-squared (R²):                        0.9315339912
Mean Absolute Percentage Error (MAPE): 0.0511555516
Max Error (ME):                        20353932.9277601838
Median Absolute Error (MedAE):         1141793.6472524647


<h1> Optimize Gradient boost parameters using Differential evolution</h1>

In [3]:
from sklearn.metrics import r2_score
from scipy.optimize import differential_evolution

# define objective function
def objective_function(params, train_X, train_y, test_X, test_y):
    regressor = GradientBoostingRegressor(n_estimators=int(params[0]), learning_rate=params[1], max_depth=int(params[2]), random_state=42)
    regressor.fit(train_X, train_y)
    pred_y = regressor.predict(test_X)
    r2 = r2_score(test_y, pred_y)
    return -r2

In [5]:
optimization_res = differential_evolution(func=objective_function, 
                                          bounds=[(2, 300), (0.0001, 0.5), (2, 10)], 
                                          updating='deferred',
                                          workers=10, 
                                          disp=True,
                                          tol=0.00001,
                                          atol=0.00001,
                                          maxiter=100,
                                          args=(X_train, train_y1, X_test, test_y1))
print(optimization_res)

differential_evolution step 1: f(x)= -0.865639
differential_evolution step 2: f(x)= -0.865639
differential_evolution step 3: f(x)= -0.865959
differential_evolution step 4: f(x)= -0.865959
differential_evolution step 5: f(x)= -0.865959
differential_evolution step 6: f(x)= -0.865959
differential_evolution step 7: f(x)= -0.866972
differential_evolution step 8: f(x)= -0.866972
differential_evolution step 9: f(x)= -0.86701
differential_evolution step 10: f(x)= -0.86701
differential_evolution step 11: f(x)= -0.86701
differential_evolution step 12: f(x)= -0.86701
differential_evolution step 13: f(x)= -0.86701
differential_evolution step 14: f(x)= -0.867454
differential_evolution step 15: f(x)= -0.867454
differential_evolution step 16: f(x)= -0.867454
differential_evolution step 17: f(x)= -0.867454
differential_evolution step 18: f(x)= -0.867797
differential_evolution step 19: f(x)= -0.867797
differential_evolution step 20: f(x)= -0.867797
differential_evolution step 21: f(x)= -0.867797
differ

In [6]:
optimization_res.x

array([2.35521523e+02, 5.01130182e-02, 3.84020583e+00])

In [11]:
from metrics import print_metrics
from sklearn.ensemble import GradientBoostingRegressor

# Create an instance of GradientBoostingRegressor
gb_regressor = GradientBoostingRegressor(n_estimators=236, learning_rate=0.050113, max_depth=3, random_state=42)

# Fit the regressor to the training data
gb_regressor.fit(X_train, train_y1)

print("------ test metrics ------")
print_metrics(test_y1, gb_regressor.predict(X_test))

print("------ train metrics ------")
print_metrics(train_y1, gb_regressor.predict(X_train))


------ test metrics ------
Mean Squared Error (MSE):              26123859632432.6015625000
Root Mean Squared Error (RMSE):        5111150.5194459502
Mean Absolute Error (MAE):             2811978.8433588878
R-squared (R²):                        0.8692695441
Mean Absolute Percentage Error (MAPE): 0.0674873352
Max Error (ME):                        24920269.7472040541
Median Absolute Error (MedAE):         1326876.1562735289
------ train metrics ------
Mean Squared Error (MSE):              11262503596302.6855468750
Root Mean Squared Error (RMSE):        3355965.3747174875
Mean Absolute Error (MAE):             1970045.3433365910
R-squared (R²):                        0.9366379782
Mean Absolute Percentage Error (MAPE): 0.0491131486
Max Error (ME):                        18553272.5825485103
Median Absolute Error (MedAE):         1095451.3801190890
