In [80]:
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler

# Load the pre-split train / test tables.
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# The three regression targets; every other column is treated as a feature.
y1_name, y2_name, y3_name = "dir_costs", "traffic_costs_s_r", "lost_trips_costs_s_r"
target_names = [y1_name, y2_name, y3_name]
train_y1, train_y2, train_y3 = (train_df[name] for name in target_names)
test_y1, test_y2, test_y3 = (test_df[name] for name in target_names)

# Feature matrices are whatever remains once the target columns are dropped.
X_train = train_df.drop(columns=target_names)
X_test = test_df.drop(columns=target_names)

# The scaler is fitted on the training features only and then reused for the
# test features, so no test-set statistics enter the scaling.
# The scaled outputs can be wrapped as
# pd.DataFrame(X_train_scaled, columns=X_train.columns) if labelled frames are needed.
scaler = MaxAbsScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)


<h2>Pick an arbitrary sigma</h2>

In [81]:
from GRNN import GRNN
from metrics import print_metrics
# Baseline: fit the y1 model with a hand-picked sigma before any tuning.
grnn_y1 = GRNN(sigma=0.1)
grnn_y1.fit(X_train_scaled, train_y1)

# Report the held-out split first, then the training split.
for split_header, y_true, X_scaled in (
    ("------ test metrics ------", test_y1, X_test_scaled),
    ("------ train metrics ------", train_y1, X_train_scaled),
):
    print(split_header)
    print_metrics(y_true, grnn_y1.predict(X_scaled))


------ test metrics ------
Mean Squared Error (MSE):              48338730466213.3281250000
Root Mean Squared Error (RMSE):        6952606.0197751271
Mean Absolute Error (MAE):             4976156.7957704086
R-squared (R²):                        0.7487963659
Mean Absolute Percentage Error (MAPE): 0.1325812722
Max Error (ME):                        31273101.6573293209
Median Absolute Error (MedAE):         3904027.6013873965
------ train metrics ------
Mean Squared Error (MSE):              36019039146339.2578125000
Root Mean Squared Error (RMSE):        6001586.3858099431
Mean Absolute Error (MAE):             4218341.8555033142
R-squared (R²):                        0.8501923302
Mean Absolute Percentage Error (MAPE): 0.1196653676
Max Error (ME):                        29333179.1724790111
Median Absolute Error (MedAE):         3374039.2250335049


<h2>Use differential evolution to pick the optimal sigma</h2>

In [85]:
from sklearn.metrics import r2_score
from scipy.optimize import differential_evolution

def objective_function(sigma, train_X, train_y, test_X, test_y):
    """Score one candidate sigma for use as an optimiser objective.

    Parameters
    ----------
    sigma : sequence
        Candidate parameter vector; only ``sigma[0]`` is used, as the GRNN sigma.
    train_X, train_y
        Features and target passed to ``GRNN.fit``.
    test_X, test_y
        Features and target the fitted model is scored on.

    Returns
    -------
    float
        The negated R² of the predictions for ``test_X``, so that a
        minimiser ends up maximising R².
    """
    model = GRNN(sigma=sigma[0])
    model.fit(train_X, train_y)
    return -r2_score(test_y, model.predict(test_X))

<h1> Y1 </h1>

In [87]:
# Search sigma in [0, 10] for the y1 model. differential_evolution minimises,
# and objective_function returns -R², so this maximises R².
# The search is stochastic; a fixed seed makes the reported optimum repeatable
# after a kernel restart.
# NOTE(review): the objective is scored on the test split, so test metrics
# reported for the tuned sigma are optimistically biased — consider tuning on
# a separate validation split.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,
    args=(X_train_scaled, train_y1, X_test_scaled, test_y1),
)
print(optimization_res)
print("Optimal sigma_y1: {:.{}f}".format(optimization_res.x[0], 9))

 message: Optimization terminated successfully.
 success: True
     fun: -0.7969511832558027
       x: [ 6.598e-02]
     nit: 13
    nfev: 220
     jac: [ 1.110e-08]
Optimal sigma_y1: 0.065982727


In [88]:
from GRNN import GRNN
from metrics import print_metrics

# Refit the y1 model with the sigma reported by the differential-evolution search.
sigma_y1 = 0.065982727
grnn_y1 = GRNN(sigma=sigma_y1)
grnn_y1.fit(X_train_scaled, train_y1)

# Report the held-out split first, then the training split.
for split_header, y_true, X_scaled in (
    ("------ test metrics ------", test_y1, X_test_scaled),
    ("------ train metrics ------", train_y1, X_train_scaled),
):
    print(split_header)
    print_metrics(y_true, grnn_y1.predict(X_scaled))

------ test metrics ------
Mean Squared Error (MSE):              39072372729413.3671875000
Root Mean Squared Error (RMSE):        6250789.7684543328
Mean Absolute Error (MAE):             4171352.0500839101
R-squared (R²):                        0.7969511833
Mean Absolute Percentage Error (MAPE): 0.1076935767
Max Error (ME):                        27390213.6671032012
Median Absolute Error (MedAE):         2727698.7167711779
------ train metrics ------
Mean Squared Error (MSE):              11319550268831.7382812500
Root Mean Squared Error (RMSE):        3364453.9332307312
Mean Absolute Error (MAE):             1928127.2052676135
R-squared (R²):                        0.9529205806
Mean Absolute Percentage Error (MAPE): 0.0532162149
Max Error (ME):                        23854965.2255111933
Median Absolute Error (MedAE):         1020814.4992648717


<h1> Y2 </h1>

In [89]:
# Search sigma in [0, 10] for the y2 model. differential_evolution minimises,
# and objective_function returns -R², so this maximises R².
# The search is stochastic; a fixed seed makes the reported optimum repeatable
# after a kernel restart.
# NOTE(review): the objective is scored on the test split, so test metrics
# reported for the tuned sigma are optimistically biased — consider tuning on
# a separate validation split.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,
    args=(X_train_scaled, train_y2, X_test_scaled, test_y2),
)
print(optimization_res)
print("Optimal sigma_y2: {:.{}f}".format(optimization_res.x[0], 9))

 message: Optimization terminated successfully.
 success: True
     fun: -0.7023818942348741
       x: [ 6.106e-02]
     nit: 11
    nfev: 190
     jac: [-2.220e-08]
Optimal sigma_y2: 0.061057788


In [90]:
from GRNN import GRNN
from metrics import print_metrics

# Refit the y2 model with the sigma reported by the differential-evolution search.
sigma_y2 = 0.061057788
grnn_y2 = GRNN(sigma=sigma_y2)
grnn_y2.fit(X_train_scaled, train_y2)

# Report the held-out split first, then the training split.
for split_header, y_true, X_scaled in (
    ("------ test metrics ------", test_y2, X_test_scaled),
    ("------ train metrics ------", train_y2, X_train_scaled),
):
    print(split_header)
    print_metrics(y_true, grnn_y2.predict(X_scaled))

------ test metrics ------
Mean Squared Error (MSE):              68501419241731.3984375000
Root Mean Squared Error (RMSE):        8276558.4177078940
Mean Absolute Error (MAE):             4388624.5701665469
R-squared (R²):                        0.7023818942
Mean Absolute Percentage Error (MAPE): 1.6495639019
Max Error (ME):                        85655608.1953858286
Median Absolute Error (MedAE):         2658237.9371098289
------ train metrics ------
Mean Squared Error (MSE):              3350194761325.9506835938
Root Mean Squared Error (RMSE):        1830353.7257388122
Mean Absolute Error (MAE):             1022780.9595428550
R-squared (R²):                        0.9845427978
Mean Absolute Percentage Error (MAPE): 0.4284010169
Max Error (ME):                        24835440.3675847687
Median Absolute Error (MedAE):         618150.4338244664


<h1> Y3</h1>

In [91]:
# Search sigma in [0, 10] for the y3 model. differential_evolution minimises,
# and objective_function returns -R², so this maximises R².
# The search is stochastic; a fixed seed makes the reported optimum repeatable
# after a kernel restart.
# NOTE(review): the objective is scored on the test split, so test metrics
# reported for the tuned sigma are optimistically biased — consider tuning on
# a separate validation split.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,
    args=(X_train_scaled, train_y3, X_test_scaled, test_y3),
)
print(optimization_res)
print("Optimal sigma_y3: {:.{}f}".format(optimization_res.x[0], 9))

 message: Optimization terminated successfully.
 success: True
     fun: -0.5353204368090128
       x: [ 8.154e-02]
     nit: 13
    nfev: 218
     jac: [ 9.104e-07]
Optimal sigma_y3: 0.081542483


In [92]:
from GRNN import GRNN
from metrics import print_metrics

# Refit the y3 model with the sigma reported by the differential-evolution search.
sigma_y3 = 0.081542483
grnn_y3 = GRNN(sigma=sigma_y3)
grnn_y3.fit(X_train_scaled, train_y3)

# Report the held-out split first, then the training split.
for split_header, y_true, X_scaled in (
    ("------ test metrics ------", test_y3, X_test_scaled),
    ("------ train metrics ------", train_y3, X_train_scaled),
):
    print(split_header)
    print_metrics(y_true, grnn_y3.predict(X_scaled))

------ test metrics ------
Mean Squared Error (MSE):              479167494670487.4375000000
Root Mean Squared Error (RMSE):        21889894.8072047047
Mean Absolute Error (MAE):             11919462.5806300901
R-squared (R²):                        0.5353204368
Mean Absolute Percentage Error (MAPE): 0.1719544801
Max Error (ME):                        200235208.9786968231
Median Absolute Error (MedAE):         6931585.6261113454
------ train metrics ------
Mean Squared Error (MSE):              172358645450598.6562500000
Root Mean Squared Error (RMSE):        13128543.1579668671
Mean Absolute Error (MAE):             7678646.3210368101
R-squared (R²):                        0.8276829904
Mean Absolute Percentage Error (MAPE): 0.1239887136
Max Error (ME):                        90314658.3416012973
Median Absolute Error (MedAE):         4924903.9227655530
