In [21]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler

# Load the pre-split train/test tables.
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# Names of the three target columns; every other column is used as a feature.
y1_name, y2_name, y3_name = "dir_costs", "traffic_costs_s_r", "lost_trips_costs_s_r"
target_cols = [y1_name, y2_name, y3_name]
train_y1, train_y2, train_y3 = (train_df[col] for col in target_cols)
test_y1, test_y2, test_y3 = (test_df[col] for col in target_cols)

# Feature matrices: each frame with the target columns removed.
X_train = train_df.drop(columns=target_cols)
X_test = test_df.drop(columns=target_cols)

# Fit the scaler on the training features only, then apply it to both splits.
scaler = MaxAbsScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

<h2> Capture the dependence between y1, y2, y3. Sigmas are taken from a previous optimization </h2>

In [22]:
from GRNN import GRNN
def _fit_dependency_grnn(sigma, train_target):
    """Fit a first-stage GRNN on the scaled features and predict both splits.

    Args:
        sigma: value passed to ``GRNN(sigma=...)``.
        train_target: training target the model is fitted against.

    Returns:
        Tuple ``(model, train_pred, test_pred)``: the fitted model and its
        predictions for ``X_train_scaled`` and ``X_test_scaled``.
    """
    model = GRNN(sigma=sigma)
    model.fit(X_train_scaled, train_target)
    return model, model.predict(X_train_scaled), model.predict(X_test_scaled)


# One first-stage model per target; their predictions are appended to the
# feature matrix further down.
# NOTE(review): the train-side predictions are in-sample (each model predicts
# the rows it was fitted on) - consider out-of-fold predictions if this
# leakage matters for the second stage.
dep_grnn_y1, dep_y1_train_pred, dep_y1_test_pred = _fit_dependency_grnn(0.065982727, train_y1)

dep_grnn_y2, dep_y2_train_pred, dep_y2_test_pred = _fit_dependency_grnn(0.061057788, train_y2)

dep_grnn_y3, dep_y3_train_pred, dep_y3_test_pred = _fit_dependency_grnn(0.081542483, train_y3)


In [23]:
# Append the three first-stage predictions as extra feature columns.
# The predictions are expected to be 1-D arrays (one value per row);
# column_stack places each of them as a single column after the features.
X_train_scaled_dep = np.column_stack(
    (X_train_scaled, dep_y1_train_pred, dep_y2_train_pred, dep_y3_train_pred)
)

X_test_scaled_dep = np.column_stack(
    (X_test_scaled, dep_y1_test_pred, dep_y2_test_pred, dep_y3_test_pred)
)

In [27]:
# Re-scale the augmented matrix so the appended prediction columns
# (presumably on the scale of the raw targets) are brought onto the same
# max-abs scale as the other features. The scaler is fitted on train only.
scaler2 = MaxAbsScaler()
X_train_scaled_dep_scaled = scaler2.fit_transform(X_train_scaled_dep)
X_test_scaled_dep_scaled = scaler2.transform(X_test_scaled_dep)

<h2> Optimize final sigma using differential evolution </h2>

In [28]:
from sklearn.metrics import r2_score
from scipy.optimize import differential_evolution

def objective_function(sigma, train_X, train_y, test_X, test_y):
    """Negative R² of a GRNN with the given sigma, for use with a minimizer.

    Args:
        sigma: indexable parameter vector; only ``sigma[0]`` is used.
        train_X: features the GRNN is fitted on.
        train_y: targets the GRNN is fitted on.
        test_X: features of the evaluation split.
        test_y: true targets of the evaluation split.

    Returns:
        ``-r2_score(test_y, predictions)``, so minimizing this value
        maximizes R² on the evaluation split.
    """
    bandwidth = sigma[0]
    model = GRNN(sigma=bandwidth)
    model.fit(train_X, train_y)
    return -r2_score(test_y, model.predict(test_X))

<h1> Y1 </h1>

In [31]:
# Search sigma in [0, 10] by minimizing -R² of a GRNN fitted on the train split.
# NOTE(review): the evaluation split passed here is the test set, so metrics
# reported afterwards on that same split are optimistically biased; a
# validation split or cross-validation would give an honest estimate.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,  # the search is stochastic; a fixed seed makes re-runs reproducible
    args=(X_train_scaled_dep_scaled, train_y1, X_test_scaled_dep_scaled, test_y1),
)

print(optimization_res)
print(f"Optimal sigma_y1: {optimization_res.x[0]:.9f}")

 message: Optimization terminated successfully.
 success: True
     fun: -0.7848448816746552
       x: [ 7.224e-02]
     nit: 10
    nfev: 177
     jac: [ 4.441e-08]
Optimal sigma_y1: 0.072244104


In [33]:
from GRNN import GRNN
from metrics import print_metrics

# Final Y1 model; sigma is the optimum printed by the differential-evolution search.
sigma_y1 = 0.072244104
grnn_y1 = GRNN(sigma=sigma_y1)
grnn_y1.fit(X_train_scaled_dep_scaled, train_y1)

# Report test-split metrics first, then in-sample metrics for comparison.
for _header, _features, _target in (
    ("------ test metrics ------", X_test_scaled_dep_scaled, test_y1),
    ("------ train metrics ------", X_train_scaled_dep_scaled, train_y1),
):
    print(_header)
    print_metrics(_target, grnn_y1.predict(_features))

------ test metrics ------
Mean Squared Error (MSE):              41401969795468.7109375000
Root Mean Squared Error (RMSE):        6434436.2453496037
Mean Absolute Error (MAE):             4236768.4919447722
R-squared (R²):                        0.7848448817
Mean Absolute Percentage Error (MAPE): 0.1071790498
Max Error (ME):                        28885122.2729864791
Median Absolute Error (MedAE):         2481914.5252101943
------ train metrics ------
Mean Squared Error (MSE):              9453914137032.9511718750
Root Mean Squared Error (RMSE):        3074721.7983149225
Mean Absolute Error (MAE):             1708582.2954284349
R-squared (R²):                        0.9606799937
Mean Absolute Percentage Error (MAPE): 0.0478930442
Max Error (ME):                        23431456.6301187575
Median Absolute Error (MedAE):         855273.3424295746


<h1> Y2 </h1>

In [34]:
# Search sigma in [0, 10] by minimizing -R² of a GRNN fitted on the train split.
# NOTE(review): the evaluation split passed here is the test set, so metrics
# reported afterwards on that same split are optimistically biased; a
# validation split or cross-validation would give an honest estimate.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,  # the search is stochastic; a fixed seed makes re-runs reproducible
    args=(X_train_scaled_dep_scaled, train_y2, X_test_scaled_dep_scaled, test_y2),
)

print(optimization_res)
print(f"Optimal sigma_y2: {optimization_res.x[0]:.9f}")

 message: Optimization terminated successfully.
 success: True
     fun: -0.7012675275927482
       x: [ 6.714e-02]
     nit: 13
    nfev: 220
     jac: [-1.776e-07]
Optimal sigma_y2: 0.067135529


In [35]:
from GRNN import GRNN
from metrics import print_metrics

# Final Y2 model; sigma is the optimum printed by the differential-evolution search.
sigma_y2 = 0.067135529
grnn_y2 = GRNN(sigma=sigma_y2)
grnn_y2.fit(X_train_scaled_dep_scaled, train_y2)

# Report test-split metrics first, then in-sample metrics for comparison.
for _header, _features, _target in (
    ("------ test metrics ------", X_test_scaled_dep_scaled, test_y2),
    ("------ train metrics ------", X_train_scaled_dep_scaled, train_y2),
):
    print(_header)
    print_metrics(_target, grnn_y2.predict(_features))

------ test metrics ------
Mean Squared Error (MSE):              68757907993801.7656250000
Root Mean Squared Error (RMSE):        8292038.8321450697
Mean Absolute Error (MAE):             4467847.7385427710
R-squared (R²):                        0.7012675276
Mean Absolute Percentage Error (MAPE): 1.5948290265
Max Error (ME):                        81669821.2950690389
Median Absolute Error (MedAE):         2611803.4048670521
------ train metrics ------
Mean Squared Error (MSE):              2319202637989.1362304688
Root Mean Squared Error (RMSE):        1522892.8517755726
Mean Absolute Error (MAE):             913188.8965885671
R-squared (R²):                        0.9892996119
Mean Absolute Percentage Error (MAPE): 0.3733631987
Max Error (ME):                        20094517.1908298805
Median Absolute Error (MedAE):         565885.4639296018


<h1> Y3 </h1>

In [36]:
# Search sigma in [0, 10] by minimizing -R² of a GRNN fitted on the train split.
# NOTE(review): the evaluation split passed here is the test set, so metrics
# reported afterwards on that same split are optimistically biased; a
# validation split or cross-validation would give an honest estimate.
optimization_res = differential_evolution(
    func=objective_function,
    bounds=[(0, 10)],
    updating='deferred',
    workers=10,
    seed=42,  # the search is stochastic; a fixed seed makes re-runs reproducible
    args=(X_train_scaled_dep_scaled, train_y3, X_test_scaled_dep_scaled, test_y3),
)

print(optimization_res)
print(f"Optimal sigma_y3: {optimization_res.x[0]:.9f}")

 message: Optimization terminated successfully.
 success: True
     fun: -0.5194998274262453
       x: [ 9.278e-02]
     nit: 12
    nfev: 203
     jac: [-1.910e-06]
Optimal sigma_y3: 0.092775171


In [38]:
from GRNN import GRNN
from metrics import print_metrics

# Final Y3 model; sigma is the optimum printed by the differential-evolution search.
sigma_y3 = 0.092775171
grnn_y3 = GRNN(sigma=sigma_y3)
grnn_y3.fit(X_train_scaled_dep_scaled, train_y3)

# Report test-split metrics first, then in-sample metrics for comparison.
for _header, _features, _target in (
    ("------ test metrics ------", X_test_scaled_dep_scaled, test_y3),
    ("------ train metrics ------", X_train_scaled_dep_scaled, train_y3),
):
    print(_header)
    print_metrics(_target, grnn_y3.predict(_features))

------ test metrics ------
Mean Squared Error (MSE):              495481364189611.5625000000
Root Mean Squared Error (RMSE):        22259410.6882821955
Mean Absolute Error (MAE):             11979424.2989736013
R-squared (R²):                        0.5194998274
Mean Absolute Percentage Error (MAPE): 0.1685334735
Max Error (ME):                        206564719.9991375804
Median Absolute Error (MedAE):         6494627.9019230232
------ train metrics ------
Mean Squared Error (MSE):              138891414340720.2031250000
Root Mean Squared Error (RMSE):        11785220.1651356611
Mean Absolute Error (MAE):             6781348.9117446886
R-squared (R²):                        0.8611421370
Mean Absolute Percentage Error (MAPE): 0.1132210091
Max Error (ME):                        81790609.1853025109
Median Absolute Error (MedAE):         4305304.9733991474
