In [21]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MaxAbsScaler

# read datasets
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

y1_name, y2_name, y3_name = "dir_costs", "traffic_costs_s_r", "lost_trips_costs_s_r"
train_y1, train_y2, train_y3 = train_df[y1_name], train_df[y2_name], train_df[y3_name]
test_y1, test_y2, test_y3 = test_df[y1_name], test_df[y2_name], test_df[y3_name]

# scale features
X_train = train_df.drop(columns=[y1_name, y2_name, y3_name])
scaler = MaxAbsScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

X_test = test_df.drop(columns=[y1_name, y2_name, y3_name])
X_test_scaled = scaler.transform(X_test)

<h2> Catch dependance of y1, y2, y3. Pick sigma from prev optimization </h2>

In [22]:
from GRNN import GRNN
dep_grnn_y1 = GRNN(sigma=0.065982727)
dep_grnn_y1.fit(X_train_scaled, train_y1)
dep_y1_train_pred = dep_grnn_y1.predict(X_train_scaled)
dep_y1_test_pred = dep_grnn_y1.predict(X_test_scaled)

dep_grnn_y2 = GRNN(sigma=0.061057788)
dep_grnn_y2.fit(X_train_scaled, train_y2)
dep_y2_train_pred = dep_grnn_y2.predict(X_train_scaled)
dep_y2_test_pred = dep_grnn_y2.predict(X_test_scaled)

dep_grnn_y3 = GRNN(sigma=0.081542483)
dep_grnn_y3.fit(X_train_scaled, train_y3)
dep_y3_train_pred = dep_grnn_y3.predict(X_train_scaled)
dep_y3_test_pred = dep_grnn_y3.predict(X_test_scaled)


In [23]:
X_train_scaled_dep = np.concatenate((X_train_scaled, dep_y1_train_pred[:, None], dep_y2_train_pred[:, None], dep_y3_train_pred[:, None]), axis=1)

X_test_scaled_dep = np.concatenate((X_test_scaled, dep_y1_test_pred[:, None], dep_y2_test_pred[:, None], dep_y3_test_pred[:, None]), axis=1)

In [27]:
# scale again
scaler2 = MaxAbsScaler()
scaler2.fit(X_train_scaled_dep)
X_train_scaled_dep_scaled = scaler2.transform(X_train_scaled_dep)
X_test_scaled_dep_scaled = scaler2.transform(X_test_scaled_dep)

<h2> Optimize final sigma using differential evolution </h2>

In [28]:
from sklearn.metrics import r2_score
from scipy.optimize import differential_evolution

def objective_function(sigma, train_X, train_y, test_X, test_y):
    grnn = GRNN(sigma=sigma[0])
    grnn.fit(train_X, train_y)
    pred_y = grnn.predict(test_X)
    r2 = r2_score(test_y, pred_y)
    return -r2

<h1> Y1<h1>

In [None]:
optimization_res = differential_evolution(func=objective_function, bounds=[(0, 10)], updating='deferred', workers=10, args=(X_train_scaled_dep_scaled, train_y1, X_test_scaled_dep_scaled, test_y1))

print(optimization_res)
print("Optimal sigma_y1: {:.{}f}".format(optimization_res.x[0], 9))

In [20]:
from GRNN import GRNN
from metrics import print_metrics

grnn_y1 = GRNN(sigma=0.907297525)
grnn_y1.fit(X_train_scaled_dep, train_y1)

print("------ test metrics ------")
print_metrics(test_y1, grnn_y1.predict(X_test_scaled_dep))
print("------ train metrics ------")
print_metrics(train_y1, grnn_y1.predict(X_train_scaled_dep))

------ test metrics ------
Mean Squared Error (MSE):              1594091900756622.5000000000
Root Mean Squared Error (RMSE):        39926080.4582245797
Mean Absolute Error (MAE):             37438795.8244600520
R-squared (R²):                        -7.2840752076
Mean Absolute Percentage Error (MAPE): 1.0000000000
Max Error (ME):                        112812501.9430129975
Median Absolute Error (MedAE):         35028221.3546233028
------ train metrics ------
Mean Squared Error (MSE):              0.0000000000
Root Mean Squared Error (RMSE):        0.0000000000
Mean Absolute Error (MAE):             0.0000000000
R-squared (R²):                        1.0000000000
Mean Absolute Percentage Error (MAPE): 0.0000000000
Max Error (ME):                        0.0000000000
Median Absolute Error (MedAE):         0.0000000000
