In [1]:
# Move the working directory to ../acnportal/ (resolved against the current
# working directory); every relative path used in later cells, for reading as
# well as for writing, is resolved from there.
# NOTE(review): because the target is relative, re-running this cell without
# restarting the kernel applies "../acnportal/" again from the new location.
import os
os.chdir("../acnportal/")


In [3]:
# lightgbm and optuna
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import pytz
import seaborn as sns
from tqdm.notebook import tqdm

# Column names collected under CAT_FEATURES.
# NOTE(review): presumably the categorical model inputs; confirm against the
# cells that consume this list.
CAT_FEATURES = [
    # columns without the connectionTime_ prefix
    "stationID",
    "spaceID",
    "siteID",
    "clusterID",
    "userID",
    "paymentRequired",
] + [
    # columns carrying the connectionTime_ prefix
    "connectionTime_Weekday",
    "connectionTime_Hour",
    "connectionTime_Month",
    "connectionTime_is_holiday",
    "connectionTime_is_weekend",
    "connectionTime_weekday_hour",
]


In [4]:
# Read the session table (the first two CSV columns become the row MultiIndex)
# and discard the idle_time column in the same expression.
sessions = pd.read_csv(
    "../data/caltech_test_data.csv",
    index_col=[0, 1],
).drop(columns=["idle_time"])
sessions


Unnamed: 0_level_0,Unnamed: 1_level_0,stationID,spaceID,siteID,clusterID,kWhDelivered,userID,parking_time,kWhRequested,Requested_parking_time,paymentRequired,...,connectionTime_Month,connectionTime_weekday_hour,connectionTime_hour_x,connectionTime_hour_y,connectionTime_month_x,connectionTime_month_y,connectionTime_weekday_x,connectionTime_weekday_y,connectionTime_is_holiday,connectionTime_is_weekend
connection_time_copy,_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2018-04-30,5bc915caf9af8b0dad3c0660,2-39-88-24,CA-314,2,39,47.808,22,9.335000,59.5,9.166667,True,...,4,0-8,-5.000000e-01,8.660254e-01,0.892254,-0.451533,9.555728e-01,0.294755,0,0
2018-05-07,5bc917d0f9af8b0dc677b8bb,2-39-139-28,CA-303,2,39,27.683,61,10.611944,8.0,1.000000,True,...,5,0-7,-2.588190e-01,9.659258e-01,0.565554,0.824711,9.659258e-01,0.258819,0,0
2018-05-11,5bc9190ff9af8b0dc677b9c1,2-39-88-24,CA-314,2,39,17.485,22,9.028333,17.5,9.100000,True,...,5,4-8,-5.000000e-01,8.660254e-01,-0.232105,0.972691,-7.330519e-01,-0.680173,0,0
2018-05-14,5bc919c3f9af8b0dc677ba32,2-39-79-380,CA-489,2,39,11.795,66,9.321944,8.0,1.000000,True,...,5,0-6,8.770390e-14,1.000000e+00,-0.742684,0.669642,9.749279e-01,0.222521,0,0
2018-05-14,5bc919c3f9af8b0dc677ba62,2-39-139-28,CA-303,2,39,3.076,61,1.121944,5.0,10.183333,True,...,5,0-17,-2.588190e-01,-9.659258e-01,-0.802694,0.596391,8.045978e-01,0.593820,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-30,5e1fbc9ff9af8b5391bcd6fc,2-39-89-25,CA-315,2,39,5.814,743,2.066667,8.0,1.083333,True,...,12,0-9,-7.071068e-01,7.071068e-01,0.968119,-0.250491,9.438833e-01,0.330279,0,0
2019-12-30,5e1fbc9ff9af8b5391bcd700,2-39-79-377,CA-325,2,39,6.509,1124,7.735556,8.0,1.000000,True,...,12,0-12,-1.000000e+00,-9.512791e-14,0.974267,-0.225399,9.009689e-01,0.433884,0,0
2019-12-31,5e210e1ff9af8b57bb4f54fa,2-39-89-25,CA-315,2,39,4.793,743,1.963889,8.0,0.983333,True,...,12,1-8,-5.000000e-01,8.660254e-01,0.998482,-0.055088,3.653410e-01,0.930874,0,0
2019-12-31,5e210e1ff9af8b57bb4f54fd,2-39-79-377,CA-325,2,39,6.618,1124,7.338333,8.0,1.000000,True,...,12,1-11,-9.659258e-01,2.588190e-01,0.999571,-0.029276,2.588190e-01,0.965926,0,0


In [5]:
# Test window: rows whose first index level lies between "2018-12-01" and
# "2019-01-01" (label slicing keeps both end points); the second index level
# is left unrestricted.
test = sessions.loc[pd.IndexSlice["2018-12-01":"2019-01-01", :], :]


In [6]:
# evaluate smape
def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Every pair contributes ``|y_true - y_pred| / (|y_true| + |y_pred|)``; the
    contributions are averaged and multiplied by 100, so finite inputs score
    between 0 and 100. For same-signed values this is identical to
    ``|y_true - y_pred| / |y_true + y_pred|``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Values to compare: pandas Series, NumPy arrays, plain sequences or
        scalars. pandas objects keep their usual index alignment, and the
        final average is taken with ``np.mean`` on whatever type the
        arithmetic produces.

    Returns
    -------
    float
        SMAPE in percent; 0 means the two inputs agree everywhere.
    """
    abs_error = np.abs(np.subtract(y_true, y_pred))
    scale = np.abs(y_true) + np.abs(y_pred)
    # scale is 0 only where both values are 0, and abs_error is 0 there too.
    # Raising just those denominators to 1 scores such pairs as 0 error rather
    # than 0/0 = NaN, without touching any other pair.
    safe_scale = scale + (scale == 0)
    return np.mean(abs_error / safe_scale) * 100


# SMAPE between the Requested_parking_time and parking_time columns of the
# test window
print(
    "smape of requested parking time: ",
    smape(y_true=test["Requested_parking_time"], y_pred=test["parking_time"]),
)


smape of requested parking time:  23.0599293684014


In [7]:
# MAE of requested parking time: mean absolute gap between the
# Requested_parking_time and parking_time columns of the test window
print(
    "mae of requested parking time: ",
    (test["Requested_parking_time"] - test["parking_time"]).abs().mean(),
)


mae of requested parking time:  2.5341190876014554


In [8]:
# SMAPE between the kWhRequested and kWhDelivered columns of the test window
print(
    "smape of requested energy: ",
    smape(y_true=test["kWhRequested"], y_pred=test["kWhDelivered"]),
)


smape of requested energy:  27.933973390963622


In [9]:
# MAE of requested energy: mean absolute gap between the kWhRequested and
# kWhDelivered columns of the test window
print(
    "mae of requested energy: ",
    (test["kWhRequested"] - test["kWhDelivered"]).abs().mean(),
)


mae of requested energy:  7.1740579345088165


In [10]:
# Gather the four scores of the user-provided inputs (SMAPE and MAE, for
# parking time and for energy) in a table with a single row.
metrics = pd.DataFrame(
    [
        {
            "smape of requested parking time": smape(
                test["Requested_parking_time"], test["parking_time"]
            ),
            "mae of requested parking time": (
                test["Requested_parking_time"] - test["parking_time"]
            )
            .abs()
            .mean(),
            "smape of requested energy": smape(
                test["kWhRequested"], test["kWhDelivered"]
            ),
            "mae of requested energy": (
                test["kWhRequested"] - test["kWhDelivered"]
            )
            .abs()
            .mean(),
        }
    ]
)
# Write the table to CSV; the path is relative to the current working
# directory, which the first cell switches to ../acnportal/.
metrics.to_csv("./metrics_user_input.csv")
metrics


Unnamed: 0,smape of requested parking time,mae of requested parking time,smape of requested energy,mae of requested energy
0,23.059929,2.534119,27.933973,7.174058
