# Utilization Prediction: Prediction of hourly utilization of the two sites

## Import libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
os.environ["KERAS_BACKEND"] = "torch"

import keras
from keras import Sequential
from keras.layers import Dense

%matplotlib inline

## Load and filter data

In [2]:
# Load data
charging_data = pd.read_pickle("data/charging_modified.pkl")

# TODO: more filtering of features

# Delete rows with null values
charging_data = charging_data.dropna()

# delete duplicate row index
charging_data.reset_index(drop = True, inplace = True)


charging_data.head()

Unnamed: 0,id,connectionTime,disconnectTime,doneChargingTime,kWhDelivered,siteID,spaceID,stationID,userID,NoChargingTime,...,kWhPerMinute,month,user_paymentRequired_values,user_userID,user_requestedDeparture,user_modifiedAt,user_minutesAvailable,user_milesRequested,user_kWhRequested,user_WhPerMile
0,5e23b149f9af8b5fe4b973cf,2020-01-02 13:08:54+00:00,2020-01-02 19:11:15+00:00,2020-01-02 17:31:35+00:00,25.016,1,AG-3F30,1-1-179-810,194.0,0 days 01:39:40,...,0.095233,1,True,194.0,"Thu, 02 Jan 2020 20:51:54 GMT","Thu, 02 Jan 2020 13:09:39 GMT",463.0,100.0,25.0,250.0
1,5e23b149f9af8b5fe4b973d0,2020-01-02 13:36:50+00:00,2020-01-02 22:38:21+00:00,2020-01-02 20:18:05+00:00,33.097,1,AG-1F01,1-1-193-825,4275.0,0 days 02:20:16,...,0.082485,1,True,4275.0,"Thu, 02 Jan 2020 23:31:50 GMT","Thu, 02 Jan 2020 13:37:11 GMT",595.0,250.0,70.0,280.0
2,5e23b149f9af8b5fe4b973d1,2020-01-02 13:56:35+00:00,2020-01-03 00:39:22+00:00,2020-01-02 16:35:06+00:00,6.521,1,AG-1F03,1-1-193-829,344.0,0 days 08:04:16,...,0.041138,1,True,344.0,"Thu, 02 Jan 2020 14:56:35 GMT","Thu, 02 Jan 2020 13:57:17 GMT",60.0,20.0,8.0,400.0
3,5e23b149f9af8b5fe4b973d2,2020-01-02 13:59:58+00:00,2020-01-02 16:38:39+00:00,2020-01-02 15:18:45+00:00,2.355,1,AG-1F04,1-1-193-820,1117.0,0 days 01:19:54,...,0.029892,1,True,1117.0,"Thu, 02 Jan 2020 15:04:58 GMT","Thu, 02 Jan 2020 14:00:03 GMT",65.0,20.0,8.0,400.0
4,5e23b149f9af8b5fe4b973d3,2020-01-02 14:00:01+00:00,2020-01-02 22:08:40+00:00,2020-01-02 18:17:30+00:00,13.375,1,AG-1F06,1-1-193-819,334.0,0 days 03:51:10,...,0.051945,1,True,334.0,"Thu, 02 Jan 2020 22:24:01 GMT","Thu, 02 Jan 2020 14:00:13 GMT",504.0,40.0,16.0,400.0


## Create feature vector X and labels Y

In [3]:
# split time interval between connectionTime and disconnectTime in minutes for every hour
def split_time_interval(index, start_date, end_date):
    indices = pd.DatetimeIndex([start_date])
    indices = indices.append(pd.date_range(start_date.ceil(freq = "H"), end_date.floor(freq = "H"), freq = "H"))
    indices = indices.append(pd.DatetimeIndex([end_date]))

    time_list = list(map(lambda x : 60 if x == 0 else x, indices.minute))
    time_list[0] = 60 - time_list[0]
    
    return list((time_list, indices))


# creates table with session id, charging time in minutes per hour and timestamps
def create_minute_table(id, minute_array):
    return list(zip([id] * len(minute_array[0]), minute_array[0], minute_array[1]))

In [4]:
# concat single minute lists to one big list with lists of tuples containing id, charging times in minutes per hour and timestamps
temp = []

for index in charging_data.index :
    charging_times = split_time_interval(charging_data.index[index], charging_data.loc[index, "connectionTime"], charging_data.loc[index, "disconnectTime"])
    temp.append(create_minute_table(charging_data.loc[index, "id"], charging_times))

In [39]:
# parse list of lists of triples into list of three lists: ids, charging time in minutes per hour and timestamps
ids, charging_times, timestamps = map(list, zip(*[tuple for list in temp for tuple in list]))

# create data.frame with three columns: session id, charging time and starting timestamp in minutes for every hour
charging_time_per_hour = pd.DataFrame(
                            {"charging_time_per_hour": charging_times,
                             "timestamp": timestamps})
charging_time_per_hour.loc[:, "timestamp"] = charging_time_per_hour.loc[:, "timestamp"].dt.floor(freq = "H")

# roughly 50 EV charging stations per site * 60 minutes
maximum_utilization = 50 * 60

X = charging_data
Y = list(map(lambda x : maximum_utilization - x, charging_time_per_hour.groupby("timestamp").sum().loc[:, "charging_time_per_hour"]))
Y

[2947,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2913,
 2978,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2925,
 2957,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2922,
 2990,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2931,
 2946,
 2927,
 2893,
 2964,
 2940,
 2940,
 2940,
 2940,
 2926,
 2989,
 2940,
 2940,
 2887,
 2880,
 2880,
 2880,
 2880,
 2880,
 2880,
 2873,
 2940,
 2918,
 2963,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2927,
 2943,
 2940,
 2894,
 2944,
 2940,
 2940,
 2932,
 2978,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2902,
 2943,
 2940,
 2940,
 2938,
 2995,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2919,
 2992,
 2940,
 2940,
 2940,
 2940,
 2940,
 2935,
 2956,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2940,
 2890,
 2973,
 2904,
 2880,
 2880,
 2880,
 2880,
 2880,
 2880,
 2880,
 2822,
 2887,
 2970,
 2940,
 2940,
 2940,
 2940,
 2940,

## Cross-validation: split data in training and test set