In [1]:
import pandas as pd
import numpy as np
import datetime
from EvGym import config

In [2]:
df_elaad = df_elaad = pd.read_csv("data/elaadnl_open_ev_datasets.csv", parse_dates = ["UTCTransactionStart", "UTCTransactionStop"])
df_elaad = df_elaad.rename(columns = config.elaad_rename)

In [3]:
df_elaad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   session               10000 non-null  int64         
 1   ChargePoint           10000 non-null  object        
 2   Connector             10000 non-null  int64         
 3   starttime_parking     10000 non-null  datetime64[ns]
 4   endtime_parking       10000 non-null  datetime64[ns]
 5   StartCard             10000 non-null  object        
 6   connected_time_float  10000 non-null  float64       
 7   charged_time_float    10000 non-null  float64       
 8   total_energy          10000 non-null  float64       
 9   max_power             10000 non-null  float64       
dtypes: datetime64[ns](2), float64(4), int64(2), object(2)
memory usage: 781.4+ KB


In [4]:
df_elaad["start_hour"] = df_elaad["starttime_parking"].dt.hour
df_elaad["day_no"] = (pd.to_datetime(df_elaad["starttime_parking"].dt.date) - datetime.datetime(year = 2019, month = 1, day = 1)).dt.days

# Clean vehicles that have higher charged_time than connected time
df_elaad["charged_time_float"] = np.minimum(df_elaad["charged_time_float"], df_elaad["connected_time_float"])



df_elaad["energy_supplied"] = np.minimum(config.alpha_c * config.eta_c * df_elaad["charged_time_float"], config.B * config.FINAL_SOC)
df_elaad["soc_arr"] = (1 - df_elaad["energy_supplied"]/ (config.B))


df_elaad["charged_time"] = np.ceil(df_elaad["charged_time_float"]) 
df_elaad["connected_time"] = np.ceil(df_elaad["connected_time_float"])

starttime_min = pd.to_datetime("2000-01-01 00:00:00")
df_elaad["ts_arr"] = np.floor((df_elaad["starttime_parking"] - starttime_min).dt.total_seconds() / config.timestep)


if config.timestep == 3600:
    # This way it is consistent with connected_time
    df_elaad["ts_dep"] = df_elaad["ts_arr"] + df_elaad["connected_time"]
else:
    df_elaad["ts_dep"] = np.ceil((df_elaad["endtime_parking"] - starttime_min).dt.total_seconds() / config.timestep)
df_elaad["ts_soj"] = df_elaad["ts_dep"] - df_elaad["ts_arr"]
df_elaad["laxity"] = df_elaad["connected_time"] - (config.FINAL_SOC - df_elaad["initial_soc"]) * config.B / (config.alpha_c*config.eta_c)
df_elaad["depart_hour"] = df_elaad["start_hour"] + df_elaad["connected_time"] # Look how 
df_elaad["xi"] = df_elaad["laxity"] * config.psi * config.alpha_c

In [5]:
df_elaad.describe()

Unnamed: 0,session,Connector,connected_time_float,charged_time_float,total_energy,max_power,start_hour,day_no,energy_supplied,initial_soc,charged_time,connected_time,ts_arr,ts_dep,ts_soj,laxity,depart_hour,xi
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,3452817.0,1.3603,5.822735,2.878022,13.635216,5.589366,12.4126,190.9796,28.608123,0.642398,3.3553,6.3283,171155.923,171162.2513,6.3283,3.89712,18.7409,21.001189
std,107613.9,0.480111,7.096361,2.644282,14.282036,3.333663,4.532131,110.531348,20.586961,0.257337,2.650125,7.093627,2652.843492,2653.132903,7.093627,6.334915,9.166023,34.138224
min,3261657.0,1.0,0.02,0.02,0.1,0.167,0.0,0.0,0.2156,0.03,1.0,1.0,166560.0,166568.0,1.0,0.222635,2.0,1.199755
25%,3360979.0,1.0,1.51,1.25,4.53,3.398,9.0,93.0,13.475,0.528375,2.0,2.0,168804.0,168808.75,2.0,0.752635,13.0,4.055872
50%,3452182.0,1.0,3.38,2.24,7.83,3.64,12.0,190.0,24.1472,0.69816,3.0,4.0,171128.0,171133.5,4.0,1.182635,16.0,6.373099
75%,3547667.0,2.0,8.41,3.5,18.2625,7.27775,16.0,294.0,37.73,0.831562,4.0,9.0,173622.25,173629.0,9.0,5.024119,21.0,27.074475
max,3634120.0,2.0,161.17,25.67,93.929,22.5,23.0,364.0,77.6,0.997305,26.0,162.0,175317.0,175336.0,162.0,155.024119,175.0,835.409508


In [6]:
df_elaad[["connected_time", "ts_soj"]].describe()

Unnamed: 0,connected_time,ts_soj
count,10000.0,10000.0
mean,6.3283,6.3283
std,7.093627,7.093627
min,1.0,1.0
25%,2.0,2.0
50%,4.0,4.0
75%,9.0,9.0
max,162.0,162.0


In [9]:
df_elaad[["depart_hour"]].describe()

Unnamed: 0,depart_hour
count,10000.0
mean,18.7409
std,9.166023
min,2.0
25%,13.0
50%,16.0
75%,21.0
max,175.0


In [7]:
df_elaad.to_csv("data/df_elaad_preproc.csv", index = False)

In [8]:
df_elaad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 22 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   session               10000 non-null  int64         
 1   ChargePoint           10000 non-null  object        
 2   Connector             10000 non-null  int64         
 3   starttime_parking     10000 non-null  datetime64[ns]
 4   endtime_parking       10000 non-null  datetime64[ns]
 5   StartCard             10000 non-null  object        
 6   connected_time_float  10000 non-null  float64       
 7   charged_time_float    10000 non-null  float64       
 8   total_energy          10000 non-null  float64       
 9   max_power             10000 non-null  float64       
 10  start_hour            10000 non-null  int64         
 11  day_no                10000 non-null  int64         
 12  energy_supplied       10000 non-null  float64       
 13  initial_soc      