In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plot 

In [42]:
# Load the electric-vehicle charging-station CSV into a DataFrame and display it.
csv_path = './datasets/Electric_Vehicle_Charging_Station_Data.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,ObjectId2,Station_Name,Address,City,State_Province,Zip_Postal_Code,Start_Date___Time,Start_Time_Zone,End_Date___Time,End_Time_Zone,Total_Duration__hh_mm_ss_,Charging_Time__hh_mm_ss_,Energy__kWh_,GHG_Savings__kg_,Gasoline_Savings__gallons_,Port_Type,ObjectID
0,1,BOULDER / JUNCTION ST1,2280 Junction Pl,Boulder,Colorado,80301,01/01/18 17:49,MDT,01/01/18 19:52,MDT,2:03:02,2:02:44,6.504,2.732,0.816,Level 2,0
1,2,BOULDER / JUNCTION ST1,2280 Junction Pl,Boulder,Colorado,80301,01/02/18 8:52,MDT,01/02/18 9:16,MDT,0:24:34,0:24:19,2.481,1.042,0.311,Level 2,1
2,3,BOULDER / JUNCTION ST1,2280 Junction Pl,Boulder,Colorado,80301,01/02/18 21:11,MDT,01/03/18 6:23,MDT,9:12:21,3:40:52,15.046,6.319,1.888,Level 2,2
3,4,BOULDER / ALPINE ST1,1275 Alpine Ave,Boulder,Colorado,80304,01/03/18 9:19,MDT,01/03/18 11:14,MDT,1:54:51,1:54:29,6.947,2.918,0.872,Level 2,3
4,5,BOULDER / BASELINE ST1,900 Baseline Rd,Boulder,Colorado,80302,01/03/18 14:13,MDT,01/03/18 14:30,MDT,0:16:58,0:16:44,1.800,0.756,0.226,Level 2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148131,148132,BOULDER / N BOULDER REC 1,3172 Broadway,Boulder,Colorado,80304,11/30/2023 19:58,MST,11/30/2023 20:10,MST,0:12:13,0:11:42,1.194,0.848,0.150,Level 2,77937
148132,148133,BOULDER / CARPENTER PARK1,1505 30th St,Boulder,Colorado,80303,11/30/2023 20:00,MST,11/30/2023 20:01,MST,0:01:07,0:00:00,0.000,0.000,0.000,Level 2,77938
148133,148134,BOULDER / CARPENTER PARK1,1505 30th St,Boulder,Colorado,80303,11/30/2023 20:01,MST,11/30/2023 20:21,MST,0:19:52,0:19:43,1.899,1.349,0.238,Level 2,77939
148134,148135,BOULDER / REC CENTER ST2,1360 Gillaspie Dr,Boulder,Colorado,80305,11/30/2023 21:03,MST,11/30/2023 21:31,MST,0:28:08,0:27:50,1.499,1.064,0.188,Level 2,77940


In [36]:
# Keep only the ObjectId2 identifier, the address, and the four timing columns.
columns_to_keep = [
    'ObjectId2',
    'Address',
    'Start_Date___Time',
    'End_Date___Time',
    'Total_Duration__hh_mm_ss_',
    'Charging_Time__hh_mm_ss_',
]

# Work on an independent copy so that later column assignments neither
# trigger SettingWithCopyWarning nor modify `df`.
df_filtered = df.loc[:, columns_to_keep].copy()

# Spot-check a single row (positional index 6).
df_filtered.iloc[6]

ObjectId2                                      7
Address                         1745 14th street
Start_Date___Time            2018-01-04 09:29:00
End_Date___Time              2018-01-08 08:35:00
Total_Duration__hh_mm_ss_        3 days 23:06:31
Charging_Time__hh_mm_ss_         0 days 02:14:10
Name: 6, dtype: object

In [37]:
# The date and duration columns are read from the CSV as plain strings.
# Convert them to datetime / timedelta values so they can be compared and
# used in arithmetic.
#
# Plain column assignment is used on purpose: it replaces the column, so the
# result gets a proper datetime64 / timedelta64 dtype.  With pandas >= 2.0,
# `df_filtered.loc[:, col] = converted` writes into the existing object-dtype
# column in place and the dtype stays `object`, which breaks `.dt` accessors.
#
# NOTE(review): the raw file shows both 2-digit ('01/01/18') and 4-digit
# ('11/30/2023') years; on pandas >= 2.0 `pd.to_datetime` may need
# `format='mixed'` for such input — confirm against the installed version.

df_filtered['Start_Date___Time'] = pd.to_datetime(df_filtered['Start_Date___Time'])
df_filtered['End_Date___Time'] = pd.to_datetime(df_filtered['End_Date___Time'])
df_filtered['Total_Duration__hh_mm_ss_'] = pd.to_timedelta(df_filtered['Total_Duration__hh_mm_ss_'])
df_filtered['Charging_Time__hh_mm_ss_'] = pd.to_timedelta(df_filtered['Charging_Time__hh_mm_ss_'])
df_filtered.head()

Unnamed: 0,ObjectId2,Address,Start_Date___Time,End_Date___Time,Total_Duration__hh_mm_ss_,Charging_Time__hh_mm_ss_
0,1,2280 Junction Pl,2018-01-01 17:49:00,2018-01-01 19:52:00,0 days 02:03:02,0 days 02:02:44
1,2,2280 Junction Pl,2018-01-02 08:52:00,2018-01-02 09:16:00,0 days 00:24:34,0 days 00:24:19
2,3,2280 Junction Pl,2018-01-02 21:11:00,2018-01-03 06:23:00,0 days 09:12:21,0 days 03:40:52
3,4,1275 Alpine Ave,2018-01-03 09:19:00,2018-01-03 11:14:00,0 days 01:54:51,0 days 01:54:29
4,5,900 Baseline Rd,2018-01-03 14:13:00,2018-01-03 14:30:00,0 days 00:16:58,0 days 00:16:44


In [38]:
# Sanity check: charging time must never exceed the total session duration.
# Collect the index labels of every row that violates this (equal values are
# allowed); an empty list means the data is consistent.
#
# The comparison is vectorized over whole columns instead of looping with
# iterrows(), which is far slower on a frame of this size.

charging_exceeds_total = (
    df_filtered['Charging_Time__hh_mm_ss_'] > df_filtered['Total_Duration__hh_mm_ss_']
)
rows_with_greater_charging_time = df_filtered.index[charging_exceeds_total].tolist()

print(rows_with_greater_charging_time)

[]


In [39]:
# Split the dataset into one DataFrame per distinct address.
#
# The per-address frames are collected in `frames_by_address`
# (address -> DataFrame) so they can be iterated and looked up by address.
# Numbered globals df_1, df_2, ... (in order of first appearance of each
# address) are still published for backward compatibility.

unique_addresses = df_filtered['Address'].unique()

frames_by_address = {}
for i, address in enumerate(unique_addresses, 1):
    # Independent copy of the rows recorded at the current address
    current_df = df_filtered[df_filtered['Address'] == address].copy()

    frames_by_address[address] = current_df

    # Same object exposed under the legacy name df_<i>
    globals()[f'df_{i}'] = current_df

In [41]:
# Display the frame for the first unique address; `df_1` is created dynamically
# by the loop that assigns globals()[f'df_{i}'] with i starting at 1.
df_1

Unnamed: 0,ObjectId2,Address,Start_Date___Time,End_Date___Time,Total_Duration__hh_mm_ss_,Charging_Time__hh_mm_ss_
0,1,2280 Junction Pl,2018-01-01 17:49:00,2018-01-01 19:52:00,0 days 02:03:02,0 days 02:02:44
1,2,2280 Junction Pl,2018-01-02 08:52:00,2018-01-02 09:16:00,0 days 00:24:34,0 days 00:24:19
2,3,2280 Junction Pl,2018-01-02 21:11:00,2018-01-03 06:23:00,0 days 09:12:21,0 days 03:40:52
10,11,2280 Junction Pl,2018-01-04 20:16:00,2018-01-05 07:24:00,0 days 11:08:17,0 days 04:03:39
11,12,2280 Junction Pl,2018-01-04 21:11:00,2018-01-04 21:52:00,0 days 00:41:42,0 days 00:41:20
...,...,...,...,...,...,...
90024,90025,2280 Junction Pl,2020-01-09 20:56:00,2020-01-09 21:08:00,0 days 00:12:13,0 days 00:11:27
90025,90026,2280 Junction Pl,2020-01-09 22:22:00,2020-01-10 06:50:00,0 days 08:27:55,0 days 03:33:20
90071,90072,2280 Junction Pl,2020-01-10 15:07:00,2020-01-10 15:26:00,0 days 00:18:51,0 days 00:18:23
90076,90077,2280 Junction Pl,2020-01-10 17:11:00,2020-01-10 18:30:00,0 days 01:19:43,0 days 01:19:21
