In [37]:
import pandas as pd
import matplotlib.pyplot as plt
import json

In [38]:
# Load the raw charging-session records from the CSV export.
sessions_csv = "data/charging_sessions.csv"
df = pd.read_csv(sessions_csv)

In [39]:
# Set datatypes: timestamp columns become datetimes, identifier columns become strings.
for time_col in ('connectionTime', 'disconnectTime', 'doneChargingTime'):
    df[time_col] = pd.to_datetime(df[time_col])

for id_col in ('sessionID', 'siteID', 'spaceID', 'stationID', 'userID'):
    # NOTE(review): astype(str) renders a missing value as the text 'nan', so it no
    # longer registers as null afterwards -- confirm these columns have no gaps.
    df[id_col] = df[id_col].astype(str)

In [40]:
# The first column contains an index: name it 'Index' and promote it to the
# DataFrame index in a single chained step.
first_column = df.columns[0]
df = df.rename(columns={first_column: 'Index'}).set_index('Index')

In [41]:
# sessionID is a composite of stationID and connectionTime and is therefore redundant.
# timezone contains only one value and can therefore be treated as metadata.
redundant_columns = ['sessionID', 'timezone']
df = df.drop(columns=redundant_columns)

In [42]:
# Time spent connected after charging finished (disconnect minus done-charging).
idle_duration = df['disconnectTime'] - df['doneChargingTime']
df['NoChargingTime'] = idle_duration
df['NoChargingTimeMinutes'] = idle_duration.dt.total_seconds() / 60.0  # seconds -> minutes

# Time between connecting and the end of charging.
charging_duration = df['doneChargingTime'] - df['connectionTime']
df['ChargingTime'] = charging_duration
df['ChargingTimeMinutes'] = charging_duration.dt.total_seconds() / 60.0  # seconds -> minutes

# Delivered energy per minute of charging time.
# NOTE(review): a charging duration of zero minutes produces inf (or NaN for 0/0)
# here -- confirm whether such rows exist and how they should be treated.
df['kWhPerMinute'] = df['kWhDelivered'].div(df['ChargingTimeMinutes'])

# Month number of the connection timestamp.
# NOTE(review): taken from connectionTime as stored; if that is UTC, the month may
# differ from the site's local month near month boundaries -- confirm.
df['month'] = df['connectionTime'].dt.month

### Flatten the JSON object in `userInputs`

In [43]:
# NOTE(review): everything in this cell is commented-out draft code. It refers to
# `df_charging`, which is not defined in the visible cells; the executed version
# of this logic operates on `df` in the following cell.

# df_charging['userInputs'] = df_charging['userInputs'].str.replace("'", '"')
# df_charging['userInputs'] = df_charging['userInputs'].str.replace('"[', "'[")
# df_charging['userInputs'] = df_charging['userInputs'].str.replace(']"', "]'")
# df_charging['userInputs'] = df_charging['userInputs'].str.replace("True", "true")
# df_charging['userInputs'] = df_charging['userInputs'].str.replace("False", "false")

# Mapping: Value to be replaced: replacing value
#replacements = {
#    "'": '"', 
#    '"[': "'[",
#    ']"': "]'",
#    "True": "true",
#    "False": "false"
# }

# shorthand to replace multiple values
# for i, j in replacements.items():
#    df_charging['userInputs'] = df_charging['userInputs'].str.replace(i, j)
# Old Version
# for i in range(len(df_charging['userInputs'])):
#     if not pd.isna(df_charging['userInputs'].iloc[i]):
#        df_charging.at[i, 'userInputs'] = json.loads(df_charging['userInputs'].iloc[i])

# Refactored Version
# df_charging['userInputs'] = df_charging['userInputs'].apply(lambda x: json.loads(x) if pd.notna(x) else x)

In [44]:
# Parse the stringified `userInputs` column into Python objects and copy the
# fields of the first entry of each parsed list into dedicated `user_*` columns.

# Literal substitutions that turn the stored text into JSON, applied in order.
USER_INPUT_REPLACEMENTS = (
    ("'", '"'),
    ('"[', "'["),
    (']"', "]'"),
    ("True", "true"),
    ("False", "false"),
)

# New column name -> key read from the first element of the parsed list.
USER_INPUT_COLUMNS = {
    'user_paymentRequired_values': 'paymentRequired',
    'user_userID': 'userID',
    'user_requestedDeparture': 'requestedDeparture',
    'user_modifiedAt': 'modifiedAt',
    'user_minutesAvailable': 'minutesAvailable',
    'user_milesRequested': 'milesRequested',
    'user_kWhRequested': 'kWhRequested',
    'user_WhPerMile': 'WhPerMile',
}


def parse_user_inputs(raw):
    """Decode one raw `userInputs` cell.

    Strings are rewritten with USER_INPUT_REPLACEMENTS and handed to
    `json.loads`. Any non-string value (a missing value, or a value already
    decoded by a previous run of this cell) is returned unchanged, so
    re-executing the cell does not wipe the parsed data.

    Raises:
        json.JSONDecodeError: if the rewritten text is not valid JSON.
    """
    if not isinstance(raw, str):
        return raw
    for old, new in USER_INPUT_REPLACEMENTS:
        # Built-in str.replace always matches literally, so '"[' is never
        # compiled as a regular expression regardless of the pandas version.
        raw = raw.replace(old, new)
    return json.loads(raw)


def first_user_input(entries, key):
    """Return `entries[0][key]` when `entries` is a non-empty list, else None.

    Raises:
        KeyError: if the first entry does not contain `key`.
    """
    if isinstance(entries, list) and len(entries) > 0:
        return entries[0][key]
    return None


df['userInputs'] = df['userInputs'].apply(parse_user_inputs)

# Extract each field of the first user input and save it in its own column.
for column, key in USER_INPUT_COLUMNS.items():
    df[column] = df['userInputs'].apply(first_user_input, args=(key,))

# Display the DataFrame
print(df)

                             id            connectionTime  \
Index                                                       
0      5e23b149f9af8b5fe4b973cf 2020-01-02 13:08:54+00:00   
1      5e23b149f9af8b5fe4b973d0 2020-01-02 13:36:50+00:00   
2      5e23b149f9af8b5fe4b973d1 2020-01-02 13:56:35+00:00   
3      5e23b149f9af8b5fe4b973d2 2020-01-02 13:59:58+00:00   
4      5e23b149f9af8b5fe4b973d3 2020-01-02 14:00:01+00:00   
...                         ...                       ...   
10083  5d574ad2f9af8b4c10c03652 2019-07-31 18:08:04+00:00   
10084  5d574ad2f9af8b4c10c03653 2019-07-31 18:40:41+00:00   
10085  5d574ad2f9af8b4c10c03654 2019-07-31 19:04:40+00:00   
10086  5d574ad2f9af8b4c10c03655 2019-07-31 19:19:47+00:00   
10087  5d574ad2f9af8b4c10c03656 2019-07-31 19:21:47+00:00   

                 disconnectTime          doneChargingTime  kWhDelivered  \
Index                                                                     
0     2020-01-02 19:11:15+00:00 2020-01-02 17:31:35+00:0

In [45]:
# Persist the enriched DataFrame as a pickle file.
pickle_path = 'data/charging_modified.pkl'
df.to_pickle(pickle_path)