# ChargeUp! Battery Swapping Cabinet Data Analysis (Public)

This notebook provides a framework for the analysis of Battery Swap Station data for e-motorcycles and was developed as part of the **ChargeUp!** project (2022-2023), funded by **P4G** (https://p4gpartnerships.org/chargeup). 

Author: Cameron Sheehan (Research Associate, Energy Futures Lab, Imperial College London)

## 1. Import all required packages

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from keplergl import KeplerGl
import random
from h3 import h3
import h3pandas
from folium import Map, Marker, GeoJson
from folium.plugins import MarkerCluster
import branca.colormap as cm
from branca.colormap import linear
import folium
import networkx as nx
import osmnx as ox
import shapely
from shapely.geometry import LineString
from shapely.geometry import Point
from shapely.geometry import Polygon
import pulp
from pulp import LpMaximize, LpProblem, LpStatus, lpSum, LpVariable
from descartes import PolygonPatch
from rasterstats import zonal_stats
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import normalize
import movingpandas as mpd
import xarray as xr
import hvplot.xarray  # noqa
import hvplot.pandas 
from holoviews import opts
import seaborn as sns
from pytz import common_timezones, all_timezones
import warnings
from IPython.display import display, HTML

# Silence all Python warnings for the rest of the session so that library
# notices do not clutter the notebook output.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Keep axis ticks and grid lines underneath the plotted data on every figure.
plt.rcParams['axes.axisbelow'] = True

# Inject CSS so that rendered cell outputs may use the full page width.
display(HTML("<style>.output_result { max-width:100% !important; }</style>"))

## 2. Import Data

Open files as dataframes and process column data.

First, paste the full pathname of the battery_swaps data file below. Ensure the pathname is enclosed in quotation marks, e.g. '/XXX/XXX/XXX/battery_swaps.csv'

In [None]:
# Full pathname of the battery swaps CSV data file, given as a quoted string,
# e.g. '/XXX/XXX/XXX/battery_swaps.csv'. Must be filled in before the data is read.
pathname_battery_swaps = ''

Next, input the column headings for the required data fields:

In [None]:
# Column headings, exactly as they appear in the raw CSV file, for each of the
# required data fields. Replace every empty string with the matching heading.
BSS_id_col = ''        # swap cabinet (battery swap station) identifier
batt_in_id_col = ''    # identifier of the battery handed in at the swap
batt_out_id_col = ''   # identifier of the battery taken out at the swap
batt_in_soc_col = ''   # state of charge of the battery handed in
batt_out_soc_col = ''  # state of charge of the battery taken out
swap_time_col = ''     # timestamp of the swap
user_id_col = ''       # identifier of the user performing the swap

Set the column headings for the optional data fields if they're available:

In [None]:
# Optional data fields: a swap status and a swap city. When present in the raw
# data they are used to filter the swap records further.
# Set the availability variable below to True if the raw data contains them,
# otherwise set it to False.
optional_data_fields_available = True  # False

if optional_data_fields_available:
    # Column headings, as they appear in the raw CSV file, of the optional fields
    swap_status_col = ''
    swap_city_col = ''

Read data from csv file and create a pandas dataframe:

In [None]:
# Load the raw CSV file; its first row supplies the column headings.
df_battery_swaps_raw = pd.read_csv(pathname_battery_swaps, header=0)

# Raw headings of the required fields, paired position-by-position with the
# standard column names used throughout the rest of this notebook.
raw_headings = [swap_time_col, BSS_id_col, batt_in_id_col, batt_in_soc_col,
                batt_out_id_col, batt_out_soc_col, user_id_col]
standard_headings = ['swap_time', 'BSS_id', 'batt_in_id', 'batt_in_soc',
                     'batt_out_id', 'batt_out_soc', 'user_id']

# Append the optional fields if they're available
if optional_data_fields_available == True:
    raw_headings = raw_headings + [swap_status_col, swap_city_col]
    standard_headings = standard_headings + ['swap_status', 'swap_city']

# Copy the selected raw columns into the working dataframe under the standard names
df_battery_swaps = df_battery_swaps_raw[raw_headings].copy()
df_battery_swaps.columns = standard_headings

In [None]:
# Process battery_swaps data types.
# Identifier-like fields are converted to the pandas "string" dtype. The optional
# 'swap_status' and 'swap_city' columns are converted only when they are present
# in the dataframe, so this cell also works when the optional data fields are
# unavailable (previously this raised a KeyError) or have already been dropped.
text_cols = ['BSS_id', 'batt_in_id', 'batt_out_id', 'user_id']
text_cols += [col for col in ('swap_status', 'swap_city') if col in df_battery_swaps.columns]
df_battery_swaps[text_cols] = df_battery_swaps[text_cols].astype("string")

# State-of-charge fields are converted to numbers; values that cannot be parsed
# become NaN (errors='coerce') instead of raising.
soc_cols = ['batt_in_soc', 'batt_out_soc']
df_battery_swaps[soc_cols] = df_battery_swaps[soc_cols].apply(pd.to_numeric, errors='coerce')

In [None]:
# Keep only the swaps that were completed and that took place in Nairobi,
# then discard the two helper columns as they are no longer needed.
if optional_data_fields_available==True:
    swap_completed = df_battery_swaps['swap_status'] == 'completed'
    swap_in_nairobi = df_battery_swaps['swap_city'] == 'Nairobi'
    df_battery_swaps = df_battery_swaps[swap_completed & swap_in_nairobi]
    df_battery_swaps = df_battery_swaps.drop(columns=['swap_status', 'swap_city'])

In [None]:
# Parse the recorded swap times, label them as Asia/Hong_Kong times and convert
# them to the local Africa/Nairobi timezone. The conversion is computed once and
# stored both in 'swap_time' and in a 'date_time' column that becomes the index.
local_swap_times = (
    pd.to_datetime(df_battery_swaps['swap_time'], utc=False)
    .dt.tz_localize('Asia/Hong_Kong')
    .dt.tz_convert('Africa/Nairobi')
)
df_battery_swaps["date_time"] = local_swap_times
df_battery_swaps['swap_time'] = local_swap_times

# Order the records by cabinet and then by time before indexing on the timestamp
df_battery_swaps = df_battery_swaps.sort_values(by=['BSS_id', 'date_time'])
df_battery_swaps = df_battery_swaps.set_index(['date_time'])

## 3. Analyse swap cabinet data

Check number of unique devices in dataset:

In [None]:
# A battery may appear as the one handed in, the one taken out, or both, so the
# two id columns are flattened into one array before counting distinct values.
all_battery_ids = df_battery_swaps[['batt_out_id', 'batt_in_id']].values.ravel('K')
n_batteries = len(pd.unique(all_battery_ids))

# Distinct swap cabinets and distinct users
n_cabinets = len(df_battery_swaps['BSS_id'].unique())
n_users = len(df_battery_swaps['user_id'].unique())

print("There are a total of ", n_batteries, " unique batteries, ", n_cabinets, " swap cabinets, and a total of ", n_users, " users in this dataset.")

Determine total number of swaps per swap cabinet in dataset:

In [None]:
# Count the swap records belonging to each cabinet and hold the result in a
# single-column dataframe indexed by cabinet id.
swaps_per_cabinet = df_battery_swaps.groupby('BSS_id').size()
df_swap_cabinets_count = swaps_per_cabinet.to_frame(name='total_swaps')
df_swap_cabinets_count

Determine total number of swaps in dataset:

In [None]:
# Sum the per-cabinet counts to obtain the number of swaps in the whole dataset
total_swaps = df_swap_cabinets_count['total_swaps'].sum()
print("There was a total of ", total_swaps, " battery swaps that occurred in this dataset.")

Filter out certain data entries. <br>

Note: The values used to filter data can be changed to whatever values are deemed appropriate for the specific dataset. 

In [None]:
# Filter thresholds - change these to whatever is appropriate for the dataset.
# Cabinets with fewer total swaps than this are removed from the analysis dataset,
# as such low numbers may not be representative of actual swap behaviour.
min_swaps_req = 20
# Swaps where the battery handed in has an SOC above this (%) are removed: the swap
# was not required and was probably part of initial testing.
max_batt_in_soc = 90
# Swaps where the battery taken out has an SOC below this (%) are removed: the swap
# was probably part of initial testing.
min_batt_out_soc = 50

# Cabinets that did not reach the minimum number of swaps
remove_cabinets = df_swap_cabinets_count.index[df_swap_cabinets_count['total_swaps'] < min_swaps_req].tolist()

# Select rows with boolean masks rather than dropping by index label: the
# date_time index is not unique, so dropping by label would also delete valid
# swaps that merely share a timestamp with a record being removed.
in_removed_cabinet = df_battery_swaps['BSS_id'].isin(remove_cabinets)
batt_in_soc_too_high = df_battery_swaps['batt_in_soc'] > max_batt_in_soc
batt_out_soc_too_low = df_battery_swaps['batt_out_soc'] < min_batt_out_soc

# Records with a missing SOC compare as False in both SOC tests and are therefore kept.
keep_record = ~in_removed_cabinet & ~batt_in_soc_too_high & ~batt_out_soc_too_low
df_battery_swaps = df_battery_swaps[keep_record].copy()

# Display list of Cabinet Serial Numbers (SN) removed from dataset
print("The following cabinets have been removed from the dataset since they did not meet the minimum swap limit of ", 
      min_swaps_req, " set: ", remove_cabinets)

Visualise data for each swap cabinet:

In [None]:
# Scatter plot of the SOC of the batteries handed in (batt_in_soc) against swap
# time, with one colour per swap cabinet.
# NOTE: sns.relplot returns a figure-level grid object rather than a single Axes,
# which is why the figure is reached through `.fig` below.
ax = sns.relplot(data=df_battery_swaps, hue='BSS_id', x='swap_time', y='batt_in_soc', 
                kind='scatter', height=3, aspect=3, marker='o', alpha=0.6)
# Rotate and align the date labels on the x-axis so they do not overlap
ax.fig.autofmt_xdate()

In [None]:
# Scatter plot of the SOC of the batteries taken out (batt_out_soc) against swap
# time, with one colour per swap cabinet.
# NOTE: sns.relplot returns a figure-level grid object rather than a single Axes,
# which is why the figure is reached through `.fig` below.
ax = sns.relplot(data=df_battery_swaps, hue='BSS_id', x='swap_time', y='batt_out_soc', 
                kind='scatter', height=3, aspect=3, marker='o', alpha=0.6)
# Rotate and align the date labels on the x-axis so they do not overlap
ax.fig.autofmt_xdate()

### Determine number of swaps per day per user

In [None]:
# Count the swaps made by each user on each calendar date. Only (date, user)
# pairs with at least one swap appear, so days on which a user made no swaps
# do not contribute to the statistics below.
swap_dates = df_battery_swaps.index.date
swaps_per_date_user = df_battery_swaps.groupby([swap_dates, 'user_id']).size()
df_battery_swaps_g_date_user = swaps_per_date_user.to_frame('swaps_by_user')

# Mean and standard deviation of the daily swap count per user
daily_swaps_by_user = df_battery_swaps_g_date_user['swaps_by_user']
mean_swap_per_user_per_day = daily_swaps_by_user.mean()
std_swap_per_user_per_day = daily_swaps_by_user.std()
print("The average number of swaps per day per user was ", np.round(mean_swap_per_user_per_day, decimals=2), " swaps/day/user in this dataset.")

### Add columns to be used for grouping by various time increments (hourly, half-hourly, every 15 mins)

In [None]:
# Each swap is labelled with the END time (in hours of the day) of the time
# increment it occurred in, so the labels run 1..24 for hourly increments,
# 0.5..24 for 30 minute increments and 0.25..24 for 15 minute increments.
# Increments are half-open, [start, end): a swap at 10:00 belongs to the 10:00-11:00
# increment. Rounding down and adding one increment (instead of rounding the
# minutes up) guarantees that swaps at minute 0 are not assigned to the previous
# increment and that swaps just after midnight get the first label rather than 0,
# which has no matching time bin in the distribution tables.
swap_hour = np.asarray(df_battery_swaps.index.hour)
swap_minute = np.asarray(df_battery_swaps.index.minute)

# Determine which hourly increment the swap occurred in
df_battery_swaps['hr_inc'] = swap_hour + 1
df_battery_swaps['hr_inc'] = df_battery_swaps['hr_inc'].astype('int')

# Determine which 30 minute increment the swap occurred in
df_battery_swaps['30_mins_inc'] = swap_hour + (swap_minute // 30 + 1) * 30 / 60

# Determine which 15 minute increment the swap occurred in
df_battery_swaps['15_mins_inc'] = swap_hour + (swap_minute // 15 + 1) * 15 / 60

# Display some of the calculated increments in the dataframe
df_battery_swaps.head()

### Swap distribution by *day of week*

Note: Monday=0, Sunday=6 <br>
Check if there are lower numbers / probability of swaps on weekends vs weekdays

In [None]:
# Group the swap records by the day of the week of their timestamp
df_battery_swaps_g_dow = df_battery_swaps.groupby(df_battery_swaps.index.dayofweek)

# SOC summary statistics per day of week, computed in a single aggregation
soc_stats_dow = df_battery_swaps_g_dow.agg(
    SOC_in_mean=('batt_in_soc', 'mean'),
    SOC_in_max=('batt_in_soc', 'max'),
    SOC_in_min=('batt_in_soc', 'min'),
    SOC_out_mean=('batt_out_soc', 'mean'),
    SOC_out_max=('batt_out_soc', 'max'),
    SOC_out_min=('batt_out_soc', 'min'),
)

# Number of swaps per day of week, followed by the SOC statistics
swap_counts_dow = df_battery_swaps_g_dow.size().to_frame(name='total_swaps')
df_swap_distrib_stats_dow = swap_counts_dow.join(soc_stats_dow)

# Share of all swaps falling on each day (the counts are scaled to sum to 1)
counts_row_dow = df_swap_distrib_stats_dow['total_swaps'].values.reshape(1, -1)
df_swap_distrib_stats_dow['prob'] = normalize(counts_row_dow, norm="l1").reshape(-1, 1)
df_swap_distrib_stats_dow

In [None]:
# Express the swap probability of each day of the week as a percentage,
# ready to be used as the bar heights of the probability bar plot
df_swap_distrib_stats_dow['prob_percent'] = df_swap_distrib_stats_dow['prob']*100

In [None]:
# Bar plot of the percentage of swaps occurring on each day of the week.
# The y-axis is fixed to 0-25 %, so any taller bar would be cut off at the top.
ax = df_swap_distrib_stats_dow.plot.bar(y='prob_percent', figsize=(8,3), legend=False, width=0.90, ylim=(0, 25), rot=45, color="blue")
# p.set_title('Battery swap probability distribution by hours of day')
ax.set_xlabel("Day of week");
ax.set_ylabel("Probability of swap occurring (%)");
# Show grid lines for the y-axis only, to make the bar heights easier to read
ax.yaxis.grid(color='gray', linestyle='-')
# ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.0f'))
# ax.xaxis.set_major_formatter(plt.FormatStrFormatter('%.0f'))


In [None]:
# ax.figure.savefig("images/Battery swap probability distribution by day of week.png", bbox_inches='tight')

### Swap counts and SOC distribution by *date*

In [None]:
# Group the swap records by the calendar date of their timestamp
df_battery_swaps_g_date = df_battery_swaps.groupby(df_battery_swaps.index.date)

# SOC summary statistics per date, computed in a single aggregation
soc_stats_date = df_battery_swaps_g_date.agg(
    SOC_in_mean=('batt_in_soc', 'mean'),
    SOC_in_max=('batt_in_soc', 'max'),
    SOC_in_min=('batt_in_soc', 'min'),
    SOC_in_std=('batt_in_soc', 'std'),
    SOC_out_mean=('batt_out_soc', 'mean'),
    SOC_out_max=('batt_out_soc', 'max'),
    SOC_out_min=('batt_out_soc', 'min'),
)

# Number of swaps per date, followed by the SOC statistics
swap_counts_date = df_battery_swaps_g_date.size().to_frame(name='total_swaps')
df_swap_distrib_stats_date = swap_counts_date.join(soc_stats_date)

# Share of all swaps falling on each date (the counts are scaled to sum to 1)
counts_row_date = df_swap_distrib_stats_date['total_swaps'].values.reshape(1, -1)
df_swap_distrib_stats_date['prob'] = normalize(counts_row_date, norm="l1").reshape(-1, 1)
df_swap_distrib_stats_date.head()

###  Swap counts and SOC distribution by *hour*

In [None]:
# Group the swap records by the hourly increment they were assigned to
df_battery_swaps_g_hr = df_battery_swaps.groupby(df_battery_swaps['hr_inc'])

# SOC summary statistics per hourly increment, computed in a single aggregation
soc_stats_hr = df_battery_swaps_g_hr.agg(
    SOC_in_mean=('batt_in_soc', 'mean'),
    SOC_in_max=('batt_in_soc', 'max'),
    SOC_in_min=('batt_in_soc', 'min'),
    SOC_in_std=('batt_in_soc', 'std'),
    SOC_out_mean=('batt_out_soc', 'mean'),
    SOC_out_max=('batt_out_soc', 'max'),
    SOC_out_min=('batt_out_soc', 'min'),
)

# Number of swaps per hourly increment, followed by the SOC statistics
swap_counts_hr = df_battery_swaps_g_hr.size().to_frame(name='total_swaps')
df_swap_distrib_stats_hr = swap_counts_hr.join(soc_stats_hr)

# Every hour value of the day (i.e. 1-24), as an array and as an empty dataframe
index_list_hr = np.arange(1, 25, 1)
df_index_hr = pd.DataFrame(index=index_list_hr)

# Right-join onto the full list of hours so that there is one row for every
# hour, including the hours in which no swaps occurred
df_swap_distrib_stats_hr = df_swap_distrib_stats_hr.join(df_index_hr, how='right')

# Hours without any swaps have a NaN count after the join; set it to zero
df_swap_distrib_stats_hr['total_swaps'] = df_swap_distrib_stats_hr['total_swaps'].fillna(0)

# Probability of a swap occurring in each hour (the counts are scaled to sum to 1)
counts_row_hr = df_swap_distrib_stats_hr['total_swaps'].values.reshape(1, -1)
df_swap_distrib_stats_hr['prob'] = normalize(counts_row_hr, norm="l1").reshape(-1, 1)

# Write the swap distribution statistics to a spreadsheet for use in other models
df_swap_distrib_stats_hr.to_excel("swap_distrib_stats_hr.xlsx", index_label='time_bin', na_rep='NA')

# Display dataframe
df_swap_distrib_stats_hr

In [None]:
# Express the swap probability of each hourly increment as a percentage,
# ready to be used as the bar heights of the probability bar plot
df_swap_distrib_stats_hr['prob_percent'] = df_swap_distrib_stats_hr['prob']*100


In [None]:
# Bar plot of the percentage of swaps occurring in each hourly increment.
# The y-axis is fixed to 0-10 %, so any taller bar would be cut off at the top.
ax = df_swap_distrib_stats_hr.plot.bar(y='prob_percent', figsize=(8,3), legend=False, width=0.90, ylim=(0, 10), rot=45, color="blue")
# p.set_title('Battery swap probability distribution by hours of day')
ax.set_xlabel("Time period (hours)");
ax.set_ylabel("Probability of swap occurring (%)");
# Show grid lines for the y-axis only, to make the bar heights easier to read
ax.yaxis.grid(color='gray', linestyle='-')
# ax.yaxis.set_major_formatter(plt.FormatStrFormatter('%.0f'))
# ax.xaxis.set_major_formatter(plt.FormatStrFormatter('%.0f'))


In [None]:
# ax.figure.savefig("images/Battery swap probability distribution by hours of day.png", bbox_inches='tight')

In [None]:
# Left-merge the swap records onto the full list of hours, so that every hour
# has at least one row (with missing values when no swaps occurred in that hour)
df_hr_inc = pd.DataFrame({'hr_inc': index_list_hr})
df_battery_swaps_hr = df_hr_inc.merge(df_battery_swaps, how='left', on='hr_inc')

# Box plot of the SOC of the batteries handed in, one box per hourly increment
boxplot_hr = df_battery_swaps_hr.boxplot(
    column=['batt_in_soc'],
    by=['hr_inc'],
    grid=False,
    rot=45,
    fontsize=10,
    figsize=[8, 3],
)
# Blank out both the figure-level and the axes-level titles
boxplot_hr.get_figure().suptitle('');
boxplot_hr.set_title('');
# boxplot_hr.set_title('Boxplot of arriving battery SOC values by hour')
boxplot_hr.set_xlabel("Time period (hours)");
boxplot_hr.set_ylabel("SOC of battery (%)");
# boxplot_hr.yaxis.grid(color='gray', linestyle='-');
boxplot_hr.set_ylim(-5, 100);

In [None]:
# boxplot_hr.figure.savefig("images/Boxplot of arriving battery SOC values by hour.png", bbox_inches='tight')

###  Swap counts and SOC distribution by *half hour*

In [None]:
# Group the swap records by the 30 minute increment they were assigned to
df_battery_swaps_g_30mins = df_battery_swaps.groupby(df_battery_swaps['30_mins_inc'])

# SOC summary statistics per 30 minute increment, computed in a single aggregation
soc_stats_30mins = df_battery_swaps_g_30mins.agg(
    SOC_in_mean=('batt_in_soc', 'mean'),
    SOC_in_max=('batt_in_soc', 'max'),
    SOC_in_min=('batt_in_soc', 'min'),
    SOC_in_std=('batt_in_soc', 'std'),
    SOC_out_mean=('batt_out_soc', 'mean'),
    SOC_out_max=('batt_out_soc', 'max'),
    SOC_out_min=('batt_out_soc', 'min'),
)

# Number of swaps per 30 minute increment, followed by the SOC statistics
swap_counts_30mins = df_battery_swaps_g_30mins.size().to_frame(name='total_swaps')
df_swap_distrib_stats_30mins = swap_counts_30mins.join(soc_stats_30mins)

# Every half-hour value of the day (i.e. 0.5-24), as an array and as an empty dataframe
index_list_30mins = np.arange(0.5, 24.5, 0.5)
df_index_30mins = pd.DataFrame(index=index_list_30mins)

# Right-join onto the full list of half-hours so that there is one row for every
# half-hour, including those in which no swaps occurred
df_swap_distrib_stats_30mins = df_swap_distrib_stats_30mins.join(df_index_30mins, how='right')

# Increments without any swaps have a NaN count after the join; set it to zero
df_swap_distrib_stats_30mins['total_swaps'] = df_swap_distrib_stats_30mins['total_swaps'].fillna(0)

# Probability of a swap occurring in each increment (the counts are scaled to sum to 1)
counts_row_30mins = df_swap_distrib_stats_30mins['total_swaps'].values.reshape(1, -1)
df_swap_distrib_stats_30mins['prob'] = normalize(counts_row_30mins, norm="l1").reshape(-1, 1)

# Write the swap distribution statistics to a spreadsheet for use in other models
df_swap_distrib_stats_30mins.to_excel("swap_distrib_stats_30mins.xlsx", index_label='time_bin', na_rep='NA')

# Display dataframe
df_swap_distrib_stats_30mins.head()

In [None]:
# Express the swap probability of each 30 minute increment as a percentage,
# ready to be used as the bar heights of the probability bar plot
df_swap_distrib_stats_30mins['prob_percent'] = df_swap_distrib_stats_30mins['prob']*100


In [None]:
# Bar plot of the percentage of swaps occurring in each 30 minute increment.
# The y-axis is fixed to 0-6 %, so any taller bar would be cut off at the top.
ax = df_swap_distrib_stats_30mins.plot.bar(y='prob_percent', figsize=(10,3), legend=False, width=0.8, ylim=(0,6), rot=90)
# ax.set_title('Battery swap probability distribution for every 30 mins of day')
ax.set_title('')
ax.set_xlabel("Time period (hours)");
ax.set_ylabel("Probability of swap occurring (%)");
# Show grid lines for the y-axis only, to make the bar heights easier to read
ax.yaxis.grid(color='gray', linestyle='-')

In [None]:
# ax.figure.savefig("images/Battery swap probability distribution by half-hours of day.png", bbox_inches='tight')

In [None]:
# Left-merge the swap records onto the full list of 30 minute increments, so that
# every half-hour has at least one row (with missing values when no swaps occurred)
df_30_mins_inc = pd.DataFrame({'30_mins_inc': index_list_30mins})
df_battery_swaps_30_mins = df_30_mins_inc.merge(df_battery_swaps, how='left', on='30_mins_inc')

# Box plot of the SOC of the batteries handed in, one box per 30 minute increment
boxplot_30mins = df_battery_swaps_30_mins.boxplot(
    column=['batt_in_soc'],
    by=['30_mins_inc'],
    grid=False,
    rot=90,
    fontsize=8,
    figsize=[12, 5],
)
# boxplot_30mins.set_title('Boxplot of arriving battery SOC values by 30 minute increments')
boxplot_30mins.set_ylabel("SOC of battery (%)");
boxplot_30mins.set_xlabel("30 minute increments");
# Blank out the figure-level title
boxplot_30mins.get_figure().suptitle('');

In [None]:
# boxplot_30mins.figure.savefig("images/Boxplot of arriving battery SOC values by 30 mins.png", bbox_inches='tight')

###  Swap counts and SOC distribution by *15 minute increments*

In [None]:
# Group the swap records by the 15 minute increment they were assigned to
df_battery_swaps_g_15mins = df_battery_swaps.groupby(df_battery_swaps['15_mins_inc'])

# SOC summary statistics per 15 minute increment, computed in a single aggregation
soc_stats_15mins = df_battery_swaps_g_15mins.agg(
    SOC_in_mean=('batt_in_soc', 'mean'),
    SOC_in_max=('batt_in_soc', 'max'),
    SOC_in_min=('batt_in_soc', 'min'),
    SOC_in_std=('batt_in_soc', 'std'),
    SOC_out_mean=('batt_out_soc', 'mean'),
    SOC_out_max=('batt_out_soc', 'max'),
    SOC_out_min=('batt_out_soc', 'min'),
)

# Number of swaps per 15 minute increment, followed by the SOC statistics
swap_counts_15mins = df_battery_swaps_g_15mins.size().to_frame(name='total_swaps')
df_swap_distrib_stats_15mins = swap_counts_15mins.join(soc_stats_15mins)

# Every 15 minute value of the day (i.e. 0.25-24), as an array and as an empty dataframe
index_list_15mins = np.arange(0.25, 24.25, 0.25)
df_index_15mins = pd.DataFrame(index=index_list_15mins)

# Right-join onto the full list of 15 minute increments so that there is one row
# for every increment, including those in which no swaps occurred
df_swap_distrib_stats_15mins = df_swap_distrib_stats_15mins.join(df_index_15mins, how='right')

# Increments without any swaps have a NaN count after the join; set it to zero
df_swap_distrib_stats_15mins['total_swaps'] = df_swap_distrib_stats_15mins['total_swaps'].fillna(0)

# Probability of a swap occurring in each increment (the counts are scaled to sum to 1)
counts_row_15mins = df_swap_distrib_stats_15mins['total_swaps'].values.reshape(1, -1)
df_swap_distrib_stats_15mins['prob'] = normalize(counts_row_15mins, norm="l1").reshape(-1, 1)

# Write the swap distribution statistics to a spreadsheet for use in other models
df_swap_distrib_stats_15mins.to_excel("swap_distrib_stats_15mins.xlsx", index_label='time_bin', na_rep='NA')

# Display dataframe
df_swap_distrib_stats_15mins.head()

In [None]:
# Express the swap probability of each 15 minute increment as a percentage
df_swap_distrib_stats_15mins['prob_percent']=df_swap_distrib_stats_15mins['prob']*100

# Create bar plot of swap probabilities in each time increment.
# The returned axes are bound to `ax` so that the labels below are applied to
# THIS plot; previously the plot was bound to `px` while the labels were set on
# `ax`, which still referred to the axes of an earlier figure.
ax = df_swap_distrib_stats_15mins.plot.bar(y='prob_percent', figsize=(20,8), legend=False)
px = ax  # former name of these axes, kept for any later cell that refers to it
# ax.set_title('Battery swap probability distribution for every 15 mins of day')
ax.set_xlabel("Time period (hours)");
ax.set_ylabel("Probability of swap occurring (%)");

In [None]:
# ax.figure.savefig("images/Battery swap probability distribution by 15 mins of day.png", bbox_inches='tight')

In [None]:
# Left-merge the swap records onto the full list of 15 minute increments, so that
# every increment has at least one row (with missing values when no swaps occurred)
df_15_mins_inc = pd.DataFrame({'15_mins_inc': index_list_15mins})
df_battery_swaps_15_mins = df_15_mins_inc.merge(df_battery_swaps, how='left', on='15_mins_inc')

# Box plot of the SOC of the batteries handed in, one box per 15 minute increment
boxplot_15mins = df_battery_swaps_15_mins.boxplot(
    column=['batt_in_soc'],
    by=['15_mins_inc'],
    grid=False,
    rot=90,
    fontsize=8,
    figsize=[12, 5],
)
# boxplot_15mins.set_title('Boxplot of arriving battery SOC values by 15 minute increments')
boxplot_15mins.set_ylabel("SOC of battery (%)");
boxplot_15mins.set_xlabel("Time period (hours)");
# Blank out the figure-level title
boxplot_15mins.get_figure().suptitle('');

In [None]:
# boxplot_15mins.figure.savefig("images/Boxplot of arriving battery SOC values by 15 mins.png", bbox_inches='tight')

## 4. Additional methods to select data between different dates or times

These methods were not used in this analysis but may be helpful for any exploratory data analysis.

### Method to select dataframe rows between two different times

In [None]:
# Time-of-day window to select, on any date
start_time = '6:00'
end_time = '18:00'

# Select the swap records whose timestamp (the index) falls within the window
df_between_times = df_battery_swaps.between_time(start_time=start_time, end_time=end_time)
df_between_times

### Method to select dataframe rows between two different dates

In [None]:
# Date range to select
start_date = '2022-12-01'
end_date = '2022-12-30'

# Select the swap records whose timestamp (the index) is strictly later than
# start_date and not later than end_date
swap_timestamps = df_battery_swaps.index
mask = (swap_timestamps > start_date) & (swap_timestamps <= end_date)
df_between_dates = df_battery_swaps[mask]
df_between_dates