In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Charger data 

There are two sources of charger data:
1. Grant statistics for private chargers, which give the number of grant-funded private chargers per region: 2014-2021, at Local Authority (LA) level.
2. Public charger counts per region: quarterly from 10/2019 to 04/2021, at LA level.

### Private chargers

In [2]:
# Column labels applied to sheet EVG_02: two LA identifier columns, one column per year, then the total.
private_year_labels = ['2015', '2016', '2017', '2018', '2019', '2020', '2021']
column_names_private = ['LA_code', 'LA_name'] + private_year_labels + ['Total']

# The first 7 rows of the sheet are skipped and the labels above replace the sheet's own header.
df_private_chargers = pd.read_excel(
    "raw_data_to_edit/electric-vehicle-charging-device-grant-statistics-april-2021.ods",
    engine="odf",
    sheet_name="EVG_02",
    skiprows=7,
    names=column_names_private,
)

In [3]:
# Quick look at the loaded grant table: dimensions, column labels and the first rows.
print(df_private_chargers.shape)
print(df_private_chargers.columns)
df_private_chargers.head()

(436, 10)
Index(['LA_code', 'LA_name', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', 'Total'],
      dtype='object')


Unnamed: 0,LA_code,LA_name,2015,2016,2017,2018,2019,2020,2021,Total
0,K02000001,UNITED KINGDOM,15779.0,12835.0,18039.0,21612.0,25796.0,42425.0,244.0,136730.0
1,K03000001,GREAT BRITAIN,15349.0,12565.0,17611.0,21124.0,25039.0,41085.0,242.0,133015.0
2,E92000001,ENGLAND,13607.0,11261.0,15614.0,18734.0,22334.0,36517.0,225.0,118292.0
3,E12000001,NORTH EAST,904.0,529.0,718.0,690.0,673.0,1733.0,2.0,5249.0
4,E06000047,County Durham,214.0,118.0,110.0,142.0,150.0,280.0,1.0,1015.0


In [4]:
# Turn the per-year counts into running totals, working left to right.
# Positions 0-1 hold LA_code/LA_name and the last position holds 'Total', so only
# positions 3 .. n_cols-2 ('2016' .. '2021') are rewritten; '2015' is the starting value.
# NOTE: the frame is updated in place, so re-running this cell accumulates a second time.
last_year_pos = df_private_chargers.shape[1] - 2
for col_pos in range(3, last_year_pos + 1):
    previous_total = df_private_chargers.iloc[:, col_pos - 1]
    df_private_chargers.iloc[:, col_pos] = previous_total + df_private_chargers.iloc[:, col_pos]


In [5]:
# Preview the frame after the running-total step above.
df_private_chargers.head()

Unnamed: 0,LA_code,LA_name,2015,2016,2017,2018,2019,2020,2021,Total
0,K02000001,UNITED KINGDOM,15779.0,28614.0,46653.0,68265.0,94061.0,136486.0,136730.0,136730.0
1,K03000001,GREAT BRITAIN,15349.0,27914.0,45525.0,66649.0,91688.0,132773.0,133015.0,133015.0
2,E92000001,ENGLAND,13607.0,24868.0,40482.0,59216.0,81550.0,118067.0,118292.0,118292.0
3,E12000001,NORTH EAST,904.0,1433.0,2151.0,2841.0,3514.0,5247.0,5249.0,5249.0
4,E06000047,County Durham,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0


In [6]:
# Load the lookup table that links Output Areas to their LSOA, MSOA and Local Authority District
lookup_csv = 'data/Output_Area_to_LSOA_to_MSOA_to_Local_Authority_District_(December_2017)_Lookup_with_Area_Classifications_in_Great_Britain.csv'
region_lookup = pd.read_csv(lookup_csv)

In [7]:
print(region_lookup.shape)
print(region_lookup.columns)
# Distinct Local Authority District codes that appear in the lookup table
la_unique = region_lookup['LAD17CD'].unique()

(227759, 18)
Index(['OA11CD', 'OAC11CD', 'OAC11NM', 'LSOA11CD', 'LSOA11NM', 'SOAC11CD',
       'SOAC11NM', 'MSOA11CD', 'MSOA11NM', 'LAD17CD', 'LAD17NM', 'LACCD',
       'LACNM', 'RGN11CD', 'RGN11NM', 'CTRY11CD', 'CTRY11NM', 'FID'],
      dtype='object')


In [8]:
# Keep only the private charger rows whose LA code also appears in the region lookup
has_lookup_entry = df_private_chargers['LA_code'].isin(la_unique)
df_private_chargers = df_private_chargers.loc[has_lookup_entry]
print(df_private_chargers.shape)
print(len(la_unique))

(358, 10)
380


In [9]:
# LA codes that are in the lookup but absent from the grant table (22 of them on the
# April 2021 data); they are padded with zero rows further down.
# NOTE(review): zero-filling assumes these areas have no chargers. A code could also be
# missing because the two sources use different LA code vintages — confirm.
# Sorted so the padding rows (and every frame derived from them) come out in the same
# order on every run; the iteration order of a set of strings is not stable across
# interpreter sessions.
missing_la_private = sorted(set(la_unique) - set(df_private_chargers.LA_code.unique()))

In [10]:
# Capture the current column labels as a NumPy array; they are reused to build the padding rows
column_names_private = df_private_chargers.columns.to_numpy()
column_names_private

array(['LA_code', 'LA_name', '2015', '2016', '2017', '2018', '2019',
       '2020', '2021', 'Total'], dtype=object)

In [11]:
# Build one all-zero row per missing LA so that every LA in the lookup is represented.
# The LA code doubles as LA_name because no name is available for these rows.
# All rows are created in a single DataFrame constructor call: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it on every iteration.
zero_counts = dict.fromkeys(column_names_private[2:], 0)  # every year column and 'Total' -> 0
df_missing_la_private = pd.DataFrame(
    [{"LA_code": la_code, "LA_name": la_code, **zero_counts} for la_code in missing_la_private],
    columns=column_names_private,  # fixes the column order and yields an empty frame if nothing is missing
)

In [12]:
# Expect one padding row per missing LA and the same columns as the grant table.
df_missing_la_private.shape

(22, 10)

In [13]:
# Add the zero-filled padding rows to the grant table.
# pd.concat replaces DataFrame.append (removed in pandas 2.0). ignore_index=True gives the
# combined frame a fresh 0..n-1 index; the original called reset_index(drop=True) without
# assigning the result, so the index was never actually reset.
df_private_chargers = pd.concat([df_private_chargers, df_missing_la_private], ignore_index=True)
print(df_private_chargers.shape)
# Every LA code from the lookup should now be present exactly once.
print(df_private_chargers.LA_code.nunique())

(380, 10)
380


## Convert to MSOA level

In [15]:
# Attach the lookup rows to each LA so that every LA-level record is repeated for the
# areas (and therefore the MSOAs) that belong to that LA.
df_private_chargers_msoa = df_private_chargers.merge(
    region_lookup,
    left_on='LA_code',
    right_on='LAD17CD',
)

In [16]:
# The lookup has many rows per MSOA; keep the first row for each MSOA code and only the
# identifier, year and total columns.
private_columns = ['MSOA11CD', 'LA_code',
                   '2015', '2016', '2017', '2018', '2019', '2020', '2021', 'Total']
df_private_chargers_msoa = (
    df_private_chargers_msoa
    .drop_duplicates(subset=['MSOA11CD'])
    [private_columns]
)
print(df_private_chargers_msoa.shape)
df_private_chargers_msoa.head()

(8480, 10)


Unnamed: 0,MSOA11CD,LA_code,2015,2016,2017,2018,2019,2020,2021,Total
0,E02004296,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0
1,E02004302,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0
3,E02004305,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0
5,E02004303,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0
6,E02004301,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0


In [17]:
# Number of distinct MSOA codes; should equal the row count after de-duplication.
df_private_chargers_msoa.MSOA11CD.nunique()

8480

In [18]:
# Have lost some chargers
# Count the number of MSOAs in each LA.
# value_counts tallies all LAs in one pass instead of re-filtering the whole MSOA frame
# once per LA row.
msoa_per_la = df_private_chargers_msoa['LA_code'].value_counts()
# Keep the dict keyed in the row order of df_private_chargers; an LA with no MSOA rows gets 0.
msoa_count = {
    la_code: int(msoa_per_la.get(la_code, 0))
    for la_code in df_private_chargers['LA_code']
}

In [19]:
# One entry per LA code is expected.
len(msoa_count)

380

In [20]:
# Two-column table: the LA code (in a column named 'index') and its number of MSOAs.
la_to_msoa = (
    pd.DataFrame.from_dict(msoa_count, orient='index', columns=['msoa_count'])
    .reset_index()
)
la_to_msoa.head()

Unnamed: 0,index,msoa_count
0,E06000047,66
1,E06000005,15
2,E06000001,12
3,E06000002,19
4,E06000057,40


In [21]:
# Bring the per-LA MSOA count onto every MSOA row of that LA.
df_private_chargers_msoa = df_private_chargers_msoa.merge(
    la_to_msoa,
    left_on='LA_code',
    right_on='index',
)
df_private_chargers_msoa.head()

Unnamed: 0,MSOA11CD,LA_code,2015,2016,2017,2018,2019,2020,2021,Total,index,msoa_count
0,E02004296,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66
1,E02004302,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66
2,E02004305,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66
3,E02004303,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66
4,E02004301,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66


In [22]:
# Column labels after merging in the MSOA counts.
df_private_chargers_msoa.columns

Index(['MSOA11CD', 'LA_code', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', 'Total', 'index', 'msoa_count'],
      dtype='object')

In [23]:
# Spread each LA's running total evenly over its MSOAs: one '<year>_msoa' column per year.
for year in ['2015', '2016', '2017', '2018', '2019', '2020', '2021']:
    df_private_chargers_msoa[f'{year}_msoa'] = (
        df_private_chargers_msoa[year] / df_private_chargers_msoa['msoa_count']
    )

In [24]:
# Preview the frame with the new per-MSOA share columns.
df_private_chargers_msoa.head()

Unnamed: 0,MSOA11CD,LA_code,2015,2016,2017,2018,2019,2020,2021,Total,index,msoa_count,2015_msoa,2016_msoa,2017_msoa,2018_msoa,2019_msoa,2020_msoa,2021_msoa
0,E02004296,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66,3.242424,5.030303,6.69697,8.848485,11.121212,15.363636,15.378788
1,E02004302,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66,3.242424,5.030303,6.69697,8.848485,11.121212,15.363636,15.378788
2,E02004305,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66,3.242424,5.030303,6.69697,8.848485,11.121212,15.363636,15.378788
3,E02004303,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66,3.242424,5.030303,6.69697,8.848485,11.121212,15.363636,15.378788
4,E02004301,E06000047,214.0,332.0,442.0,584.0,734.0,1014.0,1015.0,1015.0,E06000047,66,3.242424,5.030303,6.69697,8.848485,11.121212,15.363636,15.378788


In [25]:
# Sanity check: the per-MSOA shares for 2021 should add up to the LA-level 2021 total
# (the two printed numbers should agree up to floating-point rounding).
print(df_private_chargers_msoa['2021_msoa'].sum())
print(df_private_chargers['2021'].sum())

125341.00000000057
125341.0


In [26]:
# Dimensions before the helper columns are dropped.
df_private_chargers_msoa.shape

(8480, 19)

In [27]:
# Keep only the MSOA code and the per-MSOA share columns ('2015_msoa' .. '2021_msoa').
msoa_share_columns = [f'{year}_msoa' for year in range(2015, 2022)]
df_private_chargers_msoa = df_private_chargers_msoa[['MSOA11CD'] + msoa_share_columns]

In [28]:
# Export of the per-MSOA private charger time series; currently disabled (commented out).
#df_private_chargers_msoa.to_csv('data/private_chargers_msoa_timeseries.csv')

## Public chargers

In [29]:
# Sheet EVCD_01a has, for each snapshot date, a device-count column followed by a
# per-100k-population column, ordered from newest to oldest snapshot.
public_snapshots = [
    ('04-2021', 'Apr21'),
    ('01-2021', 'Jan21'),
    ('10-2020', 'Oct20'),
    ('07-2020', 'Jul20'),
    ('04-2020', 'Apr20'),
    ('01-2020', 'Jan20'),
    ('10-2019', 'Oct19'),
]
column_names_public = ['LA_code', 'LA_name']
for date_label, short_label in public_snapshots:
    column_names_public += [date_label, f'{short_label}_per100kpop']

# The first 7 rows of the sheet are skipped and the labels above replace the sheet's own header.
df_public_chargers = pd.read_excel(
    "raw_data_to_edit/electric-vehicle-charging-device-statistics-april-2021.ods",
    engine="odf",
    sheet_name="EVCD_01a",
    skiprows=7,
    names=column_names_public,
)

In [30]:
# Keep the LA code and the device-count column of each snapshot; the LA name and the
# per-100k-population columns are not used further.
public_count_columns = ['04-2021', '01-2021', '10-2020', '07-2020',
                        '04-2020', '01-2020', '10-2019']
df_public_chargers = df_public_chargers[['LA_code'] + public_count_columns]
print(df_public_chargers.shape)
print(df_public_chargers.columns)
df_public_chargers.head()

(441, 8)
Index(['LA_code', '04-2021', '01-2021', '10-2020', '07-2020', '04-2020',
       '01-2020', '10-2019'],
      dtype='object')


Unnamed: 0,LA_code,04-2021,01-2021,10-2020,07-2020,04-2020,01-2020,10-2019
0,K02000001,22790.0,20775.0,19487.0,18265.0,17947.0,16505.0,15116.0
1,K03000001,22463.0,20455.0,19169.0,17953.0,17642.0,16210.0,14821.0
2,E92000001,19261.0,17459.0,16456.0,15395.0,14979.0,13719.0,12549.0
3,E12000001,854.0,820.0,849.0,812.0,786.0,752.0,738.0
4,E06000047,121.0,110.0,106.0,105.0,102.0,96.0,92.0


In [31]:
# Number of distinct LA codes; fewer than the row count means some codes repeat or are missing.
df_public_chargers.LA_code.nunique()

435

In [32]:
# Keep only the public charger rows whose LA code also appears in the region lookup
in_lookup = df_public_chargers['LA_code'].isin(la_unique)
df_public_chargers = df_public_chargers.loc[in_lookup]
print(df_public_chargers.shape)
print(len(la_unique))

(358, 8)
380


In [33]:
# Again some LA codes from the lookup are missing from the charger table (22 on the
# April 2021 data).
# Sorted so the padding rows come out in the same order on every run; the iteration
# order of a set of strings is not stable across interpreter sessions.
missing_la_public = sorted(set(la_unique) - set(df_public_chargers.LA_code.unique()))

# Current column labels, reused below to build the padding rows.
column_names_public = df_public_chargers.columns.values
print(column_names_public)


['LA_code' '04-2021' '01-2021' '10-2020' '07-2020' '04-2020' '01-2020'
 '10-2019']


In [34]:
# Build one all-zero row per missing LA so that every LA in the lookup is represented.
# All rows are created in a single DataFrame constructor call: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it on every iteration.
zero_counts = dict.fromkeys(column_names_public[1:], 0)  # every snapshot column -> 0
df_missing_la_public = pd.DataFrame(
    [{"LA_code": la_code, **zero_counts} for la_code in missing_la_public],
    columns=column_names_public,  # fixes the column order and yields an empty frame if nothing is missing
)

In [35]:
print(df_missing_la_public.shape)
# Add the zero-filled padding rows, then keep the first row for each LA code.
# pd.concat replaces DataFrame.append (removed in pandas 2.0). The original called
# reset_index(drop=True) without assigning the result, so the index was never reset;
# here the final frame gets a clean 0..n-1 index.
df_public_chargers = (
    pd.concat([df_public_chargers, df_missing_la_public], ignore_index=True)
    .drop_duplicates(subset='LA_code')
    .reset_index(drop=True)
)
# Every LA code from the lookup should now be present.
print(df_public_chargers.LA_code.nunique())

(22, 8)
380


In [36]:
# Dimensions and first rows after padding and de-duplicating on LA code.
print(df_public_chargers.shape)
df_public_chargers.head()

(380, 8)


Unnamed: 0,LA_code,04-2021,01-2021,10-2020,07-2020,04-2020,01-2020,10-2019
4,E06000047,121.0,110.0,106.0,105.0,102.0,96.0,92.0
5,E06000005,29.0,29.0,29.0,28.0,28.0,27.0,27.0
6,E06000001,9.0,8.0,6.0,6.0,6.0,5.0,5.0
7,E06000002,30.0,30.0,29.0,29.0,29.0,27.0,25.0
8,E06000057,153.0,149.0,162.0,152.0,149.0,138.0,139.0


In [37]:
# Repeat each LA-level record for the lookup rows of that LA, keep the first row per MSOA,
# and order the snapshot columns from oldest to newest.
public_columns = ['MSOA11CD', 'LA_code',
                  '10-2019', '01-2020', '04-2020', '07-2020',
                  '10-2020', '01-2021', '04-2021']
df_public_chargers_msoa = (
    df_public_chargers
    .merge(region_lookup, left_on='LA_code', right_on='LAD17CD')
    .drop_duplicates(subset=['MSOA11CD'])
    [public_columns]
)
print(df_public_chargers_msoa.shape)
df_public_chargers_msoa.head()

(8480, 9)


Unnamed: 0,MSOA11CD,LA_code,10-2019,01-2020,04-2020,07-2020,10-2020,01-2021,04-2021
0,E02004296,E06000047,92.0,96.0,102.0,105.0,106.0,110.0,121.0
1,E02004302,E06000047,92.0,96.0,102.0,105.0,106.0,110.0,121.0
3,E02004305,E06000047,92.0,96.0,102.0,105.0,106.0,110.0,121.0
5,E02004303,E06000047,92.0,96.0,102.0,105.0,106.0,110.0,121.0
6,E02004301,E06000047,92.0,96.0,102.0,105.0,106.0,110.0,121.0


In [38]:
# Replace each snapshot column with the running sum of itself and all earlier snapshots.
# Positions 0-1 hold MSOA11CD/LA_code and position 2 ('10-2019') is the starting value,
# so positions 3 .. n_cols-1 are rewritten, left to right.
# NOTE(review): the source is described as a quarterly charger *count*. If each column is
# already the number of devices in place at that date, this running sum overstates the
# totals — confirm against the source table before using the result.
# NOTE: the frame is updated in place, so re-running this cell accumulates a second time.
for col_pos in range(3, df_public_chargers_msoa.shape[1]):
    earlier_total = df_public_chargers_msoa.iloc[:, col_pos - 1]
    df_public_chargers_msoa.iloc[:, col_pos] = earlier_total + df_public_chargers_msoa.iloc[:, col_pos]


In [39]:
# Preview the frame after the running-sum step above.
df_public_chargers_msoa.head()

Unnamed: 0,MSOA11CD,LA_code,10-2019,01-2020,04-2020,07-2020,10-2020,01-2021,04-2021
0,E02004296,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0
1,E02004302,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0
3,E02004305,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0
5,E02004303,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0
6,E02004301,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0


In [40]:
# Bring the per-LA MSOA count (computed earlier for the private data) onto every MSOA row.
df_public_chargers_msoa = df_public_chargers_msoa.merge(
    la_to_msoa,
    left_on='LA_code',
    right_on='index',
)
df_public_chargers_msoa.head()

Unnamed: 0,MSOA11CD,LA_code,10-2019,01-2020,04-2020,07-2020,10-2020,01-2021,04-2021,index,msoa_count
0,E02004296,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66
1,E02004302,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66
2,E02004305,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66
3,E02004303,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66
4,E02004301,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66


In [41]:
# Spread each LA-level value evenly over its MSOAs: one '<snapshot>_msoa' column per snapshot.
for snapshot in ['10-2019', '01-2020', '04-2020', '07-2020', '10-2020', '01-2021', '04-2021']:
    df_public_chargers_msoa[f'{snapshot}_msoa'] = (
        df_public_chargers_msoa[snapshot] / df_public_chargers_msoa['msoa_count']
    )

In [42]:
# Preview the frame with the new per-MSOA share columns.
df_public_chargers_msoa.head()

Unnamed: 0,MSOA11CD,LA_code,10-2019,01-2020,04-2020,07-2020,10-2020,01-2021,04-2021,index,msoa_count,10-2019_msoa,01-2020_msoa,04-2020_msoa,07-2020_msoa,10-2020_msoa,01-2021_msoa,04-2021_msoa
0,E02004296,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66,1.393939,2.848485,4.393939,5.984848,7.590909,9.257576,11.090909
1,E02004302,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66,1.393939,2.848485,4.393939,5.984848,7.590909,9.257576,11.090909
2,E02004305,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66,1.393939,2.848485,4.393939,5.984848,7.590909,9.257576,11.090909
3,E02004303,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66,1.393939,2.848485,4.393939,5.984848,7.590909,9.257576,11.090909
4,E02004301,E06000047,92.0,188.0,290.0,395.0,501.0,611.0,732.0,E06000047,66,1.393939,2.848485,4.393939,5.984848,7.590909,9.257576,11.090909


In [43]:
# Sanity check: '01-2020' in the MSOA frame is a running sum (10-2019 + 01-2020), so its
# per-MSOA shares are compared with the sum of those two LA-level columns.
# The two printed numbers should agree up to floating-point rounding.
print(df_public_chargers_msoa['01-2020_msoa'].sum())
print(df_public_chargers['01-2020'].sum()+df_public_chargers['10-2019'].sum())

29531.00000000014
29531.0


In [44]:
# Export of the per-MSOA public charger time series; currently disabled (commented out).
# NOTE(review): unlike the private frame, this frame still carries the LA-level and helper
# columns at this point — confirm whether they should be dropped before exporting.
#df_public_chargers_msoa.to_csv('data/public_chargers_msoa_timeseries.csv')