In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the 2010 LSOA-level electricity file on its own so its structure can be inspected below.
elec_2010 = pd.read_csv('raw_data_to_edit/LSOA_ELEC_2010.csv')

In [3]:
elec_2010.head()

Unnamed: 0,Local Authority Name,Local Authority Code,Middle Layer Super Output Area (MSOA) Name,Middle Layer Super Output Area (MSOA) Code,Lower Layer Super Output Area (LSOA) Name,Lower Layer Super Output Area (LSOA) Code,Total number of domestic electricity meters,Total domestic electricity consumption (kWh),Mean domestic electricity consumption \n(kWh per meter),Median domestic electricity consumption \n(kWh per meter)
0,Hartlepool,E06000001,Hartlepool 001,E02002483,Hartlepool 001A,E01011954,962,3061223.7,3182.145218,2812.65
1,Hartlepool,E06000001,Hartlepool 001,E02002483,Hartlepool 001B,E01011969,638,2184541.4,3424.046082,3050.75
2,Hartlepool,E06000001,Hartlepool 001,E02002483,Hartlepool 001C,E01011970,452,1813380.3,4011.903319,3479.9
3,Hartlepool,E06000001,Hartlepool 001,E02002483,Hartlepool 001D,E01011971,530,2076311.2,3917.568302,3672.2
4,Hartlepool,E06000001,Hartlepool 001,E02002483,Hartlepool 001F,E01033465,746,2620861.5,3513.219169,3077.1


In [4]:
elec_2010.shape

(41730, 10)

In [5]:
elec_2010.columns

Index(['Local Authority Name', 'Local Authority Code',
       'Middle Layer Super Output Area (MSOA) Name',
       'Middle Layer Super Output Area (MSOA) Code',
       'Lower Layer Super Output Area (LSOA) Name',
       'Lower Layer Super Output Area (LSOA) Code',
       'Total number of domestic electricity meters',
       'Total domestic electricity consumption (kWh)',
       'Mean domestic electricity consumption \n(kWh per meter)',
       'Median domestic electricity consumption \n(kWh per meter)'],
      dtype='object')

In [6]:
# Count distinct LSOA codes; a count equal to the number of rows means there is exactly one row per LSOA.
elec_2010['Lower Layer Super Output Area (LSOA) Code'].nunique()

41730

In [7]:
# Look-up table of the LSOA codes to keep (column LSOA11CD).
# index_col=False tells pandas not to use the first column of the file as the index.
lsoa_codes_to_keep = pd.read_csv('data/lsoa_codes_reduced.csv', index_col=False)

In [8]:
lsoa_codes_to_keep.head()

Unnamed: 0,LSOA11CD,LSOA11NM
0,E01000001,City of London 001A
1,E01000002,City of London 001B
2,E01000003,City of London 001C
3,E01000005,City of London 001E
4,E01000006,Barking and Dagenham 016A


In [9]:
# Note: this binds a second name to the same DataFrame object; no copy is made,
# so in-place changes through either name affect both.
df_to_reduce = elec_2010

In [10]:
df_to_reduce.columns

Index(['Local Authority Name', 'Local Authority Code',
       'Middle Layer Super Output Area (MSOA) Name',
       'Middle Layer Super Output Area (MSOA) Code',
       'Lower Layer Super Output Area (LSOA) Name',
       'Lower Layer Super Output Area (LSOA) Code',
       'Total number of domestic electricity meters',
       'Total domestic electricity consumption (kWh)',
       'Mean domestic electricity consumption \n(kWh per meter)',
       'Median domestic electricity consumption \n(kWh per meter)'],
      dtype='object')

## Reduce data to LSOA codes in England and Wales

In [None]:
def func_to_reduce_LSOA_codes(df_to_reduce, lsoa_codes_to_keep, col_to_reduce, col_lsoa):
    """Return the rows of ``df_to_reduce`` whose LSOA code is listed in ``lsoa_codes_to_keep``.

    Codes on both sides are compared as strings, so a numeric code column and a
    string look-up column (or vice versa) still match. Rows whose code is
    missing are dropped unless the look-up table also contains a missing code.

    Parameters
    ----------
    df_to_reduce : pandas.DataFrame
        Frame to filter.
    lsoa_codes_to_keep : pandas.DataFrame or mapping of column name -> values
        Look-up table holding the codes to keep.
    col_to_reduce : str
        Name of the code column in ``df_to_reduce``.
    col_lsoa : str
        Name of the code column in ``lsoa_codes_to_keep``.

    Returns
    -------
    pandas.DataFrame
        A new frame containing only the matching rows; the original index
        labels, row order and columns are preserved.
    """
    # A set gives de-duplication and fast membership tests; no sorting is needed.
    codes_to_keep = set(pd.Series(lsoa_codes_to_keep[col_lsoa]).astype('str'))
    # Cast the filtered column the same way as the look-up codes so that the
    # comparison is string-to-string regardless of the column's stored dtype.
    codes_in_frame = df_to_reduce[col_to_reduce].astype('str')
    return df_to_reduce[codes_in_frame.isin(codes_to_keep)]

In [10]:
# Years covered by the raw yearly files: 2010 through 2019 inclusive.
years = list(range(2010, 2020))

In [11]:
# Print the shape and the column index of each yearly raw file.
for year in years:
    raw_csv_path = 'raw_data_to_edit/LSOA_ELEC_{}.csv'.format(year)
    df_to_reduce = pd.read_csv(raw_csv_path)
    print(df_to_reduce.shape, df_to_reduce.columns, sep='\n')

(41730, 10)
Index(['Local Authority Name', 'Local Authority Code',
       'Middle Layer Super Output Area (MSOA) Name',
       'Middle Layer Super Output Area (MSOA) Code',
       'Lower Layer Super Output Area (LSOA) Name',
       'Lower Layer Super Output Area (LSOA) Code',
       'Total number of domestic electricity meters',
       'Total domestic electricity consumption (kWh)',
       'Mean domestic electricity consumption \n(kWh per meter)',
       'Median domestic electricity consumption \n(kWh per meter)'],
      dtype='object')
(41730, 10)
Index(['Local Authority Name', 'Local Authority Code',
       'Middle Layer Super Output Area (MSOA) Name',
       'Middle Layer Super Output Area (MSOA) Code',
       'Lower Layer Super Output Area (LSOA) Name',
       'Lower Layer Super Output Area (LSOA) Code',
       'Total number of domestic electricity meters',
       'Total domestic electricity consumption (kWh)',
       'Mean domestic electricity consumption \n(kWh per meter)',
     

In [12]:
# Code column in the raw yearly files, and code column in the look-up table.
col_to_reduce, col_lsoa = 'Lower Layer Super Output Area (LSOA) Code', 'LSOA11CD'

# Filter every yearly raw file down to the listed LSOA codes and write the result to data/.
for year in years:
    raw_csv_path = 'raw_data_to_edit/LSOA_ELEC_{}.csv'.format(year)
    reduced_csv_path = 'data/lsoa_elec_{}_reduced.csv'.format(year)
    df_to_reduce = pd.read_csv(raw_csv_path)
    df_reduced = func_to_reduce_LSOA_codes(
        df_to_reduce=df_to_reduce,
        lsoa_codes_to_keep=lsoa_codes_to_keep,
        col_to_reduce=col_to_reduce,
        col_lsoa=col_lsoa,
    )
    df_reduced.to_csv(reduced_csv_path, index=False)

In [13]:
# Read each reduced file back from disk and print its (rows, columns) shape.
for year in years:
    reduced_csv_path = 'data/lsoa_elec_{}_reduced.csv'.format(year)
    test_df = pd.read_csv(reduced_csv_path)
    n_rows, n_cols = test_df.shape
    print((n_rows, n_cols))

(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)
(34753, 10)


## MSOA granularity

Output a dataframe with MSOA codes as rows and one energy-usage column per year. For each year, aggregate the LSOA-level data to MSOA level and join the result onto a master dataframe.

In [21]:
# Source column to aggregate to MSOA level. The header contains a newline
# character ("\n"), matching the column names reported by the raw files.
col_name = 'Mean domestic electricity consumption \n(kWh per meter)'

In [22]:
# Build an MSOA-by-year table: one row per MSOA code, one column per year.

# The 2010 file supplies the set of MSOA codes used as the row index.
year = 2010
df = pd.read_csv('raw_data_to_edit/LSOA_ELEC_{}.csv'.format(year)).rename(
    columns={'Middle Layer Super Output Area (MSOA) Code': 'MSOA11CD',
             col_name: 'energy_cons_kWh_{}'.format(year)})

# Seed the result with each MSOA code exactly once. The raw file has one row
# per LSOA, so using the MSOA column as-is would repeat every MSOA code once
# per LSOA it contains and every join below would inherit those repeats.
df_msoa = pd.DataFrame(index=pd.Index(df['MSOA11CD'].unique(), name='MSOA11CD'))

for year in years:
    year_col = 'energy_cons_kWh_{}'.format(year)
    df_temp = pd.read_csv('raw_data_to_edit/LSOA_ELEC_{}.csv'.format(year)).rename(
        columns={'Middle Layer Super Output Area (MSOA) Code': 'MSOA11CD',
                 col_name: year_col})
    # NOTE(review): .sum() is only meaningful for additive quantities (e.g. total
    # consumption). If col_name is a per-meter mean, summing the LSOA means gives
    # neither the MSOA total nor the MSOA mean -- confirm the intended aggregation.
    df_msoa_year = df_temp[['MSOA11CD', year_col]].groupby('MSOA11CD').sum()
    # Left join on the MSOA code index: MSOAs absent from the 2010 file are not added.
    df_msoa = df_msoa.join(df_msoa_year)

In [23]:
# Keep one row per MSOA code. De-duplicate on the index (the MSOA code) rather
# than on the row values: DataFrame.drop_duplicates() ignores the index, so two
# different MSOAs with identical yearly values would be merged into one row.
df_msoa = df_msoa[~df_msoa.index.duplicated(keep='first')]

In [57]:
#df_msoa.to_csv('data/msoa_electricity_consumption_timeseries.csv')