# GHG Emissions API

## Imports

In [24]:
import pandas as pd
import os

## Global Variables

In [25]:
# --- Source data -------------------------------------------------------------
# UK local-authority GHG emissions dataset (CSV) hosted on the gov.uk asset server.
GHG_DATA_PATH = (
    'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1087005/uk-local-authority-ghg-emissions-2020-dataset.csv'
)

# --- Default selection used by the filter cells ------------------------------
# Watford: first English Local Authority with Motorways data since 2005.
LOCAL_AUTHORITY = 'Watford'
YEAR = 2018

# --- Output location ---------------------------------------------------------
# Absolute path of the parent of the current working directory.
ROOT_DIR_PATH = os.path.abspath(os.pardir)
# Ends with a separator so a bare file name can be appended to it directly.
GHG_PROCESSED_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/')

## General Functions

## Data

In [26]:
# Load the raw emissions CSV from GHG_DATA_PATH into a DataFrame.
df = pd.read_csv(filepath_or_buffer=GHG_DATA_PATH)

In [27]:
# Keep only the rows whose Country is England, ordered by Local Authority name.
is_england = df['Country'].eq('England')
df = df[is_england].sort_values('Local Authority')
df.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
272749,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2005,Agriculture,Agriculture Electricity,CH4,0.046825,0.0,59.821,43.6479
273522,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector 'Other',CO2,0.015106,0.015106,63.526,43.6479
273521,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector 'Other',CH4,8.4e-05,0.0,63.526,43.6479
273520,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector Gas,N2O,0.005411,0.0,63.526,43.6479
273519,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector Gas,CO2,5.373303,5.373303,63.526,43.6479


In [28]:
# Inspect the dtype pandas inferred for each column of the loaded table.
df.dtypes

Country                                                          object
Country Code                                                     object
Region                                                           object
Region Code                                                      object
Second Tier Authority                                            object
Local Authority                                                  object
Local Authority Code                                             object
Calendar Year                                                     int64
LA GHG Sector                                                    object
LA GHG Sub-sector                                                object
Greenhouse gas                                                   object
Territorial emissions (kt CO2e)                                 float64
CO2 emissions within the scope of influence of LAs (kt CO2e)    float64
Mid-year Population (thousands)                                 

In [29]:
# Filter by road transport: Transport sector first, then sub-sectors whose
# name contains "Road".
df = df.loc[df['LA GHG Sector'] == "Transport"]
# na=False treats a missing sub-sector as "no match" instead of putting NaN into
# the mask (which boolean indexing rejects); regex=False matches "Road" as a
# plain substring rather than compiling it as a regular expression.
df = df[df['LA GHG Sub-sector'].str.contains("Road", na=False, regex=False)]
df.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
273532,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),N2O,0.357327,0.0,63.526,43.6479
273531,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CO2,34.42187,34.42187,63.526,43.6479
273530,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CH4,0.306636,0.0,63.526,43.6479
273529,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (A roads),N2O,0.531886,0.0,63.526,43.6479
273528,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (A roads),CO2,62.567559,62.567559,63.526,43.6479


## Analyse df

In [30]:
# Collect the distinct gases and road sub-sectors left after filtering, then report them.
ghg_types = df['Greenhouse gas'].unique()
road_types = df['LA GHG Sub-sector'].unique()

print("GHG's: {}".format(ghg_types))
print("Road transport types: {}".format(road_types))

GHG's: ['N2O' 'CO2' 'CH4']
Road transport types: ['Road Transport (Minor roads)' 'Road Transport (A roads)'
 'Road Transport (Motorways)']


## Motorways

In [31]:
# Restrict the road-transport rows to the motorway sub-sector.
is_motorway = df['LA GHG Sub-sector'].eq("Road Transport (Motorways)")
df_motorway = df[is_motorway]
df_motorway.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
33803,England,E92000001,East Midlands,E12000004,Nottinghamshire,Ashfield,E07000170,2008,Transport,Road Transport (Motorways),N2O,0.371795,0.0,116.971,109.5581
33801,England,E92000001,East Midlands,E12000004,Nottinghamshire,Ashfield,E07000170,2008,Transport,Road Transport (Motorways),CH4,0.243297,0.0,116.971,109.5581
33802,England,E92000001,East Midlands,E12000004,Nottinghamshire,Ashfield,E07000170,2008,Transport,Road Transport (Motorways),CO2,66.180332,0.0,116.971,109.5581
33724,England,E92000001,East Midlands,E12000004,Nottinghamshire,Ashfield,E07000170,2007,Transport,Road Transport (Motorways),N2O,0.44401,0.0,116.301,109.5581
33722,England,E92000001,East Midlands,E12000004,Nottinghamshire,Ashfield,E07000170,2007,Transport,Road Transport (Motorways),CH4,0.316472,0.0,116.301,109.5581


### Total GHG Emissions by Local Authority and Year

In [32]:
# Sum territorial emissions over all greenhouse gases to get one motorway total
# per Local Authority and calendar year.
#
# The aggregation starts from the motorway rows of `df` rather than from
# `df_motorway` itself: this cell rebinds `df_motorway` to the aggregated table
# (whose emissions column is renamed), so using it as the input would raise a
# KeyError the second time the cell is run. Run in order, the result is the same.
motorway_rows = df.loc[df['LA GHG Sub-sector'] == "Road Transport (Motorways)"]
df_motorway = (
    motorway_rows
    .groupby(by=['Local Authority', 'Calendar Year'])['Territorial emissions (kt CO2e)']
    .sum()
    .reset_index()
    .rename(columns={'Territorial emissions (kt CO2e)': 'Annual Territorial emissions (kt CO2e)'})
)
df_motorway.head()

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
0,Ashfield,2005,74.488642
1,Ashfield,2006,78.378314
2,Ashfield,2007,77.875712
3,Ashfield,2008,66.795423
4,Ashfield,2009,65.034277


### Filter by Local Authority and Year

In [33]:
# Pick the motorway total for the authority and year set in the global variables.
matches_selection = (
    (df_motorway['Local Authority'] == LOCAL_AUTHORITY)
    & (df_motorway['Calendar Year'] == YEAR)
)
df_motorway_filter = df_motorway[matches_selection]
df_motorway_filter

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
2597,Watford,2018,9.31911


## All Roads

### Total GHG Emissions by Local Authority and Year

In [34]:
# Total emissions across every road sub-sector and gas in `df`, per Local Authority and year.
road_totals = df.groupby(['Local Authority', 'Calendar Year'])['Territorial emissions (kt CO2e)'].sum()
df_road = road_totals.reset_index().rename(
    columns={'Territorial emissions (kt CO2e)': 'Annual Territorial emissions (kt CO2e)'}
)
df_road.head()

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
0,Adur,2005,125.042511
1,Adur,2006,118.014936
2,Adur,2007,115.096206
3,Adur,2008,107.019836
4,Adur,2009,104.521629


### Filter by Local Authority and Year

In [35]:
# Pick the all-roads total for the authority and year set in the global variables.
in_authority = df_road['Local Authority'].eq(LOCAL_AUTHORITY)
in_year = df_road['Calendar Year'].eq(YEAR)
df_road_filter = df_road[in_authority & in_year]
df_road_filter

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
4541,Watford,2018,111.705683


In [36]:
# Pull the un-aggregated per-gas rows for the same authority and year, so their
# sum can be compared with the grouped total.
row_selection = (df['Local Authority'] == LOCAL_AUTHORITY) & (df['Calendar Year'] == YEAR)
df_filter = df[row_selection]
df_filter.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
77828,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),CH4,0.498072,0.0,96.767,21.4305
77829,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),CO2,52.113468,52.113468,96.767,21.4305
77830,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),N2O,0.529611,0.0,96.767,21.4305
77831,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (Minor roads),CH4,0.496507,0.0,96.767,21.4305
77832,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (Minor roads),CO2,48.23174,48.23174,96.767,21.4305


In [37]:
# Cross-check: summing the individual per-gas rows must reproduce the grouped
# all-roads total for the selected authority and year. The check is asserted
# rather than left to a visual comparison; a small tolerance absorbs differences
# from floating-point summation order.
total_from_rows = df_filter['Territorial emissions (kt CO2e)'].sum()
total_from_groupby = df_road_filter['Annual Territorial emissions (kt CO2e)'].sum()
assert abs(total_from_rows - total_from_groupby) < 1e-6, (
    "Row-level sum {} does not match grouped total {}".format(total_from_rows, total_from_groupby)
)
total_from_rows

111.705682557

## Saving Data

## Constraints on Local Authorities for selection


| Constraint | Rationale |
| --- | --- |
| Minimise LA km$^2$ | Simplifies LA selection and increases likelihood of matching other constraints |
| Avoid big cities and towns | Complicates data pre-processing and evaluation |
| Only one motorway through LA | The pipeline is only considering motorway GHG Emissions |
| Sufficient traffic count data from traffic site on motorway (above a threshold) | ANN AADT training |
| Bidirectional traffic count site data | The pipeline considers both road directions separately |
| Traffic count site not by a junction | Number of vehicles between 0-521-660cm |
| Live Speed data from traffic count site available | Evaluation of live speed estimation method |

In [38]:
# Order the road-transport rows by Local Authority area, smallest first, as the
# starting point for shortlisting candidate authorities.
df_smallest_areas = df.sort_values('Area (km2)', ascending=True)
df_smallest_areas.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
107492,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),CO2,24.677103,24.677103,7.654,3.1496
107490,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (A roads),N2O,0.45724,0.0,7.654,3.1496
107491,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),CH4,0.23446,0.0,7.654,3.1496
107048,England,E92000001,London,E12000007,City of London,City of London,E09000001,2011,Transport,Road Transport (Minor roads),CO2,23.330182,23.330182,7.412,3.1496
107493,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),N2O,0.28256,0.0,7.654,3.1496


In [39]:
# Manually curated exclusion lists, one per reason an authority is ruled out.
city_regions = [
    'City of London', 'Kensington and Chelsea', 'Islington', 'Hammersmith and Fulham',
    'Hackney', 'Tower Hamlets', 'Camden', 'Westminster', 'Lambeth', 'Haringey',
    'Southwark', 'Wandsworth', 'Lewisham', 'Merton', 'Barking and Dagenham', 'Newham',
    'Waltham Forest', 'Brent', 'Greenwich', 'Harrow', 'Ealing', 'Redbridge',
]

no_motorways = [
    'Oadby and Wigston', 'Gosport', 'Isles of Scilly', 'Hastings', 'Tamworth', 'Worthing',
    'Epsom and Ewell', 'Lincoln', 'Kingston upon Thames', 'Ipswich', 'Norwich', 'Gloucester',
    'Adur', 'Sutton', 'Oxford', 'Cheltenham', 'Broxbourne', 'Redditch', 'Middlesbrough',
    'Richmond upon Thames',
]

only_junctions = ['Reading', 'Blackpool']

two_motorways = ['Spelthorne']

# Drop every authority that appears in any exclusion list in a single pass;
# the surviving rows keep their area-sorted order.
excluded_authorities = city_regions + no_motorways + only_junctions + two_motorways
is_excluded = df_smallest_areas['Local Authority'].isin(excluded_authorities)
df_smallest_areas = df_smallest_areas[~is_excluded]
df_smallest_areas.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
77602,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2015,Transport,Road Transport (Motorways),N2O,0.062223,0.0,96.348,21.4305
77599,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2015,Transport,Road Transport (Minor roads),N2O,0.492324,0.0,96.348,21.4305
77675,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (Minor roads),CO2,48.003237,48.003237,96.577,21.4305
77674,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (Minor roads),CH4,0.428091,0.0,96.577,21.4305
77673,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (A roads),N2O,0.59517,0.0,96.577,21.4305


In [40]:
# List the distinct authority names in the order they first appear in
# df_smallest_areas; the slice shows positions 29-58 of that list.
# NOTE(review): the slice bounds look like manual paging through the candidates - confirm.
df_smallest_areas['Local Authority'].unique()[29:59]

array(['Cannock Chase', 'Havant', 'Nuneaton and Bedworth', 'Broxtowe',
       'Kingston upon Hull, City of', 'Enfield', 'Plymouth',
       'North Tyneside', 'Eastleigh', 'Brighton and Hove', 'Sandwell',
       'Croydon', 'Knowsley', 'Barnet', 'Three Rivers', 'Halton',
       'Stoke-on-Trent', 'Surrey Heath', 'Elmbridge', 'Salford', 'Dudley',
       'Hartlepool', 'Coventry', 'Bury', 'Hertsmere', 'Tameside',
       'Walsall', 'Gravesham', 'Trafford', 'Bracknell Forest'],
      dtype=object)

### Potential Sites

In [41]:
# Shortlist of candidate Local Authorities for the motorway analysis.
potential_sites = [
    'Watford',
    'Harlow',
    'Slough',
    'Worcester',
    'Stevenage',
    'Cambridge',
    'Rushmoor',
    'Luton',
    'Crawley',
    'Exeter',
    'Southampton',
    'Hounslow',
    'Portsmouth',
    'South Tyneside',
    'Enfield',
    'Barnet',
    'Halton',
]

In [42]:
# Keep the motorway totals of the shortlisted authorities only.
df_potential_sites = df_motorway.loc[df_motorway['Local Authority'].isin(potential_sites)]

# Sanity checks on the shortlist. Together they hold exactly when the number of
# shortlisted names equals the number of distinct authorities found, but on
# failure they say which names are the problem.
duplicate_sites = sorted({site for site in potential_sites if potential_sites.count(site) > 1})
assert not duplicate_sites, "Duplicate entries in potential_sites: {}".format(duplicate_sites)

missing_sites = sorted(set(potential_sites) - set(df_potential_sites['Local Authority']))
assert not missing_sites, "No motorway emissions rows found for: {}".format(missing_sites)

df_potential_sites

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
32,Barnet,2005,53.988609
33,Barnet,2006,56.822203
34,Barnet,2007,53.186986
35,Barnet,2008,59.597040
36,Barnet,2009,57.067457
...,...,...,...
2787,Worcester,2016,1.644139
2788,Worcester,2017,1.541270
2789,Worcester,2018,1.487991
2790,Worcester,2019,1.476269


### Saving data

In [43]:
# Write the shortlisted motorway totals to GHG_potential_sites.csv inside
# GHG_PROCESSED_PATH (the DataFrame index is written as the first column).
# The directory is created when missing so the save does not fail on a checkout
# without the data folder, and the path is joined rather than concatenated so it
# does not depend on GHG_PROCESSED_PATH ending with a separator.
os.makedirs(GHG_PROCESSED_PATH, exist_ok=True)
df_potential_sites.to_csv(os.path.join(GHG_PROCESSED_PATH, 'GHG_potential_sites.csv'))

### Check specific potential site

In [44]:
# Show the full motorway time series for one shortlisted authority.
# LOCAL_AUTHORITY (set to 'Watford' in the global variables) replaces the
# repeated string literal so this check follows the same setting as the
# earlier filter cells.
df_site = df_potential_sites.loc[df_potential_sites['Local Authority'] == LOCAL_AUTHORITY]

df_site

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
2584,Watford,2005,9.732538
2585,Watford,2006,9.743873
2586,Watford,2007,9.35409
2587,Watford,2008,8.401761
2588,Watford,2009,8.226398
2589,Watford,2010,8.783804
2590,Watford,2011,8.678589
2591,Watford,2012,8.983163
2592,Watford,2013,8.993412
2593,Watford,2014,9.431423
