# GHG Emissions API

## Imports

In [1]:
import pandas as pd
import os

## Global Variables

In [2]:
# Source CSV: UK local-authority GHG emissions dataset, read directly from this URL.
GHG_DATA_PATH = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1087005/uk-local-authority-ghg-emissions-2020-dataset.csv'
LOCAL_AUTHORITY = 'Watford' # First English Local Authority with Motorways data since 2005
YEAR = 2018

# Set to False when running outside Google Colab; the project root then
# stays at the parent of the current working directory.
COLAB = True

ROOT_DIR_PATH = os.path.abspath('..')

if COLAB:
    # Colab-only dependency, so it is imported inside the guard.
    from google.colab import drive

    # The project folder is derived from the absolute mount point instead of a
    # path relative to the working directory, so it still resolves correctly
    # if the working directory is not /content.
    COLAB_DRIVE_MOUNT = '/content/drive'
    drive.mount(COLAB_DRIVE_MOUNT)

    ROOT_DIR_PATH = os.path.join(COLAB_DRIVE_MOUNT, 'MyDrive/Spatial_Finance_Transport/minorRoads')

# Folder that receives the processed GHG tables.
GHG_PROCESSED_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/ghg_emissions/')

Mounted at /content/drive


## General Functions

## Data

In [3]:
# Load the full emissions table straight from the URL held in GHG_DATA_PATH.
df = pd.read_csv(GHG_DATA_PATH)

In [4]:
# Restrict the table to rows for England, ordered by local-authority name.
england_mask = df['Country'].eq('England')
df = df[england_mask].sort_values('Local Authority')
df.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
272749,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2005,Agriculture,Agriculture Electricity,CH4,0.046825,0.0,59.821,43.6479
273522,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector 'Other',CO2,0.015106,0.015106,63.526,43.6479
273521,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector 'Other',CH4,8.4e-05,0.0,63.526,43.6479
273520,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector Gas,N2O,0.005411,0.0,63.526,43.6479
273519,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Public Sector,Public Sector Gas,CO2,5.373303,5.373303,63.526,43.6479


In [5]:
# Show the dtype pandas inferred for each column.
df.dtypes

Country                                                          object
Country Code                                                     object
Region                                                           object
Region Code                                                      object
Second Tier Authority                                            object
Local Authority                                                  object
Local Authority Code                                             object
Calendar Year                                                     int64
LA GHG Sector                                                    object
LA GHG Sub-sector                                                object
Greenhouse gas                                                   object
Territorial emissions (kt CO2e)                                 float64
CO2 emissions within the scope of influence of LAs (kt CO2e)    float64
Mid-year Population (thousands)                                 

In [6]:
# Narrow to the Transport sector first, then to sub-sectors whose name mentions "Road".
transport_rows = df[df['LA GHG Sector'] == "Transport"]
is_road_subsector = transport_rows['LA GHG Sub-sector'].str.contains("Road")
df = transport_rows[is_road_subsector]
df.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
273532,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),N2O,0.357327,0.0,63.526,43.6479
273531,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CO2,34.42187,34.42187,63.526,43.6479
273530,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CH4,0.306636,0.0,63.526,43.6479
273529,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (A roads),N2O,0.531886,0.0,63.526,43.6479
273528,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (A roads),CO2,62.567559,62.567559,63.526,43.6479


## Analyse df

In [7]:
# List the distinct gases and road sub-sectors left after filtering.
ghg_names = df['Greenhouse gas'].unique()
road_types = df['LA GHG Sub-sector'].unique()
print("GHG's: {}".format(ghg_names))
print("Road transport types: {}".format(road_types))

GHG's: ['N2O' 'CO2' 'CH4']
Road transport types: ['Road Transport (Minor roads)' 'Road Transport (A roads)'
 'Road Transport (Motorways)']


## Minor Roads

In [8]:
# Keep only the "Minor roads" sub-sector rows.
# NOTE: despite its name, `df_aroad` holds minor-road data, not A-road data.
minor_road_mask = df['LA GHG Sub-sector'] == "Road Transport (Minor roads)"
df_aroad = df[minor_road_mask]
df_aroad.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
273532,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),N2O,0.357327,0.0,63.526,43.6479
273531,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CO2,34.42187,34.42187,63.526,43.6479
273530,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2015,Transport,Road Transport (Minor roads),CH4,0.306636,0.0,63.526,43.6479
273603,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2016,Transport,Road Transport (Minor roads),N2O,0.36612,0.0,63.621,43.6479
273602,England,E92000001,South East,E12000008,West Sussex,Adur,E07000223,2016,Transport,Road Transport (Minor roads),CO2,35.056795,35.056795,63.621,43.6479


### Total GHG Emissions by Local Authority and Year

In [9]:
# One minor-road total per (local authority, year): emissions are summed over
# all greenhouse-gas rows in each group.
# The aggregate is rebuilt from `df` rather than from the previous value of
# `df_aroad`. Re-assigning `df_aroad` from itself made this cell fail with a
# KeyError on a second run, because the renamed column no longer existed.
# NOTE: despite its name, `df_aroad` holds minor-road data, not A-road data.
df_aroad = (
    df.loc[df['LA GHG Sub-sector'] == "Road Transport (Minor roads)"]
    .groupby(by=['Local Authority', 'Calendar Year'])['Territorial emissions (kt CO2e)']
    .sum()
    .reset_index()
    .rename(columns={'Territorial emissions (kt CO2e)': 'Annual Territorial emissions (kt CO2e)'})
)
df_aroad.head()

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
0,Adur,2005,39.330867
1,Adur,2006,38.65573
2,Adur,2007,39.004837
3,Adur,2008,36.63743
4,Adur,2009,35.708492


### Filter by Local Authority and Year

In [10]:
# Select the minor-road totals matching the LOCAL_AUTHORITY and YEAR globals.
authority_match = df_aroad['Local Authority'] == LOCAL_AUTHORITY
year_match = df_aroad['Calendar Year'] == YEAR
df_aroad_filter = df_aroad[authority_match & year_match]
df_aroad_filter

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
4541,Watford,2018,49.245422


## All Roads

### Total GHG Emissions by Local Authority and Year

In [11]:
# Total emissions per (local authority, year) across every road sub-sector and gas.
road_totals = df.groupby(['Local Authority', 'Calendar Year'])['Territorial emissions (kt CO2e)'].sum()
df_road = road_totals.reset_index().rename(
    columns={'Territorial emissions (kt CO2e)': 'Annual Territorial emissions (kt CO2e)'}
)
df_road.head()

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
0,Adur,2005,125.042511
1,Adur,2006,118.014936
2,Adur,2007,115.096206
3,Adur,2008,107.019836
4,Adur,2009,104.521629


### Filter by Local Authority and Year

In [12]:
# Select the all-roads totals matching the LOCAL_AUTHORITY and YEAR globals.
authority_match = df_road['Local Authority'] == LOCAL_AUTHORITY
year_match = df_road['Calendar Year'] == YEAR
df_road_filter = df_road[authority_match & year_match]
df_road_filter

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
4541,Watford,2018,111.705683


In [13]:
# Un-aggregated rows (one per road type and gas) for the same authority and
# year, kept for comparison against the grouped total.
same_authority = df['Local Authority'] == LOCAL_AUTHORITY
same_year = df['Calendar Year'] == YEAR
df_filter = df[same_authority][same_year[same_authority]]
df_filter.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
77828,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),CH4,0.498072,0.0,96.767,21.4305
77829,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),CO2,52.113468,52.113468,96.767,21.4305
77830,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (A roads),N2O,0.529611,0.0,96.767,21.4305
77831,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (Minor roads),CH4,0.496507,0.0,96.767,21.4305
77832,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2018,Transport,Road Transport (Minor roads),CO2,48.23174,48.23174,96.767,21.4305


In [14]:
# Cross-check: summing the raw rows must reproduce the grouped total held in
# `df_road_filter`. The tolerance absorbs floating-point summation-order noise.
raw_total = df_filter['Territorial emissions (kt CO2e)'].sum()
grouped_total = df_road_filter['Annual Territorial emissions (kt CO2e)'].sum()
assert abs(raw_total - grouped_total) < 1e-6, (
    "Raw total {} does not match grouped total {}".format(raw_total, grouped_total)
)
raw_total

111.705682557

## Saving Data

## Constraints on Local Authorities for selection


| Constraint | Rationale |
| --- | --- |
| Minimise LA km$^2$ | Simplifies LA selection and increases likelihood of matching other constraints |
| Avoid big cities and towns | Complicates data pre-processing and evaluation |
| Only one motorway through LA | The pipeline is only considering motorway GHG Emissions |
| Sufficient traffic count data from traffic site on motorway (above a threshold) | ANN AADT training |
| Bidirectional traffic count site data | The pipeline considers both road directions separately |
| Traffic count site not by a junction | Number of vehicles between 0-521-660cm |
| Live Speed data from traffic count site available | Evaluation of live speed estimation method |

In [15]:
# Order the road-transport rows by local-authority area, smallest first.
df_smallest_areas = df.sort_values(by='Area (km2)')
df_smallest_areas.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
107492,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),CO2,24.677103,24.677103,7.654,3.1496
107490,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (A roads),N2O,0.45724,0.0,7.654,3.1496
107491,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),CH4,0.23446,0.0,7.654,3.1496
107048,England,E92000001,London,E12000007,City of London,City of London,E09000001,2011,Transport,Road Transport (Minor roads),CO2,23.330182,23.330182,7.412,3.1496
107493,England,E92000001,London,E12000007,City of London,City of London,E09000001,2017,Transport,Road Transport (Minor roads),N2O,0.28256,0.0,7.654,3.1496


In [16]:
# Exclusion lists, one per reason (the list names indicate the reason).
# NOTE(review): membership appears to be hand-compiled - confirm the source.
city_regions = [
    'City of London', 'Kensington and Chelsea', 'Islington', 'Hammersmith and Fulham',
    'Hackney', 'Tower Hamlets', 'Camden', 'Westminster', 'Lambeth', 'Haringey',
    'Southwark', 'Wandsworth', 'Lewisham', 'Merton', 'Barking and Dagenham', 'Newham',
    'Waltham Forest', 'Brent', 'Greenwich', 'Harrow', 'Ealing', 'Redbridge',
]

no_motorways = [
    'Oadby and Wigston', 'Gosport', 'Isles of Scilly', 'Hastings', 'Tamworth',
    'Worthing', 'Epsom and Ewell', 'Lincoln', 'Kingston upon Thames', 'Ipswich',
    'Norwich', 'Gloucester', 'Adur', 'Sutton', 'Oxford', 'Cheltenham', 'Broxbourne',
    'Redditch', 'Middlesbrough', 'Richmond upon Thames',
]

only_junctions = ['Reading', 'Blackpool']

two_motorways = ['Spelthorne']

# Drop every authority that appears in any exclusion list in a single pass.
excluded_authorities = city_regions + no_motorways + only_junctions + two_motorways
is_excluded = df_smallest_areas['Local Authority'].isin(excluded_authorities)
df_smallest_areas = df_smallest_areas[~is_excluded]
df_smallest_areas.head()

Unnamed: 0,Country,Country Code,Region,Region Code,Second Tier Authority,Local Authority,Local Authority Code,Calendar Year,LA GHG Sector,LA GHG Sub-sector,Greenhouse gas,Territorial emissions (kt CO2e),CO2 emissions within the scope of influence of LAs (kt CO2e),Mid-year Population (thousands),Area (km2)
77602,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2015,Transport,Road Transport (Motorways),N2O,0.062223,0.0,96.348,21.4305
77599,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2015,Transport,Road Transport (Minor roads),N2O,0.492324,0.0,96.348,21.4305
77675,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (Minor roads),CO2,48.003237,48.003237,96.577,21.4305
77674,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (Minor roads),CH4,0.428091,0.0,96.577,21.4305
77673,England,E92000001,East of England,E12000006,Hertfordshire,Watford,E07000103,2016,Transport,Road Transport (A roads),N2O,0.59517,0.0,96.577,21.4305


In [17]:
# Page through the remaining candidates in area order: entries 90-119 of the
# distinct authority names (unique() keeps first-appearance order).
df_smallest_areas['Local Authority'].unique()[90:120]

array(['Pendle', 'Bournemouth, Christchurch and Poole', 'Solihull',
       'Wokingham', 'Warrington', 'Great Yarmouth', 'Fylde', 'Thurrock',
       'Wigan', 'Wyre Forest', 'Darlington', 'Windsor and Maidenhead',
       'North East Lincolnshire', 'Sefton', 'Chorley', 'Stockton-on-Tees',
       'Newcastle-under-Lyme', 'Dacorum', 'Hart', 'Bromsgrove', 'Arun',
       'Swindon', 'Bristol, City of', 'Tonbridge and Malling',
       'Tandridge', 'Wirral', 'Redcar and Cleveland', 'Mole Valley',
       'Rochford', 'Amber Valley'], dtype=object)

### Potential Sites

In [18]:
# Shortlisted local authorities, in the order they were selected.
potential_sites = [
    'Watford',
    'Harlow',
    'Slough',
    'Worcester',
    'Stevenage',
    'Cambridge',
    'Rushmoor',
    'Luton',
    'Crawley',
    'Exeter',
    'Southampton',
    'Hounslow',
    'Portsmouth',
    'South Tyneside',
    'Enfield',
    'Barnet',
    'Halton',
    'Dudley',
    'Coventry',
    'Trafford',
    'Bracknell Forest',
    'Havering',
    'Liverpool',
    'Blackburn with Darwen',
    'Sunderland',
    'Bolton',
]

In [19]:
# Minor-road annual totals for the shortlisted authorities only.
df_potential_sites = df_aroad.loc[df_aroad['Local Authority'].isin(potential_sites)]

# Every shortlisted name must occur in the data; `isin` would otherwise drop a
# misspelt or renamed authority silently. The message names the missing ones.
matched_sites = df_potential_sites['Local Authority'].unique()
assert len(potential_sites) == len(matched_sites), (
    "Potential sites missing from the emissions data: {}".format(
        sorted(set(potential_sites) - set(matched_sites))
    )
)

df_potential_sites

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
128,Barnet,2005,113.556626
129,Barnet,2006,107.078941
130,Barnet,2007,107.428381
131,Barnet,2008,102.377640
132,Barnet,2009,99.827042
...,...,...,...
4859,Worcester,2016,62.312940
4860,Worcester,2017,61.993240
4861,Worcester,2018,62.813507
4862,Worcester,2019,61.146978


### Saving data

In [20]:
# Create the output folder if it is missing so `to_csv` does not fail on a
# fresh checkout or Drive.
os.makedirs(GHG_PROCESSED_PATH, exist_ok=True)
# The DataFrame index is still written as the first (unnamed) CSV column.
df_potential_sites.to_csv(os.path.join(GHG_PROCESSED_PATH, 'GHG_potential_sites.csv'))

### Check specific potential site

In [21]:
# Spot-check the yearly totals of a single shortlisted authority.
site_name = 'Blackburn with Darwen'
df_site = df_potential_sites[df_potential_sites['Local Authority'] == site_name]

df_site

Unnamed: 0,Local Authority,Calendar Year,Annual Territorial emissions (kt CO2e)
304,Blackburn with Darwen,2005,77.672108
305,Blackburn with Darwen,2006,72.453225
306,Blackburn with Darwen,2007,73.652467
307,Blackburn with Darwen,2008,70.704516
308,Blackburn with Darwen,2009,68.406503
309,Blackburn with Darwen,2010,68.212
310,Blackburn with Darwen,2011,68.03747
311,Blackburn with Darwen,2012,67.715805
312,Blackburn with Darwen,2013,68.342316
313,Blackburn with Darwen,2014,70.846383
