Start by importing necessary packages:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

We will work with a publicly available dataset of annual mean particulate matter (PM10) concentrations at urban background and roadside monitoring sites in the UK. The original .ods file has been split into several separate .csv files.

In [2]:
# Read the roadside annual means. header=2 takes the column names from the
# third row of the file; the rows above it are not data.
pm10_road_df = pd.read_csv(
    filepath_or_buffer='pm10_roadside.csv',
    header=2,
)
pm10_road_df.head(n=5)

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
0,1997,All sites,37.0,8.0
1,1998,All sites,33.0,4.0
2,1999,All sites,32.0,6.0
3,2000,All sites,31.0,8.0
4,2001,All sites,31.0,6.0


In [3]:
# List the distinct 'Site' labels to see which entries are individual sites
# and which are aggregates (e.g. 'All sites').
pm10_road_df['Site'].unique()

array(['All sites', 'Armagh Roadside', 'Barnstaple A39',
       "Belfast Stockman's Lane", 'Birmingham A4540 Roadside',
       'Birmingham Tyburn Roadside', 'Brighton Roadside PM10',
       'Bristol Temple Way', 'Bury Roadside', 'Bury Whitefield Roadside',
       'Camden Kerbside', 'Carlisle Roadside', 'Chatham Roadside',
       'Chepstow A48', 'Chesterfield Roadside', 'Coventry Binley Road',
       'Dumfries', 'Ealing Horn Lane', 'Glasgow High Street',
       'Glasgow Kerbside', 'Greenock A8 Roadside', 'Haringey Roadside',
       'Hull Holderness Road', 'Inverness', 'Leamington Spa Rugby Road',
       'Leeds Headingley Kerbside', 'Leicester A594 Roadside',
       'London A3 Roadside', 'London Marylebone Road',
       'Newcastle Cradlewell Roadside', 'Nottingham Western Boulevard',
       'Portsmouth Anglesea Road', 'Reading London Road',
       'Saltash Callington Road', 'Saltash Roadside', 'Sandy Roadside',
       'Southampton A33', 'Southwark A2 Old Kent Road',
       'St Helens Lin

In [4]:
# Remove the aggregate 'All sites' rows so only individual monitoring sites remain.
# .copy() turns the filtered result into an independent frame: the cells below
# modify it (row drops, 'Year' conversion), which on a plain boolean-mask slice
# raises SettingWithCopyWarning.
pm10_road_df = pm10_road_df.loc[pm10_road_df['Site'] != 'All sites'].copy()
pm10_road_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
22,2009,Armagh Roadside,23.0,
23,2010,Armagh Roadside,29.0,
24,2011,Armagh Roadside,19.0,
25,2012,Armagh Roadside,16.0,
26,2013,Armagh Roadside,19.0,


In [5]:
# Converting 'Year' with pd.to_datetime(..., format='%Y') does not work yet:
# the last 4 rows of the frame are a blank row plus footnotes, not data.
# Inspect the tail to confirm before removing them.
pm10_road_df.tail()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
331,2018,York Fishergate,18.0,
332,,,,
333,1. Annual means from individual monitoring sit...,,,
334,2. The annual mean for 'All Sites' in a given ...,,,
335,3. The bounds of the 95% confidence interval c...,,,


In [6]:
# Remove the trailing blank/footnote rows shown in the tail above. Those rows
# carry no 'Site' value, so select on that instead of dropping "the last 4
# rows" by position: a positional drop deletes 4 genuine rows every time the
# cell is re-run, whereas this filter is a no-op on a second run.
pm10_road_df = pm10_road_df.dropna(subset=['Site']).copy()
pm10_road_df.tail()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
327,2014,York Fishergate,18.0,
328,2015,York Fishergate,18.0,
329,2016,York Fishergate,16.0,
330,2017,York Fishergate,16.0,
331,2018,York Fishergate,18.0,


In [7]:
# Parse the four-digit year into a datetime (1 January of that year).
# .assign returns a new frame rather than writing a column into a frame that
# came out of boolean filtering, which avoids SettingWithCopyWarning.
pm10_road_df = pm10_road_df.assign(
    Year=pd.to_datetime(pm10_road_df['Year'], format='%Y')
)
pm10_road_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
22,2009-01-01,Armagh Roadside,23.0,
23,2010-01-01,Armagh Roadside,29.0,
24,2011-01-01,Armagh Roadside,19.0,
25,2012-01-01,Armagh Roadside,16.0,
26,2013-01-01,Armagh Roadside,19.0,


In [8]:
# Read the urban-background annual means; column names are on the third row (header=2).
pm10_urban_df = pd.read_csv(
    filepath_or_buffer='pm10_urban_background.csv',
    header=2,
)
pm10_urban_df.head(n=5)

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
0,2000,Aberdeen,19.0,
1,2001,Aberdeen,15.0,
2,2003,Aberdeen,22.0,
3,2004,Aberdeen,19.0,
4,2005,Aberdeen,19.0,


In [9]:
# List the distinct 'Site' labels; the aggregate 'All sites' entry shows up
# here alongside the individual sites.
pm10_urban_df['Site'].unique()

array(['Aberdeen', 'All sites', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bournemouth', 'Bradford Centre',
       'Bristol Centre', "Bristol St Paul's", 'Canterbury',
       'Cardiff Centre', 'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'London Westminster', 'Manchester Piccadilly', 'Newcastle Centre',
       'Newport', 'Northampton', 'Northampton PM10', 'Norwich Centre',
       'Norwich Lakenfields', 'Nottingham Centre', 'Oxford St Ebbes',
       'Plymouth Centre', 'Portsmouth', 'Preston', 'Readin

In [10]:
# Remove the aggregate 'All sites' rows so only individual monitoring sites remain.
# .copy() makes the result independent of the original frame, so the in-place
# modifications in the following cells do not raise SettingWithCopyWarning.
pm10_urban_df = pm10_urban_df.loc[pm10_urban_df['Site'] != 'All sites'].copy()

In [11]:
# Before converting 'Year' to datetime, inspect the tail for trailing
# non-data rows (a blank row and footnotes) that must be removed first.
pm10_urban_df.tail()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
743,2018,York Bootham,14.0,
744,,,,
745,1. Annual means from individual monitoring sit...,,,
746,2. The annual mean for 'All Sites' in a given ...,,,
747,3. The bounds of the 95% confidence interval c...,,,


In [12]:
# Remove the trailing blank/footnote rows shown in the tail above. They have no
# 'Site' value, so filter on that rather than dropping the last 4 rows by
# position; the positional drop removes 4 real rows on every re-run of the cell.
pm10_urban_df = pm10_urban_df.dropna(subset=['Site']).copy()
pm10_urban_df.tail()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
739,2014,York Bootham,15.0,
740,2015,York Bootham,15.0,
741,2016,York Bootham,15.0,
742,2017,York Bootham,13.0,
743,2018,York Bootham,14.0,


In [13]:
# Parse the four-digit year into a datetime (1 January of that year).
# .assign returns a new frame instead of setting a column on a filtered frame,
# which avoids SettingWithCopyWarning.
pm10_urban_df = pm10_urban_df.assign(
    Year=pd.to_datetime(pm10_urban_df['Year'], format='%Y')
)
pm10_urban_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-)
0,2000-01-01,Aberdeen,19.0,
1,2001-01-01,Aberdeen,15.0,
2,2003-01-01,Aberdeen,22.0,
3,2004-01-01,Aberdeen,19.0,
4,2005-01-01,Aberdeen,19.0,


In [14]:
# Read the roadside hours table; column names are on the third row (header=2).
pm10_road_hours_df = pd.read_csv(
    filepath_or_buffer='pm10_hours_roadside.csv',
    header=2,
)
pm10_road_hours_df.head(n=5)

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
0,1997,All sites (mean),578,304,174.0,127.0,69.0,69.0,50.0,1370
1,1998,All sites (mean),459,255,124.0,58.0,33.0,13.0,30.0,972
2,1999,All sites (mean),431,224,106.0,30.0,14.0,8.0,35.0,849
3,2000,All sites (mean),460,258,85.0,33.0,22.0,7.0,9.0,873
4,2001,All sites (mean),403,182,92.0,44.0,28.0,22.0,47.0,819


In [15]:
# Remove the aggregate 'All sites (mean)' rows; note the label differs from the
# 'All sites' label used in the annual-mean files.
# .copy() keeps the later in-place edits from raising SettingWithCopyWarning.
pm10_road_hours_df = pm10_road_hours_df.loc[
    pm10_road_hours_df['Site'] != 'All sites (mean)'
].copy()
pm10_road_hours_df.head()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
22,2009,Armagh Roadside,135,126,28.0,1.0,0.0,0.0,0.0,290
23,2010,Armagh Roadside,237,158,135.0,104.0,78.0,40.0,104.0,856
24,2011,Armagh Roadside,208,38,42.0,47.0,24.0,8.0,16.0,383
25,2012,Armagh Roadside,32,28,12.0,3.0,0.0,0.0,0.0,75
26,2013,Armagh Roadside,78,109,34.0,13.0,0.0,0.0,0.0,234


In [16]:
# Before converting 'Year' to datetime, inspect the tail for trailing
# non-data rows (a blank row and footnotes) that must be removed first.
pm10_road_hours_df.tail()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
289,2018,York Fishergate,20.0,28.0,20.0,0.0,0.0,0.0,0.0,68.0
290,,,,,,,,,,
291,1. Annual hours from individual monitoring sit...,,,,,,,,,
292,2. The annual mean for 'All Sites' in a given ...,,,,,,,,,
293,3. The categories relate to those defined by t...,,,,,,,,,


In [17]:
# Remove the trailing blank/footnote rows shown in the tail above. They have no
# 'Site' value, so filter on that rather than dropping the last 4 rows by
# position; the positional drop removes 4 real rows on every re-run of the cell.
pm10_road_hours_df = pm10_road_hours_df.dropna(subset=['Site']).copy()
pm10_road_hours_df.tail()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
285,2014,York Fishergate,44,66,68.0,15.0,0.0,0.0,0.0,193
286,2015,York Fishergate,113,56,13.0,0.0,0.0,0.0,0.0,182
287,2016,York Fishergate,15,12,16.0,16.0,0.0,0.0,0.0,59
288,2017,York Fishergate,83,41,13.0,0.0,0.0,0.0,0.0,137
289,2018,York Fishergate,20,28,20.0,0.0,0.0,0.0,0.0,68


In [18]:
# Parse the four-digit year into a datetime (1 January of that year).
# .assign returns a new frame instead of setting a column on a filtered frame,
# which avoids SettingWithCopyWarning.
pm10_road_hours_df = pm10_road_hours_df.assign(
    Year=pd.to_datetime(pm10_road_hours_df['Year'], format='%Y')
)
pm10_road_hours_df.head()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
22,2009-01-01,Armagh Roadside,135,126,28.0,1.0,0.0,0.0,0.0,290
23,2010-01-01,Armagh Roadside,237,158,135.0,104.0,78.0,40.0,104.0,856
24,2011-01-01,Armagh Roadside,208,38,42.0,47.0,24.0,8.0,16.0,383
25,2012-01-01,Armagh Roadside,32,28,12.0,3.0,0.0,0.0,0.0,75
26,2013-01-01,Armagh Roadside,78,109,34.0,13.0,0.0,0.0,0.0,234


In [19]:
# Read the urban-background hours table; column names are on the third row (header=2).
pm10_urban_hours_df = pd.read_csv(
    filepath_or_buffer='pm10_hours_urban_background.csv',
    header=2,
)
pm10_urban_hours_df.head(n=5)

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
0,2000,Aberdeen,19.0,13.0,15.0,0.0,0.0,0.0,0.0,47
1,2001,Aberdeen,25.0,9.0,0.0,0.0,0.0,0.0,0.0,34
2,2003,Aberdeen,162.0,80.0,70.0,34.0,11.0,13.0,0.0,370
3,2004,Aberdeen,105.0,68.0,9.0,12.0,16.0,0.0,0.0,210
4,2005,Aberdeen,48.0,38.0,0.0,0.0,0.0,0.0,0.0,86


In [20]:
# Inspect the tail for trailing non-data rows (a blank row and footnotes)
# before converting 'Year' to datetime.
pm10_urban_hours_df.tail()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
724,2018,York Bootham,17.0,32.0,5.0,0.0,0.0,0.0,0.0,54.0
725,,,,,,,,,,
726,1. Annual hours from individual monitoring sit...,,,,,,,,,
727,2. The annual mean for 'All Sites' in a given ...,,,,,,,,,
728,3. The categories relate to those defined by t...,,,,,,,,,


In [21]:
# Remove the trailing blank/footnote rows shown in the tail above. They have no
# 'Site' value, so filter on that rather than dropping the last 4 rows by
# position; the positional drop removes 4 real rows on every re-run of the cell.
pm10_urban_hours_df = pm10_urban_hours_df.dropna(subset=['Site']).copy()
pm10_urban_hours_df.tail()

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
720,2014,York Bootham,63.0,5.0,5.0,11.0,0.0,0.0,0.0,84
721,2015,York Bootham,87.0,24.0,11.0,0.0,0.0,0.0,0.0,122
722,2016,York Bootham,10.0,11.0,14.0,13.0,0.0,0.0,0.0,48
723,2017,York Bootham,99.0,29.0,0.0,0.0,0.0,0.0,0.0,128
724,2018,York Bootham,17.0,32.0,5.0,0.0,0.0,0.0,0.0,54


In [22]:
# Parse the four-digit year strings into datetimes (1 January of that year).
year_as_datetime = pd.to_datetime(pm10_urban_hours_df['Year'], format='%Y')
pm10_urban_hours_df['Year'] = year_as_datetime
pm10_urban_hours_df.head(n=5)

Unnamed: 0,Year,Site,4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
0,2000-01-01,Aberdeen,19.0,13.0,15.0,0.0,0.0,0.0,0.0,47
1,2001-01-01,Aberdeen,25.0,9.0,0.0,0.0,0.0,0.0,0.0,34
2,2003-01-01,Aberdeen,162.0,80.0,70.0,34.0,11.0,13.0,0.0,370
3,2004-01-01,Aberdeen,105.0,68.0,9.0,12.0,16.0,0.0,0.0,210
4,2005-01-01,Aberdeen,48.0,38.0,0.0,0.0,0.0,0.0,0.0,86


In [23]:
# List the distinct 'Site' labels; in this file the aggregate rows are
# labelled 'All sites (mean)'.
pm10_urban_hours_df['Site'].unique()

array(['Aberdeen', 'All sites (mean)', 'Belfast Centre',
       'Birmingham Centre', 'Birmingham East', 'Birmingham Tyburn',
       'Blackpool', 'Blackpool Marton', 'Bolton', 'Bradford Centre',
       'Bristol Centre', "Bristol St Paul's", 'Canterbury',
       'Cardiff Centre', 'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', '

In [24]:
# Keep individual monitoring sites only: discard the 'All sites (mean)' aggregate rows,
# then list the remaining site labels to confirm the aggregate is gone.
pm10_urban_hours_df = pm10_urban_hours_df.loc[
    lambda frame: frame['Site'] != 'All sites (mean)'
]
pm10_urban_hours_df['Site'].unique()

array(['Aberdeen', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bradford Centre', 'Bristol Centre',
       "Bristol St Paul's", 'Canterbury', 'Cardiff Centre',
       'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', 'Sheffield Centre', '

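Now we merge the four datasets: for each site type (roadside and urban background), the annual-mean table is joined with the corresponding hours table.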

In [25]:
# Join roadside annual means with the roadside hours table.
# The join keys are spelled out ('Year' and 'Site' are the columns the two
# frames share) so that an accidentally shared column can never silently
# become part of the key.
# NOTE: how='inner' keeps only (Year, Site) pairs present in BOTH frames; any
# site or year that appears in just one of the files is dropped here.
road_df = pd.merge(
    pm10_road_df,
    pm10_road_hours_df,
    on=['Year', 'Site'],
    how='inner',
)
road_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
0,2009-01-01,Armagh Roadside,23.0,,135,126,28.0,1.0,0.0,0.0,0.0,290
1,2010-01-01,Armagh Roadside,29.0,,237,158,135.0,104.0,78.0,40.0,104.0,856
2,2011-01-01,Armagh Roadside,19.0,,208,38,42.0,47.0,24.0,8.0,16.0,383
3,2012-01-01,Armagh Roadside,16.0,,32,28,12.0,3.0,0.0,0.0,0.0,75
4,2013-01-01,Armagh Roadside,19.0,,78,109,34.0,13.0,0.0,0.0,0.0,234


In [26]:
# Join urban-background annual means with the urban-background hours table.
# The join keys are spelled out ('Year' and 'Site' are the columns the two
# frames share) so that an accidentally shared column can never silently
# become part of the key.
# NOTE: how='inner' keeps only (Year, Site) pairs present in BOTH frames; any
# site or year that appears in just one of the files is dropped here.
urban_df = pd.merge(
    pm10_urban_df,
    pm10_urban_hours_df,
    on=['Year', 'Site'],
    how='inner',
)
urban_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours
0,2000-01-01,Aberdeen,19.0,,19.0,13.0,15.0,0.0,0.0,0.0,0.0,47
1,2001-01-01,Aberdeen,15.0,,25.0,9.0,0.0,0.0,0.0,0.0,0.0,34
2,2003-01-01,Aberdeen,22.0,,162.0,80.0,70.0,34.0,11.0,13.0,0.0,370
3,2004-01-01,Aberdeen,19.0,,105.0,68.0,9.0,12.0,16.0,0.0,0.0,210
4,2005-01-01,Aberdeen,19.0,,48.0,38.0,0.0,0.0,0.0,0.0,0.0,86


In [27]:
# List the sites that are present after the merge.
urban_df['Site'].unique()

array(['Aberdeen', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bradford Centre', 'Bristol Centre',
       "Bristol St Paul's", 'Canterbury', 'Cardiff Centre',
       'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', 'Sheffield Centre', '

In [28]:
# Read the outcode lookup table (columns: id, postcode, latitude, longitude).
postcodes_df = pd.read_csv(filepath_or_buffer='outcodes.csv')
postcodes_df.head(n=5)

Unnamed: 0,id,postcode,latitude,longitude
0,2,AB10,57.13514,-2.11731
1,3,AB11,57.13875,-2.09089
2,4,AB12,57.101,-2.1106
3,5,AB13,57.10801,-2.23776
4,6,AB14,57.10076,-2.27073


In [29]:
# Count the distinct values in the 'postcode' column of the lookup table.
len(postcodes_df['postcode'].unique())

3003

In [30]:
# NOTE(review): this cell repeats the previous one verbatim and yields the same
# count; it can be deleted.
len(postcodes_df['postcode'].unique())

3003

In [31]:
# Strip surrounding whitespace so the outcodes compare equal to the lookup keys.
# The vectorised .str accessor passes missing values through as NaN, whereas
# .map(str.strip) raises TypeError on the first entry that is not a string.
postcodes_df['postcode'] = postcodes_df['postcode'].str.strip()

In [32]:
# Urban-background site name -> postcode outcode, used to look up coordinates.
# (Despite the name, the values are outcodes, not latitude/longitude pairs.)
# NOTE(review): a site whose outcode is absent from the postcode lookup table
# is dropped by the inner merge on 'postcode' - verify entries such as 'WC1'
# and 'PR0' against outcodes.csv.
lat_long_dict = {
    'Aberdeen': 'AB11',
    'Belfast Centre': 'BT1',
    'Birmingham Centre': 'B1',
    'Birmingham East': 'B90',
    'Birmingham Tyburn': 'B24',
    'Blackpool': 'FY1',
    'Blackpool Marton': 'FY3',
    'Bolton': 'BL1',
    'Bradford Centre': 'BD1',
    'Bristol Centre': 'BS1',
    "Bristol St Paul's": 'BS2',
    'Canterbury': 'CT1',
    'Cardiff Centre': 'CF10',
    'Chesterfield': 'S40',
    'Chesterfield Loundsley Green': 'S41',
    'Coventry Memorial Park': 'CV3',
    'Cwmbran': 'NP44',
    'Derry': 'BT47',
    'Derry Rosemount': 'BT48',
    'Eastbourne': 'BN20',
    'Edinburgh Centre': 'EH1',
    'Edinburgh St Leonards': 'EH8',
    'Glasgow Centre': 'G1',
    'Glasgow Townhead': 'G4',
    'Hull Centre': 'HU1',
    'Hull Freetown': 'HU2',
    'Leamington Spa': 'CV31',
    'Leeds Centre': 'LS1',
    'Leicester Centre': 'LE1',
    'Liverpool Centre': 'L1',
    'London Bloomsbury': 'WC1',
    'London Brent': 'HA0',
    'London Hillingdon': 'UB8',
    'London N. Kensington': 'W10',
    'Manchester Piccadilly': 'M60',
    'Newcastle Centre': 'NE1',
    'Newport': 'NP10',
    'Northampton': 'NN1',
    'Norwich Centre': 'NR1',
    'Norwich Lakenfields': 'NR1',
    'Nottingham Centre': 'NG1',
    'Oxford St Ebbes': 'OX1',
    'Plymouth Centre': 'PL1',
    'Portsmouth': 'PO1',
    'Preston': 'PR0',
    'Reading': 'RG1',
    'Reading New Town': 'RG2',
    'Salford Eccles': 'M17',
    'Sheffield Centre': 'S1',
    'Sheffield Devonshire Green': 'S3',
    'Southampton Centre': 'SO14',
    'Southend-on-Sea': 'SS0',
    'Stockport': 'SK1',
    'Stockport Shaw Heath': 'SK2',
    'Stoke-on-Trent Centre': 'ST1',
    'Swansea': 'SA1',
    'Thurrock': 'CM13',
    'Wigan Centre': 'WN1',
    'Wigan Leigh': 'WN7',
    'Wirral Tranmere': 'CH41',
    'Wolverhampton Centre': 'WV1',
    'York Bootham': 'YO30',
}

In [33]:
# Look up each site's outcode; sites missing from lat_long_dict get NaN.
urban_outcodes = urban_df['Site'].map(lat_long_dict)
urban_df['postcode'] = urban_outcodes
urban_df

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode
0,2000-01-01,Aberdeen,19.0,,19.0,13.0,15.0,0.0,0.0,0.0,0.0,47,AB11
1,2001-01-01,Aberdeen,15.0,,25.0,9.0,0.0,0.0,0.0,0.0,0.0,34,AB11
2,2003-01-01,Aberdeen,22.0,,162.0,80.0,70.0,34.0,11.0,13.0,0.0,370,AB11
3,2004-01-01,Aberdeen,19.0,,105.0,68.0,9.0,12.0,16.0,0.0,0.0,210,AB11
4,2005-01-01,Aberdeen,19.0,,48.0,38.0,0.0,0.0,0.0,0.0,0.0,86,AB11
5,2006-01-01,Aberdeen,20.0,,92.0,9.0,7.0,7.0,18.0,10.0,0.0,143,AB11
6,2007-01-01,Aberdeen,17.0,,45.0,47.0,0.0,0.0,0.0,0.0,0.0,92,AB11
7,2008-01-01,Aberdeen,16.0,,20.0,0.0,0.0,0.0,0.0,0.0,0.0,20,AB11
8,2009-01-01,Aberdeen,15.0,,7.0,9.0,11.0,0.0,0.0,0.0,0.0,27,AB11
9,2010-01-01,Aberdeen,13.0,,12.0,0.0,0.0,0.0,0.0,0.0,0.0,12,AB11


In [34]:
# List the sites in the frame after the outcode column was added.
urban_df['Site'].unique()

array(['Aberdeen', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bradford Centre', 'Bristol Centre',
       "Bristol St Paul's", 'Canterbury', 'Cardiff Centre',
       'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', 'Sheffield Centre', '

In [35]:
# Attach latitude/longitude to every urban row by joining on the outcode.
# An inner join silently discards each row whose outcode has no match in
# postcodes_df, so report the affected sites first to make such losses visible.
unmatched_urban_sites = urban_df.loc[
    ~urban_df['postcode'].isin(postcodes_df['postcode']), 'Site'
].unique()
if len(unmatched_urban_sites):
    print('Sites with no matching outcode (dropped by the merge):',
          list(unmatched_urban_sites))

urban_df2 = pd.merge(urban_df, postcodes_df, on='postcode', how='inner')
urban_df2

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode,id,latitude,longitude
0,2000-01-01,Aberdeen,19.0,,19.0,13.0,15.0,0.0,0.0,0.0,0.0,47,AB11,3,57.13875,-2.09089
1,2001-01-01,Aberdeen,15.0,,25.0,9.0,0.0,0.0,0.0,0.0,0.0,34,AB11,3,57.13875,-2.09089
2,2003-01-01,Aberdeen,22.0,,162.0,80.0,70.0,34.0,11.0,13.0,0.0,370,AB11,3,57.13875,-2.09089
3,2004-01-01,Aberdeen,19.0,,105.0,68.0,9.0,12.0,16.0,0.0,0.0,210,AB11,3,57.13875,-2.09089
4,2005-01-01,Aberdeen,19.0,,48.0,38.0,0.0,0.0,0.0,0.0,0.0,86,AB11,3,57.13875,-2.09089
5,2006-01-01,Aberdeen,20.0,,92.0,9.0,7.0,7.0,18.0,10.0,0.0,143,AB11,3,57.13875,-2.09089
6,2007-01-01,Aberdeen,17.0,,45.0,47.0,0.0,0.0,0.0,0.0,0.0,92,AB11,3,57.13875,-2.09089
7,2008-01-01,Aberdeen,16.0,,20.0,0.0,0.0,0.0,0.0,0.0,0.0,20,AB11,3,57.13875,-2.09089
8,2009-01-01,Aberdeen,15.0,,7.0,9.0,11.0,0.0,0.0,0.0,0.0,27,AB11,3,57.13875,-2.09089
9,2010-01-01,Aberdeen,13.0,,12.0,0.0,0.0,0.0,0.0,0.0,0.0,12,AB11,3,57.13875,-2.09089


In [36]:
# List the sites that remain after joining with the postcode lookup.
urban_df2['Site'].unique()

array(['Aberdeen', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bradford Centre', 'Bristol Centre',
       "Bristol St Paul's", 'Canterbury', 'Cardiff Centre',
       'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', 'Sheffield Centre', '

In [67]:
# List the roadside sites in the merged frame; these are the names that need
# an outcode in the mapping defined next.
road_df['Site'].unique()

array(['Armagh Roadside', 'Barnstaple A39', "Belfast Stockman's Lane",
       'Birmingham A4540 Roadside', 'Birmingham Tyburn Roadside',
       'Bristol Temple Way', 'Bury Roadside', 'Bury Whitefield Roadside',
       'Camden Kerbside', 'Carlisle Roadside', 'Chatham Roadside',
       'Chepstow A48', 'Chesterfield Roadside', 'Coventry Binley Road',
       'Ealing Horn Lane', 'Glasgow High Street', 'Glasgow Kerbside',
       'Greenock A8 Roadside', 'Haringey Roadside',
       'Hull Holderness Road', 'Leamington Spa Rugby Road',
       'Leeds Headingley Kerbside', 'Leicester A594 Roadside',
       'London A3 Roadside', 'London Marylebone Road',
       'Newcastle Cradlewell Roadside', 'Nottingham Western Boulevard',
       'Portsmouth Anglesea Road', 'Reading London Road',
       'Saltash Callington Road', 'Saltash Roadside', 'Sandy Roadside',
       'Southampton A33', 'Southwark A2 Old Kent Road',
       'St Helens Linkway', 'Stanford-le-Hope Roadside',
       'Stockton-on-Tees Eaglesclif

In [69]:
# Roadside site name -> postcode outcode, used to look up coordinates.
# (Despite the name, the values are outcodes, not latitude/longitude pairs.)
# NOTE(review): a site whose outcode is absent from the postcode lookup table
# is dropped by the inner merge on 'postcode' - verify against outcodes.csv.
lat_long_dict2 = {
    'Armagh Roadside': 'BT60',
    'Barnstaple A39': 'EX31',
    "Belfast Stockman's Lane": 'BT9',
    'Birmingham A4540 Roadside': 'B16',
    'Birmingham Tyburn Roadside': 'B24',
    'Bristol Temple Way': 'BS2',
    'Bury Roadside': 'BL0',
    'Bury Whitefield Roadside': 'BL9',
    'Camden Kerbside': 'NW1',
    'Carlisle Roadside': 'CA1',
    'Chatham Roadside': 'ME1',
    'Chepstow A48': 'NP16',
    'Chesterfield Roadside': 'S18',
    'Coventry Binley Road': 'CV3',
    'Ealing Horn Lane': 'W3',
    'Glasgow High Street': 'G1',
    'Glasgow Kerbside': 'G3',
    'Greenock A8 Roadside': 'PA14',
    'Haringey Roadside': 'N4',
    'Hull Holderness Road': 'HU8',
    'Leamington Spa Rugby Road': 'CV32',
    'Leeds Headingley Kerbside': 'LS6',
    'Leicester A594 Roadside': 'LE1',
    'London A3 Roadside': 'GU1',
    'London Marylebone Road': 'NW1',
    'Newcastle Cradlewell Roadside': 'NE2',
    'Nottingham Western Boulevard': 'NG8',
    'Portsmouth Anglesea Road': 'PO1',
    'Reading London Road': 'RG1',
    'Saltash Callington Road': 'PL12',
    'Saltash Roadside': 'PL12',
    'Sandy Roadside': 'SG19',
    'Southampton A33': 'SO14',
    'Southwark A2 Old Kent Road': 'SE15',
    'St Helens Linkway': 'WA9',
    'Stanford-le-Hope Roadside': 'SS17',
    'Stockton-on-Tees Eaglescliffe': 'TS18',
    'Stockton-on-Tees Yarm': 'TS15',
    'Stoke-on-Trent A50 Roadside': 'ST4',
    'Storrington Roadside': 'RH20',
    'Sutton Roadside': 'KT17',
    'Swansea Roadside': 'SA1',
    'York Fishergate': 'YO10',
}

In [70]:
# Look up each roadside site's outcode; sites missing from lat_long_dict2 get NaN.
road_outcodes = road_df['Site'].map(lat_long_dict2)
road_df['postcode'] = road_outcodes
road_df.head(n=5)

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode
0,2009-01-01,Armagh Roadside,23.0,,135,126,28.0,1.0,0.0,0.0,0.0,290,BT60
1,2010-01-01,Armagh Roadside,29.0,,237,158,135.0,104.0,78.0,40.0,104.0,856,BT60
2,2011-01-01,Armagh Roadside,19.0,,208,38,42.0,47.0,24.0,8.0,16.0,383,BT60
3,2012-01-01,Armagh Roadside,16.0,,32,28,12.0,3.0,0.0,0.0,0.0,75,BT60
4,2013-01-01,Armagh Roadside,19.0,,78,109,34.0,13.0,0.0,0.0,0.0,234,BT60


In [71]:
# Attach latitude/longitude to every roadside row by joining on the outcode.
# An inner join silently discards each row whose outcode has no match in
# postcodes_df, so report the affected sites first to make such losses visible.
unmatched_road_sites = road_df.loc[
    ~road_df['postcode'].isin(postcodes_df['postcode']), 'Site'
].unique()
if len(unmatched_road_sites):
    print('Sites with no matching outcode (dropped by the merge):',
          list(unmatched_road_sites))

road_df2 = pd.merge(road_df, postcodes_df, on='postcode', how='inner')
road_df2.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode,id,latitude,longitude
0,2009-01-01,Armagh Roadside,23.0,,135,126,28.0,1.0,0.0,0.0,0.0,290,BT60,343,54.29219,-6.64951
1,2010-01-01,Armagh Roadside,29.0,,237,158,135.0,104.0,78.0,40.0,104.0,856,BT60,343,54.29219,-6.64951
2,2011-01-01,Armagh Roadside,19.0,,208,38,42.0,47.0,24.0,8.0,16.0,383,BT60,343,54.29219,-6.64951
3,2012-01-01,Armagh Roadside,16.0,,32,28,12.0,3.0,0.0,0.0,0.0,75,BT60,343,54.29219,-6.64951
4,2013-01-01,Armagh Roadside,19.0,,78,109,34.0,13.0,0.0,0.0,0.0,234,BT60,343,54.29219,-6.64951


In [77]:
# Stack the urban-background and roadside tables into a single frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and gives the same result.
# NOTE: the original row labels are kept (as append did), so index values
# repeat across the two halves - select rows by mask/position, not by label.
all_sites_df = pd.concat([urban_df2, road_df2])
all_sites_df.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode,id,latitude,longitude
0,2000-01-01,Aberdeen,19.0,,19,13,15.0,0.0,0.0,0.0,0.0,47,AB11,3,57.13875,-2.09089
1,2001-01-01,Aberdeen,15.0,,25,9,0.0,0.0,0.0,0.0,0.0,34,AB11,3,57.13875,-2.09089
2,2003-01-01,Aberdeen,22.0,,162,80,70.0,34.0,11.0,13.0,0.0,370,AB11,3,57.13875,-2.09089
3,2004-01-01,Aberdeen,19.0,,105,68,9.0,12.0,16.0,0.0,0.0,210,AB11,3,57.13875,-2.09089
4,2005-01-01,Aberdeen,19.0,,48,38,0.0,0.0,0.0,0.0,0.0,86,AB11,3,57.13875,-2.09089


In [79]:
# List the sites present in the combined urban + roadside frame.
all_sites_df['Site'].unique()

array(['Aberdeen', 'Belfast Centre', 'Birmingham Centre',
       'Birmingham East', 'Birmingham Tyburn', 'Blackpool',
       'Blackpool Marton', 'Bolton', 'Bradford Centre', 'Bristol Centre',
       "Bristol St Paul's", 'Canterbury', 'Cardiff Centre',
       'Chesterfield', 'Chesterfield Loundsley Green',
       'Coventry Memorial Park', 'Cwmbran', 'Derry', 'Derry Rosemount',
       'Eastbourne', 'Edinburgh Centre', 'Edinburgh St Leonards',
       'Glasgow Centre', 'Glasgow Townhead', 'Hull Centre',
       'Hull Freetown', 'Leamington Spa', 'Leeds Centre',
       'Leicester Centre', 'Liverpool Centre', 'London Bloomsbury',
       'London Brent', 'London Hillingdon', 'London N. Kensington',
       'Manchester Piccadilly', 'Newcastle Centre', 'Newport',
       'Northampton', 'Norwich Centre', 'Norwich Lakenfields',
       'Nottingham Centre', 'Oxford St Ebbes', 'Plymouth Centre',
       'Portsmouth', 'Preston', 'Reading', 'Reading New Town',
       'Salford Eccles', 'Sheffield Centre', '

In [80]:
# Keep the 2017 rows only. 'Year' is a datetime column, so compare its year
# component directly instead of matching a hand-written nanosecond timestamp
# string, which depends on implicit string-to-datetime coercion.
all_sites_2017 = all_sites_df[all_sites_df['Year'].dt.year == 2017]
all_sites_2017.head()

Unnamed: 0,Year,Site,Annual Mean PM10 concentration (µg/m3),95% confidence interval for 'All sites' (+/-),4 Moderate,5 Moderate,6 Moderate,7 High,8 High,9 High,10 Very High,Total Hours,postcode,id,latitude,longitude
16,2017-01-01,Aberdeen,11.0,,11,0,0.0,0.0,0.0,0.0,0.0,11,AB11,3,57.13875,-2.09089
39,2017-01-01,Belfast Centre,12.0,,4,7,10.0,3.0,0.0,0.0,0.0,24,BT1,290,54.59941,-5.92838
126,2017-01-01,Bristol St Paul's,15.0,,28,26,2.0,0.0,0.0,0.0,0.0,56,BS2,259,51.45945,-2.58013
153,2017-01-01,Cardiff Centre,16.0,,28,0,0.0,0.0,0.0,0.0,0.0,28,CF10,414,51.47387,-3.17648
162,2017-01-01,Chesterfield Loundsley Green,12.0,,31,38,0.0,0.0,0.0,0.0,0.0,69,S41,2177,53.24696,-1.42746


In [85]:
# Save the combined table to disk. Note that the output is tab-separated
# (sep='\t') and the file name 'all_sites_df' has no extension, so any reader
# must be told the delimiter explicitly.
all_sites_df.to_csv('all_sites_df', sep='\t', encoding='utf-8')

Plotting with Bokeh:

In [83]:
# NOTE(review): Plasma, HoverTool, cm, colors, factorint and Select are not used
# in this cell; the imports are kept only in case other cells rely on them.
from bokeh.plotting import figure, show, output_notebook
from bokeh.palettes import Plasma
from bokeh.palettes import Spectral6
from bokeh.transform import linear_cmap
from bokeh.models import ColumnDataSource, ColorBar, HoverTool
from matplotlib import cm,colors
from sympy.ntheory.factor_ import factorint
from bokeh.models.widgets import Select

# Render Bokeh output inline in the notebook.
output_notebook()

# Column that drives both the marker colour and the marker size.
PM10_COL = 'Annual Mean PM10 concentration (µg/m3)'

source = ColumnDataSource(data=all_sites_2017)

# Colour scale spans the observed range of the 2017 annual means.
min_val = all_sites_2017[PM10_COL].min()
max_val = all_sites_2017[PM10_COL].max()
sizes = all_sites_2017[PM10_COL]  # not used by the plot below

mapper = linear_cmap(field_name=PM10_COL,
                     palette=Spectral6,
                     low=min_val,
                     high=max_val)

# The @{...} form is required because the column name contains spaces.
TOOLTIPS = [
    ("Site", "@Site"),
    (PM10_COL, "@{" + PM10_COL + "}"),
]

# width/height replace plot_width/plot_height, which were deprecated in
# Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=500, height=600, tooltips=TOOLTIPS,
           x_axis_label='Longitude', y_axis_label='Latitude',
           title="Annual Mean PM10 concentration (µg/m3) across the UK in 2017")

# scatter() draws circle markers by default; circle() with a screen-unit
# `size` is deprecated in recent Bokeh releases.
p.scatter('longitude', 'latitude', color=mapper, size=PM10_COL, source=source)

color_bar = ColorBar(color_mapper=mapper['transform'], width=8,  location=(0,0))

p.add_layout(color_bar, 'right')

show(p)