In [None]:
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

## Spatial Manipulation
import geopandas as gpd
from shapely.geometry import Point

## Mapping
import folium
from folium import plugins

## Misc
import functools
from itertools import product
import contextily as ctx

pd.options.display.max_columns = None
#pd.options.display.max_rows = None

from mpl_toolkits.axes_grid1 import make_axes_locatable

## Highlighting where sensors are on the map 

In [None]:
# Load the base measurements file; the first CSV column is used as the index.
df = pd.read_csv("pm_no_clean.csv", index_col=0)

In [None]:
# Keep only the first row for each pod_id_location (presumably one row per sensor
# pod, which is all the location maps below need).
df = df.drop_duplicates(subset=['pod_id_location'])

In [None]:
# Turn the sensor table into a GeoDataFrame: one point per row built from the
# Longitude/Latitude columns (declared as EPSG:4326), reprojected to EPSG:3857.
crs = {'init': 'epsg:4326'}
geometry = [Point(lon, lat) for lon, lat in zip(df.Longitude, df.Latitude)]
gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry).to_crs({'init': 'epsg:3857'})

In [None]:
# Import the ULEZ boundary layer.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine
# with the same directory layout.
path = "/Users/oliverpaul/Data_Science/EDF/LAEI - GIS geographies/Ultra_Low_Emissions_Zone.gpkg"
ULEZ = gpd.read_file(path)
# The CRS is passed explicitly as EPSG:27700 rather than taken from the file
# (assumes the layer really is in that CRS — TODO confirm), then the layer is
# reprojected to EPSG:3857, the same CRS the sensor points are converted to.
crs = {'init': 'epsg:27700'}
ULEZ = gpd.GeoDataFrame(ULEZ, crs=crs)
ULEZ = ULEZ.to_crs({'init': 'epsg:3857'})

In [None]:
plt.rcParams['figure.dpi'] = 600

# Sensors coloured by site type, with the ULEZ boundary drawn as an outline on top.
ax=gdf.plot(figsize=(10, 10), column='Site_Type_x', cmap='Paired', legend=True, alpha=0.9)
ULEZ.plot(ax=ax, edgecolor='k', facecolor='none', linewidth=2, alpha=0.5)
# `source=` is the supported keyword for the tile provider; `url=` is deprecated.
ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)

ax.set_title('Breathe London Sensor Locations with ULEZ', fontdict={'fontsize': '25', 'fontweight' : '2'})
plt.axis('off')

#plt.savefig('/Users/oliverpaul/Data_Science/EDF/plots/sensor_locations.png', bbox_inches = 'tight', pad_inches = 0.1)
plt.show()

In [None]:
plt.rcParams['figure.dpi'] = 600

# Sensors coloured by sensor_position, with the ULEZ boundary drawn as an outline on top.
ax=gdf.plot(figsize=(10, 10), column='sensor_position', cmap='Paired', legend=True, alpha=0.9)
ULEZ.plot(ax=ax, edgecolor='k', facecolor='none', linewidth=2, alpha=0.5)
# `source=` is the supported keyword for the tile provider; `url=` is deprecated.
ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)

ax.set_title('Sensor Location Types with ULEZ', fontdict={'fontsize': '25', 'fontweight' : '2'})
plt.axis('off')

#plt.savefig('/Users/oliverpaul/Data_Science/EDF/plots/sensor_locations.png', bbox_inches = 'tight', pad_inches = 0.1)
plt.show()

In [None]:
plt.rcParams['figure.dpi'] = 600

# Only the sensors whose site type is 'School', drawn as larger blue markers.
ax=gdf[(gdf['Site_Type_x'] == 'School')].plot(figsize=(10, 10), 
                                              color="dodgerblue", 
                                              alpha=0.8, 
                                              markersize = 80)

ULEZ.plot(ax=ax, 
          edgecolor='k', 
          facecolor='none', 
          linewidth=2, 
          alpha=0.5)

# `source=` is the supported keyword for the tile provider; `url=` is deprecated.
ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)

ax.set_title('Breathe London School Locations with ULEZ', fontdict={'fontsize': '25', 'fontweight' : '2'})
plt.axis('off')
plt.show()

## Looking at temporal patterns in data

In [None]:
# Reload the full measurements table (the earlier `df` was reduced to one row
# per pod_id_location for the maps).
df_1 = pd.read_csv("pm_no_clean.csv", index_col=0)

In [None]:
# Parse date_UTC into pandas datetimes so rows can be selected by date range later on.
df_1['date_UTC'] = pd.to_datetime(df_1["date_UTC"])

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first), points coloured by Zone.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# No `ci` argument here: a scatter plot draws raw points, so a confidence
# interval has no meaning, and recent seaborn releases reject the keyword.
g = sns.relplot(x='hour', y="pm2_5_ugm3",
                 col="w_day", hue="Zone",
                 kind="scatter", data=df_1, col_order=col_order)

axes = g.axes.flatten()
# Replace seaborn's default "w_day = ..." panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly PM2.5")

g.fig.suptitle('Diurnal scatter plot to show hourly PM 2.5 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); one line per Zone with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="pm2_5_ugm3",
                hue="Zone", col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average PM2.5")

g.fig.suptitle('Diurnal plot to show hourly average PM 2.5 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)


In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); one line per Zone with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="no2_ugm3",
                hue="Zone", col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average No2")

g.fig.suptitle('Diurnal plot to show hourly average No2 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')


plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first), points coloured by Zone.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# No `ci` argument here: a scatter plot draws raw points, so a confidence
# interval has no meaning, and recent seaborn releases reject the keyword.
g = sns.relplot(x='hour', y="no2_ugm3",
                 col="w_day", hue="Zone",
                 kind="scatter", data=df_1, col_order=col_order)

axes = g.axes.flatten()
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
    # Dashed red reference line at 200 on every panel (the limit annotated below).
    ax.axhline(200, ls='--', linewidth=3, color='red')
axes[0].set_ylabel("Hourly No2")

g.fig.suptitle('Diurnal scatter plot to show hourly No2 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

# Footnote placed in data coordinates of the current (last) panel.
plt.text(25,120, "*EU limit of\n18 times per year")


plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

### We see PM25 and NO2 have different profiles, PM25 tends to be high during the evening and early morning, seemingly correlated with temperature. This could be due to higher pressure trapping particles at ground level

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per season; one line per Zone with a 95% confidence band.
col_order=["Spring", "Summer", "Autumn", "Winter"]

g = sns.relplot(data=df_1, x='hour', y="no2_ugm3",
                hue="Zone", col="season", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare season name.
for ax, season in zip(axes, col_order):
    ax.set_title(season)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average No2")

g.fig.suptitle('Seasonal comparison of hourly No2 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')


#plt.margins(x=0)
#plt.subplots_adjust(hspace=0, wspace=0)

plt.show()

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per season; one line per Zone with a 95% confidence band.
col_order=["Spring", "Summer", "Autumn", "Winter"]

g = sns.relplot(data=df_1, x='hour', y="pm2_5_ugm3",
                hue="Zone", col="season", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare season name.
for ax, season in zip(axes, col_order):
    ax.set_title(season)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average PM2.5")

g.fig.suptitle('Seasonal comparison of hourly PM 2.5 values between ULEZ and non-ULEZ sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

#plt.margins(x=0)
#plt.subplots_adjust(hspace=0, wspace=0)

plt.show()

### We see the highest fluctuations in Spring, which makes sense since Spring sees the greatest difference between maximum and minimum temperatures. High temperature gradients also produce strong thermal conditions, which will draw PM into higher altitudes

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); one line per sensor_position with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="pm2_5_ugm3",
                hue="sensor_position", col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average PM2.5")

g.fig.suptitle('Diurnal plot to show hourly average PM 2.5 values between background and non-background sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

Marking typical school run periods

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); one line per sensor_position with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="pm2_5_ugm3",
                hue="sensor_position", col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
    # Dashed red markers at hours 7 and 9 on every panel.
    for marked_hour in (7, 9):
        ax.axvline(marked_hour, ls='--', linewidth=1.5, color='red')
axes[0].set_ylabel("Hourly Average PM2.5")

g.fig.suptitle('Diurnal plot to show hourly average PM 2.5 values between background and non-background sites, typical school run hours marked in red', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); a single line over all sites with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="pm2_5_ugm3",
                col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
    # Dashed red markers at hours 7 and 9 on every panel.
    for marked_hour in (7, 9):
        ax.axvline(marked_hour, ls='--', linewidth=1.5, color='red')
axes[0].set_ylabel("Hourly Average PM2.5")

g.fig.suptitle('Diurnal plot to show hourly average PM 2.5 values, typical school run hours marked in red ', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

We can see there is a relationship between PM25 levels and temperature

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); a single temperature line with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(x='hour', y="temperature",
                 col="w_day",
                 kind="line", ci=95, data=df_1, col_order=col_order)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")

# The plotted variable is temperature; the label previously read "Hourly Average PM2.5".
axes[0].set_ylabel("Hourly Average Temperature")

g.fig.suptitle('Diurnal plot to show hourly average temperature', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

Splitting background from roadside for no2

In [None]:
plt.rcParams['figure.dpi'] = 150

# One panel per weekday (Monday first); one line per sensor_position with a 95% confidence band.
col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

g = sns.relplot(data=df_1, x='hour', y="no2_ugm3",
                hue="sensor_position", col="w_day", col_order=col_order,
                kind="line", ci=95)

axes = g.axes.flatten()
# Replace seaborn's default panel titles with the bare day name.
for ax, day in zip(axes, col_order):
    ax.set_title(day)
    ax.set_xlabel("Hour")
axes[0].set_ylabel("Hourly Average No2")

g.fig.suptitle('Diurnal plot to show hourly average No2 values between background and non-background sites', 
               weight='semibold', 
               y= 1.06, 
               size='x-large')

plt.margins(x=0)
plt.subplots_adjust(hspace=0, wspace=0)

PM25 by hour, month and weekday

In [None]:
plt.rcParams['figure.dpi'] = 200
#sns.set_style("whitegrid")

# Single-panel hourly profile: one line per Zone with a 95% confidence band.
ax = sns.lineplot(data=df_1, x='hour', y="pm2_5_ugm3", hue="Zone", ci=95)#.grid(axis='x')


ax.set_xlabel("Hour")
ax.set_ylabel("Average PM 2.5")
ax.set_title("Average hourly PM 2.5 in μg/m3 between\nULEZ and non-ULEZ sites",
             weight='semibold',
             size='x-large')
ax.margins(x=0)


In [None]:
plt.rcParams['figure.dpi'] = 200
#sns.set_style("whitegrid")

# Single-panel hourly profile: one line per Zone with a 95% confidence band.
ax = sns.lineplot(data=df_1, x='hour', y="no2_ugm3", hue="Zone", ci=95)#.grid(axis='x')


ax.set_xlabel("Hour")
ax.set_ylabel("Average No2")
ax.set_title("Average hourly No2 in μg/m3 between\nULEZ and non-ULEZ sites",
             weight='semibold',
             size='x-large')
ax.margins(x=0)


Weather effects on PM2.5 and NO2

In [None]:
# Mean concentration of each pollutant per wind compass sector; sectors whose
# mean is missing are dropped.
df_wind_pm = df_1.groupby('compass')['pm2_5_ugm3'].mean().reset_index().dropna()
df_wind_no2 = df_1.groupby('compass')['no2_ugm3'].mean().reset_index().dropna()

In [None]:
from math import pi

# Wide table read by radar(): one row per pollutant (row 0 = PM 2.5, row 1 = No2)
# and one column per compass sector, listed clockwise starting at N.
# NOTE(review): the numbers are typed in by hand — presumably copied from
# df_wind_pm / df_wind_no2 — so they will not follow changes in the data.
df_wind = pd.DataFrame({
'group': ['PM 2.5','No2'],
'N': [13.02, 35.71],
'NNE': [13.78, 36.45],
'NE': [15.23, 36.75],
'ENE': [17.80, 40.60],
'E': [18.95, 44.37],
'ESE': [20.06, 48.93],
'SE': [17.58, 49.87],
'SSE': [13.49, 50.36],
'S': [12.03, 44.56],
'SSW': [10.07, 39.37],
'SW': [9.28, 37.75],
'WSW': [8.63, 36.67],
'W': [8.94, 37.42],
'WNW': [10.74, 37.86],
'NW': [10.36, 39.05],
'NNW': [10.74, 36.90]
})

 
def radar(row, title, color, yticks=(10, 20, 30), ylim=(0, 22)):
    """Draw one row of ``df_wind`` as a filled radar (polar) chart.

    The chart goes into slot ``row + 1`` of a 1x2 subplot grid on the current
    figure, with the first category at the top and the rest running clockwise.

    Parameters
    ----------
    row : int
        Index label of the ``df_wind`` row to plot; also selects the subplot slot.
    title : str
        Title shown above the chart.
    color : matplotlib colour
        Colour of the outline and of the semi-transparent fill.
    yticks : sequence of numbers, optional
        Radial tick positions; each tick is labelled with its own value.
        The default reproduces the ticks previously hard-coded for PM 2.5.
    ylim : (float, float), optional
        Radial axis limits. Pass a larger upper limit for series with higher
        values (for example the No2 row), otherwise the outline is clipped.
    """
    # Every df_wind column except the leading 'group' column is one spoke.
    categories = list(df_wind)[1:]
    n_spokes = len(categories)

    # Evenly spaced angles, with the first repeated at the end to close the polygon.
    angles = [i / float(n_spokes) * 2 * pi for i in range(n_spokes)]
    angles += angles[:1]

    ax = plt.subplot(1, 2, row + 1, polar=True)  # change 1,2 if plotting more than one row

    # First spoke at the top, spokes running clockwise.
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    # One labelled spoke per category.
    plt.xticks(angles[:-1], categories, color='black', size=8)

    # Radial tick labels along the zero-angle spoke.
    ax.set_rlabel_position(0)
    plt.yticks(list(yticks), [str(tick) for tick in yticks], color="black", size=7)
    plt.ylim(*ylim)

    # Values for this row, closed in the same way as the angles.
    values = df_wind.loc[row].drop('group').values.flatten().tolist()
    values += values[:1]
    ax.plot(angles, values, color=color, linewidth=2, linestyle='solid')
    ax.fill(angles, values, color=color, alpha=0.4)

    plt.title(title, size=11, color='black', y=1.1)
 
# Figure of roughly 1000x1000 pixels at 96 dpi.
my_dpi=96
plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=my_dpi)
 
# One colour per df_wind row. plt.get_cmap takes the same (name, lut) arguments
# as plt.cm.get_cmap, which newer matplotlib releases have deprecated and removed.
my_palette = plt.get_cmap("tab10", len(df_wind.index))
 
# Loop to plot
#for row in range(0, len(df_wind.index)):
#    radar( row=row, title='Average '+df_wind['group'][row] +' by wind direction μg/m3', color=my_palette(row))
#radar( row=0, title=' Average PM 2.5 by wind direction μg/m3', color=my_palette(0)) #if plotting individually run this instead

#plt.savefig('/Users/oliverpaul/Data_Science/EDF/plots/wind_pm25_average.png', bbox_inches = 'tight', pad_inches = 0.1)


In [None]:
# Create the figure in the cell that draws on it: with the inline backend a
# figure opened in an earlier cell is closed when that cell finishes, so the
# radar would otherwise land on a new default-size figure.
plt.figure(figsize=(1000/my_dpi, 1000/my_dpi), dpi=my_dpi)
radar( row=0, title=' Average PM 2.5 by wind direction μg/m3', color=my_palette(0))

### We see that when the wind blows from Europe, average PM25 levels are more than double those when winds blow from the Atlantic. Strong motivation for international cooperation on air pollution. When plotting the same plot for No2 the variation is much less extreme, indicating that No2 is locally produced. 

In [None]:
plt.rcParams['figure.dpi'] = 200
#sns.set_style("whitegrid")

# Compass sectors clockwise from north, used as the bar order.
order = ['N','NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']

# Number of measurements recorded for each wind direction.
ax = sns.countplot(data=df_1, x='compass', order=order, palette="tab10", alpha=0.9)

ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

ax.set_xlabel("Wind Direction")
ax.set_ylabel("Number of measurements")
ax.set_title("Plot to show prevailing wind directions",
             weight='semibold',
             size='large')

# Hide the count ticks; only the relative bar heights are shown.
ax.set_yticks([])

Weather correlation PM2.5

In [None]:
# PM2.5 together with the road-distance and weather columns; rows with any
# missing value are dropped so the correlations use complete rows only.
pm_cor = df_1[['pm2_5_ugm3', 'motorway_min_dist', 'a_road_min_dist', 'temperature', 'windBearing', 'windSpeed', 'ozone', 'pressure']].dropna()

In [None]:
## Plot correlation matrix for pm25
cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
pm_cluster = sns.clustermap(pm_cor.corr(), cmap=cmap, annot=True, figsize=(20, 20))
# Hide the dendrogram drawn beside the rows.
pm_cluster.ax_row_dendrogram.set_visible(False)
plt.show()

In [None]:
# NO2 together with the road-distance and weather columns; rows with any
# missing value are dropped so the correlations use complete rows only.
no2_cor = df_1[['no2_ugm3', 'motorway_min_dist', 'a_road_min_dist', 'temperature', 'windBearing', 'windSpeed', 'ozone', 'pressure']].dropna()

In [None]:
## Plot correlation matrix for no2
cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
no2_cluster = sns.clustermap(no2_cor.corr(), cmap=cmap, annot=True, figsize=(20, 20))
# Hide the dendrogram drawn beside the rows.
no2_cluster.ax_row_dendrogram.set_visible(False)

plt.show()

In [None]:
# One row per pod for the traffic correlation. drop_duplicates() kept only the
# first reading of each pod, i.e. a single arbitrary hourly PM2.5 value; the
# per-pod mean over all readings is used instead.
# NOTE(review): assumes the VKM columns are numeric and constant within a pod,
# so averaging leaves them unchanged — confirm against the source data.
pm_traffic_columns = ['pm2_5_ugm3', ' VKM Motorcycle', ' VKM Petrol Car', ' VKM Diesel Car', ' VKM Taxi', ' VKM Electric Car', ' VKM Petrol LGV', ' VKM Diesel LGV', ' VKM Electric LGV', ' VKM Bus', ' VKM Coach', ' VKM Rigid HGV', ' VKM Artic HGV', ' VKM TOTAL']
pm_traffic_cor = df_1.groupby('pod_id_location')[pm_traffic_columns].mean()

In [None]:
## Plot correlation matrix for pm25 traffic
cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
pm_traffic_cluster = sns.clustermap(pm_traffic_cor.corr(), cmap=cmap, annot=True, figsize=(20, 20))
# Hide the dendrogram drawn beside the rows.
pm_traffic_cluster.ax_row_dendrogram.set_visible(False)

plt.show()

In [None]:
# One row per pod for the traffic correlation. drop_duplicates() kept only the
# first reading of each pod, i.e. a single arbitrary hourly NO2 value; the
# per-pod mean over all readings is used instead.
# NOTE(review): assumes the VKM columns are numeric and constant within a pod,
# so averaging leaves them unchanged — confirm against the source data.
no2_traffic_columns = ['no2_ugm3', ' VKM Motorcycle', ' VKM Petrol Car', ' VKM Diesel Car', ' VKM Taxi', ' VKM Electric Car', ' VKM Petrol LGV', ' VKM Diesel LGV', ' VKM Electric LGV', ' VKM Bus', ' VKM Coach', ' VKM Rigid HGV', ' VKM Artic HGV', ' VKM TOTAL']
no2_traffic_cor = df_1.groupby('pod_id_location')[no2_traffic_columns].mean()

In [None]:
## Plot correlation matrix for no2 traffic
cmap = sns.diverging_palette(220, 20, sep=20, as_cmap=True)
no2_traffic_cluster = sns.clustermap(no2_traffic_cor.corr(), cmap=cmap, annot=True, figsize=(20, 20))
# Hide the dendrogram drawn beside the rows.
no2_traffic_cluster.ax_row_dendrogram.set_visible(False)

plt.show()

Looking at pm25 levels over bonfire weekend

In [None]:
def make_date_range_diurnal(data_range, title, pollutant, ylabel=None):
    """Plot an hourly line profile of ``pollutant``, one panel per weekday.

    Parameters
    ----------
    data_range : DataFrame
        Rows to plot; must contain the columns 'hour', 'w_day' and ``pollutant``.
    title : str
        Figure-level title.
    pollutant : str
        Name of the column plotted on the y axis.
    ylabel : str, optional
        Label for the y axis of the first panel. When omitted it is derived
        from ``pollutant`` (previously the label always read PM2.5, whatever
        column was plotted).
    """
    if ylabel is None:
        known_labels = {
            "pm2_5_ugm3": "Hourly Average PM2.5",
            "no2_ugm3": "Hourly Average No2",
        }
        ylabel = known_labels.get(pollutant, f"Hourly Average {pollutant}")

    plt.rcParams['figure.dpi'] = 150

    col_order=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    g = sns.relplot(x='hour', y=pollutant,
                     col="w_day",
                     kind="line", ci=95, data=data_range, col_order=col_order)

    axes = g.axes.flatten()
    # Replace seaborn's default panel titles with the bare day name.
    for ax, day in zip(axes, col_order):
        ax.set_title(day)
        ax.set_xlabel("Hour")
    axes[0].set_ylabel(ylabel)

    g.fig.suptitle(title, 
                   weight='semibold', 
                   y= 1.06, 
                   size='x-large')

    plt.margins(x=0)
    plt.subplots_adjust(hspace=0, wspace=0)

### Very difficult to compare one week to another.... hard to explain results

In [None]:
# Half-open range [4 Nov, 11 Nov): a strict upper bound keeps readings stamped
# exactly at midnight on the following Monday out of this week's Monday panel.
make_date_range_diurnal(df_1[(df_1['date_UTC'] >= '2019-11-04 00:00:00') & (df_1['date_UTC'] < '2019-11-11 00:00:00')],
                       'PM2.5 diurnal plot for Guy Fawkes week 2019 (Tuesday)',
                       'pm2_5_ugm3')

In [None]:
# Half-open range [28 Oct, 4 Nov): a strict upper bound keeps readings stamped
# exactly at midnight on 4 Nov (the first hour of Guy Fawkes week) out of this week.
make_date_range_diurnal(df_1[(df_1['date_UTC'] >= '2019-10-28 00:00:00') & (df_1['date_UTC'] < '2019-11-04 00:00:00')],
                       'PM2.5 diurnal plot for week prior to Guy Fawkes week 2019',
                       'pm2_5_ugm3')

Investigating demographics on pm25 and no2

In [None]:
# Convert the LSOA decile columns of df_1 to strings so they can be plotted as categories.

for decile_topic in ['Index of Multiple Deprivation (IMD)',
                     'Income',
                     'Employment',
                     'Education, Skills and Training',
                     'Health Deprivation and Disability',
                     'Crime',
                     'Living Environment']:
    decile_column = decile_topic + ' Decile (where 1 is most deprived 10% of LSOAs)'
    df_1[decile_column] = df_1[decile_column].astype(str)

In [None]:
#where are sensors for different LSOA groups?

#### make map 

In [None]:
#Function for LSOA boxplots

def _annotated_boxplot(ax, variable, pollutant, order, ylabel, hide_xaxis=False):
    """Draw the boxplot of one pollutant per category on ``ax`` and label each median."""
    sns.boxplot(x=variable,
                y=pollutant,
                data=df_1,
                showfliers=False,
                palette='Set2',
                order=order,
                ax=ax)

    # Categories in plotting order; read before the x axis is optionally hidden.
    levels = order if order is not None else [tick.get_text() for tick in ax.get_xticklabels()]

    if hide_xaxis:
        ax.get_xaxis().set_ticks([])
        ax.get_xaxis().set_visible(False)

    # Medians come straight from the data rather than from the drawn Line2D
    # artists: indexing the artists breaks when a category has no data (no box
    # is drawn, so later indices shift) and depends on seaborn's artist layout.
    medians = df_1.groupby(variable)[pollutant].median()
    for position, level in enumerate(levels):
        median = medians.get(level)
        if median is None or pd.isna(median):
            continue  # no data for this category, so nothing to label
        y = round(median, 1)
        ax.text(
            position,
            y,
            f'{y}',
            ha='center',
            va='center',
            fontweight='bold',
            size=10,
            color='white',
            bbox=dict(facecolor='#445A64'))

    ax.figure.tight_layout()
    ax.grid(False)
    ax.set(ylabel=ylabel)


def box_lsoa(title, variable, order):
    """Plot PM 2.5 (top) and No2 (bottom) boxplots of ``df_1`` by LSOA category.

    Outliers are not drawn, and every box carries a label with its median
    rounded to one decimal place.

    Parameters
    ----------
    title : str
        Figure-level title.
    variable : str
        Name of the ``df_1`` column holding the categories for the x axis.
    order : list of str
        Category values, in the order in which the boxes are drawn.
    """
    fig, axs = plt.subplots(dpi=600, nrows=2, figsize=(9, 7))

    # The top panel has its x axis hidden; the bottom panel carries the category labels.
    _annotated_boxplot(axs[0], variable, 'pm2_5_ugm3', order,
                       'Average PM 2.5 concentrations in µg/m3', hide_xaxis=True)

    fig.suptitle(title,
                 weight='semibold',
                 y= 1.03,
                 size='x-large')

    _annotated_boxplot(axs[1], variable, 'no2_ugm3', order,
                       'Average No2 concentrations in µg/m3')


Plotting boxplots across LSOA categories 

In [None]:
# Convert the LSOA decile columns of the gdf GeoDataFrame to strings so they can be plotted as categories.

for decile_topic in ['Index of Multiple Deprivation (IMD)',
                     'Income',
                     'Employment',
                     'Education, Skills and Training',
                     'Health Deprivation and Disability',
                     'Crime',
                     'Living Environment']:
    decile_column = decile_topic + ' Decile (where 1 is most deprived 10% of LSOAs)'
    gdf[decile_column] = gdf[decile_column].astype(str)

In [None]:
##Multiple deprevation locations 

def make_lsoa_map(variable, title):
    """Map the sensors in ``gdf`` coloured by a categorical column, with the ULEZ outline.

    Parameters
    ----------
    variable : str
        Name of the ``gdf`` column used to colour the points and build the legend.
    title : str
        Title shown above the map.
    """
    plt.rcParams['figure.dpi'] = 600

    ax=gdf.plot(figsize=(10, 10), column=variable, cmap='Paired', legend=True, alpha=1)
    ULEZ.plot(ax=ax, edgecolor='k', facecolor='none', linewidth=2, alpha=0.5)
    # `source=` is the supported keyword for the tile provider; `url=` is deprecated.
    ctx.add_basemap(ax=ax, source=ctx.providers.Stamen.TonerLite)

    ax.set_title(title, fontdict={'fontsize': 'x-large', 'fontweight' : 'semibold'})
    plt.axis('off')

    #plt.savefig('/Users/oliverpaul/Data_Science/EDF/plots/sensor_locations.png', bbox_inches = 'tight', pad_inches = 0.1)
    plt.show()


In [None]:
# Sensor map coloured by Index of Multiple Deprivation decile.
make_lsoa_map(
    variable='Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for Multiple Deprivation scores',
)

In [None]:
# PM 2.5 / No2 boxplots per Index of Multiple Deprivation decile (deciles 1-9 listed).
box_lsoa(
    title='Boxplot comparing PM 2.5 and No2 concentrations across LSOA Deprivation Deciles (2019)',
    variable='Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs)',
    order=[str(decile) for decile in range(1, 10)],
)

In [None]:
# Sensor map coloured by Income decile.
make_lsoa_map(
    variable='Income Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for Income Decile scores',
)

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Income Deciles (2019)', 
         'Income Decile (where 1 is most deprived 10% of LSOAs)',
         ['1','2','3','4','5','6','7','8','9', '10']
        )

In [None]:
# Sensor map coloured by Employment decile.
make_lsoa_map(
    variable='Employment Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for Employment Decile scores',
)

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Employment Deciles (2019)', 
         'Employment Decile (where 1 is most deprived 10% of LSOAs)',
        ['1','2','3','4','5','6','7','8','9', '10'])

In [None]:
# Sensor map coloured by Education, Skills and Training decile.
make_lsoa_map(
    variable='Education, Skills and Training Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for\nEducation, Skills and Training Decile scores',
)

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
# NOTE(review): deciles 1 and 2 are not listed — presumably no sensors fall in them; confirm.
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Education, Skills and Training Deciles (2019)', 
         'Education, Skills and Training Decile (where 1 is most deprived 10% of LSOAs)',
        ['3','4','5','6','7','8','9','10'])

In [None]:
# Sensor map coloured by Health Deprivation and Disability decile.
make_lsoa_map(
    variable='Health Deprivation and Disability Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for\nHealth Deprivation and Disability Decile scores',
)

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Health Deprivation and Disability Deciles (2019)', 
         'Health Deprivation and Disability Decile (where 1 is most deprived 10% of LSOAs)',
        ['1','2','3','4','5','6','7','8','9','10'])

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Crime Deciles (2019)', 
         'Crime Decile (where 1 is most deprived 10% of LSOAs)',
        ['1','2','3','4','5','6','7','8','9','10'])

In [None]:
# Sensor map coloured by Living Environment decile.
make_lsoa_map(
    variable='Living Environment Decile (where 1 is most deprived 10% of LSOAs)',
    title='Sensor locations with respect to LSOA areas for Living Environment Decile scores',
)

In [None]:
# Title names the decile actually plotted (it previously read "Deprivation Deciles").
# NOTE(review): deciles 7, 9 and 10 are not listed — presumably no sensors fall in them; confirm.
box_lsoa('Boxplot comparing PM 2.5 and No2 concentrations across LSOA Living Environment Deciles (2019)', 
         'Living Environment Decile (where 1 is most deprived 10% of LSOAs)',
        ['1','2','3','4','5','6','8'])