In [None]:
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

## Spatial Manipulation
import geopandas as gpd
from shapely.geometry import Point

## Mapping
import folium
from folium import plugins

## Misc
import functools
from itertools import product
import contextily as ctx

pd.options.display.max_columns = None
#pd.options.display.max_rows = None

from mpl_toolkits.axes_grid1 import make_axes_locatable

In [None]:
# Read the measurements (the first CSV column is used as the index) and
# convert the 'date_UTC' column from text to datetimes.
df = (
    pd.read_csv("pm_no_clean.csv", index_col=0)
      .assign(date_UTC=lambda frame: pd.to_datetime(frame["date_UTC"]))
)

In [None]:
def make_date_range_diurnal(data_range, title, pollutant, hue, y_axis):
    """Draw a row of hour-of-day line plots, one facet per weekday.

    Parameters
    ----------
    data_range : DataFrame
        Rows to plot. The columns 'hour' and 'w_day' are read, together
        with the `pollutant` column and, when given, the `hue` column.
    title : str
        Figure-level title.
    pollutant : str
        Name of the column plotted on the y axis.
    hue : str or None
        Column used to split each facet into coloured lines; None draws a
        single line per facet.
    y_axis : str
        Y-axis label, applied to the first (Monday) facet only.

    Notes
    -----
    Sets the global ``figure.dpi`` rcParam to 150 as a side effect and
    returns nothing; the figure is left as the current matplotlib figure.
    """
    plt.rcParams['figure.dpi'] = 150

    # Facets are laid out Monday -> Sunday; the same list supplies the titles.
    weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    grid = sns.relplot(
        data=data_range,
        x='hour',
        y=pollutant,
        hue=hue,
        col="w_day",
        col_order=weekdays,
        kind="line",
        ci=95,
    )

    facets = grid.axes.flatten()

    # Replace seaborn's default "w_day = <day>" titles with the bare day name.
    for facet, day_name in zip(facets, weekdays):
        facet.set_title(day_name)

    facets[0].set_ylabel(y_axis)
    for facet in facets:
        facet.set_xlabel("Hour")

    grid.fig.suptitle(title, weight='semibold', y=1.06, size='x-large')

    plt.margins(x=0)
    # Remove the gaps between facets so the week reads as one continuous strip.
    plt.subplots_adjust(hspace=0, wspace=0)

## Weekly Seasons

In [None]:
# Winter rows, no hue split: PM2.5 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Winter')],
    title='PM2.5 diurnal plot for Winter',
    pollutant='pm2_5_ugm3',
    hue=None,
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Winter rows, no hue split: NO2 by hour for each weekday.
# The y-axis label previously read "No2 2.5 μg/m3" (a leftover from the
# PM2.5 call); it now matches the label used by the other NO2 plots.
make_date_range_diurnal(df[df['season'] == 'Winter'],
                       'No2 diurnal plot for Winter',
                       'no2_ugm3', None, 'Hourly Average No2 μg/m3')

In [None]:
# Winter rows, one line per value of the 'Zone' column: PM2.5 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Winter')],
    title='PM2.5 diurnal plot for Winter - with ULEZ',
    pollutant='pm2_5_ugm3',
    hue='Zone',
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Winter rows, one line per value of the 'Zone' column: NO2 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Winter')],
    title='No2 diurnal plot for Winter - with ULEZ',
    pollutant='no2_ugm3',
    hue='Zone',
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Summer rows, no hue split: PM2.5 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Summer')],
    title='PM2.5 diurnal plot for Summer',
    pollutant='pm2_5_ugm3',
    hue=None,
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Summer rows, no hue split: NO2 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Summer')],
    title='No2 diurnal plot for Summer',
    pollutant='no2_ugm3',
    hue=None,
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Summer rows, one line per value of the 'Zone' column: PM2.5 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Summer')],
    title='PM2.5 diurnal plot for Summer - with ULEZ',
    pollutant='pm2_5_ugm3',
    hue='Zone',
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Summer rows, one line per value of the 'Zone' column: NO2 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Summer')],
    title='No2 diurnal plot for Summer - with ULEZ',
    pollutant='no2_ugm3',
    hue='Zone',
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Spring rows, no hue split: PM2.5 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Spring')],
    title='PM2.5 diurnal plot for Spring',
    pollutant='pm2_5_ugm3',
    hue=None,
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Spring rows, no hue split: NO2 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Spring')],
    title='No2 diurnal plot for Spring',
    pollutant='no2_ugm3',
    hue=None,
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Spring rows, one line per value of the 'Zone' column: PM2.5 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Spring')],
    title='PM2.5 diurnal plot for Spring - with ULEZ',
    pollutant='pm2_5_ugm3',
    hue='Zone',
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Spring rows, one line per value of the 'Zone' column: NO2 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Spring')],
    title='No2 diurnal plot for Spring - with ULEZ',
    pollutant='no2_ugm3',
    hue='Zone',
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Autumn rows, no hue split: PM2.5 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Autumn')],
    title='PM2.5 diurnal plot for Autumn',
    pollutant='pm2_5_ugm3',
    hue=None,
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Autumn rows, no hue split: NO2 by hour for each weekday.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Autumn')],
    title='No2 diurnal plot for Autumn',
    pollutant='no2_ugm3',
    hue=None,
    y_axis='Hourly Average No2 μg/m3',
)

In [None]:
# Autumn rows, one line per value of the 'Zone' column: PM2.5 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Autumn')],
    title='PM2.5 diurnal plot for Autumn - with ULEZ',
    pollutant='pm2_5_ugm3',
    hue='Zone',
    y_axis='Hourly Average PM 2.5 μg/m3',
)

In [None]:
# Autumn rows, one line per value of the 'Zone' column: NO2 by hour.
make_date_range_diurnal(
    data_range=df.loc[df['season'].eq('Autumn')],
    title='No2 diurnal plot for Autumn - with ULEZ',
    pollutant='no2_ugm3',
    hue='Zone',
    y_axis='Hourly Average No2 μg/m3',
)

### There are some interesting changes in weekly patterns for PM 2.5 between seasons. To investigate this further, I would start by looking at what kind of schedules farmers' activities follow across seasons.

In [None]:
# Box plot of NO2 by season (outliers hidden) with each box's median
# written on top of its median line.
plt.rcParams['figure.dpi'] = 300

ax = sns.boxplot(x="season", y="no2_ugm3", data=df, showfliers=False, order=None, palette='Set2')

ax.set_xlabel('')
ax.set_ylabel('Average No2 μg/m3')
ax.set_title('Average No2 measurements by Season', fontdict={'fontsize': 'large', 'fontweight': 'semibold'})

ax_0 = ax.axes
lines = ax_0.get_lines()
categories = ax_0.get_xticks()

for cat in categories:

    # Assumes each box contributes five Line2D artists when fliers are
    # hidden (two whiskers, two caps, then the median), so the median of
    # box `cat` sits at index 4 + cat * 5 — TODO confirm if the seaborn or
    # matplotlib version changes. int() guards against a non-int tick value.
    y = round(lines[4 + int(cat) * 5].get_ydata()[0], 1)

    ax_0.text(
        cat,
        y,
        f'{y}',
        ha='center',
        va='center',
        fontweight='bold',
        size=10,
        color='white',
        bbox=dict(facecolor='#445A64'))

# The annotation text is passed positionally: the keyword was renamed from
# `s` to `text` in matplotlib 3.3, so `s=` fails on current releases.
# xy is in data coordinates, i.e. below the axis at the left edge.
ax.annotate('*Median averages annotated', xy=(-0.5, -18.0), annotation_clip=False, size=7)

plt.show()

In [None]:
# Box plot of PM2.5 by season (outliers hidden) with each box's median
# written on top of its median line.
plt.rcParams['figure.dpi'] = 300

ax = sns.boxplot(x="season", y="pm2_5_ugm3", data=df, showfliers=False, order=None, palette='Set2')

ax.set_xlabel('')
ax.set_ylabel('Average PM 2.5 μg/m3')
ax.set_title('Average PM 2.5 measurements by Season', fontdict={'fontsize': 'large', 'fontweight': 'semibold'})

ax_0 = ax.axes
lines = ax_0.get_lines()
categories = ax_0.get_xticks()

for cat in categories:

    # Assumes each box contributes five Line2D artists when fliers are
    # hidden (two whiskers, two caps, then the median), so the median of
    # box `cat` sits at index 4 + cat * 5 — TODO confirm if the seaborn or
    # matplotlib version changes. int() guards against a non-int tick value.
    y = round(lines[4 + int(cat) * 5].get_ydata()[0], 1)

    ax_0.text(
        cat,
        y,
        f'{y}',
        ha='center',
        va='center',
        fontweight='bold',
        size=10,
        color='white',
        bbox=dict(facecolor='#445A64'))

# The annotation text is passed positionally: the keyword was renamed from
# `s` to `text` in matplotlib 3.3, so `s=` fails on current releases.
# xy is in data coordinates, i.e. below the axis at the left edge.
ax.annotate('*Median averages annotated', xy=(-0.5, -7.0), annotation_clip=False, size=7)

plt.show()

## Significant dates 

### Plotting sensor locations using folium

In [None]:
# One shapely Point per row of df, built from (Longitude, Latitude) pairs,
# then attached to df as the geometry of a GeoDataFrame.
geometry = list(map(Point, zip(df.Longitude, df.Latitude)))
gdf = gpd.GeoDataFrame(df, geometry=geometry)

In [None]:
# Keep only the first row seen for each pod_id_location.
gdf = gdf[~gdf.duplicated(subset=['pod_id_location'])]

In [None]:
# Latitude/longitude table (in that column order) for the heatmap.
# The coordinates are read with the vectorised GeoSeries accessors rather
# than a row-wise apply followed by apply(pd.Series), which builds one
# Series per row; the result keeps gdf's index.
df_lat_lon = pd.DataFrame(
    {
        'latitude': gdf['geometry'].y,
        'longitude': gdf['geometry'].x,
    },
    index=gdf.index,
)

df_lat_lon.head()

In [None]:
# Base map on OpenStreetMap tiles (alternative tile set: cartodbpositron)
m = folium.Map([51.5, -0.1], zoom_start=10.5, tiles='openstreetmap')

# 2-D array of [latitude, longitude] rows, one per sensor location
sensors = df_lat_lon.to_numpy()

# Attach the heat layer to the map, then display the map as the cell output
plugins.HeatMap(sensors, radius=20).add_to(m)
m

Looking to see which sensors lie within Notting Hill carnival area

![](notting_hill.png)

In [None]:
# Rows of df whose location_name is 'Ladbroke Grove'.
df_notting_hill = df.loc[df['location_name'].eq('Ladbroke Grove')]

Since the carnival took place on a Sunday and a public holiday (the bank holiday Monday), we can compare measurements during the carnival with all other Sundays and public holidays.

In [None]:
#make_date_range_diurnal(df_notting_hill[(df_notting_hill['date_UTC'] >= '2019-11-04 00:00:00') & (df_notting_hill['date_UTC'] <= '2019-11-11 00:00:00')],
#                       'PM2.5 diurnal plot for Guy Fawkes week 2019 (Tuesday)',
#                       'pm2_5_ugm3', None , 'df')

In [None]:
# Keep Sundays and public holidays only. The explicit .copy() makes the
# result an independent frame, so adding columns to it later does not act
# on a slice of df (which triggers pandas' SettingWithCopyWarning).
df_notting_hill = df_notting_hill[
    (df_notting_hill['w_day'] == 'Sunday')
    | (df_notting_hill['public_hols'] == 'public holiday')
].copy()


In [None]:
# Carnival days, keyed by datetime.date. The values looked up below come
# from `.dt.date`, which yields datetime.date objects; keying the dict with
# the same type avoids depending on pandas implicitly matching dates
# against Timestamp keys.
carnival = {
    pd.to_datetime('2019-08-25', format='%Y-%m-%d').date(): 'Notting Hill Carnival',
    pd.to_datetime('2019-08-26', format='%Y-%m-%d').date(): 'Notting Hill Carnival',
}

# .assign returns a new frame, so the new column is never written into a
# slice of df. Every date that is not a carnival day gets the fallback label.
df_notting_hill = df_notting_hill.assign(**{
    'Notting Hill': (
        df_notting_hill['date_UTC']
        .dt.date
        .map(carnival)
        .fillna('Other Sunday or Bank Holiday')
    )
})

In [None]:
# Carnival days on the left, the comparison group on the right.
order = ['Notting Hill Carnival', 'Other Sunday or Bank Holiday']

ax = sns.boxplot(
    data=df_notting_hill,
    x="Notting Hill",
    y='no2_ugm3',
    order=order,
    showfliers=False,  # hide outlier markers
)

ax.set_title(
    'Average No2 measurements at Ladbroke Grove during\nNotting Hill Carnival\n',
    fontdict={'fontsize': 'large', 'fontweight': 'semibold'},
)
ax.set_ylabel('Average No2 μg/m3')
ax.set_xlabel('')
plt.show()

### hmmm surprising result for Notting Hill Carnival... Taking a look at Pride London

![](pride.png)

### Pride occupied much of the ULEZ, so looking at sensors within ULEZ. It took place on Saturday 6 July. Will compare this date with all other Saturdays for ULEZ

In [None]:
# Independent copy of the Summer Saturday rows recorded in the ULEZ zone.
pride = df.loc[
    df['w_day'].eq('Saturday')
    & df['Zone'].eq('ULEZ')
    & df['season'].eq('Summer')
].copy()

In [None]:
# Pride day, keyed by datetime.date so the key type matches the
# datetime.date values produced by `.dt.date` below (no reliance on pandas
# implicitly matching dates against Timestamp keys).
date = {
    pd.to_datetime('2019-07-06', format='%Y-%m-%d').date(): 'London Pride'
}

# Every other date in `pride` gets the fallback label.
pride['London Pride'] = (
    pride['date_UTC']
    .dt.date
    .map(date)
    .fillna('All other Saturdays in ULEZ')
)

### My guess is Pride doesn't affect the whole city... I will remove sensors that are far away from the event, i.e. south of the river. Unfortunately, there is a gap in sensor coverage for much of the Pride route

In [None]:
# Build points from (Longitude, Latitude), declare them as WGS84 and
# reproject to Web Mercator (EPSG:3857).
# The CRS is given as an authority string / epsg keyword instead of the
# {'init': 'epsg:...'} dict, which is deprecated PROJ syntax.
geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
crs = 'EPSG:4326'
gdf_2 = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
gdf_2 = gdf_2.to_crs(epsg=3857)

In [None]:
# Reduce to one row per sensor location (id, geometry, zone) and keep only
# the locations whose Zone is 'ULEZ'.
gdf_2 = (
    gdf_2[['pod_id_location', 'geometry', 'Zone']]
    .drop_duplicates(subset=['pod_id_location'])
    .loc[lambda sensors: sensors['Zone'] == 'ULEZ']
)

In [None]:
# Plot the sensor locations in gdf_2 over a basemap, labelling each point
# with its pod_id_location.
plt.rcParams['figure.dpi'] = 300

ax = gdf_2.plot(figsize=(12,12))

# A plain loop replaces DataFrame.apply, which was used only for its side
# effect. The label is passed positionally because the annotate keyword was
# renamed from `s` to `text` in matplotlib 3.3, so `s=` fails on current
# releases.
for pod_id, point in zip(gdf_2['pod_id_location'], gdf_2.geometry):
    ax.annotate(str(pod_id), xy=point.coords[0])

ctx.add_basemap(ax=ax)

plt.axis('off')
plt.show()

In [None]:
# pod_id_location values to exclude from both the Pride measurements and
# the sensor map.
# NOTE(review): presumably these are the sensors judged too far from the
# event on the map above — confirm the list against that figure.
remove = [77245, 57245, 83245, 8245, 22245, 10245, 36245, 85245, 88245, 47245]

pride = pride.loc[~pride['pod_id_location'].isin(remove)]
gdf_2 = gdf_2.loc[~gdf_2['pod_id_location'].isin(remove)]

In [None]:
# Plot the sensor locations currently in gdf_2 over a basemap, labelling
# each point with its pod_id_location.
plt.rcParams['figure.dpi'] = 300

ax = gdf_2.plot(figsize=(12,12))

# A plain loop replaces DataFrame.apply, which was used only for its side
# effect. The label is passed positionally because the annotate keyword was
# renamed from `s` to `text` in matplotlib 3.3, so `s=` fails on current
# releases.
for pod_id, point in zip(gdf_2['pod_id_location'], gdf_2.geometry):
    ax.annotate(str(pod_id), xy=point.coords[0])

ctx.add_basemap(ax=ax)

plt.axis('off')
plt.show()

In [None]:
# Pride day on the left, the comparison Saturdays on the right.
order = ['London Pride', 'All other Saturdays in ULEZ']

ax = sns.boxplot(
    data=pride,
    x="London Pride",
    y="no2_ugm3",
    order=order,
    showfliers=False,  # hide outlier markers
)

ax_1 = ax.axes
lines = ax_1.get_lines()
categories = ax_1.get_xticks()

# Shared styling for the value labels drawn over each box.
label_style = dict(
    ha='center',
    va='center',
    fontweight='bold',
    size=10,
    color='white',
    bbox=dict(facecolor='#445A64'),
)

for cat in categories:
    # Index 4 + cat * 5 selects one Line2D per box; the figure treats its
    # y value as the box's median.
    # NOTE(review): relies on five line artists per box — confirm.
    box_line = lines[4+cat*5]
    y = round(box_line.get_ydata()[0], 1)
    ax_1.text(cat, y, f'{y}', **label_style)

ax.set_title(
    'Average No2 measurements in ULEZ during\nLondon Pride\n',
    fontdict={'fontsize': 'large', 'fontweight': 'semibold'},
)
ax.set_ylabel('Average No2 μg/m3')
ax.set_xlabel('')
plt.show()

### There is a slightly lower average; however, there is little sensor density in the area of interest, particularly since sensor location 172450 wasn't producing data at the time

### Looking at Extinction Rebellion: I will compare this week with all other workdays in Spring (since we know No2 is higher in winter, which would skew the average)

Extract from Wikipedia so I know where to look:

Starting from Monday 15 April 2019, Extinction Rebellion organised demonstrations in London, focusing their attention on Oxford Circus, Marble Arch, Waterloo Bridge and the area around Parliament Square.[52] Activists fixed a pink boat named after murdered Honduran environmental activist Berta Cáceres in the middle of the busy intersection of Oxford Street and Regent Street (Oxford Circus) and glued themselves to it,[53][54] and also set up several gazebos, potted plants and trees, a mobile stage and a skate ramp whilst also occupying Waterloo Bridge.[55][56] Five activists, including XR co-founder Simon Bramwell, were arrested for criminal damage when they targeted Shell's headquarters, near Waterloo.[57][55] After the police imposed a 24-hour Section 14 condition at 18:55 requiring activists to move to Marble Arch[58] the police tried to clear Waterloo Bridge arresting 113 people, without gaining control of the bridge.[59][60]

On the second day of actions on Waterloo Bridge police began making arrests of the activists at 12.40 pm,[61] but stopped a few hours later,[62] after running out of holding cells.[63] By the end of Tuesday 16 April an estimated 500,000 people had been affected by the disruptions and 290 activists had been arrested in London.[64] In Scotland, more than 1,000 protesters occupied the North Bridge for seven hours in Edinburgh, bringing one of the main routes into the city centre to a standstill. Police said they made 29 arrests.[65][66]

On the morning of Wednesday 17 April two activists climbed onto the roof of a Docklands Light Railway train at Canary Wharf station whilst another glued himself to the side, spreading disruption to railway services.[67] The following day the three activists were charged with obstructing trains and after pleading not guilty sent to jail for four weeks, with no bail, whilst awaiting their next hearing.[68] In response to the protests, the British Transport Police suspended access to public Wi-Fi at London Underground stations the same day.[69][70][71] Towards the end of Wednesday a large force of police marched on the camp at Parliament Square, arresting people and partially removing roadblocks[72] before it was retaken later the same night by protesters who arrived with a samba band and re-established the roadblocks.[73]

At the start of Thursday 18 April, the fourth day of continuous occupations at four locations, the arrest figure had risen to 428, the majority for breaching public order laws and obstructing a highway.[53][74] During the morning of 18 April about 20 XR activists spread traffic disruption wider with a series of swarming (short duration) roadblocks on Vauxhall Bridge.[75]


A mural appeared at Marble Arch after the closing ceremony on 25 April and this was attributed to the artist Banksy.[76][77] The slogan "From this moment despair ends and tactics begin" is a quotation from The Revolution of Everyday Life.[78]
On the morning of 19 April, after significant media speculation about a threat to Heathrow Airport,[79][80][81] around a dozen teenagers, some aged 13 and 14, approached the access road holding a banner which read “Are we the last generation?” Some of the teenagers wept and hugged each other, as they were surrounded by a far larger squad of police.[54][82][83] In the middle of the day police moved in force to surround the pink boat as Emma Thompson read poetry from the deck, eventually removing the people who were either locked-on or glued to it. After seven hours police had moved the boat without clearing Oxford Circus.[53][84] By late evening police said that 682 people had thus far been arrested in London.[54]

On 25 April thirteen protesters blockaded the London Stock Exchange by gluing themselves across its entrances, wearing LED signs. Despite this, the operation of the market was not affected.[85] Another 4 protesters climbed on to a Docklands Light Railway train at Canary Wharf, and held the banners, which resulted in a short delay between Bank and Monument station and Stratford/Lewisham station. 26 people were arrested.[85] In the afternoon, the activists gathered at Hyde Park as the "closing ceremony" of the movement, which ended the 11-day demonstrations in London. A total of 1,130 people were arrested during the demonstrations.[86] As of June 2019, one protester, Angie Zelter, has been convicted of a public order offence for taking part in the occupations.[87]

Again, most disruption seems to have taken place in the ULEZ, so let's look there

In [None]:
# Independent copy of the Spring weekday rows recorded in the ULEZ zone.
er = df.loc[
    df['day_type'].eq('weekday')
    & df['Zone'].eq('ULEZ')
    & df['season'].eq('Spring')
].copy()


In [None]:
# The five days 15-19 April 2019, keyed by datetime.date so the key type
# matches the datetime.date values produced by `.dt.date` below (no
# reliance on pandas implicitly matching dates against Timestamp keys).
date = {
    day.date(): 'Extinction Rebellion'
    for day in pd.date_range('2019-04-15', '2019-04-19', freq='D')
}

# Every other date in `er` gets the fallback label.
er['Extinction Rebellion'] = (
    er['date_UTC']
    .dt.date
    .map(date)
    .fillna('All other work weeks in ULEZ')
)

In [None]:
# NO2 distribution for the protest days versus the other rows in `er`;
# no explicit category order is passed to seaborn.
ax = sns.boxplot(
    data=er,
    x="Extinction Rebellion",
    y="no2_ugm3",
    order=None,
    showfliers=False,  # hide outlier markers
)

ax.set_title(
    'Average No2 measurements in ULEZ during\nExtinction Rebellion April 2019\n',
    fontdict={'fontsize': 'large', 'fontweight': 'semibold'},
)
ax.set_ylabel('Average No2 μg/m3')
ax.set_xlabel('')
plt.show()

In [None]:
# Per-sensor NO2 distribution for the rows labelled 'Extinction Rebellion'.
# Sensors are drawn in sorted id order (taken from all of `er`) so this
# figure lines up with the companion figure for the other work weeks, and
# the figure carries a title and axis labels so the two can be told apart.
ax = sns.boxplot(
    x="pod_id_location",
    y="no2_ugm3",
    data=er[er['Extinction Rebellion'] == 'Extinction Rebellion'],
    showfliers=False,
    order=sorted(er['pod_id_location'].dropna().unique()),
)

ax.set_xlabel('Sensor (pod_id_location)')
ax.set_ylabel('No2 μg/m3')
ax.set_title('No2 by ULEZ sensor during Extinction Rebellion week',
             fontdict={'fontsize': 'large', 'fontweight': 'semibold'})

plt.xticks(rotation=90)
# plt.show() keeps the tick-object repr out of the cell output.
plt.show()

In [None]:
# Per-sensor NO2 distribution for the rows labelled
# 'All other work weeks in ULEZ'.
# Sensors are drawn in sorted id order (taken from all of `er`) so this
# figure lines up with the companion figure for Extinction Rebellion week,
# and the figure carries a title and axis labels so the two can be told apart.
ax = sns.boxplot(
    x="pod_id_location",
    y="no2_ugm3",
    data=er[er['Extinction Rebellion'] == 'All other work weeks in ULEZ'],
    showfliers=False,
    order=sorted(er['pod_id_location'].dropna().unique()),
)

ax.set_xlabel('Sensor (pod_id_location)')
ax.set_ylabel('No2 μg/m3')
ax.set_title('No2 by ULEZ sensor during all other Spring work weeks',
             fontdict={'fontsize': 'large', 'fontweight': 'semibold'})

plt.xticks(rotation=90)
# plt.show() keeps the tick-object repr out of the cell output.
plt.show()

No2 levels seem higher during Extinction Rebellion week compared with other work weeks in Spring... Is it possible that the protests caused heavy traffic and hence higher pollution levels?