# Imports

In [91]:
# IMPORT
# standard library
import json
import time
from datetime import datetime as dt

# third-party
import numpy as np
import pandas as pd
import seaborn as sns
import utm
from matplotlib.path import Path

# bokeh
import bokeh.palettes
from bokeh.io import output_notebook
from bokeh.layouts import layout
from bokeh.models import HoverTool, Legend, ColumnDataSource, Span
from bokeh.models import GeoJSONDataSource
from bokeh.models.widgets import Panel, Tabs
from bokeh.plotting import figure, show
from bokeh.tile_providers import get_provider, CARTODBPOSITRON


# Send bokeh output to the notebook instead of a standalone HTML file.
output_notebook()
# Seed NumPy's global random generator so any random draws are repeatable.
np.random.seed(42)

# Constants and helper functions

In [88]:
def utm_from_latlon(lat, lon):
    """Project latitude/longitude in degrees onto spherical (Web) Mercator.

    Despite the function name, the result is NOT a UTM coordinate: it is the
    spherical Mercator projection (sphere radius 6378137 m), which is what a
    bokeh figure with ``x_axis_type="mercator"`` expects.

    Parameters
    ----------
    lat, lon : float or array-like
        Latitude and longitude in decimal degrees.

    Returns
    -------
    (x, y)
        Mercator easting and northing in metres.
    """
    r_major = 6378137.000
    x = r_major * np.radians(lon)
    # The previous implementation derived the metres-per-degree scale as
    # ``x / lon``, which is 0/0 (NaN) on the Greenwich meridian. The scale
    # is a constant, so ``y`` is computed directly from the latitude.
    y = r_major * np.log(np.tan(np.pi / 4.0 + np.radians(lat) / 2.0))

    return x, y

def get_lat_lon_utm(row):
    """ Turn the 'st_x' / 'st_y' values of a row into geographic and plot coordinates.

        The row values are handed to `utm.to_latlon` and the resulting
        latitude/longitude pair is then passed through `utm_from_latlon`.
        Returns a `pd.Series` holding, in order: lat, lon, x, y.
    """
    # Zone number 30 and zone letter 'T' correspond to Madrid
    geographic = utm.to_latlon(row["st_x"], row["st_y"], 30, "T")
    projected = utm_from_latlon(*geographic)

    return pd.Series([*geographic, *projected])

In [None]:
def get_color_from_palette(color):
    """ Scale each channel of `color` by 255 and truncate to int, returning a tuple """
    return tuple(int(channel * 255) for channel in color)

def get_dark_color_from_palette(color):
    """ Scale each channel of `color` by 200 (instead of 255) for a darker shade """
    return tuple(int(channel * 200) for channel in color)

# Palette name passed to every `sns.color_palette` call in this notebook.
PALETTE = "colorblind"

In [84]:
# Quick check: first color of the palette converted to an integer tuple.
get_color_from_palette(sns.color_palette(PALETTE)[0])

(1, 115, 178)

In [22]:
# 21-entry color lists (regular and darker shade) built from the shared palette.
district_colors = list(map(get_color_from_palette, sns.color_palette(PALETTE, 21)))
district_dark_colors = list(map(get_dark_color_from_palette, sns.color_palette(PALETTE, 21)))

# 2-entry color lists (regular and darker shade) for the in/out Madrid Central split.
madrid_in_out_colors = list(map(get_color_from_palette, sns.color_palette(PALETTE, 2)))
madrid_in_out_dark_colors = list(map(get_dark_color_from_palette, sns.color_palette(PALETTE, 2)))

# Madrid Central

We are going to analyze the impact of Madrid Central from both an air-quality and a traffic viewpoint.

## Air Quality

In [5]:
# Lookup tables: station metadata and the pollutant ("magnitud") catalogue.
df_stations = pd.read_csv('shared_data/air_quality/air_quality_stations.csv')
df_magnitud = pd.read_csv('shared_data/air_quality/air_quality_magnitud.csv', sep=';')

# Measurements, with the timestamp column parsed to a datetime dtype.
df = pd.read_csv('data/air_quality_data.csv')
df["datetime"] = pd.to_datetime(df["datetime"])

# Left-join both lookup tables onto the measurements; each upper-case join
# key is dropped right after its merge because the lookup table carries the
# same information under its own column name.
df = (
    df
    .merge(df_stations, left_on='PUNTO_MUESTREO', right_on='punto_muestreo', how='left')
    .drop(columns='PUNTO_MUESTREO')
    .merge(df_magnitud, left_on='MAGNITUD', right_on='magnitud_id', how='left')
    .drop(columns='MAGNITUD')
)

df.head()

Unnamed: 0,PROVINCIA,MUNICIPIO,ESTACION,value,datetime,punto_muestreo,name,longitude,latitude,altitude,utm_x,utm_y,magnitud_id,formula,unit_per_m3
0,28,79,4,7.0,2016-04-01 01:00:00,28079004,Plaza de España,-3.712197,40.423883,637,-413239.904502,4927732.0,1,SO2,µg
1,28,79,4,8.0,2016-04-02 01:00:00,28079004,Plaza de España,-3.712197,40.423883,637,-413239.904502,4927732.0,1,SO2,µg
2,28,79,4,10.0,2016-04-03 01:00:00,28079004,Plaza de España,-3.712197,40.423883,637,-413239.904502,4927732.0,1,SO2,µg
3,28,79,4,7.0,2016-04-04 01:00:00,28079004,Plaza de España,-3.712197,40.423883,637,-413239.904502,4927732.0,1,SO2,µg
4,28,79,4,8.0,2016-04-05 01:00:00,28079004,Plaza de España,-3.712197,40.423883,637,-413239.904502,4927732.0,1,SO2,µg


In [8]:
# List every distinct station name present in the measurements.
print('AIR QUALITY STATION NAMES:')
print(df["name"].unique().tolist())

AIR QUALITY STATION NAMES:
['Plaza de España', 'Escuelas Aguirre', 'Ramón y Cajal', 'Arturo Soria', 'Villaverde', 'Farolillo', 'Casa de Campo', 'Barajas Pueblo', 'Plaza del Carmen', 'Moratalaz', 'Cuatro Caminos', 'Barrio del Pilar', 'Vallecas', 'Méndez Álvaro', 'Castellana', 'Retiro', 'Plaza Castilla', 'Ensanche de Vallecas', 'Urbanización Embajada', 'Plaza Elíptica', 'Sanchinarro', 'El Pardo', 'Juan Carlos I', 'Tres Olivos']


In [14]:
# MAP
# Vertices of the Madrid Central (MC) area, used as a closed polygon.
cm_points = pd.read_csv('shared_data/districts/central_madrid_points.csv')
path = Path(cm_points[["utm_x", "utm_y"]].values)

# Point-in-polygon test for every station.
points = df_stations[["utm_x", "utm_y"]].values
points_in_path_mask = path.contains_points(points)

# True for stations inside the polygon, False for the rest.
df_stations["madrid_central"] = points_in_path_mask

df_stations.head()

In [55]:
# MAP

# load MC area
cm_points = pd.read_csv('shared_data/districts/central_madrid_points.csv')

# Split the stations by whether they fall inside the MC polygon. The mask is
# computed here so the cell does not depend on columns added by other cells.
points = df_stations[["utm_x", "utm_y"]].values
path = Path(cm_points[["utm_x", "utm_y"]].values)
points_in_path_mask = path.contains_points(points)

# plot map
p = figure(title="Air quality stations in Madrid", x_axis_type="mercator", y_axis_type="mercator")

# One renderer per group so each can be colored, hovered and toggled
# independently from the legend.
color_in, color_out = madrid_in_out_colors
source_in = ColumnDataSource(df_stations[points_in_path_mask])
source_out = ColumnDataSource(df_stations[~points_in_path_mask])
cr_in = p.circle(x="utm_x", y="utm_y", size=10, color=color_in, source=source_in)
cr_out = p.circle(x="utm_x", y="utm_y", size=10, color=color_out, source=source_out)

cartodb = get_provider(CARTODBPOSITRON)
p.add_tile(cartodb)

p.add_tools(HoverTool(tooltips=[('Name', '@name')], renderers=[cr_in, cr_out]))

# add interactive legend (built explicitly: `p.legend` is empty until a
# Legend has been attached to the figure)
legend = Legend(
    items=[('IN Madrid Central area', [cr_in]), ('OUT of Madrid Central area', [cr_out])],
    location='top_left',
)
legend.click_policy = "hide"
p.add_layout(legend)

p = layout(p, sizing_mode='scale_both')

# output_file("html_plots/air_quality_stations.html", title="Air quality stations")
# save(p)
# reset_output()
# output_notebook()

show(p)

Air quality in different stations

Air quality difference with respect to previous years

In [80]:
def get_month_year(aRow):
    """Format the row label (`aRow.name`, a timestamp) as '<Month name> <year>'."""
    stamp = aRow.name
    return f"{stamp.month_name()} {stamp.year}"

def get_stats_dataframe(station):
    """Compute monthly mean/std statistics of every pollutant at one station.

    Reads the notebook-level frames ``df`` (measurements) and ``df_magnitud``
    (pollutant catalogue).

    Side effect: for every gas that gets rescaled, the matching
    ``unit_per_m3`` entry of the global ``df_magnitud`` is rewritten to
    '10μg'. The plotting code reads that column for its labels, so the
    mutation is intentional and kept.

    Parameters
    ----------
    station : str
        Station name, compared against the ``name`` column of ``df``.

    Returns
    -------
    tracking_gas : numpy.ndarray
        Pollutant formulas found for the station (the pivoted column names).
    df1_stats : pandas.DataFrame
        One row per month (index = first day of the month) with the columns
        ``<gas>_mean``, ``<gas>_std``, ``date`` ('<Month> <year>'),
        ``<gas>_upper`` (mean + std) and ``<gas>_lower`` (mean - std).
    """
    df1 = df[(df.name == station)][['formula','value','datetime']]
    # One column per pollutant, one row per timestamp.
    df1 = df1.pivot(index='datetime', columns='formula', values='value').reset_index()
    # Collapse every timestamp to the first day of its month so the groupby
    # below aggregates per month.
    df1['datetime'] = df1.datetime.dt.floor('D')
    df1['datetime'] = df1['datetime'].apply(lambda ts: ts.replace(day=1))
    tracking_gas = df1.columns.values[1:]
    # cut outliers!!!
    if station == 'Plaza del Carmen':
        # NaN comparisons are False, so rows missing SO2 or CO are dropped
        # as well. The copy avoids assigning into a filtered view below.
        df1 = df1[(df1.SO2 < 500) & (df1.CO < 10)].copy()
    # homogenization: gases catalogued in 'mg' are expressed in units of
    # 10 µg (1 mg = 100 x 10 µg; a factor of 1000 would give plain µg).
    for aGas in tracking_gas:
        gas_rows = df_magnitud.formula == aGas
        unit = df_magnitud.loc[gas_rows, 'unit_per_m3'].values[0]
        # '10μg' matches gases relabelled by an earlier call; `df1` is
        # rebuilt from `df` on each call, so it still needs scaling.
        # NOTE(review): assumes no gas is natively catalogued as '10μg' - confirm.
        if unit in ('mg', '10μg'):
            df1[aGas] = df1[aGas] * 100
            df_magnitud.loc[gas_rows, 'unit_per_m3'] = '10μg'
    # get all stats
    df1_stats = df1.groupby(['datetime']).agg(['mean','std'])
    # Flatten the (gas, statistic) column MultiIndex into '<gas>_<statistic>'.
    df1_stats.columns = df1_stats.columns.to_flat_index()
    df1_stats.columns = pd.Index([a+'_'+b for a,b in df1_stats.columns])
    df1_stats['date'] = df1_stats.apply(get_month_year, axis=1)
    # One-standard-deviation band around the mean, used for the shaded area.
    for aGas in tracking_gas:
        meanColumn = aGas+'_mean'
        stdColumn = aGas+'_std'
        df1_stats[aGas+'_upper'] = df1_stats[meanColumn]+df1_stats[stdColumn]
        df1_stats[aGas+'_lower'] = df1_stats[meanColumn]-df1_stats[stdColumn]
    return tracking_gas, df1_stats

def _epoch_ms(date_string):
    """Return milliseconds since the Unix epoch for a naive date, read as UTC."""
    return (pd.Timestamp(date_string) - pd.Timestamp("1970-01-01")) // pd.Timedelta("1ms")


def get_bokeh_viz_evolution_over_time(df1_stats, aText, tracking_gas):
    """Plot the monthly evolution of each pollutant as an interactive bokeh figure.

    For every gas a mean line, mean markers with a hover tooltip and a shaded
    mean +/- std band are drawn; the legend entries hide/show a gas on click.
    Three dashed vertical marks are added at fixed Madrid Central dates.

    Reads the notebook-level ``df_magnitud`` frame to label units.

    Parameters
    ----------
    df1_stats : pandas.DataFrame
        Statistics indexed by 'datetime' with ``<gas>_mean``, ``<gas>_std``,
        ``<gas>_upper``, ``<gas>_lower`` and ``date`` columns.
    aText : str
        Text appended to the figure title.
    tracking_gas : sequence of str
        Gas formulas to draw.

    Returns
    -------
    bokeh figure
    """
    cds_stats = ColumnDataSource(data=df1_stats)

    p = figure(
        x_axis_type="datetime",
        width=950,
        height=450,
        title='Evolution of pollutant concentrations over time in '+aText, 
        y_axis_label='Gas Concentration', 
        x_axis_label='Date'
    )

    # create color palette
    # Dark2 only exists for a limited range of sizes; clamp the requested
    # size to that range and cycle if there are more gases than colors, so
    # stations with very few (or very many) gases do not raise KeyError.
    dark2 = bokeh.palettes.brewer['Dark2']
    palette = dark2[min(max(len(tracking_gas), min(dark2)), max(dark2))]
    colors_gas = {aGas: palette[i % len(palette)] for i, aGas in enumerate(tracking_gas)}

    # add the data of each gas + interactive legend
    lines, circles, bands = {}, {}, {}
    items = [] 
    for aGas in tracking_gas:
        unit = df_magnitud[df_magnitud.formula==aGas].unit_per_m3.values[0]
        # add line of mean
        lines[aGas] = p.line('datetime', aGas+'_mean', source=cds_stats, color = colors_gas[aGas])
        # add dots of mean
        circles[aGas] = p.circle('datetime',aGas+'_mean', source=cds_stats, color=colors_gas[aGas], size=5, alpha=0.5)
        p.add_tools(HoverTool(tooltips=[
            ('Gas',aGas),
            ('Date', '@date'),
            ('Average value', f'@{aGas}_mean {unit}/m3'), 
            ('Standard Deviation', f'@{aGas}_std {unit}/m3')
        ], renderers=[circles[aGas]]))
        # add variance
        bands[aGas] = p.varea(x='datetime', y1=aGas+'_upper', y2=aGas+'_lower', source=cds_stats, fill_alpha=0.1, fill_color=colors_gas[aGas])
        # append legend list
        items.append((f'{aGas} ({unit}/m3)', [lines[aGas], circles[aGas], bands[aGas]]))

    # add legend
    legend = Legend(items=items, location='top_left')
    legend.click_policy="hide"
    p.add_layout(legend)

    # add annotations for time marks (previously named start / fines / end
    # of Madrid Central). Positions are UTC epoch milliseconds, matching how
    # the naive datetimes on the x axis are plotted; a local-time conversion
    # would shift the marks by the machine's UTC offset.
    for time_mark in ("2018-11-30", "2019-03-15", "2019-07-01"):
        p.add_layout(Span(location=_epoch_ms(time_mark),
                          dimension='height', line_color='black',
                          line_dash='dashed', line_width=2, line_alpha=0.3))

    return p

In [85]:
stations = ['Plaza del Carmen', "Plaza de España", "Castellana", "Retiro", "Méndez Álvaro"]

# One tab per station. The panels are collected under their own name so that
# `tabs` only ever refers to the final Tabs widget.
panels = []

for station in stations:
    all_tracking_gas, df_stats_station = get_stats_dataframe(station)
    station_figure = get_bokeh_viz_evolution_over_time(df_stats_station, station, all_tracking_gas)

    # `layout` comes from bokeh.layouts (imported in the imports cell).
    station_layout = layout(station_figure, sizing_mode='stretch_both')

    panels.append(Panel(child=station_layout, title=station))

tabs = Tabs(tabs=panels)

# output_file("html_plots/air_quality_evolution_tabs.html")
# save(tabs)
# reset_output()
# output_notebook()

show(tabs)

## Traffic points

In [90]:
# Traffic measurement points; the file is ';'-separated.
traffic_points = pd.read_csv("shared_data/traffic_points/pmed_trafico_03052016.csv", sep=";")

# Derive lat/lon and plot coordinates row by row from 'st_x' / 'st_y'.
traffic_coords = traffic_points.apply(get_lat_lon_utm, axis=1)
traffic_points[["latitude", "longitude", "utm_x", "utm_y"]] = traffic_coords

traffic_points.head()

Unnamed: 0,idelem,tipo_elem,cod_cent,nombre,st_x,st_y,latitude,longitude,utm_x,utm_y
0,1044,494,03FT08PM01,03FT08PM01,438963.314635,4474734.0,40.421001,-3.719488,-414051.481782,4927311.0
1,3600,494,PM30901,PM30901,443729.047369,4473268.0,40.408129,-3.663184,-407783.811885,4925429.0
2,3705,494,PM41451,PM41451,439858.261097,4471574.0,40.392598,-3.70864,-412843.963659,4923159.0
3,6823,494,PM41453,PM41453,439188.095183,4470895.0,40.386431,-3.716471,-413715.710072,4922257.0
4,7033,495,01015,Pº Castellana S-N - Pl. Colon-Hermosilla,441569.555897,4475502.0,40.428107,-3.688839,-410639.639249,4928350.0


In [92]:
# JSON text is UTF-8 by specification; be explicit so the result does not
# depend on the platform's default encoding.
with open("shared_data/districts/districts.geojson", "r", encoding="utf-8") as geojson:
    geodata = json.load(geojson)

# Build one frame per district and concatenate once at the end, instead of
# growing `df_districts` with a concat on every iteration.
district_frames = []
for district in geodata["features"]:
    # Get district coordinates (only the first coordinate list of the
    # geometry is used)
    df_district = pd.DataFrame(district["geometry"]["coordinates"][0], columns=["st_x", "st_y"])

    # Get district name
    df_district["name"] = district["properties"]["NOMBRE"]

    # Calculate correct utm
    df_district[["latitude", "longitude", "utm_x", "utm_y"]] = df_district.apply(get_lat_lon_utm, axis=1)
    district_frames.append(df_district.drop(columns=["st_x", "st_y"]))

if district_frames:
    df_districts = pd.concat(district_frames, ignore_index=True)
else:
    # No features in the file: keep the expected (empty) schema.
    df_districts = pd.DataFrame([], columns=["name", "latitude",
                                             "longitude", "utm_x",
                                             "utm_y"])

# Array with the distinct district names.
district_name = df_districts["name"].unique()
df_districts

Unnamed: 0,name,latitude,longitude,utm_x,utm_y
0,Centro,40.407345,-3.693162,-411120.867401,4.925314e+06
1,Centro,40.407196,-3.693202,-411125.341089,4.925293e+06
2,Centro,40.406986,-3.693227,-411128.197118,4.925262e+06
3,Centro,40.407127,-3.693677,-411178.251532,4.925282e+06
4,Centro,40.407256,-3.693849,-411197.420454,4.925301e+06
...,...,...,...,...,...
9466,Moncloa - Aravaca,40.469899,-3.802628,-423306.659924,4.934464e+06
9467,Moncloa - Aravaca,40.469823,-3.802359,-423276.645066,4.934452e+06
9468,Moncloa - Aravaca,40.469748,-3.802093,-423247.039940,4.934441e+06
9469,Moncloa - Aravaca,40.469672,-3.801822,-423216.881019,4.934430e+06
