In [329]:
import pandas as pd
import plotly.graph_objects as go

In [330]:
def plot_percentage_exceedances_site_area():
    """Show, per site area, the share of readings above each limit.

    Reads ``airquality.csv`` from the working directory, flags every row whose
    pollutant value is above the matching limit in ``who_standards``, averages
    the flags per ``site_area`` and draws one bar trace per pollutant (values
    in percent, rounded to one decimal). The NO2 bars are drawn fully opaque
    and the others faded. The figure is displayed with ``fig.show()``; nothing
    is returned.
    """
    df_excee_area = pd.read_csv('airquality.csv')

    # Limit each pollutant column is compared against.
    who_standards = {
        'pm10': 45,
        'pm2.5': 15,
        'no2': 25,
        'o3': 100,
        'so2': 40,
    }

    # One boolean column per pollutant, named after the pollutant itself so
    # the legend label needs no string surgery. (``str.strip('_exc')`` strips
    # a *set of characters* from both ends, not a suffix, so it would mangle
    # any pollutant name starting or ending with 'e', 'x', 'c' or '_'.)
    # NOTE(review): a missing reading compares as False, so it is counted as
    # "not exceeded" in the denominator -- confirm that this is intended.
    exceeded = pd.DataFrame({
        pollutant: df_excee_area[pollutant] > threshold
        for pollutant, threshold in who_standards.items()
    })

    # The mean of a boolean column is exceedances / rows, i.e. the frequency.
    pct_by_area = (
        exceeded
        .groupby(df_excee_area['site_area'])
        .mean()
        .mul(100)
        .round(1)
    )

    fig = go.Figure()
    for pollutant in pct_by_area.columns:
        fig.add_bar(
            x=pct_by_area.index,
            y=pct_by_area[pollutant],
            name=pollutant,
            text=pct_by_area[pollutant],
            textposition='auto',
            # Only the NO2 bars are emphasised; the rest are faded.
            opacity=1 if pollutant == 'no2' else 0.40,
            hoverinfo='skip',
        )

    fig.update_layout(
        title=dict(
            text='Percentage of exceedances per site area',
            font=dict(size=30),
            subtitle=dict(
                text='NO2 is the most exceeded limit',
                font=dict(size=20),
            ),
        ),
        # The bars carry their own value labels, so the y axis is left bare.
        yaxis=dict(
            showgrid=False,
            tickvals=[],
            title='',
        ),
        xaxis=dict(
            title='',
        ),
        plot_bgcolor='rgba(0,0,0,0)',
    )

    # Styling shared by both call-out boxes.
    note_box = dict(
        xref="paper",
        yref="y",
        font=dict(size=12, color="black"),
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
    )

    # NOTE(review): the texts and positions below are hard-coded for one
    # particular dataset; they are not derived from ``pct_by_area``.
    fig.add_annotation(
        text="33.4% of days exceeded the NO2 limit",
        x=0.85,
        y=31,
        showarrow=True,
        ay=0,
        ax=-150,
        arrowhead=4,
        arrowwidth=1,
        arrowcolor="black",
        align="left",
        **note_box
    )
    fig.add_annotation(
        text="Rural sites have the lowest exceedances",
        x=0,
        y=15,
        showarrow=False,
        align="center",
        **note_box
    )
    fig.show()

# Render the exceedance-per-site-area bar chart as this cell's output.
plot_percentage_exceedances_site_area()

In [331]:
# Name of the CSV column (pollutant) handed to plot_worse_sites in this cell.
pollutant = 'no2'

def plot_worse_sites(pollutant, show):
    """Bar chart of the urban sites with the highest mean pollutant level.

    Reads ``airquality.csv`` from the working directory, keeps the rows whose
    ``site_area`` is ``"urban"``, averages ``pollutant`` per
    (``country``, ``site_id``) and plots the ``show`` highest means, one bar
    per site, coloured by country. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Args:
        pollutant: Name of the CSV column to average and rank by.
        show: Number of top-ranked sites to display.
    """
    # Base colour per country; countries not listed fall back to plain grey.
    country_rgb = {
        'greece': (0, 0, 255),
        'germany': (255, 255, 0),
        'spain': (255, 0, 0),
        'portugal': (0, 255, 0),
        'hungary': (100, 100, 100),
    }

    def bar_color(country, alpha):
        """Return the country's colour at the given alpha, or 'grey' if unknown."""
        rgb = country_rgb.get(country)
        if rgb is None:
            return 'grey'
        return 'rgba({},{},{},{})'.format(*rgb, alpha)

    ranking = (
        pd.read_csv('airquality.csv')
        .query('site_area == "urban"')
        .groupby(['country', 'site_id'])[[pollutant]]
        .mean()
        .sort_values(by=pollutant, ascending=False)
        .reset_index()
    )
    top_sites = ranking.head(show)

    colors = [bar_color(country, 0.5) for country in top_sites['country']]
    if colors:
        # Emphasise the worst site with the opaque version of its own country
        # colour, instead of assuming that the first bar is always Greek.
        colors[0] = bar_color(top_sites['country'].iloc[0], 1)

    texts = [
        f'{country}<br>{round(value,2)}'
        for country, value in zip(top_sites['country'], top_sites[pollutant])
    ]

    fig = go.Figure()
    fig.add_bar(
        x=top_sites['site_id'],
        y=top_sites[pollutant],
        text=texts,
        textangle=0,
        textposition='auto',
        marker=dict(
            color=colors,
        ),
        opacity=0.8,
    )

    fig.update_layout(
        title=dict(
            text='Most polluted urban sites in Europe',
            font=dict(size=30),
            subtitle=dict(
                # Uses the requested pollutant rather than a fixed "no2".
                text=f'Displaying {pollutant} levels in the top {show} sites',
                font=dict(size=20),
            ),
            pad=dict(
                b=100,
            ),
        ),
        xaxis=dict(
            title='Site ID'
        ),
        yaxis=dict(
            title=f'{pollutant} (µg/m³)',
            tickvals=[],
        ),
        plot_bgcolor='rgba(0,0,0,0)',
        barmode='group',
    )

    # Styling shared by both call-out boxes.
    note_box = dict(
        xref="paper",
        yref="y",
        align="center",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
    )

    # NOTE(review): this text and the y positions of both annotations are
    # hard-coded for one particular dataset / call (no2, show=8); they are
    # not derived from ``top_sites``.
    fig.add_annotation(
        text="Greece has 4 sites in the top 8",
        x=0.85,
        y=50,
        showarrow=False,
        font=dict(size=14, color="black"),
        **note_box
    )

    fig.add_annotation(
        text=f"Most {pollutant} polluted site",
        x=0.12,
        y=55,
        showarrow=True,
        arrowhead=4,
        arrowwidth=1,
        ax=95,
        ay=0,
        font=dict(size=12, color="black"),
        **note_box
    )

    fig.show()

# Show the 8 urban sites with the highest mean level of the chosen pollutant.
plot_worse_sites(pollutant, 8)

In [332]:
# NO2 threshold drawn as the dashed red reference line in the plot below.
limit = 25
# site_id whose monthly NO2 means are compared against the overall mean.
# NOTE(review): presumably the top-ranked site from plot_worse_sites -- confirm.
most_polluted_site = 'gr0030a'
def plot_site_analysis(limit,most_polluted_site):
    """Compare one site's monthly mean NO2 with the all-sites monthly mean.

    Reads ``airquality.csv`` from the working directory, averages ``no2`` per
    calendar month both for the rows whose ``site_id`` equals
    ``most_polluted_site`` and for all rows, and plots three traces: the
    site, the overall mean, and the difference (site minus mean). A dashed
    red line marks ``limit``. The figure is displayed with ``fig.show()``;
    nothing is returned.

    Args:
        limit: NO2 value at which the horizontal reference line is drawn; it
            is also added to the y-axis tick values.
        most_polluted_site: ``site_id`` value selecting the site to plot.
    """
    # Read and date-parse the file once; both monthly series derive from it.
    readings = pd.read_csv('airquality.csv')
    readings['month'] = pd.to_datetime(readings['date']).dt.month

    overall_monthly = readings.groupby('month')['no2'].mean()
    # Boolean mask instead of a string-built ``query`` so a site id that
    # contains a quote character cannot break the expression.
    site_monthly = (
        readings.loc[readings['site_id'] == most_polluted_site]
        .groupby('month')['no2']
        .mean()
    )
    # Align the site series to the overall months explicitly so x and y of
    # the difference trace always match, even if the site lacks some month.
    difference = site_monthly.reindex(overall_monthly.index) - overall_monthly

    fig = go.Figure()
    fig.add_scatter(
        x=site_monthly.index,
        y=site_monthly,
        mode='markers+lines',
        name=most_polluted_site,
        opacity=1,
        marker=dict(
            size=10,
            color='red',
        ),
        line=dict(
            color='blue',
            width=2,
        ),
        showlegend=True,
    )

    fig.add_scatter(
        x=overall_monthly.index,
        y=overall_monthly,
        mode='lines+markers',
        name='mean',
        opacity=1,
        marker=dict(
            size=6,
            color='rgb(200,200,200)',
            line=dict(
                width=2,
                color='rgb(200,200,200)',
            ),
        ),
        showlegend=True,
    )

    fig.add_scatter(
        x=overall_monthly.index,
        y=difference,
        mode='lines',
        name='difference between<br>mean and site',
        opacity=1,
        line=dict(
            color='rgb(200,200,200)',
            width=2,
            dash='dash',
        ),
        showlegend=True,
    )

    fig.update_layout(
        title=dict(
            text='Levels of no2 in the most polluted site',
            font=dict(size=30),
            subtitle=dict(
                text='Comparison with the mean no2 levels and difference between them',
                font=dict(size=20),
            ),
            pad=dict(
                b=100,
            ),
        ),
        xaxis=dict(
            showgrid=False,
            tickvals=[1,2,3,4,5,6,7,8,9,10,11,12],
            ticktext=['January','February','March','April','May','June','July','August','September','October','November','December'],
        ),
        yaxis=dict(
            showgrid=False,
            # Regular ticks every 10 units plus one exactly at the limit.
            tickvals=list(range(10,66,10))+[limit],
            title='NO2 levels (µg/m³)',
        ),
        plot_bgcolor='rgba(0,0,0,0)',
        showlegend=True
    )

    # Styling shared by both call-out boxes.
    note_box = dict(
        xref="paper",
        yref="y",
        showarrow=False,
        align="center",
        bgcolor="rgba(255, 255, 255, 0.8)",
        bordercolor="black",
        borderwidth=1,
        borderpad=4,
    )

    # NOTE(review): annotation texts/positions and the shaded band below are
    # hard-coded for one particular site; they are not derived from the data.
    fig.add_annotation(
        text="every month doubles the standard",
        x=0.95,
        y=65,
        font=dict(size=14, color="black"),
        **note_box
    )

    fig.add_annotation(
        text="On summer months <br>the difference rises",
        x=0.46,
        y=35,
        font=dict(size=12, color="black"),
        **note_box
    )

    # Green band highlighting the stretch of the x axis that the
    # "summer months" annotation refers to.
    fig.add_shape(
        type="rect",
        x0=0.35, x1=.57,
        y0=10, y1=70,
        xref="paper",
        yref="y",
        layer="below",
        fillcolor="rgba(0,255,0,0.2)",
        line=dict(
            width=0,
        )
    )
    # Dashed horizontal reference line at the limit.
    fig.add_shape(
        type="line",
        x0=0, x1=1,
        y0=limit, y1=limit,
        xref="paper",
        yref="y",
        layer="below",
        line=dict(
            color="red",
            width=2,
            dash="dash"
        )
    )

    fig.show()

# Render the monthly NO2 comparison for the selected site and limit.
plot_site_analysis(limit,most_polluted_site)