In [2]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd


In [3]:
# Load the air-quality measurements and display the first and last value of
# the `date` column as a quick sanity check of the covered period.
df = pd.read_csv("airquality.csv")
df["date"].min(), df["date"].max()

('2023-01-01', '2023-12-31')

In [None]:
# Build the per-country exceedance table used by the figures below.
# Produces: `limits`, exceedance flag columns on `df`, `site_counts`,
# `exceedance_totals_country`, `exceedance_melted` and `total_exceedances`.

# Normalise country names: first character upper-case, remaining characters
# lower-case (that is what `str.capitalize` does).
df["country"] = df["country"].str.capitalize()

# Pollutant limits (µg/m3)
limits = {"pm10": 45, "pm2.5": 15, "no2": 25, "o3": 100, "so2": 40}

# One "<pollutant>_exceedance" column per pollutant, in the order of `limits`
exceedance_columns = [f"{name}_exceedance" for name in limits]

# Flag every row with 1 where the measured value is above the limit, else 0.
# Comparisons against missing values evaluate to False, i.e. 0.
for column, (pollutant, limit) in zip(exceedance_columns, limits.items()):
    df[column] = df[pollutant].gt(limit).astype(int)

# Number of distinct measuring sites per country
site_counts = (
    df.groupby("country")["site"]
    .nunique()
    .rename("site_count")
    .reset_index()
)

# Sum the flags per country, attach the site count ...
exceedance_totals_country = (
    df.groupby("country", as_index=False)[exceedance_columns]
    .sum()
    .merge(site_counts, on="country")
)
# ... and turn the sums into "exceedances per site", rounded to whole numbers
exceedance_totals_country[exceedance_columns] = (
    exceedance_totals_country[exceedance_columns]
    .div(exceedance_totals_country["site_count"], axis=0)
    .round(0)
)

# Long format for plotting: one row per (country, pollutant)
exceedance_melted = exceedance_totals_country.melt(
    id_vars="country",
    value_vars=exceedance_columns,
    var_name="pollutant",
    value_name="exceedances",
)

# Readable pollutant labels: strip the column suffix and upper-case the rest
pollutant_labels = exceedance_melted["pollutant"].str.replace("_exceedance", "")
exceedance_melted["pollutant"] = pollutant_labels.str.upper()

# Order the rows by each country's summed exceedances, highest first.
# NOTE: the sort runs with pandas' default algorithm, so the relative order of
# the rows belonging to one country should not be relied upon afterwards.
total_exceedances = (
    exceedance_melted.groupby("country")["exceedances"]
    .sum()
    .sort_values(ascending=False)
)
exceedance_melted = exceedance_melted.sort_values(
    by="country",
    key=lambda countries: countries.map(total_exceedances),
    ascending=False,
)


In [73]:
# Fig 1: stacked bar chart of normalised WHO-limit exceedances per country,
# one bar segment per pollutant.

# Colour palette for the pollutants. The Greece figure further down indexes
# this list with its own `pollutants` list (NO2, PM10, O3, PM2.5, SO2), so the
# list order must stay as it is.
pollutant_colors = ['#9b2d23', '#158d1a', '#00407a', '#b35b14', '#660c5f']

# Pin each pollutant label to its colour explicitly. With a plain colour
# sequence plotly hands out colours in order of first appearance in the data,
# which depends on how `exceedance_melted` happens to be sorted and therefore
# does not reliably match the colours used in the Greece figure.
pollutant_order = ["NO2", "PM10", "O3", "PM2.5", "SO2"]
pollutant_color_map = dict(zip(pollutant_order, pollutant_colors))

# Create the stacked bar plot. The figure title is set once in update_layout
# below; countries keep the order in which they appear in `exceedance_melted`.
fig = px.bar(
    exceedance_melted,
    x="country",
    y="exceedances",
    color="pollutant",
    labels={"exceedances": "Total Exceedances", "country": "Country"},
    template="simple_white",
    color_discrete_map=pollutant_color_map,
    # Fixes legend and stacking order independently of the row order
    category_orders={"pollutant": pollutant_order},
)

# Add horizontal grid lines back (the template hides them), place the legend
# horizontally above the plot and set title / axis fonts
fig.update_layout(
    yaxis=dict(showgrid=True),
    legend_title=dict(text='Pollutant:', font=dict(family="Arial")),
    legend=dict(
        itemclick="toggle",
        itemdoubleclick="toggleothers",
        orientation="h",
        yanchor="top",
        y=1.12,
        xanchor="left",
        x=-0.014
    ),
    title=dict(text="Fig 1. Total daily WHO limit Exceedances by Country and Pollutant (2023, Ordered High to Low)", font=dict(family="Arial", size=20),x=0.025),
    xaxis_title=dict(text="", font=dict(family="Arial", size=14)),
    yaxis_title=dict(text="Total Exceedances", font=dict(family="Arial", size=14)),
    font=dict(family="Arial")
)

# Add bottom text: footnote below the plot area (paper coordinates)
fig.add_annotation(
    text="<i>Note: Exceedances are normalized by the number of measuring sites per country for better comparability. NO2: nitrogen dioxide. PM10: particulate matter with a maximum diameter of 10 micrometers. <br>O3: ozone. PM2.5: particulate matter with a maximum diameter of 2.5 micrometers. SO2: sulfur dioxide",
    xref="paper", yref="paper",
    x=-0.03, y=-0.25,
    showarrow=False,
    font=dict(family="Arial", size=12, color="gray"),
    align="left"
)

fig.show()


In [72]:
# Fig 2: one subplot per pollutant showing the daily mean concentration over
# all Greek measuring sites, the WHO limit as a dashed line, and the
# normalised exceedance count as a label at the right edge.

# Filter the data for Greece
greece_data = df[df["country"] == "Greece"]
if greece_data.empty:
    # Fail with a clear message instead of an IndexError further down
    raise ValueError("No rows found for country 'Greece'; cannot draw Fig 2.")

# Pollutants in plotting order; pollutant_colors[i] is the colour of pollutants[i]
pollutants = ['no2', 'pm10', 'o3', 'pm2.5', 'so2']

# Normalised exceedances for Greece, indexed by pollutant label ("NO2", ...).
# Looking the value up by label keeps every number attached to the right
# pollutant regardless of the row order of `exceedance_melted`.
greece_exceedances = (
    exceedance_melted.loc[exceedance_melted["country"] == "Greece"]
    .set_index("pollutant")["exceedances"]
)

# Right-most x position; used to anchor the labels next to each line's end
last_date = greece_data["date"].max()

# Create the subplots (one row per pollutant, 1 column)
fig = make_subplots(
    rows=len(pollutants), cols=1, shared_xaxes=True,
    subplot_titles=[
        f"<span style='color:{color}'>{name.upper()}</span>"
        for name, color in zip(pollutants, pollutant_colors)
    ],
    vertical_spacing=0.04,
)

for i, pollutant in enumerate(pollutants):
    row = i + 1
    color = pollutant_colors[i]
    who_limit = limits[pollutant]

    # Calculate the average pollutant values across all stations for each date
    average_pollutant = greece_data.groupby('date')[pollutant].mean().reset_index()

    # Add a trace for the average pollutant level
    fig.add_trace(go.Scatter(
        x=average_pollutant['date'],
        y=average_pollutant[pollutant],
        mode="lines",
        name=f"{pollutant.upper()} (Average)",
        line=dict(width=2, color=color),
        legendgroup='pollutants',
        showlegend=False
    ), row=row, col=1)

    # Add a trace for the WHO limit (only the first one gets a legend entry)
    fig.add_trace(go.Scatter(
        x=average_pollutant['date'],
        y=[who_limit] * len(average_pollutant),
        hoverinfo='skip',
        mode="lines",
        name="WHO Limit",
        line=dict(color="red", width=2, dash="dash"),
        legendgroup='who_limits',
        showlegend=(i == 0)
    ), row=row, col=1)

    # Label the limit line to the right of the last data point.
    # row/col already bind the annotation to this subplot's axes.
    fig.add_annotation(
        text=f"{who_limit} µg/m³",
        x=last_date,
        y=who_limit,
        showarrow=False,
        font=dict(family="Arial", size=13, color='red'),
        align="left",
        xshift=30,
        row=row, col=1,
    )

    # Label the end of the average line with the normalised exceedance count
    fig.add_annotation(
        text=f"Total normalized<br>Exceedances:{round(greece_exceedances.loc[pollutant.upper()])}",
        x=last_date,
        y=average_pollutant.loc[average_pollutant['date'] == last_date, pollutant].iloc[0],
        showarrow=False,
        font=dict(family="Arial", size=13, color=color),
        align="left",
        xshift=55,
        row=row, col=1
    )

# Add bottom text (annotation)
fig.add_annotation(
    text="<i>Note: Average pollutant concentration of 6 measuring sites throughout Greece . NO2: nitrogen dioxide. PM10: particulate matter with a maximum diameter of 10 micrometers. <br>O3: ozone. PM2.5: particulate matter with a maximum diameter of 2.5 micrometers. SO2: sulfur dioxide. Total normalized exceedances were calculated by the total number of WHO limit exceedances of all measuring sites in Greece divided by the amount of measuring sites and then rounded",
    xref="paper", yref="paper",
    x=-0.02, y=-0.08,
    showarrow=False,
    font=dict(family="Arial", size=12, color="gray"),
    align="left"
)

fig.update_layout(
    title=dict(text="Fig 2. Average Pollutant Levels in Greece over all measured sites (with WHO Limits)", font=dict(family="Arial", size=20), x=0.038),
    template="simple_white",
    height=1000,
    font=dict(family="Arial"),
    legend=dict(
        groupclick="toggleitem",
        yanchor="top",
        y=1.05,
        xanchor="left",
        x=-0.0
    )
)

# Horizontal grid lines on every subplot
fig.update_yaxes(showgrid=True, gridwidth=0.7)

# A single shared y-axis title, placed on the middle subplot only
fig.update_yaxes(
    title_text="Daily countrywide concentration in µg/m3",
    title_font=dict(size=18),
    row=len(pollutants) // 2 + 1, col=1
)

# Tick every 1 month
fig.update_xaxes(
    dtick="M1",
)

fig.show()