In [105]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd


In [106]:
df = pd.read_csv('airquality.csv')
df.date.min(), df.date.max()

('2023-01-01', '2023-12-31')

In [108]:
sites = df.groupby('country').site.nunique()

In [153]:
# Change all country names to have the first letter in uppercase
df['country'] = df['country'].str.capitalize()

# Pollutant limits (µg/m3)
limits = {
    "pm10": 45,
    "pm2.5": 15,
    "no2": 25,
    "o3": 100,
    "so2": 40
}

# Calculate exceedances for all pollutants
for pollutant, limit in limits.items():
    exceedance_column = f"{pollutant}_exceedance"
    df[exceedance_column] = (df[pollutant] > limit).astype(int)

# Get the number of unique sites per country
site_counts = df.groupby("country")["site"].nunique().reset_index()
site_counts.columns = ["country", "site_count"]

# Aggregate exceedances by country and normalize by site count
exceedance_totals_country = (
    df.groupby("country", as_index=False)[[f"{pollutant}_exceedance" for pollutant in limits.keys()]]
    .sum()
    .merge(site_counts, on="country")
)
for pollutant in limits.keys():
    exceedance_totals_country[f"{pollutant}_exceedance"] = (
        exceedance_totals_country[f"{pollutant}_exceedance"] / exceedance_totals_country["site_count"]
    ).round(0)  # Round to 2 decimal places

# Prepare data for plotting
exceedance_melted = exceedance_totals_country.melt(
    id_vars="country",
    value_vars=[f"{pollutant}_exceedance" for pollutant in limits.keys()],
    var_name="pollutant",
    value_name="exceedances"
)

# Rename pollutants for readability
exceedance_melted["pollutant"] = exceedance_melted["pollutant"].str.replace("_exceedance", "").str.upper()

# Sort data by total exceedances per country
total_exceedances = exceedance_melted.groupby("country")["exceedances"].sum().sort_values(ascending=False)
exceedance_melted = exceedance_melted.sort_values(by="country", key=lambda x: x.map(total_exceedances), ascending=False)


In [245]:
exceedance_melted[exceedance_melted['country']=='Greece'].iloc[0,2]

246.0

In [270]:
# Define consistent colors for pollutants
pollutant_colors = ['#9b2d23', '#158d1a', '#00407a', '#b35b14', '#660c5f']

  # Customize the colors if needed

# Create the stacked bar plot
fig = px.bar(
    exceedance_melted,
    x="country",
    y="exceedances",
    color="pollutant",
    title="Fig 1. Total WHO limit Exceedances by Country and Pollutant (Ordered High to Low)",
    labels={"exceedances": "Total Exceedances", "country": "Country"},
    template="simple_white",
    color_discrete_sequence=pollutant_colors  # Ensure consistent color sequence
)

# Add horizontal grid lines back
fig.update_layout(
    yaxis=dict(showgrid=True),
    legend_title=dict(text='Pollutant:', font=dict(family="Arial")),
    legend=dict(
        itemclick="toggle",  # Dim other items when a legend item is clicked
        itemdoubleclick="toggleothers",  # Toggle visibility of others
        orientation="h",  # Horizontal layout for the legend
        yanchor="top",  # Anchor the legend to the bottom
        y=1.12,  # Position the legend below the title
        xanchor="left",  # Center the legend horizontally
        x=-0.015  # Align to the center of the plot
    ),
    title=dict(text="Fig 1. Total daily WHO limit Exceedances by Country and Pollutant for the year 2023(Ordered High to Low)", font=dict(family="Arial", size=20)),
    xaxis_title=dict(text="", font=dict(family="Arial", size=14)),
    yaxis_title=dict(text="Total Exceedances", font=dict(family="Arial", size=14)),
    font=dict(family="Arial"),  # Set font for all text elements
    # plot_bgcolor='#dcdcdc',  # Set the plot background color to a light gray
    # paper_bgcolor='#dcdcdc',  # Set the outer background color to a slightly darker gray
)

# Add bottom text (annotation)
fig.add_annotation(
    text="<i>Note: Exceedances are normalized by the number of measuring sites per country for better comparability. NO2: nitrogen dioxide. PM10: particulate matter with a maximum diameter of 10 micrometers. <br>O3: ozone. PM2.5: particulate matter with a maximum diameter of 2.5 micrometers. SO2: sulfur dioxide",
    xref="paper", yref="paper",  # Reference to the entire figure
    x=-0.03, y=-0.25,  # Centered horizontally and positioned below the plot
    showarrow=False,
    font=dict(family="Arial", size=12, color="gray"),
    align="left"
)

fig.show()


In [271]:
# Filter the data for Greece
greece_data = df[df["country"] == "Greece"]

# Define the pollutants and their limits
pollutants = ['no2', 'pm10', 'o3', 'pm2.5', 'so2']

# Create the subplots (5 rows, 1 column)
fig = make_subplots(rows=5, cols=1, shared_xaxes=True, 
                    subplot_titles=[f"<span style='color:{pollutant_colors[i]}'>{pollutant.upper()}</span>" for i, pollutant in enumerate(pollutants)],
                    vertical_spacing=0.04)


for i, pollutant in enumerate(pollutants):
    # Calculate the average pollutant values across all stations for each date
    average_pollutant = greece_data.groupby('date')[pollutant].mean().reset_index()

    # Add a trace for the average pollutant level
    fig.add_trace(go.Scatter(
        x=average_pollutant['date'], 
        y=average_pollutant[pollutant],  # Average of pollutant
        mode="lines", 
        name=f"{pollutant.upper()} (Average)",
        line=dict(width=2, color=pollutant_colors[i])  # Use the corresponding color
    ), row=i+1, col=1)

    # Add a horizontal line for the limit of the pollutant
    fig.add_shape(
        type="line",
        x0=greece_data["date"].min(), x1=greece_data["date"].max(),
        y0=limits[pollutant], y1=limits[pollutant],
        line=dict(color="red", width=2, dash="dash"),
        row=i+1, col=1)
    
        # Add annotation to the right of the last data point (limit line)
    fig.add_annotation(
        text=f"WHO limit:<br>{limits[pollutant]} µg/m³",  # Text with limit value
        x=greece_data["date"].max(),  # Last date in the dataset
        y=limits[pollutant],  # Position at the limit value
        xref=f"x{i+1}", yref=f"y{i+1}",  # Specific reference to each subplot's axes
        showarrow=False,  # No arrow
        font=dict(family="Arial", size=13, color='red'),  # Set the font style
        align="left",  # Align text to the left of the position
        xshift=33,  # Shift the text 50 pixels to the right
        row=i+1, col=1  # Positioning in the subplot
    )

    fig.add_annotation(
        text=f"Total:{round(exceedance_melted[exceedance_melted['country']=='Greece'].iloc[i,2])} <br>Exceedances",  # Text with limit value
        x=greece_data["date"].max(),  # Last date in the dataset
        y=average_pollutant[average_pollutant['date'] == greece_data["date"].max()][pollutant].values[0],  # Position at the limit value
        xref=f"x{i+1}", yref=f"y{i+1}",  # Specific reference to each subplot's axes
        showarrow=False,  # No arrow
        font=dict(family="Arial", size=13, color=pollutant_colors[i]),  # Set the font style
        align="left",  # Align text to the left of the position
        xshift=40,  # Shift the text 50 pixels to the right
        row=i+1, col=1  # Positioning in the subplot
    )

# Add bottom text (annotation)
fig.add_annotation(
    text="<i>Note: Average pollutant concentration of 6 measuring sites throughout Greece . NO2: nitrogen dioxide. PM10: particulate matter with a maximum diameter of 10 micrometers. <br>O3: ozone. PM2.5: particulate matter with a maximum diameter of 2.5 micrometers. SO2: sulfur dioxide",
    xref="paper", yref="paper",  # Reference to the entire figure
    x=-0.04, y=-0.08,  # Centered horizontally and positioned below the plot
    showarrow=False,
    font=dict(family="Arial", size=12, color="gray"),
    align="left"
)

fig.update_layout(
    title=dict(text="Fig 2. Average Pollutant Levels in Greece over all measured sites (with WHO Limits)", font=dict(family="Arial", size=20)),
    template="simple_white",
    showlegend=False, 
    height=1000,  # Adjust height to fit all subplots
    font=dict(family="Arial"),  # Set font to Arial for the entire plot
    # plot_bgcolor='#dcdcdc',  # Set the plot background color to a light gray
    # paper_bgcolor='#dcdcdc',  # Set the outer background color to a slightly darker gray
)

fig.update_yaxes(title_text="Daily countrywide concentration in µg/m3", showgrid=True, gridwidth=0.7,title_font=dict(size=18))
# Update the y-axis titles
fig.update_yaxes(
    title=None,  # Remove the y-axis title
    row=1, col=1  # First subplot
)

fig.update_yaxes(
    title=None,  # Remove the y-axis title
    row=2, col=1  # Second subplot
)

fig.update_yaxes(
    title=None,  # Remove the y-axis title
    row=4, col=1  # Fourth subplot
)

fig.update_yaxes(
    title=None,  # Remove the y-axis title
    row=5, col=1  # Fifth subplot
)

# Show figure
fig.show()


In [113]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Filter the data for Greece
greece_data = df[df["country"] == "greece"]

# Create the subplots (5 rows, 1 column)
fig = make_subplots(rows=5, cols=1, shared_xaxes=True, 
                    subplot_titles=["PM10", "PM2.5", "NO2", "O3", "SO2"])

# Define the pollutants, their WHO limits, and EU limits
pollutants = ['pm10', 'pm2.5', 'no2', 'o3', 'so2']
limits = {
    "pm10": {"who": 45, "eu": {"limit": 50, "exceedances": "35 per year"}},
    "pm2.5": {"who": 15, "eu": {"limit": None, "exceedances": None}},
    "no2": {"who": 25, "eu": {"limit": None, "exceedances": None}},
    "o3": {"who": 100, "eu": {"limit": 120, "exceedances": "25 per 3 years"}},
    "so2": {"who": 40, "eu": {"limit": 125, "exceedances": "3 per year"}}
}

# Add a line plot for each pollutant (average across all stations) with its limits
for i, pollutant in enumerate(pollutants):
    # Calculate the average pollutant values across all stations for each date
    average_pollutant = greece_data.groupby('date')[pollutant].mean().reset_index()

    # Add a trace for the average pollutant level
    fig.add_trace(go.Scatter(x=average_pollutant['date'], 
                             y=average_pollutant[pollutant],  # Average of pollutant
                             mode="lines", 
                             name=f"{pollutant.upper()} (Average)",
                             line=dict(width=2)), 
                  row=i+1, col=1)

    # Add a horizontal line for the WHO limit
    fig.add_shape(
        type="line",
        x0=greece_data["date"].min(), x1=greece_data["date"].max(),
        y0=limits[pollutant]["who"], y1=limits[pollutant]["who"],
        line=dict(color="red", width=2, dash="dash"),
        row=i+1, col=1
    )

    # Add a horizontal line for the EU limit if applicable
    if limits[pollutant]["eu"]["limit"] is not None:
        fig.add_shape(
            type="line",
            x0=greece_data["date"].min(), x1=greece_data["date"].max(),
            y0=limits[pollutant]["eu"]["limit"], y1=limits[pollutant]["eu"]["limit"],
            line=dict(color="blue", width=2, dash="dot"),
            row=i+1, col=1
        )

        # Add annotation for EU permitted exceedances
        fig.add_annotation(
            x=greece_data["date"].max(), 
            y=limits[pollutant]["eu"]["limit"], 
            text=f"EU Limit: {limits[pollutant]['eu']['limit']} µg/m³<br>{limits[pollutant]['eu']['exceedances']}",
            showarrow=False,
            font=dict(color="blue", size=10),
            xanchor="left",
            yanchor="bottom",
            row=i+1, col=1
        )

            # Add unified red text annotation for WHO limits
            who_limits_text = (
                "<b>WHO Pollutant Limits (µg/m³) and Permitted Exceedances:</b><br>"
                "PM10: 45 (3 per year)<br>"
                "PM2.5: 15 (3 per year)<br>"
                "NO2: 25 (3 per year)<br>"
                "O3: 100 (3 per year)<br>"
                "SO2: 40 (3 per year)"
            )
            fig.add_annotation(
                x=0.5, y=1.1, 
                text=who_limits_text,
                showarrow=False,
                font=dict(color="red", size=12),
                xref="paper", yref="paper",
                align="left",
                xanchor="center"
            )

# Update layout
fig.update_layout(
    title="Total Pollutant Levels in Greece (with WHO and EU Limits)", 
    template="simple_white",
    showlegend=True,
    height=1200  # Adjust height to fit all subplots
)

# Update x and y axes
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Pollutant Concentration")

# Show figure
fig.show()

IndentationError: unexpected indent (3649763887.py, line 66)

In [None]:
import plotly.graph_objects as go

# Filter the data for Greece
greece_data = df[df["country"] == "greece"]

# Get unique stations in Greece
stations = greece_data["site"].unique()

# Create the plot
fig = go.Figure()

# Add a line plot for each station in Greece
for site in stations:
    site_data = greece_data[greece_data["site"] == site]
    fig.add_trace(go.Scatter(x=site_data["date"], 
                             y=site_data["pm2.5"],  # PM10 data for each station
                             mode="lines", 
                             name=f"{site} (Greece)",
                             line=dict(width=2)))   # Customize line width

# Add horizontal line for WHO allowed PM10 limit (45 µg/m³)
fig.add_shape(
    type="line",
    x0=greece_data["date"].min(), x1=greece_data["date"].max(),
    y0=15, y1=15,
    line=dict(color="red", width=2, dash="dash"),
)

# Update layout
fig.update_layout(title="PM10 Levels in Greece by Station", 
                  template="plotly_dark", 
                  showlegend=True)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="PM10 Concentration", range=[0, greece_data["pm10"].max() + 10])

# Show figure
fig.show()


In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Get unique countries
countries = df["country"].unique()

# Create subplots
fig = make_subplots(rows=len(countries), cols=1, shared_xaxes=True, 
                     subplot_titles=[f"pm2.5 Levels in {country}" for country in countries])

# Add a line plot for each site in each country
for i, country in enumerate(countries):
    country_data = df[df["country"] == country]
    sites = country_data["site"].unique()
    for site in sites:
        site_data = country_data[country_data["site"] == site]
        fig.add_trace(go.Scatter(x=site_data["date"], 
                                 y=site_data["so2"], 
                                 mode="lines", 
                                 name=f"{site} ({country})",
                                 legendgroup=f"{site}",  # Grouping by site
                                 line=dict(width=2)),   # Customize line width
                      row=i + 1, col=1)

# Update layout
fig.update_layout(height=300 * len(countries), 
                  title="pm2.5 Levels by Country and Site", 
                  template="plotly_dark", 
                  showlegend=True)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="pm2.5 Concentration")

# Show figure
fig.show()


In [None]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Define the exceedance limit for pm10
exceedance_limit_pm10 = 45

# Add a column to count exceedances for pm10
df["pm10_exceedance"] = df["pm10"] > exceedance_limit_pm10

# Aggregate the exceedances by site and country
exceedance_totals = df.groupby(["country", "site"], as_index=False)["pm10_exceedance"].sum()

# Get unique countries
countries = exceedance_totals["country"].unique()

# Create subplots
fig = make_subplots(rows=len(countries), cols=1, 
                    shared_xaxes=False, 
                    subplot_titles=[f"PM10 Exceedances in {country}" for country in countries])

# Add a bar plot for each country, sorted by exceedances from highest to lowest
for i, country in enumerate(countries):
    country_data = exceedance_totals[exceedance_totals["country"] == country]
    country_data = country_data.sort_values("pm10_exceedance", ascending=False)  # Sort by PM10 exceedances

    fig.add_trace(go.Bar(x=country_data["site"], 
                         y=country_data["pm10_exceedance"], 
                         name=country, 
                         marker_color=px.colors.qualitative.Plotly[i % len(px.colors.qualitative.Plotly)]),
                  row=i + 1, col=1)

# Update layout
fig.update_layout(height=300 * len(countries), 
                  title="Exceedances of PM10 Limits by Site for Each Country",
                  template="plotly_dark", 
                  showlegend=False)
fig.update_xaxes(title_text="Site", tickangle=45)  # Rotate x-axis labels for better visibility
fig.update_yaxes(title_text="PM10 Exceedances")

# Show figure
fig.show()


In [None]:
# Aggregate the total exceedances for pm10 by country
exceedance_totals_country = df.groupby("country", as_index=False)["pm10_exceedance"].sum()

# Create a bar plot
fig = px.bar(exceedance_totals_country, 
             x="country", 
             y="pm10_exceedance", 
             title="Total PM10 Exceedances by Country",
             labels={"pm10_exceedance": "Total PM10 Exceedances", "country": "Country"},
             color="country", 
             template="plotly_dark",
             color_discrete_sequence=px.colors.qualitative.Plotly)

# Show the figure
fig.show()
