# Loading and Cleaning/Preparing the Data

In [61]:
import pandas as pd

csv_file = "openaq.csv"

openaq_df = pd.read_csv(csv_file, sep=';', low_memory=False)

print("Columns in dataset:", openaq_df.columns.tolist())
print(openaq_df.head())

Columns in dataset: ['Country Code', 'City', 'Location', 'Coordinates', 'Pollutant', 'Source Name', 'Unit', 'Value', 'Last Updated', 'Country Label']
  Country Code City Location                             Coordinates  \
0           CN  NaN    市八十六中             23.1047, 113.43319999999999   
1           CN  NaN     市农科院                       21.9508, 108.6553   
2           CN  NaN     市发改委                       29.8454, 114.3107   
3           CN  NaN       市委  30.457600000000003, 106.63030000000002   
4           CN  NaN     市委党校            27.731400000000004, 112.0194   

  Pollutant   Source Name   Unit  Value               Last Updated  \
0        O3  ChinaAQIData  µg/m³   36.0  2021-08-09T12:00:00+01:00   
1       SO2  ChinaAQIData  µg/m³    7.0  2020-12-31T16:00:00+00:00   
2     PM2.5  ChinaAQIData  µg/m³   26.0  2021-08-09T12:00:00+01:00   
3        O3  ChinaAQIData  µg/m³   91.0  2021-08-09T12:00:00+01:00   
4       NO2  ChinaAQIData  µg/m³   19.0  2021-08-09T12:00:00+01:00 

In [62]:
print("Unique countries in dataset:")
print(openaq_df['Country Label'].unique())

Unique countries in dataset:
['China' 'Colombia' 'Cyprus' 'Czech Republic' 'Germany' 'Denmark'
 'Ecuador' 'Estonia' 'Spain' 'Finland' 'France' 'United Kingdom' 'Greece'
 'Hong Kong, China' 'Korea, Republic of' 'Lithuania' 'Luxembourg' 'Latvia'
 'Montenegro' 'Macedonia, The former Yugoslav Rep. of' 'Mongolia' 'Malta'
 'Mexico' 'Japan' 'Netherlands' 'Norway' 'Nepal' 'Peru' 'Poland' 'India'
 'Iraq' 'Iceland' 'Italy' 'Croatia' 'Hungary' 'Israel' 'Kyrgyzstan'
 'Taiwan, China' 'United States' 'Thailand' 'Turkey' 'Serbia'
 'Russian Federation' 'Sweden' 'Singapore' 'Slovenia' 'Slovakia'
 'South Africa' nan 'West Bank and Gaza Strip' 'Portugal' 'Romania'
 'Austria' 'Australia' 'Canada' 'Switzerland' 'Chile'
 'Bosnia and Herzegovina' 'Belgium' 'Andorra' 'United Arab Emirates'
 'Argentina' 'Bulgaria' 'Brazil' 'Ghana' 'Indonesia' 'Ireland' 'Kenya'
 'Trinidad and Tobago' 'New Zealand' 'Chad' 'Puerto Rico' 'Qatar' 'Egypt'
 'Serbia and Montenegro' 'Gibraltar' 'Jordan' 'Saudi Arabia' 'Uzbekistan'
 "La

In [63]:
print("Number of unique cities:", openaq_df['City'].nunique())

print("Number of NaN city values:", openaq_df['City'].isna().sum())

print("\nUnique cities in dataset:")
print(openaq_df['City'].unique())

Number of unique cities: 4464
Number of NaN city values: 29146

Unique cities in dataset:
[nan 'Medellin' 'Αγία Μαρίνα Ξυλιάτου - Σταθμός Υποβάθρου' ... 'Svalöv'
 'LJ Bežigrad' 'Złockie']


In [64]:
print("Unique pollutants in dataset:")
print(openaq_df['Pollutant'].unique())

Unique pollutants in dataset:
['O3' 'SO2' 'PM2.5' 'NO2' 'CO' 'PM10' 'NO' 'PM1' 'RELATIVEHUMIDITY'
 'TEMPERATURE' 'NOX' 'UM003' 'BC']


In [65]:
# First convert to datetime if not done already
openaq_df['Last Updated'] = pd.to_datetime(openaq_df['Last Updated'], errors='coerce', utc=True)

# Check date range
print("Earliest date:", openaq_df['Last Updated'].min())
print("Latest date:", openaq_df['Last Updated'].max())

# List all years available
print("Years available in dataset:")
print(openaq_df['Last Updated'].dt.year.unique())

Earliest date: 2014-03-13 12:00:00+00:00
Latest date: 2025-01-31 23:00:00+00:00
Years available in dataset:
[2021 2020 2024 2022 2025 2019 2023 2018 2016 2017 2014 2015]


In [66]:
target_cities = ["Dublin", "London", "Paris", "Delhi", "Beijing"]

for city in target_cities:
    matches = openaq_df['City'].str.contains(city, case=False, na=False)
    print(f"{city}: {matches.sum()} rows")


Dublin: 8 rows
London: 45 rows
Paris: 48 rows
Delhi: 0 rows
Beijing: 2 rows


In [67]:
openaq_df[["Latitude", "Longitude"]] = (openaq_df["Coordinates"].str.split(",", expand=True).astype(float))

print(openaq_df.head())

  Country Code City Location                             Coordinates  \
0           CN  NaN    市八十六中             23.1047, 113.43319999999999   
1           CN  NaN     市农科院                       21.9508, 108.6553   
2           CN  NaN     市发改委                       29.8454, 114.3107   
3           CN  NaN       市委  30.457600000000003, 106.63030000000002   
4           CN  NaN     市委党校            27.731400000000004, 112.0194   

  Pollutant   Source Name   Unit  Value              Last Updated  \
0        O3  ChinaAQIData  µg/m³   36.0 2021-08-09 11:00:00+00:00   
1       SO2  ChinaAQIData  µg/m³    7.0 2020-12-31 16:00:00+00:00   
2     PM2.5  ChinaAQIData  µg/m³   26.0 2021-08-09 11:00:00+00:00   
3        O3  ChinaAQIData  µg/m³   91.0 2021-08-09 11:00:00+00:00   
4       NO2  ChinaAQIData  µg/m³   19.0 2021-08-09 11:00:00+00:00   

  Country Label  Latitude  Longitude  
0         China   23.1047   113.4332  
1         China   21.9508   108.6553  
2         China   29.8454   114.310

In [68]:
openaq_df["City"] = openaq_df["City"].fillna("Unknown")

print(openaq_df.head())

  Country Code     City Location                             Coordinates  \
0           CN  Unknown    市八十六中             23.1047, 113.43319999999999   
1           CN  Unknown     市农科院                       21.9508, 108.6553   
2           CN  Unknown     市发改委                       29.8454, 114.3107   
3           CN  Unknown       市委  30.457600000000003, 106.63030000000002   
4           CN  Unknown     市委党校            27.731400000000004, 112.0194   

  Pollutant   Source Name   Unit  Value              Last Updated  \
0        O3  ChinaAQIData  µg/m³   36.0 2021-08-09 11:00:00+00:00   
1       SO2  ChinaAQIData  µg/m³    7.0 2020-12-31 16:00:00+00:00   
2     PM2.5  ChinaAQIData  µg/m³   26.0 2021-08-09 11:00:00+00:00   
3        O3  ChinaAQIData  µg/m³   91.0 2021-08-09 11:00:00+00:00   
4       NO2  ChinaAQIData  µg/m³   19.0 2021-08-09 11:00:00+00:00   

  Country Label  Latitude  Longitude  
0         China   23.1047   113.4332  
1         China   21.9508   108.6553  
2         C

# Data Aggregation for Dashboard

In [69]:
country_pollution = openaq_df.groupby(["Country Label", "Pollutant"], as_index = False).agg(avg_value = ("Value", "mean"))

print(country_pollution)

                Country Label Pollutant   avg_value
0                 Afghanistan     PM2.5 -431.500000
1                     Algeria     PM2.5   14.000000
2                     Andorra        CO  500.000000
3                     Andorra        NO   16.900000
4                     Andorra       NO2   36.500000
..                        ...       ...         ...
602                  Viet Nam     PM2.5   26.000000
603  West Bank and Gaza Strip        CO    0.433333
604  West Bank and Gaza Strip       NO2    0.018467
605  West Bank and Gaza Strip        O3    0.010167
606  West Bank and Gaza Strip       SO2    0.000267

[607 rows x 3 columns]


In [70]:
openaq_df["Date"] = openaq_df["Last Updated"].dt.date

time_pollution = openaq_df.groupby(["Date", "Pollutant"], as_index = False).agg(avg_value = ("Value", "mean"))

print(time_pollution)

            Date Pollutant   avg_value
0     2014-03-13     PM2.5    5.000000
1     2014-08-12     PM2.5    7.100000
2     2015-08-21     PM2.5   36.900000
3     2016-02-09     PM2.5   10.000000
4     2016-02-16        CO  740.700000
...          ...       ...         ...
6038  2025-01-31        O3   31.038400
6039  2025-01-31       PM1   10.196597
6040  2025-01-31      PM10   15.021268
6041  2025-01-31     PM2.5    3.292397
6042  2025-01-31       SO2    4.446568

[6043 rows x 3 columns]


# Interactive Dashboard

In [71]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objs as go

pollutants = ["All", "PM2.5", "PM10", "NO2", "SO2", "O3", "CO"]
# Keep only the pollutant types we want to show (exclude 'All' since it's an option not a real pollutant)
openaq_df = openaq_df[openaq_df["Pollutant"].isin([p for p in pollutants if p != 'All'])].copy()

# Load world data for continent mapping (used to split sunburst by continent)
try:
    world_df = pd.read_csv('world_data.csv')
    # Build multiple lookup tables to improve matching
    country_to_region = dict(zip(world_df['name'].str.lower().str.strip(), world_df['region'].fillna('Unknown')))
    alpha2_to_region = dict(zip(world_df['alpha-2'].astype(str).str.upper(), world_df['region'].fillna('Unknown')))
    alpha3_to_region = dict(zip(world_df['alpha-3'].astype(str).str.upper(), world_df['region'].fillna('Unknown')))

    # Use pycountry when available for better matching; fall back to fuzzy matching
    try:
        import pycountry
    except Exception:
        pycountry = None

    import difflib

    def resolve_continent(country_label):
        """Resolve a country label to a region/continent using multiple strategies.
        Returns None if no reasonable match is found.
        """
        if not isinstance(country_label, str) or not country_label.strip():
            return None
        key = country_label.lower().strip()

        # Handle known legacy or variant names explicitly
        special = {
            "lao people's dem. rep.": 'Asia',
            "lao people's dem rep": 'Asia',
            "lao people's democratic republic": 'Asia',
            "lao people's democratic rep": 'Asia',
            "lao people's dem. rep": 'Asia',
            "lao people's republic": 'Asia',
            'lao peoples dem rep': 'Asia',
            'lao pdr': 'Asia',
            'ussr': 'Europe',
            'soviet union': 'Europe',
            'serbia and montenegro': 'Europe',
            'serbia & montenegro': 'Europe'
        }
        if key in special:
            return special[key]

        # Direct exact name match
        if key in country_to_region:
            return country_to_region[key]

        # Try resolving with pycountry to get ISO codes and map with those
        if pycountry is not None:
            try:
                c = pycountry.countries.lookup(country_label)
                code2 = getattr(c, 'alpha_2', None)
                code3 = getattr(c, 'alpha_3', None)
                if code2 and code2.upper() in alpha2_to_region:
                    return alpha2_to_region[code2.upper()]
                if code3 and code3.upper() in alpha3_to_region:
                    return alpha3_to_region[code3.upper()]
            except Exception:
                pass

        # Fuzzy match against known country names
        match = difflib.get_close_matches(key, country_to_region.keys(), n=1, cutoff=0.82)
        if match:
            return country_to_region[match[0]]

        # Try simplified name (remove parentheses, parts after comma)
        simplified = key.split('(')[0].split(',')[0].strip()
        if simplified in country_to_region:
            return country_to_region[simplified]

        return None

except Exception:
    country_to_region = {}
    def resolve_continent(x):
        return None

app = dash.Dash(__name__)
app.title = "Global Air Quality Dashboard"

# Layout
app.layout = html.Div([
    html.Div([html.H1("Global Air Quality Dashboard", style={"textAlign": "center", "color": "#2C3E50"})]),

    # Control bar: pollutant dropdown + date picker
    html.Div([
        html.Div([
            dcc.Dropdown(
                id = "pollutant dropdown",
                options = [{"label": p, "value": p} for p in pollutants],
                value = "PM2.5",
                clearable = False,
                style={"width": "220px"}
            )
        ], style={"display": "inline-block", "verticalAlign": "middle", "marginRight": "20px"}),

        html.Div([
            dcc.DatePickerRange(
                id='date-picker',
                min_date_allowed=openaq_df['Date'].min(),
                max_date_allowed=openaq_df['Date'].max(),
                start_date=openaq_df['Date'].min(),
                end_date=openaq_df['Date'].max(),
                display_format='YYYY-MM-DD'
            )
        ], style={"display": "inline-block", "verticalAlign": "middle"}),

    ], style={"textAlign": "center", "padding": "10px"}),

    # Metric cards (Mean / Median / Max)
    html.Div([
        html.Div(id='avg-card', children=[html.H4("Mean"), html.P("-")], style={"backgroundColor": "#F8F9FA", "padding": "12px", "borderRadius": "8px", "textAlign": "center", "width": "210px"}),
        html.Div(id='median-card', children=[html.H4("Median"), html.P("-")], style={"backgroundColor": "#F8F9FA", "padding": "12px", "borderRadius": "8px", "textAlign": "center", "width": "210px"}),
        html.Div(id='max-card', children=[html.H4("Max"), html.P("-")], style={"backgroundColor": "#F8F9FA", "padding": "12px", "borderRadius": "8px", "textAlign": "center", "width": "210px"}),
    ], style={"display": "flex", "justifyContent": "center", "gap": "20px", "padding": "8px"}),

    # Main content: two charts per row using CSS grid
    html.Div([
        html.Div(dcc.Graph(id = "map graph", style={"height": "420px"}), style={"width": "100%", "boxSizing": "border-box", "padding": "10px"}),
        html.Div(dcc.Graph(id = "sunburst-chart", style={"height": "420px"}), style={"width": "100%", "boxSizing": "border-box", "padding": "10px"}),
        html.Div(dcc.Graph(id = "time series graph", style={"height": "420px"}), style={"width": "100%", "boxSizing": "border-box", "padding": "10px"}),
        html.Div(dcc.Graph(id = "bar chart", style={"height": "420px"}), style={"width": "100%", "boxSizing": "border-box", "padding": "10px"}),
    ], style={"display": "grid", "gridTemplateColumns": "repeat(2, 1fr)", "gap": "12px"}),
], style={"fontFamily": "Arial, sans-serif"})


# Callbacks
@app.callback(
    [Output("map graph", "figure"),
     Output("sunburst-chart", "figure"),
     Output("time series graph", "figure"),
     Output("bar chart", "figure"),
     Output("avg-card", "children"),
     Output("median-card", "children"),
     Output("max-card", "children")],
    [Input("pollutant dropdown", "value"),
     Input('date-picker', 'start_date'),
     Input('date-picker', 'end_date'),
     Input('map graph', 'clickData')]
)

def updated_graphs(selected_pollutant, start_date, end_date, map_click):
    # Filter by pollutant (support 'All')
    if selected_pollutant == 'All':
        filtered = openaq_df.copy()
    else:
        filtered = openaq_df[openaq_df["Pollutant"] == selected_pollutant].copy()

    # Normalize incoming dates
    if start_date is not None:
        start = pd.to_datetime(start_date).date()
    else:
        start = openaq_df['Date'].min()
    if end_date is not None:
        end = pd.to_datetime(end_date).date()
    else:
        end = openaq_df['Date'].max()

    filtered = filtered[(filtered['Date'] >= start) & (filtered['Date'] <= end)].copy()

    # Map continent to filtered early so clicks can filter by continent reliably
    if 'Country Label' in filtered.columns:
        filtered['Continent'] = filtered['Country Label'].apply(resolve_continent)
        filtered['Continent'] = filtered['Continent'].fillna('Unknown')
    else:
        filtered['Continent'] = 'Unknown'

    # Keep a separate dataframe for the map so we don't drop rows needed for other charts (e.g., sunburst)
    map_df = filtered.copy()

    # Also add continent to the map dataframe so customdata can include it
    if 'Country Label' in map_df.columns:
        map_df['Continent'] = map_df['Country Label'].apply(resolve_continent)
        map_df['Continent'] = map_df['Continent'].fillna('Unknown')
    else:
        map_df['Continent'] = 'Unknown'

    # If user clicked on the map, try to resolve the continent and filter to it
    try:
        selected_continent = None
        if map_click and isinstance(map_click, dict) and map_click.get('points'):
            point = map_click['points'][0]
            custom = point.get('customdata') or point.get('customData')
            if custom:
                # customdata commonly holds [Country Label, Continent]
                if isinstance(custom, (list, tuple)) and len(custom) >= 2 and custom[1]:
                    selected_continent = custom[1]
                elif isinstance(custom, (list, tuple)) and len(custom) >= 1:
                    selected_continent = resolve_continent(custom[0])
                elif isinstance(custom, str):
                    selected_continent = resolve_continent(custom)
            else:
                # Fallback: try hovertext/name and resolve
                country_hint = point.get('hovertext') or point.get('hover_name') or point.get('location') or None
                if country_hint:
                    selected_continent = resolve_continent(country_hint)

        if selected_continent:
            # Filter both the main filtered and the map df to the selected continent
            filtered = filtered[filtered['Continent'].fillna('Unknown') == selected_continent].copy()
            map_df = map_df[map_df['Continent'].fillna('Unknown') == selected_continent].copy()
    except Exception:
        # If anything goes wrong parsing clickData, ignore and continue (no continent filter)
        pass

    # Clean up coordinates for map only
    if 'Latitude' not in map_df.columns or 'Longitude' not in map_df.columns:
        if 'Coordinates' in map_df.columns:
            coords = map_df['Coordinates'].str.split(',', expand=True)
            if coords.shape[1] >= 2:
                map_df['Latitude'] = pd.to_numeric(coords.iloc[:, 0], errors='coerce')
                map_df['Longitude'] = pd.to_numeric(coords.iloc[:, 1], errors='coerce')

    # Drop rows missing coords in the map dataframe only
    if 'Latitude' in map_df.columns and 'Longitude' in map_df.columns:
        map_df = map_df.dropna(subset=['Latitude','Longitude'], how='any')

    # remove known sentinel/outlier values and ensure numeric values for both filtered (used by charts) and map_df (used by map)
    if 'Value' in filtered.columns:
        filtered['Value'] = pd.to_numeric(filtered['Value'], errors='coerce')
        filtered = filtered[~filtered['Value'].isin([-9999, 9999])]
        filtered = filtered[filtered['Value'].notnull()]
        filtered = filtered[filtered['Value'] >= 0]
        filtered = filtered[filtered['Value'].replace([float('inf'), float('-inf')], pd.NA).notna()]

        map_df['Value'] = pd.to_numeric(map_df['Value'], errors='coerce')
        map_df = map_df[~map_df['Value'].isin([-9999, 9999])]
        map_df = map_df[map_df['Value'].notnull()]
        map_df = map_df[map_df['Value'] >= 0]
        map_df = map_df[map_df['Value'].replace([float('inf'), float('-inf')], pd.NA).notna()]

    # Compute metric cards
    if filtered.empty or 'Value' not in filtered.columns:
        avg_card = html.Div([html.H4("Mean"), html.P("No data")], style={"padding":"6px", "textAlign":"center"})
        median_card = html.Div([html.H4("Median"), html.P("No data")], style={"padding":"6px", "textAlign":"center"})
        max_card = html.Div([html.H4("Max"), html.P("No data")], style={"padding":"6px", "textAlign":"center"})
    else:
        avg_card = html.Div([html.H4("Mean"), html.P(f"{filtered['Value'].mean():.2f} μg/m³")], style={"padding":"6px", "textAlign":"center"})
        median_card = html.Div([html.H4("Median"), html.P(f"{filtered['Value'].median():.2f} μg/m³")], style={"padding":"6px", "textAlign":"center"})
        max_card = html.Div([html.H4("Max"), html.P(f"{filtered['Value'].max():.2f} μg/m³")], style={"padding":"6px", "textAlign":"center"})

    # Create a normalized marker size column to avoid invalid sizes (map only)
    use_size = False
    if 'Value' in map_df.columns and not map_df.empty:
        q99 = map_df['Value'].quantile(0.99)
        map_df['ValueCapped'] = map_df['Value'].clip(upper=q99)
        vmin = map_df['ValueCapped'].min()
        vmax = map_df['ValueCapped'].max()
        if pd.isna(vmin) or pd.isna(vmax) or vmax == vmin:
            map_df['MarkerSize'] = 8.0
            use_size = True
        else:
            map_df['MarkerSize'] = 6.0 + ((map_df['ValueCapped'] - vmin) / (vmax - vmin)) * 14.0
            # ensure numeric, finite, and positive
            map_df['MarkerSize'] = pd.to_numeric(map_df['MarkerSize'], errors='coerce')
            map_df = map_df[map_df['MarkerSize'].notnull() & (map_df['MarkerSize'] > 0) & map_df['MarkerSize'].apply(lambda x: pd.notna(x) and pd.api.types.is_number(x))]
            use_size = not map_df['MarkerSize'].isnull().all()

    # Map
    try:
        if map_df.empty or 'Latitude' not in map_df.columns or 'Longitude' not in map_df.columns:
            map_fig = px.scatter_geo(lat=[], lon=[])
            map_fig.update_layout(title='No data for selected pollutant/date range', margin=dict(l=0,r=0,t=40,b=0))
        else:
            map_args = dict(
                data_frame=map_df,
                lat="Latitude",
                lon="Longitude",
                color="Value",
                hover_name="City",
                hover_data=["Country Label", "Value"],
                title=f"{selected_pollutant if selected_pollutant != 'All' else 'Pollutants'} Concentration Around the World",
                color_continuous_scale="Reds",
                custom_data=["Country Label", "Continent"]
            )
            if use_size:
                map_args['size'] = 'MarkerSize'
                map_args['size_max'] = 20
            # If size causes issues, omit it and fall back to color-only
            try:
                map_fig = px.scatter_geo(**map_args)
                map_fig.update_layout(geo=dict(showframe=False, showcountries=True), margin=dict(l=0,r=0,t=40,b=0))
            except Exception:
                # fallback without size
                map_args.pop('size', None)
                map_fig = px.scatter_geo(**map_args)
                map_fig.update_layout(geo=dict(showframe=False, showcountries=True), margin=dict(l=0,r=0,t=40,b=0))
    except Exception as e:
        map_fig = go.Figure()
        map_fig.update_layout(title=f'Error creating map: {e}', margin=dict(l=0,r=0,t=40,b=0))

    # Sunburst: show continents, countries and pollutant breakdowns (aggregated across selected date range)
    try:
        sb_df = filtered.copy()
        if not sb_df.empty and 'Value' in sb_df.columns:
            sb_df['Value'] = pd.to_numeric(sb_df['Value'], errors='coerce')
            sb_df = sb_df[sb_df['Value'].notnull() & (sb_df['Value'] >= 0)]

            # Map countries to continents using the robust resolver (ensure column exists in sb_df)
            if 'Country Label' in sb_df.columns:
                sb_df['Continent'] = sb_df['Country Label'].apply(resolve_continent)
                sb_df['Continent'] = sb_df['Continent'].fillna('Unknown')
            else:
                sb_df['Continent'] = 'Unknown'

            if selected_pollutant == 'All':
                sunburst_df = sb_df.groupby(['Continent', 'Country Label', 'Pollutant'], as_index=False).agg(avg_value=('Value', 'mean'))
                if sunburst_df.empty:
                    sunburst_fig = go.Figure()
                    sunburst_fig.update_layout(title='No data for sunburst', margin=dict(l=0,r=0,t=40,b=0))
                else:
                    sunburst_fig = px.sunburst(sunburst_df, path=['Continent', 'Country Label', 'Pollutant'], values='avg_value', color='avg_value', color_continuous_scale='bluered')
                    sunburst_fig.update_layout(margin=dict(l=0,r=0,t=40,b=0), title='Average pollutant value by Continent > Country > Pollutant')
            else:
                # single pollutant: show continent -> country segments sized by avg pollutant value
                sunburst_df = sb_df.groupby(['Continent', 'Country Label'], as_index=False).agg(avg_value=('Value', 'mean'))
                if sunburst_df.empty:
                    sunburst_fig = go.Figure()
                    sunburst_fig.update_layout(title='No data for sunburst', margin=dict(l=0,r=0,t=40,b=0))
                else:
                    sunburst_fig = px.sunburst(sunburst_df, path=['Continent', 'Country Label'], values='avg_value', color='avg_value', color_continuous_scale='bluered')
                    sunburst_fig.update_layout(margin=dict(l=0,r=0,t=40,b=0), title=f"Average {selected_pollutant} by Continent and Country")
        else:
            sunburst_fig = go.Figure()
            sunburst_fig.update_layout(title='No data for sunburst', margin=dict(l=0,r=0,t=40,b=0))
    except Exception as e:
        sunburst_fig = go.Figure()
        sunburst_fig.update_layout(title=f'Error creating sunburst: {e}', margin=dict(l=0,r=0,t=40,b=0))

    # Bar chart: top 10 countries by average value for current filtered set
    try:
        if filtered.empty or 'Country Label' not in filtered.columns:
            bar_fig = go.Figure()
            bar_fig.update_layout(title='No data for bar chart', margin=dict(l=0,r=0,t=40,b=0))
        else:
            bar_df = filtered.groupby('Country Label', as_index=False).agg(avg_value=('Value', 'mean'))
            bar_df = bar_df.sort_values('avg_value', ascending=False).head(10)
            if bar_df.empty:
                bar_fig = go.Figure()
                bar_fig.update_layout(title='No data for bar chart', margin=dict(l=0,r=0,t=40,b=0))
            else:
                bar_fig = px.bar(bar_df, x='avg_value', y='Country Label', orientation='h', title='Top 10 countries by average value', labels={'avg_value':'Avg Value', 'Country Label': 'Country'})
                bar_fig.update_layout(margin=dict(l=20,r=20,t=40,b=20), yaxis=dict(categoryorder='total ascending'))
    except Exception as e:
        bar_fig = go.Figure()
        bar_fig.update_layout(title=f'Error creating bar chart: {e}', margin=dict(l=0,r=0,t=40,b=0))

    # Time series (aggregate by date)
    try:
        if filtered.empty or 'Date' not in filtered.columns:
            time_fig = go.Figure()
            time_fig.update_layout(title='No data for time series', margin=dict(l=0,r=0,t=40,b=0))
        else:
            if selected_pollutant == 'All':
                # When 'All' is selected, show each pollutant as a separate line
                time_df = filtered.groupby(['Date', 'Pollutant'], as_index=False).agg(avg_value=("Value", "mean"))
                if time_df.empty:
                    time_fig = go.Figure()
                    time_fig.update_layout(title='No data for time series', margin=dict(l=0,r=0,t=40,b=0))
                else:
                    time_fig = px.line(time_df, x='Date', y='avg_value', color='Pollutant', title='Average Pollutants Over Time')
                    time_fig.update_layout(margin=dict(l=20,r=20,t=40,b=20))
            else:
                time_df = filtered.groupby('Date', as_index=False).agg(avg_value=("Value", "mean"))
                time_fig = px.line(time_df, x='Date', y='avg_value', title=f"Average {selected_pollutant} Over Time")
                time_fig.update_layout(margin=dict(l=20,r=20,t=40,b=20))
    except Exception as e:
        time_fig = go.Figure()
        time_fig.update_layout(title=f'Error creating time series: {e}', margin=dict(l=0,r=0,t=40,b=0))

    return map_fig, sunburst_fig, time_fig, bar_fig, avg_card, median_card, max_card

# Run the dashboard

Run the cell below to start the Dash app on port 8051. Interrupt the kernel to stop it.

In [72]:
# Start the Dash app
# Execute this cell to run the dashboard on port 8007.
# Note: running this will block the kernel until you interrupt it (Kernel -> Interrupt).
if __name__ == "__main__":
    app.run(debug=True, port=8007, host="0.0.0.0", use_reloader=False)