Import libraries


In [2]:
import os

import ipywidgets as widgets
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from IPython.display import display, clear_output
from scipy.spatial import Voronoi


Loading data

In [3]:
# Folder holding the CSV files. Set the MADRID_AIR_DATA_DIR environment variable
# to run the notebook on another machine; when it is unset the original local
# folder is used, so the default behaviour is unchanged.
stations_data = os.path.join(
    os.environ.get(
        'MADRID_AIR_DATA_DIR',
        r'C:\Users\thoml\OneDrive\Documenten\Uhasselt master jaar 1 sem 2\visualisation of data science\data',
    ),
    'stations.csv',
)

# Station metadata (see the preview: id, name, address, lon, lat, elevation).
df_stations = pd.read_csv(stations_data)
print(df_stations.head(1))

         id            name          address       lon        lat  elevation
0  28079004  Pza. de España  Plaza de España -3.712247  40.423853        635


In [4]:
# Folder holding the CSV files. Set the MADRID_AIR_DATA_DIR environment variable
# to run the notebook on another machine; when it is unset the original local
# folder is used, so the default behaviour is unchanged.
madrid_2018 = os.path.join(
    os.environ.get(
        'MADRID_AIR_DATA_DIR',
        r'C:\Users\thoml\OneDrive\Documenten\Uhasselt master jaar 1 sem 2\visualisation of data science\data',
    ),
    'madrid_2018.csv',
)

# Pollutant measurements, one row per `date` and `station` (see the preview).
df_madrid_2018 = pd.read_csv(madrid_2018)
print(df_madrid_2018.head(1))

                  date  BEN  CH4   CO  EBE  NMHC   NO  NO_2   NOx  O_3  PM10   
0  2018-03-01 01:00:00  NaN  NaN  0.3  NaN   NaN  1.0  29.0  31.0  NaN   NaN  \

   PM25  SO_2  TCH  TOL   station  
0   NaN   2.0  NaN  NaN  28079004  


Graph 2
The interactive graph only works when the notebook is run in a local environment: the dropdown needs a live kernel to redraw the map.

In [None]:
# Every column other than the timestamp and the station key is a pollutant.
pollutants = df_madrid_2018.columns[
    ~df_madrid_2018.columns.isin(['date', 'station'])
].tolist()

# Mean of each pollutant per station, then attach the station metadata.
# The inner join keeps only stations present in both tables.
pollution_avg = df_madrid_2018.groupby('station', as_index=False)[pollutants].mean()
stations_with_pollution = df_stations.merge(
    pollution_avg, how='inner', left_on='id', right_on='station')

# Output area that create_pollution_map draws into.
output_widget = widgets.Output()

def create_pollution_map(pollutant):
    """Draw a Voronoi map of the stations coloured by one pollutant.

    Each station in ``df_stations`` gets a Voronoi cell on an OpenStreetMap
    background. A cell is red when the station's mean value for ``pollutant``
    is below the mean over all stations, blue when it is at or above that
    mean, and grey when the station has no value. The figure is shown inside
    ``output_widget``, replacing whatever was displayed there before.

    Parameters
    ----------
    pollutant : str
        Name of a pollutant column of ``stations_with_pollution``.

    Returns
    -------
    None
    """
    no_data_color = 'rgba(211, 211, 211, 0.5)'

    with output_widget:
        clear_output(wait=True)

        # Map bounds: extent of the stations plus a small margin (degrees).
        lon_min, lon_max = df_stations['lon'].min() - 0.02, df_stations['lon'].max() + 0.02
        lat_min, lat_max = df_stations['lat'].min() - 0.02, df_stations['lat'].max() + 0.02
        center_x = (lon_min + lon_max) / 2
        center_y = (lat_min + lat_max) / 2

        # Eight helper points on a rectangle around the stations. Because every
        # station lies inside that rectangle, its Voronoi region is closed.
        offset = 0.3
        boundary = [
            [lon_min - offset, lat_min - offset],
            [lon_min - offset, lat_max + offset],
            [lon_max + offset, lat_min - offset],
            [lon_max + offset, lat_max + offset],
            [lon_min - offset, center_y],
            [lon_max + offset, center_y],
            [center_x, lat_min - offset],
            [center_x, lat_max + offset],
        ]
        points = df_stations[['lon', 'lat']].values
        vor = Voronoi(np.vstack([points, boundary]))

        # Mean over the per-station means; missing values are skipped.
        avg = stations_with_pollution[pollutant].mean()

        # Fill colour per station id.
        colors = {}
        for station_id, value in zip(stations_with_pollution['id'],
                                     stations_with_pollution[pollutant]):
            if pd.isna(value):
                colors[station_id] = no_data_color
            elif value < avg:
                colors[station_id] = 'rgba(255, 0, 0, 0.5)'
            else:
                colors[station_id] = 'rgba(0, 0, 255, 0.5)'

        fig = go.Figure()

        # Filled regions. Only the first len(df_stations) input points are real
        # stations; the rest are the helper points added above.
        station_ids = df_stations['id'].to_numpy()
        for i, region in enumerate(vor.point_region[:len(df_stations)]):
            vertex_ids = vor.regions[region] if region != -1 else []
            # Skip empty regions and regions with a vertex at infinity (-1).
            if len(vertex_ids) == 0 or -1 in vertex_ids:
                continue
            vertices = vor.vertices[vertex_ids]
            in_view = (
                np.any(vertices[:, 0] >= lon_min - 0.5) and np.any(vertices[:, 0] <= lon_max + 0.5) and
                np.any(vertices[:, 1] >= lat_min - 0.5) and np.any(vertices[:, 1] <= lat_max + 0.5))
            if not in_view:
                continue
            # Stations missing from stations_with_pollution fall back to grey.
            color = colors.get(station_ids[i], no_data_color)
            fig.add_trace(go.Scattermapbox(
                lon=np.append(vertices[:, 0], vertices[0, 0]),
                lat=np.append(vertices[:, 1], vertices[0, 1]),
                fill='toself', fillcolor=color, line=dict(width=0),
                hoverinfo='skip', showlegend=False))

        # Region borders. All finite ridges go into ONE trace, separated by
        # None so the segments are not joined; one trace per ridge would make
        # the figure needlessly heavy.
        ridge_lon, ridge_lat = [], []
        for ridge in vor.ridge_vertices:
            ridge = np.asarray(ridge)
            if np.all(ridge >= 0):
                segment = vor.vertices[ridge]
                ridge_lon.extend(segment[:, 0].tolist() + [None])
                ridge_lat.extend(segment[:, 1].tolist() + [None])
        if ridge_lon:
            fig.add_trace(go.Scattermapbox(
                lon=ridge_lon, lat=ridge_lat, mode='lines',
                line=dict(color='black', width=2),
                hoverinfo='skip', showlegend=False))

        # Hover text: station details followed by the mean of every pollutant.
        hovers = []
        for _, station in stations_with_pollution.iterrows():
            text = f"<b>{station['name']}</b><br>{station['address']}<br>ID: {station['id']}<br>"
            for p in pollutants:
                value = station[p]
                text += f"{p}: {'N/A' if pd.isna(value) else f'{value:.2f}'}<br>"
            hovers.append(text)

        # Station markers.
        fig.add_trace(go.Scattermapbox(
            lon=stations_with_pollution['lon'], lat=stations_with_pollution['lat'],
            mode='markers', marker=dict(color='black', size=12),
            text=hovers, hovertemplate='%{text}<extra></extra>',
            showlegend=False))

        fig.update_layout(
            title=f'Madrid Air Quality 2018: {pollutant}',
            mapbox=dict(style='open-street-map', center=dict(lat=center_y, lon=center_x), zoom=10.8),
            legend=dict(x=0.02, y=0.98, bgcolor='rgba(255,255,255,0.8)'),
            width=1000, height=800, margin=dict(l=0, r=0, t=50, b=0))

        # Legend entries: traces without coordinates that only appear in the legend.
        legend_entries = [
            ('rgba(255, 0, 0, 0.8)', 10, f'Below average (< {avg:.1f})'),
            ('rgba(0, 0, 255, 0.8)', 10, f'Above average (> {avg:.1f})'),
            ('rgba(211, 211, 211, 0.8)', 10, 'No Data (N/A)'),
            ('black', 8, 'Station Location'),
        ]
        for marker_color, marker_size, label in legend_entries:
            fig.add_trace(go.Scattermapbox(lon=[None], lat=[None], mode='markers',
                marker=dict(color=marker_color, size=marker_size),
                name=label, showlegend=True, hoverinfo='skip'))

        fig.show()

# Pollutant selector; the first pollutant column is selected initially.
dropdown = widgets.Dropdown(description='Pollutant:', options=pollutants, value=pollutants[0])

def on_change(change):
    """Redraw the map when the dropdown's selected value changes.

    ``change`` is the change dictionary passed by the widget; any notification
    other than a 'change' of the 'value' trait is ignored.
    """
    if change['type'] != 'change':
        return
    if change['name'] != 'value':
        return
    create_pollution_map(change['new'])

# Subscribe only to the 'value' trait so the handler is not invoked for
# unrelated trait changes of the dropdown.
dropdown.observe(on_change, names='value')

# Show the selector above the map area, then draw the initial map.
display(widgets.VBox([dropdown, output_widget]))
create_pollution_map(dropdown.value)


VBox(children=(Dropdown(description='Pollutant:', options=('BEN', 'CH4', 'CO', 'EBE', 'NMHC', 'NO', 'NO_2', 'N…