# Air Quality Data Analysis

In [99]:
import pandas as pd
import numpy as np
import seaborn as sns
import folium

import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

print('Pandas version:', pd.__version__) # 2.2.3
print('Numpy version:', np.__version__) # 1.26.4
print('Seaborn version:', sns.__version__) # 0.13.2

Pandas version: 2.2.3
Numpy version: 1.26.4
Seaborn version: 0.13.2


## Loading the dataset

In [100]:
df = pd.read_csv('./data/cleaned_data.csv')
df.head()

Unnamed: 0,Date,Latitude,Longitude,PM1,PM2.5,PM10,NO2,Sensor_ID
0,2021-01-12,514568,54383,12.57,14.43,18.98,7.0,I07
1,2021-01-13,514568,54383,2.95,4.3,9.31,18.0,I07
2,2021-01-14,514567,54382,7.14,9.92,19.15,35.0,I07
3,2021-01-15,514567,54382,6.45,8.39,14.2,28.0,I07
4,2021-01-16,514567,54382,15.0,16.44,20.35,31.0,I07


In [101]:
df['Latitude'] = df['Latitude'].astype(str).str.replace(',', '.').astype(float)
df['Longitude'] = df['Longitude'].astype(str).str.replace(',', '.').astype(float)

df['Date'] = pd.to_datetime(df['Date'])


In [102]:
df.dtypes

Date         datetime64[ns]
Latitude            float64
Longitude           float64
PM1                 float64
PM2.5               float64
PM10                float64
NO2                 float64
Sensor_ID            object
dtype: object

In [103]:
sensor_coordinates = df.groupby('Sensor_ID')[['Latitude', 'Longitude']].apply(lambda group: group.mode().iloc[0])

eindhoven_map = folium.Map(location=[51.4416, 5.4697], zoom_start=12, scrollWheelZoom=False)

for sensor_id, row in sensor_coordinates.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        icon=folium.DivIcon(html=f"""
            <div style="
                font-size: 10px;
                color: white;
                background: green;
                border-radius: 50%;
                text-align: center;
                width: 30px;
                height: 30px;
                display: flex;
                justify-content: center;
                align-items: center;
                border: 1px solid black;">
                {sensor_id}
            </div>
        """),
    ).add_to(eindhoven_map)

eindhoven_map

Since the UHI data is an average of the months June, Juli and August of 2022 we will also take the average of the values of the sensors for these months.

In [104]:
start_date = '2022-06-01'
end_date = '2022-08-31'

df_filtered = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

average_values = (
    df_filtered.groupby('Sensor_ID')[['PM1', 'PM2.5', 'PM10', 'NO2']]
    .mean()
    .reset_index()
)

mode_coords = df_filtered.groupby('Sensor_ID')[['Latitude', 'Longitude']].agg(lambda x: x.mode()[0]).reset_index()

average_values = average_values.merge(mode_coords, on='Sensor_ID', how='left')

average_values


Unnamed: 0,Sensor_ID,PM1,PM2.5,PM10,NO2,Latitude,Longitude
0,I07,4.29,5.3125,10.531304,18.554348,51.4568,5.4383
1,I12,7.5875,9.175588,16.493971,11.955882,51.4416,5.4714
2,I17,6.76913,8.295,14.146739,21.413043,51.4136,5.4707
3,I19,6.910778,8.591333,17.882556,22.088889,51.4332,5.5198
4,I30,6.484778,8.196333,17.676222,17.433333,51.4375,5.4518
5,I36,8.633371,10.531573,19.163034,20.449438,51.4387,5.4817
6,I37,10.433696,11.966739,20.024022,18.978261,51.4686,5.4723


In [105]:
# df_average = pd.DataFrame(average_values)

# def normalise_column(df, column):
#     return (df[column] - df[column].min()) / (df[column].max() - df[column].min())

# df_normalised = pd.DataFrame({
#     'Sensor_ID': df_average['Sensor_ID'],
#     'PM1': normalise_column(df_average, 'PM1'),
#     'PM2.5': normalise_column(df_average, 'PM2.5'),
#     'PM10': normalise_column(df_average, 'PM10'),
#     'NO2': normalise_column(df_average, 'NO2'),
#     'Latitude': df_average['Latitude'],
#     'Longitude': df_average['Longitude']
# })

# df_normalised

In [106]:
import folium
import pandas as pd
from folium import FeatureGroup
from matplotlib import cm, colors

def normalize_values(values, cmap_name='coolwarm'):
    cmap = cm.get_cmap(cmap_name)
    norm = colors.Normalize(vmin=min(values), vmax=max(values))
    return [colors.rgb2hex(cmap(norm(value))) for value in values]

def add_parameter_layer(map_object, df, parameter, layer_name):
    valid_rows = df[df[parameter].notnull()]
    if valid_rows.empty:
        print(f"No data available for {parameter}. Skipping layer.")
        return
    
    values = valid_rows[parameter].tolist()
    color_map = normalize_values(values)
    
    layer = FeatureGroup(name=layer_name)
    
    for index, row in valid_rows.iterrows():
        value = row[parameter]
        folium.Marker(
            location=[row['Latitude'], row['Longitude']],
            icon=folium.DivIcon(html=f"""
                <div style="
                    font-size: 12px;
                    color: black;
                    text-align: center;
                    background-color: {color_map[index % len(color_map)]};
                    border-radius: 50%;
                    text-align: center;
                    width: 40px;
                    height: 40px;
                    display: flex;
                    justify-content: center;
                    align-items: center;
                    border: 1px solid black;">
                    {value:.2f}
                </div>
            """),
            popup=folium.Popup(f"Sensor: {row['Sensor_ID']}<br>{parameter}: {value:.2f}"),
            tooltip=f"Sensor: {row['Sensor_ID']}<br>{parameter}"
        ).add_to(layer)
    
    map_object.add_child(layer)
    
    return layer

eindhoven_map = folium.Map(location=[51.4416, 5.4697], zoom_start=13, scrollWheelZoom=False)

df_normalised['Latitude'] = pd.to_numeric(df_normalised['Latitude'], errors='coerce')
df_normalised['Longitude'] = pd.to_numeric(df_normalised['Longitude'], errors='coerce')

layers = {
    'PM1': add_parameter_layer(eindhoven_map, average_values, 'PM1', 'PM1 Levels'),
    'PM2.5': add_parameter_layer(eindhoven_map, average_values, 'PM2.5', 'PM2.5 Levels'),
    'PM10': add_parameter_layer(eindhoven_map, average_values, 'PM10', 'PM10 Levels'),
    'NO2': add_parameter_layer(eindhoven_map, average_values, 'NO2', 'NO2 Levels')
}

folium.LayerControl().add_to(eindhoven_map)

# eindhoven_map.save("eindhoven_pollution_map.html")
eindhoven_map


  cmap = cm.get_cmap(cmap_name)
