### Styling and imports

In [1]:
%%html
<!-- Utility CSS classes used by the HTML <div> banners in the markdown cells:
     four background colours (.purple, .green, .blue, .yellow), centred text (.center)
     and two padding sizes (.small-padding, .extra-padding). -->
<style>
    .purple {
        background-color: #e6ccff;
    }
    .green {
        background-color: #99ffdd;
    }
    .blue {
        background-color: #b3f0ff;
    }
    .yellow{
        background-color: #ffffb3;
    }
    .center {
        text-align: center;
    }
    .small-padding {
        padding: 5px;
    }
    .extra-padding {
        padding: 20px;
    }
</style>

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from datetime import datetime, timedelta
import time
import json
import statistics
import seaborn as sns
import csv

<div class="green extra-padding">
<h1 class="center">Our question: What is the impact of air pollution and how can we improve air quality in the London Borough of Hammersmith and Fulham?</h1>
<strong>In this analysis:</strong>
<ol>
<li>What is the main cause of air pollution in the borough, and which factors influence pollutant levels (e.g. time of day, month, traffic, weather, and location type such as urban or roadside)?</li>
<li>What is the impact of air pollution on our health and the NHS?</li>
<li>What has been the impact of schemes such as ULEZ and Covid on reducing air pollution and how can this inform future policies?</li>
</ol>
</div>

<div class="yellow extra-padding">
    <h2>What factors are impacting air pollution?</h2>
</div>

<div class="purple small-padding">
    <h3>Fetch data from API and save to CSV</h3>
</div>

In [3]:
# These sections are commented out as the data has already been retrieved using the API
# Please load data from the saved csv 
# To test the API response, please enter an API key below

# Hammersmith coords for school near A4
# lat= '51.493096'
# lon= '-0.224079'

# API = 'ADD_API_KEY_HERE'

# # Past 3 years 
# end_date = datetime(2024, 1, 1, 12, 00, 00)
# start_date = end_date - timedelta(days=1095)

# start = int(start_date.timestamp())
# end = int(end_date.timestamp())

# response = requests.get(f'http://api.openweathermap.org/data/2.5/air_pollution/history?lat={lat}&lon={lon}&start={start}&end={end}&appid={API}')


# # Check response success
# if response.status_code == 200:
#     data = response.json()
# else:
#     print(f"Error: {response.status_code}")

In [4]:
# Data already saved
# file_path = "../data_unclean/air_pollution_data_hammersmith_3_years.csv"

# def save_to_csv(response, file_path):
#     records = []
#     for entry in response['list']:
#         record = {
#             "date_time": entry["dt"],
#             "aqi": entry["main"]["aqi"],
#             "co": entry["components"]["co"],
#             "no": entry["components"]["no"],
#             "no2": entry["components"]["no2"],
#             "o3": entry["components"]["o3"],
#             "so2": entry["components"]["so2"],
#             "pm2_5": entry["components"]["pm2_5"],
#             "pm10": entry["components"]["pm10"],
#             "nh3": entry["components"]["nh3"],
#         }
#         records.append(record)
#     aq_df = pd.DataFrame(records)
#     path = file_path
#     aq_df.to_csv(path, index=False)
#     return aq_df

# save_to_csv(response.json(), file_path)

<div class="purple small-padding">
    <h3>Load data from CSV</h3>
</div>

In [6]:
# Load the saved hourly air-quality export and preview the first ten rows.
# Presumably this is the file written by the commented-out save_to_csv cell above
# (same file name) - verify the relative path matches where it was saved.
air_quality_hammersmith = pd.read_csv(
    'data_unclean/air_pollution_data_hammersmith_3_years.csv'
)
air_quality_hammersmith.head(n=10)

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3
0,1609502400,2,320.44,13.19,46.61,2.33,28.61,16.74,17.83,1.98
1,1609506000,2,323.77,15.65,43.87,2.12,24.08,16.56,17.74,1.44
2,1609509600,2,323.77,17.43,43.18,1.43,21.22,15.27,16.5,1.24
3,1609513200,2,327.11,18.33,44.55,0.65,19.31,13.24,14.59,1.03
4,1609516800,2,330.45,17.43,45.93,0.1,17.41,10.69,12.15,0.82
5,1609520400,2,323.77,14.53,47.3,0.04,15.74,7.85,9.31,0.71
6,1609524000,2,310.42,11.06,49.35,0.26,14.66,6.31,7.63,0.66
7,1609527600,2,307.08,8.61,50.72,0.68,15.14,6.21,7.32,0.62
8,1609531200,2,310.42,6.87,49.35,1.11,15.74,6.32,7.38,0.59
9,1609534800,2,303.75,4.92,45.24,1.97,15.14,6.0,7.15,0.56


In [7]:
# Preview the last ten rows to see where the raw (epoch-second) series ends.
air_quality_hammersmith.tail(10)

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3
26007,1704078000,2,233.65,0.0,2.25,67.23,1.42,0.5,0.83,0.2
26008,1704081600,2,233.65,0.0,2.46,67.23,1.61,0.5,0.86,0.21
26009,1704085200,2,233.65,0.0,2.78,66.52,1.86,0.5,0.9,0.24
26010,1704088800,2,233.65,0.0,3.47,67.23,2.21,0.5,0.93,0.29
26011,1704092400,2,240.33,0.0,6.17,65.8,3.13,0.55,1.07,0.4
26012,1704096000,2,240.33,0.0,9.42,63.66,3.87,0.72,1.39,0.52
26013,1704099600,2,240.33,0.01,10.88,64.37,4.29,0.81,1.65,0.62
26014,1704103200,2,236.99,0.08,10.37,70.1,4.29,0.8,1.69,0.67
26015,1704106800,2,236.99,0.21,9.08,75.1,3.99,0.68,1.44,0.67
26016,1704110400,2,233.65,0.3,7.97,77.96,3.61,0.57,1.13,0.63


<div class="purple small-padding">
    <h3>Change datetime to readable format</h3>
</div>

In [8]:
# date_time holds Unix epoch seconds; convert it to timezone-naive pandas timestamps
# (seconds counted from 1970-01-01, i.e. the resulting clock times are UTC).
air_quality_hammersmith['date_time'] = pd.to_datetime(
    air_quality_hammersmith['date_time'],
    unit='s',
    origin='unix',
)
air_quality_hammersmith.head(n=10)

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3
0,2021-01-01 12:00:00,2,320.44,13.19,46.61,2.33,28.61,16.74,17.83,1.98
1,2021-01-01 13:00:00,2,323.77,15.65,43.87,2.12,24.08,16.56,17.74,1.44
2,2021-01-01 14:00:00,2,323.77,17.43,43.18,1.43,21.22,15.27,16.5,1.24
3,2021-01-01 15:00:00,2,327.11,18.33,44.55,0.65,19.31,13.24,14.59,1.03
4,2021-01-01 16:00:00,2,330.45,17.43,45.93,0.1,17.41,10.69,12.15,0.82
5,2021-01-01 17:00:00,2,323.77,14.53,47.3,0.04,15.74,7.85,9.31,0.71
6,2021-01-01 18:00:00,2,310.42,11.06,49.35,0.26,14.66,6.31,7.63,0.66
7,2021-01-01 19:00:00,2,307.08,8.61,50.72,0.68,15.14,6.21,7.32,0.62
8,2021-01-01 20:00:00,2,310.42,6.87,49.35,1.11,15.74,6.32,7.38,0.59
9,2021-01-01 21:00:00,2,303.75,4.92,45.24,1.97,15.14,6.0,7.15,0.56


In [9]:
# Confirm the end of the series after the timestamp conversion.
air_quality_hammersmith.tail(10)

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3
26007,2024-01-01 03:00:00,2,233.65,0.0,2.25,67.23,1.42,0.5,0.83,0.2
26008,2024-01-01 04:00:00,2,233.65,0.0,2.46,67.23,1.61,0.5,0.86,0.21
26009,2024-01-01 05:00:00,2,233.65,0.0,2.78,66.52,1.86,0.5,0.9,0.24
26010,2024-01-01 06:00:00,2,233.65,0.0,3.47,67.23,2.21,0.5,0.93,0.29
26011,2024-01-01 07:00:00,2,240.33,0.0,6.17,65.8,3.13,0.55,1.07,0.4
26012,2024-01-01 08:00:00,2,240.33,0.0,9.42,63.66,3.87,0.72,1.39,0.52
26013,2024-01-01 09:00:00,2,240.33,0.01,10.88,64.37,4.29,0.81,1.65,0.62
26014,2024-01-01 10:00:00,2,236.99,0.08,10.37,70.1,4.29,0.8,1.69,0.67
26015,2024-01-01 11:00:00,2,236.99,0.21,9.08,75.1,3.99,0.68,1.44,0.67
26016,2024-01-01 12:00:00,2,233.65,0.3,7.97,77.96,3.61,0.57,1.13,0.63


<div class="purple small-padding">
    <h3>Load Weather Data (retrieved from Open Meteo)</h3>
</div>

In [10]:
# Load the hourly weather export (retrieved from Open-Meteo, per the heading above)
# and preview the first ten rows.
weather_data = pd.read_csv(filepath_or_buffer='data_unclean/weather_hammersmith.csv')
weather_data.head(n=10)

Unnamed: 0,time,temperature_2m (°C),relative_humidity_2m (%),precipitation (mm),rain (mm),snowfall (cm),surface_pressure (hPa),wind_speed_10m (km/h),wind_speed_100m (km/h),wind_direction_10m (°)
0,2021-01-01T00:00,-0.1,96,0.0,0.0,0.0,1007.8,3.8,7.6,253
1,2021-01-01T01:00,-0.7,97,0.0,0.0,0.0,1008.0,4.7,10.5,261
2,2021-01-01T02:00,-1.1,98,0.0,0.0,0.0,1008.3,5.0,12.7,270
3,2021-01-01T03:00,-1.7,97,0.0,0.0,0.0,1008.7,6.1,14.0,270
4,2021-01-01T04:00,-2.3,97,0.0,0.0,0.0,1008.7,6.9,14.7,279
5,2021-01-01T05:00,-1.5,97,0.0,0.0,0.0,1008.8,5.2,14.0,282
6,2021-01-01T06:00,-0.9,98,0.0,0.0,0.0,1008.8,6.5,13.0,270
7,2021-01-01T07:00,-1.1,100,0.0,0.0,0.0,1009.4,7.6,14.2,275
8,2021-01-01T08:00,-1.0,99,0.0,0.0,0.0,1009.8,8.8,12.9,282
9,2021-01-01T09:00,-0.9,97,0.0,0.0,0.0,1010.2,9.1,11.9,279


In [12]:
# Parse the ISO-8601 strings in `time` into datetime64 so the column can be used
# as a join key against the air-quality timestamps.
weather_data['time'] = pd.to_datetime(weather_data['time'])
weather_data.head()

Unnamed: 0,time,temperature_2m (°C),relative_humidity_2m (%),precipitation (mm),rain (mm),snowfall (cm),surface_pressure (hPa),wind_speed_10m (km/h),wind_speed_100m (km/h),wind_direction_10m (°)
0,2021-01-01 00:00:00,-0.1,96,0.0,0.0,0.0,1007.8,3.8,7.6,253
1,2021-01-01 01:00:00,-0.7,97,0.0,0.0,0.0,1008.0,4.7,10.5,261
2,2021-01-01 02:00:00,-1.1,98,0.0,0.0,0.0,1008.3,5.0,12.7,270
3,2021-01-01 03:00:00,-1.7,97,0.0,0.0,0.0,1008.7,6.1,14.0,270
4,2021-01-01 04:00:00,-2.3,97,0.0,0.0,0.0,1008.7,6.9,14.7,279


<div class="purple small-padding">
    <h3>Merge weather and air quality data and get overview</h3>
</div>

In [13]:
# Left join: keep every air-quality row and attach the weather row whose `time`
# equals that row's `date_time`.
weather_aq = air_quality_hammersmith.merge(
    weather_data,
    how='left',
    left_on='date_time',
    right_on='time',
)

In [14]:
# Preview the first rows of the merged frame (air-quality columns, then weather columns).
weather_aq.head()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,time,temperature_2m (°C),relative_humidity_2m (%),precipitation (mm),rain (mm),snowfall (cm),surface_pressure (hPa),wind_speed_10m (km/h),wind_speed_100m (km/h),wind_direction_10m (°)
0,2021-01-01 12:00:00,2,320.44,13.19,46.61,2.33,28.61,16.74,17.83,1.98,2021-01-01 12:00:00,1.4,92,0.0,0.0,0.0,1010.4,8.3,10.5,265
1,2021-01-01 13:00:00,2,323.77,15.65,43.87,2.12,24.08,16.56,17.74,1.44,2021-01-01 13:00:00,2.1,91,0.0,0.0,0.0,1010.0,9.4,11.9,270
2,2021-01-01 14:00:00,2,323.77,17.43,43.18,1.43,21.22,15.27,16.5,1.24,2021-01-01 14:00:00,2.5,91,0.0,0.0,0.0,1010.0,9.4,12.6,263
3,2021-01-01 15:00:00,2,327.11,18.33,44.55,0.65,19.31,13.24,14.59,1.03,2021-01-01 15:00:00,2.8,92,0.0,0.0,0.0,1010.1,8.3,13.3,252
4,2021-01-01 16:00:00,2,330.45,17.43,45.93,0.1,17.41,10.69,12.15,0.82,2021-01-01 16:00:00,2.7,95,0.0,0.0,0.0,1010.5,6.5,15.0,267


In [15]:
# Preview the last rows to check that weather values were matched through to the end.
weather_aq.tail()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,time,temperature_2m (°C),relative_humidity_2m (%),precipitation (mm),rain (mm),snowfall (cm),surface_pressure (hPa),wind_speed_10m (km/h),wind_speed_100m (km/h),wind_direction_10m (°)
26012,2024-01-01 08:00:00,2,240.33,0.0,9.42,63.66,3.87,0.72,1.39,0.52,2024-01-01 08:00:00,5.9,71,0.0,0.0,0.0,1001.5,19.6,35.5,256
26013,2024-01-01 09:00:00,2,240.33,0.01,10.88,64.37,4.29,0.81,1.65,0.62,2024-01-01 09:00:00,6.4,69,0.0,0.0,0.0,1002.3,19.3,34.1,256
26014,2024-01-01 10:00:00,2,236.99,0.08,10.37,70.1,4.29,0.8,1.69,0.67,2024-01-01 10:00:00,7.1,69,0.0,0.0,0.0,1002.7,19.1,31.7,248
26015,2024-01-01 11:00:00,2,236.99,0.21,9.08,75.1,3.99,0.68,1.44,0.67,2024-01-01 11:00:00,7.8,69,0.0,0.0,0.0,1003.2,18.2,29.4,236
26016,2024-01-01 12:00:00,2,233.65,0.3,7.97,77.96,3.61,0.57,1.13,0.63,2024-01-01 12:00:00,8.4,68,0.0,0.0,0.0,1003.1,16.6,25.6,228


In [16]:
# Number of hourly rows in each AQI category.
weather_aq['aqi'].value_counts()

aqi
1    14572
2     9317
3     1344
4      681
5      103
Name: count, dtype: int64

In [17]:
# Same breakdown expressed as a share of all rows.
weather_aq['aqi'].value_counts(normalize=True)

aqi
1    0.560095
2    0.358112
3    0.051659
4    0.026175
5    0.003959
Name: proportion, dtype: float64

<div class="blue">
According to this dataset, air quality in Hammersmith is rated 'good' 56% of the time and 'fair' or worse for the remaining 44%. It is 'moderate' or 'poor' just under 8% of the time, and 'very poor' less than 1% of the time.
</div>

In [18]:
# List the merged column names (note both join keys, `date_time` and `time`, are present).
weather_aq.columns

Index(['date_time', 'aqi', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10',
       'nh3', 'time', 'temperature_2m (°C)', 'relative_humidity_2m (%)',
       'precipitation (mm)', 'rain (mm)', 'snowfall (cm)',
       'surface_pressure (hPa)', 'wind_speed_10m (km/h)',
       'wind_speed_100m (km/h)', 'wind_direction_10m (°)'],
      dtype='object')

In [19]:
# (rows, columns) of the merged frame.
weather_aq.shape

(26017, 20)

In [20]:
# Check that every column was parsed as a numeric or datetime dtype.
weather_aq.dtypes

date_time                   datetime64[ns]
aqi                                  int64
co                                 float64
no                                 float64
no2                                float64
o3                                 float64
so2                                float64
pm2_5                              float64
pm10                               float64
nh3                                float64
time                        datetime64[ns]
temperature_2m (°C)                float64
relative_humidity_2m (%)             int64
precipitation (mm)                 float64
rain (mm)                          float64
snowfall (cm)                      float64
surface_pressure (hPa)             float64
wind_speed_10m (km/h)              float64
wind_speed_100m (km/h)             float64
wind_direction_10m (°)               int64
dtype: object

<div class="purple small-padding">
    <h3>Data Cleaning</h3>
</div>

In [21]:
# `time` is the right-hand join key from the merge and is redundant next to `date_time`.
weather_aq = weather_aq.drop('time', axis=1)

In [22]:
# Confirm `time` is no longer among the columns.
weather_aq.columns

Index(['date_time', 'aqi', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10',
       'nh3', 'temperature_2m (°C)', 'relative_humidity_2m (%)',
       'precipitation (mm)', 'rain (mm)', 'snowfall (cm)',
       'surface_pressure (hPa)', 'wind_speed_10m (km/h)',
       'wind_speed_100m (km/h)', 'wind_direction_10m (°)'],
      dtype='object')

In [23]:
# Give the weather columns short names without the unit suffixes.
weather_aq = weather_aq.rename(
    {
        'temperature_2m (°C)': 'temp_C',
        'relative_humidity_2m (%)': 'percent_humidity',
        'precipitation (mm)': 'precipitation',
        'rain (mm)': 'rain',
        'snowfall (cm)': 'snowfall',
        'surface_pressure (hPa)': 'surface_pressure',
        'wind_speed_10m (km/h)': 'wind_speed_10m',
        'wind_speed_100m (km/h)': 'wind_speed_100m',
        'wind_direction_10m (°)': 'wind_direction',
    },
    axis='columns',
)

In [24]:
# Confirm the renamed columns.
weather_aq.columns

Index(['date_time', 'aqi', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10',
       'nh3', 'temp_C', 'percent_humidity', 'precipitation', 'rain',
       'snowfall', 'surface_pressure', 'wind_speed_10m', 'wind_speed_100m',
       'wind_direction'],
      dtype='object')

In [25]:
# Derive calendar features (day, month, hour) from the timestamp, starting with
# the day of the week, encoded as Monday=0 ... Sunday=6.
weather_aq['day'] = weather_aq['date_time'].dt.weekday

In [26]:
# Calendar month as an integer (1-12).
weather_aq['month'] = weather_aq['date_time'].dt.month

In [27]:
# Hour of day (0-23). date_time was parsed from epoch seconds with no timezone, so
# this is the UTC hour. NOTE(review): London is UTC+1 during British Summer Time -
# confirm UTC hours are acceptable for the time-of-day analysis.
weather_aq['hour'] = weather_aq['date_time'].dt.hour

In [28]:
# Preview the frame with the new day/month/hour columns.
weather_aq.head()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,...,precipitation,rain,snowfall,surface_pressure,wind_speed_10m,wind_speed_100m,wind_direction,day,month,hour
0,2021-01-01 12:00:00,2,320.44,13.19,46.61,2.33,28.61,16.74,17.83,1.98,...,0.0,0.0,0.0,1010.4,8.3,10.5,265,4,1,12
1,2021-01-01 13:00:00,2,323.77,15.65,43.87,2.12,24.08,16.56,17.74,1.44,...,0.0,0.0,0.0,1010.0,9.4,11.9,270,4,1,13
2,2021-01-01 14:00:00,2,323.77,17.43,43.18,1.43,21.22,15.27,16.5,1.24,...,0.0,0.0,0.0,1010.0,9.4,12.6,263,4,1,14
3,2021-01-01 15:00:00,2,327.11,18.33,44.55,0.65,19.31,13.24,14.59,1.03,...,0.0,0.0,0.0,1010.1,8.3,13.3,252,4,1,15
4,2021-01-01 16:00:00,2,330.45,17.43,45.93,0.1,17.41,10.69,12.15,0.82,...,0.0,0.0,0.0,1010.5,6.5,15.0,267,4,1,16


In [31]:
# Look for missing/anomalous data: compare the per-column counts and scan the
# min/max rows for impossible values.

weather_aq.describe()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,...,precipitation,rain,snowfall,surface_pressure,wind_speed_10m,wind_speed_100m,wind_direction,day,month,hour
count,26017,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,...,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0,26017.0
mean,2022-07-02 09:44:53.669523712,1.55579,249.606006,6.358509,18.793544,50.661807,7.872453,6.815382,8.654816,0.885957,...,0.085175,0.084126,0.000735,1014.434466,14.640931,24.320202,190.313334,2.991967,6.530192,11.500019
min,2021-01-01 12:00:00,1.0,141.86,0.0,1.34,-9999.0,0.86,0.5,0.53,0.0,...,0.0,0.0,0.0,952.7,0.0,0.0,1.0,0.0,1.0,0.0
25%,2021-09-30 12:00:00,1.0,205.28,0.0,7.8,30.4,3.81,2.23,3.31,0.29,...,0.0,0.0,0.0,1007.8,8.8,16.5,102.0,1.0,4.0,6.0
50%,2022-06-30 12:00:00,1.0,233.65,0.29,13.37,53.64,5.66,3.99,5.87,0.55,...,0.0,0.0,0.0,1015.5,13.2,23.2,217.0,3.0,7.0,12.0
75%,2023-04-04 12:00:00,2.0,270.37,1.5,23.99,70.81,9.06,7.76,10.42,1.04,...,0.0,0.0,0.0,1022.1,19.2,30.8,253.0,5.0,10.0,17.0
max,2024-01-01 12:00:00,5.0,1268.39,447.03,126.12,205.99,129.7,104.54,108.95,15.2,...,14.7,14.7,1.4,1044.7,68.0,99.8,360.0,6.0,12.0,23.0
std,,0.744838,80.892569,23.804967,15.823728,69.255641,7.844575,8.264211,8.995321,1.104444,...,0.373876,0.372101,0.018808,11.136033,7.555999,11.24984,93.849017,2.002452,3.436163,6.922187


<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>The o3 column has a minimum of -9999, which is physically impossible and looks like a missing-data sentinel. Negative values are counted and removed below.</li>
        <li>There is a large amount of variance in the data for different pollutants, particularly co and o3; however, we will mostly be focussing on NO2 and PM2.5 measurements.</li>
        <li>There are some 0 values in the pollutant data, which is also highly unlikely near such a busy road. These are treated as missing values below.</li>
        <!-- TODO: add further observations here -->
    </ul>
</div>

In [32]:
# Count the rows whose ozone reading is negative (physically impossible).
# This cell only counts them; removal happens in the next cell.
negative_values = weather_aq.loc[weather_aq['o3'] < 0]
negative_values.shape[0]

1

In [33]:
# As there is only 1 negative o3 value, that row is dropped rather than imputed.
# .copy() makes the filtered frame independent of the frame it was sliced from, so
# the column assignments in the following cells do not trigger pandas'
# SettingWithCopyWarning.
weather_aq = weather_aq[weather_aq['o3'] >= 0].copy()
weather_aq.describe()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,...,precipitation,rain,snowfall,surface_pressure,wind_speed_10m,wind_speed_100m,wind_direction,day,month,hour
count,26016,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,...,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0
mean,2022-07-02 09:23:22.167896576,1.555812,249.608223,6.358238,18.793687,51.048095,7.872474,6.815534,8.655013,0.885966,...,0.085178,0.084129,0.000735,1014.43454,14.641248,24.320783,190.311308,2.992005,6.530174,11.500192
min,2021-01-01 12:00:00,1.0,141.86,0.0,1.34,0.0,0.86,0.5,0.53,0.0,...,0.0,0.0,0.0,952.7,0.0,0.0,1.0,0.0,1.0,0.0
25%,2021-09-30 11:45:00,1.0,205.28,0.0,7.8,30.4,3.81,2.23,3.31,0.29,...,0.0,0.0,0.0,1007.8,8.8,16.5,102.0,1.0,4.0,5.75
50%,2022-06-30 11:30:00,1.0,233.65,0.29,13.37,53.64,5.66,3.99,5.87,0.55,...,0.0,0.0,0.0,1015.5,13.2,23.2,217.0,3.0,7.0,12.0
75%,2023-04-04 11:15:00,2.0,270.37,1.5,23.99,70.81,9.06,7.76,10.42,1.04,...,0.0,0.0,0.0,1022.1,19.2,30.8,253.0,5.0,10.0,17.25
max,2024-01-01 12:00:00,5.0,1268.39,447.03,126.12,205.99,129.7,104.54,108.95,15.2,...,14.7,14.7,1.4,1044.7,68.0,99.8,360.0,6.0,12.0,23.0
std,,0.744845,80.893333,23.805384,15.824015,30.235222,7.844725,8.264334,8.995438,1.104464,...,0.373882,0.372107,0.018808,11.13624,7.555971,11.249665,93.850252,2.002481,3.436228,6.922264


In [34]:
# Convert zero values for pollutants to NaN so they are treated as missing.
# The columns are selected by name rather than by position, so date_time and aqi
# (which are not pollutant readings) are left untouched, and replace() runs
# column-wise instead of a slow row-by-row apply(axis=1) over mixed dtypes.
pollutant_columns = ['co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3']
weather_aq[pollutant_columns] = weather_aq[pollutant_columns].replace(0, np.nan)
weather_aq.describe()

Unnamed: 0,date_time,aqi,co,no,no2,o3,so2,pm2_5,pm10,nh3,...,precipitation,rain,snowfall,surface_pressure,wind_speed_10m,wind_speed_100m,wind_direction,day,month,hour
count,26016,26016.0,26016.0,19189.0,26016.0,25064.0,26016.0,26016.0,26016.0,25879.0,...,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0,26016.0
mean,2022-07-02 09:23:22.167896576,1.555812,249.608223,8.620351,18.793687,52.987042,7.872474,6.815534,8.655013,0.890656,...,0.085178,0.084129,0.000735,1014.43454,14.641248,24.320783,190.311308,2.992005,6.530174,11.500192
min,2021-01-01 12:00:00,1.0,141.86,0.01,1.34,0.01,0.86,0.5,0.53,0.01,...,0.0,0.0,0.0,952.7,0.0,0.0,1.0,0.0,1.0,0.0
25%,2021-09-30 11:45:00,1.0,205.28,0.15,7.8,33.62,3.81,2.23,3.31,0.29,...,0.0,0.0,0.0,1007.8,8.8,16.5,102.0,1.0,4.0,5.75
50%,2022-06-30 11:30:00,1.0,233.65,0.72,13.37,55.07,5.66,3.99,5.87,0.55,...,0.0,0.0,0.0,1015.5,13.2,23.2,217.0,3.0,7.0,12.0
75%,2023-04-04 11:15:00,2.0,270.37,2.68,23.99,71.53,9.06,7.76,10.42,1.04,...,0.0,0.0,0.0,1022.1,19.2,30.8,253.0,5.0,10.0,17.25
max,2024-01-01 12:00:00,5.0,1268.39,447.03,126.12,205.99,129.7,104.54,108.95,15.2,...,14.7,14.7,1.4,1044.7,68.0,99.8,360.0,6.0,12.0,23.0
std,,0.744845,80.893333,27.36463,15.824015,29.088648,7.844725,8.264334,8.995438,1.105496,...,0.373882,0.372107,0.018808,11.13624,7.555971,11.249665,93.850252,2.002481,3.436228,6.922264


In [35]:
# Print the number of NaN entries in every column.
missing_per_column = weather_aq.isna().sum()
for column_name, missing_count in missing_per_column.items():
    print(f"{column_name}: {missing_count}")


date_time: 0
aqi: 0
co: 0
no: 6827
no2: 0
o3: 952
so2: 0
pm2_5: 0
pm10: 0
nh3: 137
temp_C: 0
percent_humidity: 0
precipitation: 0
rain: 0
snowfall: 0
surface_pressure: 0
wind_speed_10m: 0
wind_speed_100m: 0
wind_direction: 0
day: 0
month: 0
hour: 0


In [38]:
# Fill the gaps left by the zero -> NaN conversion with each column's mean.
# NOTE(review): `no` has 6,827 missing values (about 26% of rows) and its mean
# (8.62) is far above its median (0.72), so mean-imputation shifts its
# distribution noticeably - consider the median or time-based interpolation.
for pollutant in ('no', 'nh3', 'o3'):
    weather_aq[pollutant] = weather_aq[pollutant].fillna(weather_aq[pollutant].mean())

# Every column should now report zero missing values.
weather_aq.isna().sum()

date_time           0
aqi                 0
co                  0
no                  0
no2                 0
o3                  0
so2                 0
pm2_5               0
pm10                0
nh3                 0
temp_C              0
percent_humidity    0
precipitation       0
rain                0
snowfall            0
surface_pressure    0
wind_speed_10m      0
wind_speed_100m     0
wind_direction      0
day                 0
month               0
hour                0
dtype: int64

<div class="purple small-padding">
    <h3>Save Cleaned data to CSV</h3>
</div>

In [39]:
# Write the cleaned, merged frame to disk without the row index.
path = 'data_clean/hammersmith_clean.csv'
weather_aq.to_csv(path_or_buf=path, index=False)

<div class="yellow extra-padding">
    <h2>What is the impact of air pollution on our health and the NHS?</h2>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="yellow extra-padding">
    <h2>What has been the impact of schemes such as ULEZ and Covid on reducing air pollution and how can this inform future policies?</h2>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple small-padding">
    <h3>Second class heading</h3>
</div>

<div class="purple extra-padding">
    <h3>Second class heading</h3>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue small-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>

<div class="blue extra-padding">
    <strong>Observations</strong>
    <ul>
        <li>one</li>
        <li>two</li>
        <li>three</li>
        <li>four</li>
        <li>five</li>
    </ul>
</div>