# Time patterns of air quality

This notebook explores temporal patterns in PM2.5 and PM10 concentrations using the cleaned dataset generated by `src/main.py` and saved in `data/process_data/weather_stage1_loaded.csv`.

We aggregate the data by season, month, hour of day, and day of week to:
- compute average PM2.5 and PM10 levels for each time scale, and
- identify typical daily, weekly, and seasonal cycles in air pollution.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the stage-1 dataset; `timestamp` is parsed to datetime at read time so
# the `.dt` accessor can be used for the calendar breakdowns in later cells.
csv_path = "../data/process_data/weather_stage1_loaded.csv"
df = pd.read_csv(csv_path, parse_dates=["timestamp"])
df.head()

In [None]:
# Print column names, dtypes and non-null counts for the loaded frame.
df.info()


In [None]:
# Display pandas' default descriptive statistics for the loaded frame.
df.describe()


In [None]:
# Add the calendar month number of each observation as its own column.
df["month"] = df["timestamp"].dt.month

# Preview the new column next to the timestamp it was derived from.
df.loc[:, ["timestamp", "month"]].head()

In [None]:
# Season name for every valid calendar month number (December opens winter).
_SEASON_BY_MONTH = {
    12: "Winter", 1: "Winter", 2: "Winter",
    3: "Spring", 4: "Spring", 5: "Spring",
    6: "Summer", 7: "Summer", 8: "Summer",
    9: "Autumn", 10: "Autumn", 11: "Autumn",
}


def get_season(month):
    """Return the season name for a calendar month number.

    Parameters
    ----------
    month : int or float
        Month number, 1 (January) through 12 (December). Integral floats
        such as ``7.0`` are accepted because they hash and compare equal to
        the corresponding int.

    Returns
    -------
    str or None
        ``"Winter"`` (12, 1, 2), ``"Spring"`` (3-5), ``"Summer"`` (6-8) or
        ``"Autumn"`` (9-11). ``None`` is returned for anything that is not a
        valid month number, e.g. ``NaN`` coming from a missing timestamp, so
        such values are no longer silently labelled as Autumn.
    """
    # An explicit lookup (rather than a catch-all ``else``) keeps invalid
    # months out of the Autumn bucket.
    return _SEASON_BY_MONTH.get(month)

# Label every row with its season, derived element-wise from the month column.
df["season"] = df["month"].map(get_season)

# Preview the derived columns alongside the source timestamp.
df.loc[:, ["timestamp", "month", "season"]].head()

In [None]:
# Mean PM2.5 / PM10 concentration per season.
# groupby sorts its keys alphabetically (Autumn, Spring, Summer, Winter), which
# scrambles the seasonal cycle, so the rows are put back into calendar order.
# Only seasons actually present in the data are kept, to avoid all-NaN rows.
SEASON_ORDER = ["Winter", "Spring", "Summer", "Autumn"]

season_mean_by_name = df.groupby("season")[["pm25", "pm10"]].mean()
season_mean = season_mean_by_name.reindex(
    [season for season in SEASON_ORDER if season in season_mean_by_name.index]
)
season_mean


In [None]:
# Bar chart of the seasonal means, one bar group per season.
# The axes are created up front and handed to pandas: DataFrame.plot opens its
# own figure when no `ax` is given, so a preceding plt.figure() call would be
# left behind as an extra, empty figure in the cell output.
fig, ax = plt.subplots(figsize=(8, 5))

# rot=0 keeps the season names horizontal on the x axis.
season_mean.plot(kind="bar", ax=ax, rot=0)

ax.set_title("Average PM2.5 and PM10 by Season")
ax.set_ylabel("Concentration (µg/m³)")
ax.set_xlabel("Season")
fig.tight_layout()

plt.show()


In [None]:
# Mean PM2.5 / PM10 concentration for each calendar month.
monthly_mean = df[["month", "pm25", "pm10"]].groupby("month").mean()
monthly_mean


In [None]:
# Line chart of the monthly means: one line per pollutant, one tick per month.
fig, ax = plt.subplots(figsize=(10, 5))

for column, label in (("pm25", "PM2.5"), ("pm10", "PM10")):
    ax.plot(monthly_mean.index, monthly_mean[column], marker="o", label=label)

ax.set_title("Monthly Average PM2.5 and PM10")
ax.set_xlabel("Month")
ax.set_ylabel("Concentration (µg/m³)")
ax.set_xticks(range(1, 13))
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()

plt.show()


In [None]:
# Derive the hour of day and the day of week from each timestamp.
df["hour"] = df["timestamp"].dt.hour
# `.dt.weekday` is the pandas alias of `.dt.dayofweek`: Monday=0 ... Sunday=6.
df["weekday"] = df["timestamp"].dt.weekday


In [None]:
# Mean PM2.5 / PM10 concentration for each hour of the day.
hourly_mean = df[["hour", "pm25", "pm10"]].groupby("hour").mean()
hourly_mean


In [None]:
# Line chart of the hourly means: one line per pollutant, one tick per hour.
fig, ax = plt.subplots(figsize=(10, 5))
for column, label in (("pm25", "PM2.5"), ("pm10", "PM10")):
    ax.plot(hourly_mean.index, hourly_mean[column], marker="o", label=label)

ax.set(
    title="Hourly Average PM2.5 and PM10",
    xlabel="Hour of Day",
    ylabel="Concentration (µg/m³)",
    xticks=range(0, 24),
)
ax.grid(alpha=0.3)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Mean PM2.5 / PM10 concentration for each day of the week (0=Mon, 6=Sun).
weekday_mean = df[["weekday", "pm25", "pm10"]].groupby("weekday").mean()
weekday_mean


In [None]:
# Line chart of the day-of-week means: one line per pollutant, one tick per day.
fig, ax = plt.subplots(figsize=(8, 5))
for column, label in (("pm25", "PM2.5"), ("pm10", "PM10")):
    ax.plot(weekday_mean.index, weekday_mean[column], marker="o", label=label)

ax.set(
    title="Weekly Average PM2.5 and PM10",
    xlabel="Day of Week (0 = Mon, 6 = Sun)",
    ylabel="Concentration (µg/m³)",
    xticks=range(0, 7),
)
ax.grid(alpha=0.3)
ax.legend()
fig.tight_layout()
plt.show()
