In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load Data
# Both tables are read with the same options: the "time" column is parsed as
# datetimes while loading rather than being left as strings.
read_options = {"parse_dates": ["time"]}
daily = pd.read_csv(filepath_or_buffer='../data/AQIData/aqi_daily.csv', **read_options)
hourly = pd.read_csv(filepath_or_buffer='../data/AQIData/aqi_hourly.csv', **read_options)

In [5]:
# Print the column / dtype / non-null summary for the daily table, then the hourly one.
for frame in (daily, hourly):
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3536 entries, 0 to 3535
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   location                  3536 non-null   object 
 1   time                      3536 non-null   object 
 2   pm10 (μg/m³)              3536 non-null   float64
 3   pm2_5 (μg/m³)             3536 non-null   float64
 4   carbon_monoxide (μg/m³)   3536 non-null   float64
 5   nitrogen_dioxide (μg/m³)  3536 non-null   float64
 6   sulphur_dioxide (μg/m³)   3536 non-null   float64
 7   ozone (μg/m³)             3536 non-null   float64
dtypes: float64(6), object(2)
memory usage: 221.1+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84864 entries, 0 to 84863
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   time                      84864 non-null  object 
 1   pm10 (μg/m³)

In [6]:
# Summary statistics for the numeric columns of the daily table.
# Left as the cell's last expression (instead of print) so Jupyter renders the
# full table with its rich display rather than wrapped plain text.
daily.describe()

       pm10 (μg/m³)  pm2_5 (μg/m³)  carbon_monoxide (μg/m³)  \
count   3536.000000    3536.000000              3536.000000   
mean      53.840801      32.831135               432.511006   
std       45.889356      27.184148               392.513388   
min        0.789286       0.572262                59.208333   
25%       23.457262      14.629732               216.508036   
50%       39.000952      24.768274               281.223810   
75%       63.428036      40.278284               403.924206   
max      251.186905     147.424960              2101.107937   

       nitrogen_dioxide (μg/m³)  sulphur_dioxide (μg/m³)  ozone (μg/m³)  
count               3536.000000              3536.000000    3536.000000  
mean                  15.974289                14.344324      81.458649  
std                   19.663942                16.171359      21.854015  
min                    0.000000                 0.241905      23.814286  
25%                    3.833988                 2.808661      

In [7]:
# Summary statistics for the numeric columns of the hourly table.
# Left as the cell's last expression (instead of print) so Jupyter renders the
# full table with its rich display rather than wrapped plain text.
# NOTE(review): the recorded run of this cell shows negative minima for all six
# pollutant columns (e.g. pm10 = -6.1, ozone = -25.6). Negative concentrations are
# not physically meaningful - confirm whether they should be clipped to 0 or
# treated as missing before further analysis.
hourly.describe()

       pm10 (μg/m³)  pm2_5 (μg/m³)  carbon_monoxide (μg/m³)  \
count  84864.000000   84864.000000             84864.000000   
mean      53.840801      32.831135               432.511006   
std       49.087739      29.085325               446.481677   
min       -6.100000      -3.854286                -0.371429   
25%       22.005714      13.951429               207.485714   
50%       38.651429      24.325714               274.242857   
75%       64.812143      40.462857               399.864286   
max      289.900000     170.741905              2738.200000   

       nitrogen_dioxide (μg/m³)  sulphur_dioxide (μg/m³)  ozone (μg/m³)  
count              84864.000000             84864.000000   84864.000000  
mean                  15.974289                14.344324      81.458649  
std                   22.982734                17.274625      40.751885  
min                   -4.868571                -3.971429     -25.638095  
25%                    3.062857                 2.550714      

AQI Trends Over Time: Temporal Analysis

In [None]:
# Line chart of the daily "AQI" column against "time", drawn on an explicit Axes.
# NOTE(review): the recorded daily.info() output in this notebook lists only the
# pollutant columns and no "AQI" column - confirm the CSV provides it (or derive
# it in an earlier cell) before running.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(data=daily, x="time", y="AQI", marker="o", color="red", ax=ax)
ax.set_title("Daily AQI Trend Over Time")
ax.set_ylabel("AQI")
ax.set_xlabel("Time")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()
plt.show()

In [None]:
# Line chart of the hourly "AQI" column against "time", drawn on an explicit Axes.
# NOTE(review): the recorded hourly.describe() output shows only pollutant columns
# and no "AQI" column - confirm it exists in the CSV (or is derived earlier)
# before running.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(data=hourly, x="time", y="AQI", color="blue", ax=ax)
ax.set_title("Hourly AQI Trend Over Time")
ax.set_ylabel("AQI")
ax.set_xlabel("Time")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid()
plt.show()

Spatial Analysis (AQI by Location)

In [None]:
# Box plot of the daily "AQI" distribution, one box per location.
# seaborn >= 0.13 deprecates passing `palette` without `hue`; mapping the x variable
# to hue and disabling the legend is the documented replacement, giving the same
# per-location colouring without the FutureWarning.
# NOTE(review): the recorded daily.info() output lists no "AQI" column - confirm
# it exists in the CSV (or is derived earlier) before running.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=daily,
    x="location",
    y="AQI",
    hue="location",
    palette="coolwarm",
    legend=False,
    ax=ax,
)
ax.set_title("AQI Distribution Across Locations (Daily)")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


In [None]:
# Box plot of the hourly "AQI" distribution, one box per location.
# seaborn >= 0.13 deprecates passing `palette` without `hue`; mapping the x variable
# to hue and disabling the legend is the documented replacement, giving the same
# per-location colouring without the FutureWarning.
# NOTE(review): the recorded hourly.info() output is truncated in this notebook, so
# the presence of "location" and "AQI" columns in the hourly table is unverified -
# confirm before running.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(
    data=hourly,
    x="location",
    y="AQI",
    hue="location",
    palette="coolwarm",
    legend=False,
    ax=ax,
)
ax.set_title("AQI Distribution Across Locations (Hourly)")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


In [None]:
# Bar chart of the mean daily "AQI" for each location.
# NOTE(review): the recorded daily.info() output lists no "AQI" column - confirm
# it exists in the CSV (or is derived earlier) before running.
mean_aqi_by_location = daily.groupby("location")["AQI"].mean()
fig, ax = plt.subplots(figsize=(12, 6))
mean_aqi_by_location.plot(kind="bar", color="purple", ax=ax)
ax.set_title("Average AQI by Location (Daily)")
ax.set_ylabel("AQI")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


Correlation Analysis

# Annotated heatmap of pairwise correlations between the numeric columns of the
# daily table.
# Only numeric columns are kept before calling corr(): the table also carries a
# string "location" column and a "time" column, and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0 instead of silently skipping it.
daily_numeric = daily.select_dtypes(include="number")
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(daily_numeric.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix (Daily Data)")
plt.show()


In [None]:
# Annotated heatmap of pairwise correlations between the numeric columns of the
# hourly table.
# Only numeric columns are kept before calling corr(): DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0 instead of silently skipping it, and the
# hourly table has at least a non-numeric "time" column.
hourly_numeric = hourly.select_dtypes(include="number")
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(hourly_numeric.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix (Hourly Data)")
plt.show()

Spatio-Temporal AQI Variation

In [None]:
# Daily "AQI" against "time", one coloured line per location.
# NOTE(review): the recorded daily.info() output lists no "AQI" column - confirm
# it exists in the CSV (or is derived earlier) before running.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(data=daily, x="time", y="AQI", hue="location", palette="tab10", ax=ax)
ax.set_title("Daily AQI Trends Across Locations")
ax.set_ylabel("AQI")
ax.set_xlabel("Time")
ax.legend(title="Location")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()


In [None]:
# Hourly "AQI" against "time", one coloured line per location.
# NOTE(review): the recorded hourly.info() output is truncated in this notebook, so
# the presence of "location" and "AQI" columns in the hourly table is unverified -
# confirm before running.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(data=hourly, x="time", y="AQI", hue="location", palette="tab10", ax=ax)
ax.set_title("Hourly AQI Trends Across Locations")
ax.set_ylabel("AQI")
ax.set_xlabel("Time")
ax.legend(title="Location")
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()
