In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's "whitegrid" theme to every figure drawn below.
# `set_theme` is the preferred interface; `sns.set` is only a backward-compatible alias for it.
sns.set_theme(style="whitegrid")

In [None]:
# Read the processed CSV; the path is relative to the notebook's working directory.
cleaned_csv = "../data/processed/city_day_cleaned.csv"
df = pd.read_csv(cleaned_csv)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Column names, dtypes and non-null counts (printed to stdout).
df.info()

# Summary statistics from the default `describe()`; left as the last
# expression so it is rendered as the cell's output.
summary_stats = df.describe()
summary_stats

In [None]:
# Parse the timestamp column once and derive the calendar year from it.
# Both columns are written back onto `df` because later cells read them.
parsed_datetime = pd.to_datetime(df['Datetime'])
df['Datetime'] = parsed_datetime
df['Year'] = parsed_datetime.dt.year

# Yearly AQI trend; seaborn aggregates the many rows that share a year
# (mean by default) and draws its default error band around the line.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=df, x="Year", y="AQI", ax=ax)
ax.set_title("India AQI Trend (2015–2024)")
plt.show()


In [None]:
# Mean AQI per city, ranked from highest to lowest; keep the ten highest.
city_mean_aqi = df.groupby("City")["AQI"].mean()
top_cities = city_mean_aqi.sort_values(ascending=False).head(10)

# Bar chart of the ranking (pandas creates the figure and returns its Axes).
ax = top_cities.plot.bar(figsize=(10, 5))
ax.set_title("Top 10 Most Polluted Cities (Avg AQI)")
ax.set_ylabel("Average AQI")
plt.show()


In [None]:
# Derive the calendar month (1-12) from the timestamp column.
# `pd.to_datetime` is applied here as well so this cell does not depend on an
# earlier cell having already converted 'Datetime' in place; on a column that
# is already datetime64 the call is a no-op.
df['Month'] = pd.to_datetime(df['Datetime']).dt.month

plt.figure(figsize=(12,6))
# Seaborn aggregates all rows sharing a month (mean by default).
# `errorbar=None` suppresses the confidence band; it replaces `ci=None`, which
# is deprecated since seaborn 0.12 and emits a FutureWarning.
sns.lineplot(data=df, x="Month", y="PM2.5", errorbar=None)
plt.title("Seasonal Trend of PM2.5 in India")
plt.show()


In [None]:
# Number of rows falling in each AQI category, most frequent first
# (`value_counts` sorts by count in descending order).
bucket_counts = df["AQI_Bucket"].value_counts()

ax = bucket_counts.plot.bar(figsize=(8, 4))
ax.set_title("AQI Category Distribution in India (2015–2024)")
plt.show()


In [None]:
# City x Year matrix of mean AQI: one row per city, one column per year.
# Relies on the 'Year' column added to `df` by the yearly-trend cell.
pivot = pd.pivot_table(
    df,
    index="City",
    columns="Year",
    values="AQI",
    aggfunc="mean",
)

# Darker red = higher mean AQI for that city/year.
fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(pivot, cmap="Reds", ax=ax)
ax.set_title("City vs Year — AQI Heatmap")
plt.show()


In [None]:
### Key Insights
- Delhi and nearby NCR cities consistently show the highest AQI.
- Winter months (Nov–Jan) show peak PM2.5 levels.
- The overall AQI trend shows a slight improvement after 2019.
- Several cities show seasonal patterns that are plausibly linked to crop burning or festivals (a hypothesis; this notebook does not test the cause).
