In [None]:

# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configure visualization settings.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and the
# old names are not available on newer releases, so pick whichever spelling
# this installation actually lists instead of hard-coding one of them.
for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# Applied after the matplotlib style, so seaborn's "whitegrid" settings take
# precedence wherever the two overlap.
sns.set_theme(style="whitegrid")

# Loading the dataset (the file is read as cp1252-encoded text)
data = pd.read_csv("./datasets/data.csv", encoding="cp1252")
# Last expression of the cell: shows the first rows as a quick sanity check
data.head()


In [None]:

# Explore the dataset
# info() prints a structural summary of the frame; describe() is the cell's
# last expression, so its summary-statistics table is the displayed output.
data.info()
data.describe()


In [None]:

# Parse the 'date' column as datetimes (entries that cannot be parsed become
# NaT), discard the rows left without a date, and order the rest by date.
data = (
    data.assign(date=pd.to_datetime(data['date'], errors='coerce'))
    .dropna(subset=['date'])
    .sort_values('date')
)

# AQI over time, drawn as a single blue line
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data['date'], data['AQI'], color='b', label='AQI')
ax.set(
    xlabel="Date",
    ylabel="Air Quality Index (AQI)",
    title="AQI Trend Over Time",
)
ax.legend()
plt.show()


In [None]:

# Pollutant columns to draw together on one time axis
# (adjust the list if the dataset names these columns differently)
pollutants = ['PM2.5', 'PM10', 'CO']

# One line per pollutant, all sharing the same axes
fig, ax = plt.subplots(figsize=(12, 6))
for column in pollutants:
    ax.plot(data['date'], data[column], label=column)

ax.set(
    xlabel="Date",
    ylabel="Pollutant Levels",
    title="Pollutant Trends Over Time",
)
ax.legend()
plt.show()


In [None]:

# Tag every row with its calendar month (a monthly Period derived from 'date'),
# then average AQI within each month.
data['month'] = data['date'].dt.to_period('M')
monthly_aqi = data['AQI'].groupby(data['month']).mean()

# One teal bar per month
fig, ax = plt.subplots(figsize=(12, 6))
monthly_aqi.plot(kind='bar', color='teal', ax=ax)
ax.set(
    xlabel="Month",
    ylabel="Average AQI",
    title="Average AQI by Month",
)
plt.show()


In [None]:

# Box plots of the pollutant columns, one box per pollutant
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data[pollutants], ax=ax)
ax.set(
    xlabel="Pollutants",
    ylabel="Levels",
    title="Distribution of Pollutant Levels",
)
plt.show()


In [None]:

# AQI plotted against each pollutant's level; every pollutant gets its own
# semi-transparent point series on the same axes.
fig, ax = plt.subplots(figsize=(12, 6))
for column in pollutants:
    ax.scatter(data[column], data['AQI'], alpha=0.6, label=column)

ax.set(
    xlabel="Pollutant Levels",
    ylabel="AQI",
    title="Relationship between AQI and Pollutant Levels",
)
ax.legend()
plt.show()


In [None]:

# AQI and PM2.5 over time on shared axes, each with its own colour and marker
line_specs = (
    ("AQI", "blue", "o"),
    ("PM2.5", "red", "x"),
)

fig, ax = plt.subplots(figsize=(12, 6))
for column, colour, marker in line_specs:
    sns.lineplot(
        data=data, x="date", y=column,
        label=column, color=colour, marker=marker, ax=ax,
    )

# Set after plotting so these labels replace the ones seaborn derives from x/y
ax.set(
    xlabel="Date",
    ylabel="Index/Level",
    title="Customized AQI and PM2.5 Trends Over Time",
)
ax.legend()
plt.show()
