In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display


In [None]:
# Step 1: Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the air-quality readings from the CSV located in the working directory.
df = pd.read_csv(filepath_or_buffer='delhiaqi.csv')

# Print the schema summary, then leave the first rows as the cell's displayed value.
df.info()
df.head(n=5)


In [None]:
# Turn the 'date' column into pandas datetimes so the .dt accessor is available.
df['date'] = pd.to_datetime(df['date'])

# Numeric calendar components, added as columns in this order.
for part in ('year', 'month', 'day', 'hour'):
    df[part] = getattr(df['date'].dt, part)

# Weekday as its name (via day_name()) rather than a number.
df['weekday'] = df['date'].dt.day_name()


In [None]:
# Month number -> season label for the months that are listed explicitly.
# Any month not present here is labelled 'Post-Monsoon' by get_season().
_SEASON_BY_MONTH = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Summer', 4: 'Summer', 5: 'Summer',
    6: 'Monsoon', 7: 'Monsoon', 8: 'Monsoon',
}


def get_season(month):
    """Return the season label for a month number.

    Months 12, 1, 2 map to 'Winter'; 3-5 to 'Summer'; 6-8 to 'Monsoon'.
    Every other value (including 9-11) maps to 'Post-Monsoon'.
    """
    return _SEASON_BY_MONTH.get(month, 'Post-Monsoon')

# Label every row with its season, derived element-wise from the month number.
df['season'] = df['month'].map(get_season)

In [None]:
# Spot-check the month -> season labelling on the first few rows.
df.loc[:, ['date', 'month', 'season']].head()

In [None]:
# Pollutant columns to analyse. The labels must match the DataFrame's column
# names exactly: PM2.5 is stored as 'pm2_5', and the frame has no 'nox',
# 'benzene' or 'toluene' columns, so selecting those would raise a KeyError.
pollutants = ['pm2_5', 'pm10', 'no', 'no2', 'nh3', 'co', 'so2', 'o3']


In [None]:
# Display the DataFrame's column labels.
df.columns

In [None]:
# Pollutant columns used for the seasonal averages. 'pm2_5' is included so this
# list agrees with the pollutant lists used by the later cells of the notebook.
pollutants = ['pm2_5', 'pm10', 'no', 'no2', 'co', 'so2', 'o3', 'nh3']


In [None]:
# Mean of every selected pollutant column within each season.
seasonal_avg = df.groupby(by='season')[pollutants].agg('mean')

In [None]:
import matplotlib.pyplot as plt

# Plot the seasonal variation as a grouped bar chart.
# DataFrame.plot() opens its own figure unless it is handed an `ax`, so the
# size is passed through `figsize` here; calling plt.figure() beforehand would
# only leave an extra, empty figure in the output.
ax = seasonal_avg.plot(kind='bar', figsize=(12, 6), rot=0)  # rot=0 keeps season labels horizontal
ax.set_title('Average Pollutant Levels by Season in Delhi')
ax.set_ylabel('Pollutant Concentration (µg/m³)')
ax.set_xlabel('Season')
ax.grid(True)
ax.legend(title='Pollutants')
ax.figure.tight_layout()
plt.show()

In [None]:
# Print the column labels as a plain Python list.
print(list(df.columns))

In [None]:
# Add 'only_date': the calendar-date part (no time of day) of each 'date' value.
df = df.assign(only_date=pd.to_datetime(df['date']).dt.date)


In [None]:
import matplotlib.pyplot as plt

# Step 1: per-day means of PM2.5 and PM10, plus their simple average as a rough index
daily_avg = df.groupby('only_date')[['pm2_5', 'pm10']].mean()
daily_avg['approx_aqi'] = daily_avg['pm2_5'].add(daily_avg['pm10']).div(2)

# Step 2: draw the daily series as a single line
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(daily_avg.index, daily_avg['approx_aqi'], color='crimson')
ax.set_title("Approximate Daily AQI Trend in Delhi", fontsize=16)
ax.set_xlabel("Date")
ax.set_ylabel("Approx AQI (Average of PM2.5 and PM10)")
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Display the column labels as a plain list to double-check the pollutant names
list(df.columns)


In [None]:
# Pollutant columns to summarise
pollutants = [
    'pm2_5', 'pm10', 'co', 'no',
    'no2', 'so2', 'o3', 'nh3',
]

# Per-season mean of each pollutant column
seasonal_avg = df.groupby(by='season')[pollutants].agg('mean')

# Last expression of the cell, so the table is rendered as its output
seasonal_avg


In [None]:
import matplotlib.pyplot as plt

# Grouped bar chart: one group per season, one bar per pollutant
ax = seasonal_avg.plot.bar(figsize=(12, 6), rot=0)
ax.set_title('Average Pollutant Levels by Season')
ax.set_ylabel('Concentration')
ax.set_xlabel('Season')
ax.legend(title='Pollutants')
ax.figure.tight_layout()
plt.show()


In [None]:
# Pollutant columns to pull out of the full frame
pollutant_cols = [
    'co', 'no', 'no2', 'o3',
    'so2', 'pm2_5', 'pm10', 'nh3',
]
# Sub-frame holding only those columns
pollutant_data = df.loc[:, pollutant_cols]


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Pairwise correlation between the pollutant columns, drawn as an annotated heatmap
corr_matrix = pollutant_data.corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap of Air Pollutants")
plt.show()


In [None]:
# Hard-coded list of column labels. Evaluating it only echoes the literal;
# nothing is read from or written to `df` here.
['date', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3', 'year', 'month', 'day', 'hour', 'weekday', 'season', 'only_date']


In [None]:
# Mean PM2.5 for each calendar month number.
monthly_avg = df.groupby(by='month')['pm2_5'].agg('mean')


In [None]:
# Line chart of the month-by-month PM2.5 means
fig, ax = plt.subplots(figsize=(10, 5))
monthly_avg.plot(ax=ax, marker='o', color='purple')
ax.set_title('Average PM2.5 Levels by Month')
ax.set_xlabel('Month')
ax.set_ylabel('PM2.5 Concentration')
ax.set_xticks(range(1, 13))  # one tick for every month number 1-12
ax.grid(True)
plt.show()

In [None]:
# Mean PM2.5 for each hour-of-day value.
hourly_avg = df.groupby(by='hour')['pm2_5'].agg('mean')


In [None]:
# Line chart of the hour-by-hour PM2.5 means
fig, ax = plt.subplots(figsize=(10, 5))
hourly_avg.plot(ax=ax, marker='o', color='green')
ax.set(
    title='Average PM2.5 Levels by Hour of Day',
    xlabel='Hour (0 to 23)',
    ylabel='PM2.5 Concentration',
)
ax.set_xticks(range(0, 24))  # one tick for every hour 0-23
ax.grid(True)
plt.show()


In [None]:
# Season label for each calendar month number
season_by_month = {
    **dict.fromkeys((12, 1, 2), 'Winter'),
    **dict.fromkeys((3, 4, 5), 'Summer'),
    **dict.fromkeys((6, 7, 8), 'Monsoon'),
    **dict.fromkeys((9, 10, 11), 'Post-Monsoon'),
}

# Reload the CSV from scratch. This rebinds `df`, so only the columns derived
# below ('only_date', 'month', 'season') exist on it afterwards.
df = pd.read_csv('delhiaqi.csv')

# Convert 'date' to datetimes
df['date'] = pd.to_datetime(df['date'])

# Re-create the time-based helper columns
df = df.assign(
    only_date=df['date'].dt.date,
    month=df['date'].dt.month,
)
df['season'] = df['month'].map(season_by_month)


In [None]:
pollutants = ['co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3']

# Dropdown for picking the pollutant to plot; 'pm2_5' is the initial selection.
dropdown_config = dict(
    options=pollutants,
    value='pm2_5',
    description='Pollutant:',
    style={'description_width': 'initial'},
)
pollutant_dropdown = widgets.Dropdown(**dropdown_config)

display(pollutant_dropdown)


In [None]:
def plot_pollution(pollutant):
    """Plot the daily mean of one pollutant and save the chart as a PNG.

    Parameters
    ----------
    pollutant : str
        Label of the column in the notebook-level ``df`` to plot
        (for example ``'pm2_5'``).

    Side effects
    ------------
    Saves the figure as ``<pollutant>_trend.png`` in the current working
    directory and then shows it.
    """
    daily_avg = df.groupby('only_date')[pollutant].mean()
    plt.figure(figsize=(14, 5))
    sns.lineplot(x=daily_avg.index, y=daily_avg.values)
    plt.title(f'Daily Average of {pollutant.upper()} Over Time')
    plt.xlabel('Date')
    plt.ylabel(f'{pollutant.upper()} Level')
    plt.grid(True)
    plt.tight_layout()
    # Save before show(): saving after the figure has been shown can produce a blank image.
    plt.savefig(f'{pollutant}_trend.png')
    plt.show()


# Output produced inside a widget callback is not attached to any cell in some
# front ends (e.g. JupyterLab), so the redraw is captured in an Output widget
# that is displayed under this cell.
plot_output = widgets.Output()
display(plot_output)


def _on_pollutant_change(change):
    """Redraw the trend chart for the newly selected dropdown value."""
    # wait=True keeps the previous chart visible until the new one is ready.
    plot_output.clear_output(wait=True)
    with plot_output:
        plot_pollution(change['new'])


pollutant_dropdown.observe(_on_pollutant_change, names='value')


In [None]:
# Draw the chart once for whatever the dropdown currently has selected.
plot_pollution(pollutant=pollutant_dropdown.value)
