# FOLIUM (LEAFLET) BASED AQI VISUALIZATION

In [14]:
import html
import warnings

import folium
import numpy as np
import pandas as pd
from folium import plugins
warnings.filterwarnings('ignore')

AUGMENT DATASET WITH CITY COORDINATES

In [15]:


# Load the cleaned city/day AQI dataset
df = pd.read_csv('/kaggle/input/city-day-cleaned/city_day_cleaned (1).csv')
print(f"Original dataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Load city coordinates lookup table
city_coords = pd.read_csv('/kaggle/input/city-coordinates/city_coordinates.csv')
print(city_coords.head())

# Merge coordinates with main dataset.
# validate='many_to_one' makes pandas raise if the lookup lists a city more
# than once, which would otherwise silently duplicate rows of the main dataset.
df_geo = df.merge(city_coords, on='City', how='left', validate='many_to_one')

# Check for missing coordinates and report them: cities without a match in the
# lookup keep NaN Latitude/Longitude after the left join and are skipped later
# when markers are drawn.
missing_coords = df_geo[df_geo['Latitude'].isnull()]
if not missing_coords.empty:
    missing_cities = sorted(missing_coords['City'].dropna().unique())
    print(f"\n WARNING: {len(missing_coords)} rows have no coordinates (cities: {missing_cities})")

print(f"\n Augmented dataset shape: {df_geo.shape}")

Original dataset shape: (27284, 15)
Columns: ['City', 'PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2', 'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI', 'AQI_Bucket']
        City  Latitude  Longitude
0  Bengaluru   12.9716    77.5946
1      Delhi   28.6791    77.0697
2    Lucknow   26.8500    80.9499
3  Hyderabad   17.3850    78.4867
4    Chennai   13.0827    80.2707

 Augmented dataset shape: (27284, 17)


 AQI PREDICTION

In [16]:
# Confirm the AQI columns are present in the merged frame
has_aqi = 'AQI' in df_geo.columns
has_bucket = 'AQI_Bucket' in df_geo.columns
print(f"\n AQI column present: {has_aqi}")
print(f" AQI_Bucket column present: {has_bucket}")

# Headline statistics of the AQI column, two decimals each
print("\nAQI statistics:")
aqi_values = df_geo['AQI']
for label, reducer in (
    ('Min', aqi_values.min),
    ('Max', aqi_values.max),
    ('Mean', aqi_values.mean),
):
    print(f"  {label}: {reducer():.2f}")


 AQI column present: True
 AQI_Bucket column present: True

AQI statistics:
  Min: 13.00
  Max: 2049.00
  Mean: 164.21


PREPARE DATA FOR THE FOLIUM MAP

In [17]:

# Aggregate city-level statistics (mean AQI per city)

def _most_common_bucket(buckets):
    """Return the most frequent AQI bucket of a group.

    Falls back to the group's first value when ``mode()`` is empty
    (e.g. every bucket in the group is missing).
    """
    modes = buckets.mode()
    return modes.iloc[0] if len(modes) > 0 else buckets.iloc[0]


# Named aggregation ties every output column name to its source column and
# reducer, so the names cannot drift out of step with the aggregation order
# (which a positional rename of the flattened MultiIndex would allow).
city_aqi = (
    df_geo.groupby('City')
    .agg(
        Latitude=('Latitude', 'first'),
        Longitude=('Longitude', 'first'),
        AQI_mean=('AQI', 'mean'),
        AQI_min=('AQI', 'min'),
        AQI_max=('AQI', 'max'),
        AQI_std=('AQI', 'std'),
        AQI_count=('AQI', 'count'),
        AQI_Bucket=('AQI_Bucket', _most_common_bucket),
    )
    .reset_index()
)


print("\nCity AQI Summary:")
print(city_aqi[['City', 'AQI_mean', 'AQI_Bucket', 'AQI_count']].head(10))



City AQI Summary:
           City    AQI_mean    AQI_Bucket  AQI_count
0     Ahmedabad  448.285188        Severe       1433
1        Aizawl   35.088749          Good        113
2     Amaravati   95.817224  Satisfactory        905
3      Amritsar  120.091394  Satisfactory       1207
4     Bengaluru   96.648498  Satisfactory       2009
5        Bhopal  132.582444      Moderate        280
6  Brajrajnagar  149.619233      Moderate        795
7    Chandigarh   96.294720  Satisfactory        304
8       Chennai  114.770043  Satisfactory       1975
9    Coimbatore   73.800725  Satisfactory        384


AQI → COLOR MAPPING

In [18]:
def aqi_to_color(aqi):
    """Map an AQI value to the colour name used for its band.

    Bands are inclusive at their upper bound: <=50 green, <=100 yellow,
    <=200 orange, <=300 red, <=400 darkred; anything above 400 is maroon.
    Values that ``pd.isnull`` treats as missing map to gray.
    """
    if pd.isnull(aqi):
        return 'gray'

    bands = (
        (50, 'green'),     # Good
        (100, 'yellow'),   # Satisfactory
        (200, 'orange'),   # Moderate
        (300, 'red'),      # Poor
        (400, 'darkred'),  # Very Poor
    )
    for upper_bound, colour in bands:
        if aqi <= upper_bound:
            return colour
    return 'maroon'  # Severe

def aqi_to_category(aqi):
    """Map an AQI value to its category label.

    Upper bounds are inclusive: Good (<=50), Satisfactory (<=100),
    Moderate (<=200), Poor (<=300), Very Poor (<=400); values above 400
    are 'Severe'. Missing values (per ``pd.isnull``) return 'Unknown'.
    """
    if pd.isnull(aqi):
        return 'Unknown'

    upper_bounds = (
        (50, 'Good'),
        (100, 'Satisfactory'),
        (200, 'Moderate'),
        (300, 'Poor'),
        (400, 'Very Poor'),
    )
    # First band whose inclusive upper bound covers the value; else Severe.
    return next(
        (label for limit, label in upper_bounds if aqi <= limit),
        'Severe',
    )
def get_health_message(aqi):
    """Return the health-impact text for an AQI value.

    Uses the same inclusive upper bounds as the other AQI helpers
    (50, 100, 200, 300, 400). Missing values (per ``pd.isnull``) yield
    'Data unavailable'; values above 400 yield the severe message.
    """
    if pd.isnull(aqi):
        return 'Data unavailable'
    if aqi <= 50:
        return 'Minimal Impact - Air quality is satisfactory'
    if aqi <= 100:
        return 'Minor breathing discomfort to sensitive people'
    if aqi <= 200:
        return 'Breathing discomfort to people with lung/heart disease'
    if aqi <= 300:
        return 'Breathing discomfort to most people on prolonged exposure'
    if aqi <= 400:
        return 'Respiratory illness on prolonged exposure'
    return ' SEVERE - Affects healthy people, serious impact on those with existing diseases'

# Derive the display columns from each city's mean AQI
derived_columns = {
    'color': aqi_to_color,
    'category': aqi_to_category,
    'health_message': get_health_message,
}
for column_name, mapper in derived_columns.items():
    city_aqi[column_name] = city_aqi['AQI_mean'].apply(mapper)

# How many cities fall into each colour band
print("\nColor distribution:")
color_counts = city_aqi['color'].value_counts()
print(color_counts)


Color distribution:
color
orange    12
yellow     8
red        4
maroon     1
green      1
Name: count, dtype: int64


FOLIUM MAP VISUALIZATION

In [19]:
# Initialize map centered on India
# Creates the base folium map that later cells add markers, the heatmap,
# the legend and the disclaimer to.
india_center = [22.9734, 78.6569]  # Center of India
m = folium.Map(
    location=india_center,
    zoom_start=5,
    tiles='OpenStreetMap',
    control_scale=True
)

# Add title
# The banner is a fixed-position HTML box attached to the page root, so it
# overlays the map rather than being a map layer.
# NOTE(review): the banner credits a Random Forest model; nothing in this cell
# computes or loads one - confirm the wording matches the data being plotted.
title_html = '''
<div style="position: fixed; 
            top: 10px; left: 50px; width: 400px; height: 90px; 
            background-color: white; border:2px solid grey; z-index:9999; 
            font-size:14px; padding: 10px">
<h3 style="margin:0"> India Air Quality Index (AQI) Map</h3>
<p style="margin:5px 0; font-size:12px">
Based on Random Forest ML Model<br>
Data aggregated at city level
</p>
</div>
'''
m.get_root().html.add_child(folium.Element(title_html))

#  Plot City AQI Points
# One circle marker per city. Only cities with both coordinates can be placed
# on the map, so rows missing either one are dropped up front.
cities_with_coords = city_aqi.dropna(subset=['Latitude', 'Longitude'])
marker_count = 0

for idx, row in cities_with_coords.iterrows():
    # Determine marker size based on AQI (larger circles for higher AQI).
    # Fall back to the base size when the mean is missing so the marker is
    # never given a NaN radius.
    radius = 8 + (row['AQI_mean'] / 100) if pd.notna(row['AQI_mean']) else 8

    # City names come from the input CSV; escape them before embedding in
    # HTML so characters such as '&' or '<' cannot break the popup markup.
    city_name = html.escape(str(row['City']))

    # Create detailed popup
    popup_html = f"""
        <div style="font-family: Arial; width: 250px">
            <h4 style="margin: 0 0 10px 0; color: {row['color']}">{city_name}</h4>
            <table style="width: 100%; font-size: 12px">
                <tr>
                    <td><b>Mean AQI:</b></td>
                    <td style="color: {row['color']}; font-weight: bold">{row['AQI_mean']:.1f}</td>
                </tr>
                <tr>
                    <td><b>Category:</b></td>
                    <td><span style="background-color: {row['color']}; color: white; padding: 2px 6px; border-radius: 3px">{row['category']}</span></td>
                </tr>
                <tr><td colspan="2"><hr style="margin: 5px 0"></td></tr>
                <tr>
                    <td><b>Min AQI:</b></td>
                    <td>{row['AQI_min']:.1f}</td>
                </tr>
                <tr>
                    <td><b>Max AQI:</b></td>
                    <td>{row['AQI_max']:.1f}</td>
                </tr>
                <tr>
                    <td><b>Std Dev:</b></td>
                    <td>{row['AQI_std']:.1f}</td>
                </tr>
                <tr>
                    <td><b>Samples:</b></td>
                    <td>{row['AQI_count']:.0f}</td>
                </tr>
                <tr><td colspan="2"><hr style="margin: 5px 0"></td></tr>
                <tr>
                    <td colspan="2" style="font-size: 11px; color: #555">
                        <b>Health Impact:</b><br>
                        {row['health_message']}
                    </td>
                </tr>
            </table>
            <p style="font-size: 10px; color: #888; margin-top: 5px">
                 Interpolated AQI - City-level average
            </p>
        </div>
        """

    # Add circle marker
    folium.CircleMarker(
        location=(row['Latitude'], row['Longitude']),
        radius=radius,
        color=row['color'],
        fill=True,
        fillColor=row['color'],
        fillOpacity=0.7,
        weight=2,
        popup=folium.Popup(popup_html, max_width=300),
        tooltip=f"{city_name}: AQI {row['AQI_mean']:.0f} ({row['category']})"
    ).add_to(m)

    marker_count += 1

print(f" Added {marker_count} city markers")

 Added 26 city markers


HEATMAP LAYER

In [20]:
# Prepare heatmap data: [[lat, lon, intensity], ...]
# Keep only cities that have both coordinates and a mean AQI: HeatMap does not
# accept NaN values in its data, so incomplete rows are dropped here.
heat_columns = ['Latitude', 'Longitude', 'AQI_mean']
heat_data = city_aqi[heat_columns].dropna().values.tolist()

# Add heatmap
# `max_val` is intentionally not passed: recent folium releases deprecate it
# (it is ignored with a warning, which warnings.filterwarnings would hide).
# The gradient maps relative intensity (0.0-1.0) to the legend colours.
plugins.HeatMap(
    heat_data,
    min_opacity=0.3,
    radius=30,
    blur=20,
    max_zoom=6,
    gradient={
        0.0: 'green',
        0.2: 'yellow',
        0.4: 'orange',
        0.6: 'red',
        0.8: 'darkred',
        1.0: 'maroon'
    }
).add_to(m)

<folium.plugins.heat_map.HeatMap at 0x7ee43f024b30>

In [21]:
# AQI legend: a fixed-position HTML box in the bottom-right corner listing the
# six colour bands. The colours and ranges mirror aqi_to_color /
# aqi_to_category; keep them in sync if the band edges ever change.
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; right: 50px; width: 180px; 
            background-color: white; border:2px solid grey; z-index:9999; 
            font-size:12px; padding: 10px">
<h4 style="margin:0 0 10px 0">AQI Legend</h4>
<div style="margin: 5px 0">
    <span style="background-color: green; width: 20px; height: 10px; display: inline-block"></span>
    <span> 0-50 Good</span>
</div>
<div style="margin: 5px 0">
    <span style="background-color: yellow; width: 20px; height: 10px; display: inline-block"></span>
    <span> 51-100 Satisfactory</span>
</div>
<div style="margin: 5px 0">
    <span style="background-color: orange; width: 20px; height: 10px; display: inline-block"></span>
    <span> 101-200 Moderate</span>
</div>
<div style="margin: 5px 0">
    <span style="background-color: red; width: 20px; height: 10px; display: inline-block"></span>
    <span> 201-300 Poor</span>
</div>
<div style="margin: 5px 0">
    <span style="background-color: darkred; width: 20px; height: 10px; display: inline-block"></span>
    <span> 301-400 Very Poor</span>
</div>
<div style="margin: 5px 0">
    <span style="background-color: maroon; width: 20px; height: 10px; display: inline-block"></span>
    <span> 401+ Severe</span>
</div>
</div>
'''
# Attach the legend to the page root so it overlays the map.
m.get_root().html.add_child(folium.Element(legend_html))

<branca.element.Element at 0x7ee44e8c8a40>

In [22]:
# Check for severe AQI cities
# City means are floats, so the band edges must match aqi_to_category exactly
# (Severe: > 400; Very Poor: > 300 and <= 400). Otherwise a mean such as 400.5
# is labelled "Severe" in its popup but not flagged here, and a mean of exactly
# 300.0 ("Poor") is listed as very poor.
severe_cities = city_aqi[city_aqi['AQI_mean'] > 400]
if len(severe_cities) > 0:
    print(f"\n WARNING: {len(severe_cities)} cities with SEVERE AQI (≥401):")
    for idx, row in severe_cities.iterrows():
        print(f"   - {row['City']}: AQI {row['AQI_mean']:.1f}")

        # Add a warning marker on top of the city's circle marker when the
        # city can be placed on the map.
        if pd.notna(row['Latitude']) and pd.notna(row['Longitude']):
            folium.Marker(
                location=(row['Latitude'], row['Longitude']),
                popup=f" SEVERE AIR QUALITY<br>{row['City']}: AQI {row['AQI_mean']:.0f}",
                icon=folium.Icon(color='black', icon='warning-sign', prefix='glyphicon')
            ).add_to(m)
else:
    print(" No cities with severe AQI (≥401)")

# High AQI cities (300-400): reported in the log only, no extra markers
very_poor_cities = city_aqi[(city_aqi['AQI_mean'] > 300) & (city_aqi['AQI_mean'] <= 400)]
if len(very_poor_cities) > 0:
    print(f"\n {len(very_poor_cities)} cities with VERY POOR AQI (300-400):")
    for idx, row in very_poor_cities.iterrows():
        print(f"   - {row['City']}: AQI {row['AQI_mean']:.1f}")
else:
    print(" No cities with very poor AQI (300-400)")

# Add disclaimer
# Fixed-position notice in the bottom-left corner of the page, attached to the
# page root like the title and the legend.
# NOTE(review): the text cites a Random Forest model with R²=0.91 and calls the
# values "interpolated"; neither is computed in the cells shown here - confirm
# these statements against the modelling section before publishing the map.
disclaimer_html = '''
<div style="position: fixed; 
            bottom: 10px; left: 50px; width: 500px; 
            background-color: #fff3cd; border:2px solid #ffc107; z-index:9999; 
            font-size:11px; padding: 8px">
<b> DISCLAIMER:</b> This map displays <b>interpolated AQI values</b> aggregated at city level.
Individual readings may vary. Do not average Severe with Good categories.
AQI predictions based on Random Forest ML model (R²=0.91).
For official health advisories, consult local authorities.
</div>
'''
m.get_root().html.add_child(folium.Element(disclaimer_html))


   - Ahmedabad: AQI 448.3
 No cities with very poor AQI (300-400)


<branca.element.Element at 0x7ee44e8c8a40>

In [23]:
# SAVE MAP
# Write the interactive map as a standalone HTML file.
output_path = '/kaggle/working/india_aqi_map.html'
m.save(output_path)

# Save city-level aggregated data (one row per city, display columns included)
export_columns = [
    'City', 'Latitude', 'Longitude',
    'AQI_mean', 'AQI_min', 'AQI_max', 'AQI_std', 'AQI_count',
    'category', 'color', 'health_message',
]
city_aqi_export = city_aqi[export_columns]
export_path = '/kaggle/working/city_aqi_mapping_data.csv'
city_aqi_export.to_csv(export_path, index=False)

# Save augmented full dataset with coordinates
full_export_path = '/kaggle/working/city_day_with_coordinates.csv'
df_geo.to_csv(full_export_path, index=False)


In [24]:

# SUMMARY STATISTICS
# Text recap of what the map covers and how cities split across AQI bands.

print("\n Geographic Coverage:")
total_cities = len(city_aqi)
latitudes = city_aqi['Latitude']
print(f"   - Total cities: {total_cities}")
print(f"   - Cities with coordinates: {latitudes.notna().sum()}")
print(f"   - Geographic span: {latitudes.min():.2f}°N to {latitudes.max():.2f}°N")

print("\n AQI Distribution:")
aqi_dist = city_aqi['category'].value_counts()
category_order = ['Good', 'Satisfactory', 'Moderate', 'Poor', 'Very Poor', 'Severe']
for category in category_order:
    # Categories with no cities are absent from value_counts, hence the default.
    count = aqi_dist.get(category, 0)
    if total_cities > 0:
        pct = count / total_cities * 100
    else:
        pct = 0
    print(f"   - {category:<15}: {count:2d} cities ({pct:5.1f}%)")


 Geographic Coverage:
   - Total cities: 26
   - Cities with coordinates: 26
   - Geographic span: 8.52°N to 31.62°N

 AQI Distribution:
   - Good           :  1 cities (  3.8%)
   - Satisfactory   :  8 cities ( 30.8%)
   - Moderate       : 12 cities ( 46.2%)
   - Poor           :  4 cities ( 15.4%)
   - Very Poor      :  0 cities (  0.0%)
   - Severe         :  1 cities (  3.8%)
