In [44]:
import requests
import re
import pandas as pd
from datetime import datetime
import pdfplumber
import certifi
import sys
import folium
from folium.plugins import HeatMap
import googlemaps
import pytz
from dotenv import load_dotenv
import os
load_dotenv()

In [2]:
# Disabled alternative to the hard-coded `date` in the next cell: derive
# today's date in the Asia/Kolkata time zone, formatted as YYYYMMDD.
# time_zone = pytz.timezone('Asia/Kolkata')
# date = datetime.now(time_zone).strftime("%Y%m%d")
# date

In [3]:
# Bulletin date as a YYYYMMDD string. It is interpolated into the download
# URL, the local PDF file name and the names of the generated map files.
date = '20250320'

In [4]:
# Download location of the AQI bulletin PDF for `date`.
# NOTE(review): the path has a double slash right after the host; the recorded
# run downloaded successfully with it, so it is left unchanged.
url = f'https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_{date}.pdf'

In [5]:
# Fetch the bulletin PDF and store it in the working directory.
# `timeout` is set explicitly: without it requests waits forever on a server
# that accepts the connection but never answers, which would hang the notebook.
# TLS certificates are checked against the certifi CA bundle.
response = requests.get(
    url,
    headers={"User-Agent": "Mozilla/5.0"},
    verify=certifi.where(),
    timeout=30,
)

if response.status_code == 200:
    # Binary mode: the payload is a PDF, not text.
    with open(f"AQI_Bulletin_{date}.pdf", "wb") as f:
        f.write(response.content)
    print("Download successful.")
else:
    # The later cells read the PDF from disk, so they will fail (or pick up a
    # file left over from an earlier run) when this branch is taken.
    print("Failed to download:", response.status_code)

Download successful.


In [6]:
# Path of the bulletin PDF on disk; same file name the download cell writes.
daily_doc = f'AQI_Bulletin_{date}.pdf'

In [7]:
# Quick check that the downloaded file opens as a PDF (the cell displays the
# returned object).
# NOTE(review): the object returned here is never closed; the cells below
# reopen the file inside `with` blocks instead.
pdfplumber.open(daily_doc)

<pdfplumber.pdf.PDF at 0x12c254f50>

In [8]:
# Gather the tables found on every page except the last one; the last page is
# read separately further down as plain text.
tables = []
with pdfplumber.open(daily_doc) as pdf:
    data_pages = pdf.pages[:-1]
    for page in data_pages:
        # extract_tables() returns a list of tables for the page.
        tables += page.extract_tables()

In [9]:
# Drop the first row of every extracted table (treated as its header row).
clean_tables = [table[1:] for table in tables]

In [10]:
# One DataFrame per header-less table; columns are positional at this point.
df_list = [pd.DataFrame(rows) for rows in clean_tables]

In [11]:
# Stack the per-table frames into one and label the columns with the header
# row of the first extracted table.
header_row = tables[0][0]
df_combined = pd.concat(df_list, ignore_index=True)
df_combined.columns = header_row

In [12]:
# Columns from the bulletin that the rest of the notebook does not use.
unused_columns = [
    'S.No',
    'Prominent Pollutant',
    'No. of Stations\nParticipated/\nTotal Stations',
]
df = df_combined.drop(columns=unused_columns)

In [13]:
# Normalise the header text: remove embedded line breaks first, then turn
# spaces into underscores.
without_line_breaks = df.columns.str.replace("\n", "")
df.columns = without_line_breaks.str.replace(" ", "_")

In [14]:
# The PDF yields text cells; convert the AQI index to integers so it can be
# sorted and used as a numeric weight.
df = df.astype({'IndexValue': int})

In [50]:
# Google Maps API key, read from the environment (load_dotenv() in the import
# cell runs before this). os.getenv returns None when the variable is unset.
API_KEY = os.getenv("GOOGLE_API_KEY")

In [16]:
# Shared Google Maps client used by get_state() and geocode_google().
gmaps = googlemaps.Client(key=API_KEY)

In [17]:
# Function to look up the state of a city by geocoding "<city>, India"
def get_state(city):
    """Return the state name for ``city``, or None if it cannot be determined.

    The first geocoding hit for "<city>, India" is inspected and the
    ``long_name`` of its first address component typed
    ``administrative_area_level_1`` is returned.

    Any failure (API error, no hits, unexpected response shape) yields None,
    as does a hit without such a component.
    """
    try:
        top_hit = gmaps.geocode(f"{city}, India")[0]
        return next(
            (
                part['long_name']
                for part in top_hit['address_components']
                if 'administrative_area_level_1' in part['types']
            ),
            None,
        )
    except Exception:
        # Best-effort lookup: callers get None instead of an exception.
        return None

# One geocoding request per row: resolve every city to its state.
state_per_city = df["City"].map(get_state)
df["State"] = state_per_city

In [18]:
# Preview the 20 rows with the highest AQI index value. The commented-out
# tail would write that selection to daily_aqi.csv instead of displaying it.
df.sort_values(by='IndexValue', ascending=False).head(20)#.to_csv('daily_aqi.csv', index=False)

Unnamed: 0,City,Air_Quality,IndexValue,State
51,Byrnihat,Very Poor,324,Meghalaya
157,Nalbari,Poor,299,Assam
84,Ghaziabad,Poor,266,Uttar Pradesh
190,Samastipur,Poor,264,Bihar
198,Siliguri,Poor,263,West Bengal
91,Hajipur,Poor,259,Bihar
161,Nayagarh,Poor,251,Odisha
89,Guwahati,Poor,250,Assam
209,Thiruvananthapuram,Poor,240,Kerala
185,Rourkela,Moderate,192,Odisha


In [19]:
# Function to geocode addresses
def geocode_google(address):
    """Geocode ``address`` and return ``pd.Series([lat, lng])``.

    The coordinates are taken from the first hit returned by the Google Maps
    client. When there are no hits, or the lookup raises for any reason, the
    result is ``pd.Series([None, None])`` so the caller always gets two values.

    NOTE(review): unlike get_state(), the address is sent as given, without
    an ", India" suffix.
    """
    try:
        hits = gmaps.geocode(address)
        if not hits:
            return pd.Series([None, None])
        coords = hits[0]['geometry']['location']
        return pd.Series([coords['lat'], coords['lng']])
    except Exception:
        # Best-effort lookup: failures are reported as missing coordinates.
        return pd.Series([None, None])

In [20]:
# Apply geocoding function to the DataFrame: one request per city. Each call
# returns a two-element Series, so apply() yields a two-column frame that is
# stored as the Latitude and Longitude columns.
df[['Latitude', 'Longitude']] = df['City'].apply(geocode_google)

In [21]:
# Retry geocoding once, but only for the cities whose first lookup failed.
# Repeating the lookup for every row would spend an API request on cities that
# already have coordinates and could replace a good result with a failed one.
missing_coords = df['Latitude'].isna()
if missing_coords.any():
    retried = df.loc[missing_coords, 'City'].apply(geocode_google)
    # Assign by position (to_numpy) because the retried frame's columns are
    # 0 and 1; failed lookups (None) become NaN through the float cast.
    df.loc[missing_coords, ['Latitude', 'Longitude']] = retried.astype(float).to_numpy()

In [22]:
# Cities that still have no coordinates get the numeric placeholder (0.0, 0.0)
# so that no NaN is left in either column. A float is used instead of the
# string '0' to keep both columns purely numeric, and both columns are filled
# regardless of which of the two is missing.
coord_cols = ['Latitude', 'Longitude']
df[coord_cols] = df[coord_cols].astype(float).fillna(0.0)

In [23]:
# Number of rows that still have no latitude (expected to be 0 at this point).
int(df['Latitude'].isna().sum())

0

In [24]:
# Read the last page of the bulletin as plain text; the cells below parse the
# list of cities without data from it.
with pdfplumber.open(daily_doc) as pdf:
    final_page = pdf.pages[-1]
    last_page_text = final_page.extract_text()

In [25]:
# Flatten the page to a single line: every line break becomes one space.
text = " ".join(last_page_text.split("\n"))

In [26]:
# Take the text between the first and the second colon (or up to the end if
# there is only one colon) and split it into comma-separated entries.
listing_segment = text.split(":")[1]
list_of_no_data_cities = listing_segment.split(",")

In [27]:
# Each entry is expected to contain a ")" before the city name; keep the text
# that follows the first ")" (up to the next one, if any), trimmed.
no_data_cities = [
    entry.split(")")[1].strip()
    for entry in list_of_no_data_cities
]

In [28]:
# Geocode every city that reported no data.
geocoded_no_data_cities = []
for each_city in no_data_cities:
    # Call the geocoder once per city and reuse the result for both
    # coordinates: two separate calls double the API usage and can pair a
    # latitude from one response with a longitude from another (or with None
    # if only one of the calls fails).
    coordinates = geocode_google(each_city)
    geocoded_no_data_cities.append({
        'city': each_city,
        'lat': coordinates[0],
        'long': coordinates[1],
    })

In [29]:
# One row per no-data city with the columns city, lat and long (the keys of
# the dictionaries built in the previous cell).
df_no_data = pd.DataFrame(geocoded_no_data_cities)

In [30]:
# Persist the geocoded no-data cities; the file name is not date-stamped, so
# every run overwrites the previous export.
df_no_data.to_csv('no_data_cities.csv', index=False)

In [31]:
# Create base map
m1 = folium.Map(location=[22.9734, 78.6569], zoom_start=4.5)

# Build [latitude, longitude, weight] triples for the heat layer, weighted by
# the AQI index value. Cities without usable coordinates are left out: that
# covers missing values as well as the 0/0 placeholder written for cities that
# could not be geocoded, which would otherwise show up as heat at (0°, 0°).
# to_numeric also copes with the placeholder being stored as the string '0'.
latitudes = pd.to_numeric(df['Latitude'], errors='coerce')
longitudes = pd.to_numeric(df['Longitude'], errors='coerce')
has_location = (
    latitudes.notna()
    & longitudes.notna()
    & ~((latitudes == 0) & (longitudes == 0))
)
heat_data = [
    [lat, lng, index_value]
    for lat, lng, index_value in zip(
        latitudes[has_location],
        longitudes[has_location],
        df.loc[has_location, 'IndexValue'],
    )
]
HeatMap(heat_data, radius=13, blur=10, max_zoom=4.5).add_to(m1)

# Text annotation rendered through a DivIcon marker.
folium.map.Marker(
    [35.5, 75],  # Approximate location to place annotation (adjust as needed)
    icon=folium.DivIcon(html="""
        <div style="position: absolute; top: 10px; left: 10px; font-size: 12px; font-weight: bold; 
                    background: rgba(255, 255, 255, 0.5); padding: 5px; border-radius: 2px; 
                    border: 0.5px solid black; z-index:9999;">
            Zoom in at area of interest
        </div>
    """)
).add_to(m1)

# Write the map for this bulletin date as a standalone HTML file.
m1.save(f"daily_map_{date}.html")

In [32]:
# Copy the map generated for `date` to the undated name "daily_map.html".
# The copy used to run in the opposite direction, which replaced the map that
# was just saved as daily_map_{date}.html with whatever daily_map.html held
# before (and failed when that file did not exist). Copying from the dated
# file leaves both files holding the freshly generated map.
# NOTE(review): confirm that daily_map.html is meant to mirror the latest map.
# Binary mode gives a byte-for-byte copy independent of the locale encoding.
with open(f"daily_map_{date}.html", "rb") as source_file:
    content = source_file.read()

with open("daily_map.html", "wb") as target_file:
    target_file.write(content)

In [33]:
# Create a base map centered around India
m2 = folium.Map(location=[20.5937, 78.9629], zoom_start=4.5)

# Add markers with city names as labels
for _, row in df_no_data.iterrows():
    # geocode_google() reports a failed lookup as missing coordinates; such a
    # city cannot be placed, and passing the missing values to folium.Marker
    # would raise and abort the whole map, so the city is skipped instead.
    if pd.isna(row["lat"]) or pd.isna(row["long"]):
        continue
    folium.Marker(
        location=[row["lat"], row["long"]],
        popup=folium.Popup(row["city"], parse_html=True),
        icon=folium.Icon(color="red", icon="flag")
    ).add_to(m2)

# Text annotation rendered through a DivIcon marker.
folium.map.Marker(
    [33, 75],  # Approximate location to place annotation (adjust as needed)
    icon=folium.DivIcon(html="""
        <div style="position: absolute; top: 10px; left: 10px; font-size: 12px; font-weight: bold; 
                    background: rgba(255, 255, 255, 0.5); padding: 5px; border-radius: 2px; 
                    border: 0.5px solid black; z-index:9999;">
            Zoom in at area of interest
        </div>
    """)
).add_to(m2)

# Write the map for this bulletin date as a standalone HTML file.
m2.save(f"no_data_map_{date}.html")

In [34]:
# Copy the map generated for `date` to the undated name "no_data_map.html".
# The copy used to run in the opposite direction, which replaced the map that
# was just saved as no_data_map_{date}.html with whatever no_data_map.html
# held before (and failed when that file did not exist). Copying from the
# dated file leaves both files holding the freshly generated map.
# NOTE(review): confirm that no_data_map.html is meant to mirror the latest map.
# Binary mode gives a byte-for-byte copy independent of the locale encoding.
with open(f"no_data_map_{date}.html", "rb") as source_file:
    content = source_file.read()

with open("no_data_map.html", "wb") as target_file:
    target_file.write(content)