In [16]:
import requests
import re
import pandas as pd
from datetime import datetime
import pdfplumber
import certifi
import sys
import folium
from folium.plugins import HeatMap
import googlemaps
import pytz
from dotenv import load_dotenv
import os

In [17]:
load_dotenv()

False

In [84]:
time_zone = pytz.timezone('Asia/Kolkata')
date = datetime.now(time_zone).strftime("%Y%m%d")
date

'20250323'

In [76]:
url = f'https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_{date}.pdf'

In [77]:
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, verify=certifi.where())

if response.status_code == 200:
    with open(f"AQI_Bulletin_{date}.pdf", "wb") as f:
        f.write(response.content)
    print("Download successful.")
else:
    print("Failed to download:", response.status_code)

Download successful.


In [78]:
daily_doc = f'AQI_Bulletin_{date}.pdf'

In [79]:
pdfplumber.open(daily_doc)

<pdfplumber.pdf.PDF at 0x158c1a4d0>

In [83]:
if daily_doc:
    tables = []
    with pdfplumber.open(daily_doc) as pdf:
        for page in pdf.pages[:-1]:
            extracted_tables = page.extract_tables()
            # print(extracted_tables[0])
            tables.extend(extracted_tables)

In [8]:
clean_tables = []
for table in tables:
    table_wihtout_headers = table[1:]
    clean_tables.append(table_wihtout_headers)

In [9]:
df_list = []
for each_table in clean_tables:
    df = pd.DataFrame(each_table)
    df_list.append(df)

In [10]:
df_combined = pd.concat(df_list, ignore_index=True)
df_combined.columns = tables[0][0]

In [11]:
df = df_combined.drop(columns=['S.No', 'Prominent Pollutant', 'No. of Stations\nParticipated/\nTotal Stations'])

In [12]:
df.columns = df.columns.str.replace("\n", "").str.replace(" ","_")

In [13]:
df['IndexValue'] = df['IndexValue'].astype(int)

In [20]:
API_KEY = os.getenv("GOOGLE_API_KEY")

In [25]:
gmaps = googlemaps.Client(key=API_KEY)

In [26]:
# Function to get state using reverse geocoding
def get_state(city):
    try:
        geocode_result = gmaps.geocode(f"{city}, India")
        for component in geocode_result[0]['address_components']:
            if 'administrative_area_level_1' in component['types']:
                return component['long_name']
    except Exception as e:
        return None

df["State"] = df["City"].apply(get_state)

In [27]:
df.sort_values(by='IndexValue', ascending=False).head(20)#.to_csv('daily_aqi.csv', index=False)

Unnamed: 0,City,Air_Quality,IndexValue,State
159,Nalbari,Poor,280,Assam
86,Ghaziabad,Poor,257,Uttar Pradesh
210,Thiruvananthapuram,Poor,253,Kerala
91,Guwahati,Poor,233,Assam
201,Siwan,Poor,216,Bihar
90,Gurugram,Poor,202,Haryana
3,Ahmednagar,Moderate,185,Maharashtra
203,Sri Ganganagar,Moderate,177,Rajasthan
22,Baghpat,Moderate,171,Uttar Pradesh
70,Delhi,Moderate,161,Delhi


In [28]:
# Function to geocode addresses
def geocode_google(address):
    try:
        geocode_result = gmaps.geocode(address)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return pd.Series([location['lat'], location['lng']])
        else:
            return pd.Series([None, None])
    except Exception as e:
        return pd.Series([None, None])

In [29]:
# Apply geocoding function to the DataFrame
df[['Latitude', 'Longitude']] = df['City'].apply(geocode_google)

In [81]:
df['date'] = date

In [30]:
if len(df[df['Latitude'].isna()]['City']) > 0:
    df[['Latitude', 'Longitude']] = df['City'].apply(geocode_google)

In [32]:
len(df[df['Latitude'].isna()]['City'])

3

In [33]:
if len(df[df['Latitude'].isna()]['City']) > 0:
    df['Latitude'] = df['Latitude'].fillna('0')
    df['Longitude'] = df['Longitude'].fillna('0')

In [34]:
len(df[df['Latitude'].isna()]['City']) 

0

In [35]:
with pdfplumber.open(daily_doc) as pdf:
    last_page_text = pdf.pages[-1].extract_text()

In [36]:
text = last_page_text.replace("\n", " ")

In [37]:
list_of_no_data_cities = text.split(":")[1].split(",")

In [38]:
no_data_cities = []
for each_city in list_of_no_data_cities:
    city_name = each_city.split(")")[1].strip()
    no_data_cities.append(city_name)

In [39]:
geocoded_no_data_cities = []
for each_city in no_data_cities:
    no_data_city_dict = {}
    no_data_city_dict['city'] = each_city 
    no_data_city_dict['lat'] = geocode_google(each_city)[0]
    no_data_city_dict['long'] = geocode_google(each_city)[1]
    geocoded_no_data_cities.append(no_data_city_dict)

In [55]:
df_no_data = pd.DataFrame(geocoded_no_data_cities)
df_no_data['state'] = df_no_data['city'].apply(get_state)

In [82]:
df_no_data['date'] = date

In [57]:
if len(df_no_data[df_no_data['lat'].isna()]) > 0:
    df_no_data['city'].apply(geocode_google)

In [68]:
if len(df_no_data[df_no_data['lat'].isna()]) > 0:
    df_no_data['lat'] = df_no_data['lat'].fillna('0')
    df_no_data['long'] = df_no_data['long'].fillna('0')

In [70]:
df_no_data.to_csv('no_data_cities.csv', index=False)

In [43]:
# Create base map
m1 = folium.Map(location=[22.9734, 78.6569], zoom_start=4.5)

heat_data = [[row['Latitude'], row['Longitude'], row['IndexValue']] for index, row in df.iterrows()]
HeatMap(heat_data, radius=13, blur=10, max_zoom=4.5).add_to(m1)

folium.map.Marker(
    [35.5, 75],  # Approximate location to place annotation (adjust as needed)
    icon=folium.DivIcon(html="""
        <div style="position: absolute; top: 10px; left: 10px; font-size: 12px; font-weight: bold; 
                    background: rgba(255, 255, 255, 0.5); padding: 5px; border-radius: 2px; 
                    border: 0.5px solid black; z-index:9999;">
            Zoom in at area of interest
        </div>
    """)
).add_to(m1)

m1#.save(f"daily_map_{date}.html")

In [32]:
with open("daily_map.html", "r") as source_file:
    content = source_file.read()

with open(f"daily_map_{date}.html", "w") as target_file:
    target_file.write(content)

In [72]:
# Create a base map centered around India
m2 = folium.Map(location=[20.5937, 78.9629], zoom_start=4.5)

# Add markers with city names as labels
for _, row in df_no_data.iterrows():
    folium.Marker(
        location=[row["lat"], row["long"]],
        popup=folium.Popup(row["city"], parse_html=True),
        icon=folium.Icon(color="red", icon="flag")
    ).add_to(m2)

folium.map.Marker(
    [33, 75],  # Approximate location to place annotation (adjust as needed)
    icon=folium.DivIcon(html="""
        <div style="position: absolute; top: 10px; left: 10px; font-size: 12px; font-weight: bold; 
                    background: rgba(255, 255, 255, 0.5); padding: 5px; border-radius: 2px; 
                    border: 0.5px solid black; z-index:9999;">
            Zoom in at area of interest
        </div>
    """)
).add_to(m2)

# Display the map
m2.save(f"no_data_map_{date}.html")

In [73]:
with open("no_data_map.html", "r") as source_file:
    content = source_file.read()

with open(f"no_data_map_{date}.html", "w") as target_file:
    target_file.write(content)