# **Data Ingestion**

In [None]:
%pip install tweepy

In [1]:
# Import the necessary library
import tweepy
import json
import os
from dotenv import load_dotenv

load_dotenv()

# Client object to interact with the API
client = tweepy.Client(os.getenv("bearer_token"))

print("✅ Authentication successful!")

✅ Authentication successful!


In [None]:
# Define a simple search query
# The '-is:retweet' part is useful to avoid duplicates and get original posts
query = "tsunami -is:retweet"

# Use the client to search for recent tweets that match the query. We'll just ask for 10 tweets to start with
response = client.search_recent_tweets(query=query, max_results=10)

# The data is inside the 'data' attribute of the response
tweets = response.data

# Let's check if we got anything and print the text of each tweet
if tweets:
    for tweet in tweets:
        print(tweet.text)
        print("---")
else:
    print("No tweets found for this query.")

Gempa 5,2 M Guncang Simeulue, Tidak Berpotensi Tsunami, BMKG Imbau Warga Tetap Tenang

Gempa dengan kekuatan Magnitudo 5,2 kembali mengguncang wilayah kepulauan Aceh tepatnya di Kota Sinabang, Kabupaten Simeulue, Selasa (23/9/2025).

#Gempa #GempaDiAceh

https://t.co/uHuKzhscn4
---
Setiap sudut Museum Tsunami Aceh menyimpan jejak luka dan harapan.
Bukan sekadar bangunan, tapi simbol kekuatan dan ingatan.
Arsitekturnya berbicara tentang kehilangan, perjuangan, dan bangkit kembali. #museumtsunami https://t.co/SPYJbSO9aH
---
@tsunami_lonely YOONGI 😭 He really posted 💜
---
walaupun melewati badai ombak tsunami dulu ges gpp aku bercaya 😔
https://t.co/egq5jVgrUL
---
Guten Morgen! ☕️💙

So darf ein Dienstag gerne starten. ☝️😎

Der Wind dreht sich! ⚠️ Der blaue Tsunami 🌊 trifft langsam aber sicher das Land… Wir sind näher am Ziel, als wir denken. 😮‍💨🔥 https://t.co/cHdCpmBEn8
---
Kjjj ganamos con media reserva, Funes Mori, y Alario hizo medio gol, mañana lo más tranquilo es un tsunami.
---
@Alph

### **Raising the Bar - A More Powerful Search 📈**

In [6]:
# 1. Build a more complex query
# Use OR to look for any of these words. Use () to group them.
# 'lang:en' filters for English language tweets.
keywords = "(tsunami OR flood OR cyclone OR highwave OR stormsurge OR highwaves OR coastalflood)"
language = "lang:en"
# Geo-filtering is very limited in the standard API. A good trick is to add place names.
# This isn't perfect, but it's a great start.
places = "(India OR Chennai OR Mumbai OR Kerala OR Odisha OR Bengal)"

# Combine everything into one query
# The query now looks for any of our keywords AND any of our places.
final_query = f"{keywords} {places} -is:retweet {language}"

print(f"Searching with query: {final_query}")

# 2. Ask for more data fields. We want to know who posted it, when, and if there's any location info.
response = client.search_recent_tweets(
    query=final_query,
    max_results=50,  # Let's get 100 tweets this time
    tweet_fields=["created_at", "author_id", "geo"], # What info we want about the tweet
    user_fields=["username", "verified", "location"], # What info we want about the user
    expansions=["author_id", "geo.place_id"] # Tells the API to give us the full user and place objects
)

# The data is more complex now, so we need a good way to store it.

Searching with query: (tsunami OR flood OR cyclone OR highwave OR stormsurge OR highwaves OR coastalflood) (India OR Chennai OR Mumbai OR Kerala OR Odisha OR Bengal) -is:retweet lang:en


In [7]:
import json

# This assumes the 'response' variable from your last run is still in memory
tweet_data = []

# The response includes the main tweet data and a separate 'includes' dictionary
if response.data:
    users = {user["id"]: user for user in response.includes['users']}
    for tweet in response.data:
        author = users[tweet.author_id]
        
        # Create a clean dictionary for each tweet
        tweet_dict = {
            "tweet_id": tweet.id,
            "created_at": str(tweet.created_at), # Convert to string for JSON
            "text": tweet.text,
            "author_username": author.username,
            "author_verified": author.verified,
            "author_profile_location": author.location,
            "tweet_geo": tweet.geo 
        }
        tweet_data.append(tweet_dict)

# Now, save the collected data to a file
file_name = "my_collected_tweets.json"
with open(file_name, "w") as f:
    json.dump(tweet_data, f, indent=4)

print(f"✅ Success! Saved {len(tweet_data)} tweets to {file_name}")

✅ Success! Saved 50 tweets to my_collected_tweets.json


**Model Client**

In [2]:
import google.generativeai as genai
import pandas as pd
import json
import os
import time
from tqdm import tqdm
from dotenv import load_dotenv

# --- 1. Setup ---
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize the Gemini 1.5 Flash model
# NOTE: The correct model name is 'gemini-1.5-flash-latest'
model = genai.GenerativeModel('gemini-1.5-flash-latest')
print("✅ Gemini model initialized successfully.")


  from .autonotebook import tqdm as notebook_tqdm


✅ Gemini model initialized successfully.


**Classifier Function**

In [13]:
# --- 3. The New, More Sophisticated Batch Classifier ---
def classify_tweet_batch_v2(tweet_batch):
    """
    Uses the Gemini LLM with highly specific instructions to identify ACTIVE disaster reports.
    """
    tweets_for_prompt = "\n".join([f'{i+1}. "{text}"' for i, text in enumerate(tweet_batch)])

    # --- THE REFINED PROMPT ---
    prompt = f"""
        You are a real-time situational awareness analyst. Your task is to identify ACTIVE, first-hand reports of ongoing natural disasters or emergencies from a list of texts.

        **CRITICAL Instructions:**
        1.  Your goal is to find reports from people on the ground, not news or discussions.
        2.  A text is only a "disaster" if it describes an event happening NOW or very recently.
        3.  **EXCLUDE** the following:
            - News articles reporting on past or future events.
            - General discussions about disaster preparedness.
            - Political promises or policy suggestions (e.g., "we need better cyclone protection").
            - Figurative language or metaphors (e.g., "a tsunami of fans").
        4.  Provide your response ONLY in a valid JSON array format. Each object must have four keys: "id" (the input number), "is_disaster" (boolean), "confidence_score" (float reflecting severity), and "report_type" (string: "Active Report", "News/Discussion", or "Figurative/Unrelated").

        **List of Texts to Analyze:**
        {tweets_for_prompt}
    """
    try:
        time.sleep(5)
        response = model.generate_content(prompt)
        json_response = json.loads(response.text.strip().replace("```json", "").replace("```", ""))
        return json_response

    except Exception as e:
        print(f"An error occurred during batch classification: {e}")
        return []

**Batch_Classifier function** - Instead of sending 20 requests to the LLM for 20 times and hitting the limit we send 20 requests as a whole batch one at a time.

In [14]:
# --- 2. Function to Create Batches ---
def create_batches(data, batch_size=20):
    for i in range(0, len(data), batch_size):
        yield data[i:i + batch_size]

**Dummy Test**

In [None]:
# --- 3. The New Test Function ---
def run_test_cases():
    print("\n--- Running Test Cases ---")
    
    # Create a list of test cases to send as one batch
    test_tweets = [
        "A Tsunami of fans flooded the road when MS. Dhoni came out", # Metaphor
        "Alert: Water logging reported in several parts of Mumbai after heavy rain.", # Real, low severity
        "Just enjoying a beautiful sunset at the beach today!", # Not a disaster
        "BREAKING: Cyclone is expected to make landfall on the east coast tomorrow morning with winds over 150 km/h." # Real, high severity
    ]

    # Call the batch function once with the entire list
    results = classify_tweet_batch_v2(test_tweets)

    if not results:
        print("Could not get results from the API.")
        return

    # Neatly print the result for each test case
    for result in results:
        # The 'id' from the result corresponds to the position in our test_tweets list
        # We subtract 1 because lists are 0-indexed
        tweet_index = result['id'] - 1
        tweet_text = test_tweets[tweet_index]
        is_dis = result['is_disaster']
        score = result['confidence_score']
        
        print(f"Text: '{tweet_text}'")
        print(f"Is Disaster: {is_dis} (Score: {score:.2f})\n")

# --- Run the Test ---
run_test_cases()


--- Running Test Cases ---
Text: 'A Tsunami of fans flooded the road when MS. Dhoni came out'
Is Disaster: False (Score: 0.00)

Text: 'Alert: Water logging reported in several parts of Mumbai after heavy rain.'
Is Disaster: True (Score: 0.70)

Text: 'Just enjoying a beautiful sunset at the beach today!'
Is Disaster: False (Score: 0.00)

Text: 'BREAKING: Cyclone is expected to make landfall on the east coast tomorrow morning with winds over 150 km/h.'
Is Disaster: True (Score: 0.95)



**Augmenting with data**

In [15]:
# --- 4. Load Data and Process ---
try:
    df = pd.read_json('my_collected_tweets.json')
    print(f"\nSuccessfully loaded {len(df)} tweets.")
    
    all_results = []
    tweet_list = df['text'].tolist()
    
    for batch in tqdm(list(create_batches(tweet_list, 20)), desc="Processing Batches with v2 Prompt"):
        batch_results = classify_tweet_batch_v2(batch)
        all_results.extend(batch_results)

    # --- 5. Map Results and Filter ---
    if all_results:
        results_df = pd.DataFrame(all_results)
        results_df['index'] = results_df['id'] - 1
        df = df.reset_index().merge(results_df, on='index', how='left').drop(columns=['index', 'id'])

        # --- THE NEW FILTERING LOGIC ---
        # We now filter for "Active Report" type for much higher accuracy
        verified_disasters_df = df[
            (df['is_disaster'] == True) & 
            (df['report_type'] == 'Active Report')
        ].copy()

        print("\n--- ✅ Classification Complete! ---")
        print(f"Found {len(verified_disasters_df)} high-confidence ACTIVE disaster reports.")
        print("\n--- Sample of Verified Reports ---")
        print(verified_disasters_df.sort_values(by='confidence_score', ascending=False)[['text', 'confidence_score']].head())

except FileNotFoundError:
    print("\nError: 'my_collected_tweets.json' not found.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


Successfully loaded 50 tweets.


Processing Batches with v2 Prompt:   0%|          | 0/3 [00:00<?, ?it/s]

Processing Batches with v2 Prompt: 100%|██████████| 3/3 [00:34<00:00, 11.53s/it]


--- ✅ Classification Complete! ---
Found 12 high-confidence ACTIVE disaster reports.

--- Sample of Verified Reports ---
                                                 text  confidence_score
6   ‘Worse than Amphan Cyclone’: Houses flooded, s...               0.9
32  @motherrr @manjusipayya @dksardana @Ambarseriy...               0.9
44  ‘Worse than Amphan Cyclone’: Houses flooded, s...               0.9
0   River #Ganga at #Farakka(FF) in #MURSHIDABAD d...               0.8
10  BJP is inviting sugg on how  to make Mumbai be...               0.8





In [17]:
verified_disasters_df.sort_values(by='confidence_score', ascending=False)[['text', 'confidence_score']]

Unnamed: 0,text,confidence_score
6,"‘Worse than Amphan Cyclone’: Houses flooded, s...",0.9
32,@motherrr @manjusipayya @dksardana @Ambarseriy...,0.9
44,"‘Worse than Amphan Cyclone’: Houses flooded, s...",0.9
0,River #Ganga at #Farakka(FF) in #MURSHIDABAD d...,0.8
10,BJP is inviting sugg on how to make Mumbai be...,0.8
3,River #Ganga at #CS 97 A GANGA FARAKKA in #MUR...,0.8
19,@Matt_Pinner Ofcourse yes! The same flood stor...,0.7
41,@RealDrJaneRuby Literally the most synchronize...,0.6
43,@BaddieDean @hoar27796 @Codex_India3 thanx to ...,0.6
45,"‘Worse than Amphan Cyclone’: Houses flooded, s...",0.6


In [18]:
verified_disasters_df.shape

(12, 10)

In [28]:
verified_disasters_df

Unnamed: 0,tweet_id,created_at,text,author_username,author_verified,author_profile_location,tweet_geo,is_disaster,confidence_score,report_type
0,1970351141624902010,2025-09-23 04:54:54+00:00,River #Ganga at #Farakka(FF) in #MURSHIDABAD d...,CWCOfficial_FF,False,"New Delhi, India",,True,0.8,Active Report
3,1970350823323627600,2025-09-23 04:53:38+00:00,River #Ganga at #CS 97 A GANGA FARAKKA in #MUR...,CWCOfficial_FF,False,"New Delhi, India",,True,0.8,Active Report
6,1970350658026185072,2025-09-23 04:52:58+00:00,"‘Worse than Amphan Cyclone’: Houses flooded, s...",akularsnvas,False,"Hyderabad, India",,True,0.9,Active Report
10,1970348990198849929,2025-09-23 04:46:21+00:00,BJP is inviting sugg on how to make Mumbai be...,Sanjeevdesiguy,False,SPACE,,True,0.8,Active Report
19,1970340495512740159,2025-09-23 04:12:35+00:00,@Matt_Pinner Ofcourse yes! The same flood stor...,VedicBeat,False,,,True,0.7,Active Report
32,1970327052403515841,2025-09-23 03:19:10+00:00,@motherrr @manjusipayya @dksardana @Ambarseriy...,joydeepg9,False,"Gurgaon, India",,True,0.9,Active Report
41,1970322667883606430,2025-09-23 03:01:45+00:00,@RealDrJaneRuby Literally the most synchronize...,DJohnT1122,False,,,True,0.6,Active Report
43,1970320416435585122,2025-09-23 02:52:48+00:00,@BaddieDean @hoar27796 @Codex_India3 thanx to ...,jhonbap78,False,,,True,0.6,Active Report
44,1970319702703439939,2025-09-23 02:49:58+00:00,"‘Worse than Amphan Cyclone’: Houses flooded, s...",FinancialXpress,False,India,,True,0.9,Active Report
45,1970319702703439939,2025-09-23 02:49:58+00:00,"‘Worse than Amphan Cyclone’: Houses flooded, s...",FinancialXpress,False,India,,True,0.6,Active Report


In [None]:
%pip install spacy geopy

**Location Extraction** - using Named Entity Recognition extracting the Location of the post 
1. either from the user's tweet
2. fall back to the user profile location if tweet location unavailable 

In [None]:
import pandas as pd
import spacy
from geopy.geocoders import Nominatim
from tqdm import tqdm

# --- 1. Setup ---
try:
    nlp = spacy.load("en_core_web_sm")
    print("✅ spaCy model loaded successfully.")
except OSError:
    print("spaCy model not found. Please run: python -m spacy download en_core_web_sm")
    nlp = None

geolocator = Nominatim(user_agent="sih_disaster_mapper_v4")
print("✅ GeoPy geocoder initialized.")

# --- 2. Load Your Classified Data ---

# --- DEBUGGING STEP (to confirm) ---
print("\n--- 🐞 Inspecting your DataFrame ---")
print("Columns found:")
print(verified_disasters_df.columns)
print("----------------------------------\n")

# --- 3. The Location Extraction Function (No changes needed) ---
def get_coordinates(text, profile_location):
    """
    Extracts coordinates by first checking the tweet text (NER),
    then falling back to the user's profile location.
    """
    location_name = None
    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ in ["GPE", "LOC"]:
            location_name = ent.text
            break
            
    if not location_name and isinstance(profile_location, str):
        location_name = profile_location
        
    if location_name:
        try:
            location = geolocator.geocode(location_name, timeout=10)
            if location:
                return location.latitude, location.longitude
        except Exception:
            pass
            
    return None, None

# --- 4. Apply Function and Get Final Data ---
if nlp and not verified_disasters_df.empty:
    tqdm.pandas(desc="📍 Extracting Coordinates")
    
    # We directly use the 'author_profile_location' column that is already here
    verified_disasters_df[['latitude', 'longitude']] = verified_disasters_df.progress_apply(
        lambda row: pd.Series(get_coordinates(row['text'], row['author_profile_location'])),
        axis=1
    )

    # Drop rows where we couldn't find any coordinates
    final_geocoded_df = verified_disasters_df.dropna(subset=['latitude', 'longitude']).copy()

    print(f"\n✅ Successfully found coordinates for {len(final_geocoded_df)} reports.")
    print("\n--- Sample of Final Data with Coordinates ---")
    print(final_geocoded_df[['text', 'latitude', 'longitude']].head())

    # --- 5. Save Your Final Progress ---
    final_geocoded_df.to_csv('final_geocoded_reports.csv', index=False)
    print("\n💾 Final geocoded data saved to 'final_geocoded_reports.csv'.")

✅ spaCy model loaded successfully.
✅ GeoPy geocoder initialized.

--- 🐞 Inspecting your DataFrame ---
Columns found:
Index(['tweet_id', 'created_at', 'text', 'author_username', 'author_verified',
       'author_profile_location', 'tweet_geo', 'is_disaster',
       'confidence_score', 'report_type'],
      dtype='object')
----------------------------------



📍 Extracting Coordinates: 100%|██████████| 12/12 [00:13<00:00,  1.09s/it]


✅ Successfully found coordinates for 11 reports.

--- Sample of Final Data with Coordinates ---
                                                 text   latitude  longitude
0   River #Ganga at #Farakka(FF) in #MURSHIDABAD d...  28.641926  77.221750
3   River #Ganga at #CS 97 A GANGA FARAKKA in #MUR...  28.641926  77.221750
6   ‘Worse than Amphan Cyclone’: Houses flooded, s...  22.572646  88.363895
10  BJP is inviting sugg on how  to make Mumbai be...  19.054999  72.869203
19  @Matt_Pinner Ofcourse yes! The same flood stor...  22.351115  78.667743

💾 Final geocoded data saved to 'final_geocoded_reports.csv'.





In [33]:
verified_disasters_df.head()

Unnamed: 0,tweet_id,created_at,text,author_username,author_verified,author_profile_location,tweet_geo,is_disaster,confidence_score,report_type,latitude,longitude
0,1970351141624902010,2025-09-23 04:54:54+00:00,River #Ganga at #Farakka(FF) in #MURSHIDABAD d...,CWCOfficial_FF,False,"New Delhi, India",,True,0.8,Active Report,28.641926,77.22175
3,1970350823323627600,2025-09-23 04:53:38+00:00,River #Ganga at #CS 97 A GANGA FARAKKA in #MUR...,CWCOfficial_FF,False,"New Delhi, India",,True,0.8,Active Report,28.641926,77.22175
6,1970350658026185072,2025-09-23 04:52:58+00:00,"‘Worse than Amphan Cyclone’: Houses flooded, s...",akularsnvas,False,"Hyderabad, India",,True,0.9,Active Report,22.572646,88.363895
10,1970348990198849929,2025-09-23 04:46:21+00:00,BJP is inviting sugg on how to make Mumbai be...,Sanjeevdesiguy,False,SPACE,,True,0.8,Active Report,19.054999,72.869203
19,1970340495512740159,2025-09-23 04:12:35+00:00,@Matt_Pinner Ofcourse yes! The same flood stor...,VedicBeat,False,,,True,0.7,Active Report,22.351115,78.667743


In [None]:
%pip install h3 folium

In [39]:
import pandas as pd
import h3
import folium

# --- 1. Load Your Final Geocoded Data ---
try:
    df = pd.read_csv('final_geocoded_reports.csv')
    print(f"✅ Successfully loaded {len(df)} geocoded reports.")
except FileNotFoundError:
    print("🛑 Error: 'final_geocoded_reports.csv' not found.")
    df = pd.DataFrame()

if not df.empty:
    # --- 2. H3 Conversion and Density Calculation (No changes here) ---
    H3_RESOLUTION = 7
    df['h3_index'] = df.apply(
        lambda row: h3.latlng_to_cell(row['latitude'], row['longitude'], H3_RESOLUTION),
        axis=1
    )
    report_density = df['h3_index'].value_counts().reset_index()
    report_density.columns = ['h3_index', 'report_count']

    # --- 3. Prepare Hexagons for Visualization (No changes here) ---
    def get_hexagon_boundary(h3_index):
        boundary = h3.cell_to_boundary(h3_index)
        return [[lon, lat] for lat, lon in boundary]
    
    report_density['hexagon'] = report_density['h3_index'].apply(get_hexagon_boundary)
    print("✅ Prepared hexagon boundaries for mapping.")

    # --- 4. Visualize the Hotspot Map with Enhanced Styling ---

    # --- NEW: A function to determine color based on report count ---
    def get_fill_color(count):
        max_count = report_density['report_count'].max()
        if count > max_count * 0.66:
            return '#d73027' # Dark Red for high density
        elif count > max_count * 0.33:
            return '#fc8d59' # Orange for medium density
        else:
            return '#fee08b' # Yellow for low density

    map_center = [df['latitude'].mean(), df['longitude'].mean()]
    hotspot_map = folium.Map(location=map_center, zoom_start=10, tiles="cartodbpositron")

    for i, row in report_density.iterrows():
        folium.GeoJson(
            data={
                "type": "Feature",
                "geometry": {
                    "type": "Polygon",
                    "coordinates": [row['hexagon']],
                },
            },
            # --- NEW: Updated style function ---
            style_function=lambda feature, count=row['report_count']: {
                'fillColor': get_fill_color(count), # Use our new color function
                'color': 'white',      # A high-contrast white border
                'weight': 2.5,         # Make the border thicker
                'fillOpacity': 0.75,     # A consistent, strong opacity
            },
            tooltip=f"<b>🔥 Hotspot 🔥</b><br>Reports: {row['report_count']}<br>H3 Index: {row['h3_index']}"
        ).add_to(hotspot_map)

    hotspot_map.save("disaster_hotspot_map_enhanced.html")
    print("\n🗺️  Success! Your ENHANCED hotspot map has been saved to 'disaster_hotspot_map_enhanced.html'.")

✅ Successfully loaded 11 geocoded reports.
✅ Prepared hexagon boundaries for mapping.

🗺️  Success! Your ENHANCED hotspot map has been saved to 'disaster_hotspot_map_enhanced.html'.


In [40]:
import pandas as pd
import h3
import folium
from folium.features import MacroElement
from jinja2 import Template

# --- 1. Load Your Final Geocoded Data ---
try:
    df = pd.read_csv('final_geocoded_reports.csv')
    print(f"✅ Successfully loaded {len(df)} geocoded reports.")
except FileNotFoundError:
    print("🛑 Error: 'final_geocoded_reports.csv' not found.")
    df = pd.DataFrame()

if not df.empty:
    # --- 2. H3 Conversion and Density Calculation ---
    H3_RESOLUTION = 7
    df['h3_index'] = df.apply(
        lambda row: h3.latlng_to_cell(row['latitude'], row['longitude'], H3_RESOLUTION),
        axis=1
    )
    report_density = df['h3_index'].value_counts().reset_index()
    report_density.columns = ['h3_index', 'report_count']

    # --- 3. Prepare Hexagons and Get Center Coordinates ---
    def get_hexagon_data(h3_index):
        boundary = h3.cell_to_boundary(h3_index)
        center = h3.cell_to_latlng(h3_index)
        return [[lon, lat] for lat, lon in boundary], center

    # Apply the function to get both boundary and center
    report_density[['hexagon', 'center']] = report_density['h3_index'].apply(
        lambda x: pd.Series(get_hexagon_data(x))
    )
    print("✅ Prepared hexagon data for mapping.")

    # --- 4. Create the Map with Satellite View ---
    map_center = [df['latitude'].mean(), df['longitude'].mean()]
    
    # Initialize the map with a standard view
    hotspot_map = folium.Map(location=map_center, zoom_start=10, tiles="OpenStreetMap")

    # Add the satellite tile layer
    folium.TileLayer(
        tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
        attr='Esri',
        name='Satellite View',
        overlay=False,
        control=True
    ).add_to(hotspot_map)
    
    print("🛰️  Added Satellite base layer.")

    # --- 5. Draw the Hexagonal Hotspots (no changes here) ---
    # (Your existing code for styling and drawing hexagons)
    def get_fill_color(count):
        max_count = report_density['report_count'].max()
        if count > max_count * 0.66: return '#d73027'
        elif count > max_count * 0.33: return '#fc8d59'
        else: return '#fee08b'

    for i, row in report_density.iterrows():
        folium.GeoJson(
            data={"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [row['hexagon']]}},
            style_function=lambda feature, count=row['report_count']: {
                'fillColor': get_fill_color(count), 'color': 'white', 'weight': 2.5, 'fillOpacity': 0.75,
            },
            tooltip=f"<b>🔥 Hotspot 🔥</b><br>Reports: {row['report_count']}<br>H3 Index: {row['h3_index']}"
        ).add_to(hotspot_map)

    # --- 6. Create and Add the Clickable Index ---
    # We use Jinja2 to create a custom HTML/CSS/JS element
    template = """
    {% macro html(this, kwargs) %}
    <div id="maplegend" class="leaflet-control leaflet-bar" 
         style="position: fixed; bottom: 20px; right: 20px; z-index:9999;
                background-color: rgba(255, 255, 255, 0.8);
                border-radius: 5px; padding: 10px; font-size:12px;
                max-height: 200px; overflow-y: auto;
                border: 2px solid grey;">
    <h4 style="margin-top:0;">Hotspot Zones</h4>
    <ul style="list-style-type:none; padding-left:0;">
    {% for i, row in this.hotspots.iterrows() %}
        <li style="margin-bottom: 5px;">
            <a href="#" style="text-decoration: none; color: black;" 
               onclick="
                   L.DomEvent.preventDefault(event);
                   {{this._parent.get_name()}}.setView([{{row.center[0]}}, {{row.center[1]}}], 13);
                   return false;
               ">
               Hotspot {{i+1}} ({{row.report_count}} reports)
            </a>
        </li>
    {% endfor %}
    </ul>
    </div>
    {% endmacro %}
    """
    macro = MacroElement()
    macro._template = Template(template)
    # Pass the hotspot data from Python to the HTML template
    macro.hotspots = report_density.sort_values(by='report_count', ascending=False)
    hotspot_map.add_child(macro)
    
    # Add a layer control to switch between maps
    folium.LayerControl().add_to(hotspot_map)
    print("🧭 Added clickable index and layer control.")

    # --- 7. Save the Final Map ---
    hotspot_map.save("disaster_hotspot_map_interactive.html")
    print("\n🗺️  Success! Your new INTERACTIVE map has been saved to 'disaster_hotspot_map_interactive.html'.")

✅ Successfully loaded 11 geocoded reports.
✅ Prepared hexagon data for mapping.
🛰️  Added Satellite base layer.
🧭 Added clickable index and layer control.

🗺️  Success! Your new INTERACTIVE map has been saved to 'disaster_hotspot_map_interactive.html'.


In [3]:
import os
import requests
from ddgs.ddgs import DDGS
from PIL import Image

# The main media directory
MEDIA_DIR = "media"
# The specific subdirectory for downloaded images, as per your structure
VISUALS_DIR = os.path.join(MEDIA_DIR, "visuals")

def get_media_visuals(keywords, location, num_images=5):
    """
    Searches for and downloads relevant visuals into the 'media/visuals' directory.
    This function is designed to be called from your core.ipynb notebook.
    """
    # Create the visuals subdirectory if it doesn't exist
    if not os.path.exists(VISUALS_DIR):
        os.makedirs(VISUALS_DIR)

    # Clean up keywords and create a search query
    search_keywords = ' '.join(keywords) if isinstance(keywords, list) else keywords
    search_query = f"{search_keywords} {location}"
    print(f"🎥 Searching for visuals with query: '{search_query}'...")
    downloaded_images = []
    
    try:
        with DDGS() as ddgs:
            results = ddgs.images(
                search_query,
                region="in-en",
                safesearch="on",
                size="Large",
                type_image="photo"
            )
            
            if not results:
                print(f"🛑 No images found for '{search_query}'.")
                return []

            print(f"Downloading up to {num_images} images...")
            # Create a unique prefix for filenames based on the location
            location_prefix = location.split(',')[0].lower().replace(' ', '_')
            for i, r in enumerate(results):
                if i >= num_images:
                    break
                try:
                    image_url = r.get("image")
                    if not image_url:
                        continue
                    
                    image_data = requests.get(image_url, timeout=15).content
                    # Save the file with the location prefix for easy lookup later
                    filename = os.path.join(VISUALS_DIR, f"{location_prefix}_image_{i}.jpg")
                    with open(filename, 'wb') as f:
                        f.write(image_data)
                    Image.open(filename).verify()
                    downloaded_images.append(filename)
                except Exception as e:
                    print(f"  - Could not download or verify an image: {e}")
                    
        print(f"✅ Downloaded {len(downloaded_images)} valid images for {location}.")
        return downloaded_images
        
    except Exception as e:
        print(f"🛑 Error fetching visuals: {e}")
        return []

    