In [59]:
import pandas as pd
import folium
from folium.plugins import HeatMap
from collections import Counter
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import spacy
import time
from tqdm import tqdm


In [60]:
# Load the small English spaCy pipeline; its named-entity recogniser is what
# the location-extraction step uses to spot place names in tweet text.
nlp = spacy.load("en_core_web_sm")

# Nominatim (OpenStreetMap) geocoder. geopy's default timeout is 1 second, which
# the public Nominatim server frequently exceeds; every such timeout is a place
# that never reaches the map, so allow a more realistic 10 seconds per lookup.
geolocator = Nominatim(user_agent="crisis_mapper", timeout=10)


In [61]:
# Load the classified tweets. The tweet text lives in the 'content' column
# (there is no 'text' column); the file also carries 'risk_level' and
# 'Sentiment' labels.
df = pd.read_csv("classified_mental_health_tweets.csv")

# Fail fast with a readable message if the file does not have the column the
# location-extraction step reads, instead of a KeyError several cells later.
assert "content" in df.columns, "expected a 'content' column holding the tweet text"




In [62]:
# Preview only the first rows; a bare `df` asks the notebook to render the
# whole frame, which bloats the saved file as the dataset grows.
df.head(10)

Unnamed: 0,post_id,timestamp,content,likes,retweets,replies,risk_level,Sentiment
0,1907753011637592104,2025-04-03 11:12:16+00:00,another poor man beaten raj thackerays goons s...,0,1302,0,High-Risk,Negative
1,1907753010643804454,2025-04-03 11:12:16+00:00,mental health absolutely testing,0,666,0,Low Concern,Neutral
2,1907753009184125288,2025-04-03 11:12:16+00:00,mental health great ima strong christian lovin...,0,0,0,Low Concern,Positive
3,1907753006021439540,2025-04-03 11:12:15+00:00,cannot stress enough came 5th,0,7817,0,Low Concern,Positive
4,1907753004612153744,2025-04-03 11:12:15+00:00,meek helpless spineless response indian govt t...,0,74,0,High-Risk,Negative
5,1907753001810284898,2025-04-03 11:12:14+00:00,overwhelmed tasks ai help automate routine wor...,0,0,0,Moderate Concern,Positive
6,1907753000816333291,2025-04-03 11:12:14+00:00,cant stress enough men men women women denying...,0,0,0,Low Concern,Positive
7,1907753000782766323,2025-04-03 11:12:14+00:00,thoughts epidemic loneliness anxiety collated ...,0,19,0,Low Concern,Negative
8,1907752990280171651,2025-04-03 11:12:11+00:00,america help police officer doubt mental healt...,0,107,0,Low Concern,Positive
9,1907752989844033768,2025-04-03 11:12:11+00:00,kanye west confirms wife bianca censori left p...,0,0,0,Moderate Concern,Negative


In [63]:
# Extract geotagged data if available.
# `geo_data` is the list of (latitude, longitude) tuples that later cells extend
# and plot. It is rebuilt from scratch here, so re-running this cell resets it.
geo_data = []

# Whether the coordinate columns exist is a property of the frame, not of each
# row, so check once and then pull all complete pairs in a single pass.
if {"latitude", "longitude"}.issubset(df.columns):
    geotagged = df[["latitude", "longitude"]].dropna()
    geo_data.extend(zip(geotagged["latitude"], geotagged["longitude"]))

# Extract locations using NLP (for rows without geotag)
def extract_location(text):
    """Return the first place name found in ``text``, or ``None``.

    The text is run through the module-level spaCy pipeline ``nlp`` and the
    first entity labelled ``GPE`` or ``LOC`` is returned as a string.

    Parameters
    ----------
    text : str
        Tweet text. Anything that is not a non-blank string (for example the
        NaN pandas uses for a missing value) yields ``None`` instead of being
        passed to spaCy, which only accepts strings.

    Returns
    -------
    str or None
        The entity text exactly as it appears in ``text``, or ``None`` when no
        place entity is found.
    """
    # Guard first: a missing tweet arrives as a float NaN and would make
    # nlp() raise, aborting the whole column-wide map().
    if not isinstance(text, str) or not text.strip():
        return None

    place_labels = ("GPE", "LOC")
    return next(
        (ent.text for ent in nlp(text).ents if ent.label_ in place_labels),
        None,
    )

In [64]:
# Tag every post with the first place name spaCy finds in its text.
# NOTE: this adds the 'extracted_place' column to `df` in place.
df["extracted_place"] = df["content"].map(extract_location)

# Posts that already carry usable coordinates were put into `geo_data` by the
# geotag pass; geocoding their text as well would count them twice.
if {"latitude", "longitude"}.issubset(df.columns):
    needs_geocoding = df["latitude"].isna() | df["longitude"].isna()
else:
    needs_geocoding = pd.Series(True, index=df.index)

places_to_geocode = df.loc[needs_geocoding, "extracted_place"].dropna()

# Geocode NLP-extracted places: one request per distinct name, results cached
# in a dict so the (slow, rate-limited) service is never asked twice.
place_coords = {}
for place in tqdm(places_to_geocode.unique()):
    try:
        location = geolocator.geocode(place)
        if location:
            place_coords[place] = (location.latitude, location.longitude)
    except GeocoderTimedOut:
        # Best effort: a place that times out is simply left off the map.
        pass
    finally:
        time.sleep(1)  # avoid rate limit -- also after a timeout

# Add one point per *post* (not per unique place) so that the heatmap intensity
# and the later per-location counts reflect how often a place is mentioned.
# This extends `geo_data`; re-run the geotag cell first if you re-run this one,
# otherwise the same points are appended again.
geo_data.extend(
    place_coords[place] for place in places_to_geocode if place in place_coords
)

100%|██████████| 2/2 [00:02<00:00,  1.42s/it]


In [65]:
# Quick sanity check on the NLP pass: how many posts yielded a place name,
# and which distinct names were found.
found_places = df["extracted_place"].dropna()
print("Non-null extracted places count:", found_places.count())
print("Unique extracted places:", found_places.unique())


Non-null extracted places count: 2
Unique extracted places: ['america' 'canada']


In [66]:
# Number of posts per extracted place name, most frequent first.
place_counts = df['extracted_place'].value_counts()
print(place_counts)


extracted_place
america    1
canada     1
Name: count, dtype: int64


In [67]:
# Total number of coordinate pairs collected so far in `geo_data`.
n_points = len(geo_data)
print(f"Number of geotagged entries: {n_points}")


Number of geotagged entries: 2


In [68]:
# Generate Heatmap
# Start from the India-centred default view; it is kept only when there is
# nothing to plot.
m = folium.Map(location=[20.59, 78.96], zoom_start=4)  # Default: India

if geo_data:
    HeatMap(geo_data, radius=12, blur=15).add_to(m)

    # Frame the map on the plotted points so the heat is visible when the file
    # is opened, wherever the points are. max_zoom stops a single point (or a
    # tight cluster) from zooming in to street level.
    lats, lons = zip(*geo_data)
    m.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]], max_zoom=8)

m.save("crisis_heatmap.html")

In [69]:
# Count top 5 locations: identical (lat, lon) tuples in `geo_data` are tallied,
# and the five most frequent are kept as ((lat, lon), count) pairs.
top_locations = Counter(geo_data).most_common(5)
print("\n📍 Top 5 Crisis Discussion Locations:")
for i, ((lat, lon), count) in enumerate(top_locations, 1):
    print(f"{i}. Lat: {lat:.4f}, Lon: {lon:.4f} — {count} posts")

# Save top locations. The column list is given explicitly so that the CSV still
# gets its header row when `geo_data` is empty; without it, an empty record
# list produces a frame with no columns at all.
top_df = pd.DataFrame(
    [
        {"latitude": lat, "longitude": lon, "count": count}
        for (lat, lon), count in top_locations
    ],
    columns=["latitude", "longitude", "count"],
)
top_df.to_csv("top_crisis_locations.csv", index=False)


📍 Top 5 Crisis Discussion Locations:
1. Lat: 39.7837, Lon: -100.4459 — 1 posts
2. Lat: 61.0667, Lon: -107.9917 — 1 posts
