<a href="https://colab.research.google.com/github/Skarthikak/AI-Search/blob/main/Crowd_Sourced_Health_Anomaly_Detection_and_Reporting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install necessary libraries
!pip install pandas spacy folium --quiet
!python -m spacy download en_core_web_sm --quiet

import datetime
import html
import random
from collections import defaultdict

import folium
import pandas as pd
import spacy

# --- Step 1: Simulate Real-time Data Ingestion ---
# In a real-world scenario, this data would come from a live API.
# Here, we simulate a stream of 50 user reports with timestamps and locations in India.
print("Simulating real-time data ingestion...")

# Accumulates one dict per simulated report; turned into a DataFrame later.
reports_data = []

# Keyword table for the simple classifier: anomaly label -> lowercase phrases.
# A report is assigned the first label (in this order) for which any phrase
# occurs as a substring of the lowercased report text, so the phrases must
# follow the word order people actually use.
anomaly_keywords = {
    'Water Contamination': ['water is bad', 'water is dirty', 'diarrhea', 'cholera', 'typhoid'],
    'Air Pollution': ['smog is heavy', 'air is toxic', 'breathing problem', 'respiratory issues', 'smoke is thick'],
    # 'common cough' covers the "...getting ill with common cough." sample,
    # which neither 'cough is common' nor 'illness in area' can match.
    'Disease Outbreak': ['fever spreading', 'cough is common', 'common cough', 'illness in area', 'dengue', 'malaria'],
}

# Pool of report texts the simulator samples from (with replacement).
sample_reports = [
    "I've noticed the water is bad in my colony for the last three days.",
    "The air is toxic and I am facing breathing problem.",
    "There are many cases of fever spreading in our area.",
    "Heavy smog is causing respiratory issues for children.",
    "Official water supply seems dirty and causing diarrhea.",
    "A lot of people are getting ill with common cough.",
    "I saw a report of a dengue outbreak in the city.",
    "Smoke is thick near the industrial area today.",
]

# Indian cities and their approximate (latitude, longitude) for our simulation
cities = {
    "Mumbai": (19.0760, 72.8777),
    "Delhi": (28.7041, 77.1025),
    "Bangalore": (12.9716, 77.5946),
    "Kolkata": (22.5726, 88.3639),
}

# Generate 50 simulated reports.
# The (name, coordinates) pairs are materialised once instead of per iteration.
city_pool = list(cities.items())
for _ in range(50):
    # Keep the random draws in this order: text, city, age, lat jitter, lon jitter.
    text = random.choice(sample_reports)
    place, (base_lat, base_lon) = random.choice(city_pool)
    reported_at = datetime.datetime.now() - datetime.timedelta(
        minutes=random.randint(1, 100)
    )
    # Scatter each point up to half a degree around the city centre.
    jittered_lat = base_lat + random.uniform(-0.5, 0.5)
    jittered_lon = base_lon + random.uniform(-0.5, 0.5)
    reports_data.append(
        dict(
            report=text,
            city=place,
            latitude=jittered_lat,
            longitude=jittered_lon,
            timestamp=reported_at,
        )
    )

# One row per report, columns in the order the dict keys were given above.
df_reports = pd.DataFrame(reports_data)
print(f"Ingested {len(df_reports)} reports.")

# --- Step 2: NLP-based Anomaly Detection ---
print("\nRunning NLP model to detect anomalies...")
# Load spaCy's small English pipeline (fetched by the install step at the top).
# NOTE(review): in the visible part of this notebook `nlp` is never called --
# the classification that follows is plain keyword substring matching. Confirm
# whether the model is still needed before removing this load.
nlp = spacy.load("en_core_web_sm")
# Seed every row with the fallback label 'General'; the column is reassigned
# once the per-report classification has run.
df_reports['anomaly_type'] = 'General'

def detect_anomaly(text, keyword_map=None):
    """Classify a free-text report by case-insensitive keyword matching.

    Args:
        text: The report text. Anything that is not a ``str`` (for example
            ``None`` or the NaN pandas stores for a missing cell) is treated
            as containing no keywords instead of raising.
        keyword_map: Optional mapping of anomaly label to an iterable of
            phrases. Defaults to the module-level ``anomaly_keywords`` table.

    Returns:
        The first label, in mapping order, one of whose phrases occurs as a
        substring of ``text`` (compared in lowercase), or ``'General'`` when
        nothing matches.
    """
    # Missing or non-text cells cannot match any phrase.
    if not isinstance(text, str):
        return 'General'
    if keyword_map is None:
        keyword_map = anomaly_keywords

    text_lower = text.lower()
    for anomaly, keywords in keyword_map.items():
        # Lowercase the phrase too so caller-supplied tables need not be normalised.
        if any(keyword.lower() in text_lower for keyword in keywords):
            return anomaly
    return 'General'

# Label every report, replacing the placeholder column.
detected_labels = df_reports['report'].apply(detect_anomaly)
df_reports['anomaly_type'] = detected_labels

# Preview the labelled data, then count the rows that got a specific label.
print(df_reports.head())
specific_reports = df_reports.loc[df_reports['anomaly_type'] != 'General']
print(f"\nDetected {len(specific_reports)} specific anomalies.")

# --- Step 3: Geospatial Visualization ---
print("\nGenerating interactive map...")

# Create a base map centered on India using a working tile set
m = folium.Map(location=[20.5937, 78.9629], zoom_start=5, tiles='CartoDB Positron')


# Define colors for each anomaly type
colors = {
    'Water Contamination': 'blue',
    'Air Pollution': 'orange',
    'Disease Outbreak': 'red',
    'General': 'gray'
}

# Add one circle marker per report.
for _, row in df_reports.iterrows():
    # Unknown labels fall back to the same gray used for 'General'.
    anomaly_color = colors.get(row['anomaly_type'], 'gray')

    # Report text, label and city end up as HTML in the popup and tooltip.
    # Reports are user-submitted, so escape them to keep markup or script in a
    # report from being injected into the saved page.
    safe_report = html.escape(str(row['report']))
    safe_type = html.escape(str(row['anomaly_type']))
    safe_city = html.escape(str(row['city']))

    # Create a popup with details of the report
    popup_html = f"""
    <b>Report:</b> {safe_report}<br>
    <b>Type:</b> {safe_type}<br>
    <b>Timestamp:</b> {row['timestamp'].strftime('%Y-%m-%d %H:%M:%S')}<br>
    <b>Location:</b> {safe_city}
    """

    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=5,
        color=anomaly_color,
        fill=True,
        fill_color=anomaly_color,
        fill_opacity=0.7,
        tooltip=safe_report,
        popup=folium.Popup(popup_html, max_width=300)
    ).add_to(m)

# Save the map as a standalone HTML file in the current working directory
map_filename = "health_anomalies_map.html"
m.save(map_filename)
print(f"\nInteractive map saved to {map_filename}")
print("You can download and open this file in your browser to view the results.")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m104.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Simulating real-time data ingestion...
Ingested 50 reports.

Running NLP model to detect anomalies...
                                              report       city   latitude  \
0  The air is toxic and I am facing breathing pro...     Mumbai  19.144430   
1  Official water supply seems dirty and causing ...  Bangalore  13.062474   
2     Smoke is thick near the industrial area today.      Delhi  28.814124   
3  There are many cases of fever spreading in our...  Bangalore  13.294172   
4  There are many c