In [2]:
import pandas as pd
import re

# Sample data
data = pd.DataFrame({
    "tweet": [
        "I'm feeling really sick with a bad cough and fever",
        "Enjoying the sun at the beach!",
        "High fever reported in our area. Hospitals are full.",
        "Caught a cold, sore throat and mild chills.",
        "No health issues here, just fine!"
    ],
    "latitude": [28.6, 19.1, 28.6, 12.9, 13.0],
    "longitude": [77.2, 72.8, 77.2, 77.6, 80.3]
})

# Preprocessing function
def preprocess(text):
    text = re.sub(r"http\S+", "", text.lower())  # remove URLs, lowercase
    text = re.sub(r"[^a-z\s]", "", text)  # remove punctuation/numbers
    return text

data["clean_tweet"] = data["tweet"].apply(preprocess)


In [4]:
symptom_keywords = ["fever", "cough", "sick", "flu", "cold", "chills", "hospital", "throat"]

def label_symptoms(text):
    return any(symptom in text for symptom in symptom_keywords)

data["label"] = data["clean_tweet"].apply(label_symptoms).astype(int)


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Vectorize
X = TfidfVectorizer().fit_transform(data["clean_tweet"])
y = data["label"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

# Train model
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Evaluate
print(classification_report(y_test, clf.predict(X_test)))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
import folium

m = folium.Map(location=[20.5, 78.9], zoom_start=5)

for _, row in data.iterrows():
    if row["label"] == 1:
        folium.CircleMarker(
            location=[row["latitude"], row["longitude"]],
            radius=6,
            color="red",
            fill=True,
            fill_opacity=0.7,
            popup=row["tweet"]
        ).add_to(m)

m.save("outbreak_map.html")
print("Map saved as 'outbreak_map.html'")


Map saved as 'outbreak_map.html'
