# Dengue Prediction Using Linear Regression (Philippines)

This notebook builds a **Linear Regression Machine Learning model** to:

1. Predict **Dengue Cases** in the Philippines  
2. Identify **Dengue-Prone Months and Regions**

**Dataset Columns:**  
Month | Year | Region | Dengue_Cases | Dengue_Deaths


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Load dataset (change filename if needed)
df = pd.read_excel("Dengue_PH.xlsx")  # or pd.read_csv("Dengue_PH.csv")

# Fail fast when the file lacks a column that later cells index by name,
# instead of surfacing a bare KeyError several cells further down.
required_columns = {"Month", "Year", "Region", "Dengue_Cases"}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise KeyError(
        f"Dataset is missing required column(s): {sorted(missing_columns)}"
    )

df.head()

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
# to spot missing values or unexpected types before any transformation.
df.info()

In [None]:
# Convert Month to a numeric 1-12 column (Month_num).
# month_map accepts full English month names and three-letter abbreviations.
month_map = {
    "January": 1, "February": 2, "March": 3, "April": 4, "May": 5, "June": 6,
    "July": 7, "August": 8, "September": 9, "October": 10, "November": 11, "December": 12,
    # "May" is its own abbreviation, so it is not repeated below.
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "Jun": 6,
    "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

if pd.api.types.is_numeric_dtype(df["Month"]):
    # Already numeric: use the values unchanged.
    df["Month_num"] = df["Month"]
else:
    # Any non-numeric dtype (object, string, categorical) holds text labels.
    # Match them case-insensitively, ignore surrounding whitespace, and also
    # accept month numbers that were stored as text ("1" .. "12").
    month_lookup = {name.lower(): number for name, number in month_map.items()}
    month_lookup.update({str(number): number for number in range(1, 13)})

    month_text = df["Month"].astype(str).str.strip().str.lower()
    month_numbers = month_text.map(month_lookup)

    # An unmapped label would become NaN and only fail later, inside the
    # model fit; report the offending values here instead.
    unmapped = df.loc[month_numbers.isna(), "Month"].unique().tolist()
    if unmapped:
        raise ValueError(f"Unrecognized Month value(s): {unmapped}")

    df["Month_num"] = month_numbers.astype("int64")

df.head()

In [None]:
# Expand Region into indicator columns named "Region_<value>".
# drop_first=True omits the first category, which becomes the baseline level.
df_encoded = pd.get_dummies(
    data=df,
    columns=["Region"],
    drop_first=True,
)
df_encoded.head()

In [None]:
# Feature matrix: Year, numeric month, and every Region indicator column.
# Target vector: the observed number of dengue cases.
region_cols = list(
    filter(lambda name: name.startswith("Region_"), df_encoded.columns)
)

X = df_encoded.loc[:, ["Year", "Month_num", *region_cols]]
y = df_encoded.loc[:, "Dengue_Cases"]

X.head(), y.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical across reruns.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit an ordinary least-squares regression on the training partition.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted case counts for the held-out test rows.
y_pred = model.predict(X=X_test)

In [None]:
# Evaluation: error metrics of the predictions on the held-out test set.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # square root puts the error back in units of cases
r2 = r2_score(y_test, y_pred)

print("===================================")
print(" LINEAR REGRESSION MODEL RESULTS ")
print("===================================")
print(f"MAE  : {mae:.2f}")
print(f"MSE  : {mse:.2f}")
print(f"RMSE : {rmse:.2f}")
# The superscript two is written as an escape so the label cannot be
# corrupted again by a wrong file encoding (it previously showed as "RÂ²").
print(f"R\u00b2   : {r2:.4f}")

In [None]:
# Scatter the held-out actual counts against the model's predictions.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred)
ax.set_xlabel("Actual Dengue Cases")
ax.set_ylabel("Predicted Dengue Cases")
ax.set_title("Actual vs Predicted Dengue Cases")
ax.grid(True)
plt.show()

In [None]:
# Dengue-prone classification.
# A row is "High Risk" when its predicted case count reaches the chosen
# quantile of the observed case counts; otherwise it is "Low Risk".
risk_quantile = 0.7
risk_threshold = df_encoded["Dengue_Cases"].quantile(risk_quantile)

# Predict for every row (training and test rows alike), not only the test set.
df_encoded["Predicted_Cases"] = model.predict(X)
df_encoded["Dengue_Prone"] = np.where(
    df_encoded["Predicted_Cases"] >= risk_threshold,
    "High Risk",
    "Low Risk"
)

# df_encoded no longer has a Region column (it was replaced by indicator
# columns), so the preview takes Region from df, joined on the row index,
# to show which region each row belongs to.
df[["Region"]].join(
    df_encoded[["Month", "Year", "Predicted_Cases", "Dengue_Prone"]]
).head()

In [None]:
# Per (Month, Year): mean predicted case count and number of rows labelled
# "High Risk".
high_risk_flag = df_encoded["Dengue_Prone"].eq("High Risk")
by_month_year = (
    df_encoded
    .assign(Is_High_Risk=high_risk_flag)
    .groupby(["Month", "Year"])
)

dengue_prone_summary = by_month_year.agg(
    Avg_Predicted_Cases=("Predicted_Cases", "mean"),
    High_Risk_Count=("Is_High_Risk", "sum"),  # summing booleans counts the True rows
).reset_index()

dengue_prone_summary.head()