# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

# === Step 1: Load Excel Data ===
# The workbook is looked up relative to the current working directory.
excel_path = "Malaysia_Animal_Disease_Template(Copy).xlsx"
df = pd.read_excel(excel_path, sheet_name="Animal_Disease_Outbreaks_Data")

# === Step 2: Preprocess Data ===
# Records without a start date cannot be placed in a year/month bucket,
# so they are discarded before the date parts are derived.
df = df.dropna(subset=["Start Date"])
start_dates = pd.to_datetime(df["Start Date"])
df = df.assign(
    **{
        "Start Date": start_dates,
        "Year": start_dates.dt.year,
        "Month": start_dates.dt.month,
    }
)

# One row per (Year, Month, State) with the number of outbreak records in it.
# NOTE(review): only combinations that occur in the data produce a row, so
# "Outbreaks" is always >= 1 here -- confirm that is intended for training.
grouped = (
    df.groupby(["Year", "Month", "State"])
    .size()
    .rename("Outbreaks")
    .reset_index()
)

# Expand 'State' into one indicator column per state ("State_<name>").
df_encoded = pd.get_dummies(grouped, columns=["State"])

# === Step 3: Train AI Model ===
# Target is the per-group outbreak count; every other column is a feature.
y = df_encoded["Outbreaks"]
X = df_encoded.drop(columns=["Outbreaks"])

# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)

# Persist the estimator and the training column order; predict_outbreak()
# reloads both files to rebuild an input row with matching columns.
joblib.dump(model, "outbreak_model.pkl")
joblib.dump(list(X.columns), "model_features.pkl")

print("✅ Model trained successfully!")

# === Step 4: Prediction Function ===
def predict_outbreak(year, month, state):
    """Predict the number of outbreaks for one state in one month.

    Loads the persisted estimator ("outbreak_model.pkl") and the list of
    training feature names ("model_features.pkl") from the current working
    directory, builds a single-row frame with exactly those columns and
    returns the model's prediction for it.

    Args:
        year: Value used for the "Year" feature.
        month: Value used for the "Month" feature.
        state: State name. It is matched exactly (case-sensitive) against the
            "State_<name>" indicator columns stored with the model.

    Returns:
        The predicted outbreak count, rounded to 2 decimal places.

    Warns:
        UserWarning: If ``state`` has no matching indicator column. The
            prediction is still returned, but it is computed with every state
            indicator set to 0 and should not be trusted for that state.
    """
    import warnings

    # NOTE: joblib.load unpickles arbitrary objects; only load files that
    # were produced by this script or another trusted source.
    model = joblib.load("outbreak_model.pkl")
    features = joblib.load("model_features.pkl")

    # Start every expected column at 0, then fill in the values we know.
    input_row = dict.fromkeys(features, 0)
    input_row["Year"] = year
    input_row["Month"] = month

    state_col = f"State_{state}"
    if state_col in input_row:
        input_row[state_col] = 1
    else:
        # Previously this case was silent: a typo or an unseen state produced
        # an all-zero one-hot vector and a plausible-looking number.
        known_states = sorted(
            col[len("State_"):] for col in features if col.startswith("State_")
        )
        warnings.warn(
            f"Unknown state {state!r}; the model was trained on {known_states}. "
            "Predicting with all state indicators set to 0.",
            UserWarning,
            stacklevel=2,
        )

    # Selecting by `features` enforces the training column order and drops
    # any key that the model was not trained on.
    input_df = pd.DataFrame([input_row])[features]
    prediction = model.predict(input_df)[0]
    return round(prediction, 2)

# === Example Usage ===
# Ask the saved model for a single state / month combination.
year_input, month_input, state_input = 2025, 7, "Selangor"

predicted_outbreaks = predict_outbreak(
    year=year_input,
    month=month_input,
    state=state_input,
)
print(f"📊 Predicted outbreaks in {state_input} ({year_input}-{month_input}): {predicted_outbreaks}")
