In [None]:
# trained_model.py

import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import joblib

# === 1. Load the dataset ===
data_path = r"C:\Users\Lenovo\DisasterPredictionProject\data\processed\combined_disaster_data.csv"
data = pd.read_csv(data_path)

# === 2. Check and rename target column ===
# Fail fast when the CSV lacks the expected target column; otherwise expose
# it under the generic name "label" used by the rest of the script.
if "disaster_type" not in data.columns:
    raise ValueError("Expected column 'disaster_type' not found!")
data = data.rename(columns={"disaster_type": "label"})

# === 3. Drop rows with a missing label and columns that are entirely NA ===
data = data.dropna(subset=["label"])
data = data.dropna(axis=1, how="all")

# === 4. Handle missing feature values ===
# Only numeric columns have a mean, so only their gaps are filled here;
# missing values in non-numeric columns are left as they are.
data = data.fillna(data.mean(numeric_only=True))

# === 5. Separate features and label ===
y = data["label"]
X = data.drop(columns="label")

# === 6. Encode label (e.g., 'Flood' → 0, 'Cyclone' → 1, etc.) ===
# The fitted encoder is kept so integer predictions can be mapped back to
# the original class names later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# === 7. Train/Test Split ===
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# === 8. Train the model ===
# NOTE(review): `use_label_encoder` is deprecated in newer XGBoost releases —
# confirm against the installed version before removing it.
model = XGBClassifier(n_estimators=100, learning_rate=0.1, use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

# === 9. Evaluate the model ===
y_pred = model.predict(X_test)

# The split above is not stratified, so a rare class can be absent from both
# y_test and y_pred. Without an explicit `labels` list the report infers the
# classes from the data, and a mismatch with the length of `target_names`
# raises a ValueError. Passing every encoded class id keeps the two aligned.
class_ids = np.arange(len(label_encoder.classes_))

print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=label_encoder.classes_,
    )
)

# === 10. Save the model and label encoder ===
# Both artifacts are written to a "models" directory relative to the current
# working directory. joblib.dump does not create missing parent directories,
# so make sure the folder exists first (no-op when it is already there).
os.makedirs("models", exist_ok=True)

model_path = os.path.join("models", "xgb_disaster_model.joblib")
encoder_path = os.path.join("models", "label_encoder.joblib")

# The encoder is stored next to the model so predictions can be decoded back
# into class names at inference time.
joblib.dump(model, model_path)
joblib.dump(label_encoder, encoder_path)

print(f"\n✅ Model saved to: {model_path}")
print(f"✅ Label encoder saved to: {encoder_path}")
