In [None]:
# models/trained_model.py

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report
import joblib
import os

# Read the combined disaster CSV and discard every row that contains a
# missing value (imputation would be the alternative to dropping).
data_path = os.path.join("data", "processed", "combined_disaster_data.csv")
data = pd.read_csv(data_path).dropna()

# Target vector is the "label" column; every remaining column is a feature.
y = data["label"]
X = data.drop(columns=["label"])

# Train/Test split
# The split happens BEFORE scaling: fitting the scaler on the full dataset
# would let the held-out rows influence the mean/variance used to transform
# the training data (data leakage), which makes the evaluation optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features
# Fit on the training partition only, then apply the same (train-derived)
# statistics to the test partition. The `scaler` object persisted later is
# therefore the one that matches what the model actually saw in training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Base learner 1: gradient-boosted trees.
# NOTE(review): `use_label_encoder` is reportedly deprecated in newer xgboost
# releases -- confirm against the installed version before removing it.
# NOTE(review): recent xgboost versions presumably expect integer class labels
# starting at 0 -- verify the contents of the "label" column.
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
)

# Base learner 2: a two-hidden-layer perceptron (64 -> 32 units, ReLU).
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    max_iter=500,
    random_state=42,
)

# Ensemble: soft voting over the two base learners' predicted probabilities.
base_estimators = [('xgb', xgb), ('mlp', mlp)]
hybrid_model = VotingClassifier(estimators=base_estimators, voting='soft')

# Fit the ensemble on the training partition.
print("[INFO] Training hybrid model...")
hybrid_model.fit(X_train, y_train)

# Score the held-out partition and print per-class precision/recall/F1.
print("[INFO] Classification Report:\n")
y_pred = hybrid_model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

# Persist the fitted ensemble and the scaler side by side so that inference
# code can reload both; the output directory is created if it is missing.
os.makedirs("models", exist_ok=True)
for artifact, target in (
    (hybrid_model, "models/disaster_model.pkl"),
    (scaler, "models/scaler.pkl"),
):
    joblib.dump(artifact, target)
print("\n[INFO] Model and scaler saved to 'models/' folder.")
