In [4]:
# train_final_model.py (Version 4 - Definitive Time-Aware)
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import joblib

print("--- Training Final Predictor on Time-Aware Snapshots ---")

try:
    df = pd.read_csv('final_training_data.csv')
except FileNotFoundError:
    print("FATAL: 'final_training_data.csv' not found. Please run create_training_snapshots.py first.")
    exit()

# The 8 features for the model
features = [
    'feature_late_salary_count', 'feature_decline_week_count', 'feature_lending_app_tx_count',
    'feature_late_utility_count', 'feature_discretionary_spend_ratio', 'feature_atm_withdrawal_count',
    'feature_failed_debit_count', 'network_stress_feature'
]
X = df[features]
y = df['is_stressed']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print("Training XGBoost model...")
model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss', use_label_encoder=False)
model.fit(X_train, y_train)

print("\n--- Model Evaluation ---")
accuracy = accuracy_score(y_test, model.predict(X_test))
print(f"Model Accuracy: {accuracy * 100:.2f}%") # Should be realistic now

model_filename = 'financial_stress_predictor.pkl'
joblib.dump(model, model_filename)
print(f"\n--- Model saved to '{model_filename}' ---")


--- Training Final Predictor on Time-Aware Snapshots ---
Training XGBoost model...

--- Model Evaluation ---
Model Accuracy: 92.00%

--- Model saved to 'financial_stress_predictor.pkl' ---


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
