In [None]:
import pandas as pd
import json
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Train a random-forest loan-approval classifier from lender decisions joined
# with per-client JSON profiles, print a held-out classification report, and
# persist both the fitted model and the ordered feature list.

# --- Load lender decision training data ---
# Read client_id as text: the ids derived from the profile filenames below are
# always strings, so an integer-inferred column would fail to merge (and would
# lose any leading zeros).
lender_df = pd.read_csv(
    '../data/lender_profiles/bank_decisions_cleaned.csv',
    dtype={'client_id': str},
)

# --- Load training client profiles ---
client_dir = '../data/output_profiles'
client_data = []
# os.listdir returns entries in arbitrary order; sort so client_df is built in
# the same row order on every run.
for fname in sorted(os.listdir(client_dir)):
    stem, ext = os.path.splitext(fname)
    if ext != '.json':
        continue
    with open(os.path.join(client_dir, fname), 'r', encoding='utf-8') as f:
        client_json = json.load(f)
    # The client id is the filename without its extension. splitext removes
    # only the trailing suffix, unlike str.replace, which would also strip a
    # '.json' appearing in the middle of the name.
    client_json['client_id'] = stem
    client_data.append(client_json)

# Nested JSON keys are flattened to dotted column names
# (e.g. 'credit_report.fico_score').
client_df = pd.json_normalize(client_data)

# --- Merge lender decisions with client profiles ---
merged = pd.merge(lender_df, client_df, on='client_id', how='inner')
if merged.empty:
    # Fail here with an actionable message instead of a less obvious error
    # from train_test_split further down.
    raise ValueError(
        f"No lender decisions matched any client profile in {client_dir!r}; "
        "check that 'client_id' values correspond to the profile filenames."
    )

# --- Feature Engineering ---
merged['fico_score'] = merged['credit_report.fico_score']
merged['utilization'] = merged['credit_report.utilization']
# Annualize the gross monthly income.
merged['annual_income'] = merged['application.income.gross_monthly'].astype(float) * 12
merged['loan_amount'] = merged['amount']
merged['loan_term'] = merged['term']

features = ['fico_score', 'utilization', 'annual_income', 'loan_amount', 'loan_term']
X = merged[features]
y = merged['approved']

# --- Train/test split ---
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Train model ---
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# --- Evaluate ---
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# --- Save model ---
# Make sure the output directory exists so a fresh checkout does not fail
# only after training has already finished.
os.makedirs('../models', exist_ok=True)
joblib.dump(model, '../models/approval_predictor.pkl')

# --- Save feature list ---
# The list is saved in the same column order the model was fitted with.
with open('../models/feature_encoder.json', 'w', encoding='utf-8') as f:
    json.dump(features, f)

print("✅ Model training complete and saved.")
