# In [54]:
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the three serialized artifacts this cell relies on. Paths are relative
# to the current working directory.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from
# a trusted source.
pl_cluster_model = joblib.load("./risk_clustering.pkl")
pl_xgb_model = joblib.load("./xgb_model.pkl")
pl_scaler_with_feature_names = joblib.load("./scaler_with_feature_names.pkl")

# The scaler artifact is a mapping bundling the scaler object with the list of
# column names it is applied to further down.
pl_scaler, pl_feature_names = (
    pl_scaler_with_feature_names['scaler'],
    pl_scaler_with_feature_names['feature_names'],
)


def bin_fico_scores(df):
    """Replace ``last_fico_range_high`` with an integer FICO-category code.

    Scores are bucketed into left-closed intervals and each bucket is then
    encoded by the *alphabetical* position of its label, which is what
    fitting a ``LabelEncoder`` on the label list produces:

        [0, 580)    Poor         -> 3
        [580, 670)  Fair         -> 1
        [670, 740)  Good         -> 2
        [740, 800)  Very Good    -> 4
        [800, inf)  Exceptional  -> 0

    Note that the resulting codes are therefore NOT ordered by credit
    quality.

    Args:
        df: DataFrame with a numeric ``last_fico_range_high`` column. The
            column is overwritten in place.

    Returns:
        The same DataFrame object, with the column replaced by int64 codes.

    Raises:
        ValueError: If any score is missing, negative, or infinite. The
            frame is left unmodified in that case.
    """
    ranges = [0, 580, 670, 740, 800, float('inf')]
    labels = ['Poor', 'Fair', 'Good', 'Very Good', 'Exceptional']

    # Bin position -> alphabetical rank of that bin's label. Computing this
    # directly avoids re-fitting an encoder on every call while yielding the
    # same codes.
    alphabetical = sorted(labels)
    code_for_bin = {
        position: alphabetical.index(label)
        for position, label in enumerate(labels)
    }

    # labels=False returns the integer bin position (NaN where a value falls
    # outside every bin or is itself missing).
    bin_position = pd.cut(
        df['last_fico_range_high'], bins=ranges, labels=False, right=False)

    # Validate before touching the caller's frame so a failure cannot leave
    # it half-converted.
    unbinned = bin_position.isna()
    if unbinned.any():
        raise ValueError(
            "last_fico_range_high has %d value(s) that are missing or "
            "outside [0, inf) and cannot be assigned a FICO category"
            % int(unbinned.sum()))

    df['last_fico_range_high'] = bin_position.map(code_for_bin).astype('int64')
    return df


# Hand-written input record that is pushed through the loaded models below.
sample = {
    "loan_amnt": 24000.0,
    "int_rate": 0.0890,
    "annual_inc": 100000.0,
    "dti": 13.30,
    "last_fico_range_high": 729.0,
    "last_fico_range_low": 725.0,
    "debt_payments": 1108.333333,
}

# Single-row frame (index 0); last_fico_range_high is replaced by its
# category code before any scaling happens.
sample_df = bin_fico_scores(pd.DataFrame(sample, index=[0]))

# Scale the columns listed in pl_feature_names and write the scaled values
# back over the originals.
scaled_values = pl_scaler.transform(sample_df[pl_feature_names])
scaled_df = pd.DataFrame(scaled_values, columns=pl_feature_names)
sample_df[scaled_df.columns] = scaled_df

# The clustering model sees only the 'dti' column.
# NOTE(review): if 'dti' is among pl_feature_names this is the scaled value,
# not the raw one -- confirm that matches how the clusterer was trained.
cluster_predictions = pl_cluster_model.predict(sample_df[['dti']])
cluster_label = cluster_predictions[0]
sample_df['cluster_label'] = cluster_label

# Present the columns in the order the booster reports for its features.
expected_order = pl_xgb_model.get_booster().feature_names
model_input = sample_df[expected_order]

# Bare final expression: its value is only displayed when run as a notebook
# cell.
pl_xgb_model.predict(model_input)[0]

# Captured notebook output (XGBoost warning):
#
#   If you are loading a serialized model (like pickle in Python, RDS in R) generated by
#   older XGBoost, please export the model by calling `Booster.save_model` from that version
#   first, then load it back in current version. See:
#
#     https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
#
#   for more details about differences between saving model and serializing.



0