## Model Deployment

In [23]:
import numpy as np
import pandas as pd
import joblib
import xgboost as xgb
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


In [37]:
# Example new input data (ensure it matches the number of features the model expects)
X_new = np.array([[15, 40000, 1000, 200, 150, 200, 200, 400, 200, 200, 200, 650, 20, 890,100,1000, 36, 2, 1]])

# Column names (adjust with the actual feature names from your dataset)
feature_names = ['int_rate', 'loan_amnt','installment', 'annual_inc', 'delinq_2yrs', 'fico_range_low', 'fico_range_high',
                 'inq_last_6mths', 'open_acc', 'pub_rec', 'revol_bal', 'total_acc', 'out_prncp', 
                 'out_prncp_inv', 'last_pymnt_amnt','delinq_amnt',
                 'term', 'grade', 'home_ownership']  # Update this list if needed

# Convert X_new into a DataFrame with the correct column names
X_new_df = pd.DataFrame(X_new, columns=feature_names)

# Load the scaler used during training
scaler = joblib.load('scaler.pkl')

# Define categorical columns (adjust as necessary based on your dataset)
categorical_cols = ['term', 'grade', 'home_ownership']  # Update with your actual categorical columns

# Define the preprocessor pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['int_rate', 'loan_amnt','installment', 'annual_inc', 'delinq_2yrs', 
                                   'fico_range_low', 'fico_range_high', 'inq_last_6mths', 'open_acc', 
                                   'pub_rec', 'revol_bal', 'total_acc', 'out_prncp', 'out_prncp_inv', 
                                   'last_pymnt_amnt','delinq_amnt']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Apply preprocessing steps
X_new_transformed = preprocessor.fit_transform(X_new_df)

# Ensure that X_new_transformed has 19 features (matching the model's input size)
print(f'Shape of transformed input data: {X_new_transformed.shape}')



Shape of transformed input data: (1, 19)


In [41]:
# Load the model (XGBClassifier or Booster)
model = xgb.XGBClassifier()
model.load_model('xgboost_model.model')

# Make the prediction using the transformed input data
prediction = model.predict(X_new_transformed)

# Process the output (binary classification)
if prediction[0] == 0:
    print(f'Predicted result: Will not default')
if prediction[0] == 1:
    print(f'Predicted result: Will default')

Predicted result: Will not default


In [None]:
import streamlit as st
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import joblib

# Load the trained XGBoost model and the preprocessing objects
model = xgb.XGBClassifier()
model.load_model('xgboost_model.model')

# Assuming the scaler and encoder are saved as 'scaler.pkl' and 'preprocessor.pkl'
scaler = joblib.load('scaler.pkl')
preprocessor = joblib.load('preprocessor.pkl')

# Define the input fields for the parameters (ensure the correct order and type)
st.title('Loan Status Prediction')

st.write("Please enter the parameters:")

# Collect user input for all parameters (example for 18 features)
int_rate = st.number_input('Interest Rate', min_value=0.0, max_value=100.0, step=0.01)
loan_amnt = st.number_input('Loan Amount', min_value=0.0, step=1.0)
installment = st.number_input('Installment', min_value=0.0, step=1.0)
annual_inc = st.number_input('Annual Income', min_value=0.0, step=1.0)
delinq_2yrs = st.number_input('Delinquencies in 2 Years', min_value=0, step=1)
fico_range_low = st.number_input('FICO Range Low', min_value=300, max_value=850, step=1)
fico_range_high = st.number_input('FICO Range High', min_value=300, max_value=850, step=1)
inq_last_6mths = st.number_input('Inquiries in Last 6 Months', min_value=0, step=1)
open_acc = st.number_input('Number of Open Accounts', min_value=0, step=1)
pub_rec = st.number_input('Public Record', min_value=0, step=1)
revol_bal = st.number_input('Revolving Balance', min_value=0.0, step=1.0)
total_acc = st.number_input('Total Accounts', min_value=0, step=1)
out_prncp = st.number_input('Outstanding Principal', min_value=0.0, step=1.0)
out_prncp_inv = st.number_input('Outstanding Principal Investment', min_value=0.0, step=1.0)
last_pymnt_amnt = st.number_input('Last Payment Amount', min_value=0.0, step=1.0)

# Categorical features (example: 'term', 'grade', 'home_ownership')
term = st.selectbox('Loan Term', ['36 months', '60 months'])
grade = st.selectbox('Grade', ['A', 'B', 'C', 'D', 'E', 'F', 'G'])
home_ownership = st.selectbox('Home Ownership', ['OWN', 'MORTGAGE', 'RENT'])

# Create a DataFrame from the input values
input_data = pd.DataFrame([[int_rate, loan_amnt, installment, annual_inc, delinq_2yrs, fico_range_low, fico_range_high,
                            inq_last_6mths, open_acc, pub_rec, revol_bal, total_acc, out_prncp, out_prncp_inv,
                            last_pymnt_amnt, term, grade, home_ownership]],
                          columns=['int_rate', 'loan_amnt', 'installment', 'annual_inc', 'delinq_2yrs', 'fico_range_low',
                                   'fico_range_high', 'inq_last_6mths', 'open_acc', 'pub_rec', 'revol_bal', 'total_acc',
                                   'out_prncp', 'out_prncp_inv', 'last_pymnt_amnt', 'term', 'grade', 'home_ownership'])

# Preprocess the input data using the same preprocessor
input_data_transformed = preprocessor.transform(input_data)

# Predict the class and probabilities
prediction = model.predict(input_data_transformed)
probabilities = model.predict_proba(input_data_transformed)

# Show the results
if st.button('Predict'):
    st.write(f'Predicted class: {prediction[0]}')
    st.write(f'Probability of class = 1: {probabilities[0][1]:.2f}')