In this notebook I will be testing different variations to make sure that the final model is returning accurate predictions. 

In [1]:
import joblib  # Import joblib to load the saved model

# Load the saved model
model = joblib.load('best_bagging_model.pkl')

The order of columns that are being fed into the model is as follows: Age, Monthly In Hand Salary, Interest Rate, Delay from Due Date, Number of Delayed Payments, Changed Credit Limit, Credit Mix, Outstanding Debt, Credit Utilization Ratio, Credit History Age, Total EMI Per Month, and Amount Invested Monthly. 

Instead of entering Good, Standard or Poor for the Credit Mix I will just put 2, 1, or 0 for now so that it is in the proper form that the model needs, but in the actual application I have added preprocessing to make sure the string input is label encoded. 

In [11]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, RobustScaler

# Define column names
columns = ['Age', 'Monthly_Inhand_Salary', 'Interest_Rate', 'Delay_from_due_date',
           'Num_of_Delayed_Payment', 'Changed_Credit_Limit', 'Credit_Mix',
           'Outstanding_Debt', 'Credit_Utilization_Ratio', 'Credit_History_Age',
           'Total_EMI_per_month', 'Amount_invested_monthly']

# Define values for three rows
row_values_1 = [12, 2000.0, 4, 5, 2, 0.1, 'Good', 15000.0, 0.5, 3.5, 200.0, 300.0]
row_values_2 = [60, 5000.0, 10, 20, 5, 0.1, 'Standard', 50000.0, 0.1, 10.0, 1000.0, 2000.0]
row_values_3 = [25, 10000.0, 1, 1, 1, 0.01, 'Poor', 1000.0, 1.0, 0.1, 50.0, 100.0]
row_values_4 = [40, 1800.0, 7, 12, 5, 0.3, 'Good', 22000.0, 0.8, 5.0, 350.0, 450.0]
row_values_5 = [35, 2500.0, 2, 25, 10, 0.6, 'Standard', 30000.0, 0.2, 6.0, 500.0, 700.0]
row_values_6 = [55, 2200.0, 8, 18, 7, 0.5, 'Standard', 27000.0, 1.0, 7.0, 450.0, 550.0]

# Create a list of dictionaries, each dictionary represents a row
data_dicts = [
    dict(zip(columns, row_values_1)),
    dict(zip(columns, row_values_2)),
    dict(zip(columns, row_values_3)),
    dict(zip(columns, row_values_4)),
    dict(zip(columns, row_values_5)),
    dict(zip(columns, row_values_6))
]

# Convert the list of dictionaries into a DataFrame
df = pd.DataFrame(data_dicts)

# Define the appropriate scalers for each column based on the columns characteristics
scalers = {
    'Age': MinMaxScaler(),
    'Monthly_Inhand_Salary': RobustScaler(),
    'Interest_Rate': MinMaxScaler(),
    'Delay_from_due_date': RobustScaler(),
    'Num_of_Delayed_Payment': RobustScaler(),
    'Changed_Credit_Limit': RobustScaler(),
    'Outstanding_Debt': RobustScaler(),
    'Credit_Utilization_Ratio': MinMaxScaler(),
    'Credit_History_Age': MinMaxScaler(),
    'Total_EMI_per_month': MinMaxScaler(),
    'Amount_invested_monthly': MinMaxScaler(),
}

# Define ordinal mappings for the 'Credit_Mix' column
credit_mix_mapping = {'Good': 2, 'Standard': 1, 'Poor': 0}

# Function to scale a single column
def scale_column(scaler, column, df):
    return scaler.fit_transform(df[[column]]).flatten()

# Apply scaling to input data
scaled_df = pd.DataFrame()
for column in df.columns:
    scaler = scalers.get(column)
    if scaler:
        scaled_column = scale_column(scaler, column, df)
        scaled_df[column] = scaled_column
    elif column == 'Credit_Mix':
        df[column] = df[column].map(credit_mix_mapping)


# Now, you can make predictions for each row in the DataFrame using your bagging model
for index, row in df.iterrows():
    # Extract the values for the row
    input_data = pd.DataFrame([row])

    # Make prediction
    prediction = model.predict(input_data)

    # Print prediction for the row
    print(f"Prediction for Row {index + 1}: {prediction}")

Prediction for Row 1: ['Standard']
Prediction for Row 2: ['Standard']
Prediction for Row 3: ['Poor']
Prediction for Row 4: ['Standard']
Prediction for Row 5: ['Standard']
Prediction for Row 6: ['Standard']
