In [6]:
import pandas as pd
import pickle
import numpy as np

In [3]:
# Explore why model is only predicting 'No' 

# Load the model, scaler, and choices dictionary
with open('training/model.pkl', 'rb') as f: 
    model = pickle.load(f)
with open('training/scaler.pkl', 'rb') as f: 
    scaler = pickle.load(f)
with open('training/choices.pkl', 'rb') as f: 
    choices = pickle.load(f)

# Define the categorical and continuous columns
cat_cols = [col for col in choices.keys() if choices[col] is not None]
cont_cols = [col for col in choices.keys() if choices[col] is None]

# Sample input data for testing
input_data = {
    'Combined_income': 5000,
    'Credit_history': 'Yes',
    'Requested_amount': 200,
    'Community_type': 'Urban',
    'Dependents': '1',
    'College_degree': 'No'
}

# Function to predict loan approval
def predict(input_data): 
    """
    Helper function to make predictions based on input data.
    """
    # Create input DataFrame with the correct columns
    input_df = pd.DataFrame([input_data])
    X = pd.DataFrame(columns=choices.keys())
    X = pd.concat([X, input_df], ignore_index=True)
    
    # Handle categorical features
    X[cat_cols] = X[cat_cols].astype('category').apply(lambda x: x.cat.codes)
    
    # Handle continuous features
    X[cont_cols] = X[cont_cols].astype(float)
    
    # Ensure all columns are present
    X = X.reindex(columns=choices.keys(), fill_value=0)
    
    # Debugging statements to inspect the data
    print("Input DataFrame:")
    print(X.head())
    
    # Scale input data
    X_transformed = scaler.transform(X)
    print("Transformed Features:")
    print(X_transformed)
    
    # Make prediction
    output = model.predict(X_transformed)
    print("Model Output:")
    print(output)
    
    # Return 'No' for 0 and 'Yes' for 1
    return 'Yes' if output[0] == 1 else 'No'

# Run the prediction function with sample input data
result = predict(input_data)
print("Prediction Result:")
print(result)


Input DataFrame:
   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0              NaN                NaN         NaN               NaN   

   Credit_History  Gender_Female  Gender_Male  Married_No  Married_Yes  \
0              -1             -1           -1          -1           -1   

   Dependents_0  Dependents_1  Dependents_2  Dependents_3+  \
0            -1            -1            -1             -1   

   Education_Graduate  Education_Not Graduate  Self_Employed_No  \
0                  -1                      -1                -1   

   Self_Employed_Yes  Property_Area_Rural  Property_Area_Semiurban  \
0                 -1                   -1                       -1   

   Property_Area_Urban  
0                   -1  
Transformed Features:
[[        nan         nan         nan         nan -5.21089589 -3.10978953
  -4.6208168  -2.82851004 -3.53229937 -3.11534851 -3.08302843 -3.11919671
  -3.91663812 -4.43897724 -2.98283147 -4.54987597 -3.24141871 -2.83339

In [4]:
# Map input data to the correct columns
# Function to predict loan approval
def predict(input_data): 
    """
    Helper function to make predictions based on input data.
    """
    # Create a DataFrame with the correct columns
    X = pd.DataFrame(columns=choices.keys())
    
    # Map input data to the correct columns
    X.loc[0, 'ApplicantIncome'] = input_data['Combined_income'] * 0.6  # Example split
    X.loc[0, 'CoapplicantIncome'] = input_data['Combined_income'] * 0.4  # Example split
    X.loc[0, 'LoanAmount'] = input_data['Requested_amount']
    X.loc[0, 'Loan_Amount_Term'] = 360  # Example value, adjust as necessary
    
    # Handle categorical variables
    X.loc[0, 'Credit_History'] = 1 if input_data['Credit_history'] == 'Yes' else 0
    X.loc[0, 'Dependents_0'] = 1 if input_data['Dependents'] == '0' else 0
    X.loc[0, 'Dependents_1'] = 1 if input_data['Dependents'] == '1' else 0
    X.loc[0, 'Dependents_2'] = 1 if input_data['Dependents'] == '2' else 0
    X.loc[0, 'Dependents_3+'] = 1 if input_data['Dependents'] == '3+' else 0
    X.loc[0, 'Education_Graduate'] = 0 if input_data['College_degree'] == 'No' else 1
    X.loc[0, 'Education_Not Graduate'] = 1 if input_data['College_degree'] == 'No' else 0
    X.loc[0, 'Property_Area_Urban'] = 1 if input_data['Community_type'] == 'Urban' else 0
    X.loc[0, 'Property_Area_Semiurban'] = 1 if input_data['Community_type'] == 'Suburban' else 0
    X.loc[0, 'Property_Area_Rural'] = 1 if input_data['Community_type'] == 'Rural' else 0
    
    # Ensure all columns are present
    X = X.fillna(0)
    
    # Debugging statements to inspect the data
    print("Input DataFrame:")
    print(X.head())
    
    # Scale input data
    X_transformed = scaler.transform(X)
    print("Transformed Features:")
    print(X_transformed)
    
    # Make prediction
    output = model.predict(X_transformed)
    print("Model Output:")
    print(output)
    
    # Return 'No' for 0 and 'Yes' for 1
    return 'Yes' if output[0] == 1 else 'No'

# Run the prediction function with sample input data
result = predict(input_data)
print("Prediction Result:")
print(result)


Input DataFrame:
   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0           3000.0             2000.0         200               360   

   Credit_History  Gender_Female  Gender_Male  Married_No  Married_Yes  \
0               1              0            0           0            0   

   Dependents_0  Dependents_1  Dependents_2  Dependents_3+  \
0             0             1             0              0   

   Education_Graduate  Education_Not Graduate  Self_Employed_No  \
0                   0                       1                 0   

   Self_Employed_Yes  Property_Area_Rural  Property_Area_Semiurban  \
0                  0                    0                        0   

   Property_Area_Urban  
0                    1  
Transformed Features:
[[-0.39166957  0.13683214  0.5868408   0.29768245  0.45211846 -0.45429969
  -2.06871213 -0.70171306 -1.4120561  -1.10531419  2.15491953 -0.45103367
  -0.30184617 -1.965041    1.965041   -2.02844357 -0.41453312 -0.66642

  X = X.fillna(0)


In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
import pickle

# Load the model, scaler, and choices dictionary
with open('training/model.pkl', 'rb') as f: 
    model = pickle.load(f)
with open('training/scaler.pkl', 'rb') as f: 
    scaler = pickle.load(f)
with open('training/choices.pkl', 'rb') as f: 
    choices = pickle.load(f)

# Define the categorical and continuous columns
cat_cols = [col for col in choices.keys() if choices[col] is not None]
cont_cols = [col for col in choices.keys() if choices[col] is None]

# Sample input data for testing
input_data = {
    'Combined_income': 5000,
    'Credit_history': 'Yes',
    'Requested_amount': 200,
    'Community_type': 'Urban',
    'Dependents': '1',
    'College_degree': 'No'
}

# Function to predict loan approval
def predict(input_data): 
    """
    Helper function to make predictions based on input data.
    """
    # Create a DataFrame with the correct columns
    X = pd.DataFrame(columns=choices.keys())
    
    # Map input data to the correct columns
    X.loc[0, 'ApplicantIncome'] = input_data['Combined_income'] * 0.6  # Example split
    X.loc[0, 'CoapplicantIncome'] = input_data['Combined_income'] * 0.4  # Example split
    X.loc[0, 'LoanAmount'] = input_data['Requested_amount']
    X.loc[0, 'Loan_Amount_Term'] = 360  # Example value, adjust as necessary
    
    # Handle categorical variables
    X.loc[0, 'Credit_History'] = 1 if input_data['Credit_history'] == 'Yes' else 0
    X.loc[0, 'Dependents_0'] = 1 if input_data['Dependents'] == '0' else 0
    X.loc[0, 'Dependents_1'] = 1 if input_data['Dependents'] == '1' else 0
    X.loc[0, 'Dependents_2'] = 1 if input_data['Dependents'] == '2' else 0
    X.loc[0, 'Dependents_3+'] = 1 if input_data['Dependents'] == '3+' else 0
    X.loc[0, 'Education_Graduate'] = 0 if input_data['College_degree'] == 'No' else 1
    X.loc[0, 'Education_Not Graduate'] = 1 if input_data['College_degree'] == 'No' else 0
    X.loc[0, 'Property_Area_Urban'] = 1 if input_data['Community_type'] == 'Urban' else 0
    X.loc[0, 'Property_Area_Semiurban'] = 1 if input_data['Community_type'] == 'Suburban' else 0
    X.loc[0, 'Property_Area_Rural'] = 1 if input_data['Community_type'] == 'Rural' else 0
    
    # Ensure all columns are present
    X = X.fillna(0)
    
    # Debugging statements to inspect the data
    print("Input DataFrame:")
    print(X.head())
    
    # Scale input data
    X_transformed = scaler.transform(X)
    print("Transformed Features:")
    print(X_transformed)
    
    # Make prediction
    output = model.predict(X_transformed)
    print("Model Output:")
    print(output)
    
    # Apply threshold to determine binary class
    threshold = 0.5
    prediction = (output[0] > threshold).astype(int)
    
    # Return 'No' for 0 and 'Yes' for 1
    return 'Yes' if prediction == 1 else 'No'

# Run the prediction function with sample input data
result = predict(input_data)
print("Prediction Result:")
print(result)


Input DataFrame:
   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0           3000.0             2000.0         200               360   

   Credit_History  Gender_Female  Gender_Male  Married_No  Married_Yes  \
0               1              0            0           0            0   

   Dependents_0  Dependents_1  Dependents_2  Dependents_3+  \
0             0             1             0              0   

   Education_Graduate  Education_Not Graduate  Self_Employed_No  \
0                   0                       1                 0   

   Self_Employed_Yes  Property_Area_Rural  Property_Area_Semiurban  \
0                  0                    0                        0   

   Property_Area_Urban  
0                    1  
Transformed Features:
[[-0.39166957  0.13683214  0.5868408   0.29768245  0.45211846 -0.45429969
  -2.06871213 -0.70171306 -1.4120561  -1.10531419  2.15491953 -0.45103367
  -0.30184617 -1.965041    1.965041   -2.02844357 -0.41453312 -0.66642

  X = X.fillna(0)
