In [11]:
import joblib
from architecture import MLP
import torch
import pandas as pd
import json

In [12]:
# Instantiate the classifier with 26 input features and 2 output classes,
# then restore its trained parameters from the saved state dict.
model = MLP(input_dim=26, num_classes=2)
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine (the model itself is never moved off the CPU here).
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code on load.
model.load_state_dict(
    torch.load('loan_approval_model.pth', map_location='cpu', weights_only=True)
)


<All keys matched successfully>

In [None]:
# Raw categorical fields that are integer-encoded with the fitted LabelEncoders.
_CATEGORICAL_COLS = (
    'EmploymentStatus',
    'EducationLevel',
    'MaritalStatus',
    'LoanPurpose',
    'HomeOwnershipStatus',
)

# Column order handed to the preprocessor (raw fields plus engineered ones).
_FEATURE_ORDER = (
    'Age',
    'AnnualIncome',
    'CreditScore',
    'EmploymentStatus',
    'EducationLevel',
    'Experience',
    'LoanAmount',
    'LoanDuration',
    'MaritalStatus',
    'NumberOfDependents',
    'HomeOwnershipStatus',
    'MonthlyDebtPayments',
    'DebtToIncomeRatio',
    'LoanPurpose',
    'SavingsAccountBalance',
    'CheckingAccountBalance',
    'MonthlyIncome',
    'JobTenure',
    'NetWorth',
    'BaseInterestRate',
    'InterestRate',
    'MonthlyLoanPayment',
    'TotalIncome',
    'CreditScore_Income',
    'DebtToIncome_CreditScore',
    'InterestRate_LoanDuration',
)


def _load_json_input(input_path, label_encoders):
    """Read one applicant record from a JSON file and integer-encode its categorical fields."""
    with open(input_path, 'r', encoding='utf-8') as f:
        user_data = json.load(f)

    if not isinstance(user_data, dict):
        raise TypeError(
            f"{input_path} must contain a single JSON object mapping field names "
            f"to values, got {type(user_data).__name__}"
        )

    for col in _CATEGORICAL_COLS:
        raw_value = user_data[col]
        try:
            encoded = label_encoders[col].transform([raw_value])[0]
        except ValueError as err:
            # LabelEncoder rejects labels it was not fitted on; re-raise with
            # the field name and the accepted options so the input can be fixed.
            options = list(label_encoders[col].classes_)
            raise ValueError(
                f"Invalid input for {col}: {raw_value!r}. Options for {col}: {options}"
            ) from err
        user_data[col] = int(encoded)
    return user_data


def _add_engineered_features(user_data):
    """Add the derived features to ``user_data`` in place and return it."""
    user_data['TotalIncome'] = (
        user_data['AnnualIncome']
        + user_data['SavingsAccountBalance']
        + user_data['CheckingAccountBalance']
    )
    # The 1e-5 term guards against division by zero when MonthlyIncome is 0.
    # Any DebtToIncomeRatio supplied in the input is overwritten here.
    user_data['DebtToIncomeRatio'] = user_data['MonthlyDebtPayments'] / (user_data['MonthlyIncome'] + 1e-5)
    user_data['CreditScore_Income'] = user_data['CreditScore'] * user_data['AnnualIncome']
    user_data['DebtToIncome_CreditScore'] = user_data['DebtToIncomeRatio'] * user_data['CreditScore']
    user_data['InterestRate_LoanDuration'] = user_data['InterestRate'] * user_data['LoanDuration']
    return user_data


def get_user_inputs(
    input_path: str = 'example_input.json',
    encoders_path: str = 'label_encoders.joblib',
    preprocessor_path: str = 'loan_approval_preprocessor.joblib',
) -> torch.Tensor:
    """Build the model input tensor for one applicant described in a JSON file.

    The record is loaded, its categorical fields are label-encoded, the
    engineered features are added, the columns are put into the order the
    preprocessor is given, and the fitted preprocessor is applied.

    Args:
        input_path: JSON file holding one object with the raw applicant fields.
        encoders_path: joblib file holding a mapping from categorical column
            name to its fitted label encoder.
        preprocessor_path: joblib file holding the fitted preprocessor.

    Returns:
        A float32 tensor built from the preprocessor output for this one record.

    Raises:
        KeyError: a categorical field, or a field used by the feature
            engineering step, is missing from the JSON object.
        TypeError: the JSON file does not contain an object.
        ValueError: a categorical value is not known to its encoder, or any
            model column is missing/NaN after reordering.
    """
    # SECURITY: joblib.load unpickles arbitrary Python objects. Only load
    # artifacts from a trusted source, and prefer the scikit-learn version the
    # artifacts were saved with (a mismatch can give invalid results).
    label_encoders = joblib.load(encoders_path)

    user_data = _load_json_input(input_path, label_encoders)

    # NOTE(review): an earlier interactive version of this function multiplied
    # CreditScore by 2 with the remark "match training". The JSON path has never
    # done so. Confirm against the training pipeline whether the doubling is
    # required here; it is deliberately NOT applied to keep predictions unchanged.
    _add_engineered_features(user_data)

    # reindex drops unknown keys and inserts NaN for any column that is absent.
    user_df = pd.DataFrame([user_data]).reindex(columns=list(_FEATURE_ORDER))

    nan_columns = user_df.columns[user_df.isnull().any()].tolist()
    if nan_columns:
        raise ValueError(f"Input data contains NaN values in columns: {nan_columns}")

    preprocessor = joblib.load(preprocessor_path)
    user_preprocessed = preprocessor.transform(user_df)

    return torch.tensor(user_preprocessed.tolist(), dtype=torch.float32)

# Example usage: build the input tensor, run one forward pass without
# gradient tracking, and report the class with the highest logit.
user_tensor = get_user_inputs()
if user_tensor is not None:
    model.eval()
    with torch.no_grad():
        logits = model(user_tensor)
    # Index 0 is reported as "not approved"; any other index as "approved".
    class_index = torch.argmax(logits, dim=1).item()
    message = (
        "The model predicts that the loan will not be approved."
        if class_index == 0
        else "The model predicts that the loan will be approved."
    )
    print(message)

   Age  AnnualIncome  CreditScore  EmploymentStatus  EducationLevel  \
0   30         60000          700                 0               1   

   Experience  LoanAmount  LoanDuration  MaritalStatus  NumberOfDependents  \
0          10       20000            36              1                   0   

   ...  MonthlyIncome  JobTenure  NetWorth  BaseInterestRate  InterestRate  \
0  ...           4000          5    150000               3.5           5.0   

   MonthlyLoanPayment  TotalIncome  CreditScore_Income  \
0                 600        75000            42000000   

   DebtToIncome_CreditScore  InterestRate_LoanDuration  
0                       NaN                        NaN  

[1 rows x 26 columns]
The model predicts that the loan will not be approved.


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


The model ultimately takes 26 input features that combine both raw and engineered data. Here's what each one means:

1. Age – The applicant’s age in years.  
2. AnnualIncome – The applicant’s yearly earnings.  
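3. CreditScore – A measure of creditworthiness. It is described as being doubled during training to amplify its effect; confirm that inference applies the same scaling, because the JSON input path in the code above passes the score through unchanged.  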
4. EmploymentStatus – A categorical feature (e.g. "employed," "self-employed," "unemployed") converted to numeric.  
5. EducationLevel – A categorical indicator (e.g. "bachelor," "master," etc.) encoded as a number.  
6. Experience – Years of work or professional experience.  
7. LoanAmount – The amount of money the applicant is asking to borrow.  
8. LoanDuration – The term or duration of the loan (typically in months).  
9. MaritalStatus – Another categorical feature (e.g. "married," "single") encoded as a number.  
10. NumberOfDependents – The number of dependents the applicant has.  
11. HomeOwnershipStatus – Categorical status of home ownership (e.g. "own," "rent") encoded numerically.  
12. MonthlyDebtPayments – Total monthly payments on existing debts.  
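13. DebtToIncomeRatio – The ratio of monthly debt obligations to monthly income. At inference it is recomputed as MonthlyDebtPayments / (MonthlyIncome + 1e-5), overwriting any value supplied in the input.  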
14. LoanPurpose – Categorical reason for the loan (for instance, "auto" or "education") encoded numerically.  
15. SavingsAccountBalance – Current balance in savings accounts.  
16. CheckingAccountBalance – Current balance in checking accounts.  
17. MonthlyIncome – The income the applicant earns each month.  
18. JobTenure – How long the applicant has been at their current job.  
19. NetWorth – The overall net worth of the applicant (assets minus liabilities).  
20. BaseInterestRate – A benchmark or base interest rate used in the loan calculations.  
21. InterestRate – The specific interest rate applied to the applicant’s loan.  
22. MonthlyLoanPayment – The expected monthly amount to be paid if the loan is approved.  
23. TotalIncome – An engineered feature computed as AnnualIncome plus SavingsAccountBalance and CheckingAccountBalance.  
24. CreditScore_Income – Another engineered feature calculated as CreditScore multiplied by AnnualIncome (capturing the combined effect of credit history and earnings).  
25. DebtToIncome_CreditScore – The product of DebtToIncomeRatio and CreditScore, which helps adjust debt load relative to creditworthiness.  
26. InterestRate_LoanDuration – The product of InterestRate and LoanDuration, giving a sense of the overall cost of borrowing over time.

These features, both raw and derived, allow the model to capture different aspects of an applicant's financial profile to predict loan approval reliably.

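The warning indicates a version mismatch between the scikit-learn version used to pickle the LabelEncoder (1.6.1) and the one currently in use (1.4.0). Loading a pickle across versions is unsupported and may result in breaking changes or invalid results. To resolve it, either install the scikit-learn version the encoders were saved with (1.6.1) or re-fit and re-save the encoders under the current version. For further details, please refer to: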
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations