In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.inspection import permutation_importance
from sklearn.ensemble import RandomForestClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch
import joblib



In [3]:
# Load dataset
df = pd.read_csv('E:\\hacktu\\preprocessed_under.csv')
# df['CreditScore'] = df['CreditScore']

# Separate features and target
X = df.drop('LoanApproved', axis=1)
y = df['LoanApproved']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=9)

# Identify categorical and numerical columns
cat_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
num_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), num_cols),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), cat_cols)
    ])


# Apply preprocessing
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

In [None]:
# Temporary model for feature analysis
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train_preprocessed, y_train)

# Permutation importance
result = permutation_importance(
    rf, X_test_preprocessed, y_test, n_repeats=10, random_state=42
)

# Display feature importance
feature_names = preprocessor.get_feature_names_out()
sorted_idx = result.importances_mean.argsort()[::-1]

print("Feature ranking:")
for i in sorted_idx:
    print(f"{feature_names[i]:<30}: {result.importances_mean[i]:.3f}")

Feature ranking:
num__InterestRate             : 0.044
num__MonthlyIncome            : 0.041
num__NetWorth                 : 0.028
num__LoanAmount               : 0.028
num__MonthlyLoanPayment       : 0.018
num__InterestRate_LoanDuration: 0.005
num__AnnualIncome             : 0.004
num__TotalIncome              : 0.004
num__CreditScore              : 0.004
num__MonthlyDebtPayments      : 0.003
num__CreditScore_Income       : 0.003
num__EducationLevel           : 0.002
num__CheckingAccountBalance   : 0.001
num__HomeOwnershipStatus      : 0.001
num__DebtToIncome_CreditScore : 0.001
num__LoanDuration             : 0.001
num__LoanPurpose              : 0.000
num__DebtToIncomeRatio        : 0.000
num__EmploymentStatus         : 0.000
num__NumberOfDependents       : 0.000
num__SavingsAccountBalance    : 0.000
num__JobTenure                : 0.000
num__MaritalStatus            : -0.000
num__BaseInterestRate         : -0.000
num__Experience               : -0.001
num__Age                      

In [None]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_preprocessed.tolist(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_test_tensor = torch.tensor(X_test_preprocessed.tolist(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define model
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),  # Stabilize training
            nn.ReLU(),
            nn.Dropout(0.5),  # Reduce overfitting
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )
        # Custom weight initialization
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        return self.layers(x)

# Initialize model
input_dim = X_train_preprocessed.shape[1]
num_classes = len(y.unique())
model = MLP(input_dim, num_classes)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # L2 regularization

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        # Optional: Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

# Evaluation
with torch.no_grad():
    model.eval()
    test_outputs = model(X_test_tensor)
    _, predicted = torch.max(test_outputs, 1)
    print(classification_report(y_test, predicted.numpy()))
    print(confusion_matrix(y_test, predicted.numpy()))

              precision    recall  f1-score   support

           0       0.96      0.87      0.91       956
           1       0.88      0.97      0.92       956

    accuracy                           0.92      1912
   macro avg       0.92      0.92      0.92      1912
weighted avg       0.92      0.92      0.92      1912

[[827 129]
 [ 30 926]]


In [7]:
joblib.dump(model, 'model.joblib')

['model.joblib']

In [6]:
joblib.dump(preprocessor, 'loan_approval_preprocessor.joblib')

['loan_approval_preprocessor.joblib']

In [None]:
def get_user_inputs():
    # Define which columns are numeric and which are categorical
    numeric_cols = [
        'Age', 'AnnualIncome', 'CreditScore', 'Experience', 'LoanAmount',
        'LoanDuration', 'MonthlyDebtPayments'
         'SavingsAccountBalance', 'CheckingAccountBalance', 'MonthlyIncome', 'JobTenure', 'NetWorth',
        'BaseInterestRate', 'InterestRate', 'MonthlyLoanPayment',
    ]
    cat_cols = ['EmploymentStatus', 'EducationLevel', 'MaritalStatus', 'LoanPurpose', 'HomeOwnerShipStatus']
    
    user_data = {}
    # Collect numeric inputs
    for col in numeric_cols:
        val = input(f"Enter numeric value for {col}: ")
        try:
            user_data[col] = float(val)
        except ValueError:
            print(f"Invalid numeric input for {col}.")
            return None
    
    # Load pre-fitted label encoders for categorical columns
    label_encoders = joblib.load('label_encoders.joblib')
    
    # Process categorical inputs using LabelEncoder(s)
    for col in cat_cols:
        options = list(label_encoders[col].classes_)
        print(f"Options for {col}: {options}")
        val = input(f"Enter category for {col}: ")
        try:
            user_data[col] = int(label_encoders[col].transform([val])[0])
        except Exception:
            print(f"Invalid input for {col}.")
            return None
    
    # ...existing feature engineering code...
    user_data['TotalIncome'] = user_data['AnnualIncome'] + user_data['SavingsAccountBalance'] + user_data['CheckingAccountBalance']
    user_data['DebtToIncomeRatio'] = user_data['MonthlyDebtPayments'] / (user_data['MonthlyIncome'] + 1e-5)
    user_data['CreditScore_Income'] = user_data['CreditScore'] * user_data['AnnualIncome']
    user_data['DebtToIncome_CreditScore'] = user_data['DebtToIncomeRatio'] * user_data['CreditScore']
    user_data['InterestRate_LoanDuration'] = user_data['InterestRate'] * user_data['LoanDuration']

    user_df = pd.DataFrame([user_data])
    # Rearrange columns to match training order
    list_order = ['Age',
    'AnnualIncome',
    'CreditScore',
    'EmploymentStatus',
    'EducationLevel',
    'Experience',
    'LoanAmount',
    'LoanDuration',
    'MaritalStatus',
    'NumberOfDependents',
    'HomeOwnershipStatus',
    'MonthlyDebtPayments',
    'DebtToIncomeRatio',
    'LoanPurpose',
    'SavingsAccountBalance',
    'CheckingAccountBalance',
    'MonthlyIncome',
    'JobTenure',
    'NetWorth',
    'BaseInterestRate',
    'InterestRate',
    'MonthlyLoanPayment',
    'TotalIncome',
    'CreditScore_Income',
    'DebtToIncome_CreditScore',
    'InterestRate_LoanDuration']
    user_df = user_df.reindex(columns=list_order)
    
    # Load preprocessor and apply transformation
    preprocessor = joblib.load('loan_approval_preprocessor.joblib')
    
    # Multiply the CreditScore column by 2 *before* preprocessing
    user_df['CreditScore'] *= 2
    
    user_preprocessed = preprocessor.transform(user_df)

    # Convert to PyTorch tensor
    user_tensor = torch.tensor(user_preprocessed.tolist(), dtype=torch.float32)
    return user_tensor

# Example usage:
# user_tensor = get_user_inputs()
# if user_tensor is not None:
#     with torch.no_grad():
#         model.eval()
#         prediction = model(user_tensor)
#         print(prediction)

In [36]:
def get_user_inputs():
    # Define which columns are numeric and which are categorical
    numeric_cols = [
        'Age', 'AnnualIncome', 'CreditScore', 'Experience', 'LoanAmount',
        'LoanDuration', 'MonthlyDebtPayments',
         'SavingsAccountBalance', 'CheckingAccountBalance', 'MonthlyIncome', 'JobTenure', 'NetWorth',
        'BaseInterestRate', 'InterestRate', 'MonthlyLoanPayment',
    ]
    cat_cols = ['EmploymentStatus', 'EducationLevel', 'MaritalStatus', 'LoanPurpose', 'HomeOwnershipStatus']
    
    user_data = {}
    # Collect numeric inputs
    for col in numeric_cols:
        val = input(f"Enter numeric value for {col}: ")
        try:
            user_data[col] = float(val)
        except ValueError:
            print(f"Invalid numeric input for {col}.")
            return None
    # Increase CreditScore weight for user input (match training)
    user_data['CreditScore'] = user_data['CreditScore'] * 2
    
    # Load pre-fitted label encoders for categorical columns
    label_encoders = joblib.load('label_encoders.joblib')
    
    # Process categorical inputs using LabelEncoder(s)
    for col in cat_cols:
        options = list(label_encoders[col].classes_)
        print(f"Options for {col}: {options}")
        val = input(f"Enter category for {col}: ")
        try:
            user_data[col] = int(label_encoders[col].transform([val])[0])
        except Exception:
            print(f"Invalid input for {col}.")
            return None
    
    # ...existing feature engineering code...
    user_data['TotalIncome'] = user_data['AnnualIncome'] + user_data['SavingsAccountBalance'] + user_data['CheckingAccountBalance']
    user_data['DebtToIncomeRatio'] = user_data['MonthlyDebtPayments'] / (user_data['MonthlyIncome'] + 1e-5)
    user_data['CreditScore_Income'] = user_data['CreditScore'] * user_data['AnnualIncome']
    user_data['DebtToIncome_CreditScore'] = user_data['DebtToIncomeRatio'] * user_data['CreditScore']
    user_data['InterestRate_LoanDuration'] = user_data['InterestRate'] * user_data['LoanDuration']

    user_df = pd.DataFrame([user_data])
    # Rearrange columns to match training order
    list_order = ['Age',
    'AnnualIncome',
    'CreditScore',
    'EmploymentStatus',
    'EducationLevel',
    'Experience',
    'LoanAmount',
    'LoanDuration',
    'MaritalStatus',
    'NumberOfDependents',
    'HomeOwnershipStatus',
    'MonthlyDebtPayments',
    'DebtToIncomeRatio',
    'LoanPurpose',
    'SavingsAccountBalance',
    'CheckingAccountBalance',
    'MonthlyIncome',
    'JobTenure',
    'NetWorth',
    'BaseInterestRate',
    'InterestRate',
    'MonthlyLoanPayment',
    'TotalIncome',
    'CreditScore_Income',
    'DebtToIncome_CreditScore',
    'InterestRate_LoanDuration']
    user_df = user_df.reindex(columns=list_order)
    
    # Load preprocessor and apply transformation
    preprocessor = joblib.load('loan_approval_preprocessor.joblib')
    user_preprocessed = preprocessor.transform(user_df)
    
    # Convert to PyTorch tensor
    user_tensor = torch.tensor(user_preprocessed.tolist(), dtype=torch.float32)
    return user_tensor

# Example usage:
user_tensor = get_user_inputs()
if user_tensor is not None:
    with torch.no_grad():
        model.eval()
        prediction = model(user_tensor)
        # Get the predicted target by selecting the index with the highest logit
        predicted_class = torch.argmax(prediction, dim=1)
        print("Predicted class:", predicted_class.item())
        if predicted_class.item() == 0:
            print("The model predicts that the loan will not be approved.")
        else:
            print("The model predicts that the loan will be approved.")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Options for EmploymentStatus: ['employed', 'self-employed', 'unemployed']
Options for EducationLevel: ['associate', 'bachelor', 'doctorate', 'high school', 'master']
Options for MaritalStatus: ['divorced', 'married', 'single', 'widowed']
Options for LoanPurpose: ['auto', 'debt consolidation', 'education', 'home', 'other']
Options for HomeOwnershipStatus: ['mortgage', 'other', 'own', 'rent']
tensor([[1046.6898, -866.2233]])


In [50]:
probabilities = torch.sigmoid(prediction)
predicted_class = (probabilities > 0.5).int()

print("Probabilities:", probabilities[0][1].item())  # Probability of class 1
print("Predicted class:", predicted_class[0])

if predicted_class[0].item() == 0:
    print("The model predicts that the loan will not be approved.")
else:
    print("The model predicts that the loan will be approved.")

Probabilities: 0.0
Predicted class: tensor([1, 0], dtype=torch.int32)


RuntimeError: a Tensor with 2 elements cannot be converted to Scalar

In [46]:
probabilities = torch.sigmoid(prediction)
        
        # Threshold the probabilities to get class predictions (0 or 1)
predicted_class = (probabilities > 0.5).int()

print("Probabilities:", probabilities.item())
print("Predicted class:", predicted_class)

RuntimeError: a Tensor with 2 elements cannot be converted to Scalar

In [8]:
# New cell: Save model as model.mar using TorchScript and torch-model-archiver
import os

# Convert the model to TorchScript
scripted_model = torch.jit.script(model)
scripted_model.save('model.pt')

# Archive the model. Adjust --handler if you have a custom handler.
os.system("torch-model-archiver --model-name neural_model --version 1.0 --serialized-file model.pt --handler classifier --extra-files loan_approval_preprocessor.joblib,label_encoders.joblib --export-path . --force")

# Optionally, rename the file to model.mar
os.rename("neural_model.mar", "model.mar")

FileNotFoundError: [WinError 2] The system cannot find the file specified: 'neural_model.mar' -> 'model.mar'

In [None]:
# New cell: Convert TorchScript model to GGUF format
import os

# Ensure that 'model.pt' has been created by previous TorchScript conversion.
# Replace "your-gguf-converter" and arguments with the actual command if/when available.
os.system("your-gguf-converter --input model.pt --output model.gguf")