## Loan Approval and Credit Scoring Model

In [None]:

# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
import joblib


In [None]:

# Step 2: Load dataset
# Location of the training CSV on disk
train_file_path = '/ml_model/train.csv'

# Read the CSV into a DataFrame
df = pd.read_csv(train_file_path)

# Preview the first five rows as the cell output
df.head(5)


In [None]:

# Step 3: Data Preprocessing
# 'Loan_Status' is the target; every remaining column stays in the feature frame
y = df['Loan_Status']
X = df.drop(columns=['Loan_Status'])

# Columns handled as numeric features
numeric_features = [
    'ApplicantIncome',
    'CoapplicantIncome',
    'LoanAmount',
    'Loan_Amount_Term',
    'Credit_History',
]

# Columns handled as categorical features
categorical_features = [
    'Gender',
    'Married',
    'Dependents',
    'Education',
    'Self_Employed',
    'Property_Area',
]

# Step 4: Create preprocessing pipeline for numerical features
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),  # Impute missing values with median
    ('scaler', StandardScaler())  # Scale features
])

# Step 5: Apply the transformations on both numeric and categorical features
# Columns of X that are listed in neither feature list are dropped by the
# ColumnTransformer (its default remainder behaviour).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        # handle_unknown='ignore': a category that was not present when the
        # encoder was fitted is encoded as all zeros instead of raising an
        # error at predict time (the default, 'error', would abort scoring).
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Step 6: Define the model pipeline
# Two stages: the column preprocessor, then a class-weighted logistic regression
model = Pipeline([
    ('preprocessor', preprocessor),
    (
        'classifier',
        LogisticRegression(
            max_iter=1000,
            class_weight='balanced',
        ),
    ),
])

# Step 7: Train-test split
# Hold out 20% of the rows for evaluation. stratify=y keeps the class ratio of
# the target the same in the train and test parts, so the hold-out metrics are
# not distorted by an unlucky draw; random_state fixes the shuffle for
# reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 8: Fit the model
# Fits the preprocessing steps and the classifier on the training part only
model.fit(X_train, y_train)


In [None]:

# Step 9: Evaluate the model
# Per-class probabilities for the test rows; column 1 is kept as the
# positive-class score, and hard labels are predicted separately
class_probabilities = model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]
y_pred = model.predict(X_test)

# ROC-AUC is computed from the scores, not from the hard labels
roc_auc = roc_auc_score(y_test, y_pred_prob)
print(f'ROC-AUC: {roc_auc:.4f}')

# Per-class precision / recall / F1 summary
report = classification_report(y_test, y_pred)
print(report)

# Confusion matrix of true labels against predicted labels
print('Confusion Matrix:')
matrix = confusion_matrix(y_test, y_pred)
print(matrix)


In [None]:

# Step 10: Model Calibration
# Calibrate the complete pipeline rather than only its final 'classifier' step.
# X_train / X_test are the raw feature frames, so the estimator being calibrated
# must contain the preprocessing; a bare LogisticRegression cannot be fitted on
# the unencoded categorical columns.
# CalibratedClassifierCV fits its own clones of the estimator on the
# cross-validation folds, so the already fitted `model` is left untouched.
# NOTE(review): isotonic calibration can overfit on small training sets;
# consider method='sigmoid' if the calibration curve looks unstable.
calibrated_model = CalibratedClassifierCV(model, method='isotonic', cv=5)
calibrated_model.fit(X_train, y_train)

# Get calibrated probabilities
# The calibrated model now applies the preprocessing itself, so it takes the
# same raw feature frame as `model`.
y_pred_calibrated_prob = calibrated_model.predict_proba(X_test)[:, 1]
calibrated_roc_auc = roc_auc_score(y_test, y_pred_calibrated_prob)
print(f'Calibrated ROC-AUC: {calibrated_roc_auc:.4f}')


In [None]:

# Step 11: Save the model and preprocessing pipeline
# Each fitted object is written to its own joblib file, in this order.
# NOTE(review): the first file name says "calibrated" but it stores `model`,
# the uncalibrated pipeline; the calibrated estimator goes to the
# "..._calibrated_model.joblib" file. Confirm the names with whatever loads them.
artifacts = (
    (model, 'creditwise_logreg_calibrated.joblib'),
    (calibrated_model, 'creditwise_logreg_calibrated_model.joblib'),
    (preprocessor, 'creditwise_preprocessor.joblib'),
)
for fitted_object, file_name in artifacts:
    joblib.dump(fitted_object, file_name)

# Output success message
print("Model and preprocessing pipeline have been saved successfully.")
