# *Installations*

In [1]:
!pip install --upgrade scikit-learn imbalanced-learn

Collecting scikit-learn
  Using cached scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)


# *Imports*

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline as make_imb_pipeline
from sklearn.linear_model import LogisticRegression

In [3]:
df = pd.read_csv('/kaggle/input/loan-approval-prediction-dataset/loan_approval_dataset.csv')

In [4]:
df.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [5]:
print(df.columns.tolist())

['loan_id', ' no_of_dependents', ' education', ' self_employed', ' income_annum', ' loan_amount', ' loan_term', ' cibil_score', ' residential_assets_value', ' commercial_assets_value', ' luxury_assets_value', ' bank_asset_value', ' loan_status']


In [6]:
X = df.drop([' loan_status'], axis=1)
y = df[' loan_status'].map({' Approved': 1, ' Rejected': 0})

In [7]:
print(df.isnull().sum())

loan_id                      0
 no_of_dependents            0
 education                   0
 self_employed               0
 income_annum                0
 loan_amount                 0
 loan_term                   0
 cibil_score                 0
 residential_assets_value    0
 commercial_assets_value     0
 luxury_assets_value         0
 bank_asset_value            0
 loan_status                 0
dtype: int64


In [8]:
df.head()

Unnamed: 0,loan_id,no_of_dependents,education,self_employed,income_annum,loan_amount,loan_term,cibil_score,residential_assets_value,commercial_assets_value,luxury_assets_value,bank_asset_value,loan_status
0,1,2,Graduate,No,9600000,29900000,12,778,2400000,17600000,22700000,8000000,Approved
1,2,0,Not Graduate,Yes,4100000,12200000,8,417,2700000,2200000,8800000,3300000,Rejected
2,3,3,Graduate,No,9100000,29700000,20,506,7100000,4500000,33300000,12800000,Rejected
3,4,3,Graduate,No,8200000,30700000,8,467,18200000,3300000,23300000,7900000,Rejected
4,5,5,Not Graduate,Yes,9800000,24200000,20,382,12400000,8200000,29400000,5000000,Rejected


In [9]:
df.loc[0, ' loan_status']

' Approved'

In [10]:
print(f"Shape of X: {X.shape}")  
print(f"Shape of y: {y.shape}")

Shape of X: (4269, 12)
Shape of y: (4269,)


In [11]:
num_cols = X.select_dtypes(include=['int64', 'float64']).columns
cat_cols = X.select_dtypes(include=['object']).columns

In [12]:
num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

In [13]:
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))])

In [14]:
preprocessor = ColumnTransformer([
    ('num', num_transformer, num_cols),
    ('cat', cat_transformer, cat_cols)
])

In [15]:
X.shape
y.shape

(4269,)

# *Data Splitting*

In [16]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)

# *DecisionTree Model*

In [17]:
dt_model = make_imb_pipeline(
    preprocessor,
    SMOTE(random_state=42),
    DecisionTreeClassifier(class_weight='balanced', random_state=42, max_depth=5))
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [18]:
y_pred_dt = dt_model.predict(X_test)
y_prob_dt = dt_model.predict_proba(X_test)[:, 1]

print("Report:")
print(classification_report(y_test, y_pred_dt))

print("\nROC AUC Score:", roc_auc_score(y_test, y_prob_dt))

Report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.96       484
           1       0.99      0.97      0.98       797

    accuracy                           0.97      1281
   macro avg       0.97      0.97      0.97      1281
weighted avg       0.97      0.97      0.97      1281


ROC AUC Score: 0.992965614857368


### Final Accuracy

In [19]:
Accuracy_DecisionTree = roc_auc_score(y_test, y_prob_dt) * 100
print(f'DecisionTree Accuracy: {round(Accuracy_DecisionTree, 3)}%')

DecisionTree Accuracy: 99.297%


# *____________________________________________________________________________*

# **LogisticRegression Model** 

In [20]:
LogisticRegression = make_imb_pipeline(
    preprocessor,
    SMOTE(random_state=42),
    LogisticRegression(class_weight='balanced', random_state=42, max_iter=1000)
)

In [21]:
LogisticRegression.fit(X_train, y_train)
y_pred_lr = LogisticRegression.predict(X_test)
y_prob_lr = LogisticRegression.predict_proba(X_test)[:, 1]

In [22]:
print("Report:")
print(classification_report(y_test, y_pred_lr))
print("ROC AUC:", roc_auc_score(y_test, y_prob_lr))

Report:
              precision    recall  f1-score   support

           0       0.88      0.94      0.91       484
           1       0.96      0.92      0.94       797

    accuracy                           0.93      1281
   macro avg       0.92      0.93      0.92      1281
weighted avg       0.93      0.93      0.93      1281

ROC AUC: 0.9728942210977114


### Final Accuracy

In [23]:
Accuracy_LogisticRegression = roc_auc_score(y_test, y_prob_lr) * 100
print(f'LogisticRegression Accuracy: {round(Accuracy_LogisticRegression, 3)}%')

LogisticRegression Accuracy: 97.289%
