# Import libraries :


In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

# Load dataset :


In [34]:
df = pd.read_csv("loan_approval_dataset.csv")
df.columns = df.columns.str.strip()

# Handle missing values :


In [35]:
df.fillna(df.median(numeric_only=True), inplace=True)
df.fillna(df.mode().iloc[0], inplace=True)

# Encode categorical columns :


In [36]:
encoder = LabelEncoder()
for col in df.select_dtypes(include=['object']).columns:
    df[col] = encoder.fit_transform(df[col])

# Features and target :


In [37]:
X = df.drop('loan_status', axis=1)
y = df['loan_status']

# Train-test split :


In [38]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check class imbalance :


In [39]:
print("Before SMOTE:", y_train.value_counts())

Before SMOTE: loan_status
0    2120
1    1295
Name: count, dtype: int64


# Balance data :


In [40]:
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)
print("After SMOTE:", y_train_res.value_counts())

After SMOTE: loan_status
0    2120
1    2120
Name: count, dtype: int64


# Optional: scale features :


In [41]:
scaler = StandardScaler()
X_train_res = scaler.fit_transform(X_train_res)
X_test = scaler.transform(X_test)

# Logistic Regression :


In [42]:
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_res, y_train_res)
y_pred_lr = log_reg.predict(X_test)

# Decision Tree :


In [43]:
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train_res, y_train_res)
y_pred_tree = tree.predict(X_test)

# Evaluation :


In [44]:
print("\n                   === Logistic Regression ===\n")
print(classification_report(y_test, y_pred_lr))


                   === Logistic Regression ===

              precision    recall  f1-score   support

           0       0.94      0.91      0.92       536
           1       0.85      0.90      0.88       318

    accuracy                           0.91       854
   macro avg       0.90      0.90      0.90       854
weighted avg       0.91      0.91      0.91       854



In [45]:
print("=== Logistic Regression Confusion Matrix ===\n", confusion_matrix(y_test, y_pred_lr))

=== Logistic Regression Confusion Matrix ===
 [[487  49]
 [ 32 286]]


In [46]:
print("\n                      === Decision Tree ===\n")
print(classification_report(y_test, y_pred_tree))


                      === Decision Tree ===

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       536
           1       0.97      0.97      0.97       318

    accuracy                           0.98       854
   macro avg       0.97      0.98      0.97       854
weighted avg       0.98      0.98      0.98       854



In [47]:
print("=== Decision Tree Confusion Matrix ===\n", confusion_matrix(y_test, y_pred_tree))

=== Decision Tree Confusion Matrix ===
 [[525  11]
 [  9 309]]
