# Loan Approval Prediction using Classification Models

**Final Project – Machine Learning with Python**

Author: **Sief Azizia**

## 1. Import Required Libraries

In [None]:

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report, confusion_matrix


## 2. Load Dataset

In [None]:

# Read the loan dataset from a CSV file in the working directory, then
# preview its first rows (the bare trailing expression is what a notebook
# cell renders as output).
csv_path = "train.csv"
df = pd.read_csv(csv_path)
df.head()


## 3. Define Target and Features

In [None]:

# Encode the target as integers: 'Y' -> 1, 'N' -> 0. Any Loan_Status value
# outside this mapping becomes NaN.
label_codes = {'Y': 1, 'N': 0}
y = df['Loan_Status'].map(label_codes)

# Features are every column except the target and Loan_ID
# (presumably a row identifier with no predictive value -- confirm).
X = df.drop(columns=['Loan_Status', 'Loan_ID'])


## 4. Identify Feature Types

In [None]:

# Partition the feature columns by dtype: numeric columns on one side,
# every other column on the other. Each group gets its own preprocessing.
numeric_kind = 'number'
num_cols = X.select_dtypes(include=numeric_kind).columns
cat_cols = X.select_dtypes(exclude=numeric_kind).columns


## 5. Preprocessing Pipelines

In [None]:

# Numeric columns: fill missing values with the column median, then
# standardize.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_pipe = Pipeline(steps=numeric_steps)

# Non-numeric columns: fill missing values with the most frequent value,
# then one-hot encode. Categories not seen during fit are ignored at
# transform time instead of raising an error.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_pipe = Pipeline(steps=categorical_steps)

# Route each column group through its own sub-pipeline.
preprocess = ColumnTransformer(
    transformers=[
        ('num', numeric_pipe, num_cols),
        ('cat', categorical_pipe, cat_cols),
    ]
)


## 6. Train-Test Split

In [None]:

# Hold out 20% of the rows as a test set. Stratifying on y keeps the class
# proportions similar in both parts; the fixed seed makes the split
# reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


## 7. Model Training and Evaluation

In [None]:

# Candidate classifiers, keyed by the display name used in the results table.
models = {
    "Logistic Regression": LogisticRegression(
        max_iter=2000,
        class_weight='balanced',
    ),
    "KNN": KNeighborsClassifier(n_neighbors=15),
    "Random Forest": RandomForestClassifier(
        n_estimators=300,
        random_state=42,
        class_weight='balanced_subsample',
    ),
}

# 5-fold stratified cross-validation with a fixed shuffle seed, run on the
# training split only so the test split stays untouched.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Result-table column name -> scikit-learn scorer name.
metric_columns = {'F1': 'f1', 'Precision': 'precision', 'Recall': 'recall'}

results = []
for name, model in models.items():
    # Preprocessing is part of the pipeline, so it is fitted on each
    # training fold separately.
    pipe = Pipeline([('prep', preprocess), ('model', model)])
    scores = cross_validate(
        pipe,
        X_train,
        y_train,
        cv=cv,
        scoring=list(metric_columns.values()),
    )

    # Summarize each metric by its mean over the folds.
    row = {'Model': name}
    for column, scorer in metric_columns.items():
        row[column] = scores[f'test_{scorer}'].mean()
    results.append(row)

pd.DataFrame(results)


## 8. Final Model Evaluation

In [None]:

# Fit the logistic-regression pipeline on the whole training split and
# evaluate it once on the held-out test split.
final_steps = [
    ('prep', preprocess),
    ('model', LogisticRegression(max_iter=2000, class_weight='balanced')),
]
final_model = Pipeline(steps=final_steps)
final_model.fit(X_train, y_train)

y_pred = final_model.predict(X_test)

# Confusion matrix: rows are true classes, columns are predicted classes.
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

# Per-class precision, recall and F1 on the test split.
test_report = classification_report(y_test, y_pred)
print(test_report)
