# Loan Prediction Classification Model
Assignment : Build and train loan approval prediction model for the practical loan dataset given. you can use KNN or any other algorithm for classification


In [9]:
# Loading libraries

!pip install -q kagglehub[pandas-datasets] loguru scikit-learn

In [10]:
from loguru import logger
import numpy as np
import pandas as pd
from IPython.display import display

In [11]:
# Downloading classification data from Kagglehub

import kagglehub

path = kagglehub.dataset_download("taweilo/loan-approval-classification-data")

logger.info("Path to dataset files: {}", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/taweilo/loan-approval-classification-data?dataset_version_number=1...


100%|██████████| 751k/751k [00:01<00:00, 496kB/s]

Extracting files...



[32m2025-05-17 22:39:04.403[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1mPath to dataset files: C:\Users\indra\.cache\kagglehub\datasets\taweilo\loan-approval-classification-data\versions\1[0m


In [12]:
# Based on the path get the CSV
file = path + "/loan_data.csv"

df = pd.read_csv(file)

logger.info("Dataframe is loaded")

display(df.head())

[32m2025-05-17 22:39:04.679[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mDataframe is loaded[0m


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


In [13]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

logger.info("Encoding non numerical fields")

# Encodig the non numerical fields
df["person_gender"] = label_encoder.fit_transform(df["person_gender"])
df["person_education"] = label_encoder.fit_transform(df["person_education"])
df["person_home_ownership"] = label_encoder.fit_transform(df["person_home_ownership"])
df["loan_intent"] = label_encoder.fit_transform(df["loan_intent"])
df["previous_loan_defaults_on_file"] = label_encoder.fit_transform(df["previous_loan_defaults_on_file"])

display(df.head())


[32m2025-05-17 22:39:38.889[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mEncoding non numerical fields[0m


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,0,4,71948.0,0,3,35000.0,4,16.02,0.49,3.0,561,0,1
1,21.0,0,3,12282.0,0,2,1000.0,1,11.14,0.08,2.0,504,1,0
2,25.0,0,3,12438.0,3,0,5500.0,3,12.87,0.44,3.0,635,0,1
3,23.0,0,1,79753.0,0,3,35000.0,3,15.23,0.44,2.0,675,0,1
4,24.0,1,4,66135.0,1,3,35000.0,3,14.27,0.53,4.0,586,0,1


In [17]:
# Splitting the dataset into testing and traing

from sklearn.model_selection import train_test_split

logger.info("Splitting dataset into training and testing")
X_train, X_test, y_train, y_test = train_test_split(df.drop('loan_status', axis=1), df['loan_status'], test_size=0.2,
                                                    random_state=42)

logger.info("Training data")
display(X_train.head())

logger.info("Testing data")
display(X_test.head())

[32m2025-05-17 22:44:30.909[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mSplitting dataset into training and testing[0m
[32m2025-05-17 22:44:30.961[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mTraining data[0m


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file
25180,34.0,0,1,97265.0,11,0,15000.0,4,12.73,0.15,9.0,631,0
12555,25.0,1,3,72953.0,3,3,12000.0,5,11.86,0.16,4.0,659,1
29153,41.0,0,4,322597.0,18,0,24000.0,4,10.37,0.07,11.0,683,1
23838,27.0,1,0,94232.0,4,3,9600.0,1,17.14,0.1,7.0,641,0
35686,27.0,1,4,84873.0,7,3,7059.0,2,12.97,0.08,3.0,706,1


[32m2025-05-17 22:44:31.038[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1mTesting data[0m


Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file
37979,32.0,1,0,96865.0,10,0,7500.0,1,6.04,0.08,10.0,601,0
9911,24.0,1,0,56838.0,6,3,9000.0,1,11.49,0.16,4.0,647,1
43386,22.0,0,4,37298.0,0,3,5000.0,3,14.88,0.13,4.0,711,0
13822,23.0,0,1,39944.0,1,2,5000.0,5,13.99,0.13,3.0,597,1
44810,42.0,1,3,67974.0,20,0,10000.0,2,15.41,0.15,11.0,586,0


In [18]:
# Training the classification model

from sklearn.neighbors import KNeighborsClassifier

logger.info("Training the classification model")
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)
logger.info("Model is trained")

logger.info(f"Model : {model}")

[32m2025-05-17 22:44:38.097[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mTraining the classification model[0m
[32m2025-05-17 22:44:38.472[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mModel is trained[0m
[32m2025-05-17 22:44:38.478[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mModel : KNeighborsClassifier()[0m


In [19]:
# print model accuracy

from sklearn.metrics import accuracy_score

logger.info("Testing the model accuracy")
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
logger.info(f"Model accuracy: {accuracy * 100:.2f}%")


[32m2025-05-17 22:44:39.735[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mTesting the model accuracy[0m
[32m2025-05-17 22:44:41.818[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m8[0m - [1mModel accuracy: 83.04%[0m
