# KNeighborsClassifier

In [61]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

*Load Datasets*

In [62]:
data = pd.read_csv('LoanApprovalPrediction.csv')
data

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0.0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1.0,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0.0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0.0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0.0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...
593,LP002978,Female,No,0.0,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
594,LP002979,Male,Yes,3.0,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
595,LP002983,Male,Yes,1.0,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
596,LP002984,Male,Yes,2.0,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [63]:
data.isnull().sum()

Loan_ID               0
Gender                0
Married               0
Dependents           12
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           21
Loan_Amount_Term     14
Credit_History       49
Property_Area         0
Loan_Status           0
dtype: int64

*Handling Missing Values*

In [64]:
data.LoanAmount.fillna(data.LoanAmount.median(), inplace=True)
data.Loan_Amount_Term.fillna(data.Loan_Amount_Term.mode()[0], inplace=True)
data.Credit_History.fillna(data.Credit_History.mode()[0], inplace=True)
data.Dependents.fillna(data.Dependents.mode()[0], inplace=True)
data.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.LoanAmount.fillna(data.LoanAmount.median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data.Loan_Amount_Term.fillna(data.Loan_Amount_Term.mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the inter

Loan_ID              0
Gender               0
Married              0
Dependents           0
Education            0
Self_Employed        0
ApplicantIncome      0
CoapplicantIncome    0
LoanAmount           0
Loan_Amount_Term     0
Credit_History       0
Property_Area        0
Loan_Status          0
dtype: int64

*Drop unnecessary columns*

In [65]:
data.drop('Loan_ID', axis=1, inplace=True)

In [66]:
data.columns

Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',
       'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

*Encoding categorical variables*

In [67]:
categorical_cols = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status']

In [68]:
data[categorical_cols] = data[categorical_cols].apply(lambda x: x.fillna(x.mode()[0]))

*Label Encoding*

In [69]:
le= LabelEncoder()
for col in categorical_cols:
    data[col]= le.fit_transform(data[col])

In [70]:
data[categorical_cols]

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Property_Area,Loan_Status
0,1,0,0,0,0,2,1
1,1,1,1,0,0,0,0
2,1,1,0,0,1,2,1
3,1,1,0,1,0,2,1
4,1,0,0,0,0,2,1
...,...,...,...,...,...,...,...
593,0,0,0,0,0,0,1
594,1,1,3,0,0,0,1
595,1,1,1,0,0,2,1
596,1,1,2,0,0,2,1


*Seperate features and target*

In [71]:
X = data.drop('Loan_Status', axis=1)
y = data['Loan_Status']

*Feature scaling*

In [72]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

*Splitting the dataset*

In [73]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

*Initialize and train KNN classifier*

In [74]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

*Predictions*

In [75]:
y_pred = knn.predict(X_test)

*Evaluate the model*

In [76]:
accuracy_knn = accuracy_score(y_test, y_pred)
print(f'Accuracy of KNN Classifier: {accuracy_knn * 100:.2f}%')

Accuracy of KNN Classifier: 77.50%


# Random Forest Classifier

*Initialize and train Random Forest classifier*

In [77]:
from sklearn.ensemble import RandomForestClassifier

In [78]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

*Predictions*

In [79]:
y_pred = rf.predict(X_test)

*Evaluate the model*

In [80]:
accuracy_rf = accuracy_score(y_test, y_pred)
print(f'Accuracy of Random Forest Classifier: {accuracy_rf* 100:.2f}%')

Accuracy of Random Forest Classifier: 80.83%


# Support Vector Classifiers (SVC)

*Initialize and train Support Vector Classifier*

In [81]:
from sklearn.svm import SVC

In [82]:
svc = SVC(kernel='rbf', random_state=42)
svc.fit(X_train, y_train)

*Predictions*

In [83]:
y_pred = svc.predict(X_test)

*Evaluate the model*

In [84]:
accuracy_svc = accuracy_score(y_test, y_pred)
print(f'Accuracy of Support Vector Classifier: {accuracy_svc * 100:.2f}%')

Accuracy of Support Vector Classifier: 80.83%


# Logistics Regression

In [85]:
from sklearn.linear_model import LogisticRegression

*Initialize and train Logistic Regression*

In [86]:
log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train, y_train)

*Predictions*

In [87]:
y_pred = log_reg.predict(X_test)

*Evaluate the model*

In [88]:
accuracy_log_reg = accuracy_score(y_test, y_pred)
print(f'Accuracy of Logistic Regression: {accuracy_log_reg * 100:.2f}%')

Accuracy of Logistic Regression: 81.67%


In [98]:
df={
    'Model': ['KNN', 'Random Forest', 'SVC', 'Logistic Regression'],
    'Accuracy': [accuracy_knn, accuracy_rf, accuracy_svc, accuracy_log_reg]
}
data= pd.DataFrame(df)
data['Accuracy'] = (data['Accuracy'] * 100).round(2)
# Add percentage sign to the accuracy column
data['Accuracy'] = data['Accuracy'].apply(lambda x: str(x) + '%')
data

Unnamed: 0,Model,Accuracy
0,KNN,77.5%
1,Random Forest,80.83%
2,SVC,80.83%
3,Logistic Regression,81.67%
