Loan Approval Classification using KKN

In [9]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [10]:
# Load the dataset
data = pd.read_csv('loan_data.csv')

# Assuming 'loan_status' is the target column
X = data.drop('loan_status', axis=1)
y = data['loan_status']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Custom k-NN implementation
class KNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return np.array(predictions)

    def _predict(self, x):
        # Compute distances between x and all examples in the training set
        distances = [np.linalg.norm(x - x_train) for x_train in self.X_train]
        # Sort by distance and return indices of the first k neighbors
        k_indices = np.argsort(distances)[:self.k]
        # Extract the labels of the k nearest neighbor training samples
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # Return the most common class label
        most_common = np.bincount(k_nearest_labels).argmax()
        return most_common

In [12]:
# Let us now try K = 1 -> Over Fitting
# Custom k-NN
knn_custom = KNN(k=1)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 1:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is over fitting")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
# Let us now try K = 100 -> under Fitting
# Custom k-NN
knn_custom = KNN(k=100)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 100:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is under fitting")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

In [None]:
# Let us now try K = 5 -> Good Fit
# Custom k-NN
knn_custom = KNN(k=2)
knn_custom.fit(X_train, y_train)

# Predictions
y_pred_custom = knn_custom.predict(X_test)
y_pred_train_custom = knn_custom.predict(X_train)

# Print accuracy
print("Trying k = 5:" + str(accuracy_score(y_train, y_pred_train_custom)) + " which is Good fit")
print("Custom k-NN Test Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Custom k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_custom))

In [None]:
# Create a comparison table
results = {
    'Model': ['Custom k-NN'],
    'Train Accuracy': [accuracy_score(y_train, y_pred_train_custom)],
    'Test Accuracy': [accuracy_score(y_test, y_pred_custom)]
}

comparison_table = pd.DataFrame(results)

# Display the comparison table
print(comparison_table)

Sklearn Implenmentation of KNN Classifier

In [6]:
# Sklearn k-NN
knn_sklearn = KNeighborsClassifier(n_neighbors=3)
knn_sklearn.fit(X_train, y_train)

# Predictions
y_pred_sklearn = knn_sklearn.predict(X_test)
y_pred_train_sklearn = knn_sklearn.predict(X_train)

# Print accuracy
print("Sklearn k-NN Test Accuracy:", accuracy_score(y_test, y_pred_sklearn))
print("Sklearn k-NN Train Accuracy:", accuracy_score(y_train, y_pred_train_sklearn))

ValueError: could not convert string to float: 'female'

GPT GENERATED KNN 

In [None]:
# create a KKNeighborsClassifier with 3 neighbors
knn = KNeighborsClassifier(n_neighbors=3)
# fit the classifier to the training data
knn.fit(X_train, y_train)
# make predictions on the testing data
y_pred = knn.predict(X_test)
# calculate the accuracy of the classifier on the testing data
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# create a confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)
# create a classification report
cr = classification_report(y_test, y_pred)
print("Classification Report:")
print(cr)
# create a ROC curve
fpr, tpr, thresholds = roc_curve(y_test, y_pred)
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
# create a precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_test, y_pred)
plt.plot(recall, precision)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.show()
# create a feature importance plot
importances = knn.feature_importances_
indices = np.argsort(importances)[::-1]
plt.bar(range(X.shape[1]), importances[indices])
plt.xticks(range(X.shape[1]), X.columns[indices], rotation=90)
plt.xlabel('Feature')
plt.ylabel('Importance')
plt.title('Feature Importance Plot')
plt.show()



