In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the loan-approval training data.
# NOTE: an earlier URL for this dataset returned HTTP 404; this one replaced it.
url = "https://raw.githubusercontent.com/dphi-official/Datasets/master/Loan_Data/loan_train.csv"
df = pd.read_csv(url)

# Column roles, declared once so the selection, encoding and feature matrix
# below cannot drift out of sync with each other.
FEATURE_COLUMNS = ['ApplicantIncome', 'LoanAmount', 'Credit_History', 'Education', 'Married']
TARGET_COLUMN = 'Loan_Status'
CATEGORICAL_COLUMNS = ['Education', 'Married', TARGET_COLUMN]

# Keep only the modelling columns and discard rows with any missing value.
# dropna() is used in its non-inplace form and the result is explicitly
# copied: mutating a frame derived from a selection (inplace dropna, then
# column assignment) is the pattern that triggers SettingWithCopyWarning on
# pandas versions without Copy-on-Write.
df = df[FEATURE_COLUMNS + [TARGET_COLUMN]].dropna().copy()

# Encode the string-valued columns as integers. Each fitted encoder is stored
# in label_encoders, keyed by column name.
label_encoders = {}
for col in CATEGORICAL_COLUMNS:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Feature matrix and target vector
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features: KNN is distance-based, so columns on a large
# numeric scale would otherwise dominate the neighbour search. The scaler is
# fitted on the training rows only and then applied unchanged to the test
# rows, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit a 5-nearest-neighbours classifier on the scaled training data
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predict on the held-out rows and report plain accuracy
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"KNN Finance Dataset Accuracy: {accuracy * 100:.2f}%")

KNN Finance Dataset Accuracy: 82.76%
