The code to download the dataset from kaggle: [Dataset](https://www.kaggle.com/datasets/mirzahasnine/loan-data-set)

In [None]:
! pip install -q kaggle pyswarms

In [None]:
# ! mkdir ~/.kaggle
# ! cp kaggle.json ~/.kaggle/
# ! chmod 600 ~/.kaggle/kaggle.json
# ! kaggle datasets download mirzahasnine/loan-data-set --unzip

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Downloading loan-data-set.zip to /content
  0% 0.00/10.9k [00:00<?, ?B/s]
100% 10.9k/10.9k [00:00<00:00, 3.98MB/s]


In [None]:
# Import necessary libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from pyswarms.single import GlobalBestPSO
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Load your dataframe
df = pd.read_csv("loan_train.csv")

# Get a list of all string columns
str_cols = df.select_dtypes(include=['object']).columns.tolist()

# Create an instance of the OneHotEncoder class
encoder = OneHotEncoder(sparse=False)

# Fit and transform the string columns in the dataframe
encoded_cols = encoder.fit_transform(df[str_cols])

# Get the feature names of the encoded columns
feature_names = encoder.get_feature_names_out(str_cols)

# Convert the encoded columns back to a dataframe and merge it with the original dataframe
encoded_df = pd.DataFrame(encoded_cols, columns=feature_names)
loan_df = pd.concat([df.drop(str_cols, axis=1), encoded_df], axis=1)

loan_df = loan_df.dropna()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(loan_df.iloc[:, :-1], loan_df.iloc[:, -1], test_size=0.2,
                                                    random_state=42)

# Define the function to optimize
def knn_cv_acc(hyperparams):
    if hyperparams.ndim == 1:
        # Convert hyperparameters to integers
        n_neighbors = abs(int(hyperparams[0] * 10)) + 1
        weights = 'uniform' if int(hyperparams[1]) == 0 else 'distance'
        p = abs(int(hyperparams[2] * 10)) + 1
    else:
        # Convert hyperparameters to integers for all particles
        n_neighbors = [abs(int(h[0] * 10)) + 1 for h in hyperparams]
        weights = ['uniform' if int(h[1]) == 0 else 'distance' for h in hyperparams]
        p = [abs(float(h[2] * 10)) + 1 for h in hyperparams]

    # Create KNN classifiers with the given hyperparameters
    knns = [KNeighborsClassifier(n_neighbors=n, weights=w, p=pi) for n, w, pi in zip(n_neighbors, weights, p)]

    # Train the classifiers and get the accuracy on the validation set
    accs = []
    for knn in knns:
        knn.fit(X_train, y_train)
        acc = knn.score(X_test, y_test)
        accs.append(acc)

    # Return the negative accuracies (to be minimized by PSO)
    return [-acc for acc in accs]


# Define the upper and lower bounds for the hyperparameters
lb = [1, 0, 1]  # lower bounds for n_neighbors, weights, and p
ub = [10, 1, 3]  # upper bounds for n_neighbors, weights, and p

# Call the PSO optimizer
optimizer = GlobalBestPSO(n_particles=10, dimensions=3, options={"c1": 0.5, "c2": 0.3, "w": 0.9})
xopt, fopt = optimizer.optimize(knn_cv_acc, iters=50)
# Print the optimized hyperparameters and the corresponding test accuracy
n_neighbors = abs(int(fopt[0] * 10)) + 1
weights = 'uniform' if int(fopt[1]) == 0 else 'distance'
p = abs(float(fopt[2] * 10)) + 1
print("Optimized hyperparameters: n_neighbors={}, weights='{}', p={}".format(n_neighbors, weights, p))
knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
knn.fit(X_train, y_train)
acc = knn.score(X_test, y_test)
print("Test accuracy: {:.2f}%".format(acc * 100))

2023-04-19 12:29:02,551 - pyswarms.single.global_best - INFO - Optimize for 50 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|██████████|50/50, best_cost=-.691
2023-04-19 12:29:29,559 - pyswarms.single.global_best - INFO - Optimization finished | best cost: -0.6909090909090909, best pos: [1.35791274 1.60296745 0.31250161]


Optimized hyperparameters: n_neighbors=14, weights='distance', p=4.125016082400592
Test accuracy: 69.09%
