The code to download the dataset from kaggle: [Dataset](https://www.kaggle.com/datasets/mirzahasnine/loan-data-set)

In [None]:
! pip install -q kaggle

In [1]:
# ! mkdir ~/.kaggle
# ! cp kaggle.json ~/.kaggle/
# ! chmod 600 ~/.kaggle/kaggle.json
# ! kaggle datasets download mirzahasnine/loan-data-set --unzip

Downloading loan-data-set.zip to /content
  0% 0.00/10.9k [00:00<?, ?B/s]
100% 10.9k/10.9k [00:00<00:00, 26.1MB/s]


In [None]:
# Import necessary libraries
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from itertools import accumulate
from random import random
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Load your dataframe
df = pd.read_csv("loan_train.csv")

# Get a list of all string columns
str_cols = df.select_dtypes(include=['object']).columns.tolist()

# Create an instance of the OneHotEncoder class
encoder = OneHotEncoder(sparse=False)

# Fit and transform the string columns in the dataframe
encoded_cols = encoder.fit_transform(df[str_cols])

# Get the feature names of the encoded columns
feature_names = encoder.get_feature_names_out(str_cols)

# Convert the encoded columns back to a dataframe and merge it with the original dataframe
encoded_df = pd.DataFrame(encoded_cols, columns=feature_names)
loan_df = pd.concat([df.drop(str_cols, axis=1), encoded_df], axis=1)

loan_df = loan_df.dropna()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(loan_df.iloc[:, :-1], loan_df.iloc[:, -1], test_size=0.2,
                                                    random_state=42)



In [None]:
# Create a function to find the best k value
def knn_accuracy(k):
    # Create a KNN classifier with k neighbors
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    # Return the accuracy
    return knn.score(X_test, y_test)

In [None]:
def range_x(lower_bound, upper_bound, step_size):
    while lower_bound <= upper_bound:
        yield lower_bound
        lower_bound += step_size

def get_range(x, acc_p):
    for i in range(len(acc_p)):
        if x < acc_p[i]:
            return i

def get_best(result, maximum=True):
    return result.index(max(result)) if maximum else result.index(min(result))

def get_worst(result, maximum=True):
    return result.index(min(result)) if maximum else result.index(max(result))

def get_new_pheromone(evaporation_rate, t):
    return (1 - evaporation_rate) * t

def get_new_pheromone_best(best_value, worst_value, t, scale_factor):
    return t + scale_factor * (best_value / worst_value)

In [None]:
# create ant colony optimization algorithm
population_size = 10
max_iter = 8
ant_count = 5
lower_bound = 1
upper_bound = 20
step_size = 1
pheromone = 1
scale_factor = 2
evaporation_rate = 0.5

In [None]:
def f(x):
    return knn_accuracy(x)

In [None]:
x = list(range_x(lower_bound, upper_bound, step_size))
T = [pheromone] * len(x)
for iteration in range(max_iter):
    print(f"iteration {iteration + 1}")
    p = [T[i]/sum(T) for i in range(len(T))]
    acc_p =  [round(i,3) for i in accumulate(p)]
    rs = [random() for i in range(ant_count)] # random numbers
    result = [f(x[get_range(i, acc_p)]) for i in rs]
    best_index, worst_index = get_best(result, maximum=True), get_worst(result, maximum=True)
    best_value, worst_value = result[best_index], result[worst_index]
    print(f"{best_value = }, {worst_value = }")
    best_index, worst_index = rs[best_index], rs[worst_index]
    best_index, worst_index = get_range(best_index, acc_p), get_range(worst_index, acc_p)
    print(f"best_x = {x[best_index]}, worst_x= {x[worst_index]}")
    not_best_ts = T[:best_index]+T[best_index + 1:]
    first_len, last_len = len(T[:best_index]), len(T[best_index + 1:])
    not_best_ts = [get_new_pheromone(evaporation_rate, i) for i in not_best_ts]
    T[:best_index] = not_best_ts[:first_len]
    T[best_index + 1:] = not_best_ts[first_len:]
    T[best_index] = get_new_pheromone_best(best_value, worst_value, T[best_index], scale_factor)
    print()
print("Test accuracy: {:.2f}%".format(best_value * 100))

iteration 1
best_value = 0.6909090909090909, worst_value = 0.6181818181818182
best_x = 19, worst_x= 6

iteration 2
best_value = 0.6909090909090909, worst_value = 0.6090909090909091
best_x = 19, worst_x= 1

iteration 3
best_value = 0.6909090909090909, worst_value = 0.6363636363636364
best_x = 19, worst_x= 5

iteration 4
best_value = 0.6909090909090909, worst_value = 0.6545454545454545
best_x = 19, worst_x= 8

iteration 5
best_value = 0.6909090909090909, worst_value = 0.6909090909090909
best_x = 19, worst_x= 19

iteration 6
best_value = 0.6909090909090909, worst_value = 0.6909090909090909
best_x = 19, worst_x= 19

iteration 7
best_value = 0.6909090909090909, worst_value = 0.6909090909090909
best_x = 19, worst_x= 19

iteration 8
best_value = 0.6909090909090909, worst_value = 0.6909090909090909
best_x = 19, worst_x= 19

Test accuracy: 69.09%
