In [None]:
# Mount Google Drive at /content/drive so that files stored there can be read
# by ordinary file paths. Requires the `google.colab` package to be importable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# %% Imports
import numpy as np

In [None]:
# %% Functions

def convert_to_numeric(arr):
    """Return a float copy of a 2-D array, integer-encoding non-numeric columns.

    Each column is handled independently:

    * if every entry parses as a float, the parsed values are used;
    * otherwise the column is treated as categorical and each entry is
      replaced by the index of its value among the column's sorted unique
      values (0, 1, 2, ...).

    The input array is never modified. Results are written into a freshly
    allocated float array, so values cannot be truncated by a fixed-width
    string dtype and views of the caller's data are left untouched.

    Parameters
    ----------
    arr : array-like, 2-D
        Typically an array of strings as produced by ``np.loadtxt(dtype=str)``.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``arr``.
    """
    arr = np.asarray(arr)
    numeric = np.empty(arr.shape, dtype=float)

    for col in range(arr.shape[1]):
        column = arr[:, col]
        try:
            numeric[:, col] = column.astype(float)
        except ValueError:
            # Non-numeric column: map each distinct value to its rank among
            # the sorted unique values of this column.
            numeric[:, col] = np.unique(column, return_inverse=True)[1]

    return numeric


def knn(train_data, test_data, neighbors):
    """Predict a class for every test row by k-nearest-neighbour majority vote.

    Parameters
    ----------
    train_data : array-like, 2-D
        Training rows. The last column is the class label and must be
        convertible to non-negative integers; all other columns are features.
    test_data : array-like, 2-D
        Rows to classify, with the same column layout as ``train_data``.
        The last column is ignored.
    neighbors : int or integer-like str
        Number of nearest training rows that vote. Coerced with ``int()`` so
        that a value read from ``input()`` works.

    Returns
    -------
    numpy.ndarray
        One predicted label per test row. When several classes tie, the
        smallest label wins (``np.argmax`` returns the first maximum).

    Raises
    ------
    ValueError
        If ``neighbors`` is not an integer >= 1.
    """
    neighbors = int(neighbors)
    if neighbors < 1:
        raise ValueError("neighbors must be a positive integer")

    train_data = np.asarray(train_data)
    test_data = np.asarray(test_data)

    n_train = train_data.shape[0]
    train_y = train_data[:, -1].astype(int)

    # Encode train and test features together so a categorical value receives
    # the same integer code in both sets. np.vstack copies, so any in-place
    # work done by the converter cannot reach the caller's arrays.
    features = np.vstack([train_data[:, :-1], test_data[:, :-1]])
    features = convert_to_numeric(features).astype(float)
    train_x = features[:n_train]
    test_x = features[n_train:]

    result = []

    for test_val in test_x:

        # Euclidean distance from this test row to every training row
        distances = np.linalg.norm(train_x - test_val, axis=1)
        neighborhood = np.argsort(distances)[:neighbors]

        # Majority vote among the labels of the nearest training rows
        class_counts = np.bincount(train_y[neighborhood])
        result.append(np.argmax(class_counts))

    return np.array(result)

In [None]:
# %% Main Execution
file_name="/content/drive/MyDrive/dm/Project2/project2_dataset2.txt"
K_Fold=10

# Validate the fold count before it is used to split the data.
assert K_Fold > 1 , 'not a valid input for multi fold Cross-validation'

# ndmin=2 keeps a single-row file two-dimensional so column slicing still works.
data = np.loadtxt(file_name, delimiter="\t", dtype='str', ndmin=2)

# input() returns a string; the neighbour count must be an integer because it
# is used as a slice bound inside knn.
k = int(input("Enter value of k: "))
if k < 1:
    raise ValueError("k must be a positive integer")

ten_fold_cross_valid = np.array_split(data, K_Fold)

# Per-fold scores. A fold contributes to a metric only when that metric's
# denominator is non-zero.
accuracy = []
precision = []
recall = []
f_measure = []


def _average_percent(values):
    """Return the mean of ``values`` as a percentage, or nan if it is empty."""
    if not values:
        return float("nan")
    return sum(values) * 100 / len(values)


for index in range(len(ten_fold_cross_valid)):

    test_data = ten_fold_cross_valid[index]
    train_data = np.vstack([x for i, x in enumerate(ten_fold_cross_valid) if i != index])

    test_y = test_data[:, -1].astype(int)

    # The folds are views into `data`; pass a copy so that any in-place
    # preprocessing by the callee cannot leak into later folds.
    result = knn(train_data, test_data.copy(), k)

    # Confusion-matrix counts, treating label 1 as the positive class.
    FP = ((result == 1) & (test_y == 0)).sum()
    TP = ((result == 1) & (test_y == 1)).sum()
    FN = ((result == 0) & (test_y == 1)).sum()
    TN = ((result == 0) & (test_y == 0)).sum()

    if TP + FN + FP + TN != 0:
        accuracy.append(float(TP + TN)/(TP + FN + FP + TN))
    if TP + FP != 0:
        precision.append(float(TP)/(TP + FP))
    if TP + FN != 0:
        recall.append(float(TP)/(TP + FN))
    if TP + FN + FP != 0:
        f_measure.append(float(2 * TP) / ((2 * TP) + FN + FP))

print("Average accuracy  : "+  str(_average_percent(accuracy)))
print("Average precision : "+  str(_average_percent(precision)))
print("Average recall    : "+  str(_average_percent(recall)))
print("Average f_measure : "+  str(_average_percent(f_measure)))