In [27]:
import pandas as pd

In [None]:
# Load the anemia data set from a CSV file located relative to the working directory.
# Label encoding (presumably for the `Result` column - confirm against the data source):
#   0 -> not present
#   1 -> present
dataset = pd.read_csv("anemia_prediction.csv")

In [29]:
# Bare last expression: the notebook renders the loaded frame as this cell's output.
dataset

Unnamed: 0,Hemoglobin,MCH,Result
0,14.9,22.7,0
1,15.9,25.4,0
2,9.0,21.5,1
3,14.9,16.0,0
4,14.7,22.0,0
...,...,...,...
394,13.9,29.7,0
395,13.9,25.9,0
396,15.7,17.7,0
397,15.6,19.4,0


In [None]:
class kNN:
    '''k-Nearest Neighbours classifier using Manhattan (L1) distance.

    The model is "lazy": ``fit`` only stores the training data, and all the
    work happens in ``predict``, which compares every test row against every
    stored training row.

    Parameters
    ----------
    k : int
        Number of nearest training rows that vote on each prediction.
        Must be a positive whole number.
    verbose : bool, optional
        When True (the default, matching the historical behaviour) every
        test row and every train/test distance is printed. Pass False to
        silence the per-pair trace.

    Raises
    ------
    ValueError
        If ``k`` is not a positive whole number.
    '''

    def __init__(self, k, verbose=True):
        # Reject k <= 0 (which would make every prediction None) and
        # fractional k (which has no meaning as a neighbour count).
        if k != int(k) or k < 1:
            raise ValueError(f"k must be a positive whole number, got {k!r}")
        self.k = int(k)
        self.verbose = verbose

    def manhattan(self, v1, v2):
        '''Return the Manhattan distance: sum of |v1[i] - v2[i]| over len(v1).'''
        return sum(abs(v1[i] - v2[i]) for i in range(len(v1)))

    def fit(self, X_train, y_train):
        '''Store the training rows and their labels for later lookup.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` differ in length.
        '''
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train and y_train must have the same length, "
                f"got {len(X_train)} and {len(y_train)}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        '''Return a list with one predicted label per row of ``X_test``.'''
        preds = []
        for i in range(len(X_test)):
            neighbours = self.get_neighbours(X_test[i])
            preds.append(self.get_majority_class(neighbours))
        return preds

    def get_neighbours(self, test_row):
        '''Return the labels of the ``k`` training rows closest to ``test_row``.

        Labels are ordered from nearest to farthest. Rows at equal distance
        keep their training-set order (the sort is stable), so results are
        deterministic. If fewer than ``k`` training rows exist, all of them
        are returned.
        '''
        if self.verbose:
            print("Test Data:", test_row)
            print("Distances from training data (Manhattan):")

        distances = []
        for i in range(len(self.X_train)):
            label = self.y_train[i]
            dist = self.manhattan(self.X_train[i], test_row)
            distances.append((dist, label))
            if self.verbose:
                print(f"Train #{i}: Distance = {round(dist, 3)}, Label = {label}")

        # Sort on distance only: labels never take part in the comparison,
        # and the built-in sort is stable and O(n log n).
        distances.sort(key=lambda pair: pair[0])
        return [label for _, label in distances[:self.k]]

    def get_majority_class(self, neighbours):
        '''Return the most frequent label in ``neighbours``.

        On a tie the label that appears first in ``neighbours`` wins.
        Returns None when ``neighbours`` is empty.
        '''
        class_counts = {}
        for label in neighbours:
            class_counts[label] = class_counts.get(label, 0) + 1

        if not class_counts:
            return None
        # max() returns the first maximal key in insertion order, which gives
        # the first-seen-wins tie-break described above.
        return max(class_counts, key=class_counts.get)


def accuracy(preds, y_test):
    """Return the percentage (0-100) of predictions that equal the true label.

    Parameters
    ----------
    preds : sequence
        Predicted labels, indexable by position.
    y_test : sequence
        True labels, indexable by position; must be the same length as ``preds``.

    Returns
    -------
    float
        ``100 * (number of matching positions) / len(y_test)``.

    Raises
    ------
    ValueError
        If the two sequences differ in length (extra predictions used to be
        silently ignored) or if they are empty (the score is undefined).
    """
    total = len(y_test)
    if len(preds) != total:
        raise ValueError(
            f"preds and y_test must have the same length, got {len(preds)} and {total}"
        )
    if total == 0:
        raise ValueError("accuracy is undefined for empty input")

    # Count with plain ints so the result stays a built-in float even when
    # the labels are NumPy scalars.
    correct = sum(1 for i in range(total) if preds[i] == y_test[i])
    return 100 * correct / total

In [31]:
# Features: every column except the target, as a NumPy array.
X = dataset.drop(columns=['Result']).values
# Target: the `Result` column, as a NumPy array.
y = dataset['Result'].values

In [None]:
# Positional 80/20 hold-out: the first 80% of rows train the model and the
# remainder is used for testing. Rows are taken in file order (no shuffling).
split = int(len(dataset) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [33]:
# Build a 5-neighbour classifier, hand it the training split, and predict a
# label for every row of the test split.
# Note: get_neighbours prints one line per (test row, training row) pair, so
# this cell produces a very long output.
knn = kNN(k=5)
knn.fit(X_train, y_train)
preds = knn.predict(X_test)

Test Data: [16.8 24.3]
Distances from training data (Manhattan):
Train #0: Distance = 8.1, Label = 0
Train #1: Distance = 5.0, Label = 1
Train #2: Distance = 1.2, Label = 0
Train #3: Distance = 4.8, Label = 0
Train #4: Distance = 8.2, Label = 1
Train #5: Distance = 8.3, Label = 1
Train #6: Distance = 8.2, Label = 0
Train #7: Distance = 3.3, Label = 0
Train #8: Distance = 4.2, Label = 0
Train #9: Distance = 6.1, Label = 0
Train #10: Distance = 6.6, Label = 0
Train #11: Distance = 5.4, Label = 0
Train #12: Distance = 6.4, Label = 0
Train #13: Distance = 3.8, Label = 0
Train #14: Distance = 5.7, Label = 0
Train #15: Distance = 6.0, Label = 0
Train #16: Distance = 3.2, Label = 0
Train #17: Distance = 7.9, Label = 1
Train #18: Distance = 8.2, Label = 1
Train #19: Distance = 2.9, Label = 0
Train #20: Distance = 8.3, Label = 0
Train #21: Distance = 11.4, Label = 0
Train #22: Distance = 7.0, Label = 1
Train #23: Distance = 1.2, Label = 0
Train #24: Distance = 7.5, Label = 1
Train #25: Distance

In [34]:
# Report the percentage of test rows whose predicted label equals the true label.
print(f'Accuracy: {accuracy(preds, y_test):.3f} %')

Accuracy: 91.000 %
