In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
diabetes_data = pd.read_csv("diabetes.csv")

In [4]:
features = diabetes_data.drop(['Outcome'], axis=1)
target = diabetes_data['Outcome']

In [5]:
def min_max_scaling(X):
    return (X - X.min()) / (X.max() - X.min())

In [6]:
features_normalized = features.apply(min_max_scaling)

X_train, X_test, y_train, y_test, = train_test_split(features_normalized, target, test_size=0.3, shuffle=False)

In [7]:
def euclidean_Theorem(x1, x2):
    distance = np.sqrt(np.sum((x1 - x2) ** 2))
    return distance

In [8]:
class KNN():
    def __init__(self, X, y, k):
        self.X_train = X
        self.y_train = y
        self.k = k

    def predict(self, X):
        predictions = []
        for x_to_predict in X:

            distances = [euclidean_Theorem(x_to_predict, x_train) for x_train in self.X_train]

            k_indices = np.argsort(distances)[:self.k]
            k_nearest_labels = [self.y_train[i] for i in k_indices]
            k_distances = sorted(distances)[:self.k]

            count_ones = np.sum(np.array(k_nearest_labels) == 1)
            count_zeros = np.sum(np.array(k_nearest_labels) == 0)

            if count_ones > count_zeros:
                predictions.append(1)

            elif count_ones < count_zeros:
                predictions.append(0)
                
            else:  # tie case
                class1_weighted_sum = 0     #hold summation of weighted distances of class 1
                class0_weighted_sum = 0

                weighted_distances = 1 / (np.array(k_distances) + 1e-8)

                for i in range(self.k):
                    if k_nearest_labels[i] == 1:
                        class1_weighted_sum += weighted_distances[i]
                    else:
                        class0_weighted_sum += weighted_distances[i]

                predictions.append(1 if class1_weighted_sum  > class0_weighted_sum  else 0)
   
        return predictions    
            

In [9]:
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()

In [10]:
k_itreations = 5
accuracies = 0

for k in range(2,k_itreations+2):
    knn_model = KNN(X_train, y_train, k)
    predictions = knn_model.predict(X_test)
    correct_predictions = np.sum(y_test == predictions)
    accuracy = correct_predictions / len(X_test)
    accuracies += accuracy

    print(f"k value: {k}")
    print(f"Number of correctly classified instances: {correct_predictions}")
    print(f"Total number of instances: {len(X_test)}")
    print(f"Accuracy: {accuracy * 100:.2f} %\n")

print(f"Average Accuracy Across All Iterations: {(accuracies/k_itreations)*100:.2f} %")    

k value: 2
Number of correctly classified instances: 163
Total number of instances: 231
Accuracy: 70.56 %

k value: 3
Number of correctly classified instances: 175
Total number of instances: 231
Accuracy: 75.76 %

k value: 4
Number of correctly classified instances: 176
Total number of instances: 231
Accuracy: 76.19 %

k value: 5
Number of correctly classified instances: 172
Total number of instances: 231
Accuracy: 74.46 %

k value: 6
Number of correctly classified instances: 175
Total number of instances: 231
Accuracy: 75.76 %

Average Accuracy Across All Iterations: 74.55 %
