In [8]:
# 1. Implement a K-Nearest Neighbor classifier on the heart disease dataset and analyze its
# performance using accuracy while varying the number of neighbors (e.g. 1 – 250). Also
# print the neighbor count(s) with the highest accuracy and those with the lowest accuracy.
# 2. Run the above KNN algorithm for different random seed values from 1 to 10.
# Print all accuracies and then print the highest and the lowest.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Read the heart disease data and preview its first five rows.
heart = pd.read_csv('heart.csv')
print(heart.head())

# Labels are the 'target' column; features are every remaining column.
y = heart['target']
x = heart.drop('target', axis=1)

# Positional alternative, valid only when 'target' is the last column:
#   features -> heart.iloc[:, :-1]   (all rows, every column except the last)
#   labels   -> heart.iloc[:, -1]    (all rows, only the last column)

# Evaluate KNN for every random seed (1-10) and every neighbor count k (1-250).
# Each seed produces a different 80/20 train/test split, so the best and worst k
# are reported per seed, and the overall best and worst across all seeds are
# reported once at the end.
overall_highest = None  # (seed, (k, accuracy)) with the highest accuracy seen so far
overall_lowest = None   # (seed, (k, accuracy)) with the lowest accuracy seen so far

for seed in range(1, 11):
    # Splitting the dataset; random_state makes the split reproducible per seed
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed)

    # n_neighbors cannot exceed the number of training samples, so cap k there.
    max_k = min(250, len(x_train))

    output = []  # (k, accuracy) pairs for the current seed
    for k in range(1, max_k + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(x_train, y_train)
        y_prediction = knn.predict(x_test)
        acc = accuracy_score(y_test, y_prediction)
        output.append((k, acc))

    # Sort by accuracy, best first. The sort is stable, so among ties the
    # highest entry is the smallest k and the lowest entry is the largest k.
    output.sort(key=lambda pair: pair[1], reverse=True)
    highest_acc = output[0]
    lowest_acc = output[-1]

    print(f"Random Seed {seed}: Highest Accuracy: {highest_acc}, Lowest Accuracy: {lowest_acc}")

    # Track the extremes across all seeds; strict comparisons keep the
    # earliest seed when several seeds tie.
    if overall_highest is None or highest_acc[1] > overall_highest[1][1]:
        overall_highest = (seed, highest_acc)
    if overall_lowest is None or lowest_acc[1] < overall_lowest[1][1]:
        overall_lowest = (seed, lowest_acc)

# Report the overall extremes rather than whatever the last seed happened to produce.
print(f"Highest: {overall_highest[1]} (Random Seed {overall_highest[0]})")
print(f"Lowest: {overall_lowest[1]} (Random Seed {overall_lowest[0]})")




   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   52    1   0       125   212    0        1      168      0      1.0      2   
1   53    1   0       140   203    1        0      155      1      3.1      0   
2   70    1   0       145   174    0        1      125      1      2.6      0   
3   61    1   0       148   203    0        1      161      0      0.0      2   
4   62    0   0       138   294    1        1      106      0      1.9      1   

   ca  thal  target  
0   2     3       0  
1   0     3       0  
2   0     3       0  
3   1     3       0  
4   3     2       0  
Random Seed 1: Highest Accuracy: (1, 1.0), Lowest Accuracy: (216, 0.6146341463414634)
Random Seed 2: Highest Accuracy: (1, 0.9804878048780488), Lowest Accuracy: (246, 0.6390243902439025)
Random Seed 3: Highest Accuracy: (1, 1.0), Lowest Accuracy: (206, 0.5853658536585366)
Random Seed 4: Highest Accuracy: (1, 1.0), Lowest Accuracy: (231, 0.5756097560975609)
Random Seed 5: Highest