In [102]:
import pandas as pd
import random
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

def knearestneighbors(file, k, attributes):
    # Load and preprocess the data
    df = pd.read_csv(file)
    df = df.rename({'num': 'disease'}, axis=1)
    df['disease'] = df['disease'].apply(lambda x: min(x, 1))

    # Select features and target
    X = df[attributes].values
    y = df['disease'].values
    
    # Initialize Nearest Neighbors model
    nn = NearestNeighbors(n_neighbors=k, metric='euclidean', algorithm='auto')

    # Track precision, recall, and F1 scores for 10 iterations
    test_sizes = [0.1, 0.15, 0.2, 0.25, 0.3, 0.33, 0.4, 0.45, 0.5, 0.6]
    precision_scores = []
    recall_scores = []
    f1_scores = []
    
    for _ in range(10):  # 10 iterations
        # Randomly choose a test size and perform train-test split
        test_size = random.choice(test_sizes)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random.randint(1, 100))

        # Fit the Nearest Neighbors model on the training set
        nn.fit(X_train)

        # Store predictions for the test set
        y_pred = []
        for test_point in X_test:
            distances, indices = nn.kneighbors([test_point])
            nbrs = df.iloc[indices[0]]

            # Get the majority class among the k-nearest neighbors
            healthy = nbrs[nbrs['disease'] == 0].count().disease
            sick = nbrs[nbrs['disease'] == 1].count().disease
            predict = 0 if (healthy > sick) else 1
            y_pred.append(predict)

        # Evaluate the predictions: precision, recall, F1 score
        precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

    # Report the results
    print("Precision Scores (10 iterations):", precision_scores)
    print("Recall Scores (10 iterations):", recall_scores)
    print("F1 Scores (10 iterations):", f1_scores)
    print("Mean F1 Score:", sum(f1_scores) / len(f1_scores))

    for i in precision_scores:
        print(f"Iteration: {i}")
        print(f"Precison Score: {precision_scores[i]}")
        print(f"Recall Score: {recall_scores[i]}")
        print(f"F1 Score: {f1_scores[i]}")


# Call the function
knearestneighbors('cleveland.csv', 10, ['age', 'trestbps', 'chol', 'thalach'])


Precision Scores (10 iterations): [np.float64(0.3684210526315789), np.float64(0.6923076923076923), np.float64(0.5434782608695652), np.float64(0.4), np.float64(0.38461538461538464), np.float64(0.46875), np.float64(0.5555555555555556), np.float64(0.4666666666666667), np.float64(0.4772727272727273), np.float64(0.5416666666666666)]
Recall Scores (10 iterations): [np.float64(0.4117647058823529), np.float64(0.5806451612903226), np.float64(0.6097560975609756), np.float64(0.43243243243243246), np.float64(0.21739130434782608), np.float64(0.5660377358490566), np.float64(0.42857142857142855), np.float64(0.4827586206896552), np.float64(0.5), np.float64(0.48148148148148145)]
F1 Scores (10 iterations): [np.float64(0.3888888888888889), np.float64(0.631578947368421), np.float64(0.5747126436781609), np.float64(0.4155844155844156), np.float64(0.2777777777777778), np.float64(0.5128205128205128), np.float64(0.4838709677419355), np.float64(0.4745762711864407), np.float64(0.4883720930232558), np.float64(0.5

TypeError: list indices must be integers or slices, not numpy.float64