In [1]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv('knn.csv')  # Replace 'your_dataset.csv' with the path to your actual dataset

# Extract the features and target variable
X = data[['radius_se', 'smoothness_se','compactness_mean']].values  # Replace 'feature1' and 'feature2' with the names of your features
y = data['diagnosis'].values  # Replace 'target' with the name of your target variable

# Split the dataset into training and testing sets
split_ratio = 0.8  # Split ratio for training and testing data
split_index = int(split_ratio * len(X))

X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]

# Define the number of neighbors (k)
k = 3

# Function to calculate the Euclidean distance between two data points
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum(np.square(x1 - x2)))

# Function to make predictions using the KNN algorithm
def predict(X_train, y_train, X_test, k):
    y_pred = []

    for i in range(len(X_test)):
        distances = []

        # Calculate the Euclidean distance between the testing data point and each training data point
        for j in range(len(X_train)):
            distance = euclidean_distance(X_test[i], X_train[j])
            distances.append((distance, y_train[j]))

        # Sort the distances in ascending order
        distances.sort()

        # Select the top k neighbors
        neighbors = distances[:k]

        # Predict the target variable based on the majority vote of the neighbors
        labels = [neighbor[1] for neighbor in neighbors]
        prediction = max(set(labels), key=labels.count)
        y_pred.append(prediction)

    return y_pred

# Make predictions on the testing data
y_pred = predict(X_train, y_train, X_test, k)

# Calculate the accuracy
accuracy = np.sum(y_test == y_pred) / len(y_test)
print("Accuracy:", accuracy)

# Classify correct and wrong predictions
correct_predictions = X_test[y_test == y_pred]
wrong_predictions = X_test[y_test != y_pred]

print("Correct Predictions:")
print(correct_predictions)
print("Wrong Predictions:")
print(wrong_predictions)


Accuracy: 0.8070175438596491
Correct Predictions:
[[0.3408   0.005841 0.07426 ]
 [0.3135   0.009861 0.08574 ]
 [0.2084   0.005768 0.05205 ]
 [0.2621   0.006054 0.05073 ]
 [0.1781   0.006588 0.04626 ]
 [0.9291   0.00874  0.111   ]
 [2.547    0.00765  0.1988  ]
 [0.2315   0.005356 0.05223 ]
 [0.1816   0.006709 0.05855 ]
 [0.2023   0.005969 0.05994 ]
 [0.3416   0.01098  0.05428 ]
 [0.9289   0.006766 0.2004  ]
 [0.3776   0.007501 0.06258 ]
 [0.2446   0.003271 0.08549 ]
 [0.4455   0.007339 0.03398 ]
 [0.1482   0.004474 0.1069  ]
 [0.1499   0.004873 0.08269 ]
 [0.3478   0.004107 0.1108  ]
 [0.1555   0.003308 0.05319 ]
 [0.2034   0.004957 0.08228 ]
 [0.3147   0.009197 0.1893  ]
 [0.2194   0.004911 0.07838 ]
 [0.3316   0.003704 0.05326 ]
 [0.1588   0.00445  0.1155  ]
 [0.2431   0.003245 0.07957 ]
 [0.163    0.006034 0.1299  ]
 [0.2204   0.003535 0.06698 ]
 [0.5659   0.005288 0.1448  ]
 [0.3713   0.008998 0.09263 ]
 [0.2239   0.005096 0.052   ]
 [0.7548   0.007997 0.1289  ]
 [0.3276   0.01039  