In [None]:
# Load the "diabetes.csv" data and perform necessary processing
def load_diabetes_data(filename):
    """Read a headered CSV file into feature rows and integer class labels.

    The first line is treated as a header and skipped. On every remaining
    non-blank line, all comma-separated fields except the last are parsed as
    floats (the features) and the last field is parsed as an int (the label).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(data, labels)`` tuple: ``data`` is a list of float lists, one per
        row, and ``labels`` is the list of int labels in the same order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
    """
    data = []
    labels = []
    with open(filename, 'r') as file:
        next(file, None)  # Skip the header line (no-op for an empty file)
        for line in file:  # Iterate lazily instead of loading every line up front
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline at the end of the
                # file) carry no sample; parsing them would raise ValueError.
                continue
            values = line.split(',')
            data.append([float(value) for value in values[:-1]])  # Features
            labels.append(int(values[-1]))  # Class labels
    return data, labels

In [None]:
# Load the diabetes data: `data` holds the float feature rows and `labels`
# the matching integer class labels (the last CSV column).
data, labels = load_diabetes_data('diabetes.csv')

In [None]:
# Split the data into train and test sets.
# The split is positional (no shuffling): the first 80% of the rows form the
# training set and the remaining rows form the test set.
split_ratio = 0.8
split_index = int(len(data) * split_ratio)
X_train, X_test = data[:split_index], data[split_index:]
y_train, y_test = labels[:split_index], labels[split_index:]


In [None]:
class KNNClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Samples are indexable sequences of numbers. ``fit`` stores the training
    set; all distance computation happens at prediction time.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest neighbours.

        Args:
            k: Number of neighbours taking part in the vote (at least 1).

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            # With k < 1 the vote would run on an empty neighbour list and
            # fail later with an unrelated-looking error from max().
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Sequence of training samples.
            y: Sequence of labels; ``y[i]`` is the label of ``X[i]``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length or are empty.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return the predicted label of every sample in ``X``, in order."""
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Predict the label of a single sample by majority vote.

        If ``k`` exceeds the number of training samples, all of them vote.
        """
        distances = [self._euclidean_distance(x, x_train) for x_train in self.X_train]
        # Indices of the k closest training samples, nearest first. sorted()
        # is stable, so equally distant samples keep their training order.
        k_indices = sorted(range(len(distances)), key=distances.__getitem__)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # Majority vote. max() returns the first maximal element, so a tie
        # goes to the tied label whose neighbour is closest to x.
        return max(k_nearest_labels, key=k_nearest_labels.count)

    @staticmethod
    def _euclidean_distance(x1, x2):
        """Return the Euclidean distance between two equally long samples.

        Raises:
            ValueError: If the samples do not have the same number of features.
        """
        if len(x1) != len(x2):
            # Without this check a longer x2 would silently have its extra
            # features ignored, producing a wrong distance.
            raise ValueError(
                f"samples must have the same number of features, "
                f"got {len(x1)} and {len(x2)}"
            )
        return sum((a - b) ** 2 for a, b in zip(x1, x2)) ** (1 / 2)

In [None]:
# Create a KNN classifier that votes among the 9 nearest training samples
knn = KNNClassifier(k=9)
# fit() stores the training set; distances are computed later, at prediction time
knn.fit(X_train, y_train)

In [None]:
# Pair every test sample with its true label: [[features, label], ...]
z = [[sample, y_test[index]] for index, sample in enumerate(X_test)]

In [None]:
# Inspect the [features, label] pairs built from the test set
z

[[[11.0, 138.0, 74.0, 26.0, 144.0, 36.1, 0.557, 50.0], 1],
 [[3.0, 106.0, 72.0, 0.0, 0.0, 25.8, 0.207, 27.0], 0],
 [[6.0, 117.0, 96.0, 0.0, 0.0, 28.7, 0.157, 30.0], 0],
 [[2.0, 68.0, 62.0, 13.0, 15.0, 20.1, 0.257, 23.0], 0],
 [[9.0, 112.0, 82.0, 24.0, 0.0, 28.2, 1.282, 50.0], 1],
 [[0.0, 119.0, 0.0, 0.0, 0.0, 32.4, 0.141, 24.0], 1],
 [[2.0, 112.0, 86.0, 42.0, 160.0, 38.4, 0.246, 28.0], 0],
 [[2.0, 92.0, 76.0, 20.0, 0.0, 24.2, 1.698, 28.0], 0],
 [[6.0, 183.0, 94.0, 0.0, 0.0, 40.8, 1.461, 45.0], 0],
 [[0.0, 94.0, 70.0, 27.0, 115.0, 43.5, 0.347, 21.0], 0],
 [[2.0, 108.0, 64.0, 0.0, 0.0, 30.8, 0.158, 21.0], 0],
 [[4.0, 90.0, 88.0, 47.0, 54.0, 37.7, 0.362, 29.0], 0],
 [[0.0, 125.0, 68.0, 0.0, 0.0, 24.7, 0.206, 21.0], 0],
 [[0.0, 132.0, 78.0, 0.0, 0.0, 32.4, 0.393, 21.0], 0],
 [[5.0, 128.0, 80.0, 0.0, 0.0, 34.6, 0.144, 45.0], 0],
 [[4.0, 94.0, 65.0, 22.0, 0.0, 24.7, 0.148, 21.0], 0],
 [[7.0, 114.0, 64.0, 0.0, 0.0, 27.4, 0.732, 34.0], 1],
 [[0.0, 102.0, 78.0, 40.0, 90.0, 34.5, 0.238, 24.0], 0

In [None]:
# Sanity check on one test sample; predict() takes a list of samples, so the
# single sample is wrapped in a list and a one-element list comes back.
knn.predict([X_test[1]])

[0]

In [None]:
# Evaluate the KNN classifier: accuracy is the percentage of test samples
# whose predicted label equals the true label.
total_samples = len(X_test)
correct_predictions = sum(
    1
    for predicted, actual in zip(knn.predict(X_test), y_test)
    if predicted == actual
)

accuracy = correct_predictions / total_samples * 100
print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 70.78%


In [None]:
#Function to calculate evaluation metrics
def calculate_metrics(true_labels, predicted_labels):
    """Compute precision, recall and F1 score for the positive class (label 1).

    Labels are expected to be binary: 1 marks the positive class and 0 the
    negative class. Pairs involving any other label value are not counted.

    Args:
        true_labels: Iterable of ground-truth labels.
        predicted_labels: Iterable of predicted labels, aligned with
            ``true_labels``.

    Returns:
        A ``(precision, recall, f1_score)`` tuple of fractions in [0, 1].
        A metric whose denominator is zero is reported as 0.
    """
    true_positives = false_positives = false_negatives = 0
    # Count everything in a single pass: zipping the inputs once per counter
    # would exhaust one-shot iterables (generators, iterators) on the first
    # pass and leave the remaining counters at zero.
    for true, pred in zip(true_labels, predicted_labels):
        if pred == 1:
            if true == 1:
                true_positives += 1
            elif true == 0:
                false_positives += 1
        elif pred == 0 and true == 1:
            false_negatives += 1

    precision = true_positives / (true_positives + false_positives) if true_positives + false_positives != 0 else 0
    recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives != 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall != 0 else 0
    return precision, recall, f1_score

In [None]:
# Make predictions for every test sample in a single batch call
predicted_labels = knn.predict(X_test)

In [None]:
# Calculate evaluation metrics for the positive class and report them as percentages
precision, recall, f1_score = calculate_metrics(y_test, predicted_labels)

for metric_label, metric_value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_value * 100)

Precision: 61.36363636363637
Recall: 49.09090909090909
F1 Score: 54.545454545454554
