In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the whole table from disk into a DataFrame.
data = pd.read_csv('Logistic_Y_Train.csv')

# Every column except the last is treated as a feature; the last column is
# treated as the label. Both are converted to plain NumPy arrays.
X, Y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# KNN classifier class
class KNN_classifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Usage: construct with the desired ``k``, store the training set with
    ``initialise_data``, then call ``predict`` once per query point.
    """

    def __init__(self, k=5):
        """Create a classifier that votes among the ``k`` nearest training points.

        Args:
            k: Number of neighbours that take part in the vote. It is checked
                when ``predict`` is called, so it may be reassigned later.
        """
        self.k = k

    def initialise_data(self, X, Y):
        """Store the training samples and their labels.

        Args:
            X: Training features, one sample per row. Anything accepted by
                ``np.asarray`` works (ndarray, nested lists, DataFrame).
            Y: Training labels, one entry per row of ``X``.

        Raises:
            ValueError: If ``X`` and ``Y`` hold a different number of samples.
        """
        X = np.asarray(X)
        Y = np.asarray(Y)
        # A length mismatch would otherwise pair samples with the wrong
        # labels or fail much later inside predict().
        if len(X) != len(Y):
            raise ValueError(
                f'X and Y must have the same number of samples, '
                f'got {len(X)} and {len(Y)}'
            )
        self.X = X
        self.Y = Y

    def distance_formula(self, point1, point2):
        """Return the Euclidean distance between ``point1`` and ``point2``."""
        return np.sqrt(np.sum((point1 - point2)**2))

    def predict(self, test_point):
        """Classify ``test_point`` by majority vote of its nearest neighbours.

        Also prints the predicted class and its vote share.

        Args:
            test_point: Feature vector compatible with the rows of the stored
                training data.

        Returns:
            A ``(label, probability)`` tuple, where ``probability`` is the
            fraction of the considered neighbours that voted for ``label``.
            If fewer than ``k`` training samples exist, all of them vote.

        Raises:
            ValueError: If ``k`` is smaller than 1 or no training samples
                are stored.
        """
        # A negative k would make the slice below drop neighbours from the
        # far end instead of selecting the nearest ones, so reject it early.
        if self.k < 1:
            raise ValueError(f'k must be a positive integer, got {self.k!r}')
        if len(self.X) == 0:
            raise ValueError('cannot predict: no training samples are stored')

        # Each entry is (distance, label); the label is that training
        # sample's vote. Tuples sort by distance first, then by label, so
        # equidistant neighbours are ordered by label value.
        neighbours = sorted(
            (self.distance_formula(test_point, sample), label)
            for sample, label in zip(self.X, self.Y)
        )[:self.k]

        labels = [label for _, label in neighbours]
        all_labels, counts = np.unique(labels, return_counts=True)

        # np.unique returns labels in sorted order and argmax picks the first
        # maximum, so a tied vote resolves to the smallest label.
        max_count_index = np.argmax(counts)
        predicted = all_labels[max_count_index]
        probability = counts[max_count_index] / np.sum(counts)

        print(f'Predicted Class: {predicted}, Prob: {probability}')

        return predicted, probability

# Demonstration: classify a single held-out sample, here the first row of the
# test split.
test_point = X_test[0]

# Build a 5-neighbour classifier and hand it the training split.
knn = KNN_classifier(k=5)
knn.initialise_data(X_train, Y_train)

# predict() returns the winning label together with its share of the vote.
predicted_class, probability = knn.predict(test_point)

print(f'Test Point: {test_point}')
print(f'Predicted Class: {predicted_class}, Probability: {probability}')


Predicted Class: 0, Prob: 0.6
Test Point: [  6.    98.    58.    33.   190.    34.     0.43  43.  ]
Predicted Class: 0, Probability: 0.6
