In [2]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

class KNN:
    """
    Custom implementation of the K-Nearest Neighbors (KNN) classifier.

    The model memorises the training samples in ``fit`` and, for every query
    sample, returns the most frequent label among the ``k`` training samples
    with the smallest Euclidean distance to it.
    """

    def __init__(self, k=3):
        """
        Initialize the KNN model.
        :param k: Number of nearest neighbors to consider (positive integer).
        :raises ValueError: If k is not an integer or is not positive.
        """
        # A non-integer k (e.g. 2.5) would only fail much later, when it is
        # used as a slice bound, so it is rejected up front.
        if not isinstance(k, (int, np.integer)) or k <= 0:
            raise ValueError("k must be a positive integer.")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """
        Store the training dataset.
        :param X_train: Numeric training samples, one sample per entry.
        :param y_train: Labels, one per training sample.
        :raises ValueError: If the number of samples and labels differ, or if
            there are fewer training samples than k.
        """
        # Features are stored as floats so that distance arithmetic cannot
        # wrap around for small or unsigned integer dtypes.
        features = np.array(X_train, dtype=float)
        labels = np.array(y_train)

        if features.ndim == 0 or labels.ndim == 0:
            raise ValueError("X_train and y_train must be sequences of samples.")
        if len(features) != len(labels):
            raise ValueError(
                f"X_train has {len(features)} samples but y_train has "
                f"{len(labels)} labels."
            )
        if len(features) < self.k:
            raise ValueError(
                f"k={self.k} exceeds the number of training samples "
                f"({len(features)})."
            )

        self.X_train = features
        self.y_train = labels

    def predict(self, X_test):
        """
        Predict the labels for test data.
        :param X_test: Numeric query samples, one sample per entry.
        :return: NumPy array with one predicted label per query sample.
        :raises RuntimeError: If called before fit.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit must be called before predict.")
        samples = np.array(X_test, dtype=float)
        return np.array([self._predict_single(x) for x in samples])

    def _predict_single(self, x):
        """
        Predict a label for a single test sample.
        :param x: One query sample.
        :return: Most frequent label among the k nearest training samples.
        """
        distances = [self._euclidean_distance(x, row) for row in self.X_train]
        # A stable sort keeps equidistant training samples in their original
        # order, which makes the neighbour selection reproducible.
        nearest = np.argsort(distances, kind="stable")[:self.k]
        votes = Counter(self.y_train[i] for i in nearest)

        # When several labels share the top count, most_common returns the
        # one that was counted first, i.e. the label of the closest neighbour.
        return votes.most_common(1)[0][0]

    def _euclidean_distance(self, x1, x2):
        """
        Compute Euclidean distance between two points.
        :param x1: First point.
        :param x2: Second point.
        :return: Square root of the summed squared coordinate differences.
        """
        # Converting to float first avoids integer wrap-around in the
        # subtraction and squaring.
        delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        return np.sqrt(np.sum(delta ** 2))

def load_dataset(filename):
    """
    Load dataset from a CSV file and convert categorical features into numerical values.

    Every column whose dtype is not numeric is replaced by the integer codes
    produced by a ``LabelEncoder`` fitted on that column.

    :param filename: Path of the CSV file to read.
    :return: The loaded DataFrame, or None if reading or encoding failed
        (the error is printed instead of being raised).
    """
    try:
        df = pd.read_csv(filename)

        # Test for "not numeric" rather than "dtype == object": text columns
        # may also be stored with pandas' dedicated string dtype, which an
        # object comparison would miss and leave un-encoded.
        for column in df.columns:
            if not pd.api.types.is_numeric_dtype(df[column].dtype):
                df[column] = LabelEncoder().fit_transform(df[column])
    except Exception as e:
        # Deliberate catch-all: callers rely on None to signal any failure.
        print(f"\n❌ Error loading dataset: {e}")
        return None

    # Reported only once the frame has been read and encoded, so a failed
    # encoding no longer prints both a success and an error message.
    print("\n✅ Dataset Loaded Successfully!\n")
    return df

def calculate_accuracy(y_true, y_pred):
    """
    Calculate the accuracy of the model.

    :param y_true: Actual labels.
    :param y_pred: Predicted labels, in the same order as y_true.
    :return: Percentage (0-100) of positions where both labels are equal.
    :raises ValueError: If the inputs differ in shape or are empty.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # instead of an element-wise result, which would give a wrong count.
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)

    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got "
            f"{actual.shape} and {predicted.shape}."
        )
    if actual.size == 0:
        raise ValueError("Cannot compute accuracy of empty inputs.")

    return float(np.mean(actual == predicted) * 100)

def main(dataset_file="student_performance.csv", k=3, test_size=0.2, random_state=42):
    """
    Driver function to load dataset, train KNN, and check accuracy.

    :param dataset_file: Path of the CSV file to load; the last column is
        used as the label and all other columns as features.
    :param k: Number of neighbors passed to the KNN model.
    :param test_size: Fraction of the rows held out for testing.
    :param random_state: Seed passed to train_test_split so the split is
        repeatable.
    """
    # Load dataset; load_dataset reports the error itself and returns None.
    df = load_dataset(dataset_file)
    if df is None:
        return

    # Assume last column is the target (label) and rest are features
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values

    # Split dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Initialize and train KNN model
    knn = KNN(k=k)
    knn.fit(X_train, y_train)

    # Make predictions
    predictions = knn.predict(X_test)

    # Calculate accuracy
    accuracy = calculate_accuracy(y_test, predictions)

    # Display results
    print("\n📊 **Prediction Results:**")
    for sample, predicted, actual in zip(X_test, predictions, y_test):
        print(f"   ➤ Data: {sample} → Predicted Class: {predicted} | Actual Class: {actual}")

    print(f"\n🎯 Model Accuracy: {accuracy:.2f}%")

# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



✅ Dataset Loaded Successfully!


📊 **Prediction Results:**
   ➤ Data: [ 7 70 68  1  1  0] → Predicted Class: 1 | Actual Class: 1
   ➤ Data: [ 5 60 45  0  0  2] → Predicted Class: 0 | Actual Class: 0

🎯 Model Accuracy: 100.00%
