In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the two CSV datasets used by the notebook.
knn_df = pd.read_csv(r"C:\Users\hp\Downloads\knn_dataset.csv")
nb_df = pd.read_csv(r"C:\Users\hp\Downloads\naive_bayes_dataset.csv")

# ---- KNN: four feature columns plus an integer-encoded target ----
knn_feature_columns = ['Brightness', 'Saturation', 'Hue', 'Contrast']
X_knn = knn_df[knn_feature_columns].values
y_knn = knn_df['Class'].values

# Map the class names to integer codes; le_knn.classes_ keeps the names.
le_knn = LabelEncoder()
y_knn_encoded = le_knn.fit_transform(y_knn)

# Hold out 30% of the rows for testing, with a fixed random_state.
knn_split = train_test_split(X_knn, y_knn_encoded, test_size=0.3, random_state=42)
X_knn_train, X_knn_test, y_knn_train, y_knn_test = knn_split

# ---- Naïve Bayes: every column gets its own LabelEncoder ----
le_buy, le_win, le_length, le_chars, le_class = (LabelEncoder() for _ in range(5))

# Pair each feature encoder with the column it is fitted on; the list order
# fixes the column order of X_nb.
nb_feature_encoders = [
    (le_buy, 'Contains_Buy'),
    (le_win, 'Contains_Win'),
    (le_length, 'Email_Length'),
    (le_chars, 'Special_Characters'),
]
X_nb = np.column_stack(
    [encoder.fit_transform(nb_df[column]) for encoder, column in nb_feature_encoders]
)
y_nb = le_class.fit_transform(nb_df['Class'])

# Same 70/30 split settings as for the KNN data.
nb_split = train_test_split(X_nb, y_nb, test_size=0.3, random_state=42)
X_nb_train, X_nb_test, y_nb_train, y_nb_test = nb_split

In [4]:
class customKNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default=3
        Number of nearest training samples that vote on each prediction.
        If k exceeds the number of training samples, all of them vote.
    """

    def __init__(self, k=3):
        self.k = k

    def _check_k(self):
        """Raise ValueError unless k is at least 1."""
        # A zero or negative k would make the [:k] slice select no neighbours
        # or silently drop neighbours from the far end instead.
        if self.k < 1:
            raise ValueError(f"k must be >= 1, got {self.k}")

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points.

        Both points may be any array-like of equal shape. They are converted
        to float first so that integer inputs (e.g. uint8) cannot wrap around
        during the subtraction or the squaring.
        """
        diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        return np.sqrt(np.sum(diff ** 2))

    def fit(self, X, y):
        """Store the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training features; stored as a float array.
        y : array-like of shape (n_samples,)
            Training labels; stored as an array.

        Raises
        ------
        ValueError
            If k < 1, X is not two-dimensional, X is empty, or X and y have
            different lengths.
        """
        self._check_k()
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if len(X) == 0:
            raise ValueError("X must contain at least one training sample")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a class label for each row of X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            One predicted label per input row.

        Raises
        ------
        ValueError
            If k < 1.
        """
        self._check_k()
        X = np.asarray(X, dtype=float)
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Predict the class label for a single sample."""
        # Distances from x to every training row, computed in one vectorised
        # step instead of one Python-level call per training row.
        distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))

        # A stable sort keeps equidistant neighbours in training-set order,
        # so the chosen neighbours do not depend on the sort algorithm.
        k_indices = np.argsort(distances, kind="stable")[:self.k]

        # np.unique returns the labels sorted, and argmax picks the first
        # maximum, so a tied vote always goes to the smallest label.
        labels, votes = np.unique(self.y_train[k_indices], return_counts=True)
        return labels[np.argmax(votes)]

In [8]:
class customNaiveBayes:
    """Naive Bayes classifier for categorical features with add-one smoothing.

    After ``fit`` the instance exposes:

    * ``classes`` - sorted array of the distinct labels seen in training.
    * ``class_probabilities`` - ``{class: prior probability}``.
    * ``feature_probabilities`` - ``{class: {feature index: {value: P(value | class)}}}``.
    """

    # Likelihood assigned to a feature value that never occurred in the
    # training data. It is the same for every class, so such a feature does
    # not favour any class when the scores are compared.
    _UNSEEN_VALUE_PROBABILITY = 1e-10

    def __init__(self):
        self.class_probabilities = {}
        self.feature_probabilities = {}

    def fit(self, X, y):
        """Estimate class priors and per-class feature-value likelihoods.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Categorical feature values.
        y : array-like of shape (n_samples,)
            Class labels.

        Raises
        ------
        ValueError
            If X is not two-dimensional, X and y have different lengths, or
            there are no training samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        n_samples = len(y)
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")

        self.classes = np.unique(y)

        # Start from empty tables so a refit never keeps entries that belong
        # to classes of an earlier fit.
        self.class_probabilities = {}
        self.feature_probabilities = {}

        # The distinct values of a feature do not depend on the class, so
        # compute them once per feature rather than once per class.
        feature_values = [np.unique(X[:, feature]) for feature in range(X.shape[1])]

        for cls in self.classes:
            in_class = y == cls
            X_cls = X[in_class]
            self.class_probabilities[cls] = np.sum(in_class) / n_samples

            cls_probs = {}
            for feature, values in enumerate(feature_values):
                # Laplace smoothing: add one to every count and the number of
                # distinct values to the denominator, so no value gets zero.
                denominator = len(X_cls) + len(values)
                cls_probs[feature] = {
                    value: (np.sum(X_cls[:, feature] == value) + 1) / denominator
                    for value in values
                }
            self.feature_probabilities[cls] = cls_probs

    def predict(self, X):
        """Predict the most probable class for each row of X.

        Scores are accumulated as sums of log-probabilities rather than as a
        product of probabilities, so a large number of features cannot
        underflow every class score to 0.0 and turn the decision into an
        arbitrary tie.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray
            One predicted class label per input row. When several classes
            share the best score, the first one in ``classes`` is returned.
        """
        X = np.asarray(X)
        unseen = self._UNSEEN_VALUE_PROBABILITY
        predictions = []

        for sample in X:
            log_posteriors = {}
            for cls in self.classes:
                likelihoods = self.feature_probabilities[cls]
                score = np.log(self.class_probabilities[cls])
                for feature, value in enumerate(sample):
                    score += np.log(likelihoods[feature].get(value, unseen))
                log_posteriors[cls] = score

            # max() returns the first maximal key in insertion order.
            predictions.append(max(log_posteriors, key=log_posteriors.get))

        return np.array(predictions)

In [9]:
def fit_and_predict(model, X_train, y_train, X_test):
    """Fit ``model`` on the training split and return its test-split predictions."""
    model.fit(X_train, y_train)
    return model.predict(X_test)


# KNN: hand-written implementation next to scikit-learn's, both with 3 neighbours.
custom_knn = customKNN(k=3)
custom_knn_pred = fit_and_predict(custom_knn, X_knn_train, y_knn_train, X_knn_test)

sklearn_knn = KNeighborsClassifier(n_neighbors=3)
sklearn_knn_pred = fit_and_predict(sklearn_knn, X_knn_train, y_knn_train, X_knn_test)

# Naïve Bayes: hand-written implementation next to scikit-learn's MultinomialNB.
# NOTE(review): X_nb holds label-encoded categories, while MultinomialNB is
# described for count-like features - CategoricalNB may be the closer
# counterpart to customNaiveBayes; confirm before comparing accuracies.
custom_nb = customNaiveBayes()
custom_nb_pred = fit_and_predict(custom_nb, X_nb_train, y_nb_train, X_nb_test)

sklearn_nb = MultinomialNB()
sklearn_nb_pred = fit_and_predict(sklearn_nb, X_nb_train, y_nb_train, X_nb_test)

In [10]:
def _print_report(title, y_true, y_pred, encoder):
    """Print a titled classification report labelled with the encoder's class names.

    The full set of encoded labels is passed explicitly so that the class
    names always line up with the labels, even when a small test split
    happens to contain (and predict) only some of the classes. Without it,
    the number of names can differ from the number of labels found in the
    data.
    """
    print(f"\n{title} Classification Report:")
    print(classification_report(
        y_true,
        y_pred,
        labels=np.arange(len(encoder.classes_)),
        target_names=encoder.classes_,
    ))


# Headline accuracies: custom implementation vs. scikit-learn for each model.
print("KNN Classification Results:")
print("\nCustom KNN Accuracy:", accuracy_score(y_knn_test, custom_knn_pred))
print("Scikit-learn KNN Accuracy:", accuracy_score(y_knn_test, sklearn_knn_pred))

print("\nNaïve Bayes Classification Results:")
print("\nCustom Naïve Bayes Accuracy:", accuracy_score(y_nb_test, custom_nb_pred))
print("Scikit-learn Naïve Bayes Accuracy:", accuracy_score(y_nb_test, sklearn_nb_pred))

# Detailed per-class precision / recall / F1 reports.
_print_report("Custom KNN", y_knn_test, custom_knn_pred, le_knn)
_print_report("Scikit-learn KNN", y_knn_test, sklearn_knn_pred, le_knn)
_print_report("Custom Naïve Bayes", y_nb_test, custom_nb_pred, le_class)
_print_report("Scikit-learn Naïve Bayes", y_nb_test, sklearn_nb_pred, le_class)

KNN Classification Results:

Custom KNN Accuracy: 0.6666666666666666
Scikit-learn KNN Accuracy: 0.6666666666666666

Naïve Bayes Classification Results:

Custom Naïve Bayes Accuracy: 0.5333333333333333
Scikit-learn Naïve Bayes Accuracy: 0.6

Custom KNN Classification Report:
              precision    recall  f1-score   support

        Blue       0.33      0.25      0.29         4
         Red       0.75      0.82      0.78        11

    accuracy                           0.67        15
   macro avg       0.54      0.53      0.53        15
weighted avg       0.64      0.67      0.65        15


Scikit-learn KNN Classification Report:
              precision    recall  f1-score   support

        Blue       0.33      0.25      0.29         4
         Red       0.75      0.82      0.78        11

    accuracy                           0.67        15
   macro avg       0.54      0.53      0.53        15
weighted avg       0.64      0.67      0.65        15


Custom Naïve Bayes Classifica