In [9]:
# Environment diagnostics: print the interpreter's module search path, then
# import scikit-learn and print its version before the rest of the notebook
# relies on it.
# NOTE(review): the saved output of this cell ends in
# "ModuleNotFoundError: No module named 'sklearn'" — presumably the kernel is
# not running in the environment where scikit-learn is installed; confirm.
import sys
print(sys.path)  # Check where Python is looking for packages

import sklearn
print(sklearn.__version__)  # Verify it's accessible

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Read the KNN dataset into a DataFrame.
# NOTE(review): this is an absolute path, whereas the Naive Bayes cell loads
# its CSV through a relative path — confirm this location exists on the
# machine running the notebook.
knn_file_path = "/mnt/data/knn_dataset.csv"
df = pd.read_csv(knn_file_path)

# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Function to compute Euclidean distance
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: Array-like of numeric coordinates.
        point2: Array-like of numeric coordinates, broadcast-compatible
            with ``point1``.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    offset = np.asarray(point1) - np.asarray(point2)
    squared_total = np.square(offset).sum()
    return np.sqrt(squared_total)

# KNN Classifier from Scratch
class KNNClassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Ties in the vote are resolved in favour of the label whose first
    supporter is closest to the query point, so predictions are reproducible
    from run to run.

    Args:
        k: Number of neighbours that vote on each prediction (must be >= 1).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X_train, y_train):
        """Store the training samples; KNN has no separate training step.

        Args:
            X_train: Array-like of numeric samples, one row per sample.
            y_train: Array-like of labels, one per row of ``X_train``.

        Raises:
            ValueError: If there are no training samples or the number of
                labels differs from the number of samples.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if len(X_train) == 0:
            raise ValueError("X_train must contain at least one sample")
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train has {len(X_train)} samples but y_train has {len(y_train)} labels"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Args:
            X_test: Array-like of numeric samples with the same layout as the
                training samples.

        Returns:
            A list with one predicted label per test sample.
        """
        n_train = len(self.X_train)
        predictions = []
        for test_point in np.asarray(X_test, dtype=float):
            # Distances to all training samples at once instead of one
            # Python-level call per training row.
            squared_diffs = (self.X_train - test_point) ** 2
            distances = np.sqrt(squared_diffs.reshape(n_train, -1).sum(axis=1))

            # A stable sort keeps equally distant samples in training order.
            k_indices = np.argsort(distances, kind="stable")[:self.k]

            # Count votes in nearest-first order. dicts preserve insertion
            # order and max() returns the first maximal key, so a tied vote
            # goes to the label whose first supporter is the closest neighbour
            # (the old set-based vote depended on hash ordering).
            votes = {}
            for label in self.y_train[k_indices]:
                votes[label] = votes.get(label, 0) + 1
            predictions.append(max(votes, key=votes.get))
        return predictions

# Fit the from-scratch KNN on the training split and label the test split
knn_model = KNNClassifier(k=3)
knn_model.fit(X_train, y_train)
custom_predictions = knn_model.predict(X_test)

# Accuracy is the fraction of test labels that were predicted correctly
accuracy = (np.asarray(custom_predictions) == y_test).mean()
print(f"Custom KNN Accuracy: {accuracy:.2f}")


['/usr/lib/python312.zip', '/usr/lib/python3.12', '/usr/lib/python3.12/lib-dynload', '', '/home/mustafa/Documents/AI/Assignment 3/myenv/lib/python3.12/site-packages']


ModuleNotFoundError: No module named 'sklearn'

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# KNN using Scikit-learn
# Use the same neighbour count as the from-scratch model so both classifiers
# are compared with an identical k.
knn_sklearn = KNeighborsClassifier(n_neighbors=knn_model.k)
knn_sklearn.fit(X_train, y_train)
y_pred_sklearn = knn_sklearn.predict(X_test)

# accuracy_score is provided by sklearn.metrics.
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"KNN using Scikit-learn Accuracy: {accuracy_sklearn:.2f}")

In [None]:
# Plot comparison of the two KNN implementations.
# The from-scratch KNN accuracy is stored in `accuracy` by the cell that
# evaluates the custom model. `accuracy_scratch` is only assigned later, by the
# Naive Bayes cell, so referencing it here raised NameError on a fresh kernel.
labels = ['KNN from Scratch', 'KNN using Scikit-learn']
accuracies = [accuracy, accuracy_sklearn]

plt.bar(labels, accuracies, color=['blue', 'green'])
plt.ylabel('Accuracy')
plt.title('KNN: Scratch vs Scikit-learn')
plt.show()

In [None]:
from collections import defaultdict

# Naive Bayes Classifier
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled per class as an independent normal distribution.
    Posteriors are accumulated in log space so that products of many small
    densities cannot underflow to zero, and a small variance term is added so
    that a feature that is constant within a class does not cause a division
    by zero.

    Args:
        var_smoothing: Fraction of the largest feature variance that is added
            to every per-class variance for numerical stability.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        # class label -> prior probability
        self.priors = {}
        # feature index -> class label -> {'mean': ..., 'std': ...}
        self.likelihoods = defaultdict(dict)

    def fit(self, X, y):
        """Estimate class priors and per-feature Gaussian parameters.

        Args:
            X: Array-like of shape (n_samples, n_features) with numeric values.
            y: Array-like with one class label per row of ``X``.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array or ``y`` does not
                have one label per sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2 or X.shape[0] == 0 or X.shape[1] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError(f"X has {X.shape[0]} samples but y has {y.shape[0]} labels")

        # Start from a clean slate so a second fit() does not keep stale classes.
        self.priors = {}
        self.likelihoods = defaultdict(dict)
        self.classes = np.unique(y)

        # Variance added to every estimate; scaled by the largest feature
        # variance, falling back to var_smoothing itself when every feature is
        # constant over the whole data set.
        largest_variance = X.var(axis=0).max()
        epsilon = self.var_smoothing * largest_variance if largest_variance > 0 else self.var_smoothing

        for c in self.classes:
            X_c = X[y == c]
            self.priors[c] = X_c.shape[0] / X.shape[0]
            for feature in range(X.shape[1]):
                column = X_c[:, feature]
                self.likelihoods[feature][c] = {
                    'mean': column.mean(),
                    'std': np.sqrt(column.var() + epsilon)
                }

    def predict(self, X):
        """Return an array with the most probable class for each row of ``X``."""
        predictions = [self._predict(x) for x in np.asarray(X, dtype=float)]
        return np.array(predictions)

    def _predict(self, x):
        """Return the class with the highest log-posterior for one sample."""
        log_posteriors = {}
        for c in self.classes:
            log_posterior = np.log(self.priors[c])
            for feature, value in enumerate(x):
                stats = self.likelihoods[feature][c]
                log_posterior += self._gaussian_log_pdf(value, stats['mean'], stats['std'])
            log_posteriors[c] = log_posterior
        return max(log_posteriors, key=log_posteriors.get)

    def _gaussian_log_pdf(self, x, mean, std):
        """Return the log of the normal density at ``x`` (``std`` must be > 0)."""
        return -0.5 * ((x - mean) / std) ** 2 - np.log(std * np.sqrt(2 * np.pi))

    def _gaussian_pdf(self, x, mean, std):
        """Return the normal density at ``x`` (``std`` must be > 0)."""
        return np.exp(self._gaussian_log_pdf(x, mean, std))

# Load dataset: every field is read as text and the header row is skipped
data = np.genfromtxt('naive_bayes_dataset.csv', delimiter=',', dtype=str, skip_header=1)
X, y = data[:, :-1], data[:, -1]

# Convert categorical features to numerical, using one LabelEncoder per column
from sklearn.preprocessing import LabelEncoder
label_encoders = []
encoded_columns = []
for column_index in range(X.shape[1]):
    encoder = LabelEncoder()
    encoded_columns.append(encoder.fit_transform(X[:, column_index]))
    label_encoders.append(encoder)
# The encoded columns are stacked as rows, so transpose back to (samples, features)
X_encoded = np.array(encoded_columns).T

# Split dataset: 20% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# Naive Bayes from scratch: fit, predict, and report accuracy on the test split
nb_scratch = NaiveBayes()
nb_scratch.fit(X_train, y_train)
y_pred_scratch = nb_scratch.predict(X_test)
accuracy_scratch = accuracy_score(y_test, y_pred_scratch)
print(f"Naive Bayes from Scratch Accuracy: {accuracy_scratch:.2f}")

In [None]:
from sklearn.naive_bayes import GaussianNB

# Naive Bayes using Scikit-learn: fit GaussianNB on the same split and score it
nb_sklearn = GaussianNB().fit(X_train, y_train)
y_pred_sklearn = nb_sklearn.predict(X_test)
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Naive Bayes using Scikit-learn Accuracy: {accuracy_sklearn:.2f}")

In [11]:
# Bar chart comparing the two Naive Bayes accuracies
labels = ['Naive Bayes from Scratch', 'Naive Bayes using Scikit-learn']
accuracies = [accuracy_scratch, accuracy_sklearn]

ax = plt.gca()  # the axes plt.bar / plt.ylabel / plt.title would draw on
ax.bar(labels, accuracies, color=['blue', 'green'])
ax.set_ylabel('Accuracy')
ax.set_title('Naive Bayes: Scratch vs Scikit-learn')
plt.show()

NameError: name 'accuracy_scratch' is not defined

In [None]:
# KNN Comparison Graph
# By the time this cell runs, accuracy_scratch / accuracy_sklearn and the
# X_test / y_test split have been overwritten by the Naive Bayes cells, so
# plotting them would show Naive Bayes numbers under KNN labels.
# Rebuild the KNN test split from `df` with the same test_size and
# random_state used for the original KNN split, and keep the KNN results in
# their own names so the Naive Bayes values stay untouched.
knn_features = df.iloc[:, :-1].values
knn_targets = df.iloc[:, -1].values
knn_X_train, knn_X_test, knn_y_train, knn_y_test = train_test_split(
    knn_features, knn_targets, test_size=0.2, random_state=42
)
knn_accuracy_scratch = accuracy  # set by the cell that evaluates the custom KNN
knn_accuracy_sklearn = knn_sklearn.score(knn_X_test, knn_y_test)  # mean accuracy

labels = ['KNN from Scratch', 'KNN using Scikit-learn']
accuracies = [knn_accuracy_scratch, knn_accuracy_sklearn]

plt.bar(labels, accuracies, color=['blue', 'green'])
plt.ylabel('Accuracy')
plt.title('KNN: Scratch vs Scikit-learn')
plt.show()

In [None]:
# Naive Bayes Comparison Graph
# NOTE(review): this cell draws the same chart as the earlier Naive Bayes
# "Plot comparison" cell.
labels = ['Naive Bayes from Scratch', 'Naive Bayes using Scikit-learn']
accuracies = [accuracy_scratch, accuracy_sklearn]

axes = plt.gca()  # current axes, i.e. where plt.bar would draw
axes.bar(labels, accuracies, color=['blue', 'green'])
axes.set_ylabel('Accuracy')
axes.set_title('Naive Bayes: Scratch vs Scikit-learn')
plt.show()