In [233]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


In [234]:
class WeightedKNN:
    """k-nearest-neighbours classifier with inverse-distance weighted voting.

    Distances are Euclidean and are computed on the feature values exactly as
    given, so features with a large numeric range dominate the distance.
    """

    # Added to every distance before it is inverted, so that an exact match
    # (distance 0) gets a very large but finite weight instead of dividing by 0.
    _EPSILON = 1e-5

    def __init__(self, k=3):
        """Store the number of neighbours that take part in each vote.

        Raises:
            ValueError: if ``k`` is not a positive integer. ``k == 0`` would
                leave the vote empty and a negative ``k`` would slice the
                sorted neighbours from the wrong end.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``.

        Returns:
            The fitted instance, so calls can be chained.

        Raises:
            ValueError: if ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X_train = np.array(X)
        y_train = np.array(y)

        if X_train.ndim == 0 or len(X_train) == 0:
            raise ValueError("X must contain at least one training sample")
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X and y must have the same length, got {len(X_train)} and {len(y_train)}"
            )

        self.X_train = X_train
        self.y_train = y_train
        return self

    def calculateDistance(self, x1, x2):
        """Return the Euclidean distance between two samples."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def calculateNeighbors(self, x):
        """Find the ``k`` training samples closest to ``x``.

        Returns:
            A tuple ``(indices, distances)``: the positions of the nearest
            training samples, closest first, and their distances to ``x``.
        """
        # Broadcast x against every training row in one numpy expression
        # instead of calling calculateDistance once per row in a Python loop.
        deltas = np.asarray(x - self.X_train)
        squared = (deltas ** 2).reshape(len(self.X_train), -1)
        distances = np.sqrt(squared.sum(axis=1))

        neighbors = np.argsort(distances)[:self.k]
        return neighbors, distances[neighbors]

    def weightedVote(self, neighbors, distances):
        """Return the label with the largest total inverse-distance weight.

        ``neighbors`` holds indices into the training set and ``distances``
        the matching distances. On a tie the label that was seen first (the
        one belonging to the nearer neighbour) wins.
        """
        weights = 1 / (distances + self._EPSILON)
        class_votes = {}

        for weight, neighbor in zip(weights, neighbors):
            label = self.y_train[neighbor]
            class_votes[label] = class_votes.get(label, 0) + weight

        return max(class_votes, key=class_votes.get)

    def predict(self, X):
        """Predict a label for every sample in ``X`` and return them as an array."""
        X = np.array(X)
        predictions = [self.weightedVote(*self.calculateNeighbors(x)) for x in X]
        return np.array(predictions)

    def score(self, X, y):
        """Print a metrics report for ``X`` against true labels ``y``.

        Prints the confusion matrix, accuracy and the support-weighted
        precision, recall and F1 score.

        Returns:
            The accuracy.
        """
        y_pred = self.predict(X)

        # Calculate metrics
        accuracy = accuracy_score(y, y_pred)
        precision = precision_score(y, y_pred, average='weighted')
        recall = recall_score(y, y_pred, average='weighted')
        f1 = f1_score(y, y_pred, average='weighted')
        conf_matrix = confusion_matrix(y, y_pred)

        # Print results
        print("Confusion Matrix:\n", conf_matrix)
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")

        return accuracy


In [235]:
class GaussianNaiveBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled as an independent normal distribution per class.
    Prediction picks the class with the highest log posterior.
    """

    def __init__(self, var_smoothing=1e-9):
        """Configure the classifier.

        Args:
            var_smoothing: fraction of the largest overall feature variance
                that is added to every per-class variance. This keeps the
                Gaussian density defined when a feature is constant within a
                class (variance 0).
        """
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Returns:
            The fitted instance, so calls can be chained.

        Raises:
            ValueError: if ``X`` contains no samples.
        """
        X = np.array(X)
        y = np.array(y)

        if X.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")

        # Get the unique classes
        self.classes = np.unique(y)

        # Without smoothing, a feature with zero variance inside a class makes
        # the density 0/0 = NaN, every posterior NaN, and the prediction
        # degenerates to "always the first class".
        epsilon = self.var_smoothing * np.var(X, axis=0).max()
        if epsilon == 0:
            # Every feature is constant over the whole data set; fall back to
            # the raw smoothing value so the variances are still positive.
            epsilon = self.var_smoothing

        # Calculate mean, variance, and prior probability for each class
        self.means = {}
        self.variances = {}
        self.priors = {}

        for cls in self.classes:
            X_cls = X[y == cls]
            self.means[cls] = np.mean(X_cls, axis=0)
            self.variances[cls] = np.var(X_cls, axis=0) + epsilon
            self.priors[cls] = X_cls.shape[0] / X.shape[0]

        return self

    def calculateLikelihood(self, x, mean, var):
        """Return the Gaussian density of ``x`` for the given mean and variance."""
        exponent = np.exp(-((x - mean) ** 2) / (2 * var))
        return (1 / np.sqrt(2 * np.pi * var)) * exponent

    def calculateLogLikelihood(self, x, mean, var):
        """Return the log of the Gaussian density, computed directly in log space.

        Taking ``np.log`` of ``calculateLikelihood`` underflows to ``log(0)``
        (``-inf``) for points far from the mean, which makes all classes look
        equally impossible; this form stays finite.
        """
        return -0.5 * (np.log(2 * np.pi * var) + ((x - mean) ** 2) / var)

    def calculateClassPosterior(self, x):
        """Return the class whose log posterior is highest for the sample ``x``."""
        posteriors = {}

        for cls in self.classes:
            # Start with the log prior
            prior = np.log(self.priors[cls])
            # Sum the per-feature log likelihoods (naive independence assumption)
            class_conditional = np.sum(
                self.calculateLogLikelihood(x, self.means[cls], self.variances[cls])
            )
            posteriors[cls] = prior + class_conditional

        return max(posteriors, key=posteriors.get)

    def predict(self, X):
        """Predict a class for every sample in ``X`` and return them as an array."""
        X = np.array(X)
        predictions = [self.calculateClassPosterior(x) for x in X]
        return np.array(predictions)

    def score(self, X, y):
        """Print a metrics report for ``X`` against true labels ``y``.

        Prints the confusion matrix, accuracy and the support-weighted
        precision, recall and F1 score.

        Returns:
            The accuracy.
        """
        y_pred = self.predict(X)

        # Calculate metrics
        accuracy = accuracy_score(y, y_pred)
        precision = precision_score(y, y_pred, average='weighted')
        recall = recall_score(y, y_pred, average='weighted')
        f1 = f1_score(y, y_pred, average='weighted')
        conf_matrix = confusion_matrix(y, y_pred)

        # Print results
        print("Confusion Matrix:\n", conf_matrix)
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")

        return accuracy


In [236]:
# Load the dataset; the first CSV row supplies the column names
diabetes = pd.read_csv('diabetes.csv', header=0)

# Separate features and labels
labels = diabetes['Outcome']
features = diabetes.drop('Outcome', axis=1)

# Both classifiers do arithmetic directly on the feature values, so fail fast
# if any feature column is not numeric instead of only printing the dtypes.
non_numeric = features.select_dtypes(exclude='number').columns.tolist()
assert not non_numeric, f"Non-numeric feature columns: {non_numeric}"
print(features.dtypes)

# Hold out 45% of the rows for testing; the fixed random_state keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.45, random_state=42)

features.head()

Pregnancies                   int64
Glucose                       int64
BloodPressure                 int64
SkinThickness                 int64
Insulin                       int64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                           int64
dtype: object


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [237]:
# Fit the distance-weighted 3-NN classifier on the training split.
KNN = WeightedKNN(k=3)
KNN.fit(X_train, y_train)

# score() prints the metrics report itself and returns the accuracy.
knn_accuracy = KNN.score(X_test, y_test)
print(knn_accuracy)


Confusion Matrix:
 [[180  48]
 [ 54  64]]
Accuracy: 0.7052
Precision: 0.7018
Recall: 0.7052
F1 Score: 0.7033
0.7052023121387283


In [238]:
# Fit the Gaussian naive Bayes classifier on the training split.
GNB = GaussianNaiveBayes()
GNB.fit(X_train, y_train)

# score() prints the metrics report itself and returns the accuracy.
gnb_accuracy = GNB.score(X_test, y_test)
print(gnb_accuracy)


Confusion Matrix:
 [[192  36]
 [ 43  75]]
Accuracy: 0.7717
Precision: 0.7688
Recall: 0.7717
F1 Score: 0.7699
0.7716763005780347
