In [3]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import sklearn

# Naive Bayes (Scratch)

In [4]:
class NaiveBayes:
    def __init__(self):
        self.mean = None 
        self.variance = None
        self.priors = None
        self.classes = None
        
        
    def fit(self,X,y):
        self.classes = np.unique(y)
        n_samples, n_features = X.shape
        n_classes = len(self.classes)
        
        
        self.mean = np.zeros((n_classes,n_features),dtype=np.float64)
        self.variance = np.zeros((n_classes,n_features),dtype=np.float64)
        self.priors = np.zeros(n_classes,dtype=np.float64)
        
        for idx, c in enumerate(self.classes):
            X_c = X[y == c] 
            self.mean[idx,:] = X_c.mean(axis=0)
            self.variance[idx,:] = X_c.var(axis=0)
            self.priors[idx] = X_c.shape[0]/float(n_samples)
            
            
            
    def predict(self,X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)
    
    def _predict(self,x):
        
        posteriors = []
        
        for idx,c in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            likelihood = np.sum(np.log(self._pdf(idx,x)))
            posterior = prior + likelihood
            posteriors.append(posterior)
            
        return self.classes[np.argmax(posteriors)]
    
    
    def _pdf(self,class_idx, x):
        mean = self.mean[class_idx]
        var = self.variance[class_idx]
        numerator = np.exp(-(x-mean)**2/(2*var))
        denominator = np.sqrt(2*np.pi*var)
        
        return numerator/denominator    
    
    
    
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split  
from sklearn.metrics import accuracy_score
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target variable

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train the Naive Bayes model
nb = NaiveBayes()
nb.fit(X_train, y_train)

# Make predictions on the test set
y_pred = nb.predict(X_test)
print(y_pred)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
    
    
    
    
# # Sample dataset
data = np.array([
    [6.0, 180, 0],  # Class 0
    [5.5, 170, 0],  # Class 0
    [5.8, 175, 0],  # Class 0
    [5.9, 165, 1],  # Class 1
    [5.6, 160, 1],  # Class 1
    [5.7, 155, 1]   # Class 1
])  

# Split into features and labels
X_train = data[:, :-1]  # Features
y_train = data[:, -1]   # Labels

# Instantiate and train the Naive Bayes model
nb = NaiveBayes()
nb.fit(X_train, y_train)

# Predicting a new sample
X_test = np.array([[5.5, 150]])  # New data point
prediction = nb.predict(X_test)

print("Predicted Class:", prediction[0])  # Output the predicted class

accuracy_score([1],prediction)


[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Accuracy: 1.0
Predicted Class: 1.0


1.0

In [5]:
data = np.array([
    [6.0, 180, 0],  # Class 0
    [5.5, 170, 0],  # Class 0
    [5.8, 175, 0],  # Class 0
    [5.9, 165, 1],  # Class 1
    [5.6, 160, 1],  # Class 1
    [5.7, 155, 1]   # Class 1
])
np.mean()

TypeError: mean() missing 1 required positional argument: 'a'

In [8]:
class GNB:
    def __init__(self):
        self.mean = None
        self.variance = None
        self.priors = None
        self.classes = None
        
    
    def fit(self,X,y):
        self.classes = np.unique(y)
        n_samples,n_features = X.shape
        n_classes = len(self.classes)
        
        self.mean = np.zeros((n_classes,n_features))
        self.variance = np.zeros((n_classes,n_features))
        self.priors = np.zeros(n_classes)
        
        for idx,c in enumerate(self.classes):
            X_c = X[ y == c ]
            self.mean[idx,:] = np.mean(X_c,axis=0)
            self.variance[idx,:] = np.var(X_c,axis=0)
            self.priors[idx] = len(X_c)/n_samples
            
            
    def predict(self,X):
        y_pred = [self.__predict(x) for x in X]
        return np.array(y_pred)
    
    
    def __predict(self,x):
        posteriors = []
        
        for idx,c in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            likelihood = np.sum(np.log(self.__pdf(idx,x)))
            posterior = prior + likelihood
            posteriors.append(posterior)
            
        return self.classes[np.argmax(posteriors)]
    
    def __pdf(self,class_idx,x):
        mean = self.mean[class_idx]
        var = self.variance[class_idx]
        numerator = np.exp(-(mean - x)**2/(2*var))
        denominator = np.sqrt(2*np.pi*var)
        return numerator/denominator
    
    
    

        
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target variable

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train the Naive Bayes model
nb = GNB()
nb.fit(X_train, y_train)

# Make predictions on the test set
y_pred = nb.predict(X_test)
print(y_pred)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
            
            
        

[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Accuracy: 1.0


In [13]:
class NBS:
    def __init__(self):
        self.mean = None
        self.variance = None
        self.priors = None
        self.classes = None
        
    def fit(self,X,y):
        self.classes = np.unique(y)
        n_samples,n_featuers = X.shape
        n_classes = len(self.classes)
        
        self.mean = np.zeros((n_classes,n_featuers))
        self.variance = np.zeros((n_classes,n_featuers))
        self.priors = np.zeros(n_classes)
        
        for idx,c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx,:] = np.mean(X_c,axis=0)
            self.variance[idx,:] = np.var(X_c,axis=0)
            self.priors[idx] = X_c.shape[0]/n_samples
            
    
    def predict(self,X):
        y_pred = [ self.__predict(x) for x in X]
        return y_pred
        
    
    def __predict(self,x):
        posteriors = []
        
        for idx,c in enumerate(self.classes):
            priors = np.log(self.priors[idx])
            likelihood = np.sum(np.log(self.__pdf(idx,x)))
            posterior = priors + likelihood
             
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]

    def __pdf(self,idx,x):
        mean = self.mean[idx]
        var = self.variance[idx]
        numerator = np.exp(-(mean - x)**2/(2*var))
        denominator = np.sqrt(2*np.pi*var)
        return numerator/denominator
    

        
                 
            
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Target variable

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train the Naive Bayes model
nb = NBS()
nb.fit(X_train, y_train)

# Make predictions on the test set
y_pred = nb.predict(X_test)
print(y_pred)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
                   

[np.int64(1), np.int64(0), np.int64(2), np.int64(1), np.int64(1), np.int64(0), np.int64(1), np.int64(2), np.int64(1), np.int64(1), np.int64(2), np.int64(0), np.int64(0), np.int64(0), np.int64(0), np.int64(1), np.int64(2), np.int64(1), np.int64(1), np.int64(2), np.int64(0), np.int64(2), np.int64(0), np.int64(2), np.int64(2), np.int64(2), np.int64(2), np.int64(2), np.int64(0), np.int64(0)]
Accuracy: 1.0
