In [36]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [37]:
import numpy as np

# Define the number of data points
n_samples = 1000

# Define the probabilities for each class
class_probs = [0.5, 0.5]

# Generate the class labels
y = np.random.choice([0, 1], size=n_samples, p=class_probs)

# Generate binary features for each class
X = np.zeros((n_samples, 2))

# Define the probabilities for each feature given the class label
feature_probs = [[[0.8, 0.2],  # For class 0
                  [0.2, 0.8]],  # For class 1
                 [[0.6, 0.4],  # For class 0
                  [0.4, 0.6]]]  # For class 1

# Generate the features based on the class labels
for i in range(n_samples):
    for j in range(2):
        X[i, j] = np.random.choice([0, 1], p=feature_probs[y[i]][j])

# Display the first few rows of the generated dataset
print("Class Labels (y):")
print(y[:5])
print("\nFeatures (X):")
print(X[:5])


Class Labels (y):
[0 1 1 1 1]

Features (X):
[[0. 1.]
 [1. 1.]
 [0. 1.]
 [1. 1.]
 [0. 1.]]


In [38]:
trainX, testX, trainY, testY = train_test_split(X, y, test_size = 0.20)

In [39]:
class NaiveBayes:
    
    def __init__(self, X, y, laplase = 0):
        self.X = pd.DataFrame(X)
        self.y = pd.DataFrame(y)
        self.laplase = laplase
        
    def fit(self):
        self.values = {}
        for col in self.X.columns:
            self.values[col] = {}
            for value in self.X[col].unique():
                self.values[col][value] = {}
                for label in self.y[self.y.columns[0]].unique():
                    count = ((self.X[col] == value) & (self.y[self.y.columns[0]] == label)).sum()
                    self.values[col][value][label] = (count + self.laplase) / ((self.y[self.y.columns[0]] == label).sum() 
                                                                               + (self.laplase * len(self.X[col].unique())))  
        return self.values
    
    def predict(self, X):
        X = pd.DataFrame(X)
        output = {}
        for index, row in X.iterrows():
            output[str(index)] = {} 
            for label in self.y[self.y.columns[0]].unique():
                prob = 1   
                for col, val in row.items(): 
                    prob = prob * self.values[col][val][label]
                output[str(index)][label] = prob * (self.y[self.y.columns[0]] == label).sum()
        max_subkeys = []
        for key, sub_dict in output.items():
            max_subkey = max(sub_dict, key=sub_dict.get)
            max_subkeys.append(max_subkey)   
        return max_subkeys
                    
                    
                    
                    
            
                   
                    
                    
        
            

In [40]:
g = NaiveBayes(trainX, trainY, laplase = 0)

In [41]:
g.fit()

{0: {0.0: {0: 0.7772277227722773, 1: 0.6262626262626263},
  1.0: {0: 0.22277227722772278, 1: 0.37373737373737376}},
 1: {1.0: {0: 0.7846534653465347, 1: 0.5732323232323232},
  0.0: {0: 0.21534653465346534, 1: 0.42676767676767674}}}

In [42]:
s = g.predict(testX)

In [43]:
import numpy as np

def confusion_matrix(y_true, y_pred, num_classes):
    matrix = np.zeros((num_classes, num_classes), dtype=int)
    for true, pred in zip(y_true, y_pred):
        matrix[int(true), int(pred)] += 1
    return matrix

def precision_recall_accuracy(conf_matrix):
    num_classes = conf_matrix.shape[0]
    precision = np.zeros(num_classes)
    recall = np.zeros(num_classes)
    accuracy = np.trace(conf_matrix) / np.sum(conf_matrix)
    for i in range(num_classes):
        true_positives = conf_matrix[i, i]
        false_positives = np.sum(conf_matrix[:, i]) - true_positives
        false_negatives = np.sum(conf_matrix[i, :]) - true_positives
        precision[i] = true_positives / (true_positives + false_positives)
        recall[i] = true_positives / (true_positives + false_negatives)
    return precision, recall, accuracy


conf_matrix = confusion_matrix(np.array(testY), s, 2)
precision, recall, accuracy = precision_recall_accuracy(conf_matrix)

print("Confusion Matrix:")
print(conf_matrix)
print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)

Confusion Matrix:
[[58 33]
 [44 65]]
Precision: [0.56862745 0.66326531]
Recall: [0.63736264 0.59633028]
Accuracy: 0.615
