<a href="https://colab.research.google.com/github/Jigyass/Data-Privacy-and-Data-Security-Models/blob/main/Naive_Bayes_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!unzip /content/drive/MyDrive/Data_Privacy_and_Data_Security/iris.zip

Archive:  /content/drive/MyDrive/Data_Privacy_and_Data_Security/iris.zip
  inflating: Index                   
  inflating: bezdekIris.data         
  inflating: iris.data               
  inflating: iris.names              


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from scipy.stats import norm
from sklearn.metrics import precision_score, recall_score

In [3]:
# Load the dataset
df = pd.read_csv('iris.data', header=None)
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

In [4]:
# Select specified records for testing
test_indices = [i-1 for i in range(1, 11)] + [i-1 for i in range(51, 61)] + [i-1 for i in range(101, 111)]
test_data = df.iloc[test_indices]

# Select remaining records for training
train_data = df.drop(test_indices)

# Separate features and labels
X_train = train_data.drop('class', axis=1)
y_train = train_data['class']
X_test = test_data.drop('class', axis=1)
y_test = test_data['class']

In [10]:
class NaiveBayes:
    def __init__(self):
        self.model = None

    def train(self, X, y):
        self.model = {}
        self.classes = np.unique(y)
        for c in self.classes:
            # Compute the mean and standard deviation for each feature per class
            self.model[c] = {}
            class_data = X[y == c]
            self.model[c]['mean'] = np.mean(class_data, axis=0)
            self.model[c]['std'] = np.std(class_data, axis=0)

    def predict(self, X):
        preds = []
        for sample in X:
            posterior_probs = {}
            for c in self.classes:
                prior = 1 / len(self.classes)
                likelihood = np.prod(norm.pdf(sample, self.model[c]['mean'], self.model[c]['std']))
                posterior = prior * likelihood
                posterior_probs[c] = posterior

            # Pick the class with the highest posterior probability
            pred = max(posterior_probs, key=posterior_probs.get)
            preds.append(pred)

        return np.array(preds)

# Initialize and train the Naive Bayes Classifier
nb = NaiveBayes()
nb.train(X_train.to_numpy(), y_train.to_numpy())

# Perform predictions
y_pred = nb.predict(X_test.to_numpy())

# Calculate accuracy
accuracy = np.mean(y_pred == y_test.to_numpy())
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 93.33%


In [11]:
def laplace_mechanism(x, epsilon):
    b = 1.0  # sensitivity
    return x + np.random.laplace(0, b / epsilon)

class DifferentiallyPrivateNaiveBayes(NaiveBayes):
    def __init__(self, epsilon):
        super().__init__()
        self.epsilon = epsilon / 2  # divide by 2 to allocate half for mean and half for std

    def train(self, X, y):
        self.model = {}
        self.classes = np.unique(y)
        for c in self.classes:
            self.model[c] = {}
            class_data = X[y == c]
            # Adding Laplace noise to mean and std
            self.model[c]['mean'] = laplace_mechanism(np.mean(class_data, axis=0), self.epsilon)
            self.model[c]['std'] = laplace_mechanism(np.std(class_data, axis=0), self.epsilon)

In [12]:
# Initialize and train the Differentially Private Naive Bayes Classifier
epsilon = 1.0  # feel free to change this
dp_nb = DifferentiallyPrivateNaiveBayes(epsilon)
dp_nb.train(X_train.to_numpy(), y_train.to_numpy())

# Perform predictions
y_pred_dp = dp_nb.predict(X_test.to_numpy())

# Calculate accuracy
accuracy_dp = np.mean(y_pred_dp == y_test.to_numpy())
print(f"Differentially Private Accuracy: {accuracy_dp * 100:.2f}%")

Differentially Private Accuracy: 33.33%


In [16]:
# Define different epsilon values
epsilon_values = [0.5, 1, 2, 4, 8, 16]

In [17]:
# Loop through each epsilon value to train the model and evaluate
for epsilon in epsilon_values:
    dp_nb = DifferentiallyPrivateNaiveBayes(epsilon)
    dp_nb.train(X_train.to_numpy(), y_train.to_numpy())

    y_pred_dp = dp_nb.predict(X_test.to_numpy())

    # Added zero_division=1 parameter
    precision = precision_score(y_test, y_pred_dp, average='macro', zero_division=1)
    recall = recall_score(y_test, y_pred_dp, average='macro', zero_division=1)

    print(f"Epsilon: {epsilon}, Precision: {precision * 100:.2f}%, Recall: {recall * 100:.2f}%")

Epsilon: 0.5, Precision: 77.78%, Recall: 33.33%
Epsilon: 1, Precision: 77.78%, Recall: 33.33%
Epsilon: 2, Precision: 77.78%, Recall: 33.33%
Epsilon: 4, Precision: 77.78%, Recall: 33.33%
Epsilon: 8, Precision: 83.33%, Recall: 66.67%
Epsilon: 16, Precision: 93.33%, Recall: 93.33%
