# Naive bias Classification

# Importing dataset and libraries

In [192]:
import pandas as pd
import numpy as np

df=pd.read_csv('data.csv')

# Separate the class labels (Insurance) from the features

In [193]:
from sklearn.model_selection import train_test_split

X = df[['glucose','bloodpressure']]
y = df['diabetes']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [194]:
X_train.shape, X_test.shape

((696, 2), (299, 2))

# Step 1: Calculate prior probabilities P(y)

In [195]:
prior_prob1 = np.sum(y_train == 0) / len(y_train)
prior_prob2 = np.sum(y_train == 1) / len(y_train)

# Step 2: Calculate conditional probabilities P(xi|y)
# Laplacian smoothing is applied

In [196]:
alpha = 1

# Calculate mean and standard deviation for each feature for both classes

In [197]:
mean1 = X_train[y_train == 0].mean()
std1 = X_train[y_train == 0].std()
mean2 = X_train[y_train == 1].mean()
std2 = X_train[y_train == 1].std()

# Step 3: Predict class labels for test instances

In [198]:
def predict(X):
    predictions = []
    for i in range(len(X)):
        # Calculate conditional probabilities for both classes
        prob_x_given_y0 = (
            np.exp(-(X.iloc[i] - mean1) ** 2 / (2 * std1 ** 2)) /
            (np.sqrt(2 * np.pi) * std1)
        )
        prob_x_given_y1 = (
            np.exp(-(X.iloc[i] - mean2) ** 2 / (2 * std2 ** 2)) /
            (np.sqrt(2 * np.pi) * std2)
        )

        # Apply Laplacian smoothing
        prob_x_given_y0_smoothed = (prob_x_given_y0 + alpha) / (len(X_train) + alpha * 2)
        prob_x_given_y1_smoothed = (prob_x_given_y1 + alpha) / (len(X_train) + alpha * 2)

        # Calculate the posterior probabilities P(y|x) for both classes
        posterior_prob_y0 = prior_prob1 * np.prod(prob_x_given_y0_smoothed)
        posterior_prob_y1 = prior_prob2 * np.prod(prob_x_given_y1_smoothed)

        # Make the prediction based on the class with the higher posterior probability
        if posterior_prob_y0 > posterior_prob_y1:
            predictions.append(0)
        else:
            predictions.append(1)
    return predictions

# Step 4: Predict class labels for the test set

In [199]:
predicted_labels = predict(X_test)

# Step 5: Calculate accuracy
# Accuracy is very Low

In [200]:
accuracy = np.mean(predicted_labels == y_test)
print(f"Accuracy of the Naïve Bayes classifier: {accuracy * 100:.2f}%")

Accuracy of the Naïve Bayes classifier: 43.81%


# To increase Accuracy

In [201]:
X_train = train_data[['glucose', 'bloodpressure']]
y_train = train_data['diabetes']
X_test = test_data[['glucose', 'bloodpressure']]
y_test = test_data['diabetes']

# Calculate probabilities

In [202]:
prior_prob_y0 = np.sum(y_train == 0) / len(y_train)
prior_prob_y1 = np.sum(y_train == 1) / len(y_train)

In [203]:
alpha = 1

In [204]:
mean_y0 = X_train[y_train == 0].mean()
std_y0 = X_train[y_train == 0].std()
mean_y1 = X_train[y_train == 1].mean()
std_y1 = X_train[y_train == 1].std()

In [205]:
def predict(X):
    predictions = []
    for i in range(len(X)):
        # Calculate conditional probabilities for both classes
        prob_x_given_y0 = (
            np.exp(-(X.iloc[i] - mean_y0) ** 2 / (2 * std_y0 ** 2)) /
            (np.sqrt(2 * np.pi) * std_y0)
        )
        prob_x_given_y1 = (
            np.exp(-(X.iloc[i] - mean_y1) ** 2 / (2 * std_y1 ** 2)) /
            (np.sqrt(2 * np.pi) * std_y1)
        )

        # Apply Laplacian smoothing
        prob_x_given_y0_smoothed = (prob_x_given_y0 + alpha) / (len(X_train) + alpha * 2)
        prob_x_given_y1_smoothed = (prob_x_given_y1 + alpha) / (len(X_train) + alpha * 2)

        # Calculate the posterior probabilities P(y|x) for both classes
        posterior_prob_y0 = prior_prob_y0 * np.prod(prob_x_given_y0_smoothed)
        posterior_prob_y1 = prior_prob_y1 * np.prod(prob_x_given_y1_smoothed)

        # Make the prediction based on the class with the higher posterior probability
        if posterior_prob_y0 > posterior_prob_y1:
            predictions.append(0)
        else:
            predictions.append(1)
    return predictions

In [206]:
predicted_labels = predict(X_test)

# Calculate Accuracy

In [207]:
accuracy = np.mean(predicted_labels == y_test)
print(f"Accuracy of the Naïve Bayes classifier: {accuracy * 100:.2f}%")

Accuracy of the Naïve Bayes classifier: 86.93%


In [208]:
df.dropna(inplace=True)

In [209]:
accuracy = np.mean(predicted_labels == y_test)
print(f"Accuracy of the Naïve Bayes classifier: {accuracy * 100:.2f}%")

Accuracy of the Naïve Bayes classifier: 86.93%
