In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset into a DataFrame and print it for a first look.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab mount) — adjust when running elsewhere.
df = pd.read_csv('/content/Naive-Bayes-Classification-Data.csv') # columns: glucose, bloodpressure, diabetes
print(df)

     glucose  bloodpressure  diabetes
0         40             85         0
1         40             92         0
2         45             63         1
3         45             80         0
4         40             73         1
..       ...            ...       ...
990       45             87         0
991       40             83         0
992       40             83         0
993       40             60         1
994       45             82         0

[995 rows x 3 columns]


In [None]:
# Feature matrix: every column except the last one, as a NumPy array.
X = df.iloc[:, :-1].values
# Label vector: the last column, as a NumPy array.
y = df.iloc[:, -1].values

In [None]:
# Display the feature matrix (one row per sample, one column per feature).
X

array([[40, 85],
       [40, 92],
       [45, 63],
       ...,
       [40, 83],
       [40, 60],
       [45, 82]])

In [None]:
# Display the label vector (0 / 1 per sample).
y

array([0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,

In [None]:
# Hold out 33% of the rows as a test set; random_state=42 makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
# Fit scikit-learn's Gaussian Naive Bayes classifier on the training split.
model = GaussianNB()
model.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the held-out test split.
y_pred = model.predict(X_test)

In [None]:
# Fraction of correct test predictions, scaled to a percentage.
# NOTE: the name `accuracy` is rebound to a function in the from-scratch section further down,
# so this value is lost once that cell has run.
accuracy = accuracy_score(y_test, y_pred)*100
accuracy

92.40121580547113

In [None]:
# NOTE(review): this import sits mid-notebook; it would normally live in the imports cell at the top.
from sklearn.metrics import classification_report, confusion_matrix
# Per-class precision / recall / F1 for the scikit-learn model on the test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.93      0.92       153
           1       0.94      0.92      0.93       176

    accuracy                           0.92       329
   macro avg       0.92      0.92      0.92       329
weighted avg       0.92      0.92      0.92       329



In [None]:
# NOTE: the arguments are passed as (y_pred, y_test), the reverse of scikit-learn's
# (y_true, y_pred) order, so this matrix is the transpose of the conventional one:
# rows are predicted labels and columns are true labels.
print(confusion_matrix(y_pred, y_test))

[[142  14]
 [ 11 162]]


In [None]:
# Conventional orientation (y_true, y_pred): rows are true labels, columns are predicted labels.
print(confusion_matrix(y_test, y_pred))

[[142  11]
 [ 14 162]]


GAUSSIAN NAIVE BAYES FROM SCRATCH (WITHOUT SCIKIT-LEARN'S BUILT-IN CLASSIFIER)



In [None]:
import pandas as pd
import numpy as np
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


def load_data(file_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or file-like
        Location of the CSV; passed straight through to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(file_path)


def preprocess_data(data, target='diabetes'):
    """Split a DataFrame into a feature matrix and a label vector.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the feature columns and the label column.
    target : str, default 'diabetes'
        Name of the label column. Every other column is treated as a feature.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` holds all columns except ``target`` and ``y``
        holds the values of the ``target`` column.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``data``.
    """
    features = data.drop(columns=[target])
    labels = data[target]
    return np.array(features), np.array(labels)


def summarize_by_class(X, y):
    """Compute per-class, per-feature mean and variance.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix (samples x features).
    y : numpy.ndarray
        Label for each row of ``X``.

    Returns
    -------
    dict
        Maps each distinct label in ``y`` to a list with one
        ``(mean, variance)`` tuple per feature column, in column order.
        Variance is the population variance (``np.var`` default).

    Notes
    -----
    Prints ``"<label> <shape>"`` for each class subset as a diagnostic.
    """
    summaries = {}
    for label in np.unique(y):
        rows = X[y == label]  # boolean mask keeps only this class's samples
        print(f"{label} {rows.shape}")
        n_features = rows.shape[1]
        summaries[label] = [
            (np.mean(rows[:, col]), np.var(rows[:, col]))
            for col in range(n_features)
        ]
    return summaries


def gaussian_probability(x, mean, variance, min_variance=1e-9):
    """Evaluate the Gaussian probability density at ``x``.

    Parameters
    ----------
    x : float
        Point at which the density is evaluated.
    mean : float
        Mean of the distribution.
    variance : float
        Variance of the distribution.
    min_variance : float, default 1e-9
        Lower bound applied to ``variance``. A feature that is constant within
        a class has zero variance, which would otherwise cause a division by
        zero; variances above this floor are used unchanged.

    Returns
    -------
    float
        Density of N(mean, variance) at ``x``.
    """
    # Floor the variance so a constant feature does not divide by zero.
    variance = max(variance, min_variance)
    exponent = math.exp(-((x - mean) ** 2) / (2 * variance))
    return (1 / math.sqrt(2 * math.pi * variance)) * exponent


def calculate_class_probabilities(test_row, summary_stats):
    """Score one sample against every class.

    Parameters
    ----------
    test_row : sequence of float
        Feature values of the sample, indexed in the same order as the
        per-feature statistics.
    summary_stats : dict
        Maps each class label to a list of ``(mean, variance)`` tuples,
        one per feature.

    Returns
    -------
    dict
        Maps each class label to the product of the per-feature Gaussian
        densities of ``test_row``. The product starts at 1 and no class
        prior is multiplied in.
    """
    likelihoods = {}
    for label, feature_stats in summary_stats.items():
        running = 1
        for idx, (mu, var) in enumerate(feature_stats):
            running *= gaussian_probability(test_row[idx], mu, var)
        likelihoods[label] = running
    return likelihoods


def predict(test_row, summary_stats):
    """Return the class label with the highest score for one sample.

    Parameters
    ----------
    test_row : sequence of float
        Feature values of the sample.
    summary_stats : dict
        Per-class ``(mean, variance)`` statistics, as consumed by
        ``calculate_class_probabilities``.

    Returns
    -------
    object or None
        The label whose score is largest; on ties the label encountered
        first wins. ``None`` when ``summary_stats`` yields no classes.
    """
    scores = calculate_class_probabilities(test_row, summary_stats)
    # max() keeps the first maximal entry, matching a strict ">" scan.
    return max(scores, key=scores.get, default=None)


def accuracy(X_test, y_test, summary_stats):
    """Percentage of test samples whose predicted label matches the true one.

    Parameters
    ----------
    X_test : sequence
        Test samples; each element is one row of feature values.
    y_test : sequence
        True label for each row of ``X_test``.
    summary_stats : dict
        Per-class ``(mean, variance)`` statistics used by ``predict``.

    Returns
    -------
    float
        Accuracy in the range 0-100. Returns 0.0 for an empty test set
        instead of dividing by zero.
    """
    total = len(X_test)
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError.
        return 0.0
    hits = sum(
        1
        for i in range(total)
        if predict(X_test[i], summary_stats) == y_test[i]
    )
    return (hits / total) * 100


def main(file_path='/content/Naive-Bayes-Classification-Data.csv', test_size=0.5, random_state=42):
    """Train and evaluate the from-scratch Gaussian Naive Bayes classifier.

    Parameters
    ----------
    file_path : str, default '/content/Naive-Bayes-Classification-Data.csv'
        CSV file to load; it must contain a ``diabetes`` label column.
    test_size : float, default 0.5
        Fraction of rows held out for evaluation.
    random_state : int, default 42
        Seed for the train/test shuffle, so the split is reproducible.

    Prints the test-set accuracy as a percentage. All intermediate variables
    stay local to this function and are not visible to later notebook cells.
    """
    data = load_data(file_path)  # columns: glucose, bloodpressure, diabetes

    X, y = preprocess_data(data)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    summary_stats = summarize_by_class(X_train, y_train)

    acc = accuracy(X_test, y_test, summary_stats)
    print(f"Accuracy: {acc:.2f}%")

if __name__ == "__main__":
    main()


0 (257, 2)
1 (240, 2)
Accuracy: 92.37%


In [None]:
# NOTE: y_test and y_pred here are the notebook-level variables from the scikit-learn
# GaussianNB cells above (33% test split). main() keeps its own split and predictions
# local, so this report does not describe the from-scratch classifier.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.93      0.92       153
           1       0.94      0.92      0.93       176

    accuracy                           0.92       329
   macro avg       0.92      0.92      0.92       329
weighted avg       0.92      0.92      0.92       329

