In [1]:
# Demonstration of a Naïve Bayesian classifier for a sample training data set stored as a .CSV file, including
# calculation of the accuracy, precision, and recall for the dataset.

In [2]:
# The Naïve Bayesian classifier is a probabilistic machine learning model used for classification tasks. It is based on
# Bayes' Theorem and assumes that the features (or predictors) are independent, given the class label. Despite its
# simplicity, it often performs well in various classification tasks, especially with small to medium-sized datasets.
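# In this demonstration, we'll implement a Naïve Bayesian classifier for a sample dataset stored in a .CSV file.
# (As written, the code below substitutes a synthetic dataset generated with make_classification; the CSV-loading
# lines are present but commented out.)
# We'll train the model using the dataset, make predictions, and then evaluate the model's performance using
# accuracy, precision, and recall metrics.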

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
# --- Steps 1 & 2: obtain the feature matrix X and the label vector y ---
# To run on a real CSV instead (last column = target, every other column = feature),
# replace the synthetic data below with:
#     df = pd.read_csv('sample_data.csv')
#     X = df.iloc[:, :-1]
#     y = df.iloc[:, -1]
# Synthetic stand-in: 500 samples, 10 features, 2 classes, generated with a fixed seed.
X, y = make_classification(
    n_samples=500,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# --- Step 3: hold out 20% of the rows for testing (seeded split) ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Step 4: fit Gaussian Naïve Bayes on the training portion ---
# fit() returns the estimator itself, so construction and training are chained.
model = GaussianNB().fit(X_train, y_train)

# --- Step 5: predict labels for the held-out rows ---
y_pred = model.predict(X_test)

# --- Step 6: score the predictions ---
# average='weighted' combines the per-class scores, weighting each class by its support.
# Note: support-weighted recall is numerically equal to accuracy, so those two values match.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')

# --- Report: headline metrics first, then the confusion matrix and per-class breakdown ---
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.88
Precision: 0.88
Recall: 0.88
[[48  6]
 [ 6 40]]
              precision    recall  f1-score   support

           0       0.89      0.89      0.89        54
           1       0.87      0.87      0.87        46

    accuracy                           0.88       100
   macro avg       0.88      0.88      0.88       100
weighted avg       0.88      0.88      0.88       100



In [None]:
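# Accuracy measures the proportion of correct predictions made by the model.
# Precision measures the proportion of the model's positive predictions that are actually positive
# (true positives / all predicted positives).
# Recall (or Sensitivity) measures the proportion of actual positive cases that were correctly identified by the
# model (true positives / all actual positives).
# Output: the accuracy, precision, and recall scores for the classifier on the test dataset. Because the code
# passes average='weighted', the reported precision and recall are support-weighted averages over all classes.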
# Example
# Given a sample CSV file sample_data.csv (and with the commented-out CSV-loading lines enabled), the code will
# load the data, train the Naïve Bayesian classifier, and output metrics like:
# Accuracy: 0.85
# Precision: 0.84
# Recall: 0.83
# These results indicate how well the classifier performs on the given dataset.