# Bayesian Classifier

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [10]:
# Toy symptom table: each position across the three lists is one patient,
# with 1 meaning "yes" and 0 meaning "no".
data = dict(
    Toothache=[1, 0, 1, 0, 1, 0],
    Cavity=[1, 1, 0, 0, 1, 0],
    VisitDentist=[1, 0, 1, 0, 1, 0],
)

# One column per key, one row per patient.
df = pd.DataFrame(data)

In [11]:
# Target column and the two predictor columns
y = df['VisitDentist']
X = df[['Toothache', 'Cavity']]

# Hold out a third of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

# Fit Gaussian naive Bayes on the training rows (fit() returns the estimator itself)
clf = GaussianNB().fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report held-out accuracy, then the test rows next to their predicted labels
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "For data", X_test.values.tolist(),
    "Predictions:", y_pred,
)

Accuracy: 1.0
For data [[1, 1], [0, 1]] Predictions: [1 0]


In [1]:
import math

# Toy play-tennis table. Each row is [Outlook, Temp, Play]:
#   Outlook, Temp -> categorical features
#   Play          -> class label ("Yes" / "No")
dataset = [
    ["Sunny", "Hot", "No"],
    ["Sunny", "Hot", "No"],
    ["Overcast", "Hot", "Yes"],
    ["Rain", "Mild", "Yes"],
    ["Rain", "Cool", "Yes"],
    ["Rain", "Cool", "No"],
    ["Overcast", "Cool", "Yes"],
    ["Sunny", "Mild", "No"],
    ["Sunny", "Cool", "Yes"],
    ["Rain", "Mild", "Yes"],
    ["Sunny", "Mild", "Yes"],
    ["Overcast", "Mild", "Yes"],
    ["Overcast", "Hot", "Yes"],
    ["Rain", "Mild", "No"],
]

# Train function
def train_naive_bayes(data):
    """Tally the counts a categorical naive Bayes classifier needs.

    Args:
        data: Iterable of 3-item rows ``[outlook, temp, label]``.

    Returns:
        A ``(label_counts, feature_counts)`` tuple where ``label_counts`` maps
        label -> number of rows, and ``feature_counts`` maps
        label -> {"Outlook": {value: count}, "Temp": {value: count}}.
    """
    class_totals = {}
    conditional = {}

    for outlook, temp, label in data:
        # How often each class occurs (used later for the prior)
        class_totals[label] = class_totals.get(label, 0) + 1

        # Create the per-class tables the first time a class is seen
        per_class = conditional.setdefault(label, {"Outlook": {}, "Temp": {}})

        # How often each feature value occurs within this class
        for feature_name, observed in (("Outlook", outlook), ("Temp", temp)):
            tally = per_class[feature_name]
            tally[observed] = tally.get(observed, 0) + 1

    return class_totals, conditional


# Predict function
def predict_naive_bayes(x, label_counts, feature_counts):
    """Return the most probable label for one sample under naive Bayes.

    Each label is scored as its prior times the product of Laplace-smoothed
    conditional probabilities of the sample's feature values.

    Args:
        x: Sequence whose first two items are the Outlook and Temp values.
        label_counts: Mapping of label -> number of training rows.
        feature_counts: Mapping of
            label -> {"Outlook": {value: count}, "Temp": {value: count}}.

    Returns:
        The label with the highest score; ties go to the label that appears
        first in ``label_counts``.

    Raises:
        ValueError: If ``label_counts`` is empty (nothing to choose from).
    """
    if not label_counts:
        raise ValueError("cannot predict: label_counts is empty")

    feature_names = ["Outlook", "Temp"]
    total = sum(label_counts.values())

    # Laplace smoothing must add the number of categories of the feature
    # across ALL labels. Using only the values seen for the current label
    # gives each label a different denominator, so the smoothed conditionals
    # do not sum to 1 and labels with fewer observed values are favoured.
    vocab_sizes = {}
    for feature in feature_names:
        seen_values = set()
        for label in label_counts:
            seen_values.update(feature_counts[label][feature])
        vocab_sizes[feature] = len(seen_values)

    probs = {}
    for label, n_label in label_counts.items():
        # Prior probability
        score = n_label / total

        # Likelihood: multiply the smoothed conditional probabilities
        for i, feature in enumerate(feature_names):
            count = feature_counts[label][feature].get(x[i], 0)
            score *= (count + 1) / (n_label + vocab_sizes[feature])

        probs[label] = score

    return max(probs, key=probs.get)


# Build the count tables from the full play-tennis table
label_counts, feature_counts = train_naive_bayes(dataset)

# Classify a single day given as [Outlook, Temp]
test_sample = ["Sunny", "Cool"]
prediction = predict_naive_bayes(
    test_sample,
    label_counts=label_counts,
    feature_counts=feature_counts,
)

print("Test Sample:", test_sample)
print("Predicted Class:", prediction)

Test Sample: ['Sunny', 'Cool']
Predicted Class: Yes
