# Bayesian Classifier

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [5]:
# Create a small test dataset: six rows of 0/1 indicator columns.
# 'Toothache' and 'Cavity' are used as features further down and
# 'VisitDentist' is used as the target column.
data = {
    'Toothache':    [1, 0, 1, 0, 1, 0],
    'Cavity':       [1, 1, 0, 0, 1, 0],
    'VisitDentist': [1, 0, 1, 0, 1, 0]
}
df = pd.DataFrame(data)

def baesian_classifier_probabilities(df):
    """Return the marginal relative frequency of every value in every column.

    Args:
        df: pandas DataFrame whose columns hold discrete values.

    Returns:
        dict mapping column name -> {value: fraction of rows equal to value},
        where the fraction is taken over all rows of ``df``.
    """
    n_rows = len(df)
    marginals = {}

    for name in df.columns:
        series = df[name]
        # int(...) keeps the result a plain Python float after the division.
        marginals[name] = {
            observed: int((series == observed).sum()) / n_rows
            for observed in series.unique()
        }

    return marginals

def baesian_classifier(df, target_column, sample=None):
    """Fit a categorical naive Bayes model on ``df`` and classify one sample.

    Priors P(class) and likelihoods P(feature=value | class) are estimated as
    plain relative frequencies. No smoothing is applied, so a feature value
    never observed together with a class gets likelihood 0 and zeroes that
    class's score. The priors, likelihood tables, sample and scores are
    printed as a side effect.

    Args:
        df: pandas DataFrame holding the feature columns and the target column.
        target_column: name of the column in ``df`` that holds the class label.
        sample: optional mapping (dict or Series) from feature name to value
            to classify. Extra keys are ignored. Defaults to the first row of
            ``df``, which was the only sample the function could classify
            before this parameter existed.

    Returns:
        The class label with the largest unnormalised posterior
        (prior times product of likelihoods). Ties go to the class that
        appears first in ``df[target_column]``.

    Raises:
        KeyError: if ``sample`` is given but lacks one of the feature columns.
    """
    # Calculate prior probabilities for each class
    priors = df[target_column].value_counts(normalize=True)
    print("Prior probabilities:")
    print(priors)

    # Calculate likelihoods for each feature given each class
    features = [col for col in df.columns if col != target_column]
    classes = df[target_column].unique()
    likelihoods = {}

    for feature in features:
        likelihoods[feature] = {}
        for cls in classes:
            subset = df[df[target_column] == cls]
            value_counts = subset[feature].value_counts(normalize=True)
            likelihoods[feature][cls] = value_counts
            print(f"\nLikelihoods for {feature} given class {cls}:")
            print(value_counts)

    # Pick the sample to classify: the caller's, or the first row by default.
    if sample is None:
        sample = {feature: df[feature].iloc[0] for feature in features}
    else:
        missing = [feature for feature in features if feature not in sample]
        if missing:
            raise KeyError(f"sample is missing feature(s): {missing}")
        # Keep only the model's features, in model order.
        sample = {feature: sample[feature] for feature in features}
    print("\nSample to predict:", sample)

    posteriors = {}
    for cls in classes:
        posterior = priors[cls]
        for feature in features:
            value = sample[feature]
            # A value unseen for this class contributes a likelihood of 0.
            likelihood = likelihoods[feature][cls].get(value, 0)
            posterior *= likelihood
        posteriors[cls] = posterior
        print(f"Posterior for class {cls}: {posterior}")

    predicted_class = max(posteriors, key=posteriors.get)
    print("\nPredicted class:", predicted_class)
    return predicted_class

# Example usage: treat 'VisitDentist' as the class label and every other
# column of df as a feature; the call prints its intermediate tables.
target_column = 'VisitDentist'
predicted_class = baesian_classifier(df, target_column)

Prior probabilities:
VisitDentist
1    0.5
0    0.5
Name: proportion, dtype: float64

Likelihoods for Toothache given class 1:
Toothache
1    1.0
Name: proportion, dtype: float64

Likelihoods for Toothache given class 0:
Toothache
0    1.0
Name: proportion, dtype: float64

Likelihoods for Cavity given class 1:
Cavity
1    0.666667
0    0.333333
Name: proportion, dtype: float64

Likelihoods for Cavity given class 0:
Cavity
0    0.666667
1    0.333333
Name: proportion, dtype: float64

Sample to predict: {'Toothache': np.int64(1), 'Cavity': np.int64(1)}
Posterior for class 1: 0.3333333333333333
Posterior for class 0: 0.0

Predicted class: 1


In [11]:
# Split dataset
# Features are the two symptom columns; the label is 'VisitDentist'.
# test_size=0.33 holds out roughly a third of the rows, and random_state
# pins the shuffle so the split is reproducible.
X = df[['Toothache', 'Cavity']]
y = df['VisitDentist']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Bayesian classifier
# NOTE(review): GaussianNB is used on 0/1 indicator columns here; a
# Bernoulli/categorical naive Bayes variant may be a closer fit - confirm.
clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Print accuracy
# The score is computed on the held-out rows only, so with a dataset this
# small it rests on very few predictions.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("For data", X_test.values.tolist(), "Predictions:", y_pred)

Accuracy: 1.0
For data [[1, 1], [0, 1]] Predictions: [1 0]


In [1]:
import math

# Sample weather dataset: each row is [Outlook, Temp, Play].
# Features: Outlook (Sunny/Overcast/Rain), Temp (Hot/Mild/Cool)
# Label: Play (Yes/No), always the last element of the row
dataset = [
    ['Sunny', 'Hot', 'No'],
    ['Sunny', 'Hot', 'No'],
    ['Overcast', 'Hot', 'Yes'],
    ['Rain', 'Mild', 'Yes'],
    ['Rain', 'Cool', 'Yes'],
    ['Rain', 'Cool', 'No'],
    ['Overcast', 'Cool', 'Yes'],
    ['Sunny', 'Mild', 'No'],
    ['Sunny', 'Cool', 'Yes'],
    ['Rain', 'Mild', 'Yes'],
    ['Sunny', 'Mild', 'Yes'],
    ['Overcast', 'Mild', 'Yes'],
    ['Overcast', 'Hot', 'Yes'],
    ['Rain', 'Mild', 'No']
]

# Train function
def train_naive_bayes(data):
    """Tally the counts a categorical naive Bayes model needs.

    Args:
        data: iterable of ``[outlook, temp, label]`` rows (exactly three
            items per row).

    Returns:
        Tuple ``(label_counts, feature_counts)`` where ``label_counts`` maps
        label -> number of rows with that label, and ``feature_counts`` maps
        label -> {"Outlook": {value: count}, "Temp": {value: count}}.
    """
    label_counts = {}
    feature_counts = {}

    for outlook, temp, label in data:
        # Class frequency.
        label_counts[label] = label_counts.get(label, 0) + 1

        # Per-class feature frequency; create the tables on first sight of a label.
        per_label = feature_counts.setdefault(label, {"Outlook": {}, "Temp": {}})
        for name, observed in (("Outlook", outlook), ("Temp", temp)):
            tally = per_label[name]
            tally[observed] = tally.get(observed, 0) + 1

    return label_counts, feature_counts


# Predict function
def predict_naive_bayes(x, label_counts, feature_counts):
    """Predict the most probable label for one sample with Laplace smoothing.

    Each label is scored as prior times the product of smoothed conditionals
    ``(count + 1) / (label_count + K)``. ``K`` is the number of distinct
    values of that feature observed across all labels during training.
    Using the per-label distinct count instead would make a label's
    conditionals sum to more than 1 whenever that label never saw some
    value, which favours labels with less varied training data.

    Args:
        x: sequence ``[outlook, temp]`` to classify.
        label_counts: mapping label -> row count, as returned by
            ``train_naive_bayes``.
        feature_counts: mapping label -> {"Outlook": {...}, "Temp": {...}}
            of per-label value counts, as returned by ``train_naive_bayes``.

    Returns:
        The label with the highest score. Ties go to the label that comes
        first in ``label_counts``.

    Raises:
        ValueError: if ``label_counts`` is empty (nothing was trained).
    """
    if not label_counts:
        raise ValueError("label_counts is empty; train the model before predicting")

    feature_names = ["Outlook", "Temp"]
    total = sum(label_counts.values())

    # Smoothing needs the size of each feature's whole category set, so
    # collect the distinct values over every label, not just the current one.
    vocab_sizes = {
        feature: len({
            value
            for label in label_counts
            for value in feature_counts[label][feature]
        })
        for feature in feature_names
    }

    probs = {}
    for label in label_counts:
        # Prior probability
        probs[label] = label_counts[label] / total

        # Likelihood (multiply smoothed conditional probabilities)
        for i, feature in enumerate(feature_names):
            value = x[i]
            count = feature_counts[label][feature].get(value, 0)
            probs[label] *= (count + 1) / (label_counts[label] + vocab_sizes[feature])  # Laplace smoothing

    return max(probs, key=probs.get)


# Train model: collect per-label and per-label/per-feature counts
label_counts, feature_counts = train_naive_bayes(dataset)

# Test prediction on one sample, given in the same [Outlook, Temp] order as
# the feature columns of the dataset rows
test_sample = ['Sunny', 'Cool']  # Outlook=Sunny, Temp=Cool
prediction = predict_naive_bayes(test_sample, label_counts, feature_counts)

print("Test Sample:", test_sample)
print("Predicted Class:", prediction)

Test Sample: ['Sunny', 'Cool']
Predicted Class: Yes
