In [2]:
import pandas as pd

# Training table for the classic "buys computer" example.
# Written row-wise (one training example per line) so each record can be read
# at a glance, then transposed into the column-oriented dict pandas expects.
column_names = ['Age', 'Income', 'Student', 'Credit Rating', 'Buys Computer']
training_rows = [
    ('<=30',  'High',   'No',  'Fair',      'No'),
    ('<=30',  'High',   'No',  'Excellent', 'No'),
    ('31-40', 'High',   'No',  'Fair',      'Yes'),
    ('>40',   'Medium', 'No',  'Fair',      'Yes'),
    ('>40',   'Low',    'Yes', 'Fair',      'Yes'),
    ('>40',   'Low',    'Yes', 'Excellent', 'No'),
    ('31-40', 'Low',    'Yes', 'Excellent', 'Yes'),
    ('<=30',  'Medium', 'No',  'Fair',      'No'),
    ('<=30',  'Low',    'Yes', 'Fair',      'Yes'),
    ('>40',   'Medium', 'Yes', 'Fair',      'Yes'),
    ('<=30',  'Medium', 'Yes', 'Excellent', 'Yes'),
    ('31-40', 'Medium', 'No',  'Excellent', 'Yes'),
    ('31-40', 'High',   'Yes', 'Fair',      'Yes'),
    ('>40',   'Medium', 'No',  'Excellent', 'No'),
]

# Column name -> list of values, in the same column order as `column_names`
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*training_rows))
}
df = pd.DataFrame(data)

# Size of the training set and how many examples fall into each class
total_rows = df.shape[0]
class_counts = df['Buys Computer'].value_counts()

# Report the totals that the prior probabilities are derived from
print(
    f"Total rows: {total_rows}",
    f"Class label counts:\n{class_counts}",
    sep="\n",
)

# Function to calculate conditional probabilities for each feature given a class
def calculate_conditional_probabilities(feature, target_class, data=None,
                                        target='Buys Computer', alpha=0.0):
    """Estimate P(feature = v | target = target_class) for every value v.

    Parameters
    ----------
    feature : str
        Column whose value distribution is estimated.
    target_class : str
        Class label to condition on (e.g. 'Yes' or 'No').
    data : pandas.DataFrame, optional
        Training table. Defaults to the notebook-level ``df``.
    target : str, default 'Buys Computer'
        Name of the class-label column.
    alpha : float, default 0.0
        Additive (Laplace) smoothing strength. ``0`` returns the plain
        relative frequencies; a positive value computes
        ``(count + alpha) / (class_size + alpha * n_values)`` so that no
        feature value gets probability exactly zero.

    Returns
    -------
    pandas.Series
        Probabilities indexed by feature value. Every value of ``feature``
        that occurs anywhere in ``data`` is present in the index; values
        never observed together with ``target_class`` get 0 (or their
        smoothed probability) instead of being absent.

    Raises
    ------
    KeyError
        If ``target_class`` does not occur in the ``target`` column.
    ValueError
        If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError(f"alpha must be non-negative, got {alpha!r}")
    if data is None:
        data = df  # fall back to the notebook-level training table

    class_rows = data[data[target] == target_class]
    if class_rows.empty:
        # Same exception type as looking up an unknown label in class_counts.
        raise KeyError(target_class)

    feature_counts = class_rows[feature].value_counts()

    # value_counts() only lists values that actually occur within this class.
    # Append the remaining values of the feature with a count of 0 so that a
    # lookup such as ('Age', 'No')['31-40'] yields 0 instead of a KeyError.
    unseen = [
        value for value in data[feature].dropna().unique()
        if value not in feature_counts.index
    ]
    feature_counts = feature_counts.reindex(
        list(feature_counts.index) + unseen, fill_value=0
    )

    # Class size is taken from the filtered rows themselves, so the result is
    # always consistent with the table that was actually used.
    class_size = len(class_rows)
    return (feature_counts + alpha) / (class_size + alpha * len(feature_counts))

# Prior probability of each class: P(buys_computer = c)
P_buys_computer_yes, P_buys_computer_no = (
    class_counts[label] / total_rows for label in ('Yes', 'No')
)

# The sample X to classify, as (feature, observed value) pairs
sample = [
    ('Age', '<=30'),
    ('Income', 'Medium'),
    ('Student', 'Yes'),
    ('Credit Rating', 'Fair'),
]

# Per-feature likelihoods P(x_i | c), gathered for each class in sample order
likelihoods = {
    label: [
        calculate_conditional_probabilities(feature, label)[value]
        for feature, value in sample
    ]
    for label in ('Yes', 'No')
}
P_age_yes, P_income_yes, P_student_yes, P_credit_rating_yes = likelihoods['Yes']
P_age_no, P_income_no, P_student_no, P_credit_rating_no = likelihoods['No']

# Naive assumption: P(X | c) is the product of the per-feature likelihoods
P_X_given_yes = P_age_yes * P_income_yes * P_student_yes * P_credit_rating_yes
P_X_given_no = P_age_no * P_income_no * P_student_no * P_credit_rating_no

# Unnormalised posteriors P(X | c) * P(c); the shared evidence term P(X) is
# omitted because it does not affect which class scores higher
P_yes = P_buys_computer_yes * P_X_given_yes
P_no = P_buys_computer_no * P_X_given_no

# Show both scores
print(f"P(X|buys_computer = 'yes') * P(buys_computer = 'yes') = {P_yes:.6f}")
print(f"P(X|buys_computer = 'no') * P(buys_computer = 'no') = {P_no:.6f}")

# Predict the class with the larger score (a tie falls through to 'no')
verdict = (
    "The person belongs to class: buys_computer = 'yes'"
    if P_yes > P_no
    else "The person belongs to class: buys_computer = 'no'"
)
print(verdict)


Total rows: 14
Class label counts:
Buys Computer
Yes    9
No     5
Name: count, dtype: int64
P(X|buys_computer = 'yes') * P(buys_computer = 'yes') = 0.028219
P(X|buys_computer = 'no') * P(buys_computer = 'no') = 0.006857
The person belongs to class: buys_computer = 'yes'


In [None]:
Naive Bayes Classifier: Theory
The Naive Bayes classifier is a probabilistic machine learning model that applies Bayes' theorem with strong (naive) independence assumptions between the features.
It’s particularly useful for classification tasks, especially when the features are categorical.

The Naive Assumption:
Naive Bayes assumes that the features are conditionally independent given the class. 
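This assumption simplifies the computation of P(X∣C): instead of having to estimate the joint distribution of all features, we only need the per-feature conditional probabilities and multiply them together, P(X∣C) = P(x₁∣C) × P(x₂∣C) × … × P(xₙ∣C).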
