In [5]:
import pandas as pd
import numpy as np

# Load the dataset
data = pd.read_csv('buys_computer.csv')

# Function to prepare dictionary which counts the number of "yes" and "no" class labels for each nominal value in an attribute
def count_labels(attribute):
    counts = {}
    for value in data[attribute].unique():
        counts[value] = (data[attribute] == value).value_counts().to_dict()
        return counts

# Function to compute the probability of the class labels ("yes" or "no") for each nominal value in an attribute
def class_probabilities(attribute):
    probabilities = {}
    for value in data[attribute].unique():
        subset = data[data[attribute] == value]
        probabilities[value] = subset['buys_computer'].value_counts(normalize=True).to_dict()
    return probabilities

# Function to compute the probability of instance X against class label ("yes" and "no") using Bayes' theorem
def bayes_theorem(X):
    # Prior probabilities
    prior_yes = (data['buys_computer'] == 'yes').mean()
    prior_no = 1 - prior_yes
    
    # Conditional probabilities
    conditional_yes = {}
    conditional_no = {}
    
    for attribute in X.index:
        subset_yes = data[data['buys_computer'] == 'yes']
        subset_no = data[data['buys_computer'] == 'no']
        
        # Compute conditional probabilities P(X|H)
        p_X_given_yes = subset_yes[subset_yes[attribute] == X[attribute]].shape[0] / subset_yes.shape[0]
        p_X_given_no = subset_no[subset_no[attribute] == X[attribute]].shape[0] / subset_no.shape[0]
        
        # Add to dictionaries
        conditional_yes[attribute] = p_X_given_yes
        conditional_no[attribute] = p_X_given_no
    
    # Compute numerator of Bayes' theorem P(X|H) * P(H)
    numerator_yes = np.prod(list(conditional_yes.values())) * prior_yes
    numerator_no = np.prod(list(conditional_no.values())) * prior_no
    
    # Compute denominator of Bayes' theorem
    denominator = numerator_yes + numerator_no
    
    # Compute posterior probabilities P(H|X)
    posterior_yes = numerator_yes / denominator
    posterior_no = numerator_no / denominator
    
    return {'yes': posterior_yes, 'no': posterior_no}

print(count_labels('age'))
print(class_probabilities('income'))
X = pd.Series({'age': 'youth', 'income': 'medium', 'student': 'yes', 'credit_rating': 'fair'})
print(bayes_theorem(X))


{'youth': {False: 9, True: 5}}
{'high': {'no': 0.5, 'yes': 0.5}, 'medium': {'yes': 0.6666666666666666, 'no': 0.3333333333333333}, 'low': {'yes': 0.75, 'no': 0.25}}
{'yes': 0.8045052292839904, 'no': 0.19549477071600968}
