In [1]:
import pandas as pd
import numpy as np


In [2]:
# Toy corpus: one row per document. Word1/Word2 are 0/1 flags marking whether
# the word occurs in the document; Class is the document's label.
data = dict(
    DocID=[1, 2, 3, 4, 5],
    Word1=[1, 0, 1, 0, 1],
    Word2=[0, 1, 1, 1, 0],
    Class=list('ABABA'),
)

df = pd.DataFrame(data)

In [3]:
def train_bayes(df, feature_cols=None, alpha=1):
    """Fit a Bernoulli naive Bayes model on a word-presence table.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per document. Must contain a 'Class' column holding the
        label; each feature column holds 0/1 word-presence flags.
    feature_cols : sequence of column labels, optional
        Columns to treat as word features. When omitted, every column except
        the first and the last is used (``df.columns[1:-1]``), which fits a
        layout with an ID column first and 'Class' last. Pass the columns
        explicitly when the frame is laid out differently.
    alpha : int or float, optional
        Additive smoothing strength. The default of 1 is Laplace smoothing,
        which keeps every estimate strictly between 0 and 1 so that the log
        terms used at prediction time stay finite.

    Returns
    -------
    class_priors : pandas.Series
        P(class), indexed by class label.
    conditional_probs : dict
        ``{class_label: {feature_column: P(word present | class)}}``.

    Raises
    ------
    ValueError
        If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    if feature_cols is None:
        # Backward-compatible default: skip the first (ID) and last (label) columns.
        feature_cols = df.columns[1:-1]
    feature_cols = list(feature_cols)

    # normalize=True turns the per-class counts into relative frequencies.
    class_priors = df['Class'].value_counts(normalize=True)

    conditional_probs = {}
    for cls in df['Class'].unique():
        # Rows belonging to the current class only.
        class_df = df[df['Class'] == cls]
        n_docs = len(class_df)
        # Smoothed Bernoulli estimate. The denominator grows by 2 * alpha
        # because each word has two outcomes (present / absent).
        conditional_probs[cls] = {
            col: (class_df[col].sum() + alpha) / (n_docs + 2 * alpha)
            for col in feature_cols
        }

    return class_priors, conditional_probs

In [4]:
def predict_bayes(doc, class_priors, conditional_probs):
    """Return the most probable class for a document under Bernoulli naive Bayes.

    Parameters
    ----------
    doc : mapping
        Maps word name to a truthy/falsy presence flag. Words from the
        trained vocabulary that are missing from ``doc`` are treated as
        absent; keys that are not in the vocabulary (for example 'DocID' or
        'Class' when a DataFrame row is passed) are ignored.
    class_priors : mapping or pandas.Series
        P(class), looked up by class label.
    conditional_probs : dict
        ``{class_label: {word: P(word present | class)}}``.

    Returns
    -------
    The class label with the highest log-posterior score. On a tie, the
    class that comes first in ``conditional_probs`` wins.

    Raises
    ------
    ValueError
        If ``conditional_probs`` is empty.
    """
    if not conditional_probs:
        raise ValueError("conditional_probs is empty; train the model before predicting")

    scores = {}
    for cls, word_probs in conditional_probs.items():
        # Work in log space so the product of probabilities becomes a sum.
        score = np.log(class_priors[cls])
        # Walk the trained vocabulary rather than the document's keys: in the
        # Bernoulli model every known word contributes, absent ones through
        # log(1 - p).
        for word, prob in word_probs.items():
            if doc.get(word, 0):
                score += np.log(prob)
            else:
                score += np.log(1 - prob)
        scores[cls] = score
    return max(scores, key=scores.get)

In [7]:
# Demo: fit the model on the toy corpus, then classify a document in which
# both words occur.
test_doc = dict(Word1=1, Word2=1)
class_priors, conditional_probs = train_bayes(df)
predicted_class = predict_bayes(
    test_doc,
    class_priors,
    conditional_probs,
)
print("Predicted class:", predicted_class)

Predicted class: A
