In [None]:

import pandas as pd
# Toy training set: ten patients, two binary symptoms and the flu diagnosis.
data = {
    'S1_Headache': ['Yes','No','Yes','No','Yes','No','Yes','No','Yes','No'],
    'S2_Fever':    ['Yes','No','No','Yes','Yes','No','Yes','Yes','No','No'],
    'FS_Flu':      ['Yes','No','No','Yes','Yes','No','Yes','No','No','No']
}
# Label the rows 1..N (rather than pandas' default 0..N-1) when the frame is built.
df = pd.DataFrame(data, index=pd.RangeIndex(start=1, stop=len(data['FS_Flu']) + 1))


In [None]:

from collections import Counter
import math
def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Args:
        labels: sized iterable of hashable labels (a list or a pandas Series).

    Returns:
        float: -sum(p * log2(p)) over the distinct labels, where p is the
        relative frequency of each label; 0.0 for an empty collection.
    """
    total = len(labels)
    if not total:
        # No observations: define the entropy as zero instead of dividing by zero.
        return 0.0

    bits = 0.0
    for freq in Counter(labels).values():
        prob = freq / total
        # Counter only reports labels that occur, so prob > 0 and log2 is defined.
        bits -= prob * math.log2(prob)
    return bits

def condEntropy(feature_values, labels):
    """Return the conditional entropy H(labels | feature), in bits.

    The rows are grouped by feature value and the entropy of the labels inside
    each group is averaged, weighted by the share of rows in that group.

    Args:
        feature_values: feature value for every observation.
        labels: class label for every observation, paired with
            ``feature_values`` (paired by index when both are pandas Series).

    Returns:
        float: sum over feature values x of P(x) * H(labels | x); 0.0 when
        there are no observations.
    """
    n = len(labels)
    if n == 0:
        return 0.0

    df_local = pd.DataFrame({'X': feature_values, 'Y': labels})

    # The accumulator must not be called `entropy`: assigning to that name would
    # make it local to this function and hide the module-level entropy() that
    # is called inside the loop ("'float' object is not callable").
    weighted = 0.0
    for _, group in df_local.groupby('X'):
        px = len(group) / n
        weighted += px * entropy(group['Y'])
    return weighted

def infGain(feature_values, labels):
    """Return the information gain, in bits, of splitting ``labels`` on a feature.

    Args:
        feature_values: feature value for every observation.
        labels: class label for every observation.

    Returns:
        The label entropy minus the conditional entropy of the labels given
        the feature.
    """
    before_split = entropy(labels)
    after_split = condEntropy(feature_values, labels)
    return before_split - after_split

# Entropy of the diagnosis before looking at any symptom.
H_full = entropy(df['FS_Flu'])

# Conditional entropies first, then information gains; S1 is evaluated before S2.
H_S1_cond, H_S2_cond = (
    condEntropy(df[symptom], df['FS_Flu']) for symptom in ('S1_Headache', 'S2_Fever')
)
IG_S1, IG_S2 = (
    infGain(df[symptom], df['FS_Flu']) for symptom in ('S1_Headache', 'S2_Fever')
)

print(f"entropy H(FS) = {H_full:.4f} bits")
print(f"H(FS|S1) = {H_S1_cond:.4f}; IG(S1) = {IG_S1:.4f}")
print(f"H(FS|S2) = {H_S2_cond:.4f}; IG(S2) = {IG_S2:.4f}")


In [None]:

# Split on the symptom with the strictly larger information gain; a tie goes to S2.
if IG_S1 > IG_S2:
    root = 'S1_Headache'
else:
    root = 'S2_Fever'
print('Chosen root:', root)

from collections import Counter
def buildTree(df, rootAtt, target='FS_Flu'):
    """Build a one-level decision tree description that splits ``df`` on ``rootAtt``.

    Args:
        df: DataFrame holding the attribute columns and the target column.
        rootAtt: name of the column to split on at the root.
        target: name of the class-label column (defaults to ``'FS_Flu'``).

    Returns:
        dict: ``{'root': rootAtt, 'nodes': {value: node, ...}}`` with one node
        per distinct value of ``rootAtt``. A node whose rows all share one
        class is ``{'pure': True, 'class': <label>, 'count': <rows>}``; any
        other node is ``{'pure': False, 'split_on': <next attribute>,
        'counts': {label: rows, ...}}``. ``split_on`` is the first column of
        ``df`` that is neither ``rootAtt`` nor ``target``, or ``None`` when no
        such column exists.
    """
    # The follow-up attribute is the same for every impure branch: work it out once.
    remaining = [col for col in df.columns if col not in (rootAtt, target)]
    child = remaining[0] if remaining else None

    tree = {'root': rootAtt, 'nodes': {}}
    for val, group in df.groupby(rootAtt):
        counts = Counter(group[target])
        # A branch is pure exactly when it contains a single class. Counting the
        # classes avoids comparing a floating-point entropy against 0.0.
        if len(counts) == 1:
            tree['nodes'][val] = {'pure': True, 'class': group[target].iloc[0], 'count': len(group)}
        else:
            tree['nodes'][val] = {'pure': False, 'split_on': child, 'counts': dict(counts)}
    return tree

# Build the hand-rolled tree for the chosen root; the bare name on the last line
# makes the notebook render it as the cell output.
manualTree = buildTree(df, rootAtt=root)
manualTree


In [None]:

# Binary encoding shared by the symptom columns and the diagnosis label.
yesOrNo = {'No':0, 'Yes':1}

# `df` names its columns with underscores ('S1_Headache', 'S2_Fever', 'FS_Flu');
# selecting the spaced spellings raised KeyError. The query frame and the
# naive Bayes cell address the encoded features as 'S1 Headache' / 'S2 Fever',
# so select by the real names and rename to those display names here.
# Series.map is used instead of replace so the result does not depend on
# replace's deprecated silent downcasting of object columns.
X = (
    df[['S1_Headache', 'S2_Fever']]
    .rename(columns={'S1_Headache': 'S1 Headache', 'S2_Fever': 'S2 Fever'})
    .apply(lambda col: col.map(yesOrNo))
    .astype(int)
)
y = df['FS_Flu'].map(yesOrNo).astype(int).rename('FS Flu')

from sklearn.tree import DecisionTreeClassifier, export_text
# Entropy-criterion decision tree on the encoded symptoms; fit() returns the
# estimator itself, so construction and training can be chained.
clf = DecisionTreeClassifier(criterion='entropy', random_state=0).fit(X, y)
print(export_text(clf, feature_names=X.columns.tolist()))

# All four symptom combinations, encoded with the same Yes/No mapping as the
# training features. Series.map is used instead of replace so the encoding does
# not depend on replace's deprecated silent downcasting of object columns.
queries = pd.DataFrame({
    'S1 Headache': ['Yes','Yes','No','No'],
    'S2 Fever':    ['Yes','No','Yes','No']
}).apply(lambda col: col.map(yesOrNo)).astype(int)

preds_dt = clf.predict(queries)
preds_dt_labels = ['Yes' if p==1 else 'No' for p in preds_dt]

# Human-readable view of the queries; replace() already returns a new frame,
# so adding the prediction column does not touch `queries`.
queries_disp = queries.replace({0:'No',1:'Yes'})
queries_disp['Pred Flu DT'] = preds_dt_labels
queries_disp


In [None]:

from sklearn.naive_bayes import BernoulliNB
# Bernoulli naive Bayes with smoothing parameter alpha=1.0, trained on the same
# encoded symptoms; fit() returns the estimator, so it is chained here.
nb = BernoulliNB(alpha=1.0).fit(X, y)
preds_nb = nb.predict(queries)
preds_nb_labels = [
    'Yes' if pred == 1 else 'No'
    for pred in preds_nb
]
# Adds the naive Bayes column to the display frame built for the decision tree.
queries_disp['Pred Flu NB'] = preds_nb_labels

# Class priors: share of training rows with (1) and without (0) flu.
n = len(y)
prior_yes = y.tolist().count(1) / n
prior_no = y.tolist().count(0) / n

def conditional_probs(feature_col, label_col):
    """Estimate P(feature | label) for a 0/1 feature with add-one smoothing.

    Args:
        feature_col: pandas Series of 0/1 feature values.
        label_col: pandas Series of 0/1 class labels, aligned with ``feature_col``.

    Returns:
        dict: ``{1: {'P=1': p, 'P=0': 1 - p}, 0: {...}}`` where, for each label,
        ``p = (rows with feature == 1 + 1) / (rows with that label + 2)``.
    """
    def smoothed(label):
        # Feature values of the rows carrying this class label.
        members = feature_col[label_col == label]
        p_one = (members.sum() + 1) / (len(members) + 2)
        return {'P=1': p_one, 'P=0': 1 - p_one}

    # Label 1 first, then label 0.
    return {label: smoothed(label) for label in (1, 0)}

# Smoothed per-class probabilities for each symptom, S1 before S2.
cond_S1, cond_S2 = (
    conditional_probs(X[symptom], y) for symptom in ('S1 Headache', 'S2 Fever')
)

# Priors with four decimals, followed by the smoothed conditionals.
print(f'P(FS=Yes)={prior_yes:.4f}, P(FS=No)={prior_no:.4f}')
print('P(S1=Yes|FS=Yes)=', cond_S1[1]['P=1'])
print('P(S1=Yes|FS=No)=', cond_S1[0]['P=1'])
print('P(S2=Yes|FS=Yes)=', cond_S2[1]['P=1'])
print('P(S2=Yes|FS=No)=', cond_S2[0]['P=1'])

# Cell output: the query table with the prediction columns added so far.
queries_disp
