# Naïve Bayes

In [14]:
from math import prod

import numpy as np
import pandas as pd

import yaml

## Naïve Bayes

In [15]:
# Lookup tables translating the integer codes stored in the table below
# into their human-readable category labels.
age = dict(enumerate(("<=30", "31..40", ">40"), start=1))
income = dict(enumerate(("high", "medium", "low"), start=1))
credit_rating = dict(enumerate(("fair", "excellent"), start=1))

# Training table, written one tuple per record. The helpers in this file
# treat the final column (buys_computer) as the class label.
df = pd.DataFrame(
    [
        (1, 1, False, 1, False),
        (1, 1, False, 2, False),
        (2, 1, False, 1, True),
        (3, 2, False, 1, True),
        (3, 3, True, 1, True),
        (3, 3, True, 2, False),
        (2, 3, True, 2, True),
        (1, 2, False, 1, False),
        (1, 3, True, 1, True),
        (3, 2, True, 1, True),
        (1, 2, True, 2, True),
        (2, 2, False, 2, True),
        (2, 1, True, 1, True),
        (3, 2, False, 2, False),
    ],
    columns=["age", "income", "student", "credit_rating", "buys_computer"],
)
df

Unnamed: 0,age,income,student,credit_rating,buys_computer
0,1,1,False,1,False
1,1,1,False,2,False
2,2,1,False,1,True
3,3,2,False,1,True
4,3,3,True,1,True
5,3,3,True,2,False
6,2,3,True,2,True
7,1,2,False,1,False
8,1,3,True,1,True
9,3,2,True,1,True


In [16]:
def get_val_counts(df):
    """Tally how often each value occurs in every column of ``df``.

    The last column is treated as the class label; every column before it
    is treated as an attribute.

    Args:
        df: DataFrame whose final column holds the class labels.

    Returns:
        A ``(x_vals, y_vals)`` tuple. ``x_vals`` maps each attribute name to
        a ``{value: count}`` dict, and ``y_vals`` is the ``{label: count}``
        dict for the last column. Counts come from ``Series.value_counts``.
    """
    label_counts = df.iloc[:, -1].value_counts().to_dict()

    attribute_counts = {}
    for column in df.columns[:-1]:
        attribute_counts[column] = df[column].value_counts().to_dict()

    return attribute_counts, label_counts

In [17]:
# Sample to classify, using the same integer codes as the training table:
# age 1 ("<=30"), income 2 ("medium"), a student, credit_rating 1 ("fair").
X = dict(age=1, income=2, student=True, credit_rating=1)

In [18]:
def naive_bayes(df, sample, *, alpha=0):
    """Predict the class label of ``sample`` with a categorical naive Bayes rule.

    The last column of ``df`` is the class label. For every class ``Ci`` the
    score ``P(Ci) * prod(P(x_attr | Ci))`` is computed over the attributes
    listed in ``sample``, and the label with the highest score is returned.

    Args:
        df: Training DataFrame; its final column holds the class labels.
        sample: Mapping of attribute name -> observed value. Every key must
            be a column of ``df``.
        alpha: Laplace (additive) smoothing strength. With the default ``0``
            the plain relative frequencies are used, so a value never seen
            together with a class gives that class a score of zero. With
            ``alpha > 0`` each conditional probability becomes
            ``(count + alpha) / (class_count + alpha * k)``, where ``k`` is
            the number of distinct values observed for that attribute.

    Returns:
        The class label with the largest score. Ties go to the label that
        comes first in ``value_counts`` order of the label column.

    Raises:
        ValueError: If ``alpha`` is negative or ``df`` has no labelled rows.
        KeyError: If ``sample`` names a column that ``df`` does not have.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    labels = df.iloc[:, -1]
    class_counts = labels.value_counts().to_dict()
    if not class_counts:
        raise ValueError("cannot classify: df contains no labelled rows")
    n_labelled = sum(class_counts.values())

    # Per-attribute work that does not depend on the class is done once:
    # the rows agreeing with the sample, and the number of distinct values
    # (only needed for the smoothing denominator).
    agrees = {attr: df[attr] == value for attr, value in sample.items()}
    n_distinct = {attr: df[attr].nunique() for attr in sample}

    scores = {}
    for label, class_count in class_counts.items():
        in_class = labels == label
        # P(x_attr | Ci) for every attribute of the sample.
        likelihoods = [
            (int((agrees[attr] & in_class).sum()) + alpha)
            / (class_count + alpha * n_distinct[attr])
            for attr in sample
        ]
        # P(X | Ci) * P(Ci)
        scores[label] = prod(likelihoods) * (class_count / n_labelled)

    # The most probable class wins.
    return max(scores, key=scores.get)

In [19]:
# Classify the sample X against the training table df; being the last
# expression of the cell, the predicted class label is what gets echoed.
naive_bayes(df=df, sample=X)

True