**Create dataset**

In [None]:
import numpy as np


# Create data
def create_train_data():
    """Return the toy play-tennis training set as a 2-D string array.

    Each row is one observation; the first four columns are the categorical
    features and the last column is the label ('yes' or 'no').
    """
    # One observation per line, fields separated by whitespace.
    table = """
        Sunny     Hot   High    Weak    no
        Sunny     Hot   High    Strong  no
        Overcast  Hot   High    Weak    yes
        Rain      Mild  High    Weak    yes
        Rain      Cool  Normal  Weak    yes
        Rain      Cool  Normal  Strong  no
        Overcast  Cool  Normal  Strong  yes
        Overcast  Mild  High    Weak    no
        Sunny     Cool  Normal  Weak    yes
        Rain      Mild  Normal  Weak    yes
    """
    rows = [line.split() for line in table.strip().splitlines()]
    return np.array(rows)


# Build the training set and print it to check the rows.
train_data = create_train_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


**Compute Prior Probability**

In [None]:
def compute_prior_probability(train_data):
    """Return the prior probability of each class label.

    The label is read from the last column of ``train_data``. The result is a
    1-D float array ordered as ['no', 'yes'], where each entry is the fraction
    of rows carrying that label.
    """
    class_labels = ['no', 'yes']
    n_rows = len(train_data)
    fractions = [
        np.sum(train_data[:, -1] == label) / n_rows
        for label in class_labels
    ]
    return np.array(fractions)


# Index 0 holds the prior for 'no' and index 1 the prior for 'yes',
# following the order of y_unique inside compute_prior_probability.
prior_probability = compute_prior_probability(train_data)
print(prior_probability[0])
print(prior_probability[1])

0.4
0.6


**Compute Conditional Probability**

In [None]:
def compute_conditional_probability(train_data):
    """Tabulate feature-value / class co-occurrence counts for every feature.

    Every column except the last is treated as a categorical feature; the last
    column is the label.

    Returns a pair ``(tables, value_names)``:
      * ``tables[f]`` is a float array of shape (2, n_values_f). Row 0 is the
        'no' class and row 1 the 'yes' class; entry [c, v] is the number of
        rows where feature ``f`` takes its v-th unique value and the label is
        class ``c``. These are raw counts, not normalised probabilities, so
        callers divide by the class count to obtain P(value | class).
      * ``value_names[f]`` is the array of unique values of feature ``f`` as
        returned by ``np.unique``; it gives the column order of ``tables[f]``.
    """
    class_labels = ['no', 'yes']
    n_features = train_data.shape[1] - 1

    count_tables = []
    feature_values = []
    for col in range(n_features):
        column = train_data[:, col]
        targets = train_data[:, -1]
        values = np.unique(column)
        feature_values.append(values)

        counts = np.zeros((len(class_labels), len(values)))
        for row, label in enumerate(class_labels):
            for pos, value in enumerate(values):
                counts[row, pos] = np.sum((column == value) & (targets == label))
        count_tables.append(counts)

    return count_tables, feature_values

train_data = create_train_data()
# Only the per-feature value lists are needed here; the count tables are discarded.
_, list_x_name = compute_conditional_probability(train_data)
# Each list comes from np.unique, which returns the unique values in sorted order.
print("x1 = ", list_x_name[0])
print("x2 = ", list_x_name[1])
print("x3 = ", list_x_name[2])
print("x4 = ", list_x_name[3])

x1 =  ['Overcast' 'Rain' 'Sunny']
x2 =  ['Cool' 'Hot' 'Mild']
x3 =  ['High' 'Normal']
x4 =  ['Strong' 'Weak']


**Get index from value**

In [None]:
def get_index_from_value(feature_name, list_features):
    """Return the position of the first entry of ``list_features`` equal to ``feature_name``.

    Raises IndexError when no entry matches.
    """
    candidates = np.array(list_features)
    match_positions = np.where(candidates == feature_name)
    first_axis_hits = match_positions[0]
    return first_axis_hits[0]

**Câu hỏi 16:** Hãy cho biết kết quả của đoạn chương trình sau đây:

In [None]:
train_data = create_train_data()
_, list_x_name = compute_conditional_probability(train_data)
# Unique values of the first feature column.
outlook = list_x_name[0]

# Position of each value inside the list of unique values.
i1 = get_index_from_value('Overcast', outlook)
i2 = get_index_from_value('Rain', outlook)
i3 = get_index_from_value('Sunny', outlook)

print(i1, i2, i3)

0 1 2


**Câu hỏi 17:** Hãy cho biết kết quả của đoạn chương trình sau đây:

In [None]:
# Build the inputs in this cell so it runs on a fresh kernel; no earlier cell
# assigns conditional_probability.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)
# Compute P("Outlook"="Sunny"|"Play Tennis"="Yes")
x1 = get_index_from_value('Sunny', list_x_name[0])
# Row 1 of each table is the 'yes' class; the table stores counts, so divide
# by the number of 'yes' rows to get the conditional probability.
P_Outlook_Sunny_given_Yes = conditional_probability[0][1, x1] / np.sum(train_data[:, -1] == 'yes')
P_Outlook_Sunny_given_Yes

0.16666666666666666

**Câu hỏi 18:** Hãy cho biết kết quả của đoạn chương trình sau đây:

In [None]:
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)
# Compute P("Outlook"="Sunny"|"Play Tennis"="No")
x1 = get_index_from_value('Sunny', list_x_name[0])
# Row 0 of each table is the 'no' class; the table stores counts, so divide
# by the number of 'no' rows to get the conditional probability.
P_Outlook_Sunny_given_No = conditional_probability[0][0, x1] / np.sum(train_data[:, -1] == 'no')
P_Outlook_Sunny_given_No

0.5

**Train Naive Bayes**

In [None]:
def train_naive_bayes(train_data):
    """Fit the Naive Bayes tables for the play-tennis data.

    Returns ``(prior_probability, conditional_probability, list_x_name)``:
    the class priors from ``compute_prior_probability`` followed by the
    per-feature tables and unique-value lists from
    ``compute_conditional_probability``.
    """
    # Step 1: Calculate Prior Probability
    prior_probability = compute_prior_probability(train_data)

    # Step 2: Calculate Conditional Probability
    conditional_probability, list_x_name = compute_conditional_probability(train_data)

    return prior_probability, conditional_probability, list_x_name


def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    """Predict 'yes' or 'no' for one sample with Naive Bayes.

    Parameters
    ----------
    X : sequence of feature values, one per feature, in column order.
    list_x_name : per-feature arrays of unique values; they give the column
        order of the matching table in ``conditional_probability``.
    prior_probability : class priors ordered as ['no', 'yes'].
    conditional_probability : per-feature tables of shape (2, n_values) with
        row 0 for 'no' and row 1 for 'yes'.

    Returns
    -------
    'no' when the score of class 'no' is strictly larger, otherwise 'yes'.

    Raises
    ------
    IndexError if a value in ``X`` is not among the known values of its feature.
    """
    y_unique = ['no', 'yes']

    # One unnormalised posterior per class: P(y) * prod_f P(x_f | y).
    scores = []
    for i in range(len(y_unique)):
        score = prior_probability[i]
        for value, names, table in zip(X, list_x_name, conditional_probability):
            column = get_index_from_value(value, names)
            # Each training row contributes exactly one value per feature, so
            # the row sum is the number of rows of class i. Dividing by it
            # turns counts into P(value | class); a table that is already
            # normalised has row sum 1 and is left unchanged.
            class_total = table[i].sum()
            # A class with no training rows gets zero likelihood instead of 0/0.
            likelihood = table[i, column] / class_total if class_total > 0 else 0.0
            score *= likelihood
        scores.append(score)

    p0, p1 = scores
    # Ties resolve to 'yes', matching the original comparison.
    if p0 > p1:
        y_pred = 'no'
    else:
        y_pred = 'yes'

    return y_pred

**Câu hỏi 19:** Hãy cho biết kết quả của đoạn chương trình sau đây:

In [None]:
X = ['Sunny', 'Cool', 'High', 'Strong']
data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)
pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# pred is the label string 'yes' or 'no'. A non-empty string is always truthy,
# so the label is compared explicitly.
if pred == 'yes':
    print("Ad should go!")
else:
    print("Ad should not go!")

Ad should go!
