In [None]:
import numpy as np

def create_train_data():
    """Build the toy play-tennis training set.

    Returns:
        A 2-D NumPy array of strings with one row per sample. Each row holds
        four categorical feature values followed by the class label
        ('yes' or 'no') in the last column.
    """
    # One whitespace-separated record per sample: four features, then the label.
    records = (
        'Sunny Hot High Weak no',
        'Sunny Hot High Strong no',
        'Overcast Hot High Weak yes',
        'Rain Mild High Weak yes',
        'Rain Cool Normal Weak yes',
        'Rain Cool Normal Strong no',
        'Overcast Cool Normal Strong yes',
        'Overcast Mild High Weak no',
        'Sunny Cool Normal Weak yes',
        'Rain Mild Normal Weak yes',
    )
    return np.array([record.split() for record in records])

# Build the toy dataset and print it so the rows can be inspected.
train_data = create_train_data()
print(train_data)

[['Sunny' 'Hot' 'High' 'Weak' 'no']
 ['Sunny' 'Hot' 'High' 'Strong' 'no']
 ['Overcast' 'Hot' 'High' 'Weak' 'yes']
 ['Rain' 'Mild' 'High' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Cool' 'Normal' 'Strong' 'no']
 ['Overcast' 'Cool' 'Normal' 'Strong' 'yes']
 ['Overcast' 'Mild' 'High' 'Weak' 'no']
 ['Sunny' 'Cool' 'Normal' 'Weak' 'yes']
 ['Rain' 'Mild' 'Normal' 'Weak' 'yes']]


In [None]:
# Mount Google Drive at /content/drive; the google.colab import is Colab-specific.
# NOTE(review): no visible cell reads anything from the mounted drive --
# confirm this cell is still needed before keeping it in the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def compute_prior_probablity(train_data):
    """Estimate the prior probability of every class label.

    Args:
        train_data: 2-D array whose last column holds the class label.

    Returns:
        A 1-D array of relative label frequencies, ordered like the sorted
        unique labels produced by ``np.unique``.
    """
    labels = train_data[:, -1]
    _, label_counts = np.unique(labels, return_counts=True)
    n_samples = len(train_data)
    return label_counts / n_samples

# np.unique returns the labels sorted, so index 0 is 'no' and index 1 is 'yes'.
prior_probablity = compute_prior_probablity(train_data)
# NOTE(review): both printed labels are missing their closing ')'.
print("P(play tennis = No", prior_probablity[0])
print("P(play tennis = Yes", prior_probablity[1])

P(play tennis = No 0.4
P(play tennis = Yes 0.6


In [None]:
import numpy as np

def compute_conditional_probability(train_data, alpha=0.0):
    """Estimate P(feature value | class label) for every feature column.

    Args:
        train_data: 2-D array of categorical values; the last column is the
            class label and every other column is a feature.
        alpha: Additive (Laplace) smoothing constant. The default ``0.0``
            yields the plain relative frequencies; a positive value keeps
            feature values that never occur with a label from getting a
            probability of exactly zero.

    Returns:
        A tuple ``(conditional_probability, list_x_name)`` where
        ``conditional_probability[label][i][value]`` is the estimated
        probability that feature column ``i`` equals ``value`` given
        ``label``, and ``list_x_name[i]`` is the sorted array of unique
        values found in feature column ``i``.

    Raises:
        ValueError: If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative, got {!r}".format(alpha))

    labels = train_data[:, -1]
    y_unique = np.unique(labels)

    # Split the rows by class once instead of re-filtering them for every feature.
    rows_by_label = {label: train_data[labels == label] for label in y_unique}

    list_x_name = []
    conditional_probability = {label: {} for label in y_unique}

    for i in range(train_data.shape[1] - 1):  # every column except the label
        x_unique = np.unique(train_data[:, i])
        list_x_name.append(x_unique)

        for label in y_unique:
            column = rows_by_label[label][:, i]
            # With alpha == 0 this is just the number of rows carrying the label.
            denominator = column.shape[0] + alpha * x_unique.size
            conditional_probability[label][i] = {
                x_value: (np.count_nonzero(column == x_value) + alpha) / denominator
                for x_value in x_unique
            }

    return conditional_probability, list_x_name

# Recompute the likelihood tables from the toy dataset and show them,
# followed by the unique values found in each of the four feature columns.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)
print(conditional_probability)
for feature_number in (1, 2, 3, 4):
    print("x%d =" % feature_number, list_x_name[feature_number - 1])

{'no': {0: {'Overcast': 0.25, 'Rain': 0.25, 'Sunny': 0.5}, 1: {'Cool': 0.25, 'Hot': 0.5, 'Mild': 0.25}, 2: {'High': 0.75, 'Normal': 0.25}, 3: {'Strong': 0.5, 'Weak': 0.5}}, 'yes': {0: {'Overcast': 0.3333333333333333, 'Rain': 0.5, 'Sunny': 0.16666666666666666}, 1: {'Cool': 0.5, 'Hot': 0.16666666666666666, 'Mild': 0.3333333333333333}, 2: {'High': 0.3333333333333333, 'Normal': 0.6666666666666666}, 3: {'Strong': 0.16666666666666666, 'Weak': 0.8333333333333334}}}
x1 = ['Overcast' 'Rain' 'Sunny']
x2 = ['Cool' 'Hot' 'Mild']
x3 = ['High' 'Normal']
x4 = ['Strong' 'Weak']


In [None]:
import numpy as np

def get_index_from_value(feature_name, list_features):
    """Return the position of the first entry equal to ``feature_name``.

    Args:
        feature_name: The value to look for.
        list_features: Array-like of candidate values. It is converted with
            ``np.asarray`` so a plain Python list is compared element by
            element instead of as a whole object.

    Returns:
        The index (a NumPy integer) of the first match along the first axis.

    Raises:
        IndexError: If ``feature_name`` does not occur in ``list_features``.
    """
    candidates = np.asarray(list_features)
    matches = np.where(candidates == feature_name)[0]
    if matches.size == 0:
        # Same exception type as the bare [0][0] lookup, but with a readable message.
        raise IndexError(
            "{!r} not found among {!r}".format(feature_name, candidates.tolist())
        )
    return matches[0]

# Rebuild the tables so this cell can be run on its own.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Unique values of feature column 0, as returned (sorted) by np.unique.
outlook = list_x_name[0]

# Look up the position of each value inside that sorted array.
i1 = get_index_from_value("Overcast", outlook)
i2 = get_index_from_value("Rain", outlook)
i3 = get_index_from_value("Sunny", outlook)

print(i1, i2, i3)

0 1 2


In [None]:
# NOTE(review): these statements repeat the previous cell's lookup demo
# verbatim -- confirm whether this duplicate cell is intentional.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Unique values of feature column 0, as returned (sorted) by np.unique.
outlook = list_x_name[0]

# Look up the position of each value inside that sorted array.
i1 = get_index_from_value("Overcast", outlook)
i2 = get_index_from_value("Rain", outlook)
i3 = get_index_from_value("Sunny", outlook)

print(i1, i2, i3)


0 1 2


In [None]:
# Rebuild the tables so this cell can be run on its own.
train_data = create_train_data()
conditional_probability, list_x_name = compute_conditional_probability(train_data)

# Compute P("Outlook" = "Sunny" | Play Tennis = "No")
# NOTE(review): x1 is assigned here but the print below indexes the table
# by the 'Sunny' key directly and never reads x1.
x1 = get_index_from_value("Sunny", list_x_name[0])

# Table layout is conditional_probability[label][feature column][value].
print("P('Outlook' = 'Sunny' | Play Tennis = 'No') = ", np.round(conditional_probability['no'][0]['Sunny'], 2))

P('Outlook' = 'Sunny' | Play Tennis = 'No') =  0.5


In [None]:
def train_naive_bayes(train_data):
    """Compute the Naive Bayes tables for ``train_data``.

    Args:
        train_data: 2-D array whose last column holds the class label.

    Returns:
        A tuple ``(prior_probability, conditional_probability, list_x_name)``:
        the result of ``compute_prior_probablity`` followed by the two values
        returned by ``compute_conditional_probability``.
    """
    # Class priors first, then the per-feature likelihood tables.
    priors = compute_prior_probablity(train_data)
    likelihoods, feature_values = compute_conditional_probability(train_data)
    return priors, likelihoods, feature_values


In [None]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
    """Classify one sample with the Naive Bayes decision rule.

    Args:
        X: Sequence of feature values, one per feature column, in column order.
        list_x_name: Per-feature collections of the values known to the model;
            its length determines how many features are used.
        prior_probability: Class priors where index 0 belongs to 'no' and
            index 1 belongs to 'yes'.
        conditional_probability: Nested mapping
            ``conditional_probability[label][i][value]`` holding
            P(feature i = value | label) for the labels 'yes' and 'no'.

    Returns:
        ``1`` if the unnormalized score for 'yes' is strictly greater than
        the one for 'no', otherwise ``0``.

    Raises:
        IndexError: If ``X`` has fewer entries than ``list_x_name`` or holds a
            value that is not listed for its feature.
    """
    # Unnormalized posteriors: prior times the product of the likelihoods.
    p_yes = prior_probability[1]
    p_no = prior_probability[0]

    for i in range(len(list_x_name)):
        value = X[i]
        if value not in list_x_name[i]:
            raise IndexError(
                "value {!r} is unknown for feature {}".format(value, i)
            )
        # Multiply in column order so the floating-point result does not
        # depend on how many features there are.
        p_yes = p_yes * conditional_probability['yes'][i][value]
        p_no = p_no * conditional_probability['no'][i][value]

    # Ties go to class 0.
    return 1 if p_yes > p_no else 0

In [None]:
# Classify one sample with tables learned from the toy dataset.
X = ['Sunny', 'Cool', 'High', 'Strong']
train_data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(train_data)

pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# A truthy prediction (1) selects the first message, 0 the second.
# NOTE(review): the messages mention an "Ad" although the model predicts
# play tennis yes/no -- confirm the intended wording.
print("Ad should go!" if pred else "Ad should not go!")

Ad should not go!
