In [19]:
import numpy as np

In [20]:
def create_train_data(file_path='play-tennis.txt'):
    """Load the comma-separated training set as a 2-D array of strings.

    Parameters
    ----------
    file_path : str or path-like, optional
        Location of the data file. Defaults to 'play-tennis.txt' so existing
        calls without arguments keep reading the same file.

    Returns
    -------
    numpy.ndarray
        String array with one row per line of the file and one column per
        comma-separated field.
    """
    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single row, so column slicing such as data[:, -1] keeps working.
    data = np.loadtxt(file_path, delimiter=',', dtype='str', ndmin=2)
    return data

In [21]:
def compute_prior_probability(train_data):
  """Estimate the prior probability of every class label.

  The class label is read from the last column of `train_data`. The result
  maps each distinct label to its relative frequency among all rows.
  """
  labels, counts = np.unique(train_data[:, -1], return_counts=True)
  n_samples = len(train_data)
  # Relative frequency of each label = its count over the number of rows.
  return {label: count / n_samples for label, count in zip(labels, counts)}

In [48]:
def compute_conditional_probability(train_data):
  """Estimate P(feature value | class) for every feature column.

  The last column of `train_data` holds the class label; every other column
  is treated as a categorical feature.

  Returns
  -------
  conditional_probability : list of numpy.ndarray
      One table per feature. Row j corresponds to the j-th distinct class
      label and column k to the k-th distinct value of that feature, both in
      the sorted order returned by np.unique (so a no/yes dataset yields
      row 0 = 'no', row 1 = 'yes').
  list_x_name : list of numpy.ndarray
      The distinct values of each feature, in the same order as the columns
      of the matching table.

  Notes
  -----
  Probabilities are plain relative frequencies with no smoothing, so a
  feature value never observed together with a class gets probability 0.
  """
  labels = train_data[:, -1]
  # Take the classes from the data itself instead of assuming 'no'/'yes';
  # every label found this way occurs at least once, so y_count is never 0.
  y_unique = np.unique(labels)
  conditional_probability = []
  list_x_name = []
  for i in range(train_data.shape[1] - 1):
    column = train_data[:, i]
    x_unique = np.unique(column)
    list_x_name.append(x_unique)

    x_conditional_probability = np.zeros((len(y_unique), len(x_unique)))
    for j, y in enumerate(y_unique):
      # The class mask does not depend on the feature value: build it once.
      in_class = labels == y
      y_count = np.sum(in_class)
      for k, x in enumerate(x_unique):
        x_count = np.sum((column == x) & in_class)
        x_conditional_probability[j, k] = x_count / y_count
    conditional_probability.append(x_conditional_probability)

  return conditional_probability, list_x_name

In [28]:
def get_index_from_value(feature_name, list_features):
  """Return the position of the first entry of `list_features` equal to `feature_name`.

  Parameters
  ----------
  feature_name : str
      The value to look up.
  list_features : array-like
      The candidate values; a numpy array or a plain Python sequence.

  Raises
  ------
  IndexError
      If `feature_name` does not occur in `list_features`.
  """
  # Coerce to an array so the comparison is element-wise for plain lists too
  # (comparing a Python list with a string yields a single False).
  features = np.asarray(list_features)
  matches = np.where(features == feature_name)[0]
  if matches.size == 0:
    raise IndexError(
      f"value {feature_name!r} not found among {features.tolist()!r}"
    )
  return matches[0]

In [23]:
# Estimate the class priors from the training file and print both of them.
train_data = create_train_data()
prior_probability = compute_prior_probability(train_data)

print(f"P(play tennis = yes) = {prior_probability['yes']}")
print(f"P(play tennis = no) = {prior_probability['no']}")

P(play tennis = yes) = 0.6
P(play tennis = no) = 0.4


In [53]:
# Build one likelihood table per feature and keep the Outlook categories handy.
conditional_probability, list_x_name = compute_conditional_probability(create_train_data())
outlook = list_x_name[0]

# Show every table: one row per class label, one column per feature value.
for table in conditional_probability:
  print(table)

# Look up the 'Sunny' column of the Outlook table; row 1 is 'yes', row 0 is 'no'.
x1 = get_index_from_value('Sunny', outlook)
outlook_table = conditional_probability[0]
print("P(Outlook = Sunny | play tennis = yes) = ", outlook_table[1,x1])
print("P(Outlook = Sunny | play tennis = no) = ", outlook_table[0,x1])

[[0.25       0.25       0.5       ]
 [0.33333333 0.5        0.16666667]]
[[0.25       0.5        0.25      ]
 [0.5        0.16666667 0.33333333]]
[[0.75       0.25      ]
 [0.33333333 0.66666667]]
[[0.5        0.5       ]
 [0.16666667 0.83333333]]
P(Outlook = Sunny | play tennis = yes) =  0.16666666666666666
P(Outlook = Sunny | play tennis = no) =  0.5


In [54]:
def train_naive_bayes(train_data):
  """Fit the Naive Bayes model: class priors plus per-feature likelihood tables.

  Returns a 3-tuple `(prior_probability, conditional_probability, list_x_name)`
  built from `compute_prior_probability` and `compute_conditional_probability`.
  """
  likelihood_tables, feature_values = compute_conditional_probability(train_data)
  priors = compute_prior_probability(train_data)
  return priors, likelihood_tables, feature_values

In [55]:
def prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability):
  """Predict the class of one sample with the Naive Bayes decision rule.

  Parameters
  ----------
  X : sequence of str
      The observed value of each feature, in column order.
  list_x_name : list of array-like
      Distinct values of each feature; position k of entry f names column k
      of `conditional_probability[f]`.
  prior_probability : dict
      Class priors, keyed by 'no' and 'yes'.
  conditional_probability : list of numpy.ndarray
      One likelihood table per feature, with row 0 = 'no' and row 1 = 'yes'.

  Returns
  -------
  int
      0 when 'no' has the strictly larger score, otherwise 1 ('yes').

  Notes
  -----
  Each class is scored as P(class) * prod_f P(x_f | class). The evidence
  P(X) is the same for both classes, so it is left out of the comparison.
  """
  # Row of each class inside every likelihood table.
  class_labels = ('no', 'yes')

  # Start from the prior, then multiply in one likelihood per feature.
  scores = [prior_probability[label] for label in class_labels]
  for value, names, table in zip(X, list_x_name, conditional_probability):
    column = get_index_from_value(value, names)
    for row in range(len(class_labels)):
      # Each class reads its OWN row; mixing rows would make both scores equal.
      scores[row] *= table[row, column]

  p0, p1 = scores
  return 0 if p0 > p1 else 1

In [56]:
# Sample to classify: one value per feature, in column order.
X = ['Sunny', 'Cool', 'High', 'Strong']

# Fit the model on the training file, then classify the sample.
data = create_train_data()
prior_probability, conditional_probability, list_x_name = train_naive_bayes(data)
y_pred = prediction_play_tennis(X, list_x_name, prior_probability, conditional_probability)

# 0 encodes 'no'; any other prediction is reported as 'yes'.
print("Play tennis = No" if y_pred == 0 else "Play tennis = Yes")

Play tennis = No
