In [4]:
import numpy as np
import pandas as pd


In [47]:
# Toy dataset: 14 observed days with the weather outlook and whether play took place.
outlook_observations = [
    'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Sunny', 'Rainy', 'Sunny',
    'Overcast', 'Rainy', 'Sunny', 'Sunny', 'Rainy', 'Overcast', 'Overcast',
]
play_decisions = [
    'Yes', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes',
    'Yes', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes',
]
data = pd.DataFrame({'Outlook': outlook_observations, 'Play': play_decisions})


In [48]:
# Encode the string columns as integers: Outlook becomes a binary "is it sunny?"
# flag and Play becomes the 0/1 target.
# NOTE: the columns are overwritten in place, so running this cell a second time
# maps the already-encoded integers to NaN (Series.map leaves unmatched values as NaN).
outlook_codes = {'Sunny': 1, 'Rainy': 0, 'Overcast': 0}
play_codes = {'Yes': 1, 'No': 0}

data['Outlook'] = data['Outlook'].map(outlook_codes)
data['Play'] = data['Play'].map(play_codes)

# 1-D arrays: one feature value and one label per observed day.
X = data['Outlook'].to_numpy()
y = data['Play'].to_numpy()


In [49]:
class BernoulliNaiveBayes:
    """Naive Bayes classifier for a 0/1 target and a single discrete feature.

    ``fit`` stores the class priors in ``class_probs[label]`` and, for every
    feature value seen in training, its relative frequency within each class
    in ``feature_probs[label][value]``. No smoothing is applied when fitting;
    ``predict`` floors every probability at ``_MIN_PROB`` before taking logs.
    """

    # Floor applied before np.log so that zero counts and unseen values
    # contribute a large negative score instead of log(0).
    _MIN_PROB = 1e-10

    def __init__(self):
        self.class_probs = {}    # label -> P(label)
        self.feature_probs = {}  # label -> {feature value -> P(value | label)}

    @staticmethod
    def _as_feature_vector(X):
        """Return X as a 1-D array; accepts shape (n,) or (n, 1).

        Raises:
            ValueError: if X has more than one feature column.
        """
        X = np.asarray(X)
        if X.ndim == 2 and X.shape[1] == 1:
            X = X[:, 0]
        if X.ndim != 1:
            raise ValueError(
                "expected a single feature: X must have shape (n,) or (n, 1), "
                f"got {X.shape}"
            )
        return X

    def _log_prob(self, prob):
        """Log of ``prob`` after flooring it at ``_MIN_PROB``."""
        return np.log(max(prob, self._MIN_PROB))

    def fit(self, X, y):
        """Estimate class priors and per-class feature-value frequencies.

        Args:
            X: feature values, shape (n,) or (n, 1).
            y: labels in {0, 1}, shape (n,).
        """
        X = self._as_feature_vector(X)
        y = np.asarray(y)

        self.class_probs[1] = np.mean(y)  # Probability of 'Yes' (Play)
        self.class_probs[0] = 1 - self.class_probs[1]  # Probability of 'No' (Don't Play)

        observed_values = np.unique(X)
        for label in (0, 1):
            class_values = X[y == label]
            n_class = len(class_values)
            self.feature_probs[label] = {
                # A class with no training rows gets probability 0 for every value
                # (floored later in predict) instead of dividing by zero.
                value: ((class_values == value).sum() / n_class if n_class else 0.0)
                for value in observed_values
            }

    def predict(self, X):
        """Predict a 0/1 label for every feature value in X.

        Each class is scored with log P(class) + log P(value | class); ties go
        to class 1.

        Args:
            X: feature values, shape (n,) or (n, 1).

        Returns:
            Integer numpy array of shape (n,) with the predicted labels.
        """
        values = self._as_feature_vector(X)
        predictions = []

        for value in values:
            scores = {
                label: self._log_prob(self.class_probs[label])
                + self._log_prob(self.feature_probs[label].get(value, 0.0))
                for label in (1, 0)
            }
            predictions.append(1 if scores[1] >= scores[0] else 0)

        return np.array(predictions, dtype=int)



# Create the Bernoulli Naive Bayes model and fit it on the encoded Outlook
# feature (X) and the 0/1 Play labels (y) built in the encoding cell.
model = BernoulliNaiveBayes()
model.fit(X, y)


In [58]:
# Two query rows, each holding the single Outlook feature set to 1 (Sunny).
new_data = np.ones((2, 1), dtype=int)
print(new_data)

predictions = model.predict(new_data)
predictions

[[1]
 [1]]


array([1, 1])

In [59]:
# accuracy = np.mean(y == model.predict(X))
# print("Accuracy:", accuracy)


#### Approach 2

In [60]:
import numpy as np
import pandas as pd


In [61]:
# Golf dataset: 14 observed days with four weather attributes and the play decision.
outlook_observations = [
    'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Sunny', 'Overcast',
    'Rainy', 'Rainy', 'Sunny', 'Rainy', 'Overcast', 'Overcast', 'Sunny',
]
temperature_observations = [
    'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
    'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
]
humidity_observations = [
    'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
    'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
]
windy_observations = [
    False, True, False, False, False, True, True,
    False, False, False, True, True, False, True,
]
play_golf_decisions = [
    'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
    'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
]
data = pd.DataFrame({
    'Outlook': outlook_observations,
    'Temperature': temperature_observations,
    'Humidity': humidity_observations,
    'Windy': windy_observations,
    'Play Golf': play_golf_decisions,
})


In [62]:
# Encode the string columns as integer codes.
# Outlook and Humidity become binary flags; Temperature keeps three levels (0/1/2),
# so it is a categorical code rather than a true Bernoulli feature.
outlook_codes = {'Sunny': 1, 'Rainy': 0, 'Overcast': 0}
temperature_codes = {'Hot': 0, 'Mild': 1, 'Cool': 2}
humidity_codes = {'High': 0, 'Normal': 1}
play_codes = {'Yes': 1, 'No': 0}

data['Outlook'] = data['Outlook'].map(outlook_codes)
data['Temperature'] = data['Temperature'].map(temperature_codes)
data['Humidity'] = data['Humidity'].map(humidity_codes)
data['Play Golf'] = data['Play Golf'].map(play_codes)

# Windy is stored as bool. Selecting it together with the integer columns would
# give an object-dtype array, so cast the selection to int (True -> 1, False -> 0)
# to get a homogeneous integer feature matrix.
X = data[['Outlook', 'Temperature', 'Humidity', 'Windy']].astype(int).values
y = data['Play Golf'].values


In [76]:
class BernoulliNaiveBayes:
    """Naive Bayes classifier for a 0/1 target and discrete-valued features.

    ``fit`` stores the class priors in ``class_probs[label]`` and, for every
    feature column ``j`` and every value seen in that column, the relative
    frequency of the value within each class in
    ``feature_probs[label][j][value]``. No smoothing is applied when fitting;
    ``predict`` floors every probability at ``_MIN_PROB`` before taking logs.
    """

    # Floor applied before np.log so that zero counts and unseen values
    # contribute a large negative score instead of log(0).
    _MIN_PROB = 1e-10

    def __init__(self):
        self.class_probs = {}    # label -> P(label)
        self.feature_probs = {}  # label -> {column index -> {value -> P(value | label)}}

    def _log_prob(self, prob):
        """Log of ``prob`` after flooring it at ``_MIN_PROB``."""
        return np.log(max(prob, self._MIN_PROB))

    def fit(self, X, y):
        """Estimate class priors and per-class value frequencies for each column.

        Args:
            X: 2-D array of shape (n_samples, n_features) with discrete values.
            y: labels in {0, 1}, shape (n_samples,).
        """
        X = np.asarray(X)
        y = np.asarray(y)

        self.class_probs[1] = np.mean(y)  # Probability of 'Yes' (Play Golf)
        self.class_probs[0] = 1 - self.class_probs[1]  # Probability of 'No' (Don't Play Golf)

        # Values observed per column over the whole training set, so both classes
        # get an entry for every value (possibly with probability 0).
        column_values = [np.unique(X[:, j]) for j in range(X.shape[1])]

        for label in (0, 1):
            class_data = X[y == label]
            n_class = len(class_data)
            self.feature_probs[label] = {}

            for j, values in enumerate(column_values):
                self.feature_probs[label][j] = {
                    # A class with no training rows gets probability 0 everywhere
                    # (floored later in predict) instead of dividing by zero.
                    value: ((class_data[:, j] == value).sum() / n_class if n_class else 0.0)
                    for value in values
                }

    def predict(self, X):
        """Predict a 0/1 label for every row of X.

        Each class is scored with
        log P(class) + sum_j log P(x_j | class); ties go to class 1.

        Args:
            X: 2-D array of shape (n_samples, n_features), encoded like the
                training data.

        Returns:
            Integer numpy array of shape (n_samples,) with one label per row.
        """
        X = np.asarray(X)
        predictions = []

        for row in X:
            scores = {}
            for label in (1, 0):
                score = self._log_prob(self.class_probs[label])
                for j, value in enumerate(row):
                    score += self._log_prob(self.feature_probs[label][j].get(value, 0.0))
                scores[label] = score
            # Exactly one prediction per input row.
            predictions.append(1 if scores[1] >= scores[0] else 0)

        return np.array(predictions, dtype=int)



In [79]:
class BernoulliNaiveBayes:
    """Naive Bayes classifier for a 0/1 target and discrete-valued features.

    ``fit`` stores the class priors in ``class_probs[label]`` and, for every
    feature column ``j`` and every value seen in that column, the relative
    frequency of the value within each class in
    ``feature_probs[label][j][value]``. No smoothing is applied when fitting;
    ``predict`` floors every probability at ``_MIN_PROB`` before taking logs.
    """

    # Floor applied before np.log so that zero counts and unseen values
    # contribute a large negative score instead of log(0).
    _MIN_PROB = 1e-10

    def __init__(self):
        self.class_probs = {}    # label -> P(label)
        self.feature_probs = {}  # label -> {column index -> {value -> P(value | label)}}

    def _log_prob(self, prob):
        """Log of ``prob`` after flooring it at ``_MIN_PROB``."""
        return np.log(max(prob, self._MIN_PROB))

    def fit(self, X, y):
        """Estimate class priors and per-class value frequencies for each column.

        Args:
            X: 2-D array of shape (n_samples, n_features) with discrete values.
            y: labels in {0, 1}, shape (n_samples,).
        """
        X = np.asarray(X)
        y = np.asarray(y)

        self.class_probs[1] = np.mean(y)  # Probability of 'Yes' (Play Golf)
        self.class_probs[0] = 1 - self.class_probs[1]  # Probability of 'No' (Don't Play Golf)

        # Values observed per column over the whole training set, so both classes
        # get an entry for every value (possibly with probability 0).
        column_values = [np.unique(X[:, j]) for j in range(X.shape[1])]

        for label in (0, 1):
            class_data = X[y == label]
            n_class = len(class_data)
            self.feature_probs[label] = {}

            for j, values in enumerate(column_values):
                self.feature_probs[label][j] = {
                    # A class with no training rows gets probability 0 everywhere
                    # (floored later in predict) instead of dividing by zero.
                    value: ((class_data[:, j] == value).sum() / n_class if n_class else 0.0)
                    for value in values
                }

    def predict(self, X):
        """Predict a 0/1 label for every row of X.

        Each class is scored with
        log P(class) + sum_j log P(x_j | class), where P(x_j | class) is the
        stored frequency of the observed value itself (not its complement).
        Ties go to class 1.

        Args:
            X: 2-D array of shape (n_samples, n_features), encoded like the
                training data.

        Returns:
            Integer numpy array of shape (n_samples,) with one label per row.
        """
        X = np.asarray(X)
        predictions = []

        for row in X:
            scores = {}
            for label in (1, 0):
                score = self._log_prob(self.class_probs[label])
                for j, value in enumerate(row):
                    score += self._log_prob(self.feature_probs[label][j].get(value, 0.0))
                scores[label] = score
            predictions.append(1 if scores[1] >= scores[0] else 0)

        return np.array(predictions, dtype=int)


In [80]:
# Instantiate the classifier and fit it on the encoded golf features (X)
# and the 0/1 'Play Golf' labels (y) built in the encoding cell.
model = BernoulliNaiveBayes()
model.fit(X, y)


In [89]:
# Two unseen days, encoded in training column order:
# Outlook (1 = Sunny), Temperature (0 = Hot, 1 = Mild, 2 = Cool),
# Humidity (0 = High, 1 = Normal), Windy.
query_rows = [
    [1, 0, 1, False],  # Sunny, Hot, Normal humidity, not windy
    [0, 1, 0, False],  # Rainy/Overcast, Mild, High humidity, not windy
]
new_data = np.array(query_rows)

predictions = model.predict(new_data)
predictions


array([1, 1])

In [None]:
# Load the data
# data = pd.DataFrame({
#     'Outlook': ['Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Sunny', 'Rainy', 'Overcast', 'Overcast', 'Sunny'],
#     'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
#     'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
#     'Windy': [False, True, False, False, False, True, True, False, False, False, True, True, False, True],
#     'Play Golf': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
# })

## Approach 3

In [31]:
import numpy as np
import pandas as pd


# Read the golf dataset from golf_data.csv (path is relative to the notebook's
# working directory). The cells below read the columns 'Outlook', 'Temperature',
# 'Humidity', 'Windy' and 'Play Golf' from this frame.
data = pd.read_csv('golf_data.csv')


In [32]:

# Prior probabilities: P(outcome) = rows with that outcome / total rows
total_instances = len(data)
play_golf = data['Play Golf']
prior_prob_yes = (play_golf == 'Yes').sum() / total_instances
prior_prob_no = (play_golf == 'No').sum() / total_instances

# Likelihoods: P(feature == value | outcome), keyed by (feature, value, outcome).
# Keys are inserted feature by feature, value by value, 'Yes' before 'No'.
likelihood_probs = {}
for feature in ('Outlook', 'Temperature', 'Humidity', 'Windy'):
    column = data[feature]
    for value in column.unique():
        value_rows = column == value
        for outcome in ('Yes', 'No'):
            outcome_rows = play_golf == outcome
            # rows with this value AND this outcome / all rows with this outcome
            likelihood_probs[(feature, value, outcome)] = (
                (value_rows & outcome_rows).sum() / outcome_rows.sum()
            )

In [33]:
likelihood_data = likelihood_probs

# Turn the {(feature, value, outcome): probability} dictionary into a table
# indexed by Attribute / Value / Outcome with a single 'Probability' column.
likelihood_index = pd.MultiIndex.from_tuples(
    likelihood_data.keys(),
    names=['Attribute', 'Value', 'Outcome'],
)
likelyhood_data_disp = pd.DataFrame(
    likelihood_data.values(),
    index=likelihood_index,
    columns=['Probability'],
)

# Last expression of the cell: rendered as the cell output
likelyhood_data_disp


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Probability
Attribute,Value,Outcome,Unnamed: 3_level_1
Outlook,Rainy,Yes,0.222222
Outlook,Rainy,No,0.6
Outlook,Overcast,Yes,0.444444
Outlook,Overcast,No,0.0
Outlook,Sunny,Yes,0.333333
Outlook,Sunny,No,0.4
Temperature,Hot,Yes,0.222222
Temperature,Hot,No,0.4
Temperature,Mild,Yes,0.444444
Temperature,Mild,No,0.4


In [34]:

# Entering a new data point.
# Each answer is stripped of surrounding whitespace and capitalised so that
# inputs such as " rainy" or "HIGH" match the spellings listed in the prompts.
outlook = input("Enter Outlook: Sunny, Rainy, Overcast - ").strip().capitalize()
temperature = input("Enter Temperature: Hot , Mild, Cool - ").strip().capitalize()
humidity = input("Enter Humidity: High , Normal - ").strip().capitalize()
windy = input("Enter Windy: True , False - ").strip().capitalize()

# Windy is looked up as a boolean (presumably read_csv parses the column as bool;
# confirm against golf_data.csv). Anything that is not True/False is rejected:
# left as a string it would never match and would silently hit the 1e-10 fallback.
if windy == 'True':
    windy = True
elif windy == 'False':
    windy = False
else:
    raise ValueError(f"Windy must be True or False, got {windy!r}")

new_data = {
    'Outlook': outlook,
    'Temperature': temperature,
    'Humidity': humidity,
    'Windy': windy
}

Enter Outlook: Sunny, Rainy, Overcast - Rainy
Enter Temperature: Hot , Mild, Cool - Mild
Enter Humidity: High , Normal - High
Enter Windy: True , False - True


In [35]:

# Calculating posterior probabilities: for each outcome, the prior multiplied by
# the likelihood of every entered feature value (naive independence assumption).
MIN_LIKELIHOOD = 1e-10  # floor for likelihoods that are missing or exactly zero

posterior_prob_yes = prior_prob_yes
posterior_prob_no = prior_prob_no

for feature, value in new_data.items():
    # Floor both unseen (feature, value) pairs and stored zero likelihoods
    # (a value never observed with that outcome). Otherwise a single zero wipes
    # out the whole product, and two zeros make the normalisation below 0 / 0.
    posterior_prob_yes *= max(likelihood_probs.get((feature, value, 'Yes'), 0.0), MIN_LIKELIHOOD)
    posterior_prob_no *= max(likelihood_probs.get((feature, value, 'No'), 0.0), MIN_LIKELIHOOD)

# Normalizing probabilities so the two posteriors sum to 1
total_prob = posterior_prob_yes + posterior_prob_no
posterior_prob_yes /= total_prob
posterior_prob_no /= total_prob

# Predict the class having the higher probability (a tie goes to 'No')
prediction = 'Yes' if posterior_prob_yes > posterior_prob_no else 'No'
print(f"Probability of 'Yes': {posterior_prob_yes:.4f}")
print(f"Probability of 'No': {posterior_prob_no:.4f}")
print(f"Predicted class: {prediction}")

# Expected predictions for some inputs:
# Rainy , Mild, High, True -> No
# Sunny , Mild, High, False -> Yes
# Overcast, Mild, High, False -> Yes

Probability of 'Yes': 0.1464
Probability of 'No': 0.8536
Predicted class: No
