# **Naive Bayes Classifier**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [None]:
# Read the PlayTennis table and remove its "day" column in one chained
# expression, so `data` is bound exactly once to the frame used downstream.
data = pd.read_csv('PlayTennis.csv').drop(columns=["day"])

display(data)

Unnamed: 0,outlook,temperature,humidity,wind,play
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [None]:
# Features are every column except the last; the last column is the class label.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

# shuffle=False splits the rows in their existing order instead of at random,
# so re-running the cell always yields the same train/test partition.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    shuffle=False,
)

In [None]:
# Class priors P('Yes') and P('No'): the share of training labels in each class.
train_label_count = Y_train.count()
PositiveProbabilities = (Y_train == 'Yes').sum() / train_label_count
NegativeProbabilities = (Y_train == 'No').sum() / train_label_count

In [None]:
# Conditional likelihood table for the Naive Bayes model, estimated as relative
# frequencies over the training rows:
#   FeatureProbabilities[column][value] == [P(value | 'Yes'), P(value | 'No')]
# Only values that occur in X_train get an entry, and no smoothing is applied,
# so a value never seen together with a class has probability 0 for that class.
FeatureProbabilities = {}

# The class masks and class sizes are the same for every feature; build them once.
yes_rows = Y_train == 'Yes'
no_rows = Y_train == 'No'
yes_total = yes_rows.sum()
no_total = no_rows.sum()

for col in X_train.columns:
    FeatureProbabilities[col] = {}

    for possibility in X_train[col].unique():
        value_rows = X_train[col] == possibility

        # Keep each ratio on one logical line: a trailing `/` outside brackets
        # does not continue a statement in Python.
        YesProbabilities = (value_rows & yes_rows).sum() / yes_total
        NoProbabilities = (value_rows & no_rows).sum() / no_total

        FeatureProbabilities[col][possibility] = [YesProbabilities, NoProbabilities]

In [None]:
# Classify every test row: start from each class prior, multiply in the
# likelihood of every feature value, then keep the class with the larger score.
predictions = []

for i in range(len(X_test)):
    row = X_test.iloc[i]
    score_yes, score_no = PositiveProbabilities, NegativeProbabilities

    for feature in X_test.columns:
        # Each table entry is [P(value | 'Yes'), P(value | 'No')]; a value that
        # has no entry in the table raises KeyError here.
        given_yes, given_no = FeatureProbabilities[feature][row[feature]]
        score_yes *= given_yes
        score_no *= given_no

    # A tie (including both scores being 0) is resolved as 'No'.
    if score_yes > score_no:
        predictions.append('Yes')
    else:
        predictions.append('No')

    print(f'For test hypothesis: {list(X_test.iloc[i])}')
    print(f'Prediction: {predictions[-1]}\nActual value: {Y_test.iloc[i]}\n')

For test hypothesis: ['Sunny', 'Mild', 'Normal', 'Strong']
Prediction: No
Actual value: Yes

For test hypothesis: ['Overcast', 'Mild', 'High', 'Strong']
Prediction: Yes
Actual value: Yes

For test hypothesis: ['Overcast', 'Hot', 'Normal', 'Weak']
Prediction: Yes
Actual value: Yes

For test hypothesis: ['Rain', 'Mild', 'High', 'Strong']
Prediction: No
Actual value: No



In [None]:
# 2x2 table of counts: rows are the actual class, columns the predicted class.
class_labels = ['No', 'Yes']
confusion_matrix = pd.DataFrame(0, index=class_labels, columns=class_labels)

# Tally each test example into the cell for its (actual, predicted) pair.
for i, actual in enumerate(Y_test):
    confusion_matrix.loc[actual, predictions[i]] += 1

display(confusion_matrix)

Unnamed: 0,No,Yes
No,1,0
Yes,1,2


In [None]:
# Unpack the 2x2 confusion matrix (rows = actual class, columns = predicted
# class), treating 'Yes' as the positive class. The parentheses make the
# multi-line tuple a single statement.
tn, fp, fn, tp = (
    confusion_matrix.loc['No', 'No'],
    confusion_matrix.loc['No', 'Yes'],
    confusion_matrix.loc['Yes', 'No'],
    confusion_matrix.loc['Yes', 'Yes'],
)

# NOTE: none of the ratios below guard against a zero denominator.
# Precision: share of predicted positives that are actually positive.
precision = tp / (tp + fp)
# Recall: share of actual positives that were predicted positive; the
# numerator is the true positives, not the true negatives.
recall = tp / (tp + fn)
# Accuracy: share of all test examples that were classified correctly.
accuracy = (tp + tn) / (tp + tn + fp + fn)
# F1: harmonic mean of precision and recall.
f1score = (2 * precision * recall) / (precision + recall)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1score}")

Precision: 1.0
Recall: 0.3333333333333333
Accuracy: 0.75
F1 Score: 0.5
