In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Raw observations: 14 rows, each ordered as
# [Outlook, Temperature, Humidity, Wind, Decision] to match the `columns`
# list used when the DataFrame is built. The last entry ("Yes"/"No") is the label.
data = [
    ["Sunny", "Hot", "High", "Weak", "No"],
    ["Sunny", "Hot", "High", "Strong", "No"],
    ["Overcast", "Hot", "High", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny", "Mild", "High", "Weak", "No"],
    ["Sunny", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "Normal", "Weak", "Yes"],
    ["Sunny", "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High", "Strong", "Yes"],
    ["Overcast", "Hot", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Strong", "No"]
]

In [3]:
# Column labels for the rows in `data`, in positional order; 'Decision' is the
# label column that is later split off as the target `y`.
columns = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'Decision']

In [4]:
# Build the working DataFrame from the raw rows, naming each position per `columns`.
df = pd.DataFrame(data, columns=columns)

In [6]:
# Preview up to the first 20 rows of the frame.
df.head(20)

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Decision
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [20]:
class NaiveBayesClassifier:
    """Naive Bayes classifier for categorical features with add-one smoothing.

    Every feature is treated as a categorical variable. For each class ``c``
    and feature ``j`` the model stores the Laplace-smoothed likelihood of every
    category observed for that feature anywhere in the training data:

        P(value | c) = (count(value, c) + 1) / (n_c + k_j)

    where ``n_c`` is the number of training rows of class ``c`` and ``k_j`` is
    the number of distinct values feature ``j`` takes in the training data.

    Attributes set by ``fit``:
        classes: sorted array of distinct class labels.
        class_counts: number of training rows per class, aligned with ``classes``.
        class_probs: prior probability per class, aligned with ``classes``.
        feature_values: list (one entry per feature) of the sorted distinct
            values seen for that feature in the training data.
        feature_probs: dict mapping class label -> list (one entry per feature)
            of probability arrays aligned with ``feature_values[j]``.
    """

    def fit(self, X, y):
        """Estimate class priors and smoothed per-feature likelihoods.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) holding
                categorical values (strings, ints, bools, ...).
            y: 1-D array-like of length n_samples with the class labels.

        Raises:
            ValueError: if ``X`` is not 2-D, if ``y`` is empty, or if ``X`` and
                ``y`` disagree on the number of samples.
        """
        # np.asarray lets callers pass lists or DataFrames as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if len(y) == 0:
            raise ValueError("cannot fit on an empty training set")
        if X.shape[0] != len(y):
            raise ValueError("X and y must contain the same number of samples")

        self.classes, counts = np.unique(y, return_counts=True)
        self.class_counts = counts
        self.class_probs = counts / len(y)

        # Categories are collected over the WHOLE training set so that every
        # class shares the same category axis; a value that never occurs within
        # a class then still gets its smoothed (non-zero) probability.
        self.feature_values = [np.unique(X[:, j]) for j in range(X.shape[1])]
        self._value_index = [
            {value: position for position, value in enumerate(values.tolist())}
            for values in self.feature_values
        ]

        self.feature_probs = {}
        for c, n_c in zip(self.classes, counts):
            class_data = X[y == c]
            per_feature = []
            for j, lookup in enumerate(self._value_index):
                value_counts = np.zeros(len(lookup))
                seen, seen_counts = np.unique(class_data[:, j], return_counts=True)
                for value, count in zip(seen.tolist(), seen_counts):
                    value_counts[lookup[value]] = count
                # Laplace smoothing: add one pseudo-observation per category.
                per_feature.append((value_counts + 1) / (n_c + len(lookup)))
            self.feature_probs[c] = per_feature

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) with the same
                feature order used in ``fit``. An empty input yields ``[]``.

        Returns:
            A list with one predicted class label per row. Ties go to the class
            that comes first in ``self.classes``.

        Raises:
            ValueError: if a non-empty ``X`` is not 2-D or its number of
                features differs from the training data.
        """
        X = np.asarray(X)
        if X.size == 0:
            return []
        if X.ndim != 2 or X.shape[1] != len(self.feature_values):
            raise ValueError(
                "X must be 2-dimensional with %d features" % len(self.feature_values)
            )

        # Scores are accumulated in log space so that products of many small
        # probabilities cannot underflow to zero.
        log_priors = np.log(self.class_probs)
        predictions = []
        for instance in X:
            scores = []
            for i, c in enumerate(self.classes):
                score = log_priors[i]
                for j, value in enumerate(instance):
                    lookup = self._value_index[j]
                    position = lookup.get(value)
                    if position is None:
                        # Value never seen during training: fall back to the
                        # smoothed probability of a zero-count category.
                        likelihood = 1.0 / (self.class_counts[i] + len(lookup))
                    else:
                        likelihood = self.feature_probs[c][j][position]
                    score += np.log(likelihood)
                scores.append(score)
            predictions.append(self.classes[np.argmax(scores)])
        return predictions

In [27]:
# One-hot encode the four predictor columns; 'Decision' is not listed, so it
# is carried through unchanged as the label column.
df_encoded = pd.get_dummies(
    df,
    columns=['Outlook', 'Temperature', 'Humidity', 'Wind'],
)

# Separate the label (target) from the indicator columns (features)
y = df_encoded['Decision']
X = df_encoded.drop(columns='Decision')

# Hold out 10% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.1
)

In [28]:
# Inspect the held-out feature rows produced by the train/test split.
X_test.head()

Unnamed: 0,Outlook_Overcast,Outlook_Rain,Outlook_Sunny,Temperature_Cool,Temperature_Hot,Temperature_Mild,Humidity_High,Humidity_Normal,Wind_Strong,Wind_Weak
9,False,True,False,False,False,True,False,True,False,True
11,True,False,False,False,False,True,True,False,True,False


In [29]:
# Fitting Naive Bayes classifier
# NOTE(review): X_train holds the one-hot indicator columns produced by
# pd.get_dummies, so each indicator is handed to CategoricalNB as its own
# feature — confirm this is intended rather than one integer-coded column per
# original attribute. The hand-written NaiveBayesClassifier defined earlier is
# not used in the cells shown here.
nb_classifier = CategoricalNB()
nb_classifier.fit(X_train, y_train)

In [30]:
# Making predictions
# One predicted label per held-out row; compared against y_test when the
# metrics are computed.
y_pred = nb_classifier.predict(X_test)

In [31]:
# Evaluate the classifier
# Pin the label order to the classes the model was trained on, so the matrix
# keeps one row/column per class even if a class is missing from the tiny
# test split (rows = true labels, columns = predicted labels).
conf_matrix = confusion_matrix(y_test, y_pred, labels=nb_classifier.classes_)
accuracy = accuracy_score(y_test, y_pred)
# With so few test rows a class can have no true or no predicted samples, which
# makes its per-class score undefined. zero_division=0 states explicitly that
# such a class scores 0 (the same value the default uses) without emitting an
# UndefinedMetricWarning.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

  _warn_prf(average, modifier, msg_start, len(result))


In [32]:
# Display results: the confusion matrix first, then one line per scalar metric
print("Confusion Matrix:")
print(conf_matrix)
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)

Confusion Matrix:
[[0 0]
 [1 1]]
Accuracy: 0.5
Precision: 1.0
Recall: 0.5
F1 Score: 0.6666666666666666
