In [556]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as m 
from sklearn.model_selection import train_test_split
from collections import Counter

In [557]:
# Toy traffic data set: 3 weather types x 2 parts of the week x 3 times of day = 18 rows.
# The four lists are parallel: index i of each list describes the same observation.
weather = ['Clear'] * 6 + ['Rainy'] * 6 + ['Snowy'] * 6

# Within each weather type: three workday rows followed by three weekend rows.
timeOfWeek = (['Workday'] * 3 + ['Weekend'] * 3) * 3

# Morning / Lunch / Evening repeats once per (weather, timeOfWeek) pair.
timeOfDay = ['Morning', 'Lunch', 'Evening'] * 6

# Target label for each row (Morning, Lunch, Evening within each group).
trafficJam = [
    'Yes', 'No', 'Yes',    # Clear / Workday
    'No', 'No', 'No',      # Clear / Weekend
    'Yes', 'Yes', 'Yes',   # Rainy / Workday
    'No', 'No', 'No',      # Rainy / Weekend
    'Yes', 'Yes', 'Yes',   # Snowy / Workday
    'Yes', 'No', 'Yes',    # Snowy / Weekend
]

In [558]:
# Assemble the four parallel lists into one table, one column per list.
data = pd.DataFrame({
    'weather': weather,
    'timeOfWeek': timeOfWeek,
    'timeOfDay': timeOfDay,
    'trafficJam': trafficJam,
})

In [559]:
# Separate the three predictor columns from the target column, both as NumPy arrays.
feature = np.array(data[['weather', 'timeOfWeek', 'timeOfDay']])
label = np.array(data['trafficJam'])

In [560]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    label,
    test_size=0.3,
    random_state=520,
)

In [561]:
class CategoricalNB:
    """Naive Bayes classifier for categorical features with additive smoothing.

    After ``fit``:
      * ``model`` maps each label to a list holding one dict per feature
        column; each dict maps a category to the smoothed P(category | label).
      * ``priors`` maps each label to its relative frequency in the training
        labels.
      * ``categories`` lists, per feature column, every category seen in the
        training rows (in order of first appearance).
    """

    def __init__(self, alpha):
        """Store the additive smoothing strength ``alpha`` (1 is Laplace smoothing)."""
        self.alpha = alpha
        self.model = None
        self.priors = None
        self.categories = None
        self._class_counts = None

    @staticmethod
    def _log(probability):
        """Natural log that maps a zero probability to -inf instead of raising."""
        return m.log(probability) if probability > 0 else float("-inf")

    def summarize(self, data, categories=None):
        """Return the smoothed category distribution of every feature column.

        Parameters
        ----------
        data : iterable of rows
            Rows that all belong to one class; each row is a sequence of
            hashable category values.
        categories : list of iterables, optional
            The categories to assign probability to, one iterable per column.
            When omitted, the categories observed in ``data`` are used.

        Returns
        -------
        list of dict
            One dict per column mapping category ->
            (count + alpha) / (n_rows + n_categories * alpha).
        """
        columns = list(zip(*data))
        if categories is None:
            categories = [list(dict.fromkeys(column)) for column in columns]
        summary = []
        for column, known in zip(columns, categories):
            counts = Counter(column)
            denominator = len(column) + len(known) * self.alpha
            # Iterating over `known` (not just the observed values) gives
            # zero-count categories their smoothed share as well.
            summary.append({
                category: (counts[category] + self.alpha) / denominator
                for category in known
            })
        return summary

    def fit(self, X_train, y_train):
        """Estimate class priors and per-class category distributions.

        Parameters
        ----------
        X_train : iterable of rows
            Training rows of hashable category values.
        y_train : iterable
            Hashable label for each row of ``X_train``.

        Returns
        -------
        str
            A fixed confirmation message.
        """
        # Group rows by label; insertion order (first appearance in y_train)
        # later makes tie-breaking in `predict` deterministic.
        grouped = {}
        for row, label in zip(X_train, y_train):
            grouped.setdefault(label, []).append(row)

        all_rows = [row for rows in grouped.values() for row in rows]
        total = len(all_rows)

        # Categories are collected over all classes so every class assigns a
        # probability to every category seen anywhere in training.
        self.categories = [list(dict.fromkeys(column)) for column in zip(*all_rows)]
        self._class_counts = {label: len(rows) for label, rows in grouped.items()}
        self.priors = {label: count / total for label, count in self._class_counts.items()}
        self.model = {
            label: self.summarize(rows, self.categories)
            for label, rows in grouped.items()
        }
        return "Categorical Naive Bayes Training is Done!"

    def predict(self, data):
        """Return the most probable label for one row of category values.

        Scores are log P(label) + sum_i log P(data[i] | label). On a tie the
        label that appeared first in the training labels wins.

        Raises
        ------
        ValueError
            If the model has not been fitted or was fitted on no rows.
        """
        if not self.model:
            raise ValueError("CategoricalNB must be fitted on non-empty data before predicting")

        scores = {}
        for label, tables in self.model.items():
            score = self._log(self.priors[label])
            for i, table in enumerate(tables):
                probability = table.get(data[i])
                if probability is None:
                    # Category never seen during training for any class:
                    # treat it as a zero count under the same smoothing.
                    probability = self.alpha / (
                        self._class_counts[label] + len(self.categories[i]) * self.alpha
                    )
                score += self._log(probability)
            scores[label] = score
        # max() returns the first maximal key, i.e. the earliest-seen label on ties.
        return max(scores, key=scores.get)

    def score(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` predicted as ``y_test``.

        Raises ZeroDivisionError when ``y_test`` is empty.
        """
        right = sum(1 for X, y in zip(X_test, y_test) if self.predict(X) == y)
        return right / len(y_test)
        

In [562]:
# alpha=1 is the additive smoothing strength passed to the classifier.
model = CategoricalNB(alpha=1)

In [563]:
# Train on the training split; fit() returns a status string, shown as the cell output.
model.fit(X_train, y_train)

'Categorical Naive Bayes Training is Done!'

In [564]:
# Accuracy on the held-out split: fraction of test rows whose predicted label equals the true label.
model.score(X_test, y_test)

1.0