### Naive Bayes from scratch

Import Required Libraries

In [1]:
import numpy as np

In [2]:
# This function trains a categorical Naive Bayes classifier on the given training data by counting, for every class, how often each value of each feature occurs.


def fit(X_train, Y_train):
    """Build the per-class count tables used by the Naive Bayes classifier.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training samples. Every feature is treated as a discrete value and
        matched with ``==``.
    Y_train : array-like of shape (n_samples,)
        Class label of each training sample.

    Returns
    -------
    dict
        ``result["total_data"]`` is the number of training samples. For each
        class label ``c``, ``result[c]["total_count"]`` is the number of
        samples of that class and ``result[c][j][v]`` is how many of those
        samples have value ``v`` in feature ``j`` (features are numbered
        from 1). Every value observed for feature ``j`` in *any* class gets
        an entry, so (class, value) pairs that never co-occur are stored
        with a count of 0. For empty training data only ``"total_data"``
        (equal to 0) is present.
    """
    # Plain lists are accepted too: the boolean-mask and column indexing
    # below only work on ndarrays.
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    # Set once up front so the key exists even when there are no classes.
    result = {"total_data": len(Y_train)}
    if len(Y_train) == 0:
        return result

    num_features = X_train.shape[1]
    # The distinct values of a feature do not depend on the class, so they
    # are collected once instead of once per class.
    values_per_feature = [set(X_train[:, j]) for j in range(num_features)]

    for current_class in set(Y_train):
        class_rows = X_train[Y_train == current_class]
        class_counts = {"total_count": len(class_rows)}
        for j, feature_values in enumerate(values_per_feature, start=1):
            column = class_rows[:, j - 1]
            class_counts[j] = {
                value: (column == value).sum() for value in feature_values
            }
        result[current_class] = class_counts

    return result

In [3]:
# This function predicts the class label for a single data point 'x' using the trained categorical Naive Bayes model stored in 'dictionary' (the count tables returned by 'fit'); it returns the class with the highest log-probability score.

def predictSinglePoint(dictionary, x):
    """Return the class whose log-probability score for ``x`` is highest.

    ``dictionary`` is the model produced by ``fit``; its ``"total_data"``
    entry is bookkeeping, not a class, and is skipped. When several classes
    share the top score the one encountered first wins. If the model holds
    no classes at all, -1 is returned.
    """
    winner = -1
    top_score = None  # None until the first class has been scored
    for label in dictionary:
        if label == "total_data":
            continue
        score = probability(dictionary, x, label)
        # The first scored class is always accepted; afterwards only a
        # strictly better score replaces the current winner.
        if top_score is None or score > top_score:
            top_score, winner = score, label
    return winner

In [4]:
# This function computes the unnormalised log-posterior of a data point 'x' for a specific class ('current_class'): the log prior of the class plus the add-one (Laplace) smoothed log-likelihood of every feature value, using the categorical Naive Bayes model stored in 'dictionary'.


def probability(dictionary, x, current_class):
    """Return the unnormalised log-posterior of ``current_class`` for ``x``.

    The score is ``log P(class)`` plus, for every feature ``j``,
    ``log P(x[j - 1] | class)`` estimated with add-one (Laplace) smoothing:
    ``(count + 1) / (class_total + number_of_known_values_of_feature_j)``.

    Parameters
    ----------
    dictionary : dict
        Model produced by ``fit``.
    x : sequence
        One data point; ``x[j - 1]`` is the value of feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        Log-score of the class. Scores are only comparable between classes
        of the same model; they are not normalised over the classes.
    """
    class_counts = dictionary[current_class]
    class_total = class_counts["total_count"]
    # Log prior: share of the training samples that belong to this class.
    output = np.log(class_total) - np.log(dictionary["total_data"])
    # Every key except "total_count" is a 1-based feature index.
    num_features = len(class_counts) - 1
    for j in range(1, num_features + 1):
        value_counts = class_counts[j]
        # A value that never occurred in training has no entry; treat it as
        # a zero count so smoothing still yields a finite probability
        # instead of raising KeyError.
        smoothed_count = value_counts.get(x[j - 1], 0) + 1
        smoothed_total = class_total + len(value_counts)
        output = output + (np.log(smoothed_count) - np.log(smoothed_total))
    return output

In [5]:
# This function predicts the class labels for a list of data points 'X_test' using the trained categorical Naive Bayes model stored in 'dictionary'; it returns one label per data point, in the same order.


def predict(dictionary, X_test):
    """Classify every data point in ``X_test``.

    Each point is passed to ``predictSinglePoint`` together with the model
    ``dictionary``. The result is a list holding one predicted label per
    point, in the order the points appear in ``X_test``.
    """
    return [predictSinglePoint(dictionary, sample) for sample in X_test]

In [6]:
# Toy training set: six samples with two features each.
X_train = np.array(
    [
        [1, 0],
        [1, 1],
        [0, 1],
        [2, 2],
        [3, 3],
        [2, 4],
    ]
)
# One label per row above: the first three rows are class 0, the rest class 1.
Y_train = np.array([0] * 3 + [1] * 3)

# Build the count tables that the prediction functions read from.
model = fit(X_train, Y_train)

In [7]:
# Three query points to classify with the trained model.
X_test = np.array(
    [
        [1, 1],
        [0, 0],
        [3, 4],
    ]
)

# Classify every row of X_test and show the predicted labels.
predictions = predict(model, X_test)
print(predictions)  # Expected labels: 0, 0, 1


[0, 0, 1]
