# Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Dataset

In [2]:
# Load the weekend-decision table; the path is relative to the notebook's working directory.
data = pd.read_csv("weekend_decision.csv")

In [3]:
# Show the raw table: a weekend id, three categorical features and the decision.
data

Unnamed: 0,Weekend,Weather,Parents,Financial condition,Decision
0,W1,Sunny,Yes,Rich,Cinema
1,W2,Sunny,No,Rich,Play Tennis
2,W3,Windy,Yes,Rich,Cinema
3,W4,Rainy,Yes,Poor,Cinema
4,W5,Rainy,No,Rich,Stay in
5,W6,Rainy,Yes,Poor,Cinema
6,W7,Windy,No,Poor,Cinema
7,W8,Windy,No,Rich,Shopping
8,W9,Windy,Yes,Rich,Cinema
9,W10,Sunny,No,Rich,Play Tennis


In [4]:
# Positional split: column 0 is the weekend id (not a feature) and the last
# column is the target; everything in between is a categorical feature.
FIRST_FEATURE, TARGET = 1, -1
X = data.iloc[:, FIRST_FEATURE:TARGET]
y = data.iloc[:, TARGET]

In [5]:
# Feature frame only: Weather, Parents, Financial condition.
X

Unnamed: 0,Weather,Parents,Financial condition
0,Sunny,Yes,Rich
1,Sunny,No,Rich
2,Windy,Yes,Rich
3,Rainy,Yes,Poor
4,Rainy,No,Rich
5,Rainy,Yes,Poor
6,Windy,No,Poor
7,Windy,No,Rich
8,Windy,Yes,Rich
9,Sunny,No,Rich


In [6]:
# Target column ("Decision") as a Series of string labels.
y

0         Cinema
1    Play Tennis
2         Cinema
3         Cinema
4        Stay in
5         Cinema
6         Cinema
7       Shopping
8         Cinema
9    Play Tennis
Name: Decision, dtype: object

In [7]:
# One-hot encode the categorical features: one binary column per category value.
X_encoder = OneHotEncoder()
X_encoder.fit(X)
X_encoded = X_encoder.transform(X)

# Map each decision string to an integer class id.
y_encoder = LabelEncoder()
y_encoder.fit(y)
y_encoded = y_encoder.transform(y)

In [8]:
# Expect 10 rows and 7 columns: Weather (3) + Parents (2) + Financial condition (2).
X_encoded.shape

(10, 7)

In [9]:
# Integer class ids, one per row of the table, produced by the label encoder.
y_encoded

array([0, 1, 0, 0, 3, 0, 0, 2, 0, 1])

# Model

In [37]:
class NaiveBayes:
    """Multinomial-style Naive Bayes classifier over non-negative feature counts.

    For every class the model stores a prior (fraction of training rows with
    that label) and a smoothed per-feature probability::

        P(feature j | class c) = (count_cj + alpha) / (sum_j count_cj + alpha * n_features)

    so the feature probabilities of each class sum to 1.

    Inputs may be a scipy sparse matrix (e.g. the output of a one-hot
    encoder), a NumPy array, or anything ``np.asarray`` can turn into a
    numeric 2-D array (such as an all-numeric DataFrame).

    Parameters
    ----------
    alpha : float, default 1e-10
        Additive smoothing constant. Must be non-negative; use a value > 0 so
        that features never seen with a class keep a finite log-probability.
    """

    def __init__(self, alpha=1e-10):
        if alpha < 0:
            raise ValueError("alpha must be non-negative")
        self._alpha = alpha
        self._classes = []        # sorted unique labels seen in fit()
        self._priors = {}         # label -> P(class)
        self._feature_probs = {}  # label -> 1-D array of P(feature | class)

    @staticmethod
    def _as_2d(X):
        """Return X as a row-indexable 2-D matrix (CSR if sparse, float ndarray otherwise)."""
        if hasattr(X, "tocsr"):  # scipy sparse input; CSR supports boolean row masks
            return X.tocsr()
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:  # a single sample given as a flat vector
            X = X.reshape(1, -1)
        return X

    def fit(self, X, y):
        """Estimate class priors and per-class feature probabilities.

        Parameters
        ----------
        X : sparse matrix or array-like of shape (n_samples, n_features)
            Non-negative feature counts (one-hot indicators count as 0/1).
        y : array-like of shape (n_samples,)
            Class labels; any values accepted by ``np.unique``.

        Raises
        ------
        ValueError
            If X and y disagree on the number of samples.
        """
        X = self._as_2d(X)
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self._classes, class_counts = np.unique(y, return_counts=True)
        # Start from a clean slate so refitting never keeps stale classes.
        self._priors = {}
        self._feature_probs = {}

        # Iterate over the actual labels, not their positions, so labels need
        # not be the integers 0..k-1.
        for label, n_rows in zip(self._classes, class_counts):
            X_c = X[y == label]
            self._priors[label] = n_rows / n_samples
            # .sum(axis=0) returns a 1xN matrix for sparse input; flatten it.
            feature_counts = np.asarray(X_c.sum(axis=0), dtype=float).ravel()
            # Smoothing is applied once in the numerator and once (times
            # n_features) in the denominator, so the result sums to 1.
            # An alternative estimator is feature_counts / n_rows
            # (per-class relative frequency of each indicator).
            self._feature_probs[label] = (feature_counts + self._alpha) / (
                feature_counts.sum() + self._alpha * n_features
            )

    def predict(self, X) -> np.ndarray:
        """Return the most probable class label for every row of X.

        Parameters
        ----------
        X : sparse matrix or array-like of shape (n_samples, n_features)
            A 1-D dense vector is treated as a single sample.

        Returns
        -------
        np.ndarray of shape (n_samples,)
            Predicted labels, taken from the labels seen during ``fit``.
            Ties go to the first class in sorted label order.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if len(self._classes) == 0:
            raise ValueError("NaiveBayes.predict called before fit")
        X = self._as_2d(X)

        log_priors = np.log([self._priors[c] for c in self._classes])
        log_probs = np.log(np.vstack([self._feature_probs[c] for c in self._classes]))

        # Joint log-likelihood for all samples and classes at once:
        # scores[i, k] = log P(class k) + sum_j X[i, j] * log P(feature j | class k)
        scores = np.asarray(X @ log_probs.T) + log_priors
        return self._classes[np.argmax(scores, axis=1)]

In [38]:
# Train on the one-hot feature matrix and the integer-encoded decisions.
model = NaiveBayes()  # default alpha=1e-10, i.e. effectively no smoothing
model.fit(X_encoded, y_encoded)

# Testing

In [42]:
# Two hand-written weekends to classify, using the same column names as X
# so the fitted encoder can transform them.
test_rows = [
    ["Windy", "No", "Rich"],
    ["Rainy", "No", "Poor"],
]
test_samples = pd.DataFrame(test_rows, columns=X.columns)
test_samples

Unnamed: 0,Weather,Parents,Financial condition
0,Windy,No,Rich
1,Rainy,No,Poor


In [43]:
# Reuse the encoder fitted on X (transform only, no refit) so the test columns line up with X_encoded.
test_encoded = X_encoder.transform(test_samples)

In [44]:
# Predict integer class ids, then map them back to the original decision strings.
preds = model.predict(test_encoded)
pred_labels = y_encoder.inverse_transform(preds)

print("\nPredictions for Test Samples:")
encoded_rows = test_encoded.toarray()  # densify only for display
for orig, enc, pred in zip(test_samples.values, encoded_rows, pred_labels):
    print(f"Original: {orig}, Encoded: {enc}, Predicted: {pred}")


Predictions for Test Samples:
Original: ['Windy' 'No' 'Rich'], Encoded: [0. 0. 1. 1. 0. 0. 1.], Predicted: Shopping
Original: ['Rainy' 'No' 'Poor'], Encoded: [1. 0. 0. 1. 0. 1. 0.], Predicted: Cinema
