# Trabalho 03 - Naive Bayes
Aluno: Diego Freitas Holanda

Matricula: 411627

In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [23]:
def encode(y: str) -> int:
    return 0 if y.startswith('r') else 1

def decode(y: int) -> str:
    return 'republican' if y == 0 else 'democrat'

## Carregando Dataset

In [24]:
dataset = pd.read_csv('./votesDataset.csv').values

In [25]:
X = dataset[:, 1:-1]
y = np.vstack([encode(item) for item in dataset[:, 0]])

In [26]:
def model_evaluation_scores(y, y_predict) -> dict:
    metrics = {}
    
    # Make sure both have the same format
    y_predict = y_predict.reshape(-1, 1)
    y = y.reshape(-1, 1)
    
    # True Positves
    true_positives = np.nonzero((y == 1) & (y_predict == 1))[0]
    n_true_positives = len(true_positives)

    # False positives
    false_positives = np.nonzero((y == 0) & (y_predict == 1))[0]
    n_false_positives = len(false_positives) 

    # False Negatives
    false_negatives = np.nonzero((y == 1) & (y_predict==0))[0]
    n_false_negatives = len(false_negatives)

    # Accurracy
    accurracy = np.sum(y == y_predict) / len(y_predict)
    metrics['Accurracy'] = accurracy
    
    # Recall
    recall = n_true_positives / (n_true_positives + n_false_negatives)
    metrics['Recall'] = recall
    
    # Precision
    precision = n_true_positives / (n_true_positives + n_false_positives)
    metrics['Precision'] = precision
    
    # F1 Score
    f1_score = 2 * (recall * precision) / (recall + precision)
    metrics['F1 Score'] = f1_score
    
    return metrics

## Implemantação do Naive Bayes Binomial

In [27]:
class BinomialNaiveBayes:
    def __init__(self, n_classes, alpha, a, b):
        self.n_classes = n_classes
        self.alpha = alpha
        self.a = a
        self.b = b
        
    def fit(self, x: np.ndarray, y: np.ndarray):
        self.expected_pi = np.empty((self.n_classes))
        self.expected_theta = np.empty((x.shape[1], self.n_classes))
        
        N = len(x)
        for c in range(self.n_classes):
            ck = x[y == c]
            
            Nc = len(ck)
            self.expected_pi[c] = (Nc + self.alpha[c]) / (N + np.sum(self.alpha))
            for d in range(x.shape[1]):
                Ndc = np.sum(ck[:, d])
                self.expected_theta[d][c] = (Ndc + self.b) / (Nc + self.a + self.b)
    
    def predict(self, X: np.ndarray):
        probs_per_class = np.log(self.expected_pi) + X@np.log(self.expected_theta) +  (1-X)@np.log(1 - self.expected_theta)
        return np.argmax(probs_per_class, axis=1)


## Treino

In [28]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [29]:
alpha = np.array([2, 2])
a = 3
b = 2
model = BinomialNaiveBayes(2, alpha, a, b)
model.fit(x_train, y_train.flatten())
y_predict = model.predict(x_test)

## Teste e Métricas

In [30]:
for metric, value in model_evaluation_scores(y_predict, y_test).items():
    print(f'{metric}: \t{value:.2f}')

Accurracy: 	0.87
Recall: 	0.91
Precision: 	0.89
F1 Score: 	0.90
