# Importing Libraries and Data

In [14]:
import joblib
import pandas as pd
from os.path import join
from IPython.display import display, Markdown


# Load the held-out test set. The first CSV column has no header and is
# otherwise read as a stray "Unnamed: 0" column; it looks like a saved row
# index, so use it as the DataFrame index instead.
test_dataset = pd.read_csv(join('data', 'test_dataset.csv'), index_col=0)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load model artifacts that come from a trusted source.
vectorizer = joblib.load(join("models", "vectorizer.pkl"))
prediction_model = joblib.load(join("models", "gnb_model.pkl"))

test_dataset.head()

Unnamed: 0,Title,Fact Check
0,hurricane hermine waterspout north carolina,False
1,bacon sandwich reported offensive facebook,False
2,facebook scam someone tried log account,True
3,teens coughing produce disturbing trend us,False
4,rainbow harpy eagle photo real,False


# Testing the Model

In [15]:
# Features are the headline text; the target is the fact-check label.
x = test_dataset['Title']
y = test_dataset['Fact Check']

# The model is fed a dense array, so the vectorizer output is densified first.
x_vectorize = vectorizer.transform(x)
y_pred = prediction_model.predict(x_vectorize.toarray())

# (title, predicted) pairs for eyeballing, (real, predicted) pairs for metrics.
preds = [(title, guess) for title, guess in zip(x, y_pred)]
diffs = [(truth, guess) for truth, guess in zip(y, y_pred)]

# Show the first ten predictions next to their ground truth.
for pred, diff in zip(preds[:10], diffs[:10]):
    print(pred, diff)

('hurricane hermine waterspout north carolina', False) (False, False)
('bacon sandwich reported offensive facebook', False) (False, False)
('facebook scam someone tried log account', True) (True, True)
('teens coughing produce disturbing trend us', False) (False, False)
('rainbow harpy eagle photo real', True) (False, True)
('kathy hochul nancy pelosis stepsister', False) (False, False)
('fallen angel', True) (False, True)
('activist shaun king say white jesus statues come', True) (True, True)
('old marilyn manson photo real', True) (False, True)
('smiling sun smiling birds photo real', False) (False, False)


## Metrics

In [17]:
# Confusion-matrix counts, treating a truthy label as the positive class.
# `diffs` is the list of (real, predicted) pairs built in the testing cell.
tp = tn = fp = fn = 0
for real, predicted in diffs:
    if real and predicted:
        tp += 1
    elif predicted:
        fp += 1
    elif real:
        fn += 1
    else:
        tn += 1

# Rows are the real class, columns are the predicted class.
matrix = ((tp, fn), (fp, tn))
markdownDataframe = pd.DataFrame(
    matrix,
    index=["Real - Verdadeiro", "Real - Falso"],
    columns=["Predito - Verdadeiro", "Predito - Falso"],
)

display(Markdown(markdownDataframe.to_markdown(index=True)))


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    Keeps the cell from raising ZeroDivisionError when a class or a
    prediction bucket is empty (e.g. the model never predicts True).
    """
    return numerator / denominator if denominator else 0.0


accuracy = _ratio(tp + tn, len(diffs))
specificity = _ratio(tn, fp + tn)
recall = _ratio(tp, tp + fn)
precision = _ratio(tp, tp + fp)
# Harmonic mean of precision and recall.
f_score = 2 * _ratio(precision * recall, precision + recall)

print(f"Acuracia = {round(accuracy * 100,2)}%")
print(f"Especificidade = {round(specificity * 100, 2)}%")
print(f"Sensibilidade = {round(recall * 100, 2)}%")
print(f"Precisão = {round(precision * 100, 2)}%")
# Previously computed but never reported.
print(f"F1-score = {round(f_score * 100, 2)}%")

print(f"Tamanho do conjunto de teste: {tp+tn+fp+fn}")

|                   |   Predito - Verdadeiro |   Predito - Falso |
|:------------------|-----------------------:|------------------:|
| Real - Verdadeiro |                    607 |               437 |
| Real - Falso      |                    776 |              1110 |

Acuracia = 58.6%
Especificidade = 58.85%
Sensibilidade = 58.14%
Precisão = 43.89%
Tamanho do conjunto de teste: 2930
