In [53]:
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle

In [54]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [55]:
train_data = pd.read_csv('data/processed/train_data.csv')
test_data = pd.read_csv('data/processed/test_data.csv')


In [56]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 1500)

cv_train = np.concatenate((train_data.iloc[:, 0].values, test_data.iloc[:, 0].values))

cv.fit(cv_train)

pickle.dump(cv, open('model/countvectorizer.sav', 'wb'))

x_train = cv.transform(train_data.iloc[:, 0].values).toarray()
x_test = cv.transform(test_data.iloc[:, 0].values).toarray()

y_train = train_data.iloc[:, 1].values 
y_test = test_data.iloc[:, 1].values



In [57]:
def score(y_test, y_pred):
    print("Accuracy = ", round(accuracy_score(y_test, y_pred), 2), '%', sep='')
    print("Precision = ", round(precision_score(y_test, y_pred), 2))
    print("Recall = ", round(precision_score(y_test, y_pred), 2))

## Multinomial Naive Bayes

In [58]:
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB(alpha=0.1)
classifier.fit(x_train, y_train)
pickle.dump(classifier, open('model/multimodalnb.sav', 'wb'))

In [59]:
y_pred = classifier.predict(x_test)

score(y_test, y_pred)


Accuracy = 0.79%
Precision =  0.81
Recall =  0.81


## Bernoulli Naive Bayes

In [60]:
from sklearn.naive_bayes import BernoulliNB
classifier = BernoulliNB(alpha=0.8)
classifier.fit(x_train, y_train)
pickle.dump(classifier, open('model/bernoulinb.sav', 'wb'))


In [61]:
y_pred = classifier.predict(x_test)

score(y_test, y_pred)

Accuracy = 0.76%
Precision =  0.74
Recall =  0.74


## Logistic Regression

In [62]:
from sklearn import linear_model
classifier = linear_model.LogisticRegression(C=1.5)
classifier.fit(x_train, y_train)
pickle.dump(classifier, open('model/logisticregresion.sav', 'wb'))



In [63]:
y_pred = classifier.predict(x_test)

score(y_test, y_pred)


Accuracy = 0.82%
Precision =  0.84
Recall =  0.84
