In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from skmultilearn.problem_transform import LabelPowerset

# Load Training and Test sets

In [2]:
# Restore the pickled model inputs for the training and test splits.
X_train = pd.read_pickle(
    "../../pickled_files/X_train.pkl",
)
X_test = pd.read_pickle(
    "../../pickled_files/X_test.pkl",
)

In [3]:
# Restore `mlb` (presumably the label binarizer used to encode the targets —
# confirm against the notebook that wrote it) and the label data for both splits.
# Each file is opened in a `with` block so its handle is closed as soon as the
# object has been read, rather than being left open for the rest of the session.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that were produced by this project or that you otherwise trust.
with open("../../pickled_files/mlb.pkl", 'rb') as fh:
    mlb = pickle.load(fh)

with open("../../pickled_files/y_train.pkl", 'rb') as fh:
    y_train = pickle.load(fh)

with open("../../pickled_files/y_test.pkl", 'rb') as fh:
    y_test = pickle.load(fh)

# Training Multinomial Naive Bayes

In [4]:
# Three-stage text classifier:
#   vect  - token counts over unigrams, with English stop words removed
#   tfidf - TfidfTransformer with IDF weighting switched off (use_idf=False)
#   clf   - Multinomial Naive Bayes (alpha=1e-2) wrapped in LabelPowerset so the
#           multi-label targets can be handled by a single-output classifier
text_clf = Pipeline(steps=[
    ('vect', CountVectorizer(ngram_range=(1, 1), stop_words="english")),
    ('tfidf', TfidfTransformer(use_idf=False)),
    ('clf', LabelPowerset(classifier=MultinomialNB(alpha=1e-2))),
])

# Train every stage on the training split; fit() hands back the pipeline itself.
text_clf = text_clf.fit(X_train, y_train)

# Predict labels for the test inputs; later cells score this result.
predicted = text_clf.predict(X_test)

# Evaluate the model

In [5]:
# Per-label precision, recall, F1 and support on the test split, followed by
# the averaged summary rows.
print(classification_report(y_true=y_test, y_pred=predicted))

              precision    recall  f1-score   support

           0       0.72      0.36      0.48       121
           1       0.73      0.80      0.76       289
           2       0.81      0.76      0.78       301
           3       0.84      0.57      0.68        80
           4       0.73      0.70      0.72       145

   micro avg       0.76      0.69      0.73       936
   macro avg       0.76      0.64      0.68       936
weighted avg       0.76      0.69      0.72       936
 samples avg       0.77      0.74      0.74       936



In [6]:
# For multi-label targets accuracy_score is the exact-match (subset) accuracy:
# a sample counts as correct only when its whole label set is predicted right.
print(accuracy_score(y_true=y_test, y_pred=predicted))

0.6373193166885677


In [7]:
# Fraction of individual (sample, label) entries where the prediction equals
# the ground truth. This is label-wise agreement, not exact-match accuracy, so
# it is expected to be higher than the accuracy_score figure.
# NOTE(review): assumes `predicted` and `y_test` are same-shaped indicator
# matrices so that `==` compares them element by element — confirm.
np.mean(predicted == y_test)

0.8709592641261498

| SVM | RF |
| --- | --- |
| 0.8 | 0.5 |