In [1]:
import pickle

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from skmultilearn.problem_transform import LabelPowerset

# Load training and test sets

In [2]:
# Read the pickled train / test inputs in one pass; the generator yields the
# objects in the same order as the paths, so the unpacking lines up.
X_train, X_test = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "../../pickled_files/X_train.pkl",
        "../../pickled_files/X_test.pkl",
    )
)

In [3]:
# Load the pickled label artefacts. Each file is opened in a `with` block so
# the handle is closed deterministically instead of being left to the garbage
# collector (the bare `pickle.load(open(...))` form never closes it).
#
# NOTE(security): `pickle.load` can execute arbitrary code embedded in the
# file -- only load pickles produced by this project / a trusted source.
# NOTE(review): `mlb` is presumably the fitted label binarizer that produced
# `y_train` / `y_test`; it is not used in the cells visible here -- confirm.
with open("../../pickled_files/mlb.pkl", 'rb') as mlb_file:
    mlb = pickle.load(mlb_file)

with open("../../pickled_files/y_train.pkl", 'rb') as y_train_file:
    y_train = pickle.load(y_train_file)

with open("../../pickled_files/y_test.pkl", 'rb') as y_test_file:
    y_test = pickle.load(y_test_file)

# Train the model

In [4]:
# SGD shuffles the training data each epoch, so without a fixed seed every
# "Restart & Run All" trains a slightly different model and reports slightly
# different metrics. Pin the seed so the notebook is reproducible.
# NOTE: the metrics recorded in the evaluation cells were presumably produced
# by an unseeded run, so re-executed numbers may differ slightly from them.
SEED = 42

# Text-classification pipeline:
#   1. 'vect'    - token counts per document
#   2. 'tfidf'   - re-weight the counts with TF-IDF
#   3. 'clf-svm' - linear SVM (hinge loss, L2 penalty) trained with SGD,
#                  wrapped in LabelPowerset so the multi-label target is
#                  handled as a single multi-class problem.
text_clf_svm = Pipeline([('vect', CountVectorizer()),
                         ('tfidf', TfidfTransformer()),
                         ('clf-svm', LabelPowerset(SGDClassifier(loss='hinge',
                                                                 penalty='l2',
                                                                 alpha=1e-4,
                                                                 # NOTE(review): only 6 epochs; may stop
                                                                 # before convergence -- confirm intended.
                                                                 max_iter=6,
                                                                 random_state=SEED)))])


# Fit the model (no `_ =` needed: this is not the cell's last expression, and
# assigning to `_` would shadow IPython's last-output variable).
text_clf_svm.fit(X_train, y_train)

# Make prediction on the held-out test inputs
predicted_svm = text_clf_svm.predict(X_test)



# Evaluate the model

In [5]:
# Per-label precision / recall / F1 plus the averaged rows. `print` is kept on
# purpose: the report is a multi-line string and a bare expression would show
# its repr with literal "\n" escapes.
print(classification_report(y_true=y_test, y_pred=predicted_svm))

              precision    recall  f1-score   support

           0       0.64      0.34      0.44       121
           1       0.62      0.91      0.73       289
           2       0.89      0.63      0.74       301
           3       0.74      0.53      0.61        80
           4       0.71      0.70      0.70       145

   micro avg       0.71      0.68      0.69       936
   macro avg       0.72      0.62      0.65       936
weighted avg       0.73      0.68      0.68       936
 samples avg       0.71      0.70      0.69       936



In [6]:
# Exact-match (subset) accuracy: for multi-label targets a sample only counts
# as correct when its whole predicted label set equals the true label set.
print(accuracy_score(y_true=y_test, y_pred=predicted_svm))

0.5834428383705651


In [7]:
# Mean of the comparison between predictions and ground truth.
# NOTE(review): presumably `==` is applied element-wise over the
# (sample x label) indicator matrices, which would make this the per-label
# accuracy (i.e. 1 - Hamming loss) rather than the stricter exact-match score
# from `accuracy_score` -- confirm the container types of `predicted_svm`
# (likely sparse) and `y_test` before relying on this number.
np.mean(predicted_svm == y_test)

0.8515111695137977