In [1]:
### Evaluation of the SVM, RF and GBT models cited in the paper
SVM_PATH     = '../classifiers/grid_search_best_SVC_74.11.pickle'
RF_PATH      = '../classifiers/grid_search_best_RF_77.98.pickle'
OLD_RF_PATH  = '../classifiers/RF_by-article_stats-and-counts.pickle'
GBT_PATH     = '../classifiers/grid_search_best_GradBoostTree_78.14.pickle'
DATASET_PATH = '../generated_datasets/by-article_stats-and-counts.npz'

In [2]:
import pickle
import numpy as np

svm_clf = pickle.load(open(SVM_PATH, 'rb'))
rf_clf  = pickle.load(open(RF_PATH, 'rb'))
old_rf_clf  = pickle.load(open(RF_PATH, 'rb'))
gbt_clf = pickle.load(open(GBT_PATH, 'rb'))

data = np.load(DATASET_PATH)
X, y = data['X'], data['y']

In [3]:
from sklearn.model_selection import cross_validate

def assess_performance(clf, X, y, cv=10):
    cv = cross_validate(
        clf, X, y,
        cv=cv,
        scoring=['accuracy', 'precision', 'recall', 'f1'],
        return_train_score=True
    )
    
    for metric, vals in cv.items():
        vals = np.array(vals) * 100
        mean = np.mean(vals)
        variance = np.var(vals)
        print('{:10}\t: mean: {:4.4} ; var: {:4.4}'.format(metric, mean, variance))
        
    return cv

In [4]:
from sklearn.model_selection import StratifiedShuffleSplit

cv_split = StratifiedShuffleSplit(n_splits=500, test_size=0.2)
#cv_split = 10

In [5]:
print('\t** SVM **')
svm_cv = assess_performance(svm_clf, X, y, cv=cv_split)

	** SVM **
train_accuracy	: mean: 76.98 ; var: 4.428
test_f1   	: mean: 55.14 ; var: 41.57
train_precision	: mean: 78.42 ; var: 8.33
train_f1  	: mean: 62.15 ; var: 20.52
test_recall	: mean: 45.53 ; var: 57.53
test_accuracy	: mean: 72.73 ; var: 9.903
train_recall	: mean: 51.59 ; var: 26.75
score_time	: mean: 0.2327 ; var: 0.0004185
fit_time  	: mean: 0.6445 ; var: 0.02563
test_precision	: mean: 71.25 ; var: 50.17


In [6]:
print('\t** RF **')
rf_cv = assess_performance(rf_clf, X, y, cv=cv_split)

	** RF **
train_accuracy	: mean: 100.0 ; var: 0.000224
test_f1   	: mean: 63.33 ; var: 30.05
train_precision	: mean: 100.0 ; var:  0.0
train_f1  	: mean: 100.0 ; var: 0.0004152
test_recall	: mean: 55.39 ; var: 44.58
test_accuracy	: mean: 76.27 ; var: 10.22
train_recall	: mean: 100.0 ; var: 0.001652
score_time	: mean: 3.969 ; var: 0.06518
fit_time  	: mean: 16.77 ; var: 0.8385
test_precision	: mean: 74.62 ; var: 40.69


In [7]:
print('\t** Old RF **')
old_rf_cv = assess_performance(old_rf_clf, X, y, cv=cv_split)

	** Old RF **
train_accuracy	: mean: 100.0 ; var: 0.0001496
test_f1   	: mean: 63.29 ; var: 31.06
train_precision	: mean: 100.0 ; var:  0.0
train_f1  	: mean: 100.0 ; var: 0.0002774
test_recall	: mean: 55.15 ; var: 47.37
test_accuracy	: mean: 76.35 ; var: 9.861
train_recall	: mean: 100.0 ; var: 0.001104
score_time	: mean: 4.007 ; var: 0.217
fit_time  	: mean: 16.97 ; var: 1.642
test_precision	: mean: 74.98 ; var: 38.54


In [8]:
print('\t** GBT **')
gbt_cv = assess_performance(gbt_clf, X, y, cv=cv_split)

	** GBT **
train_accuracy	: mean: 100.0 ; var:  0.0
test_f1   	: mean: 64.58 ; var: 27.05
train_precision	: mean: 100.0 ; var:  0.0
train_f1  	: mean: 100.0 ; var:  0.0
test_recall	: mean: 59.41 ; var: 47.99
test_accuracy	: mean: 75.89 ; var: 10.31
train_recall	: mean: 100.0 ; var:  0.0
score_time	: mean: 0.4186 ; var: 0.005305
fit_time  	: mean: 33.17 ; var: 9.128
test_precision	: mean: 71.38 ; var: 34.2
