In [15]:
import os

pre_processed_path = os.path.join('data', 'csvs', "pre_processed.csv")
pre_normalized_path = os.path.join('data', 'csvs', "pre_normalized.csv")
news_validation_path = os.path.join('data', 'csvs', "news_validation.csv")

data_frame_paths = [pre_processed_path, pre_normalized_path]
data_frame_names = ["pre_processed", "pre_normalized"]

In [16]:
from preprocess import stopwords

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

bag_of_words = CountVectorizer(strip_accents="ascii", lowercase=True, stop_words = stopwords)
bag_of_words_normalized = TfidfVectorizer(strip_accents="ascii", lowercase=True, stop_words = stopwords, use_idf = False, norm='l2')
bag_of_words_idf = TfidfVectorizer(strip_accents="ascii", lowercase=True, stop_words = stopwords)

vectorizers = [bag_of_words, bag_of_words_normalized, bag_of_words_idf]
vectorizers_names = ["CountVectorizer", "TfidfVectorizer_normalized", "TfidfVectorizer_idf"]

In [17]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier()

algs = [mlp]
algs_names = ["MLP"]

In [18]:
import importlib
import sys

import get_results

sys.path.append( '.' )
importlib.reload(get_results)

df = get_results.get_result_data_frames(algs, algs_names, vectorizers, vectorizers_names, data_frame_paths, data_frame_names, news_validation_path)
df = df.sort_values(by=['algorithm',"accuracy"], ascending=[True, False]).reset_index(drop=True)
df

MLP CountVectorizer pre_processed
['fake' 'fake' 'fake' 'fake' 'true' 'fake' 'true' 'true' 'fake' 'fake'
 'fake' 'fake' 'fake' 'true']
MLP TfidfVectorizer_normalized pre_processed
['fake' 'fake' 'fake' 'fake' 'fake' 'fake' 'true' 'true' 'true' 'fake'
 'fake' 'fake' 'true' 'true']
MLP TfidfVectorizer_idf pre_processed
['fake' 'fake' 'fake' 'fake' 'true' 'fake' 'true' 'true' 'true' 'fake'
 'fake' 'fake' 'true' 'true']
MLP CountVectorizer pre_normalized
['fake' 'fake' 'fake' 'fake' 'true' 'fake' 'fake' 'true' 'true' 'true'
 'true' 'true' 'true' 'true']
MLP TfidfVectorizer_normalized pre_normalized
['fake' 'fake' 'fake' 'fake' 'true' 'fake' 'fake' 'true' 'true' 'true'
 'true' 'true' 'true' 'true']
MLP TfidfVectorizer_idf pre_normalized
['fake' 'fake' 'fake' 'fake' 'true' 'fake' 'fake' 'true' 'true' 'true'
 'true' 'true' 'true' 'true']


Unnamed: 0,algorithm,dataset,vectorizer,accuracy,precision,recall,f1,validation_accuracy,train_time,predict_time
0,MLP,pre_processed,TfidfVectorizer_normalized,0.9537,0.9669,0.9402,0.9534,0.7143,811.87,2.1
1,MLP,pre_processed,TfidfVectorizer_idf,0.9449,0.9636,0.9255,0.9442,0.6429,1023.63,2.4
2,MLP,pre_processed,CountVectorizer,0.9185,0.8825,0.9669,0.9227,0.5,464.54,2.58
3,MLP,pre_normalized,TfidfVectorizer_normalized,0.9088,0.9198,0.897,0.9082,0.9286,557.87,0.82
4,MLP,pre_normalized,TfidfVectorizer_idf,0.9042,0.9167,0.8905,0.9034,0.9286,541.9,0.99
5,MLP,pre_normalized,CountVectorizer,0.9028,0.9117,0.8933,0.9024,0.9286,439.78,1.18


In [21]:
import pandas as pd

df_result = pd.read_csv("results/models_results.csv")
df_result = pd.concat([df_result, df], ignore_index=True)
df_result

Unnamed: 0,algorithm,dataset,vectorizer,accuracy,precision,recall,f1,validation_accuracy,train_time,predict_time
0,AdaBoost,pre_processed,CountVectorizer,0.9444,0.9448,0.9448,0.9448,0.5833,671.54,22.74
1,AdaBoost,pre_processed,TfidfVectorizer_idf,0.9403,0.9435,0.9374,0.9405,0.5000,427.41,18.12
2,AdaBoost,pre_processed,TfidfVectorizer_normalized,0.9398,0.9443,0.9356,0.9399,0.6667,431.03,18.11
3,AdaBoost,pre_normalized,TfidfVectorizer_normalized,0.8398,0.8270,0.8620,0.8441,0.7500,211.98,10.91
4,AdaBoost,pre_normalized,CountVectorizer,0.8366,0.8449,0.8270,0.8359,0.8333,209.87,10.83
...,...,...,...,...,...,...,...,...,...,...
79,MLP,pre_processed,TfidfVectorizer_idf,0.9449,0.9636,0.9255,0.9442,0.6429,1023.63,2.40
80,MLP,pre_processed,CountVectorizer,0.9185,0.8825,0.9669,0.9227,0.5000,464.54,2.58
81,MLP,pre_normalized,TfidfVectorizer_normalized,0.9088,0.9198,0.8970,0.9082,0.9286,557.87,0.82
82,MLP,pre_normalized,TfidfVectorizer_idf,0.9042,0.9167,0.8905,0.9034,0.9286,541.90,0.99


In [22]:
df_result.to_csv("results/models_results.csv", index=False)