In [2]:
from pymongo import MongoClient
from bson.objectid import ObjectId
import altair as alt
import pandas as pd
import math

In [3]:
# Handles on the MongoDB collection holding the experiment results.
# NOTE(review): host and port are hard-coded to a LAN address; the notebook
# only runs from a machine that can reach it.
client = MongoClient(host='192.168.178.25', port=27017)
db = client['TFE']
collection = db['results']

# Analysis of the results without SMOTE

In [40]:
# Build one row per (run, model, class) holding the recall and precision of
# that class, for the runs whose document has no `smote.value` field.
query = {
    'infos' : 'Running on all models, using a max feature for TfidfVectorizer',
    'smote.value' : {'$exists' : False},
}
projection = {'report' : 1, '_id' : 0, 'max_features' : 1}
columns = ['model', 'type', 'recall', 'precision', 'max_features']

# Accumulate plain dicts and create the frame once: DataFrame.append copied the
# whole frame on every call (quadratic) and no longer exists in pandas >= 2.0.
records = []
for result in collection.find(query, projection):
    for model in result['report']:
        # Same row order as before: 'fake' first, then 'reliable'.
        for label in ('fake', 'reliable'):
            scores = model['classification_report'][label]
            records.append({
                'model' : model['model'],
                'type' : label,
                'recall' : scores['recall'],
                'precision' : scores['precision'],
                'max_features' : result['max_features'],
            })

# Passing `columns` keeps the expected schema even when no document matches.
results = pd.DataFrame(records, columns=columns)

In [34]:
# Replace max_features by its base-10 logarithm (the charts use this column on
# the x axis). The column is overwritten in place, so running this cell a
# second time without rebuilding `results` applies the logarithm twice.
log_max_features = results['max_features'].apply(math.log10)
results['max_features'] = log_max_features

In [7]:
# Recall on the 'fake' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
fake_rows = results[results['type'] == 'fake']
base = alt.Chart(fake_rows).encode(
    x='max_features',
    y='recall',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_fake_recall.png', webdriver='firefox')

In [8]:
# Recall on the 'reliable' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
reliable_rows = results[results['type'] == 'reliable']
base = alt.Chart(reliable_rows).encode(
    x='max_features',
    y='recall',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

# Apply the same legend configuration as the other recall/precision charts of
# this notebook so that all exported figures are styled consistently.
chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_reliable_recall.png', webdriver='firefox')

In [9]:
# Precision on the 'fake' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
fake_rows = results[results['type'] == 'fake']
base = alt.Chart(fake_rows).encode(
    x='max_features',
    y='precision',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_fake_precision.png', webdriver='firefox')

In [10]:
# Precision on the 'reliable' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
reliable_rows = results[results['type'] == 'reliable']
base = alt.Chart(reliable_rows).encode(
    x='max_features',
    y='precision',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_reliable_precision.png', webdriver='firefox')

# Analysis of the results with SMOTE

In [46]:
# Build one row per (run, model, class) holding the recall and precision of
# that class, for the runs stored with `smote.value` set to True. Model names
# get a "_SMOTE" suffix.
query = {
    'infos' : 'Running on all models, using a max feature for TfidfVectorizer',
    'smote.value' : True,
}
projection = {'report' : 1, '_id' : 0, 'max_features' : 1}
columns = ['model', 'type', 'recall', 'precision', 'max_features']

# Accumulate plain dicts and create the frame once: DataFrame.append copied the
# whole frame on every call (quadratic) and no longer exists in pandas >= 2.0.
records = []
for result in collection.find(query, projection):
    for model in result['report']:
        # Same row order as before: 'fake' first, then 'reliable'.
        for label in ('fake', 'reliable'):
            scores = model['classification_report'][label]
            records.append({
                'model' : model['model']+"_SMOTE",
                'type' : label,
                'recall' : scores['recall'],
                'precision' : scores['precision'],
                'max_features' : result['max_features'],
            })

# Passing `columns` keeps the expected schema even when no document matches.
results = pd.DataFrame(records, columns=columns)

# Replace max_features by its base-10 logarithm (used on the x axis of the
# charts). Done right after the frame is rebuilt, so re-running the cell never
# applies the logarithm twice.
results['max_features'] = results['max_features'].apply(math.log10)

In [47]:
# Recall on the 'fake' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
fake_rows = results[results['type'] == 'fake']
base = alt.Chart(fake_rows).encode(
    x='max_features',
    y='recall',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_fake_recall_SMOTE.png', webdriver='firefox')

In [48]:
# Recall on the 'reliable' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
reliable_rows = results[results['type'] == 'reliable']
base = alt.Chart(reliable_rows).encode(
    x='max_features',
    y='recall',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

# Apply the same legend configuration as the other recall/precision charts of
# this notebook so that all exported figures are styled consistently.
chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_reliable_recall_SMOTE.png', webdriver='firefox')

In [49]:
# Precision on the 'fake' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
fake_rows = results[results['type'] == 'fake']
base = alt.Chart(fake_rows).encode(
    x='max_features',
    y='precision',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_fake_precision_SMOTE.png', webdriver='firefox')

In [50]:
# Precision on the 'reliable' class against max_features, one colour per model:
# a line layer with point markers layered on top, saved as a PNG.
reliable_rows = results[results['type'] == 'reliable']
base = alt.Chart(reliable_rows).encode(
    x='max_features',
    y='precision',
    color='model:N',
)

line = base.mark_line()
dots = base.mark_point()

chart = alt.layer(line, dots).configure_legend(titleAlign='left')

chart.save('output/all_reliable_precision_SMOTE.png', webdriver='firefox')