In [3]:
import json
import tabulate
import nltk
# Download the NLTK 'punkt' package (reports "already up-to-date" when present).
nltk.download('punkt')

# JSON interchange text is UTF-8 (RFC 8259). Say so explicitly so decoding does
# not depend on the platform's locale encoding (e.g. cp1252 on Windows).
with open('predictions.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Tokenize once the file has been closed: every record gets a 'tokens' entry
# holding the word tokens of its 'text' field.
for comment in data:
    comment['tokens'] = nltk.word_tokenize(comment['text'])


[nltk_data] Downloading package punkt to C:\Users\Ammon
[nltk_data]     Stretz\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
from IPython.display import HTML, display

def defaultFilter(el):
    """Pass-through predicate used when no filter is supplied.

    Accepts any element and always answers ``True``, so ``filter`` keeps
    every record.
    """
    keep = True
    return keep

def calcMetrics(f = defaultFilter):
    """Compute confusion-matrix counts and derived scores over ``data``.

    Only the records of the module-level ``data`` accepted by the predicate
    ``f`` are counted. ``label`` is treated as the ground truth and
    ``prediction`` as the model output; both are evaluated for truthiness.

    Args:
        f: predicate applied to each record; the default accepts everything.

    Returns:
        dict with the raw counts ``tp``, ``tn``, ``fp``, ``fn``; the
        aggregates ``true`` (tp+tn), ``false`` (fp+fn), ``positiv`` (tp+fp,
        i.e. predicted positive) and ``negativ`` (tn+fn, i.e. predicted
        negative); and the scores ``accuracy``, ``precision``, ``recall`` and
        ``f1Score``. A score whose denominator is zero is reported as 0.
    """
    tp = tn = fp = fn = 0
    for el in filter(f, data):
        label = bool(el['label'])
        prediction = bool(el['prediction'])
        if label and prediction:
            tp += 1
        elif prediction:
            # Predicted positive although the label is negative.
            fp += 1
        elif label:
            # Positive label that the prediction missed.
            fn += 1
        else:
            tn += 1

    total = tp + fp + fn + tn
    accuracy = (tp + tn) / total if total != 0 else 0
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0

    f1Score = 0
    if (recall + precision) != 0:
        # Harmonic mean of precision and recall.
        f1Score = (2 * (recall * precision)) / (recall + precision)

    return {
        "tp": tp,
        "tn": tn,
        "fp": fp,
        "fn": fn,
        "true": tp+tn,
        "false": fp+fn,
        "positiv": tp+fp,
        "negativ": tn+fn,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1Score": f1Score
    }
def showMetrics(metrics):
    """Render a metrics dict as three HTML tables in the notebook output.

    Shown in order: the label totals, the confusion matrix with its row and
    column totals, and the accuracy / precision / recall / f1Score values.

    Args:
        metrics: mapping with the keys ``tp``, ``tn``, ``fp``, ``fn``,
            ``true``, ``false``, ``positiv``, ``negativ``, ``accuracy``,
            ``precision``, ``recall`` and ``f1Score``.
    """
    def render(rows):
        # One HTML table per call, emitted straight into the cell output.
        display(HTML(tabulate.tabulate(rows, tablefmt='html')))

    label_totals = [[
        'positiv labels (%s)' % (metrics['tp']+metrics['fn']),
        'negative labels (%s)' % (metrics['fp']+metrics['tn']),
    ]]
    render(label_totals)

    confusion = [
        ['', 'true (%s)' % metrics['true'], 'false (%s)' % metrics['false']],
        ['positiv (%s)' % metrics['positiv'], metrics['tp'], metrics['fp']],
        ['negativ (%s)' % metrics['negativ'], metrics['tn'], metrics['fn']],
    ]
    render(confusion)

    score_names = ['accuracy', 'precision', 'recall', 'f1Score']
    scores = [
        score_names,
        [metrics['accuracy'], metrics['precision'], metrics['recall'], metrics['f1Score']],
    ]
    render(scores)
def showData(f = defaultFilter):
    """Display the records of ``data`` accepted by ``f`` as one HTML table.

    The first row names the columns; each following row holds the same-named
    fields of one record.

    Args:
        f: predicate applied to each record; the default accepts everything.
    """
    columns = ['id', 'label', 'prediction', 'text', 'probability']
    rows = [[record[key] for key in columns] for record in filter(f, data)]

    display(HTML(tabulate.tabulate([columns] + rows, tablefmt='html')))

# showData()

In [8]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd

def createPlt(name, filters, relative):
    """Draw a stacked bar chart of correct vs. incorrect predictions per filter.

    For every entry of ``filters`` the matching records of the module-level
    ``data`` are split into those whose ``label`` equals ``prediction`` (green,
    bottom) and those where it does not (red, stacked on top).

    Args:
        name: text used as the x-axis label.
        filters: iterable of pairs; item 0 is the bar's tick label and item 1
            the predicate selecting the records for that bar.
        relative: when truthy, bar heights are percentages of the group size
            instead of absolute counts; an empty group stays at 0.
    """
    names = []
    trueBar = []
    falseBar = []
    for f in filters:
        names.append(f[0])
        correct = 0
        wrong = 0
        for element in filter(f[1], data):
            if element["label"] == element["prediction"]:
                correct += 1
            else:
                wrong += 1
        if relative:
            # The group size is just the sum of both counters, so there is no
            # need to run the predicate over ``data`` again to obtain it.
            total = correct + wrong
            if total > 0:
                correct = 100 * correct / total
                wrong = 100 * wrong / total
        trueBar.append(correct)
        falseBar.append(wrong)
    positions = range(len(names))

    # NOTE: rc() changes matplotlib's global font settings, not just this figure.
    rc('font', weight='bold')
    barWidth = 0.5
    plt.bar(positions, trueBar, color='green', edgecolor='white', width=barWidth)
    plt.bar(positions, falseBar, bottom=trueBar, color='red', edgecolor='white', width=barWidth)

    plt.xticks(positions, names, fontweight='regular')
    plt.xlabel(name)
    plt.show()