# Figures twitter_diff_dist.pickle

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pprint

### load data

In [2]:
# Location of the pickled results; the file holds a 3-tuple that is unpacked
# into results_124, results_192 and results_480 below.
result_pickle = 'data/results/twitter_diff_dist.pickle'

# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(result_pickle, mode='rb') as pickle_file:
    loaded_results = pickle.load(pickle_file)

results_124, results_192, results_480 = loaded_results

### config

In [3]:
# --- console output switches -------------------------------------------------
print_raw_data = False   # pretty-print the loaded result objects
print_plot_data = False  # print the aggregated values that get plotted

# --- figure switches ---------------------------------------------------------
show_plots = False  # run the plotting cells
save_plots = False  # additionally write PDFs; requires show_plots to be True

# Top-level keys of each result object that are plotted.
modes = [
    'bert_768',
    'bow_50',
    'bow_768',
]

# Maps the detector key used in the result objects to its display label.
# Disabled entry: 'csdd' -> "CosineSimilarity"
detectors = dict(
    kts="KernelTwoSample",
    aks="KS",
    ammd="MMD",
)

# Line width shared by all curves drawn by plot().
linewidth = 1.5

### print data

In [4]:
# Optional debugging aid: dump each loaded result object in full.
if print_raw_data:
    for raw_results in (results_124, results_192, results_480):
        pprint.pprint(raw_results)

### extract data

In [5]:
# Build data[mode][detector label] = [mean of 'predictions' for each of the
# three result objects, in the order results_124, results_192, results_480].
# The same per-detector lists are kept in plot_rows (in iteration order) so
# that they can be echoed when print_plot_data is enabled.
result_sets = (results_124, results_192, results_480)

data = {}
plot_rows = []
for mode in modes:
    per_detector = {}
    for detector_key, detector_label in detectors.items():
        mean_predictions = [
            np.mean(result_set[mode][detector_key]['predictions'])
            for result_set in result_sets
        ]
        per_detector[detector_label] = mean_predictions
        plot_rows.append(mean_predictions)
    data[mode] = per_detector

if print_plot_data:
    print("data like used in twitter_different_dist.ipynb")
    for row in plot_rows:
        print(row)
    pprint.pprint(data)

### plot data 

In [6]:
def plot(mode, max_pvalue):
    """Draw one line per detector for the given mode and display the figure.

    Reads the module-level ``data`` dict (``data[mode][<detector label>]``)
    as well as the ``linewidth`` and ``save_plots`` settings.

    Args:
        mode: Key into ``data`` selecting which series to draw; it is also
            embedded in the name of the saved PDF.
        max_pvalue: Upper limit of the y-axis (the lower limit is fixed at
            -0.025 so that values at 0 remain visible).

    Side effects:
        Prints ``mode``. If ``save_plots`` is true, writes the figure to
        ``figures/by_dataset/twitter_<mode>_diff_dist_all.pdf`` and prints
        that path. The figure is shown in every case.
    """
    fig, ax = plt.subplots()

    # (detector label, line style, colour) -- drawn in this order.
    curve_styles = [
        ("KS",              '-.', 'tab:green'),
        ("KernelTwoSample", '-',  'tab:purple'),
        ("MMD",             '--', 'tab:blue'),
    ]
    for label, line_style, color in curve_styles:
        ax.plot(data[mode][label], line_style, label=label,
                linewidth=linewidth, color=color)

    # Use the Axes object directly instead of the pyplot "current axes" state.
    ax.set_xticks(range(0, 3))
    ax.set_xticklabels(['124 + 24h', '192 + 24h', '480 + 24h'])
    ax.legend(loc='upper right')
    ax.set(ylabel='p-value')
    ax.grid(color='#dddddd')
    ax.set_ylim([-0.025, max_pvalue])

    fig.tight_layout()

    print(mode)
    if save_plots:
        # Save before showing: show() may release the figure afterwards.
        filename = 'figures/by_dataset/twitter_' + mode + '_diff_dist_all.pdf'
        fig.savefig(filename, format='pdf')
        print(filename)

    # Showing must not depend on save_plots; previously this call was nested
    # inside the branch above, so figures were only shown when also saved.
    plt.show()

In [7]:
# One figure per configured mode, with the y-axis capped at 0.5.
if show_plots:
    for current_mode in modes:
        plot(current_mode, max_pvalue=0.5)

### plots (previous version)

In [8]:
# Earlier, unstyled variant of the figures: for every mode, one default-styled
# line per detector showing the mean of 'predictions' for each result object.
if show_plots:
    tick_labels = ['124 + 24h', '192 + 24h', '480 + 24h']
    for mode in modes:
        for detector in detectors:
            mean_predictions = [
                np.mean(result_set[mode][detector]['predictions'])
                for result_set in (results_124, results_192, results_480)
            ]
            plt.plot(mean_predictions)
        plt.xticks(ticks=range(0, 3), labels=tick_labels, rotation=60)
        plt.show()