In [1]:
import pickle
from detectors import *
import matplotlib.pyplot as plt
import numpy as np
import time
import os

In [2]:
def time_fit(det, data):
    """Fit a detector on ``data`` and measure how long the fit takes.

    Args:
        det: Object exposing ``fit(array)``; whatever ``fit`` returns is
            passed back to the caller as the fitted detector.
        data: Array-like reference samples; converted with ``np.array``
            before being handed to ``fit`` (the conversion is included in
            the measured time).

    Returns:
        Tuple ``(fitted_detector, elapsed_seconds)``.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    time_begin = time.perf_counter()

    det = det.fit(np.array(data))

    return det, time.perf_counter() - time_begin

def time_test(det, data):
    """Run a detector on ``data`` and measure how long the prediction takes.

    Args:
        det: Object exposing ``predict_proba(array)``.
        data: Array-like samples to test; converted with ``np.array``
            before being handed to ``predict_proba`` (the conversion is
            included in the measured time).

    Returns:
        Tuple ``(det, elapsed_seconds, result)`` where ``result`` is
        whatever ``predict_proba`` returned.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    time_begin = time.perf_counter()

    result = det.predict_proba(np.array(data))

    return det, time.perf_counter() - time_begin, result


# load data

In [3]:
# Number of interleaved slices each dataset is split into by the test cell.
permutations = 10

# Dataset variants; each one names a pickle under data/twitter/.
modes = ['bert_768', 'bow_50', 'bow_768']

# Location of the cached detector results.
result_pickle = 'data/results/twitter_drift_induction.pickle'

# Tick labels used on the x-axis of the plots.
target_percentages = [
    0.05, 0.1, 0.15, 0.2, 0.25,
    0.3, 0.35, 0.4, 0.45, 0.5,
    0.55, 0.6, 0.65, 0.7, 0.75,
    0.8, 0.85, 0.9, 0.95, 1,
]

# Load one pickled subset per mode.
subsets = {}
for mode in modes:
    subset_path = 'data/twitter/twitter_{mode}_drift.pickle'.format(mode=mode)
    with open(subset_path, 'rb') as handle:
        # `gradual_dict` stays bound at module level to the most recently
        # loaded subset, as before.
        gradual_dict = pickle.load(handle)
    subsets[mode] = gradual_dict

# Initialize detectors

In [4]:
# Detectors under comparison, keyed by the short name that is used in the
# results dictionary and in the figure file names.
# (The detector classes presumably come from `from detectors import *`.)
detectors = dict(
    csdd=CosineSimilarityDriftDetector(),
    kts=KernelTwoSampleDriftDetector(),
    aks=AlibiKSDetector(),
    ammd=AlibiMMDDetector(),
)


# tests

In [None]:
# Reuse previously saved results so finished detector runs are not recomputed.
if os.path.isfile(result_pickle):  # Do not overwrite
    print('Loading result pickle: ', result_pickle)
    with open(result_pickle, 'rb') as handle:
        results = pickle.load(handle)
else:
    results = {mode: {detector: {} for detector in detectors} for mode in modes}


for mode in modes:
    # A pickle written with a different set of modes may lack this entry.
    mode_results = results.setdefault(mode, {})

    # generating permutations: slice i takes every `permutations`-th sample
    # starting at offset i, for the original data and for each drifted set.
    orig_samples = subsets[mode]['orig'][0]
    gradual_dict_orig = [list(orig_samples[i::permutations]) for i in range(permutations)]
    gradual_dict_drifted = [[] for _ in range(permutations)]
    for percentage in subsets[mode]['drifted'][0]:
        for i in range(permutations):
            gradual_dict_drifted[i].append(list(percentage[i::permutations]))

    for detector in detectors:
        detector_results = mode_results.setdefault(detector, {})
        if 'predictions' in detector_results:  # skip already computed
            continue

        # Collect into locals first: the 'predictions' key doubles as the
        # "already computed" marker, so it must only appear once the whole
        # run for this detector has finished. Otherwise an interrupted run
        # would be skipped as complete when the cell is executed again.
        predictions = {i: [] for i in range(permutations)}
        time_detect = {i: [] for i in range(permutations)}
        fit_times = []

        for permutation in range(permutations):
            det, t = time_fit(detectors[detector], gradual_dict_orig[permutation])
            fit_times.append(t)

            for percentage in gradual_dict_drifted[permutation]:
                _, t, res = time_test(det, percentage)
                predictions[permutation].append(res)
                time_detect[permutation].append(t)

        finished = {'predictions': predictions, 'time_detect': time_detect}
        if fit_times:
            # NOTE(review): only the last permutation's fit time is stored,
            # while 'time_detect' is kept per permutation. Left unchanged so
            # existing result pickles and their readers stay compatible.
            finished['time_fit'] = fit_times[-1]
        detector_results.update(finished)

# Make sure the target directory exists so the dump cannot fail after all
# the computation has been done.
result_dir = os.path.dirname(result_pickle)
if result_dir:
    os.makedirs(result_dir, exist_ok=True)
with open(result_pickle, 'wb') as handle:
    pickle.dump(results, handle)
    

# plot graphs

In [None]:
# savefig does not create missing directories.
os.makedirs('figures/by_dataset', exist_ok=True)

tick_positions = np.arange(len(target_percentages))

for mode in modes:
    # Mean curve per detector, computed once and reused for the combined figure.
    mean_curves = {}

    for detector in detectors:
        predictions = results[mode][detector]['predictions']
        # For every test step n, average the n-th prediction over all permutations.
        means = [
            np.mean([predictions[i][n] for i in range(permutations)])
            for n in range(len(predictions[0]))
        ]
        mean_curves[detector] = means

        # One figure per detector. An explicit figure is used so curves can
        # never pile up on a shared implicit figure.
        fig, ax = plt.subplots()
        ax.plot(means)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(target_percentages, rotation=60)
        fig.savefig('figures/by_dataset/twitter_{mode}_drift_induction_{detector}.pdf'.format(mode=mode, detector=detector), format='pdf')
        plt.show()
        plt.close(fig)

    # Combined figure: all detectors for this mode on the same axes.
    fig, ax = plt.subplots()
    for detector, means in mean_curves.items():
        ax.plot(means, label=detector)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(target_percentages, rotation=60)
    ax.legend()  # without a legend the overlaid lines cannot be told apart
    fig.savefig('figures/by_dataset/twitter_{mode}_drift_induction_all.pdf'.format(mode=mode), format='pdf')
    plt.show()
    plt.close(fig)

In [None]:
# Print a compact summary instead of the full nested dictionary, which holds
# every prediction of every permutation and floods the cell output.
for mode_name, mode_results in results.items():
    for detector_name, detector_results in mode_results.items():
        print(
            mode_name,
            detector_name,
            'time_fit:', detector_results.get('time_fit'),
            'permutations:', len(detector_results.get('predictions', {})),
        )