In [1]:
import pickle
from detectors import *
import matplotlib.pyplot as plt
import numpy as np
import random
import time
import os

In [2]:
def time_fit(det, data):
    """Fit a detector on reference data and measure how long that takes.

    Args:
        det: Object exposing a ``fit(array)`` method.
        data: Array-like samples; converted with ``np.array`` before being
            handed to ``det.fit``. The conversion is part of the timed span.

    Returns:
        Tuple ``(fitted, seconds)``: whatever ``det.fit`` returned, and the
        elapsed time in seconds as a float.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can yield distorted (even negative) intervals when the
    # system clock is adjusted during a measurement.
    time_begin = time.perf_counter()

    det = det.fit(np.array(data))

    return det, time.perf_counter() - time_begin

def time_test(det, data):
    """Run a detector on test data and measure how long the prediction takes.

    Args:
        det: Object exposing a ``predict_proba(array)`` method.
        data: Array-like samples; converted with ``np.array`` before being
            handed to ``det.predict_proba``. The conversion is part of the
            timed span.

    Returns:
        Tuple ``(det, seconds, result)``: the detector that was passed in, the
        elapsed time in seconds as a float, and the value returned by
        ``det.predict_proba``.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can yield distorted (even negative) intervals when the
    # system clock is adjusted during a measurement.
    time_begin = time.perf_counter()

    result = det.predict_proba(np.array(data))

    return det, time.perf_counter() - time_begin, result


# Load the embedding data and split it into permutation subsets

In [3]:
modes = ['bert_768', 'bow_50', 'bow_768']
subsets = {}
permutations = 10
max_length = 1000
seed = 42  # fixed seed so the shuffled splits are identical on every run

result_pickle = 'data/results/twitter_diff_classes.pickle'


def load_permuted_embeddings(path, n_splits, split_size):
    """Load pickled records, shuffle them and deal them into ``n_splits`` lists.

    Args:
        path: Path of the pickle file to read.
        n_splits: Number of disjoint subsets to produce.
        split_size: Maximum number of items per subset; at most
            ``n_splits * split_size`` items are used in total.

    Returns:
        A list of ``n_splits`` lists. Subset ``i`` holds every ``n_splits``-th
        item starting at offset ``i``, so subsets are shorter than
        ``split_size`` when the file contains fewer items than requested.
    """
    # NOTE: pickle.load can execute arbitrary code - only point this at
    # trusted, locally generated files.
    with open(path, 'rb') as handle:
        records = pickle.load(handle)
    random.shuffle(records)
    # Only the first field of each record is kept (presumably the embedding
    # vector - TODO confirm against the script that wrote these pickles).
    embeddings = [record[0] for record in records][:split_size * n_splits]
    return [embeddings[offset::n_splits] for offset in range(n_splits)]


random.seed(seed)
for mode in modes:
    # 'biden' is loaded before 'trump' for every mode, so the sequence of
    # shuffles (and therefore the resulting splits) is deterministic.
    subsets[mode] = {
        candidate: load_permuted_embeddings(
            'data/twitter/{candidate}_{mode}_embeddings.pickle'.format(
                candidate=candidate, mode=mode),
            permutations,
            max_length,
        )
        for candidate in ['biden', 'trump']
    }
    

# Instantiate detectors

In [4]:
# Registry of the drift detectors under comparison. The short keys are the
# identifiers under which each detector's results are stored.
detectors = dict(
    csdd=CosineSimilarityDriftDetector(),
    kts=KernelTwoSampleDriftDetector(),
    aks=AlibiKSDetector(),
    ammd=AlibiMMDDetector(),
)

# Run the tests: fit each detector on the Biden subsets, then detect on the Trump subsets

In [5]:

def _save_results(results, path):
    """Pickle ``results`` to ``path`` without risking a half-written cache."""
    directory = os.path.dirname(path)
    if directory:
        # Create the output directory up front so a long run cannot fail at
        # the very end just because the folder is missing.
        os.makedirs(directory, exist_ok=True)
    # Write to a temporary file and swap it in, so an interruption during the
    # dump never leaves a truncated pickle that the next run would try to load.
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as handle:
        pickle.dump(results, handle)
    os.replace(tmp_path, path)


if os.path.isfile(result_pickle):  # Do not overwrite
    print('Loading result pickle: ', result_pickle)
    with open(result_pickle, 'rb') as handle:
        results = pickle.load(handle)
else:
    results = {mode: {detector: {} for detector in detectors} for mode in modes}


for detector in detectors:
    for mode in modes:
        # A cached pickle may predate a newly added mode or detector; create
        # the missing levels instead of raising KeyError.
        entry = results.setdefault(mode, {}).setdefault(detector, {})
        if 'predictions' in entry:  # skip already computed
            continue

        # Collect into locals and publish only once every permutation has
        # finished; otherwise an interrupted run would leave a partial
        # 'predictions' list that the next execution silently skips.
        predictions = []
        detect_times = []
        fit_time = None

        for perm in range(permutations):
            # NOTE(review): the same detector instance is re-fitted for every
            # permutation and mode - confirm that fit() fully resets its state.
            det, fit_time = time_fit(detectors[detector], subsets[mode]['biden'][perm])
            _, detect_time, res = time_test(det, subsets[mode]['trump'][perm])
            predictions.append(res)
            detect_times.append(detect_time)

        entry['predictions'] = predictions
        entry['time_detect'] = detect_times
        if fit_time is not None:
            # NOTE(review): only the fit time of the last permutation is kept
            # (a scalar, unlike 'time_detect'); left unchanged so existing
            # result pickles and their consumers keep working.
            entry['time_fit'] = fit_time

        # Checkpoint after every detector/mode pair so finished work survives
        # a crash or a manual interrupt of the remaining (slow) detectors.
        _save_results(results, result_pickle)

_save_results(results, result_pickle)

In [6]:
# Dump the complete nested results dictionary (mode -> detector -> metrics) as plain text.
print(results)

{'bert_768': {'csdd': {'predictions': [0.98833007, 0.9880465, 0.9884946, 0.9881815, 0.98884106, 0.9879347, 0.98828864, 0.9886123, 0.98833466, 0.98849905], 'time_detect': [0.017789602279663086, 0.01818990707397461, 0.018229007720947266, 0.018047094345092773, 0.01820230484008789, 0.018177509307861328, 0.01817035675048828, 0.017960309982299805, 0.01801753044128418, 0.01800704002380371], 'time_fit': 0.002301454544067383}, 'kts': {'predictions': [0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002], 'time_detect': [35.90825295448303, 35.781091928482056, 35.58414053916931, 35.540162086486816, 35.59471845626831, 35.55984044075012, 35.3651020526886, 35.36636662483215, 35.380627155303955, 35.456114292144775], 'time_fit': 0.0016121864318847656}, 'aks': {'predictions': [0.124501504, 0.11660701, 0.12784125, 0.11647242, 0.12715572, 0.1229193, 0.114967845, 0.120046295, 0.120141506, 0.11399152], 'time_detect': [0.193558931350708, 0.22043752670288086, 0.22019100189208984, 0.2205777168