In [1]:
import pickle
from detectors import *
import matplotlib.pyplot as plt
import numpy as np
import time
import os

In [2]:
def time_fit(det, data, labels):
    """Fit ``det`` on ``data`` and measure how long the fit takes.

    Args:
        det: Detector object exposing ``fit(X, targets=...)``.
        data: Samples to fit on; converted with ``np.array`` before the call.
        labels: Per-sample targets; converted with ``np.array`` and passed
            as the ``targets`` keyword.

    Returns:
        Tuple ``(det, seconds)``: whatever ``det.fit`` returned and the
        elapsed time in seconds (float). The array conversions are part of
        the measured interval.
    """
    # perf_counter is monotonic and high-resolution; time.time() is a wall
    # clock that can be adjusted mid-run and is too coarse for short fits.
    time_begin = time.perf_counter()

    det = det.fit(np.array(data), targets=np.array(labels))

    return det, time.perf_counter() - time_begin

def time_test(det, data):
    """Run ``det.predict_proba`` on ``data`` and measure how long it takes.

    Args:
        det: Detector object exposing ``predict_proba(X)``.
        data: Samples to score; converted with ``np.array`` before the call.

    Returns:
        Tuple ``(det, seconds, result)``: the detector that was passed in,
        the elapsed time in seconds (float), and the value returned by
        ``det.predict_proba``. The array conversion is part of the measured
        interval.
    """
    # perf_counter is monotonic and high-resolution; time.time() is a wall
    # clock that can be adjusted mid-run and is too coarse for short calls.
    time_begin = time.perf_counter()

    result = det.predict_proba(np.array(data))

    return det, time.perf_counter() - time_begin, result


# load data

In [3]:
# Embedding variants to evaluate and the number of permutations scored per variant.
modes = ['bert_768', 'bow_50', 'bow_768']
num_permutations = 10

# Pickle file in which the detector results are cached.
result_pickle = 'data/results/twitter_same_dist.pickle'

# Per-mode contents of the 'data' entry of each input pickle.
subsets = {}
for mode in modes:
    subset_path = 'data/twitter/twitter_{mode}_same_dist.pickle'.format(mode=mode)
    with open(subset_path, 'rb') as handle:
        subset = pickle.load(handle)['data']
    subsets[mode] = subset
    # The unpacked names are overwritten each pass, so after the loop they
    # refer to the last mode that was loaded.
    permutations_embs, permutation_keys = subset

# instantiate detectors

In [4]:
# Drift detectors under comparison, keyed by the short name that is also used
# as the per-detector key in the results dict.
detectors = dict(
    csdd=CosineSimilarityDriftDetector(),
    kts=KernelTwoSampleDriftDetector(),
    aks=AlibiKSDetector(),
    ammd=AlibiMMDDetector(),
    lsdd=AlibiLSDDDetector(),
    cdbd=CDBDDetector(),
)

# tests

In [5]:

# Resume from the cached results when they exist so finished
# (mode, detector) combinations are not recomputed.
if os.path.isfile(result_pickle):  # Do not overwrite
    print('Loading result pickle: ', result_pickle)
    # NOTE(review): pickle.load can execute arbitrary code; only load result
    # files produced by this notebook.
    with open(result_pickle, 'rb') as handle:
        results = pickle.load(handle)
else:
    results = {mode: {detector: {} for detector in detectors} for mode in modes}

for detector in detectors:
    for mode in modes:
        # setdefault also covers a cached pickle that lacks this mode or
        # detector, which would otherwise raise KeyError.
        entry = results.setdefault(mode, {}).setdefault(detector, {})
        if 'predictions' in entry:  # skip already computed
            continue

        entry['predictions'] = []

        # subsets[mode][0] is the sequence of permutations; its first element
        # is the set every detector is fitted on.
        permutations = subsets[mode][0]
        reference = permutations[0]

        if detector == 'cdbd':
            # Split on the reference set's own length. The previous code used
            # len(fit_set) here, i.e. a value left over from an earlier
            # iteration (or undefined on a fresh kernel), which gave a wrong
            # split point from the second mode onwards.
            half = len(reference) // 2
            fit_set_1 = list(reference[:half])
            fit_set_2 = list(reference[half:])
            # The permutations that are not used for detection below are
            # halved the same way and appended to the two halves.
            for p in permutations[num_permutations+1:]:
                fit_set_1.extend(p[:len(p)//2])
                fit_set_2.extend(p[len(p)//2:])
            fit_set = fit_set_1 + fit_set_2
        else:
            fit_set = reference

        # Targets: 0 for the first half of fit_set, 1 for the second half.
        half_len = len(fit_set) / 2
        fit_labels = [int(x // half_len) for x in range(len(fit_set))]

        det, t = time_fit(detectors[detector], fit_set, fit_labels)
        entry['time_fit'] = t
        entry['time_detect'] = []

        # Score permutations 1..num_permutations with the fitted detector.
        for permutation in permutations[1:num_permutations+1]:
            det_2, t, res = time_test(det, permutation)
            entry['predictions'].append(res)
            entry['time_detect'].append(t)

# Make sure the target directory exists so a long run is not lost at the
# final write.
result_dir = os.path.dirname(result_pickle)
if result_dir:
    os.makedirs(result_dir, exist_ok=True)

with open(result_pickle, 'wb') as handle:
    pickle.dump(results, handle)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  lam_index = (rds < lam_rd_max).nonzero()[0]


In [6]:
# Dump the whole nested results dict
# (mode -> detector -> 'predictions' / 'time_fit' / 'time_detect').
print(results)

{'bert_768': {'csdd': {'predictions': [0.9995134, 0.9995103, 0.9995152, 0.9995761, 0.9996051, 0.9994821, 0.99953544, 0.9995222, 0.9993547, 0.99950904], 'time_fit': 0.0030434131622314453, 'time_detect': [0.016657590866088867, 0.016158103942871094, 0.0161435604095459, 0.016094207763671875, 0.01593804359436035, 0.015767574310302734, 0.015712261199951172, 0.015787363052368164, 0.015855789184570312, 0.01573967933654785]}, 'kts': {'predictions': [0.384, 0.348, 0.268, 0.554, 0.838, 0.276, 0.442, 0.25, 0.082, 0.436], 'time_fit': 0.0019116401672363281, 'time_detect': [26.70753574371338, 24.414870738983154, 29.438719034194946, 29.973390102386475, 29.601118564605713, 29.49410629272461, 29.446752548217773, 29.419032335281372, 29.540204763412476, 29.454132556915283]}, 'aks': {'predictions': [0.5110915, 0.512096, 0.5031071, 0.50502497, 0.55845565, 0.49757445, 0.508464, 0.51005715, 0.48201433, 0.5263707], 'time_fit': 0.0009007453918457031, 'time_detect': [0.16668128967285156, 0.16501069068908691, 0.1