In [1]:
import pickle
from detectors import *
import matplotlib.pyplot as plt
import numpy as np
import random
import time
import os

In [2]:
def time_fit(det, data):
    """Fit a detector on ``data`` and measure how long the fit takes.

    Parameters
    ----------
    det : object
        Any object exposing ``fit(array)``.
    data : array-like
        Reference samples; converted with ``np.array`` before fitting.

    Returns
    -------
    tuple
        ``(fitted, seconds)`` where ``fitted`` is whatever ``det.fit``
        returned and ``seconds`` is the elapsed time as a float.
    """
    # perf_counter() is monotonic and high-resolution, so short fits are
    # measured reliably and system clock adjustments cannot skew the result.
    time_begin = time.perf_counter()

    # The array conversion stays inside the timed region, as it always was.
    det = det.fit(np.array(data))

    return det, time.perf_counter() - time_begin

def time_test(det, data):
    """Score ``data`` with a fitted detector and measure how long it takes.

    Parameters
    ----------
    det : object
        Any object exposing ``predict_proba(array)``.
    data : array-like
        Samples to score; converted with ``np.array`` before the call.

    Returns
    -------
    tuple
        ``(det, seconds, result)`` where ``det`` is the detector that was
        passed in, ``seconds`` is the elapsed time as a float and ``result``
        is whatever ``det.predict_proba`` returned.
    """
    # perf_counter() is monotonic and high-resolution, so it is the right
    # clock for interval timing (time.time() can jump with clock changes).
    time_begin = time.perf_counter()

    # The array conversion stays inside the timed region, as it always was.
    result = det.predict_proba(np.array(data))

    return det, time.perf_counter() - time_begin, result


# load data

In [3]:
# Embedding variants to evaluate; each one has its own pickle on disk.
modes = ['bert_768', 'bow_50', 'bow_768']
# subsets[mode][class_index] -> list of `permutations` disjoint sample lists.
subsets = {}
permutations = 10   # number of disjoint subsets built per class
max_length = 1000   # upper bound on the number of samples per subset

# Fixed seed so that "Restart & Run All" rebuilds exactly the same subsets.
# A dedicated generator is used so the global `random` state is left alone.
shuffle_seed = 42
shuffle_rng = random.Random(shuffle_seed)

result_pickle = 'data/results/amazon_diff_classes.pickle'

for mode in modes:
    subsets[mode] = {}
    embeddings_path = 'data/movies/embeddings/amazon_{mode}_different_classes.pickle'.format(mode=mode)
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(embeddings_path, 'rb') as handle:
        data = pickle.load(handle)
    # The file is no longer needed once the pickle is in memory.
    for _class in range(len(data)):
        # presumably element 0 of each class entry holds the embedding
        # vectors -- TODO confirm against the script that wrote the pickle
        unpermutated = data[_class][0]
        # NOTE(review): shuffle swaps elements in place; if this is a
        # multi-dimensional NumPy array rather than a list, the swap can
        # duplicate rows -- confirm the pickled type.
        shuffle_rng.shuffle(unpermutated)
        unpermutated = unpermutated[:max_length*permutations]
        # Stride slicing deals the shuffled samples round-robin into
        # `permutations` disjoint subsets of at most `max_length` items.
        permutated = [list(unpermutated[i::permutations]) for i in range(permutations)]
        subsets[mode][_class] = permutated
    

# Initialize detectors

In [4]:
# Detector instances under test, keyed by the short names that are also used
# as keys in `results`. Every detector is built with its default arguments.
detectors = dict(
    csdd=CosineSimilarityDriftDetector(),
    kts=KernelTwoSampleDriftDetector(),
    aks=AlibiKSDetector(),
    ammd=AlibiMMDDetector(),
    lsdd=AlibiLSDDDetector(),
)

# tests

In [5]:

def _save_results(results, path):
    """Pickle ``results`` to ``path`` without ever leaving a truncated file.

    The parent directory is created if missing, the data is written to a
    temporary sibling file and then moved over ``path`` with ``os.replace``.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as handle:
        pickle.dump(results, handle)
    os.replace(tmp_path, path)


# Reuse cached results when present so finished detector/mode pairs are not
# recomputed. NOTE: pickle.load executes arbitrary code; only load trusted files.
if os.path.isfile(result_pickle):
    print('Loading result pickle: ', result_pickle)
    with open(result_pickle, 'rb') as handle:
        results = pickle.load(handle)
else:
    results = {mode: {detector: {} for detector in detectors} for mode in modes}


for detector in detectors:
    for mode in modes:
        # setdefault tolerates a cached pickle that predates a newly added
        # mode or detector instead of failing with KeyError.
        entry = results.setdefault(mode, {}).setdefault(detector, {})
        if 'predictions' in entry:  # skip already computed
            continue

        # Collect into locals first so `results` never holds a half-filled
        # entry that a later run would mistake for a finished one.
        predictions = []
        detect_times = []
        t_fit = None

        for perm in range(permutations):
            # Fit on class 0 and score class 4 for the same permutation index.
            det, t_fit = time_fit(detectors[detector], subsets[mode][0][perm])
            _, t, res = time_test(det, subsets[mode][4][perm])
            predictions.append(res)
            detect_times.append(t)

        entry['predictions'] = predictions
        entry['time_detect'] = detect_times
        if t_fit is not None:
            # Kept as a scalar (fit time of the last permutation) to stay
            # compatible with the layout of already cached pickles.
            entry['time_fit'] = t_fit

        # Checkpoint after every finished pair: some detectors take minutes,
        # and an interruption must not throw away completed work.
        _save_results(results, result_pickle)

_save_results(results, result_pickle)

Loading result pickle:  data/results/amazon_diff_classes.pickle


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  lam_index = (rds < lam_rd_max).nonzero()[0]


In [6]:
# Dump the whole nested results dict (mode -> detector -> metrics) as plain text.
print(results)

{'bert_768': {'csdd': {'predictions': [0.40200058, 0.39733264, 0.41016585, 0.4184376, 0.399518, 0.4051092, 0.4195111, 0.39835742, 0.39016247, 0.40259495], 'time_detect': [0.017508983612060547, 0.018219947814941406, 0.024643421173095703, 0.03472256660461426, 0.03407645225524902, 0.03410911560058594, 0.02477240562438965, 0.018114566802978516, 0.01798391342163086, 0.018001794815063477], 'time_fit': 0.0019843578338623047}, 'kts': {'predictions': [0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002], 'time_detect': [35.03440976142883, 34.81064295768738, 34.487727880477905, 34.523043155670166, 34.594449520111084, 35.124234199523926, 34.7649872303009, 34.72741913795471, 34.75870084762573, 34.87713027000427], 'time_fit': 0.0014495849609375}, 'aks': {'predictions': [0.010624684, 0.011450323, 0.011923821, 0.013114604, 0.012836535, 0.014982406, 0.012036391, 0.01291044, 0.013991681, 0.01180784], 'time_detect': [0.1708979606628418, 0.1697983741760254, 0.16942143440246582, 0.1688323