# Drift experiments

Runs each drift detector on each embedding data set (`bow_50`, `bow_768`) and saves the results as a nested dictionary in the format:

```
{'data_id': {'detector_id': {'predictions': [.1,.2,.3],
                             'time_detect': 60.00,
                             'time_fit': 1.00}}}
```

## Data

In [None]:
import yaml
import os.path
import pickle

# Set data paths
# Read the configuration inside a context manager so the file is closed again.
with open("config.yaml", "r") as config_handle:
    config = yaml.safe_load(config_handle)
bow_50_file  = os.path.join(config["EMBEDDINGS_DIRECTORY"], "amazon_drift_bow_50.pickle")
bow_768_file = os.path.join(config["EMBEDDINGS_DIRECTORY"], "amazon_drift_bow_768.pickle")
results_file = os.path.join(config["EXPERIMENTS_DIRECTORY"], "results_a")
print("bow_50_file", bow_50_file)
print("bow_768_file", bow_768_file)

# Load data
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
data = {}
for data_key, data_file in (("bow_50", bow_50_file), ("bow_768", bow_768_file)):
    with open(data_file, "rb") as handle:
        data[data_key] = pickle.load(handle)
    # Sizes of the first 'orig' element, the first drifted set and the first
    # 'train' element.
    print("Samples:",
          len(data[data_key]['orig'][0]),
          len(data[data_key]['drifted'][0][0]),
          len(data[data_key]['train'][0]))

In [None]:
# Print example data
# Debug helper, disabled by default: change the condition to True to inspect
# the structure of one loaded data set.
if False:
    print_model = data["bow_50"]
    print(type(print_model), len(print_model))
    for key, value in print_model.items():
        print(key, type(value), len(value))
        # First item of every element, each followed by an empty line.
        for i in range(len(value)):
            print(value[i][0], end="\n\n")
## Results

In [None]:
# Container for all experiment results: results[data_id][detector_id] is a dict
# that the experiment helpers fill with "time_fit", "predictions" and
# "time_detect". Running this cell discards everything collected so far.
results = {}

In [None]:
# Load previous results
# When a results file from an earlier run exists, continue from its content
# instead of the empty dictionary.
if os.path.isfile(results_file):
    with open(results_file, "rb") as results_stream:
        results = pickle.load(results_stream)

## Experiments

In [None]:
import time

def default_fit(detector_id, detector, data_id, data, results, force_run):
    """Fit ``detector`` on ``data`` and record the fit duration in ``results``.

    The fit is skipped only if predictions for this data set / detector pair
    are already stored and ``force_run`` is false. An entry that has no
    predictions yet (for example after an interrupted detection run) is
    fitted again; otherwise a newly constructed detector would be handed to
    ``default_detect`` without ever having been fitted.

    Args:
        detector_id: Key of the detector inside ``results[data_id]``.
        detector: Object providing a ``fit(data)`` method.
        data_id: Key of the data set inside ``results``.
        data: Reference data passed to ``detector.fit``.
        results: Nested dict ``{data_id: {detector_id: {...}}}``, updated in
            place.
        force_run: If true, fit even when predictions are already stored.

    After a fit, ``results[data_id][detector_id]`` is replaced by
    ``{"time_fit": seconds}``; results stored for an earlier fit are dropped.
    """
    cached = results.get(data_id, {}).get(detector_id, {})
    if "predictions" in cached and not force_run:
        return

    # perf_counter is monotonic, so the duration is unaffected by clock changes.
    time_begin = time.perf_counter()
    detector.fit(data)
    elapsed = time.perf_counter() - time_begin

    # Start a fresh entry: anything stored for a previous fit is now stale.
    results.setdefault(data_id, {})[detector_id] = {"time_fit": elapsed}

def default_detect(detector_id, detector, data_id, data, results, force_run):
    """Run ``detector.predict_proba`` on every element of ``data``.

    Nothing is done if predictions for this data set / detector pair are
    already stored and ``force_run`` is false. Otherwise one prediction per
    element of ``data`` is computed, and the list is stored under
    ``"predictions"`` together with the elapsed seconds under
    ``"time_detect"``. Other keys of an existing entry (such as
    ``"time_fit"``) are kept. Progress is printed as the ids followed by the
    length of each processed element.

    Args:
        detector_id: Key of the detector inside ``results[data_id]``.
        detector: Object providing a ``predict_proba(sample)`` method.
        data_id: Key of the data set inside ``results``.
        data: Iterable of sized samples, each passed to ``predict_proba``.
        results: Nested dict ``{data_id: {detector_id: {...}}}``, updated in
            place.
        force_run: If true, recompute even when predictions are stored.

    Raises:
        Whatever ``detector.predict_proba`` raises. In that case ``results``
        is left unchanged.
    """
    previous = results.get(data_id, {}).get(detector_id)
    if previous is not None and "predictions" in previous and not force_run:
        return

    # perf_counter is monotonic, so the duration is unaffected by clock changes.
    time_begin = time.perf_counter()

    # Collect into a local list and store it only after the loop has finished.
    # Writing into the stored entry directly would leave a partial list behind
    # on failure, which later runs would treat as complete.
    predictions = []
    print(data_id, detector_id, end=" ")
    for p in data:
        predictions.append(detector.predict_proba(p))
        print(len(p), end=" ")
    print()

    results_detector = previous if previous is not None else {}
    results_detector["predictions"] = predictions
    results_detector["time_detect"] = time.perf_counter() - time_begin
    results.setdefault(data_id, {})[detector_id] = results_detector

In [None]:
from detectors.AlibiKSDetector import AlibiKSDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "AlibiKSDetector"

In [None]:
# AlibiKSDetector on "bow_50": fit on the 'orig' data, then predict for each
# drifted set. force_run=False reuses results that are already stored.
data_id = "bow_50"
detector = AlibiKSDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# AlibiKSDetector on "bow_768": fit on the 'orig' data, then predict for each
# drifted set. force_run=False reuses results that are already stored.
data_id = "bow_768"
detector = AlibiKSDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.AlibiMMDDetector import AlibiMMDDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "AlibiMMDDetector"

In [None]:
# AlibiMMDDetector (pytorch backend) on "bow_50": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = AlibiMMDDetector(backend='pytorch')
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# AlibiMMDDetector (pytorch backend) on "bow_768": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = AlibiMMDDetector(backend='pytorch')
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.CosineDetector import CosineSimilarityDriftDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "CosineDetector"

In [None]:
# CosineSimilarityDriftDetector on "bow_50": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = CosineSimilarityDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# CosineSimilarityDriftDetector on "bow_768": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = CosineSimilarityDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.FCITDetector import FCITDriftDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "FCITDetector"

In [None]:
# FCITDriftDetector on "bow_50": fit on the 'orig' data, then predict for each
# drifted set. force_run=False reuses results that are already stored.
data_id = "bow_50"
detector = FCITDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# FCITDriftDetector on "bow_768": fit on the 'orig' data, then predict for each
# drifted set. force_run=False reuses results that are already stored.
data_id = "bow_768"
detector = FCITDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.KernelTwoSampleDetector import KernelTwoSampleDriftDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "KernelTwoSampleDetector"

In [None]:
# KernelTwoSampleDriftDetector on "bow_50": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = KernelTwoSampleDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# KernelTwoSampleDriftDetector on "bow_768": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = KernelTwoSampleDriftDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.AlibiLSDD import AlibiLSDDDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "AlibiLSDDDetector"

In [None]:
# AlibiLSDDDetector (pytorch backend) on "bow_50": fit on the 'orig' data, then
# predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = AlibiLSDDDetector(backend='pytorch')
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# AlibiLSDDDetector (pytorch backend) on "bow_768": fit on the 'orig' data,
# then predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = AlibiLSDDDetector(backend='pytorch')
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.AlibiChiSquaredDetector import AlibiChiSquaredDetector
# Key under which this detector's entries are stored in results[data_id].
detector_id = "AlibiChiSquaredDetector"

In [None]:
# AlibiChiSquaredDetector (default settings) on "bow_50": fit on the 'orig'
# data, then predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = AlibiChiSquaredDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# AlibiChiSquaredDetector (default settings) on "bow_768": fit on the 'orig'
# data, then predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = AlibiChiSquaredDetector()
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.AlibiChiSquaredDetector import AlibiChiSquaredDetector
# Separate results key for the runs that construct AlibiChiSquaredDetector
# with correction="fdr", so they do not overwrite the default-settings runs.
detector_id = "AlibiChiSquaredDetector-FDR"

In [None]:
# AlibiChiSquaredDetector with correction="fdr" on "bow_50": fit on the 'orig'
# data, then predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_50"
detector = AlibiChiSquaredDetector(correction="fdr")
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
# AlibiChiSquaredDetector with correction="fdr" on "bow_768": fit on the 'orig'
# data, then predict for each drifted set. force_run=False reuses stored results.
data_id = "bow_768"
detector = AlibiChiSquaredDetector(correction="fdr")
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=False)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=False)

In [None]:
from detectors.CDBDDetector import CDBDDetector
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# Key under which this detector's entries are stored in results[data_id].
detector_id = "CDBDDetector"

In [None]:
data_id = "bow_50"

# Training inputs for the classifier handed to the detector.
features = data[data_id]['train'][0]
# Take the labels from the dictionary, convert them to an np.array, keep only
# column 1 (the scores) and cast it to int.
targets = np.array(data[data_id]['train'][1])[:, 1].astype('int')

# Unshuffled split with 33 % held out; only the training part is used below.
x_train, x_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.33, shuffle=False
)

# SVM model
model = SVC(kernel='linear', random_state=42)
model.fit(x_train, y_train)

# force_run=True: this cell always fits and predicts again, replacing any
# results already stored for this detector.
detector = CDBDDetector(model)
default_fit(detector_id=detector_id, detector=detector, data_id=data_id,
            data=data[data_id]['orig'][0], results=results, force_run=True)
default_detect(detector_id=detector_id, detector=detector, data_id=data_id,
               data=data[data_id]['drifted'][0], results=results, force_run=True)

## Results

In [None]:
# Save results
# Write the complete results dictionary to the results file, replacing the
# content of any previous file.
with open(results_file, "wb") as results_stream:
    pickle.dump(results, results_stream)

In [None]:
# Print results
# For every data set, list the detectors with their total runtime
# (fit + detect) in minutes, fastest first.
if True:
    print("Runtimes (fit and detect) in minutes:")
    from pprint import pprint
    # The names used here are deliberately different from the notebook globals
    # `time` (the module used by the experiment helpers), `data_id` and
    # `detector_id`. Rebinding those would break or mislabel experiment cells
    # that are re-run after this cell.
    for timed_data_id, timed_detectors in results.items():
        times = {}
        for timed_detector_id, detector_results in timed_detectors.items():
            # Entries without one of the timings count as 0 for that part.
            total_seconds = sum(
                detector_results.get(timing_key, 0)
                for timing_key in ("time_fit", "time_detect")
            )
            times[timed_detector_id] = total_seconds / 60
        pprint(sorted(times.items(), key=lambda item: item[1]))

In [None]:
# Print results
# Pretty-print the complete nested results dictionary; set the condition to
# False to silence this cell.
if True:
    from pprint import pprint
    pprint(results)