# Runtimes

In [1]:
from pprint import pprint
import numpy as np

## Load results

In [2]:
from results_reader import (read_results, print_results, get_result_ids)

# Directory holding the pickled result files, relative to this notebook.
results_directory = "../data/results/"

# Load every result set into one mapping keyed by result id.
# NOTE(review): print_info=True presumably produces the per-file listing shown
# in the cell output - confirm in results_reader.
results = read_results(results_directory, print_info=True)

amazon_diff_classes       ../data/results/amazon_diff_classes.pickle         <class 'dict'> 3
amazon_drift_induction    ../data/results/amazon_drift_induction.pickle      <class 'dict'> 3
amazon_same_dist          ../data/results/amazon_same_dist.pickle            <class 'dict'> 3
twitter_diff_classes      ../data/results/twitter_diff_classes.pickle        <class 'dict'> 3
twitter_drift_induction   ../data/results/twitter_drift_induction.pickle     <class 'dict'> 3
twitter_same_dist         ../data/results/twitter_same_dist.pickle           <class 'dict'> 3
twitter_diff_dist_124     ../data/results/twitter_same_dist.pickle           <class 'dict'> 3
twitter_diff_dist_192     ../data/results/twitter_same_dist.pickle           <class 'dict'> 3
twitter_diff_dist_480     ../data/results/twitter_same_dist.pickle           <class 'dict'> 3


## Print overview

In [3]:
# Debug toggles: change one `if False` to `if True` to dump that slice of `results`.
# Empty lists are passed for ids/modes/detectors; presumably print_results
# treats them as "no filter" - confirm in results_reader.
if False: # Everything
    print_results(results)
if False: # Fit and detect runtimes together
    print_results(results, ids=[], modes=[], detectors=[], keys=["time_detect", "time_fit"])
if False: # Fit runtime only -> stored as a float or None
    print_results(results, ids=[], modes=[], detectors=[], keys=["time_fit"])
if False:  # Detect runtime only -> stored as a list, a dict or None
    print_results(results, ids=[], modes=[], detectors=[], keys=["time_detect"])

## Filter and collect data

In [4]:
def _total_runtime(value):
    """Collapse one stored entry into a single number.

    Returns None when nothing usable was recorded (None, an empty container
    or a falsy scalar) so the caller can leave its placeholder untouched.
    """
    if value is None:
        return None
    if isinstance(value, dict):
        if not value:
            return None
        # A dict holds several sequences; add up the sum of each of them.
        return sum(np.sum(part) for part in value.values())
    if isinstance(value, np.ndarray):
        # Use .size, not truthiness: bool() of a multi-element array raises.
        return np.sum(value) if value.size else None
    if isinstance(value, (list, tuple)):
        return np.sum(value) if value else None
    # Scalars are kept as they are; a falsy scalar counts as "not recorded".
    return value if value else None


def filter_data(results, ids=None, modes=None, detectors=None, keys=None):
    """Collect one result key per detector, result id and mode, plus a total.

    Parameters
    ----------
    results : mapping
        ``results[id_]["data"][mode][detector][key]`` holds the stored value,
        which may be a scalar, a list/tuple/array or a dict of sequences.
    ids, modes, detectors : iterable, optional
        Restrict the collection to these entries. ``None`` or an empty
        iterable means "use everything found in ``results``".
    keys : sequence
        Must contain exactly one key name (e.g. ``["time_fit"]``).

    Returns
    -------
    dict or None
        ``{detector: {"sum": total, id_: {mode: value}}}`` where ``value`` is
        the collapsed number, or an empty dict when nothing was recorded for
        that combination. ``None`` (after printing a notice) when ``keys``
        does not contain exactly one entry.
    """
    # Only one key is supported: with several keys each one would overwrite
    # the per-mode entry written by the previous one.
    if keys is None or len(keys) != 1:
        print("Implemented for only 1 key")
        return None
    key = next(iter(keys))

    results_ = {}
    for id_ in ids if ids else results:
        data = results[id_]["data"]
        for mode in modes if modes else data:
            for detector in detectors if detectors else data[mode]:
                # Create the placeholders first so every visited combination
                # shows up in the output even when no value was recorded.
                per_detector = results_.setdefault(detector, {})
                per_detector.setdefault(id_, {}).setdefault(mode, {})
                per_detector.setdefault("sum", 0)

                # An explicitly requested detector may be absent from a mode.
                if detector not in data[mode]:
                    continue
                entry = data[mode][detector]
                total = _total_runtime(entry[key]) if key in entry else None
                if total is None:
                    continue

                per_detector[id_][mode] = total
                per_detector["sum"] += total
    return results_

# Per-detector totals, collected separately for the fit and the detect key
# because filter_data handles exactly one key per call.
fit_times = filter_data(results, keys=["time_fit"])
det_times = filter_data(results, keys=["time_detect"])

# Debug toggles: set to True to inspect the collected structures.
if(False):
    pprint(fit_times)
if(False):
    pprint(det_times)

## Detectors and labels

In [5]:
def get_label(detector_name):
    """Return the display label for a detector id.

    Ids without a known label are reported on stdout and returned unchanged,
    so callers always get something printable.
    """
    known_labels = {
        "cdbd": "CDBD",
        "csdd": "Cosine Similarity",
        "kts": "KTS/MMD",  # i.e. "Kernel Two Sample"
        "aks": "KS",
        "lsdd": "LSDD",
        "ammd": "MMD",
    }
    try:
        return known_labels[detector_name]
    except KeyError:
        print("Unknown detector label:", detector_name)
        return detector_name
    
# Fetch the identifiers contained in the loaded results (helper from results_reader).
ids, modes, detectors, keys = get_result_ids(results)
# Debug toggle: set to True to also list ids, modes and keys.
if(False):
    print(ids)
    print(modes)
    print(keys)
print(detectors)

# Map display label -> detector id so the summary below can iterate in label order.
labels_to_detectors = {}
for detector in detectors:
    labels_to_detectors[get_label(detector)] = detector

['aks', 'ammd', 'cdbd', 'csdd', 'kts', 'lsdd']


## Print total runtime per detector

In [6]:
def f(value):
    """Format a number for the minutes column.

    The value is rounded to one decimal and right-aligned in a field of six
    characters; longer results are not truncated.
    """
    rounded = np.round(value, 1)
    return "{:>6}".format(str(rounded))
def fh(value):
    """Format a number for the hours column.

    The value is rounded to two decimals and right-aligned in a field of six
    characters; longer results are not truncated.
    """
    rounded = np.round(value, 2)
    return "{:>6}".format(str(rounded))

# Running totals over all detectors that count towards the overall runtime.
overall = 0
overallHours = 0

print("In min  In hours  Detector")

for label in sorted(labels_to_detectors):
    detector = labels_to_detectors[label]

    # The stored sums are divided by 60 to get minutes, and once more for hours.
    minutes = (fit_times[detector]["sum"] + det_times[detector]["sum"]) / 60
    hours = minutes / 60

    # These two detectors are listed but left out of the overall totals.
    if detector in ("ammd", "csdd"):
        print(f(minutes), fh(hours), get_label(detector) + " (excluded from overall)", sep="   ")
        continue

    print(f(minutes), fh(hours), get_label(detector), sep="   ")
    overall += minutes
    overallHours += hours

print()
print("Overall (in minutes):", f(overall))
print("Overall (in hours):  ", fh(overallHours))

In min  In hours  Detector
   8.2     0.14   CDBD
   0.8     0.01   Cosine Similarity (excluded from overall)
   4.5     0.08   KS
3490.9    58.18   KTS/MMD
   0.4     0.01   LSDD
  87.5     1.46   MMD (excluded from overall)

Overall (in minutes): 3504.0
Overall (in hours):     58.4
