In [9]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import listdir
import copy
from os.path import isfile, join


from importnb import Notebook
ShapDetectorClass_k = Notebook.load('KS_Backend.ipynb')
ShapDetector_k = ShapDetectorClass_k.ShapDetector

In [10]:
plt.style.use('ggplot')
plt.style.use('seaborn-white')

In [43]:
def plot_stats(true_drifts, drift_detections, dist_list, shap_list, score_list, plot_steps, p_list=5):
    plt.figure(figsize = (30,20))
    
    ax1 = plt.subplot(4, 1, 1)
    create_abs_shap_plot(true_drifts, drift_detections, shap_list, plot_steps)
    
    plt.subplot(4, 1, 2, sharex=ax1)
    create_acc_plot(true_drifts, drift_detections, score_list, plot_steps)
    
    plt.subplot(4, 1, 3, sharex=ax1)
    create_dist_plot(true_drifts, drift_detections, dist_list, plot_steps)
    
    plt.subplot(4, 1, 4, sharex=ax1)
    create_shap_plot(true_drifts, drift_detections, shap_list, plot_steps)

    
def create_dist_plot(true_drifts, drift_detections, dist_list, plot_steps, window_size=50):            
    # compute rolling mean of distance
    dist_series = pd.Series(dist_list)
    windows = dist_series.rolling(window_size)
    moving_averages = windows.mean()
    moving_averages_list = moving_averages.tolist()    
    plt.plot(moving_averages_list,  linewidth=1.1, color = 'green')#label = 'Distance', 

    #true drift points
    for d in true_drifts:
        plt.axvline(d, color = 'red', linewidth=1)
    
    #detected drift points
    j = 1
    for i in drift_detections:
        plt.axvline(i, color = 'black', linewidth=0.7)
        j += 1
        
    plt.xticks(np.arange(0, len(dist_list), plot_steps), rotation=20, fontsize= 20)
    plt.xlabel('Processed instances', fontsize = 20)
    #plt.xlim([0, len(dist_list)+1])
    plt.yticks(fontsize = 20)
    plt.ylabel('Distance', fontsize = 20)
    plt.legend()
    
    return 

def create_shap_plot(true_drifts, drift_detections, shap_list, plot_steps, window_size=50):
    shap_transposed = list(map(list, zip(*shap_list)))
        
    cnt=1
    for s_values in shap_transposed:
        dist_series = pd.Series(s_values)
        windows = dist_series.rolling(window_size)
        moving_averages = windows.mean()
        moving_averages_list = moving_averages.tolist()
        plt.plot(moving_averages_list,  linewidth=1.6)#, label = 'F{}'.format(cnt)
        cnt+=1
            
    #true drift points
    for d in true_drifts:
        plt.axvline(d, color = 'red', linewidth=2)
    
    #detected drift points
    j = 1
    for i in drift_detections:
        plt.axvline(i, color = 'black', linewidth=0.2)
        j += 1
    
    plt.xticks(np.arange(0, len(shap_list), plot_steps), rotation=20, fontsize= 20)
    plt.xlabel('Processed instances', fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.ylabel('Shapley Values', fontsize = 20)


    plt.legend()

    return 

def create_abs_shap_plot(true_drifts, drift_detections, shap_list, plot_steps, window_size=50):
    shap_transposed = np.abs(list(map(list, zip(*shap_list))))
        
    cnt=1
    for s_values in shap_transposed:
        dist_series = pd.Series(s_values)
        windows = dist_series.rolling(window_size)
        moving_averages = windows.mean()
        moving_averages_list = moving_averages.tolist()
        plt.plot(moving_averages_list,  linewidth=1.6, label = 'F{}'.format(cnt))
        cnt+=1
    
    #true drift points
    for d in true_drifts:
        plt.axvline(d, color = 'red', linewidth=2.2)
    
    #detected drift points
    j = 1
    for i in drift_detections:
        plt.axvline(i, color = 'black', linewidth=0.5)
        j += 1
        
    plt.xticks(np.arange(0, len(shap_list), plot_steps), rotation=20, fontsize = 20)
    plt.xlabel('Processed instances', fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.ylabel('Absolute Shapley Value', fontsize = 20)

    return 

def create_acc_plot(true_drifts, drift_detections, score_list, plot_steps, window_size=50):    
    # compute rolling mean of accuracy
    dist_series = pd.Series(score_list)
    windows = dist_series.rolling(window_size)
    moving_averages = windows.mean()
    moving_averages_list = moving_averages.tolist()
    plt.plot(moving_averages_list,  linewidth=1,color = 'blue') # label = 'Accuracy', 
    
    # compute rolling mean of f1-score
    dist_series = pd.Series(weighted_f1_list)
    windows = dist_series.rolling(window_size)
    moving_averages = windows.mean()
    moving_averages_list = moving_averages.tolist()
    # additionally plot F1-score
    #plt.plot(moving_averages_list,  linewidth=1.1, label = 'Weighted F1', color = 'orange')
    
    
    #true drift points
    for d in true_drifts:
        plt.axvline(d, color = 'red', linewidth=2.2)
    
    #detected drift points
    j = 1
    for i in drift_detections:
        plt.axvline(i, color = 'black', linewidth=0.5)
        j += 1
        
    plt.xticks(np.arange(0, len(score_list), plot_steps), rotation=20, fontsize=20)
    plt.xlabel('Processed instances', fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.ylabel('Accuracy Score', fontsize = 20)
    plt.legend()
    
    return

In [44]:
# get filenames
path = "../Results/Detector_objs/kswin_objects"

files = [f for f in listdir(path) if isfile(join(path, f))]

In [45]:
#files.remove('.DS_Store')
files

['Test_insects_inc_abr_red_kswin.pickle',
 'Test_insects_abrupt_red_kswin.pickle']

In [46]:
# get all detectors from folder
acc_list = []
detector_list = []
for f in files:
     if f != '.DS_Store':
        d = pickle.load(open("{0}/{1}".format(path,f), "rb"))
        detector_list.append(d)   
    
     else:
        print('Achtung, DS Store nicht gelöscht!')

In [47]:
# plot and store results
df_statistics = pd.DataFrame()
for idx,detector in enumerate(detector_list):
    # Plots
    shap_list  = detector.shap_list
    score_list = detector.score_list
    weighted_f1_list = detector.weighted_f1_list
    dist_list = detector.distances
    drift_detections = detector.drift_detections
    true_drifts = detector.true_drift_points
        
    plt.clf()
    plot_steps = 5000
    #fig = plot_stats(true_drifts, drift_detections, dist_list, shap_list, score_list, plot_steps = plot_steps)#, p_list)

    #plt.savefig("../Results/Plots/{0}_Plot.jpg".format(files[idx]))
    
    # Table
    statistics = detector.get_statistics(drift_range=0)
    statistics["filename"] = files[idx]
    df_statistics = df_statistics.append(statistics, ignore_index=True, sort=False)
    

# if only the accuracy curve is plottet, the corresponding object origins from a test run with performance-based detection enabled

<Figure size 432x288 with 0 Axes>

In [48]:
pd.set_option('display.max_columns', None) 
pd.set_option('display.max_rows', None)  
cols = [
        'filename', 'Model', 'Error Based', 
        'Initial Instances', 'Approach', 'Retrainsize', 'Samplesize', 
        'Base Detector',
        'Ad Delta',
        'Ks Alpha',
        'Ph Alpha', 'Ph Delta','Ph Min Inst', 'Ph Threshold', 
        'FAC','MDC', 'MDR', 'MTD', 'MTFA', 'MTR', 'True Drift Points', 'Triggered Drifts', 'Detections Count',
        'Labels Detection %','Labels Retraining %','Sampling', 'Weighted F1', 'Mcc', 'ROC_AUC', 'Acc']

df_statistics = df_statistics[cols]
df_statistics.sort_values(by=['ROC_AUC','Acc', 'Ad Delta'], ascending=[False,False, False], inplace=True)
df_statistics.sort_values(by=['Labels Retraining %'], ascending=[False], inplace=True)


df_statistics

Unnamed: 0,filename,Model,Error Based,Initial Instances,Approach,Retrainsize,Samplesize,Base Detector,Ad Delta,Ks Alpha,Ph Alpha,Ph Delta,Ph Min Inst,Ph Threshold,FAC,MDC,MDR,MTD,MTFA,MTR,True Drift Points,Triggered Drifts,Detections Count,Labels Detection %,Labels Retraining %,Sampling,Weighted F1,Mcc,ROC_AUC,Acc
1,Test_insects_abrupt_red_kswin.pickle,Xg,True,3999.0,Standard,799.0,1.0,kswin,,1e-06,,,,,1.0,5.0,0.455,244.0,,,"[10000, 11610, 15100, 16858, 25000, 30598, 355...","[10001, 15277, 25364, 30544, 35558, 37613, 44219]",7.0,100.0,100.0,-,0.563,0.48,0.867,0.566
0,Test_insects_inc_abr_red_kswin.pickle,Xg,True,3999.0,Standard,799.0,1.0,kswin,,1e-06,,,,,7.0,9.0,0.5,1096.222,12019.333,5.482,"[3850, 11500, 13800, 16500, 18200, 22568, 2600...","[3744, 16988, 21666, 22589, 23567, 45589, 4662...",16.0,100.0,100.0,-,0.523,0.431,0.843,0.518
