In [5]:
import glob
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def avg_last_execution(files_path, drift_detector, generator):
    """Summarise the final accuracy reported by a set of result CSV files.

    For every file, the value of the ``'classifications correct (percent)'``
    column in the *last* row is collected; the mean and (population) standard
    deviation of those values are returned.

    Parameters
    ----------
    files_path : iterable of str
        Paths of the CSV files to read (one value is taken from each file).
    drift_detector : str
        Label passed through unchanged as the third element of the result.
    generator : str
        Label passed through unchanged as the fourth element of the result.

    Returns
    -------
    tuple
        ``(mean, std, drift_detector, generator)``. When ``files_path`` is
        empty, ``mean`` and ``std`` are NaN and a ``RuntimeWarning`` naming the
        detector/generator pair is issued.

    Raises
    ------
    KeyError
        If a file lacks the ``'classifications correct (percent)'`` column.
    IndexError
        If a file contains no data rows.
    """
    accuracy_column = 'classifications correct (percent)'

    results = [pd.read_csv(file)[accuracy_column].iloc[-1] for file in files_path]

    if not results:
        # np.mean([]) / np.std([]) would also yield NaN, but only with a generic
        # "Mean of empty slice" warning that does not say which combination is
        # missing. Say so explicitly, keeping the same warning category.
        warnings.warn(
            f"no result files for drift detector {drift_detector!r} "
            f"and generator {generator!r}; returning NaN",
            RuntimeWarning,
            stacklevel=2,
        )
        return (np.nan, np.nan, drift_detector, generator)

    return (np.mean(results), np.std(results), drift_detector, generator)

In [3]:
# Drift detectors included in the comparison. Each entry becomes a dict with:
#   "id"         - label used in result file names and as the output column name
#   "drift_name" - detector name (not used by the cells in this notebook)
#   "params"     - option string for the detector; "" when none is given
drift_detectors_params = [
    {"id": detector_id, "drift_name": detector_name, "params": detector_args}
    for detector_id, detector_name, detector_args in (
        ("DDM", "DDM", ""),
        ("EDDM", "EDDM", ""),
        ("ADWIN", "ADWINChangeDetector", ""),
        ("ECDD", "EWMAChartDM", ""),
        ("STEPD", "STEPD", ""),
        ("SeqDrift2", "SeqDrift2ChangeDetector", ""),
        ("SEED", "SEEDChangeDetector", ""),
        ("HDDM_A_Test", "HDDM_A_Test", ""),
        ("HDDM_W_Test", "HDDM_W_Test", ""),
        # Disabled: ("FHDDM", "FHDDM", ""),
        # Disabled: ("FTDD", "FTDD", ""),
        ("RDDM_30", "RDDM", "-n 30 -w 2 -o 3"),
        ("RDDM", "RDDM", ""),
        # Disabled: ("WSTD", "WSTD", ""),
    )
]

## Ranking Gradual HoeffdingTree

In [8]:
# Rank the drift detectors by mean final accuracy, once per
# (stream size, generator) pair, for the HoeffdingTree classifier on
# gradual-drift streams.
classificator = "trees.HoeffdingTree"
# Only "data_size" is read below; "drift_position" is kept alongside it but is
# not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]
type_drift = "Gradual"
# Result directory (relative to the working directory) -> generator label used
# in the result file names.
dict_drift_type = {
    "gradual_agraw1": "AGRAW1",
    "gradual_agraw2": "AGRAW2",
    "gradual_led": "LED",
    "gradual_mixed": "MIXED",
    "gradual_randomRBF": "RANDOM_RBF",
    "gradual_sine": "SINE",
    "gradual_waveform": "WAVEFORM",
}

# Collect plain records / frames in lists and build each DataFrame once,
# instead of repeatedly pd.concat-ing onto an empty seed inside the loops.
rank_frames = []
for data_stream_key in data_stream:
    tuple_list_result = []
    for drift_key, drift_value in dict_drift_type.items():
        for drift in drift_detectors_params:
            files_path = glob.glob(f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}_*_{drift_value}_{type_drift}.csv')
            tuple_list_result.append(avg_last_execution(files_path, drift["id"], drift_value))

    df_final_result = pd.DataFrame(tuple_list_result, columns=['avg', 'std', 'drift_detector', 'generator'])

    # One row per generator, one column per detector, holding the mean accuracy.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")
    # Rank across detectors within each row: 1 = highest mean accuracy.
    # method='first' breaks ties by column order.
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

# One row per (stream size, generator) pair.
df_result_pivot_rank = pd.concat(rank_frames)

df_result_pivot_rank.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,3.55102,2.897959,10.244898,5.693878,4.387755,4.612245,4.469388,4.673469,9.979592,7.979592,7.510204
std,2.273591,1.939125,0.878697,3.022169,1.525018,2.430846,2.208903,1.712549,1.613611,2.005096,2.246501
min,1.0,1.0,8.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0
25%,2.0,1.0,10.0,3.0,3.0,2.0,3.0,4.0,10.0,8.0,7.0
50%,3.0,2.0,11.0,6.0,5.0,5.0,4.0,5.0,10.0,8.0,8.0
75%,5.0,4.0,11.0,8.0,5.0,6.0,6.0,6.0,11.0,9.0,10.0
max,9.0,7.0,11.0,11.0,8.0,9.0,8.0,9.0,11.0,10.0,10.0


## Ranking Abrupt HoeffdingTree

In [7]:
# Rank the drift detectors by mean final accuracy, once per
# (stream size, generator) pair, for the HoeffdingTree classifier on
# abrupt-drift streams.
classificator = "trees.HoeffdingTree"
# Only "data_size" is read below; "drift_position" is kept alongside it but is
# not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]
type_drift = "Abrupt"
# Result directory (relative to the working directory) -> generator label used
# in the result file names.
dict_drift_type = {
    "abrupt_agraw1": "AGRAW1",
    "abrupt_agraw2": "AGRAW2",
    "abrupt_led": "LED",
    "abrupt_mixed": "MIXED",
    "abrupt_randomRBF": "RANDOM_RBF",
    "abrupt_sine": "SINE",
    "abrupt_waveform": "WAVEFORM",
}

# Collect plain records / frames in lists and build each DataFrame once,
# instead of repeatedly pd.concat-ing onto an empty seed inside the loops.
rank_frames = []
for data_stream_key in data_stream:
    tuple_list_result = []
    for drift_key, drift_value in dict_drift_type.items():
        for drift in drift_detectors_params:
            files_path = glob.glob(f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}_*_{drift_value}_{type_drift}.csv')
            tuple_list_result.append(avg_last_execution(files_path, drift["id"], drift_value))

    df_final_result = pd.DataFrame(tuple_list_result, columns=['avg', 'std', 'drift_detector', 'generator'])

    # One row per generator, one column per detector, holding the mean accuracy.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")
    # Rank across detectors within each row: 1 = highest mean accuracy.
    # method='first' breaks ties by column order.
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

# One row per (stream size, generator) pair.
df_result_pivot_rank = pd.concat(rank_frames)

df_result_pivot_rank.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,3.714286,4.081633,10.122449,6.081633,3.020408,4.204082,4.653061,5.122449,9.693878,7.367347,7.938776
std,2.406588,2.262343,1.033454,3.219969,1.24983,2.791203,2.016092,1.301164,1.516631,2.480708,2.294996
min,1.0,1.0,7.0,1.0,1.0,1.0,1.0,2.0,5.0,1.0,2.0
25%,2.0,2.0,9.0,4.0,2.0,1.0,3.0,4.0,9.0,7.0,7.0
50%,3.0,4.0,10.0,6.0,3.0,4.0,5.0,5.0,10.0,8.0,8.0
75%,6.0,6.0,11.0,9.0,4.0,7.0,6.0,6.0,11.0,9.0,10.0
max,9.0,10.0,11.0,11.0,5.0,9.0,8.0,8.0,11.0,10.0,11.0


## Ranking Gradual NaiveBayes

In [6]:
# Rank the drift detectors by mean final accuracy, once per
# (stream size, generator) pair, for the NaiveBayes classifier on
# gradual-drift streams.
classificator = "bayes.NaiveBayes"
# Only "data_size" is read below; "drift_position" is kept alongside it but is
# not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

type_drift = "Gradual"
# Result directory (relative to the working directory) -> generator label used
# in the result file names.
dict_drift_type = {
    "gradual_agraw1": "AGRAW1",
    "gradual_agraw2": "AGRAW2",
    "gradual_led": "LED",
    "gradual_mixed": "MIXED",
    "gradual_randomRBF": "RANDOM_RBF",
    "gradual_sine": "SINE",
    "gradual_waveform": "WAVEFORM",
}

# Collect plain records / frames in lists and build each DataFrame once,
# instead of repeatedly pd.concat-ing onto an empty seed inside the loops.
rank_frames = []
for data_stream_key in data_stream:
    tuple_list_result = []
    for drift_key, drift_value in dict_drift_type.items():
        for drift in drift_detectors_params:
            files_path = glob.glob(f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}_*_{drift_value}_{type_drift}.csv')
            tuple_list_result.append(avg_last_execution(files_path, drift["id"], drift_value))

    df_final_result = pd.DataFrame(tuple_list_result, columns=['avg', 'std', 'drift_detector', 'generator'])

    # One row per generator, one column per detector, holding the mean accuracy.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")
    # Rank across detectors within each row: 1 = highest mean accuracy.
    # method='first' breaks ties by column order.
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

# One row per (stream size, generator) pair.
df_result_pivot_rank = pd.concat(rank_frames)

df_result_pivot_rank.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,2.693878,5.489796,10.122449,7.55102,4.0,5.346939,3.693878,4.428571,8.122449,8.326531,6.22449
std,1.861944,3.116638,0.927197,3.048642,1.513825,2.634317,2.534792,1.707825,2.513569,1.281076,2.924504
min,1.0,1.0,8.0,2.0,1.0,2.0,1.0,1.0,4.0,5.0,1.0
25%,1.0,2.0,10.0,5.0,3.0,3.0,1.0,3.0,6.0,8.0,4.0
50%,2.0,6.0,10.0,9.0,4.0,6.0,4.0,5.0,8.0,9.0,6.0
75%,4.0,8.0,11.0,10.0,5.0,8.0,6.0,6.0,11.0,9.0,9.0
max,9.0,11.0,11.0,11.0,7.0,9.0,8.0,8.0,11.0,10.0,11.0


## Ranking Abrupt NaiveBayes

In [9]:
# Rank the drift detectors by mean final accuracy, once per
# (stream size, generator) pair, for the NaiveBayes classifier on
# abrupt-drift streams.
classificator = "bayes.NaiveBayes"
# Only "data_size" is read below; "drift_position" is kept alongside it but is
# not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

type_drift = "Abrupt"
# Result directory (relative to the working directory) -> generator label used
# in the result file names.
dict_drift_type = {
    "abrupt_agraw1": "AGRAW1",
    "abrupt_agraw2": "AGRAW2",
    "abrupt_led": "LED",
    "abrupt_mixed": "MIXED",
    "abrupt_randomRBF": "RANDOM_RBF",
    "abrupt_sine": "SINE",
    "abrupt_waveform": "WAVEFORM",
}

# Collect plain records / frames in lists and build each DataFrame once,
# instead of repeatedly pd.concat-ing onto an empty seed inside the loops.
rank_frames = []
for data_stream_key in data_stream:
    tuple_list_result = []
    for drift_key, drift_value in dict_drift_type.items():
        for drift in drift_detectors_params:
            files_path = glob.glob(f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}_*_{drift_value}_{type_drift}.csv')
            tuple_list_result.append(avg_last_execution(files_path, drift["id"], drift_value))

    df_final_result = pd.DataFrame(tuple_list_result, columns=['avg', 'std', 'drift_detector', 'generator'])

    # One row per generator, one column per detector, holding the mean accuracy.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")
    # Rank across detectors within each row: 1 = highest mean accuracy.
    # method='first' breaks ties by column order.
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

# One row per (stream size, generator) pair.
df_result_pivot_rank = pd.concat(rank_frames)

df_result_pivot_rank.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,2.836735,6.489796,9.816327,8.040816,3.408163,4.734694,3.897959,5.591837,7.346939,7.0,6.836735
std,2.0243,3.635029,1.148794,2.908029,1.206206,3.046689,1.960494,1.593193,2.650087,2.653614,2.771484
min,1.0,1.0,6.0,2.0,1.0,1.0,1.0,1.0,4.0,1.0,2.0
25%,1.0,2.0,9.0,7.0,3.0,2.0,3.0,5.0,5.0,5.0,5.0
50%,2.0,8.0,10.0,9.0,3.0,5.0,4.0,6.0,7.0,8.0,7.0
75%,4.0,9.0,11.0,10.0,4.0,8.0,5.0,6.0,11.0,9.0,9.0
max,9.0,11.0,11.0,11.0,6.0,9.0,7.0,8.0,11.0,10.0,11.0
