In [5]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt

In [9]:
def avg_last_execution(files_path, drift_detector, generator):
    """Summarise the final accuracy reported by a set of result CSV files.

    Each path in ``files_path`` is loaded with ``pd.read_csv`` and the value
    of the ``'classifications correct (percent)'`` column in its last row is
    collected. The mean and the standard deviation (``np.std`` defaults, i.e.
    ``ddof=0``) of the collected values are returned with the two labels.

    Parameters
    ----------
    files_path : iterable of str
        Paths of the CSV files to read (e.g. the list returned by
        ``glob.glob``). May be empty.
    drift_detector : object
        Label returned unchanged as the third element of the result.
    generator : object
        Label returned unchanged as the fourth element of the result.

    Returns
    -------
    tuple
        ``(mean, std, drift_detector, generator)``. When ``files_path`` is
        empty, ``mean`` and ``std`` are both NaN.

    Raises
    ------
    KeyError
        If a file has no ``'classifications correct (percent)'`` column.
    IndexError
        If a file contains a header but no data rows.
    """
    accuracy_column = 'classifications correct (percent)'

    # Select the column first so the last value keeps its numeric dtype
    # instead of going through a mixed-dtype row Series.
    results = [pd.read_csv(file)[accuracy_column].iloc[-1] for file in files_path]

    if not results:
        # No file matched: report NaN explicitly rather than letting
        # np.mean/np.std emit RuntimeWarnings on an empty list.
        return (np.nan, np.nan, drift_detector, generator)

    return (np.mean(results), np.std(results), drift_detector, generator)

In [7]:
# Change detectors included in the comparison, one dict per detector.
#   "id"         -> tag that appears in the result CSV file names (the ranking
#                   cells interpolate it into their glob patterns).
#   "drift_name" -> not read in this notebook; presumably the detector class
#                   name used when the experiments were run -- confirm.
#   "params"     -> not read in this notebook; presumably extra command-line
#                   options for the detector -- confirm.
drift_detectors_params = [
    {"id": detector_id, "drift_name": drift_name, "params": params}
    for detector_id, drift_name, params in (
        ("DDM", "DDM", ""),
        ("EDDM", "EDDM", ""),
        ("ADWIN", "ADWINChangeDetector", ""),
        ("ECDD", "EWMAChartDM", ""),
        ("STEPD", "STEPD", ""),
        ("SeqDrift2", "SeqDrift2ChangeDetector", ""),
        ("SEED", "SEEDChangeDetector", ""),
        ("HDDM_A_Test", "HDDM_A_Test", ""),
        ("HDDM_W_Test", "HDDM_W_Test", ""),
        # Disabled: ("FHDDM", "FHDDM", ""),
        # Disabled: ("FTDD", "FTDD", ""),
        ("RDDM_30", "RDDM", "-n 30 -w 2 -o 3"),
        ("RDDM", "RDDM", ""),
        # Disabled: ("WSTD", "WSTD", ""),
        ("NoChangeDetector", "NoChangeDetectorNaive", ""),
    )
]

## Ranking Gradual HoeffdingTree

In [10]:
# Rank the drift detectors per generator and stream size
# (gradual drift, HoeffdingTree base learner).
classificator = "trees.HoeffdingTree"
type_drift = "Gradual"

# Only "data_size" is read below (it is part of the file-name pattern);
# "drift_position" is not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

# Result directory -> generator tag used in the CSV file names.
dict_drift_type = {
    "gradual_agraw1": "AGRAW1",
    "gradual_agraw2": "AGRAW2",
    "gradual_led": "LED",
    "gradual_mixed": "MIXED",
    "gradual_randomRBF": "RANDOM_RBF",
    "gradual_sine": "SINE",
    "gradual_waveform": "WAVEFORM",
}

# Collect one rank table per stream size and concatenate once at the end,
# instead of repeatedly concatenating onto an empty placeholder DataFrame.
rank_frames = []
for data_stream_key in data_stream:
    # One (avg, std, detector, generator) row per generator/detector pair.
    rows = [
        avg_last_execution(
            glob.glob(
                f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}'
                f'_*_{drift_value}_{type_drift}.csv'
            ),
            drift["id"],
            drift_value,
        )
        for drift_key, drift_value in dict_drift_type.items()
        for drift in drift_detectors_params
    ]
    df_final_result = pd.DataFrame(rows, columns=['avg', 'std', 'drift_detector', 'generator'])

    # Generators as rows, detectors as columns, mean final accuracy as values.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")

    # Rank across detectors within each row: 1 = highest mean accuracy;
    # ties are broken by column order (method='first').
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

df_result_pivot_rank_gradual_ht = pd.concat(rank_frames)

df_result_pivot_rank_gradual_ht.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,NoChangeDetector,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,3.653061,2.836735,10.22449,6.061224,4.428571,4.714286,12.0,4.244898,4.346939,9.979592,8.0,7.510204
std,2.268911,1.982706,0.918906,2.734107,1.338532,2.44097,0.0,2.287016,1.854623,1.613611,2.01039,2.246501
min,1.0,1.0,8.0,1.0,2.0,1.0,12.0,1.0,1.0,2.0,1.0,2.0
25%,2.0,1.0,10.0,4.0,3.0,2.0,12.0,2.0,3.0,10.0,8.0,7.0
50%,3.0,2.0,11.0,7.0,5.0,5.0,12.0,4.0,4.0,10.0,9.0,8.0
75%,5.0,4.0,11.0,8.0,5.0,6.0,12.0,6.0,6.0,11.0,9.0,10.0
max,9.0,7.0,11.0,11.0,8.0,9.0,12.0,8.0,9.0,11.0,10.0,10.0


## Ranking Abrupt HoeffdingTree

In [11]:
# Rank the drift detectors per generator and stream size
# (abrupt drift, HoeffdingTree base learner).
classificator = "trees.HoeffdingTree"
type_drift = "Abrupt"

# Only "data_size" is read below (it is part of the file-name pattern);
# "drift_position" is not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

# Result directory -> generator tag used in the CSV file names.
dict_drift_type = {
    "abrupt_agraw1": "AGRAW1",
    "abrupt_agraw2": "AGRAW2",
    "abrupt_led": "LED",
    "abrupt_mixed": "MIXED",
    "abrupt_randomRBF": "RANDOM_RBF",
    "abrupt_sine": "SINE",
    "abrupt_waveform": "WAVEFORM",
}

# Collect one rank table per stream size and concatenate once at the end,
# instead of repeatedly concatenating onto an empty placeholder DataFrame.
rank_frames = []
for data_stream_key in data_stream:
    # One (avg, std, detector, generator) row per generator/detector pair.
    rows = [
        avg_last_execution(
            glob.glob(
                f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}'
                f'_*_{drift_value}_{type_drift}.csv'
            ),
            drift["id"],
            drift_value,
        )
        for drift_key, drift_value in dict_drift_type.items()
        for drift in drift_detectors_params
    ]
    df_final_result = pd.DataFrame(rows, columns=['avg', 'std', 'drift_detector', 'generator'])

    # Generators as rows, detectors as columns, mean final accuracy as values.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")

    # Rank across detectors within each row: 1 = highest mean accuracy;
    # ties are broken by column order (method='first'). A NaN average stays
    # NaN in the ranks, which lowers that detector's count in describe().
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

df_result_pivot_rank_abrupt_ht = pd.concat(rank_frames)

df_result_pivot_rank_abrupt_ht.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,NoChangeDetector,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,48.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,3.571429,4.183673,10.125,6.612245,2.897959,4.081633,11.979592,4.469388,5.102041,9.673469,7.346939,7.918367
std,2.263846,2.288317,1.064415,2.841774,1.245741,2.856548,0.142857,2.042283,1.31093,1.505376,2.487896,2.280686
min,1.0,1.0,7.0,1.0,1.0,1.0,11.0,1.0,2.0,5.0,1.0,2.0
25%,2.0,2.0,9.0,5.0,2.0,1.0,12.0,3.0,4.0,9.0,7.0,7.0
50%,3.0,4.0,10.5,7.0,3.0,4.0,12.0,5.0,5.0,10.0,8.0,8.0
75%,5.0,6.0,11.0,9.0,4.0,7.0,12.0,6.0,6.0,11.0,9.0,10.0
max,9.0,10.0,11.0,11.0,5.0,9.0,12.0,8.0,8.0,11.0,10.0,11.0


## Ranking Gradual NaiveBayes

In [12]:
# Rank the drift detectors per generator and stream size
# (gradual drift, NaiveBayes base learner).
classificator = "bayes.NaiveBayes"
type_drift = "Gradual"

# Only "data_size" is read below (it is part of the file-name pattern);
# "drift_position" is not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

# Result directory -> generator tag used in the CSV file names.
dict_drift_type = {
    "gradual_agraw1": "AGRAW1",
    "gradual_agraw2": "AGRAW2",
    "gradual_led": "LED",
    "gradual_mixed": "MIXED",
    "gradual_randomRBF": "RANDOM_RBF",
    "gradual_sine": "SINE",
    "gradual_waveform": "WAVEFORM",
}

# Collect one rank table per stream size and concatenate once at the end,
# instead of repeatedly concatenating onto an empty placeholder DataFrame.
rank_frames = []
for data_stream_key in data_stream:
    # One (avg, std, detector, generator) row per generator/detector pair.
    rows = [
        avg_last_execution(
            glob.glob(
                f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}'
                f'_*_{drift_value}_{type_drift}.csv'
            ),
            drift["id"],
            drift_value,
        )
        for drift_key, drift_value in dict_drift_type.items()
        for drift in drift_detectors_params
    ]
    df_final_result = pd.DataFrame(rows, columns=['avg', 'std', 'drift_detector', 'generator'])

    # Generators as rows, detectors as columns, mean final accuracy as values.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")

    # Rank across detectors within each row: 1 = highest mean accuracy;
    # ties are broken by column order (method='first').
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

df_result_pivot_rank_gradual_nb = pd.concat(rank_frames)

df_result_pivot_rank_gradual_nb.describe()

drift_detector,ADWIN,DDM,ECDD,EDDM,HDDM_A_Test,HDDM_W_Test,NoChangeDetector,RDDM,RDDM_30,SEED,STEPD,SeqDrift2
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,3.040816,5.77551,10.061224,7.938776,3.959184,5.387755,12.0,3.204082,4.040816,8.122449,8.285714,6.183673
std,2.101182,2.931619,0.987593,2.764418,1.513263,2.683408,0.0,2.309217,1.74355,2.513569,1.290994,2.941609
min,1.0,1.0,8.0,2.0,1.0,1.0,12.0,1.0,1.0,4.0,5.0,1.0
25%,1.0,3.0,9.0,7.0,3.0,2.0,12.0,1.0,3.0,6.0,8.0,4.0
50%,3.0,6.0,10.0,9.0,4.0,6.0,12.0,3.0,4.0,8.0,8.0,6.0
75%,5.0,8.0,11.0,10.0,5.0,8.0,12.0,5.0,5.0,11.0,9.0,9.0
max,9.0,11.0,11.0,11.0,7.0,9.0,12.0,8.0,8.0,11.0,10.0,11.0


## Ranking Abrupt NaiveBayes

In [None]:
# Rank the drift detectors per generator and stream size
# (abrupt drift, NaiveBayes base learner).
classificator = "bayes.NaiveBayes"
type_drift = "Abrupt"

# Only "data_size" is read below (it is part of the file-name pattern);
# "drift_position" is not used in this cell.
data_stream = [
    {"data_size": 10000, "drift_position": [2000, 2000, 2000, 2000]},
    {"data_size": 20000, "drift_position": [4000, 4000, 4000, 4000]},
    {"data_size": 50000, "drift_position": [10000, 10000, 10000, 10000]},
    {"data_size": 100000, "drift_position": [20000, 20000, 20000, 20000]},
    {"data_size": 500000, "drift_position": [100000, 100000, 100000, 100000]},
    {"data_size": 1000000, "drift_position": [200000, 200000, 200000, 200000]},
    {"data_size": 2000000, "drift_position": [400000, 400000, 400000, 400000]},
]

# Result directory -> generator tag used in the CSV file names.
dict_drift_type = {
    "abrupt_agraw1": "AGRAW1",
    "abrupt_agraw2": "AGRAW2",
    "abrupt_led": "LED",
    "abrupt_mixed": "MIXED",
    "abrupt_randomRBF": "RANDOM_RBF",
    "abrupt_sine": "SINE",
    "abrupt_waveform": "WAVEFORM",
}

# Collect one rank table per stream size and concatenate once at the end,
# instead of repeatedly concatenating onto an empty placeholder DataFrame.
rank_frames = []
for data_stream_key in data_stream:
    # One (avg, std, detector, generator) row per generator/detector pair.
    rows = [
        avg_last_execution(
            glob.glob(
                f'{drift_key}/{classificator}_{drift["id"]}_{data_stream_key["data_size"]}'
                f'_*_{drift_value}_{type_drift}.csv'
            ),
            drift["id"],
            drift_value,
        )
        for drift_key, drift_value in dict_drift_type.items()
        for drift in drift_detectors_params
    ]
    df_final_result = pd.DataFrame(rows, columns=['avg', 'std', 'drift_detector', 'generator'])

    # Generators as rows, detectors as columns, mean final accuracy as values.
    df_result_pivot = df_final_result.pivot(index="generator", columns="drift_detector", values="avg")

    # Rank across detectors within each row: 1 = highest mean accuracy;
    # ties are broken by column order (method='first').
    rank_frames.append(df_result_pivot.rank(axis=1, ascending=False, method='first'))

df_result_pivot_rank_abrupt_nb = pd.concat(rank_frames)

df_result_pivot_rank_abrupt_nb.describe()

In [None]:
from autorank import autorank, plot_stats, create_report, latex_table

## Nemenyi test Gradual NaiveBayes 

In [None]:
# Statistical comparison of the detectors on the gradual-drift NaiveBayes
# rank table (values are rank positions, 1 = highest mean accuracy).
# NOTE(review): presumably order='ascending' tells autorank that smaller
# values are better -- confirm against the autorank documentation.
result = autorank(
    df_result_pivot_rank_gradual_nb,
    alpha=0.05,
    order='ascending',
    verbose=False,
)
print(result)

In [None]:
# Plot the autorank outcome currently held in `result`
# (set by the autorank cell executed just before this one).
# NOTE(review): plot_stats may open its own figure when no `ax` is passed,
# in which case the size/dpi requested here would not apply -- confirm.
plt.figure(dpi=80, figsize=(13, 10))
plot_stats(result)
plt.show()

## Nemenyi test Abrupt NaiveBayes

In [None]:
# Statistical comparison of the detectors on the abrupt-drift NaiveBayes
# rank table (values are rank positions, 1 = highest mean accuracy).
# NOTE(review): presumably order='ascending' tells autorank that smaller
# values are better -- confirm against the autorank documentation.
result = autorank(
    df_result_pivot_rank_abrupt_nb,
    alpha=0.05,
    order='ascending',
    verbose=False,
)
print(result)

In [None]:
# Plot the autorank outcome currently held in `result`
# (set by the autorank cell executed just before this one).
# NOTE(review): plot_stats may open its own figure when no `ax` is passed,
# in which case the size/dpi requested here would not apply -- confirm.
plt.figure(dpi=80, figsize=(13, 10))
plot_stats(result)
plt.show()

## Nemenyi Test Abrupt HT

In [None]:
# Statistical comparison of the detectors on the abrupt-drift HoeffdingTree
# rank table (values are rank positions, 1 = highest mean accuracy).
# NOTE(review): presumably order='ascending' tells autorank that smaller
# values are better -- confirm against the autorank documentation.
result = autorank(
    df_result_pivot_rank_abrupt_ht,
    alpha=0.05,
    order='ascending',
    verbose=False,
)
print(result)

In [None]:
# Plot the autorank outcome currently held in `result`
# (set by the autorank cell executed just before this one).
# NOTE(review): plot_stats may open its own figure when no `ax` is passed,
# in which case the size/dpi requested here would not apply -- confirm.
plt.figure(dpi=80, figsize=(13, 10))
plot_stats(result)
plt.show()

## Nemenyi Test Gradual HT 

In [None]:
# Statistical comparison of the detectors on the gradual-drift HoeffdingTree
# rank table (values are rank positions, 1 = highest mean accuracy).
# NOTE(review): presumably order='ascending' tells autorank that smaller
# values are better -- confirm against the autorank documentation.
result = autorank(
    df_result_pivot_rank_gradual_ht,
    alpha=0.05,
    order='ascending',
    verbose=False,
)
print(result)

In [None]:
# Plot the autorank outcome currently held in `result`
# (set by the autorank cell executed just before this one).
# NOTE(review): plot_stats may open its own figure when no `ax` is passed,
# in which case the size/dpi requested here would not apply -- confirm.
plt.figure(dpi=80, figsize=(13, 10))
plot_stats(result)
plt.show()