# Evaluation of UCDD on AGRAW1

## Imports

In [20]:
import numpy as np
import pandas as pd
import sklearn
import sys

sys.stdout = sys._stdout_
sys.stdout = sys.__stdout__
print(sys.stdout)

AttributeError: module 'sys' has no attribute '_stdout_'

## AGRAW1 dataset locations

In [15]:
abrupt_agraw1_path = '../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff'
gradual_agraw1_paths = [
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_05.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_1.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_5.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_10.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_20.arff'
]

all_agraw1_data_paths = [abrupt_agraw1_path] + gradual_agraw1_paths

## Accept and preprocess AGRAW1 datasets

In [16]:
from eval_helpers import accepting
from sklearn.preprocessing import MinMaxScaler
from category_encoders import TargetEncoder
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split


agraw1_exclude_reference_batches = {}
agraw1_exclude_testing_batches = {}
agraw1_onehot_reference_batches = {}
agraw1_onehot_testing_batches = {}
agraw1_target_reference_batches = {}
agraw1_target_testing_batches = {}


# agraw1 with categories excluded
for file_path in all_agraw1_data_paths:
    df_x, df_y = accepting.get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = accepting.divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = accepting.divide_numeric_categorical(df_x_test)
    
    reference_data = df_x_ref_num.to_numpy()
    testing_data = df_x_test_num.to_numpy()
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_exclude_reference_batches[file_path] = ref_batches
    agraw1_exclude_testing_batches[file_path] = test_batches
    
print('agraw1 exclude')
print(agraw1_exclude_reference_batches)
print(agraw1_exclude_testing_batches)

# agraw1 with categories onehot encoded
for file_path in all_agraw1_data_paths:
    df_x, df_y = accepting.get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = accepting.divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = accepting.divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(df_x_ref_cat)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_onehot_reference_batches[file_path] = ref_batches
    agraw1_onehot_testing_batches[file_path] = test_batches
    
print('agraw1 onehot')
print(agraw1_onehot_reference_batches)
print(agraw1_onehot_testing_batches)

# agraw1 with categories target encoded
for file_path in all_agraw1_data_paths:
    df_x, df_y = accepting.get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = accepting.divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = accepting.divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = TargetEncoder()
    encoder.fit(df_x_ref_cat, df_y_ref)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_target_reference_batches[file_path] = ref_batches
    agraw1_target_testing_batches[file_path] = test_batches
    
print('agraw1 target')
print(agraw1_target_reference_batches)
print(agraw1_target_testing_batches)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = c

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.decode('utf-8')
  y = c

## Helper function

In [6]:
from eval_helpers import kmeans_verbose_helpers


def write_kmeans_results_ucdd_helper(output_filename_no_extension, ref_batches, n_init, max_iter, tol, random_state):
    # dummy = [np.asarray(1), np.asarray(2), np.asarray(3)]
    combinations = []
    for i in range(3):
    #     combinations.append(np.vstack((dummy[i], dummy[(i + 1) % 3])))
        combinations.append(np.vstack((ref_batches[i], ref_batches[(i + 1) % 3])))
        
    all_results_from_combinations = []
    for i, combination in enumerate(combinations):
        filename = output_filename_no_extension + str(i) + '.txt'
        print('filename', filename)
        kmeans_verbose_helpers.write_verbose_kmeans_to_file(result_filename=output_filename_no_extension + str(i) + '.txt',
                                     data_to_cluster=combination,
                                     n_clusters=2, n_init=n_init, max_iter=max_iter, tol=tol, random_state=random_state)
        output_dicts = kmeans_verbose_helpers.convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
        all_results_from_combinations.append(output_dicts)
        kmeans_verbose_helpers.print_stats_from_kmeans_output_dicts(output_dicts)
        
    kmeans_verbose_helpers.print_stats_from_all_combinations(all_results_from_combinations)

## AGRAW1 with categories excluded

### Find the best tol and max_iter (the drift type is irrelevant)

In [7]:
write_kmeans_results_ucdd_helper('agraw1_exclude_new_output', agraw1_exclude_reference_batches[abrupt_agraw1_path],
                                 n_init=100, max_iter=500, tol=0,
                                 random_state=1053)

filename agraw1_exclude_new_output0.txt
random state: 1053
total number of results: 100
maximum number of iterations: 18
minimum initial inertia: 8347.093515830069
maximum initial inertia: 18690.074352335017
number of unique final inertia values: 3
minimum final inertia: 6832.419247273033
maximum final inertia: 6832.419270386381
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_exclude_new_output1.txt
random state: 1053
total number of results: 100
maximum number of iterations: 18
minimum initial inertia: 8747.52244428163
maximum initial inertia: 17134.178458342543
number of unique final inertia values: 3
minimum final inertia: 6820.074930844902
maximum final inertia: 6820.0750120817775
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_exclude_new_output2.txt
random state: 1053
total number of results: 100
maximum number of iterations: 15
minimum

### Use them for the analysis

In [8]:
from core import ucdd_eval
from core import ucdd_supported_parameters as spms



agraw1_exclude_stats = {}
agraw1_path = all_agraw1_data_paths[-1]
# for agraw1_path in all_agraw1_data_paths:
#     runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err = \
#         ucdd_eval.all_drifting_batches_randomness_robust(
#         agraw1_reference_batches[agraw1_path],
#         agraw1_testing_batches[agraw1_path],
#         train_batch_strategy=spms.TrainBatchStrategies.ANY,
#         additional_check=True,
#         n_init=100,
#         max_iter=18000,
#         tol=0,
#         true_drift_idx=2,
#         min_runs=2
#     )
#     agraw1_stats[agraw1_path] = {
#         'runs_results_bool': runs_results_bool,
#         'final_fpr_mean': final_fpr_mean,
#         'fpr_std_err': fpr_std_err,
#         'final_latency_mean': final_latency_mean,
#         'latency_std_err': latency_std_err
#     }
    
for agraw1_path in all_agraw1_data_paths:
    runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err = \
        ucdd_eval.all_drifting_batches_randomness_robust(
        agraw1_exclude_reference_batches[agraw1_path],
        agraw1_exclude_testing_batches[agraw1_path],
        train_batch_strategy=spms.TrainBatchStrategies.SUBMAJORITY,
        additional_check=True,
        n_init=100,
        max_iter=18000,
        tol=0,
        true_drift_idx=2,
        min_runs=2
    )
    agraw1_exclude_stats[agraw1_path] = {
        'runs_results_bool': runs_results_bool,
        'final_fpr_mean': final_fpr_mean,
        'fpr_std_err': fpr_std_err,
        'final_latency_mean': final_latency_mean,
        'latency_std_err': latency_std_err
    }

print('AGRAW1 STATS')
print(agraw1_exclude_stats)

train_batch_strategy TrainBatchStrategies.SUBMAJORITY
random_state
0
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0


KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


n_init 100 max_iter 18000 tol 0
TrainBatchStrategies.SUBMAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStrategies.ANY
TrainBatchStrategies.SUBMAJORITY
TrainBatchStrategies.MAJORITY
TrainBatchStrategies.ALL
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
TrainBatchStrategies.SUBMAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStrategies.ANY
TrainBatchStrategies.SUBMAJORITY
TrainBatchStrategies.MAJORITY
TrainBatchStrategies.ALL
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
TrainBatchStrategies.SUBMAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStrategies.ANY
TrainBatchStrategies.SUBMAJORITY
TrainBatchStrategies.MAJORITY
TrainBatchStrategies.ALL
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
n_init 100 max_iter 18000 tol 0
TrainBatchStrategies.SUBMAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStr

KeyboardInterrupt: 

### Save the obtained results to csv

In [None]:
from eval_helpers import helpers


final_result_dict = {
    'type_of_data': [], 'dataset': [], 'drift': [], 'width': [], 'encoding': [],
    'train_batch_strategy': [], 'additional_check': [],
    'n_init': [], 'max_iter': [], 'tol': [],
    'FPR_mean': [], 'latency_mean': []
}

for data_path, stats_dict in agraw1_exclude_stats.items():
    synthetic_filename_info = helpers.synthetic_data_information(data_path)
    encoding = 'exclude'
    fpr_mean = float(stats_dict['final_fpr_mean'])
    latency_mean = float(stats_dict['final_latency_mean'])
    
    final_result_dict['type_of_data'].append(synthetic_filename_info['type_of_data'])
    final_result_dict['dataset'].append(synthetic_filename_info['dataset_name'])
    final_result_dict['drift'].append(synthetic_filename_info['drift_type'])
    final_result_dict['width'].append(synthetic_filename_info['drift_width'])
    final_result_dict['encoding'].append(encoding)
    final_result_dict['train_batch_strategy'].append('submajority')
    final_result_dict['additional_check'].append('yes')
    final_result_dict['n_init'].append(100)
    final_result_dict['max_iter'].append(18000)
    final_result_dict['tol'].append(0)
    final_result_dict['FPR_mean'].append(fpr_mean)
    final_result_dict['latency_mean'].append(latency_mean)
    
final_result_df = pd.DataFrame.from_dict(final_result_dict)
sorted_final_result_df = final_result_df.sort_values(['drift', 'dataset', 'encoding', 'width'])
final_result_df.to_csv('agraw1_exclude_jupyter_results.csv', index=False)

## AGRAW1 with categories onehot encoded

### Find the best tol and max_iter (the drift type is irrelevant)

In [9]:
write_kmeans_results_ucdd_helper('agraw1_onehot_new_output', agraw1_onehot_reference_batches[abrupt_agraw1_path],
                                 n_init=100, max_iter=500, tol=0,
                                 random_state=1053)

filename agraw1_onehot_new_output0.txt
random state: 1053
total number of results: 100
maximum number of iterations: 34
minimum initial inertia: 104954.65523014727
maximum initial inertia: 116645.01047187511
number of unique final inertia values: 19
minimum final inertia: 58341.71044879218
maximum final inertia: 59761.482370558224
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_onehot_new_output1.txt
random state: 1053


KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


total number of results: 100
maximum number of iterations: 33
minimum initial inertia: 105187.473271114
maximum initial inertia: 117235.7046827969
number of unique final inertia values: 13
minimum final inertia: 58307.80854907961
maximum final inertia: 59757.571876885886
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_onehot_new_output2.txt
random state: 1053
total number of results: 100
maximum number of iterations: 26
minimum initial inertia: 104976.092943036
maximum initial inertia: 118204.3234742695
number of unique final inertia values: 23
minimum final inertia: 58330.68910892084
maximum final inertia: 59758.880442981055
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
{'total_max_iterations': 34, 'total_min_init_inertia': 104954.65523014727, 'total_max_init_inertia': 118204.3234742695, 'total_min_final_inertia': 58307.80854907961, 'total_max_final_inert

### Use them for the analysis

In [10]:
from core import ucdd_eval
from core import ucdd_supported_parameters as spms


print('...')
agraw1_onehot_stats = {}

for agraw1_path in all_agraw1_data_paths:
    runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err = \
        ucdd_eval.all_drifting_batches_randomness_robust(
        agraw1_onehot_reference_batches[agraw1_path],
        agraw1_onehot_testing_batches[agraw1_path],
        train_batch_strategy=spms.TrainBatchStrategies.MAJORITY,
        additional_check=True,
        n_init=100,
        max_iter=34000,
        tol=0,
        true_drift_idx=2,
        min_runs=2
    )
    agraw1_onehot_stats[agraw1_path] = {
        'runs_results_bool': runs_results_bool,
        'final_fpr_mean': final_fpr_mean,
        'fpr_std_err': fpr_std_err,
        'final_latency_mean': final_latency_mean,
        'latency_std_err': latency_std_err
    }

print('AGRAW1 STATS')
print(agraw1_onehot_stats)

...
train_batch_strategy TrainBatchStrategies.MAJORITY
random_state
0
n_init 100 max_iter 34000 tol 0
n_init 100 max_iter 34000 tol 0
n_init 100 max_iter 34000 tol 0


KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


TrainBatchStrategies.MAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStrategies.ANY
TrainBatchStrategies.SUBMAJORITY
TrainBatchStrategies.MAJORITY
TrainBatchStrategies.ALL
n_init 100 max_iter 34000 tol 0
n_init 100 max_iter 34000 tol 0
n_init 100 max_iter 34000 tol 0
TrainBatchStrategies.MAJORITY
train_batch_strategy None
acceptable_strategies:
TrainBatchStrategies.ANY
TrainBatchStrategies.SUBMAJORITY
TrainBatchStrategies.MAJORITY
TrainBatchStrategies.ALL
n_init 100 max_iter 34000 tol 0
n_init 100 max_iter 34000 tol 0


KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


n_init 100 max_iter 34000 tol 0


KeyboardInterrupt: 

### Save the obtained results to csv

In [23]:
from eval_helpers import helpers


final_result_dict = {
    'type_of_data': [], 'dataset': [], 'drift': [], 'width': [], 'encoding': [],
    'train_batch_strategy': [], 'additional_check': [],
    'n_init': [], 'max_iter': [], 'tol': [],
    'FPR_mean': [], 'latency_mean': []
}

for data_path, stats_dict in agraw1_onehot_stats.items():
    synthetic_filename_info = helpers.synthetic_data_information(data_path)
    encoding = 'onehot'
    fpr_mean = float(stats_dict['final_fpr_mean'])
    latency_mean = float(stats_dict['final_latency_mean'])
    
    final_result_dict['type_of_data'].append(synthetic_filename_info['type_of_data'])
    final_result_dict['dataset'].append(synthetic_filename_info['dataset_name'])
    final_result_dict['drift'].append(synthetic_filename_info['drift_type'])
    final_result_dict['width'].append(synthetic_filename_info['drift_width'])
    final_result_dict['encoding'].append(encoding)
    final_result_dict['train_batch_strategy'].append('majority')
    final_result_dict['additional_check'].append('yes')
    final_result_dict['n_init'].append(100)
    final_result_dict['max_iter'].append(34000)
    final_result_dict['tol'].append(0)
    final_result_dict['FPR_mean'].append(fpr_mean)
    final_result_dict['latency_mean'].append(latency_mean)
    
final_result_df = pd.DataFrame.from_dict(final_result_dict)
sorted_final_result_df = final_result_df.sort_values(['drift', 'dataset', 'encoding', 'width'])
final_result_df.to_csv('agraw1_onehot_jupyter_results_majority.csv', index=False)

## AGRAW1 with categories target encoded

### Find the best tol and max_iter (the drift type is irrelevant)

In [11]:
write_kmeans_results_ucdd_helper('agraw1_target_new_output', agraw1_target_reference_batches[abrupt_agraw1_path],
                                 n_init=100, max_iter=500, tol=0,
                                 random_state=1053)

filename agraw1_target_new_output0.txt
random state: 1053
total number of results: 100
maximum number of iterations: 18
minimum initial inertia: 17607.98488650251
maximum initial inertia: 29056.647428908935
number of unique final inertia values: 21
minimum final inertia: 13299.613663873319
maximum final inertia: 14276.6351586479
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_target_new_output1.txt
random state: 1053
total number of results: 100
maximum number of iterations: 22
minimum initial inertia: 17728.884037917425
maximum initial inertia: 30906.112923249697
number of unique final inertia values: 13
minimum final inertia: 13290.390633663217
maximum final inertia: 14285.170980526265
total number of convergences: 100
number of strict convergences: 100
number of tol-based convergences: 0
filename agraw1_target_new_output2.txt
random state: 1053


KeyboardInterrupt: 

### Use them for the analysis

In [None]:
from core import ucdd_eval
from core import ucdd_supported_parameters as spms



agraw1_target_stats = {}
    
for agraw1_path in all_agraw1_data_paths:
    runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err = \
        ucdd_eval.all_drifting_batches_randomness_robust(
        agraw1_target_reference_batches[agraw1_path],
        agraw1_target_testing_batches[agraw1_path],
        train_batch_strategy=spms.TrainBatchStrategies.MAJORITY,
        additional_check=True,
        n_init=100,
        max_iter=45000,
        tol=0,
        true_drift_idx=2,
        min_runs=2
    )
    agraw1_target_stats[agraw1_path] = {
        'runs_results_bool': runs_results_bool,
        'final_fpr_mean': final_fpr_mean,
        'fpr_std_err': fpr_std_err,
        'final_latency_mean': final_latency_mean,
        'latency_std_err': latency_std_err
    }

print('AGRAW1 STATS')
print(agraw1_target_stats)

KeyboardInterrupt: 

Exception ignored in: 'sklearn.cluster._k_means_common._relocate_empty_clusters_dense'
Traceback (most recent call last):
  File "<__array_function__ internals>", line 177, in where
KeyboardInterrupt: 


### Save the obtained results to csv

In [37]:
from eval_helpers import helpers


final_result_dict = {
    'type_of_data': [], 'dataset': [], 'drift': [], 'width': [], 'encoding': [],
    'train_batch_strategy': [], 'additional_check': [],
    'n_init': [], 'max_iter': [], 'tol': [],
    'FPR_mean': [], 'latency_mean': []
}

for data_path, stats_dict in agraw1_target_stats.items():
    synthetic_filename_info = helpers.synthetic_data_information(data_path)
    encoding = 'target'
    fpr_mean = float(stats_dict['final_fpr_mean'])
    latency_mean = float(stats_dict['final_latency_mean'])
    
    final_result_dict['type_of_data'].append(synthetic_filename_info['type_of_data'])
    final_result_dict['dataset'].append(synthetic_filename_info['dataset_name'])
    final_result_dict['drift'].append(synthetic_filename_info['drift_type'])
    final_result_dict['width'].append(synthetic_filename_info['drift_width'])
    final_result_dict['encoding'].append(encoding)
    final_result_dict['train_batch_strategy'].append('majority')
    final_result_dict['additional_check'].append('yes')
    final_result_dict['n_init'].append(100)
    final_result_dict['max_iter'].append(45000)
    final_result_dict['tol'].append(0)
    final_result_dict['FPR_mean'].append(fpr_mean)
    final_result_dict['latency_mean'].append(latency_mean)
    
final_result_df = pd.DataFrame.from_dict(final_result_dict)
sorted_final_result_df = final_result_df.sort_values(['drift', 'dataset', 'encoding', 'width'])
final_result_df.to_csv('agraw1_target_jupyter_results_majority.csv', index=False)