# Evaluation of UCDD (Improved)

## UCDD Algorithm

In [1]:
# fill if necessary

## UCDD Evaluation Helpers

### UCDD Metrics

In [2]:
def fpr_and_latency_when_averaging(drift_locations, num_test_batches, true_drift_idx):
    """The inputs drift_locations and true_drift_idx are is zero-indexed"""
    fpr = 0
    latency = 1
    drift_locations_arr = np.array(drift_locations)
    signal_locations_before_drift = drift_locations_arr[drift_locations_arr < true_drift_idx]
    signal_locations_not_before_drift = drift_locations_arr[drift_locations_arr >= true_drift_idx]
    num_batches_after_first_drift = num_test_batches - (true_drift_idx + 1)
    drift_detected = False # says whether some drift detection was triggered at or after a drift occurrence

    if len(drift_locations) >= 1:
        if len(signal_locations_before_drift) > 0:
            fpr = len(signal_locations_before_drift) / true_drift_idx
        if len(signal_locations_not_before_drift) > 0:
            first_useful_drift_signal = signal_locations_not_before_drift[0]
            latency = (first_useful_drift_signal - true_drift_idx) / num_batches_after_first_drift
            drift_detected = True

    return fpr, latency, drift_detected

### UCDD Randomness-Robust Evaluation

In [3]:
def all_drifting_batches_randomness_robust(reference_data_batches, testing_data_batches, train_batch_strategy,
                                           additional_check,
                                           n_init=10,
                                           max_iter=300, tol=1e-4, true_drift_idx=2, first_random_state=0,
                                           min_runs=10, std_err_threshold=0.05):
    """
    Repeat running ucdd_improved.ucdd.all_drifting_batches(...) until the s.e. of metrics from different runs is low enough

    :param n_init:
    :param max_iter:
    :param tol:
    :param reference_data_batches: list of arrays of shape (n_r_r, #attributes), r_r=reference batch number,
        n_r_r=#points in this batch
    :param testing_data_batches: list of arrays of shape (n_r_t, #attributes), r_t=testing batch number,
        n_r_t=#points in this batch
    :param n_clusters: desired number of clusters for kmeans
    :param first_random_state: random states used will be incremented from this one
    :param coeff: coeff used to detect drift, default=2.66
    :param std_err_threshold: threshold to stop executing the mssw algorithm
    :return: a list of lists from all_drifting_batches(...), and the mean and s.e. of FPR and latency
    """
    fprs = []
    latencies = []
    runs_results_bool = []
    fpr_std_err = -1
    latency_std_err = -1
    num_runs = 0
    random_state = first_random_state
    while num_runs < min_runs or max(fpr_std_err, latency_std_err) > std_err_threshold:
        drifting_batches_bool = ucdd_improved.ucdd.all_drifting_batches(
            reference_data_batches,
            testing_data_batches,
            train_batch_strategy=train_batch_strategy,
            additional_check=additional_check,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            random_state=random_state
        )
        # print('drifting_batches_bool')
        # print(drifting_batches_bool)
        drift_locations = np.arange(len(drifting_batches_bool))[drifting_batches_bool]
        # print('drift_locations')
        # print(drift_locations)
        fpr, latency, _ = fpr_and_latency_when_averaging(
            drift_locations,
            len(testing_data_batches),
            true_drift_idx
        )
        fprs.append(fpr)
        latencies.append(latency)
        runs_results_bool.append(drifting_batches_bool)
        num_runs += 1
        random_state += n_init

        # print('number of runs', num_runs)
        if num_runs >= min_runs:
            fpr_std_err = np.std(fprs) / np.sqrt(len(fprs))
            latency_std_err = np.std(latencies) / np.sqrt(len(latencies))
            print('fpr s.e.', fpr_std_err)
            print('latency s.e.', latency_std_err)
        # print('fprs', fprs, 's.e.', fpr_std_err)
        # print('latencies', latencies, 's.e.', latency_std_err)

    final_fpr_mean = np.mean(fprs)
    final_latency_mean = np.mean(latencies)
    print('final fpr mean', final_fpr_mean)
    print('final latency mean', final_latency_mean)
    return runs_results_bool, final_fpr_mean, fpr_std_err, final_latency_mean, latency_std_err

## UCDD Evaluation on Local Datasets

## Synthetic Dataset Locations

In [4]:
abrupt_sea_path = '../Datasets_concept_drift/synthetic_data/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff'
abrupt_agraw1_path = '../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff'
abrupt_agraw2_path = '../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff'

gradual_sea_paths = [
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_05.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_1.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_5.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_10.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/sea_1_gradual_drift_0_noise_balanced_20.arff'
]

gradual_agraw1_paths = [
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_05.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_1.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_5.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_10.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw1_1_gradual_drift_0_noise_balanced_20.arff'
]

gradual_agraw2_paths = [
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_05.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_1.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_5.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_10.arff',
    '../Datasets_concept_drift/synthetic_data/gradual_drift/agraw2_1_gradual_drift_0_noise_balanced_20.arff'
]

all_sea_data_paths = [abrupt_sea_path] + gradual_sea_paths
all_agraw1_data_paths = [abrupt_agraw1_path] + gradual_agraw1_paths
all_agraw2_data_paths = [abrupt_agraw2_path] + gradual_agraw2_paths

only_numerical_data_paths = [abrupt_sea_path] + gradual_sea_paths
only_mixed_data_paths = [abrupt_agraw1_path] + gradual_agraw1_paths + [abrupt_agraw2_path] + gradual_agraw2_paths

## Obtaining all datasets preprocessed

### Helpers

In [5]:
import numpy as np
import pandas as pd
from scipy.io import arff


def accept_data(file_path):
    """Accept an arff file and return its contents in a pandas dataframe"""
    data = arff.loadarff(file_path)
    df = pd.DataFrame(data[0])
    print('df', df)
    return df


def column_values_to_string(df, columns):
    for column in columns:
        df[column] = df[column].str.decode('utf-8')
    return df


def divide_numeric_categorical(df_x):
    df_x_numeric = df_x.select_dtypes(include=[np.number])
    df_x_categorical = df_x.select_dtypes(exclude=[np.number])
    return df_x_numeric, df_x_categorical


def prepare_df_data(df):
    df_y = column_values_to_string(df[['class']], ['class'])
    df_x = df.drop(columns='class')
    df_x_numeric, df_x_categorical = divide_numeric_categorical(df_x)
    df_x_categorical = column_values_to_string(df_x_categorical, list(df_x_categorical.columns))
    return df_x_numeric.join(df_x_categorical), df_y


def get_clean_df(file_path):
    df = accept_data(file_path)
    df_x, df_y = prepare_df_data(df)

    return df_x, df_y

In [6]:
import numpy as np
import pandas as pd
import sklearn

from category_encoders import TargetEncoder
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder, OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split


sea_reference_batches = {}
sea_testing_batches = {}
agraw1_exclude_reference_batches = {}
agraw1_exclude_testing_batches = {}
agraw1_onehot_reference_batches = {}
agraw1_onehot_testing_batches = {}
agraw1_target_reference_batches = {}
agraw1_target_testing_batches = {}
agraw2_exclude_reference_batches = {}
agraw2_exclude_testing_batches = {}
agraw2_onehot_reference_batches = {}
agraw2_onehot_testing_batches = {}
agraw2_target_reference_batches = {}
agraw2_target_testing_batches = {}

for file_path in all_sea_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    reference_data = df_x_ref.to_numpy()
    testing_data = df_x_test.to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    sea_reference_batches[file_path] = ref_batches
    sea_testing_batches[file_path] = test_batches
    
print('sea')
print(sea_reference_batches)
print(sea_testing_batches)

# agraw1 with categories excluded
for file_path in all_agraw1_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    reference_data = df_x_ref_num.to_numpy()
    testing_data = df_x_test_num.to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_exclude_reference_batches[file_path] = ref_batches
    agraw1_exclude_testing_batches[file_path] = test_batches
    
print('agraw1 exclude')
print(agraw1_exclude_reference_batches)
print(agraw1_exclude_testing_batches)

# agraw1 with categories onehot encoded
for file_path in all_agraw1_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(df_x_ref_cat)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_onehot_reference_batches[file_path] = ref_batches
    agraw1_onehot_testing_batches[file_path] = test_batches
    
print('agraw1 onehot')
print(agraw1_onehot_reference_batches)
print(agraw1_onehot_testing_batches)

# agraw1 with categories target encoded
for file_path in all_agraw1_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = TargetEncoder()
    encoder.fit(df_x_ref_cat, df_y_ref)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw1_target_reference_batches[file_path] = ref_batches
    agraw1_target_testing_batches[file_path] = test_batches
    
print('agraw1 target')
print(agraw1_target_reference_batches)
print(agraw1_target_testing_batches)


# agraw2 with categories excluded
for file_path in all_agraw2_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    reference_data = df_x_ref_num.to_numpy()
    testing_data = df_x_test_num.to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw2_exclude_reference_batches[file_path] = ref_batches
    agraw2_exclude_testing_batches[file_path] = test_batches
    
print('agraw2 exclude')
print(agraw2_exclude_reference_batches)
print(agraw2_exclude_testing_batches)

# agraw1 with categories onehot encoded
for file_path in all_agraw2_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(df_x_ref_cat)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw2_onehot_reference_batches[file_path] = ref_batches
    agraw2_onehot_testing_batches[file_path] = test_batches
    
print('agraw2 onehot')
print(agraw2_onehot_reference_batches)
print(agraw2_onehot_testing_batches)

# agraw2 with categories target encoded
for file_path in all_agraw2_data_paths:
    df_x, df_y = get_clean_df(file_path)
    df_y = pd.DataFrame(LabelEncoder().fit_transform(df_y))

    df_x_ref, df_x_test, df_y_ref, df_y_test = sklearn.model_selection.train_test_split(
        df_x, df_y, test_size=0.7, shuffle=False)
    
    df_x_ref_num, df_x_ref_cat = divide_numeric_categorical(df_x_ref)
    df_x_test_num, df_x_test_cat = divide_numeric_categorical(df_x_test)
    
    ref_index = df_x_ref_cat.index
    test_index = df_x_test_cat.index
    encoder = TargetEncoder()
    encoder.fit(df_x_ref_cat, df_y_ref)
    df_x_ref_cat_transformed = pd.DataFrame(encoder.transform(df_x_ref_cat))
    df_x_test_cat_transformed = pd.DataFrame(encoder.transform(df_x_test_cat))
    df_x_ref_cat_transformed.set_index(ref_index, inplace=True)
    df_x_test_cat_transformed.set_index(test_index, inplace=True)
    
    reference_data = df_x_ref_num.join(df_x_ref_cat_transformed, lsuffix='_num').to_numpy()
    testing_data = df_x_test_num.join(df_x_test_cat_transformed, lsuffix='_num').to_numpy()
    
    scaler = MinMaxScaler()
    scaler.fit(reference_data)
    reference_data = scaler.transform(reference_data)
    testing_data = scaler.transform(testing_data)
    
    num_ref_batches = 3
    num_test_batches = 7
    ref_batches = np.array_split(reference_data, num_ref_batches)
    test_batches = np.array_split(testing_data, num_test_batches)
    
    agraw2_target_reference_batches[file_path] = ref_batches
    agraw2_target_testing_batches[file_path] = test_batches
    
print('agraw2 target')
print(agraw2_target_reference_batches)
print(agraw2_target_testing_batches)

df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  4.636430  6.639370  0.066740  b'groupB'
99996  4.251993  3.351235  5.652197  b'groupA'
99997  4.131405  6.371722  3.125554  b'groupB'
99998  1.404214  4.392506  9.298558  b'groupA'
99999  7.231749  8.770465  3.925490  b'groupB'

[100000 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  2.074508  1.775662  1.318589  b'groupA'
99996  4.636430  6.639370  0.066740  b'groupB'
99997  4.251993  3.351235  5.652197  b'groupA'
99998  4.131405  6.371722  3.125554  b'groupB'
99999  1.404214  4.392506  9.298558  b'groupA'

[100000 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  2.261206  1.404770  3.977088  b'groupA'
99996  0.795885  8.915077  6.892585  b'groupB'
99997  0.353597  1.289198  0.001943  b'groupA'
99998  6.540503  5.698432  7.743243  b'groupB'
99999  2.074508  1.775662  1.318589  b'groupA'

[100000 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  7.680433  9.606008  5.626119  b'groupB'
99996  1.336234  3.864737  1.698313  b'groupA'
99997  0.541297  9.975611  6.822081  b'groupB'
99998  1.709441  4.517255  8.083470  b'groupA'
99999  0.778983  9.326919  2.996122  b'groupA'

[100000 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  8.630346  3.981509  1.040496  b'groupB'
99996  0.656783  5.144250  9.648631  b'groupA'
99997  8.299312  7.466245  5.592216  b'groupB'
99998  2.728767  5.140916  0.727831  b'groupA'
99999  8.852912  3.855575  6.128628  b'groupB'

[100000 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df         attrib1   attrib2   attrib3      class
0      7.308782  4.100808  2.077148  b'groupB'
1      5.833539  0.422983  7.616747  b'groupA'
2      1.397627  6.949480  8.052278  b'groupB'
3      2.750299  0.753878  6.105915  b'groupA'
4      2.049135  6.233638  1.847071  b'groupB'
...         ...       ...       ...        ...
99995  8.479838  4.037801  7.474048  b'groupB'
99996  0.587347  2.725972  8.395393  b'groupA'
99997  1.297264  9.227339  6.843533  b'groupB'
99998  2.009023  5.305785  1.423271  b'groupA'
99999  9.680480  4.642564  3.628668  b'groupB'

[100000 rows x 4 columns]
sea
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/sea_1_abrupt_drift_0_noise_balanced.arff': [array([[0.73092176, 0.4100868 , 0.20771329],
       [0.58338649, 0.04229477, 0.76170074],
       [0.13976242, 0.69496137, 0.80525599],
       ...,
       [0.24495193, 0.11831829, 0.06608347],
       [0.41343426, 0.56604359, 0.14036612],
       [0.07940532, 0.19504205, 0.08055242]]), array([[0.90503182

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99996   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99997   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99998   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   
99999  101356.827789      0.000000  63.0  b'level3'  b'car20'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   
99996  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99997   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99998   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99999   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   82084.560570      0.000000  38.0  b'level1'   b'car8'  b'zipcode1'   
99996  122185.589049      0.000000  68.0  b'level2'  b'car16'  b'zipcode5'   
99997   75112.189101      0.000000  43.0  b'level0'  b'car14'  b'zipcode3'   
99998   77645.133039      0.000000  78.0  b'level4'   b'car9'  b'zipcode6'   
99999   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  129778.800352      0.000000  37.0  b'level2'   b'car1'  b'zipcode8'   
99996  124575.146339      0.000000  53.0  b'level0'   b'car7'  b'zipcode4'   
99997   65021.355936  43217.169171  39.0  b'level3'  b'car18'  b'zipcode4'   
99998  113209.808491      0.000000  53.0  b'level2'  b'car15'  b'zipcode2'   
99999   60121.727315  24438.661331  62.0  b'level1'  b'car14'  b'zipcode2'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   37724.568475  11570.033669  27.0  b'level1'   b'car8'  b'zipcode3'   
99996  110548.635896      0.000000  58.0  b'level4'  b'car20'  b'zipcode2'   
99997  119805.259163      0.000000  73.0  b'level1'  b'car15'  b'zipcode1'   
99998  120012.609799      0.000000  48.0  b'level1'   b'car9'  b'zipcode7'   
99999  120413.864305      0.000000  38.0  b'level4'   b'car9'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   86287.261924      0.000000  65.0  b'level2'  b'car18'  b'zipcode4'   
99996   78633.802398      0.000000  48.0  b'level2'  b'car11'  b'zipcode4'   
99997   99038.707844      0.000000  79.0  b'level1'  b'car16'  b'zipcode8'   
99998  120472.854608      0.000000  40.0  b'level3'  b'car16'  b'zipcode6'   
99999  128351.871620      0.000000  56.0  b'level2'   b'car6'  b'zipcode7'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw1 exclude
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff': [array([[0.73089198, 0.        , 0.6       , 0.28183225, 0.96551724,
        0.71074425],
       [0.15273383, 0.27168449, 0.88333333, 0.18933977, 0.06896552,
        0.34750786],
       [0.52078707, 0.        , 0.53333333, 0.1972512 , 0.20689655,
        0.1198196 ],
       ...,
       [0.79527985, 0.        , 0.7       , 0.07924677, 0.86206897,
        0.26830022],
       [0.62023278, 0.        , 0.56666667, 0.46517532, 0.89655172,
        0.20067956],
       [0.72150735, 0.        , 0.83333333, 0.3726262 , 0.62068966,
        0.19503629]]), array([[0.19878044, 0.96414871, 0.55      , 0.18417979, 0.34482759,
        0.34825058],
       [0.86428378, 0.        , 0.05      , 0.20300161, 0.17241379,
        0.143289  ],
       [0.08956815, 0.36984179, 0.45      , 0.0402585 , 0.06896552,
        0.35019363],
       ...,
       [0.117721  , 0.64512796, 0.9       , 0.29655255,

df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99996   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99997   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99998   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   
99999  101356.827789      0.000000  63.0  b'level3'  b'car20'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   
99996  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99997   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99998   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99999   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   82084.560570      0.000000  38.0  b'level1'   b'car8'  b'zipcode1'   
99996  122185.589049      0.000000  68.0  b'level2'  b'car16'  b'zipcode5'   
99997   75112.189101      0.000000  43.0  b'level0'  b'car14'  b'zipcode3'   
99998   77645.133039      0.000000  78.0  b'level4'   b'car9'  b'zipcode6'   
99999   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  129778.800352      0.000000  37.0  b'level2'   b'car1'  b'zipcode8'   
99996  124575.146339      0.000000  53.0  b'level0'   b'car7'  b'zipcode4'   
99997   65021.355936  43217.169171  39.0  b'level3'  b'car18'  b'zipcode4'   
99998  113209.808491      0.000000  53.0  b'level2'  b'car15'  b'zipcode2'   
99999   60121.727315  24438.661331  62.0  b'level1'  b'car14'  b'zipcode2'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   37724.568475  11570.033669  27.0  b'level1'   b'car8'  b'zipcode3'   
99996  110548.635896      0.000000  58.0  b'level4'  b'car20'  b'zipcode2'   
99997  119805.259163      0.000000  73.0  b'level1'  b'car15'  b'zipcode1'   
99998  120012.609799      0.000000  48.0  b'level1'   b'car9'  b'zipcode7'   
99999  120413.864305      0.000000  38.0  b'level4'   b'car9'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   86287.261924      0.000000  65.0  b'level2'  b'car18'  b'zipcode4'   
99996   78633.802398      0.000000  48.0  b'level2'  b'car11'  b'zipcode4'   
99997   99038.707844      0.000000  79.0  b'level1'  b'car16'  b'zipcode8'   
99998  120472.854608      0.000000  40.0  b'level3'  b'car16'  b'zipcode6'   
99999  128351.871620      0.000000  56.0  b'level2'   b'car6'  b'zipcode7'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw1 onehot
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff': [array([[0.73089198, 0.        , 0.6       , ..., 0.        , 0.        ,
        0.        ],
       [0.15273383, 0.27168449, 0.88333333, ..., 1.        , 0.        ,
        0.        ],
       [0.52078707, 0.        , 0.53333333, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.79527985, 0.        , 0.7       , ..., 1.        , 0.        ,
        0.        ],
       [0.62023278, 0.        , 0.56666667, ..., 0.        , 0.        ,
        0.        ],
       [0.72150735, 0.        , 0.83333333, ..., 0.        , 0.        ,
        0.        ]]), array([[0.19878044, 0.96414871, 0.55      , ..., 1.        , 0.        ,
        0.        ],
       [0.86428378, 0.        , 0.05      , ..., 0.        , 0.        ,
        0.        ],
       [0.08956815, 0.36984179, 0.45      , ..., 0.        , 1.        ,
        0.        ],
       ...,
       [0.1

df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99996   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99997   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99998   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   
99999  101356.827789      0.000000  63.0  b'level3'  b'car20'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   
99996  142923.630833      0.000000  28.0  b'level0'  b'car13'  b'zipcode3'   
99997   48591.770745  22774.455785  24.0  b'level2'   b'car7'  b'zipcode1'   
99998   40282.441180  70705.062385  24.0  b'level0'  b'car14'  b'zipcode9'   
99999   24701.771322  17773.082776  70.0  b'level2'  b'car14'  b'zipcode1'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   82084.560570      0.000000  38.0  b'level1'   b'car8'  b'zipcode1'   
99996  122185.589049      0.000000  68.0  b'level2'  b'car16'  b'zipcode5'   
99997   75112.189101      0.000000  43.0  b'level0'  b'car14'  b'zipcode3'   
99998   77645.133039      0.000000  78.0  b'level4'   b'car9'  b'zipcode6'   
99999   20450.346419  50131.775485  68.0  b'level2'  b'car18'  b'zipcode7'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  129778.800352      0.000000  37.0  b'level2'   b'car1'  b'zipcode8'   
99996  124575.146339      0.000000  53.0  b'level0'   b'car7'  b'zipcode4'   
99997   65021.355936  43217.169171  39.0  b'level3'  b'car18'  b'zipcode4'   
99998  113209.808491      0.000000  53.0  b'level2'  b'car15'  b'zipcode2'   
99999   60121.727315  24438.661331  62.0  b'level1'  b'car14'  b'zipcode2'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   37724.568475  11570.033669  27.0  b'level1'   b'car8'  b'zipcode3'   
99996  110548.635896      0.000000  58.0  b'level4'  b'car20'  b'zipcode2'   
99997  119805.259163      0.000000  73.0  b'level1'  b'car15'  b'zipcode1'   
99998  120012.609799      0.000000  48.0  b'level1'   b'car9'  b'zipcode7'   
99999  120413.864305      0.000000  38.0  b'level4'   b'car9'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0      115014.164791      0.000000  56.0  b'level3'  b'car15'  b'zipcode5'   
1       39855.706174  20372.573526  73.0  b'level2'   b'car3'  b'zipcode7'   
2       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
3       57795.185560  13344.143894  80.0  b'level2'   b'car1'  b'zipcode3'   
4       46602.621787  26569.114334  46.0  b'level0'  b'car13'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   86287.261924      0.000000  65.0  b'level2'  b'car18'  b'zipcode4'   
99996   78633.802398      0.000000  48.0  b'level2'  b'car11'  b'zipcode4'   
99997   99038.707844      0.000000  79.0  b'level1'  b'car16'  b'zipcode8'   
99998  120472.854608      0.000000  40.0  b'level3'  b'car16'  b'zipcode6'   
99999  128351.871620      0.000000  56.0  b'level2'   b'car6'  b'zipcode7'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw1 target
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw1_1_abrupt_drift_0_noise_balanced.arff': [array([[0.73089198, 0.        , 0.6       , ..., 0.27295809, 0.85304062,
        0.63548506],
       [0.15273383, 0.27168449, 0.88333333, ..., 0.14991113, 0.3861165 ,
        0.20596598],
       [0.52078707, 0.        , 0.53333333, ..., 0.        , 0.01822838,
        0.63548506],
       ...,
       [0.79527985, 0.        , 0.7       , ..., 0.        , 1.        ,
        0.20596598],
       [0.62023278, 0.        , 0.56666667, ..., 0.27295809, 0.23885852,
        0.05969598],
       [0.72150735, 0.        , 0.83333333, ..., 0.50887558, 0.76350635,
        0.63548506]]), array([[0.19878044, 0.96414871, 0.55      , ..., 0.14991113, 0.85304062,
        0.20596598],
       [0.86428378, 0.        , 0.05      , ..., 0.50887558, 0.82625127,
        0.        ],
       [0.08956815, 0.36984179, 0.45      , ..., 0.50887558, 0.76350635,
        0.50822506],
       ...,
       [0.1

df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99996   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99997  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99998   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   
99999   87473.968095      0.000000  54.0  b'level0'  b'car11'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   
99996  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99997   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99998  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99999   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  111359.652359      0.000000  61.0  b'level1'  b'car15'  b'zipcode1'   
99996  147593.942041      0.000000  59.0  b'level4'  b'car17'  b'zipcode6'   
99997  123948.303642      0.000000  66.0  b'level4'  b'car11'  b'zipcode4'   
99998   71333.696488  11799.103316  58.0  b'level1'   b'car5'  b'zipcode3'   
99999   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   71983.840341  19299.479268  45.0  b'level2'   b'car7'  b'zipcode8'   
99996   67069.529360  46815.593925  30.0  b'level3'  b'car14'  b'zipcode5'   
99997  104177.660368      0.000000  48.0  b'level1'   b'car9'  b'zipcode5'   
99998  143771.939648      0.000000  71.0  b'level3'  b'car20'  b'zipcode6'   
99999   36163.251675  59471.494372  53.0  b'level0'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   68222.567612  33141.251698  41.0  b'level2'   b'car8'  b'zipcode1'   
99996   50428.438359  59324.405616  26.0  b'level4'   b'car1'  b'zipcode2'   
99997  112914.978798      0.000000  41.0  b'level1'   b'car4'  b'zipcode4'   
99998   90178.866228      0.000000  60.0  b'level1'  b'car13'  b'zipcode6'   
99999   37082.251910  22468.199196  43.0  b'level2'  b'car20'  b'zipcode1'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   80507.066152      0.000000  59.0  b'level1'   b'car8'  b'zipcode7'   
99996   43953.844702  23112.195013  36.0  b'level1'  b'car19'  b'zipcode6'   
99997  127958.679326      0.000000  59.0  b'level2'   b'car8'  b'zipcode7'   
99998   58150.729498  54772.811468  66.0  b'level3'  b'car15'  b'zipcode8'   
99999  120616.756315      0.000000  59.0  b'level2'   b'car3'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw2 exclude
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff': [array([[0.29406669, 0.57232188, 0.95      , 0.40768475, 0.79310345,
        0.76167558],
       [0.274077  , 0.29819404, 0.11666667, 0.49264216, 0.27586207,
        0.54039634],
       [0.97390317, 0.        , 0.1       , 0.23755869, 0.27586207,
        0.9498421 ],
       ...,
       [0.03719466, 0.95578652, 0.31666667, 0.31168795, 0.17241379,
        0.6010682 ],
       [0.31007942, 0.89456625, 0.53333333, 0.59847593, 0.13793103,
        0.19455337],
       [0.34635195, 0.54364268, 0.65      , 0.35574248, 1.        ,
        0.63681602]]), array([[0.83522758, 0.        , 0.71666667, 0.37540913, 0.17241379,
        0.06936832],
       [0.09902868, 0.65900734, 0.48333333, 0.27646875, 0.51724138,
        0.61560327],
       [0.85813019, 0.        , 0.93333333, 0.59290154, 0.10344828,
        0.76047143],
       ...,
       [0.3778118 , 0.43708814, 0.38333333, 0.13871208,

df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99996   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99997  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99998   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   
99999   87473.968095      0.000000  54.0  b'level0'  b'car11'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   
99996  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99997   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99998  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99999   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  111359.652359      0.000000  61.0  b'level1'  b'car15'  b'zipcode1'   
99996  147593.942041      0.000000  59.0  b'level4'  b'car17'  b'zipcode6'   
99997  123948.303642      0.000000  66.0  b'level4'  b'car11'  b'zipcode4'   
99998   71333.696488  11799.103316  58.0  b'level1'   b'car5'  b'zipcode3'   
99999   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   71983.840341  19299.479268  45.0  b'level2'   b'car7'  b'zipcode8'   
99996   67069.529360  46815.593925  30.0  b'level3'  b'car14'  b'zipcode5'   
99997  104177.660368      0.000000  48.0  b'level1'   b'car9'  b'zipcode5'   
99998  143771.939648      0.000000  71.0  b'level3'  b'car20'  b'zipcode6'   
99999   36163.251675  59471.494372  53.0  b'level0'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   68222.567612  33141.251698  41.0  b'level2'   b'car8'  b'zipcode1'   
99996   50428.438359  59324.405616  26.0  b'level4'   b'car1'  b'zipcode2'   
99997  112914.978798      0.000000  41.0  b'level1'   b'car4'  b'zipcode4'   
99998   90178.866228      0.000000  60.0  b'level1'  b'car13'  b'zipcode6'   
99999   37082.251910  22468.199196  43.0  b'level2'  b'car20'  b'zipcode1'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   80507.066152      0.000000  59.0  b'level1'   b'car8'  b'zipcode7'   
99996   43953.844702  23112.195013  36.0  b'level1'  b'car19'  b'zipcode6'   
99997  127958.679326      0.000000  59.0  b'level2'   b'car8'  b'zipcode7'   
99998   58150.729498  54772.811468  66.0  b'level3'  b'car15'  b'zipcode8'   
99999  120616.756315      0.000000  59.0  b'level2'   b'car3'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw2 onehot
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff': [array([[0.29406669, 0.57232188, 0.95      , ..., 0.        , 0.        ,
        0.        ],
       [0.274077  , 0.29819404, 0.11666667, ..., 0.        , 0.        ,
        0.        ],
       [0.97390317, 0.        , 0.1       , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.03719466, 0.95578652, 0.31666667, ..., 0.        , 0.        ,
        0.        ],
       [0.31007942, 0.89456625, 0.53333333, ..., 0.        , 0.        ,
        0.        ],
       [0.34635195, 0.54364268, 0.65      , ..., 0.        , 0.        ,
        0.        ]]), array([[0.83522758, 0.        , 0.71666667, ..., 0.        , 0.        ,
        0.        ],
       [0.09902868, 0.65900734, 0.48333333, ..., 0.        , 0.        ,
        0.        ],
       [0.85813019, 0.        , 0.93333333, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.3

df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99996   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99997  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99998   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   
99999   87473.968095      0.000000  54.0  b'level0'  b'car11'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   
99996  138438.140611      0.000000  50.0  b'level2'   b'car8'  b'zipcode6'   
99997   90804.449088      0.000000  77.0  b'level4'  b'car13'  b'zipcode7'   
99998  106569.365368      0.000000  51.0  b'level1'   b'car8'  b'zipcode6'   
99999   84356.498251      0.000000  36.0  b'level1'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995  111359.652359      0.000000  61.0  b'level1'  b'car15'  b'zipcode1'   
99996  147593.942041      0.000000  59.0  b'level4'  b'car17'  b'zipcode6'   
99997  123948.303642      0.000000  66.0  b'level4'  b'car11'  b'zipcode4'   
99998   71333.696488  11799.103316  58.0  b'level1'   b'car5'  b'zipcode3'   
99999   39396.704989  18582.273595  72.0  b'level1'  b'car16'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   71983.840341  19299.479268  45.0  b'level2'   b'car7'  b'zipcode8'   
99996   67069.529360  46815.593925  30.0  b'level3'  b'car14'  b'zipcode5'   
99997  104177.660368      0.000000  48.0  b'level1'   b'car9'  b'zipcode5'   
99998  143771.939648      0.000000  71.0  b'level3'  b'car20'  b'zipcode6'   
99999   36163.251675  59471.494372  53.0  b'level0'  b'car18'  b'zipcode5'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   68222.567612  33141.251698  41.0  b'level2'   b'car8'  b'zipcode1'   
99996   50428.438359  59324.405616  26.0  b'level4'   b'car1'  b'zipcode2'   
99997  112914.978798      0.000000  41.0  b'level1'   b'car4'  b'zipcode4'   
99998   90178.866228      0.000000  60.0  b'level1'  b'car13'  b'zipcode6'   
99999   37082.251910  22468.199196  43.0  b'level2'  b'car20'  b'zipcode1'   

             hvalue  hyears           loan      class  
0   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


df               salary    commission   age     elevel       car      zipcode  \
0       58227.414161  42921.435776  77.0  b'level2'   b'car6'  b'zipcode5'   
1       55628.982351  22363.143593  27.0  b'level3'  b'car17'  b'zipcode3'   
2      146598.421289      0.000000  26.0  b'level2'  b'car13'  b'zipcode5'   
3       87701.290931      0.000000  52.0  b'level4'  b'car16'  b'zipcode5'   
4       83888.656723      0.000000  72.0  b'level3'  b'car11'  b'zipcode5'   
...              ...           ...   ...        ...       ...          ...   
99995   80507.066152      0.000000  59.0  b'level1'   b'car8'  b'zipcode7'   
99996   43953.844702  23112.195013  36.0  b'level1'  b'car19'  b'zipcode6'   
99997  127958.679326      0.000000  59.0  b'level2'   b'car8'  b'zipcode7'   
99998   58150.729498  54772.811468  66.0  b'level3'  b'car15'  b'zipcode8'   
99999  120616.756315      0.000000  59.0  b'level2'   b'car3'  b'zipcode3'   

              hvalue  hyears           loan      class  
0  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
  y = column_or_1d(y, warn=True)


agraw2 target
{'../Datasets_concept_drift/synthetic_data/abrupt_drift/agraw2_1_abrupt_drift_0_noise_balanced.arff': [array([[0.29406669, 0.57232188, 0.95      , ..., 0.51653096, 0.        ,
        0.87825406],
       [0.274077  , 0.29819404, 0.11666667, ..., 0.        , 0.57218266,
        1.        ],
       [0.97390317, 0.        , 0.1       , ..., 0.51653096, 0.26878201,
        0.87825406],
       ...,
       [0.03719466, 0.95578652, 0.31666667, ..., 1.        , 0.57218266,
        1.        ],
       [0.31007942, 0.89456625, 0.53333333, ..., 1.        , 0.35827864,
        0.81404917],
       [0.34635195, 0.54364268, 0.65      , ..., 0.        , 0.35601348,
        0.52543549]]), array([[0.83522758, 0.        , 0.71666667, ..., 0.4172376 , 0.64208153,
        0.        ],
       [0.09902868, 0.65900734, 0.48333333, ..., 0.4172376 , 0.57887399,
        0.        ],
       [0.85813019, 0.        , 0.93333333, ..., 1.        , 0.69608301,
        0.5830176 ],
       ...,
       [0.3

## Helpers to obtain valuable information from a kmeans verbose run

In [7]:
import csv
import sys


def write_verbose_kmeans_to_file(result_filename, data_to_cluster, n_clusters, n_init, max_iter, tol, random_state):
    print('random state:', random_state)
    orig_stdout = sys.stdout
    sys.stdout = open(result_filename, 'wt')

    fitted_kmeans = KMeans(
            n_clusters=2,
            n_init=n_init,
            max_iter=max_iter,
            tol=tol,
            verbose=3,
            random_state=random_state
        ).fit(data_to_cluster)

    sys.stdout = orig_stdout
    print('something')


def convert_kmeans_output_file_to_dicts(file_path, n_init):
    # read the verbose output file to be able to reach conclusions
    with open(file_path, newline='') as f:
        rdr = csv.reader(f)
        kmeans_verbose_output_list = list(rdr)

    run_results_list = []
    reversed_run_results_list = []

    current_run_reversed_list_messages = []
    for el in reversed(kmeans_verbose_output_list):
        current_run_reversed_list_messages.append(el)
        if el[0] == 'Initialization complete':
            reversed_run_results_list.append(current_run_reversed_list_messages)
            current_run_reversed_list_messages = []

    for reversed_run_result in reversed_run_results_list:
        run_result = reversed_run_result.copy()
        run_result.reverse()
        run_results_list.append(run_result)

    run_results_list.reverse()

    run_results_dicts = []
    for i in range(n_init):
        result_dict = {'converged': False, 'convergence_dict': {}, 'iterations_inertia': []}
        current_result = run_results_list[i]
        only_iterations = current_result[1:]
        if len(current_result[-1]) == 1: # this run converged
            result_dict['converged'] = True
            only_iterations = only_iterations[:-1]
            converge_message_split = current_result[-1][0].split(' ')
            result_dict['convergence_dict']['iteration'] = int(converge_message_split[3][:-1])
            if len(converge_message_split) == 6:
                result_dict['convergence_dict']['type'] = 'strict'
            else:
                result_dict['convergence_dict']['type'] = 'tol-based'
                result_dict['convergence_dict']['center_shift'] = converge_message_split[6]
                result_dict['convergence_dict']['within_tol'] = converge_message_split[9]

        iterations_inertia = []
        for iteration_message_list in only_iterations:
            inertia = float(iteration_message_list[1].split(' ')[2])
            iterations_inertia.append(inertia)
        result_dict['iterations_inertia'] = iterations_inertia

        run_results_dicts.append(result_dict)

    return run_results_dicts
    

def print_stats_from_kmeans_output_dicts(run_results_dicts):
    max_iterations = -1
    initial_inertia = []
    final_inertia = []
    num_convergences = 0
    num_strict_convergences = 0
    num_tol_based_convergences = 0
    for result_dict in run_results_dicts:
        num_iterations = len(result_dict['iterations_inertia'])
        max_iterations = num_iterations if num_iterations > max_iterations else max_iterations
        initial_inertia.append(result_dict['iterations_inertia'][0])
        final_inertia.append(result_dict['iterations_inertia'][-1])
        if result_dict['converged'] == True:
            num_convergences += 1
            if result_dict['convergence_dict']['type'] == 'strict':
                num_strict_convergences += 1
            else:
                num_tol_based_convergences += 1

    print('total number of results:', len(run_results_dicts))
    print('maximum number of iterations:', max_iterations)
    print('minimum initial inertia:', min(initial_inertia))
    print('maximum initial inertia:', max(initial_inertia))
    print('number of unique final inertia values:', len(np.unique(final_inertia)))
    print('minimum final inertia:', min(final_inertia))
    print('maximum final inertia:', max(final_inertia))
    print('total number of convergences:', num_convergences)
    print('number of strict convergences:', num_strict_convergences)
    print('number of tol-based convergences:', num_tol_based_convergences)

## Finding the best tol and max_iter in SEA

In [8]:
# imports
from sklearn.cluster import KMeans


# helpers
def write_kmeans_results_ucdd_helper(output_filename_no_extension, ref_batches, n_init, max_iter, tol, random_state):
    # dummy = [np.asarray(1), np.asarray(2), np.asarray(3)]
    combinations = []
    for i in range(3):
    #     combinations.append(np.vstack((dummy[i], dummy[(i + 1) % 3])))
        combinations.append(np.vstack((ref_batches[i], ref_batches[(i + 1) % 3])))

    for i, combination in enumerate(combinations):
        filename = output_filename_no_extension + str(i) + '.txt'
        write_verbose_kmeans_to_file(filename, combination,
                                     n_clusters=2, n_init=n_init, max_iter=max_iter, tol=tol, random_state=random_state)
        output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
        print_stats_from_kmeans_output_dicts(output_dicts)

In [9]:
write_kmeans_results_ucdd_helper('sea_output', sea_reference_batches[abrupt_sea_path], n_init=100, max_iter=500, tol=0,
                                 random_state=1053)
# filename = 'sea_output'


# n_init = 100
# random_state=1053
# ref_batches = sea_reference_batches[abrupt_sea_path]

# # dummy = [np.asarray(1), np.asarray(2), np.asarray(3)]
# combinations = []
# for i in range(3):
# #     combinations.append(np.vstack((dummy[i], dummy[(i + 1) % 3])))
#     combinations.append(np.vstack((ref_batches[i], ref_batches[(i + 1) % 3])))

# for i, combination in enumerate(combinations):
#     filename = 'sea_output' + str(i) + '.txt'
#     write_verbose_kmeans_to_file(filename, combination,
#                                  n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
#     output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
#     print_stats_from_kmeans_output_dicts(output_dicts)

random state: 1053


NameError: name 'filename' is not defined

### Using the Obtained Parameters to Evaluate the Algorithm on SEA Abrupt

In [None]:
# sys.stdout = orig_stdout
# sea_abrupt_all_drifting_batches = all_drifting_batches(sea_abrupt_ref_batches,
#         sea_abrupt_test_batches,
#         n_clusters=2,
#         n_init=100,
#         max_iter=500,
#         tol=1e-7,
#         random_state=0,
#         coeff=2.66)

# print('sea_abrupt_all_drifting_batches', sea_abrupt_all_drifting_batches)

### Using the Obtained Parameters to Evaluate the Algorithm on All SEA Datasets

In [None]:
# sea_all_drifting_batches = {}
# for sea_path in all_sea_data_paths:
#     sea_ref_batches = sea_reference_batches[sea_path]
#     sea_test_batches = sea_testing_batches[sea_path]
    
#     sea_all_drifting_batches[sea_path] = all_drifting_batches(sea_ref_batches,
#         sea_test_batches,
#         n_clusters=2,
#         n_init=100,
#         max_iter=500,
#         tol=1e-7,
#         random_state=0,
#         coeff=2.66)
    
# print(sea_all_drifting_batches)

## Finding the best tol and max_iter in AGRAW1 Exclude

In [None]:
write_kmeans_results_ucdd_helper('agraw1_exclude_output', agraw1_exclude_reference_batches[abrupt_agraw1_path],
                                 n_init=100, max_iter=500, tol=0,
                                 random_state=1053)

## Finding the best tol and max_iter in AGRAW1 Onehot

In [None]:
# filename = 'agraw1_onehot_output.txt'
# n_init = 100
# random_state=1053
# ref_batches = agraw1_onehot_reference_batches[abrupt_agraw1_path]
# test_batches = agraw1_onehot_testing_batches[abrupt_agraw1_path]
# weighted_joined_reference_data, _, _ = mssw_preprocess(ref_batches, test_batches)
# write_verbose_kmeans_to_file(filename, weighted_joined_reference_data,
#                              n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
# output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
# print_stats_from_kmeans_output_dicts(output_dicts)

## Finding the best tol and max_iter in AGRAW1 Target

In [None]:
# filename = 'agraw1_target_output.txt'
# n_init = 100
# random_state=1053
# ref_batches = agraw1_target_reference_batches[abrupt_agraw1_path]
# test_batches = agraw1_target_testing_batches[abrupt_agraw1_path]
# weighted_joined_reference_data, _, _ = mssw_preprocess(ref_batches, test_batches)
# write_verbose_kmeans_to_file(filename, weighted_joined_reference_data,
#                              n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
# output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
# print_stats_from_kmeans_output_dicts(output_dicts)

## Finding the best tol and max_iter in AGRAW2 Exclude

In [None]:
# filename = 'agraw2_exclude_output.txt'
# n_init = 100
# random_state=1053
# ref_batches = agraw2_exclude_reference_batches[abrupt_agraw2_path]
# test_batches = agraw2_exclude_testing_batches[abrupt_agraw2_path]
# weighted_joined_reference_data, _, _ = mssw_preprocess(ref_batches, test_batches)
# write_verbose_kmeans_to_file(filename, weighted_joined_reference_data,
#                              n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
# output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
# print_stats_from_kmeans_output_dicts(output_dicts)

## Finding the best tol and max_iter in AGRAW2 Onehot

In [None]:
# filename = 'agraw2_onehot_output.txt'
# n_init = 100
# random_state=1053
# ref_batches = agraw2_onehot_reference_batches[abrupt_agraw2_path]
# test_batches = agraw2_onehot_testing_batches[abrupt_agraw2_path]
# weighted_joined_reference_data, _, _ = mssw_preprocess(ref_batches, test_batches)
# write_verbose_kmeans_to_file(filename, weighted_joined_reference_data,
#                              n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
# output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
# print_stats_from_kmeans_output_dicts(output_dicts)

## Finding the best tol and max_iter in AGRAW2 Target

In [None]:
# filename = 'agraw2_target_output.txt'
# n_init = 100
# random_state=1053
# ref_batches = agraw2_target_reference_batches[abrupt_agraw2_path]
# test_batches = agraw2_target_testing_batches[abrupt_agraw2_path]
# weighted_joined_reference_data, _, _ = mssw_preprocess(ref_batches, test_batches)
# write_verbose_kmeans_to_file(filename, weighted_joined_reference_data,
#                              n_clusters=2, n_init=n_init, max_iter=500, tol=0, random_state=random_state)
# output_dicts = convert_kmeans_output_file_to_dicts(filename, n_init=n_init)
# print_stats_from_kmeans_output_dicts(output_dicts)