In [43]:
import sys
import os
sys.path.append(os.path.abspath(".."))

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_wine, load_digits, load_breast_cancer
from mislabelling import symmetric_noise, pair_noise, NNAR
from testing import *

In [47]:
RESOLUTION = 10
TRIALS = 2
N_ESTIMATORS = 10
TEST_SIZE = 0.25
ITERATIONS = 20

noises = (symmetric_noise, pair_noise, pair_noise, pair_noise, NNAR)
datasets = (load_wine(), load_digits(), load_breast_cancer(), load_gmm5())
rf = RandomForestClassifier(random_state=42)

accuracies_mean = []
accuracies_se = []
relabelling_f1_success = []
relabelling_f1_se = []
relabelling_acc_success = []
relabelling_acc_se = []

In [45]:
def get_ratio(data):
    values, counts = np.unique(data.target, return_counts=True)
    print({int(k): int(v) for k, v in zip(values, counts)})
    total_vals = sum(counts)

    noise_ratio = [1 - count/total_vals for count in counts]
    noise_ratio /= min(noise_ratio)
    print([float(f"{val:.3g}") for val in noise_ratio])
    return noise_ratio

In [48]:
def run_single_experiment(data, noise_idx):
    unique_pairs = None
    noise_ratio = None
    clf = None

    if noise_idx == 2:
        noise_ratio = get_ratio(data)
    if noise_idx == 3:
        unique_pairs = True
    if noise_idx == 4:
        rf = RandomForestClassifier()
        rf.fit(data.data, data.target)
        clf = rf

    accuracies_all, auc_all, relabelling_f1_all, relabelling_acc_all, x_axis = run_noise_level_experiment(
        data, RandomForestClassifier, noises[noise_idx],
        n_estimators=N_ESTIMATORS, trials=TRIALS,
        resolution=RESOLUTION, test_size=TEST_SIZE, iterations=ITERATIONS,
        noise_ratio=noise_ratio, clf=clf, unique_pairs=unique_pairs
    )

    accuracies_boot, auc_boot, relabelling_f1_boot, relabelling_acc_boot, _ = run_noise_level_experiment(
        data, RandomForestClassifier, noises[noise_idx],
        n_estimators=N_ESTIMATORS, trials=TRIALS,
        resolution=RESOLUTION, test_size=TEST_SIZE, iterations=ITERATIONS,
        control=False, bootstrapping=True,
        noise_ratio=noise_ratio, clf=clf, unique_pairs=unique_pairs
    )

    # Concatenate results
    accuracies_all = np.concatenate([accuracies_all, accuracies_boot[:1]], axis=0)
    auc_all = np.concatenate([auc_all, auc_boot[:1]], axis=0)

    blank_f1 = np.zeros_like(relabelling_f1_boot[:1])
    blank_acc = np.zeros_like(relabelling_acc_boot[:1])

    relabelling_f1_all = np.concatenate([relabelling_f1_all, relabelling_f1_boot[:1], blank_f1], axis=0)
    relabelling_acc_all = np.concatenate([relabelling_acc_all, relabelling_acc_boot[:1], blank_acc], axis=0)

    # Process results
    accuracies_mean_exp, accuracies_se_exp = process_experiment_result(accuracies_all)
    relabelling_f1_success_exp, relabelling_f1_se_exp = process_experiment_result(relabelling_f1_all)
    relabelling_acc_success_exp, relabelling_acc_se_exp = process_experiment_result(relabelling_acc_all)

    return accuracies_mean_exp, accuracies_se_exp, relabelling_f1_success_exp, relabelling_f1_se_exp, relabelling_acc_success_exp, relabelling_acc_se_exp

# Build list of tasks
tasks = [(data, i) for data in datasets for i in range(5)]

# Run in parallel
results = Parallel(n_jobs=-1)(delayed(run_single_experiment)(data, noise_idx) for (data, noise_idx) in tasks)

{0: 59, 1: 71, 2: 48}
[1.11, 1.0, 1.21]
{0: 178, 1: 182, 2: 177, 3: 183, 4: 181, 5: 182, 6: 181, 7: 179, 8: 174, 9: 180}
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.01, 1.0]
{0: 212, 1: 357}
[1.68, 1.0]
{0: 618, 1: 632}
[1.02, 1.0]


In [None]:
# Unzip results: separate the six outputs
accuracies_mean_list, accuracies_se_list, relabelling_f1_success_list, relabelling_f1_se_list, relabelling_acc_success_list, relabelling_acc_se_list = zip(*results)

# Stack along axis 0 (tasks)
accuracies_mean = np.stack(accuracies_mean_list, axis=0)  # shape (4,3,10)
accuracies_se = np.stack(accuracies_se_list, axis=0)
relabelling_f1_success = np.stack(relabelling_f1_success_list, axis=0)
relabelling_f1_se = np.stack(relabelling_f1_se_list, axis=0)
relabelling_acc_success = np.stack(relabelling_acc_success_list, axis=0)
relabelling_acc_se = np.stack(relabelling_acc_se_list, axis=0)

In [None]:
# Stack your results into a single big DataFrame
results = []

# Loop through the results and create rows
for dataset_idx, dataset_results in enumerate(zip(accuracies_mean, accuracies_se, relabelling_f1_success, relabelling_f1_se, relabelling_acc_success, relabelling_acc_se)):
    for noise_idx, (acc_mean, acc_se, f1_mean, f1_se, acc_succ_mean, acc_succ_se) in enumerate(zip(*dataset_results)):
        for res_idx, (am, ase, f1m, f1se, asm, asse) in enumerate(zip(acc_mean, acc_se, f1_mean, f1_se, acc_succ_mean, acc_succ_se)):
            results.append({
                'Dataset': dataset_idx,
                'Noise Type': noise_idx,
                'Noise Level Index': res_idx,
                'Accuracy Mean': am,
                'Accuracy SE': ase,
                'Relabelling F1 Mean': f1m,
                'Relabelling F1 SE': f1se,
                'Relabelling Accuracy Mean': asm,
                'Relabelling Accuracy SE': asse
            })

# Convert list of dicts into a DataFrame
results_df = pd.DataFrame(results)

# Save to CSV
results_df.to_csv("experiment_results.csv", index=False)

print("Saved results to experiment_results.csv")

Saved results to experiment_results.csv


In [40]:
import pandas as pd
NOISE = 5

# Load your CSV
df = pd.read_csv('experiment_results.csv')

# Define mappings
dataset_names = {0: 'Wine', 1: 'Digits', 2: '\shortstack{Breast \\\\ Cancer}', 3: 'GMM5'}
method_names = {0: 'Standard', 2: 'Bootstrapped', 1: 'Control'}

# Prepare LaTeX
latex = r"""
\multirow{12}{*}{Accuracy}
"""

# Process each Dataset
for dataset_id in sorted(df['Dataset'].unique()):
    if dataset_id < (NOISE-1) * 4:
        continue
    elif dataset_id > NOISE*4 - 1:
        break
    dataset_df = df[df['Dataset'] == dataset_id]
    for method_id in [0, 2, 1]:  # Order: Standard, Bootstrapped, Control
        method_df = dataset_df[dataset_df['Noise Type'] == method_id]
        accuracies = method_df.sort_values('Noise Level Index')['Accuracy Mean']
        accuracies_se = method_df.sort_values('Noise Level Index')['Accuracy SE']
        values = ' & '.join(f"{mean:.2f} $\\pm$ {se:.2f}" for mean, se in zip(accuracies, accuracies_se))

        if method_id == 0:
            # Start new dataset block
            latex += f"\n    & \\multirow{{3}}{{*}}{{{dataset_names[dataset_id%4]}}} & {method_names[method_id]} & {values}\\\\ \\cline{{3-13}}"
        elif method_id in (2, 1):
            if method_id == 1:
                end = " \\cline{2-13}"
            else:
                end = " \\cline{3-13}"
            latex += f"\n    &                         & {method_names[method_id]} & {values}\\\\{end}"

# Finish LaTeX table
latex += r"""
\hline
\hline
"""


  dataset_names = {0: 'Wine', 1: 'Digits', 2: '\shortstack{Breast \\\\ Cancer}', 3: 'GMM5'}


In [41]:
# Prepare LaTeX
latex += r"""
\multirow{12}{*}{\shortstack{Relabelling \\ $F_1$-Score}}
"""

# Process each Dataset
for dataset_id in sorted(df['Dataset'].unique()):
    if dataset_id < (NOISE-1) * 4:
        continue
    elif dataset_id > NOISE*4 - 1:
        break
    dataset_df = df[df['Dataset'] == dataset_id]
    for method_id in [0, 2, 1]:  # Order: Standard, Bootstrapped, Control
        method_df = dataset_df[dataset_df['Noise Type'] == method_id]
        accuracies = method_df.sort_values('Noise Level Index')['Relabelling F1 Mean']
        accuracies_se = method_df.sort_values('Noise Level Index')['Relabelling F1 SE']
        values = ' & '.join(f"{mean:.2f} $\\pm$ {se:.2f}" for mean, se in zip(accuracies, accuracies_se))

        if method_id == 0:
            # Start new dataset block
            latex += f"\n    & \\multirow{{3}}{{*}}{{{dataset_names[dataset_id%4]}}} & {method_names[method_id]} & {values}\\\\ \\cline{{3-13}}"
        elif method_id in (2, 1):
            if method_id == 1:
                end = " \\cline{2-13}"
            else:
                end = " \\cline{3-13}"
            latex += f"\n    &                         & {method_names[method_id]} & {values}\\\\{end}"

# Finish LaTeX table
latex += r"""
\hline
\hline
"""

In [42]:
# Prepare LaTeX
latex += r"""
\multirow{12}{*}{\shortstack{Relabelling \\ Accuracy}}
"""

# Process each Dataset
for dataset_id in sorted(df['Dataset'].unique()):
    if dataset_id < (NOISE-1) * 4:
        continue
    elif dataset_id > NOISE*4 - 1:
        break
    dataset_df = df[df['Dataset'] == dataset_id]
    for method_id in [0, 2, 1]:  # Order: Standard, Bootstrapped, Control
        method_df = dataset_df[dataset_df['Noise Type'] == method_id]
        accuracies = method_df.sort_values('Noise Level Index')['Relabelling Accuracy Mean']
        accuracies_se = method_df.sort_values('Noise Level Index')['Relabelling Accuracy SE']
        values = ' & '.join(f"{mean:.2f} $\\pm$ {se:.2f}" for mean, se in zip(accuracies, accuracies_se))

        if method_id == 0:
            # Start new dataset block
            latex += f"\n    & \\multirow{{3}}{{*}}{{{dataset_names[dataset_id%4]}}} & {method_names[method_id]} & {values}\\\\ \\cline{{3-13}}"
        elif method_id in (2, 1):
            if method_id == 1:
                end = " \\cline{2-13}"
            else:
                end = " \\cline{3-13}"
            latex += f"\n    &                         & {method_names[method_id]} & {values}\\\\{end}"

# Finish LaTeX table
latex += r"""
\hline
"""

# Save or print
print(latex)



\multirow{12}{*}{Accuracy}

    & \multirow{3}{*}{Wine} & Standard & 0.87 $\pm$ 0.01 & 0.86 $\pm$ 0.00 & 0.86 $\pm$ 0.01 & 0.86 $\pm$ 0.01 & 0.74 $\pm$ 0.02 & 0.30 $\pm$ 0.02 & 0.15 $\pm$ 0.01 & 0.14 $\pm$ 0.01 & 0.13 $\pm$ 0.00 & 0.13 $\pm$ 0.01\\ \cline{3-13}
    &                         & Bootstrapped & 0.88 $\pm$ 0.00 & 0.88 $\pm$ 0.00 & 0.87 $\pm$ 0.00 & 0.84 $\pm$ 0.00 & 0.66 $\pm$ 0.01 & 0.29 $\pm$ 0.01 & 0.15 $\pm$ 0.00 & 0.12 $\pm$ 0.00 & 0.11 $\pm$ 0.00 & 0.12 $\pm$ 0.00\\ \cline{3-13}
    &                         & Control & 0.88 $\pm$ 0.00 & 0.86 $\pm$ 0.00 & 0.79 $\pm$ 0.00 & 0.69 $\pm$ 0.00 & 0.56 $\pm$ 0.01 & 0.42 $\pm$ 0.01 & 0.29 $\pm$ 0.01 & 0.20 $\pm$ 0.00 & 0.14 $\pm$ 0.00 & 0.12 $\pm$ 0.00\\ \cline{2-13}
    & \multirow{3}{*}{Digits} & Standard & 0.87 $\pm$ 0.01 & 0.86 $\pm$ 0.01 & 0.86 $\pm$ 0.01 & 0.85 $\pm$ 0.01 & 0.75 $\pm$ 0.02 & 0.23 $\pm$ 0.02 & 0.15 $\pm$ 0.01 & 0.14 $\pm$ 0.01 & 0.13 $\pm$ 0.01 & 0.13 $\pm$ 0.01\\ \cline{3-13}
    &                     