In [12]:
import numpy as np
from tqdm import tqdm
import time
import matplotlib.pyplot as plt
from ecit import *


def ecit_simu_speed(n_list=(100, 500, 1000, 2000),
                    t=100,
                    cit=kcit,
                    ensembles=((p_alpha1, 1), (p_alpha1, 4)),
                    show_progress=True,
                    alpha=0.05,
                    kcit_max_n=8000):
    """Measure error rates and wall-clock time of several test configurations.

    For every ``(p_ensemble, k)`` pair in ``ensembles`` and every sample size
    in ``n_list``, ``t`` pairs of synthetic data sets are drawn with
    ``generate_samples`` (one with ``indp='C'``, one with ``indp='N'``) and
    tested through ``ECIT`` on columns ``[0]``, ``[1]``, ``[2]``.

    The value of ``k`` selects what is run:

    * ``k == 1``: ``rcit`` with a single group.
    * ``k == 0``: ``kcit`` with a single group, but only while
      ``n < kcit_max_n``; for larger ``n`` the test is skipped and both
      p-values are set to 0 (so the recorded Error I is exactly 1.0 and
      Error II exactly 0.0, which ``show_results`` uses to drop the points).
    * any other ``k``: ``cit`` with ``int(n // k)`` groups.

    Parameters
    ----------
    n_list : sequence of int
        Sample sizes to evaluate.
    t : int
        Number of repetitions per sample size. Must be positive.
    cit : callable
        Base test handed to ``ECIT`` in the ensemble branch.
    ensembles : sequence of (callable, int)
        ``(p_ensemble, k)`` pairs; ``p_ensemble`` is forwarded to ``ECIT``.
    show_progress : bool
        Print a header per configuration and show a tqdm bar per sample size.
    alpha : float
        Significance level. ``p < alpha`` on the ``indp='C'`` data counts as
        an Error I, ``p > alpha`` on the ``indp='N'`` data as an Error II.
        NOTE(review): presumably 'C' means the null (conditional
        independence) holds and 'N' that it does not -- confirm against
        ``ecit.generate_samples``.
    kcit_max_n : int
        Sample size from which the plain KCIT baseline (``k == 0``) is skipped.

    Returns
    -------
    list
        One ``[eI, eII, tim]`` entry per element of ``ensembles``; each of the
        three is a list aligned with ``n_list`` (error rates in [0, 1] and
        elapsed seconds; the time includes data generation).

    Raises
    ------
    ValueError
        If ``t`` is not positive.
    """
    if t <= 0:
        raise ValueError(f"t must be a positive integer, got {t!r}")

    results = []

    for p_ensemble, k in ensembles:

        if show_progress:
            if k == 0:
                print("KCIT (k=0)")
            elif k == 1:
                print("RCIT (k=1)")
            else:
                print(f"Ensemble (k={k}, {p_ensemble.__name__})")

        eI = [0] * len(n_list)
        eII = [0] * len(n_list)
        tim = [0] * len(n_list)

        for i, n in enumerate(n_list):

            # The test and the number of groups only depend on (k, n), so
            # they are settled once here instead of on every repetition.
            skipped = False
            if k == 1:
                test_fn, n_groups = rcit, 1
            elif k == 0:
                test_fn, n_groups = kcit, 1
                skipped = n >= kcit_max_n
            else:
                test_fn, n_groups = cit, int(n // k)

            # perf_counter is monotonic, unlike time.time(), so the measured
            # duration cannot be distorted by system clock adjustments.
            start_time = time.perf_counter()

            iteration_loop = range(t)
            if show_progress:
                # The bar is labelled with the sample size it belongs to.
                iteration_loop = tqdm(iteration_loop, desc=f"n={n}", leave=True, dynamic_ncols=True)

            for _ in iteration_loop:

                dataI = np.hstack(generate_samples(n=n, indp='C', z_dis='gaussian', noise_dis='cauchy', noise_std=0.5))
                dataII = np.hstack(generate_samples(n=n, indp='N', z_dis='gaussian', noise_dis='cauchy', noise_std=0.5))

                if skipped:
                    # Sentinel p-values: every repetition counts as an
                    # Error I and none as an Error II.
                    pI = 0
                    pII = 0
                else:
                    pI = ECIT(dataI, test_fn, p_ensemble, k=n_groups)([0], [1], [2])
                    pII = ECIT(dataII, test_fn, p_ensemble, k=n_groups)([0], [1], [2])

                if pI < alpha:
                    eI[i] += 1
                if pII > alpha:
                    eII[i] += 1

            tim[i] = time.perf_counter() - start_time
            # Turn the raw counts into rates.
            eI[i] = eI[i] / t
            eII[i] = eII[i] / t

        results.append([eI, eII, tim])

    return results




def show_results(results, ensembles, n_list):
    """Plot error rates and run time of each configuration against sample size.

    Draws a 2x2 figure: Error I, Error II, their sum, and execution time.

    Parameters
    ----------
    results : list
        One ``[eI, eII, tim]`` entry per configuration, each of the three
        being a list aligned with ``n_list`` (the structure returned by
        ``ecit_simu_speed``).
    ensembles : sequence of (callable, int)
        The ``(p_ensemble, k)`` pairs the results were produced with, in the
        same order as ``results``. ``k == 0`` is labelled "KCIT", ``k == 1``
        "RCIT", anything else "Ensemble KCIT (k=..., <p_ensemble name>)".
    n_list : sequence of int
        Sample sizes used on the x axis.

    Notes
    -----
    For ``k == 0`` the trailing points whose Error I is 1 and whose Error II
    is 0 are dropped: that combination is what ``ecit_simu_speed`` records
    when it skips KCIT for large sample sizes.
    """
    # A list rather than a dict keyed by label, so that two configurations
    # that happen to share a label are both drawn instead of one silently
    # replacing the other.
    series = []
    for (p_ensemble, k), result in zip(ensembles, results):
        eI, eII, tim = result

        if k == 0:
            # Only strip the trailing run of skipped entries. Counting every
            # entry with eI < 1 would cut real data as soon as a genuine
            # error rate of 1.0 shows up somewhere in the series.
            keep = len(eI)
            while keep > 0 and eI[keep - 1] >= 1 and eII[keep - 1] == 0:
                keep -= 1
            eI, eII, tim = eI[:keep], eII[:keep], tim[:keep]

        if k == 0:
            label = "KCIT"
        elif k == 1:
            label = "RCIT"
        else:
            label = f"Ensemble KCIT (k={k}, {p_ensemble.__name__})"

        series.append((label, eI, eII, tim))

    fig, axes = plt.subplots(2, 2, figsize=(16, 10), dpi=300, sharex=True)
    (ax_eI, ax_eII), (ax_eSum, ax_tim) = axes

    for label, eI, eII, tim in series:
        eSum = np.array(eI) + np.array(eII)
        # Trimmed series are shorter than n_list; slicing is a no-op otherwise.
        xs = n_list[:len(eI)]
        # Only one line per series carries the label, so the shared figure
        # legend lists every configuration exactly once.
        ax_eI.plot(xs, eI, label=label)
        ax_eII.plot(xs, eII)
        ax_eSum.plot(xs, eSum)
        ax_tim.plot(xs, tim)

    ax_eI.set_title("Error I")
    ax_eI.set_ylabel("Error Rate")

    ax_eII.set_title("Error II")

    ax_eSum.set_title("Error I + Error II")
    ax_eSum.set_xlabel("Sample Size (n)")
    ax_eSum.set_ylabel("Error Rate")

    ax_tim.set_title("Execution Time")
    ax_tim.set_ylabel("Time (s)")
    ax_tim.set_xlabel("Sample Size (n)")

    fig.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=2, fontsize=12)

    ax_tim.set_xticks(n_list)
    ax_eSum.set_xticks(n_list)

    plt.tight_layout()
    plt.show()

In [11]:
# Configurations to compare, as (p-value combiner, k) pairs for ecit_simu_speed:
#   k = 0   -> one KCIT run on the whole sample (skipped once n reaches 8000)
#   k = 1   -> one RCIT run on the whole sample
#   k = 400 -> ensemble test; int(n / 400) is handed to ECIT as its group argument
ensembles = [
    (p_alpha1, 0),
    (p_alpha1, 1),
    (p_alpha2, 400),
]

# Sample sizes swept in this run.
n_list = [400, 1000, 2000, 4000, 6000, 8000, 10000]

# 50 repetitions per sample size and configuration.
results = ecit_simu_speed(
    n_list=n_list,
    t=50,
    cit=kcit,
    ensembles=ensembles,
    show_progress=True,
)

show_results(results=results, ensembles=ensembles, n_list=n_list)
results

Ensemble (k=0, p_alpha1)


t=400: 100%|██████████| 50/50 [00:12<00:00,  4.05it/s]
t=1000: 100%|██████████| 50/50 [01:08<00:00,  1.36s/it]
t=2000: 100%|██████████| 50/50 [07:04<00:00,  8.50s/it]
t=4000: 100%|██████████| 50/50 [53:44<00:00, 64.49s/it]
t=6000:  22%|██▏       | 11/50 [36:32<2:14:43, 207.27s/it]

In [None]:
# Same three configurations as (p-value combiner, k) pairs:
# k = 0 is plain KCIT, k = 1 is RCIT, k = 400 is the ensemble test.
ensembles = [
    (p_alpha1, 0),
    (p_alpha1, 1),
    (p_alpha2, 400),
]

# KCIT is only run below n = 8000; ecit_simu_speed skips it from there on.
n_list = [
    2000, 4000, 8000, 10000, 20000, 30000, 40000,
    50000, 60000, 70000, 80000, 90000, 100000,
]

# 1000 repetitions per sample size and configuration.
results = ecit_simu_speed(
    n_list=n_list,
    t=1000,
    cit=kcit,
    ensembles=ensembles,
    show_progress=True,
)

show_results(results=results, ensembles=ensembles, n_list=n_list)
results