<a href="https://colab.research.google.com/github/AlexeyTimoshin/Statistics/blob/main/Poisson_bootstrap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from typing import Tuple, List
import numpy as np
import multiprocessing as mp
import os
from tqdm import tqdm_notebook

In [None]:

N_bootstaps: int = 10000  # number of bootstrap resamples drawn for every record


def poisson_bootstrap_tp_fp_fn_tn(
    bundle: Tuple[float, List[Tuple[float, float, float, int]]]
) -> List[np.ndarray]:
    """Accumulate Poisson-bootstrap confusion-matrix counts for one chunk.

    Each record's prediction is jittered with Gaussian noise (sigma 0.0125),
    clipped to [0, 1] and compared with the threshold to get a binary
    decision.  The record then contributes ``Poisson(1) * weight`` to the
    matching confusion-matrix cell in each of the ``N_bootstaps`` resamples.

    Args:
        bundle: ``(threshold, records)`` where every record is
            ``(label, predict, weight, index)``.  ``index`` seeds the
            record's random draws, so results do not depend on how records
            are grouped into chunks.

    Returns:
        ``[TP, FP, FN, TN]``, each an array of length ``N_bootstaps``.
        Records whose label is neither 0 nor 1 contribute to none of them.
    """
    threshold, data = bundle
    TP = np.zeros(N_bootstaps)
    FP = np.zeros(N_bootstaps)
    FN = np.zeros(N_bootstaps)
    TN = np.zeros(N_bootstaps)

    for current_label, current_predict, weight, index in data:
        # A private legacy generator seeded with the record index keeps the
        # draws reproducible per record without reseeding the process-wide
        # NumPy RNG as a side effect.
        rng = np.random.RandomState(index)
        # Scalar draw: avoids converting a 1-element array to a Python scalar.
        noisy_predict = current_predict + rng.normal(0, 0.0125)
        predicted_positive = bool(np.clip(noisy_predict, 0, 1) >= threshold)
        p_sample = rng.poisson(1, N_bootstaps) * weight

        if current_label == 1:
            if predicted_positive:
                TP += p_sample
            else:
                # Actual positive predicted negative -> false negative.
                FN += p_sample
        elif current_label == 0:
            if predicted_positive:
                # Actual negative predicted positive -> false positive.
                FP += p_sample
            else:
                TN += p_sample
    return [TP, FP, FN, TN]


In [None]:
# Synthetic evaluation set: N records with a binary label, a score in [0, 1]
# and a unit weight.
N = 10**6
# randint's upper bound is exclusive, so (0, 2) is required to draw both
# classes; (0, 1) would make every label 0.
labels = np.random.randint(0, 2, N)
# Scores drawn from N(0.5, 1) and clipped into the [0, 1] range.
predicts = np.clip(np.random.normal(0.5, 1, N), 0, 1)
# Unit weights, allocated directly instead of via a million-element list.
weight = np.ones(N, dtype=int)

In [None]:
chunk_size = 100
treshold = 0.81
# Lazily yields one (threshold, records) bundle per chunk of consecutive
# records; each record is (label, predict, weight, global index).  The upper
# bound of the inner range is capped at N so the final chunk may be shorter.
# Being a generator expression, this can be iterated only once.
generator = (
    (
        treshold,
        [
            (labels[i], predicts[i], weight[i], i)
            for i in range(start, min(start + chunk_size, N))
        ],
    )
    for start in range(0, N, chunk_size)
)

In [None]:
%%timeit
with mp.Pool(processes=os.cpu_count()) as pool:
    stat_list: List = list(tqdm_notebook(pool.imap(poisson_bootstrap_tp_fp_fn_tn, generator), total=N//chunk_size))


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

77 ms ± 6.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
