In [1]:
from hyperscope import config
from hyperscope.helpers.concurrency import ConcurrentTqdm
from multiprocessing import Pool
import numpy as np
from functools import partial


def process_file(npfile):
    """Count how often each distinct value occurs in one file's ``mask`` array.

    Args:
        npfile: Anything ``np.load`` accepts (path or file-like object). The
            loaded object must support ``["mask"]`` lookup.

    Returns:
        A 2-D array with one ``(value, count)`` row per distinct mask value,
        or ``None`` if loading or counting raised any ``Exception``
        (deliberate best-effort: a bad file must not abort the whole run).
    """
    try:
        loaded = np.load(npfile)
        try:
            mask = loaded["mask"]
        finally:
            # ``.npz`` archives come back as an NpzFile that holds an open
            # file handle; close it explicitly instead of relying on garbage
            # collection. Plain ndarrays have no ``close`` and are skipped.
            close = getattr(loaded, "close", None)
            if close is not None:
                close()
        unique, counts = np.unique(mask, return_counts=True)
        # Stack as rows of (value, count).
        return np.asarray((unique, counts)).T
    except Exception:
        return None


class CB:
    """Callable accumulator that merges per-file ``(value, count)`` tables."""

    def __init__(self):
        # Running totals: mask value (Python int) -> summed count (Python int).
        self.counts = {}

    def __call__(self, ok, arr):
        """Fold one result table into the running totals.

        Args:
            ok: Truthy if ``arr`` should be merged; falsy results are ignored.
                (Presumably a success flag from the caller — confirm.)
            arr: Iterable of rows whose first two items are ``(value, count)``,
                or ``None`` when the producer had nothing to report
                (``process_file`` returns ``None`` for files it cannot read).
        """
        if not ok or arr is None:
            return
        for row in arr:
            # Normalize the key once so lookup and store use the same int.
            key = int(row[0])
            self.counts[key] = self.counts.get(key, 0) + int(row[1])

def main():
    """Tally mask-value frequencies over every entry in the 64x64 mask directory.

    Lists the directory, submits one ``process_file`` task per entry to a
    process pool, merges each yielded result into a ``CB`` accumulator, and
    prints the final ``{value: count}`` totals.
    """
    # NOTE(review): `logger` is not imported in the visible part of this file;
    # presumably it is provided by the surrounding notebook session — confirm.
    cb = CB()
    mask_dir = config.INTERIM_DATA_DIR / "worms" / "sp" / "masks" / "64x64"
    logger.info("Enumerating Files")
    # Materialized up front because the total is needed for the progress bar.
    files = list(mask_dir.iterdir())

    logger.info("Creating Futures")
    with Pool() as pool:
        # One asynchronous task per directory entry.
        futures = [pool.apply_async(process_file, (f,)) for f in files]

        logger.info("Processing Files")
        # Results are consumed inside the `with` block: leaving it terminates
        # the pool, which would discard any tasks that are still pending.
        for result in ConcurrentTqdm(
            futures, total=len(files), desc="Processing files"
        ):
            # Each yielded item is unpacked into CB.__call__(ok, arr).
            cb(*result)

    print(cb.counts)

# Entry-point guard: run main() only when this file is executed directly.
# The guard also keeps multiprocessing workers that re-import this module
# (e.g. under the "spawn" start method) from launching main() again.
if __name__ == "__main__":
    main()

2024-12-03 22:06:14.042 | INFO     | hyperscope.config:<module>:13 - PROJ_ROOT path is: /mnt/d/hyper-scope


2024-12-03 22:06:14.199 | INFO     | __main__:main:29 - Enumerating Files
2024-12-03 22:06:24.095 | INFO     | __main__:main:32 - Creating Futures
2024-12-03 22:06:54.590 | INFO     | __main__:main:38 - Processing Files


Processing files: 100%|██████████| 2024253/2024253 [1:03:28<00:00, 531.47it/s]


{0: 7884050302, 1: 296610305, 3: 52085801, 2: 58593880}
