In [1]:
# --- Optimised β₀ persistence workflow ---------------------------------
# Rewrites the original triple‑nested loop to:
#   • Re‑use vertices/edges per segmentation (loaded once)
#   • Distribute direction work in parallel across CPU cores
#   • Collect JSON in‑memory and write once at the end

import json, pathlib, pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor, as_completed
import os, csv

from persistent_homology import (
    BettiZero,
    compute_intervals,
    compute_largest_bar,
    generate_sphere_points,
)

root = pathlib.Path("./lung_segmentations")

def read_csv(path):
    with open(path, newline="") as f:
        reader = csv.reader(f)
        output = []
        for row in reader:
            values = []
            for value in row:
                if len(row) == 3:
                    values.append(float(value))
                elif len(row) == 2:
                    values.append(int(value))
            output.append(values)
        return output
    
def get_subfolders(path):
    """
    Return a list of names of all subdirectories in the given path.
    """
    return [
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    ]

def load_vertices_edges(seg_folder):
    """Read vertices/edges only once per segmentation."""
    folder_path = './lung_segmentations/{}'.format(seg_folder)
    edges_path = '/edges.csv'
    vertices_path = '/vertices.csv'
    verts = read_csv(folder_path + vertices_path)
    edges = read_csv(folder_path + edges_path)
    return verts, edges

def process_direction(args):
    """Run β₀ persistence for one direction (runs in worker)."""
    direction, vertices, edges = args
    bz = BettiZero(direction, vertices, edges)
    comps, mergers, verts, births = bz.compute_persistence()
    intervals = compute_intervals(births, mergers)
    length, bar = compute_largest_bar(intervals)
    return {
        "direction": list(direction),
        "intervals": intervals,
        "largest_bar": bar,
        "largest_length": length,
        "components": list(comps),
    }

In [3]:
json_data = {}

segmentation_folder_path = "./lung_segmentations"
lung_segmentations = get_subfolders(segmentation_folder_path)

for seg_folder in lung_segmentations:
    vertices, edges = load_vertices_edges(seg_folder)
    directions = generate_sphere_points(5, 5, 1e-7)

    # Parallel processing over directions
    ctx = mp.get_context('fork')
    with ProcessPoolExecutor(mp_context=ctx) as ex:
    #with ProcessPoolExecutor(max_workers=10) as ex:
        future_map = {
            ex.submit(process_direction, (d, vertices, edges)): d
            for d in directions
        }
        for f in as_completed(future_map):
            print(f.re)
        seg_results = {i : v.result() for i, v in enumerate(as_completed(future_map))}

    json_data[seg_folder] = seg_results
    print(f"✓ Processed {seg_folder}")
  
# Single JSON write at the end
with open("BettiZeroSegmentations.json", "w") as fp:
    json.dump(json_data, fp, indent=2)

print("✅ All segmentations done → BettiZeroSegmentations.json")

<Future at 0x718cfaac90d0 state=finished raised BrokenProcessPool>
<Future at 0x718cfab4d4c0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99c080 state=finished raised BrokenProcessPool>
<Future at 0x718cea99ede0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99e0c0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99f320 state=finished raised BrokenProcessPool>
<Future at 0x718cea99e7b0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99f860 state=finished raised BrokenProcessPool>
<Future at 0x718cea99da90 state=finished raised BrokenProcessPool>
<Future at 0x718cea99e060 state=finished raised BrokenProcessPool>
<Future at 0x718cea99eea0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99f0e0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99daf0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99e2d0 state=finished raised BrokenProcessPool>
<Future at 0x718cea99f260 state=finished raised BrokenProcessP

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.