In [1]:
import json, pathlib, pandas as pd
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor, as_completed
import os, csv

from persistent_homology import (
    BettiZero,
    compute_intervals,
    compute_n_largest_bars,
    generate_sphere_points,
)

In [6]:
def read_csv(path):
    """
    Parse a CSV file into a list of numeric rows.

    Rows with exactly three fields are converted to floats and rows with
    exactly two fields are converted to ints. A row of any other width
    contributes an empty list to the result.
    """
    casts = {3: float, 2: int}
    parsed = []
    with open(path, newline="") as handle:
        for record in csv.reader(handle):
            cast = casts.get(len(record))
            # Unsupported widths yield an empty entry rather than an error.
            parsed.append([cast(cell) for cell in record] if cast else [])
    return parsed
    
def get_subfolders(path):
    """
    Return the names of all immediate subdirectories of `path`, sorted.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    (plain string ordering) to make the result, and anything that slices it,
    reproducible across runs and machines.
    """
    return sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )

def load_vertices_edges(seg_folder, base_dir='../lung_segmentations'):
    """
    Read the vertex and edge tables of one segmentation.

    Meant to be called once per segmentation so the files are not re-read
    for every direction.

    Parameters
    ----------
    seg_folder : str
        Name of the segmentation subfolder inside `base_dir`.
    base_dir : str, optional
        Directory containing the segmentation folders. Defaults to the
        location that was previously hard-coded in this function.

    Returns
    -------
    tuple
        ``(verts, edges)`` as returned by ``read_csv`` for ``vertices.csv``
        and ``edges.csv`` respectively.
    """
    folder_path = os.path.join(base_dir, seg_folder)
    verts = read_csv(os.path.join(folder_path, 'vertices.csv'))
    edges = read_csv(os.path.join(folder_path, 'edges.csv'))
    return verts, edges

def process_direction(args):
    """
    Compute the β₀ persistence summary for a single direction (runs in worker).

    `args` is one packed tuple ``(direction, vertices, edges)`` so the call
    can be submitted as a single argument.

    Returns a dict holding the direction (as a list), the intervals from
    ``compute_intervals``, the bars from ``compute_n_largest_bars`` and the
    components (as a list).
    """
    direction, vertices, edges = args
    persistence = BettiZero(direction, vertices, edges).compute_persistence()
    # The third element of the persistence result is not used here.
    comps, mergers, _, births = persistence
    intervals = compute_intervals(births, mergers)
    largest_bars = compute_n_largest_bars(intervals)
    return dict(
        direction=list(direction),
        intervals=intervals,
        largest_bars=largest_bars,
        components=list(comps),
    )

In [5]:
# Accumulates the results of every processed segmentation folder; this dict is
# what gets serialised to JSON once all segmentations are done.
json_data = {}

# Root directory whose subfolders are treated as individual segmentations.
segmentation_folder_path = "../lung_segmentations"
lung_segmentations = get_subfolders(segmentation_folder_path)

In [7]:
# Load one segmentation and a set of directions for the timing cell that follows.
vertices, edges = load_vertices_edges('Lung segmentation-10')
# NOTE(review): the meaning of the arguments (5, 5, 1e-7) is defined by
# persistent_homology.generate_sphere_points — TODO give them named constants.
directions = generate_sphere_points(5, 5, 1e-7)

In [9]:
# Time process_direction on each direction with IPython's %timeit line magic;
# the call runs directly in the kernel, without a worker pool.
# NOTE(review): the saved output of this cell is a NameError for
# `compute_largest_bar`, a name the current code does not use — presumably
# stale output; re-run on a fresh kernel to confirm.
for direction in directions:
    %timeit process_direction((direction, vertices, edges))

NameError: name 'compute_largest_bar' is not defined

In [None]:
# Build the worker start-method context once instead of once per segmentation.
# The 'fork' start method is only available on POSIX platforms.
ctx = mp.get_context('fork')

for seg_folder in lung_segmentations[:2]:
    vertices, edges = load_vertices_edges(seg_folder)
    directions = generate_sphere_points(5, 5, 1e-7)

    # Parallel processing over directions
    with ProcessPoolExecutor(mp_context=ctx) as ex:
        tasks = [(d, vertices, edges) for d in directions]
        # Executor.map yields results in submission order, so key i always
        # belongs to the i-th direction no matter which worker finishes first.
        # (Keying by completion order made the mapping differ between runs.)
        seg_results = dict(enumerate(ex.map(process_direction, tasks)))

    json_data[seg_folder] = seg_results
    print(f"✓ Processed {seg_folder}")

# Single JSON write at the end
with open("BettiZeroSegmentations.json", "w") as fp:
    json.dump(json_data, fp, indent=2)

print("✅ All segmentations done → BettiZeroSegmentations.json")