In [None]:
from pathlib import Path
import h5py
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from scipy.optimize import curve_fit

from topostats.io import hdf5_to_dict
import h5glance

from topostats.measure.curvature import calculate_curvature_stats_image

In [None]:
# Directory containing the example .topostats file that is loaded further down.
data_dir = Path("/Users/sylvi/topo_data/pleng/data/")
# Fail early, and name the offending path, if the data is not available on this machine.
assert data_dir.exists(), f"Data directory not found: {data_dir}"

In [None]:
# Inspect one example file: print its top-level keys and pixel scaling, show the
# image, and collect the mean of every per-molecule curvature entry returned by
# calculate_curvature_stats_image.
filename = "20231218_2ngSCcats.0_00003.topostats"
file_path = data_dir / filename
assert file_path.exists(), f"TopoStats file not found: {file_path}"

mean_curvatures = []

with h5py.File(file_path, "r") as f:
    # Load everything below the root group of the HDF5 file.
    image_data = hdf5_to_dict(f, group_path="/")
    print(image_data.keys())
    p_to_nm = image_data["pixel_to_nm_scaling"]
    print(f"pixel to nm scaling: {p_to_nm}")

    image = image_data["image"]
    plt.imshow(image)
    plt.show()

    # Splining results stored under the "above" key of the "splining" group.
    spline_data = image_data["splining"]["above"]

    grains_curvature_stats_dict = calculate_curvature_stats_image(
        all_grain_smoothed_data=spline_data,
        pixel_to_nm_scaling=p_to_nm,
    )

    # The result is iterated as {grain: {molecule: curvature values}}; only the
    # values are needed, so the grain / molecule keys are not unpacked.
    for grain_curvature_stats in grains_curvature_stats_dict.values():
        for molecule_curvature in grain_curvature_stats.values():
            mean_curvatures.append(np.mean(molecule_curvature))

In [None]:
# For every sample type, gather the maximum curvature of each molecule across all
# processed .topostats files of that sample. The outcome is stored in `results`
# as {sample_type: [max curvature per molecule, ...]}.
sample_types = [
    "magnesium-unknot-plasmid-nicked",
    "magnesium-unknot-plasmid-supercoiled",
    "nickel-relaxed",
    "nickel-supercoiled",
]
data_dir_prefix = "/Users/sylvi/topo_data/pleng/topology-data-check/processed-data-"
# NOTE(review): maximum_px_to_nm is not used anywhere in this cell — presumably
# intended as a resolution cut-off on pixel_to_nm_scaling; confirm before relying on it.
maximum_px_to_nm = 1.0
# When True, grains whose curvature entry holds more than one molecule are skipped.
use_grains_with_only_one_molecule = True

results = {}

for sample_type in sample_types:
    # Use a dedicated name so the notebook-level `data_dir` defined in an earlier
    # cell is not overwritten (re-running that cell's dependants would otherwise
    # resolve paths against the last sample directory).
    sample_dir = Path(data_dir_prefix + sample_type)
    assert sample_dir.exists(), f"Sample directory not found: {sample_dir}"
    # Find all .topostats files in the directory; sorted so the log order is stable.
    topostats_files = sorted(sample_dir.glob("*.topostats"))
    max_curvatures = []

    for file in topostats_files:
        print(f"Processing file: {file}")
        with h5py.File(file, "r") as f:
            image_data = hdf5_to_dict(f, group_path="/")
            if "splining" not in image_data:
                print(f"[Error] : No splining data found in file {file}")
                continue
            spline_data = image_data["splining"]["above"]
            p_to_nm = image_data["pixel_to_nm_scaling"]

            grains_curvature_stats_dict = calculate_curvature_stats_image(
                all_grain_smoothed_data=spline_data,
                pixel_to_nm_scaling=p_to_nm,
            )

            # Iterated as {grain: {molecule: curvature values}}; keys are not needed.
            for grain_curvature_stats in grains_curvature_stats_dict.values():
                if use_grains_with_only_one_molecule and len(grain_curvature_stats) != 1:
                    continue
                for molecule_curvature in grain_curvature_stats.values():
                    max_curvatures.append(np.max(molecule_curvature))

    results[sample_type] = max_curvatures

# Overlay one kernel density estimate per sample type on a single shared axis;
# the legend entry reports how many values went into each curve.
fig, ax = plt.subplots(figsize=(10, 6))
for sample_type, curvatures in results.items():
    legend_label = f"{sample_type} n = {len(curvatures)}"
    sns.kdeplot(curvatures, label=legend_label, ax=ax)
ax.legend()
ax.set_xlabel("Maximum Curvature (1/nm)")
ax.set_ylabel("Density")
plt.show()