In [6]:
import os
import shutil
import nrrd
import json
import time

import numpy as np
import pandas as pd

from pathlib import Path
from polnet import lio

ROOT_DIR = Path("~/Desktop/Patricia/polnet/data")
in_csv = ROOT_DIR / "data_generated/all_v11/tomos_motif_list.csv"
out_dir = Path(os.getenv("nnUNet_raw") or ROOT_DIR / "data_prepared")
dataset_id = "004"
dataset_suffix = "SV"

fg_labels = {
    "membrane": (1,),
    "mb_prot": (2,3,4,5)
}

mb_prot_radius_vox = 120  # in Angstroms, later scaled by voxel size

# Read metadata
df = pd.read_csv(in_csv, delimiter="\t")
tomos = set(df["Tomo3D"].tolist())
segs = {
    tomo: os.path.join(os.path.dirname(tomo), f"tomo_lbls_{os.path.basename(tomo).split('_')[2]}.mrc")
    for tomo in tomos
}

# Output folder structure
out_dataset = out_dir / f"Dataset{dataset_id}_{dataset_suffix}"
if os.path.exists(out_dataset):
    shutil.rmtree(out_dataset)
os.makedirs(out_dataset / "imagesTr")
os.makedirs(out_dataset / "labelsTr")

out_labels = {"background": 0}

for tomo_id, tomo_in in enumerate(tomos):
    print(f"Processing tomogram: {tomo_in}")
    tomo = lio.load_mrc(tomo_in)
    seg = lio.load_mrc(segs[tomo_in])
    seg_post = np.zeros_like(seg, dtype=np.uint8)
    v_sizes = lio.read_mrc_v_size(tomo_in)
    v_size = float(v_sizes[0])  # assuming isotropic
    v_size_i = 1.0 / v_size

    tomo_df = df[df["Tomo3D"] == tomo_in]

    for i, key in enumerate(fg_labels.keys()):
        print(f"\tProcessing label: {key}")
        lbl_value = i + 1
        if key == "mb_prot":
            feat_df = tomo_df[tomo_df["Label"].isin(fg_labels[key])]
            for _, row in feat_df.iterrows():
                xi = int(round(row["X"] * v_size_i))
                yi = int(round(row["Y"] * v_size_i))
                zi = int(round(row["Z"] * v_size_i))
                ri = int(round(mb_prot_radius_vox * v_size_i))
                x_min, x_max = max(xi - ri, 0), min(xi + ri + 1, tomo.shape[0])
                y_min, y_max = max(yi - ri, 0), min(yi + ri + 1, tomo.shape[1])
                z_min, z_max = max(zi - ri, 0), min(zi + ri + 1, tomo.shape[2])
                for x in range(x_min, x_max):
                    for y in range(y_min, y_max):
                        for z in range(z_min, z_max):
                            if (x - xi)**2 + (y - yi)**2 + (z - zi)**2 <= ri**2:
                                seg_post[x, y, z] = lbl_value
        else:
            for lbl in fg_labels[key]:
                seg_post[seg == lbl] = lbl_value

        out_labels[key] = lbl_value

    # Save NRRD
    tomo_out_name = f"tomo_{str(tomo_id).zfill(3)}"
    nrrd.write(str(out_dataset / f"imagesTr/{tomo_out_name}_0000.nrrd"), tomo)
    nrrd.write(str(out_dataset / f"labelsTr/{tomo_out_name}.nrrd"), seg_post)

# Dataset JSON
dict_json = {
    "channel_names": {"0": "rescale_to_0_1"},
    "labels": out_labels,
    "numTraining": 16,
    "file_ending": ".nrrd",
}
with open(out_dataset / "dataset.json", "w") as outfile:
    json.dump(dict_json, outfile, indent=4)

print("Successfully terminated. (" + time.strftime("%c") + ")")


Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_6_snr1.04.mrc
	Processing label: membrane
	Processing label: mb_prot
Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_0_snr1.01.mrc
	Processing label: membrane
	Processing label: mb_prot
Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_2_snr1.9.mrc
	Processing label: membrane
	Processing label: mb_prot
Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_8_snr1.5.mrc
	Processing label: membrane
	Processing label: mb_prot
Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_7_snr1.78.mrc
	Processing label: membrane
	Processing label: mb_prot
Processing tomogram: /home/tfg/Desktop/Patricia/polnet/data/data_generated/all_v11/tomos/tomo_rec_4_snr1.42.mrc
	Processing label: membrane
	Processing label: mb_prot
