## Means, TACs, and metadata
Notebook used to create the following data files with readouts from the combined hedyPET train+validation set (n=80):
 - `readouts/metadata.csv` Participant metadata (weight, demographic-group, and more)
 - `readouts/acdynPSF_tacs_80.pkl.gz` PET TACs for all segmented regions
 - `readouts/acstatPSF_means_80.pkl.gz` PET static means for all segmented regions
 - `readouts/patlak_ki_80.pkl.gz` Patlak Ki values for all segmented regions for different input functions (IFs) and numbers of frames

Extensive metadata and the original image files are available at X. (**TODO**: replace the placeholder "X" with the actual dataset link.)

In [1]:
from hedypet.utils import DERIVATIVES_ROOT, load_splits, get_time_frames_midpoint
from hedypet.utils import get_participant_metadata, get_norm_consts
from nifti_dynamic.utils import load_tac
from nifti_dynamic.patlak import roi_patlak
from parse import parse
from tqdm import tqdm
import pandas as pd 
import os 
import json
import warnings

## Helper functions

In [4]:
def load_json(file_path):
    """Read a JSON file and return its decoded content.

    Parameters
    ----------
    file_path : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    object
        The decoded JSON document.
    """
    # JSON text is UTF-8 by specification; pinning the encoding keeps the
    # result independent of the platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as handle:
        return json.load(handle)

# Segmentation task -> {label index (as a string): region name}. Every task
# except 'totalimage' reads its mapping from a class file under
# DERIVATIVES_ROOT.
_class_files = {
    'ts_total': "tacs/ts_total_classes.json",
    'synthseg': "tacs/synthseg_classes.json",
    # NOTE(review): 'synthsegparc' points at the synthseg class file rather
    # than a dedicated one -- confirm this is intentional and not a
    # copy-paste slip.
    'synthsegparc': "tacs/synthseg_classes.json",
    'ts_tissue': "tacs/ts_tissue_classes.json",
    'ts_body': "tacs/ts_body_classes.json",
}
region_names = {
    task_name: load_json(DERIVATIVES_ROOT / rel_path)
    for task_name, rel_path in _class_files.items()
}
# The whole-image task has a single label, so its table is spelled out inline.
region_names['totalimage'] = {"1":"body"}

# Every task whose name starts with "aorta" is resolved through this one table
# (see task_and_ix_to_region_name).
region_names_aorta = load_json(DERIVATIVES_ROOT / "tacs/aorta_classes.json")

def task_and_ix_to_region_name(task,ix):
    """Translate a segmentation task and label index into a region name.

    Tasks whose name begins with "aorta" are resolved through
    ``region_names_aorta``; any other task is resolved through its own
    ``region_names[task]`` table.

    Parameters
    ----------
    task : str
        Segmentation task name.
    ix
        Key of the region inside the selected table.

    Returns
    -------
    The table entry stored under ``ix``.

    Raises
    ------
    KeyError
        If a non-aorta ``task`` has no table, or ``ix`` is not in the table.
    """
    lookup = region_names_aorta if task.startswith("aorta") else region_names[task]
    return lookup[ix]

# Participants processed by the readout cells below: the "train0" split
# followed by the "val0" split.
train_subs = load_splits()["train0"]
val_subs = load_splits()["val0"]
subs = train_subs + val_subs

## Metadata
Extraction of metadata for each participant

In [19]:
# Build the participant metadata table once and cache it as CSV; later runs
# read the cached file back instead of querying every participant again.
df_path = "../../../readouts/metadata.csv"
if os.path.exists(df_path):
    df = pd.read_csv(df_path)
else:
    # Use the shared `subs` list so this table covers exactly the participants
    # iterated by the other readout cells (same train0 + val0 concatenation).
    records = []
    for sub in subs:
        record = get_participant_metadata(sub)
        record["suv_denominator"] = get_norm_consts(sub)["suv"]
        records.append(record)

    df = pd.DataFrame(records)
    # NOTE(review): "injected_readioactivity" is misspelled, but the cached CSV
    # already carries this header, so it is kept to avoid a schema change.
    df = df.rename({"InjectedRadioactivity":"injected_readioactivity","participant_id":"participant"},axis=1)
    # These columns are removed from the exported table; "demographic-group"
    # stays in.
    df = df.drop(["age","height","sex","blanket"],axis=1)
    # Make sure the readouts directory exists before writing.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df.to_csv(df_path,index=False)


print("Columns:", list(df.columns))
print("Subjects:", df["participant"].nunique())
df["demographic-group"].value_counts()

Columns: ['participant', 'weight', 'demographic-group', 'injected_readioactivity', 'suv_denominator']
Subjects: 80


demographic-group
F18-34    10
F35-49    10
F50-69    10
F70-99    10
M18-34    10
M35-49    10
M50-69    10
M70-99    10
Name: count, dtype: int64

## Time Activity Curves
Combines the time activity curves from the acdynPSF dynamic PET into a single dataframe

In [39]:

# Gather every acdynPSF time-activity curve into one long-format table
# (one row per TAC file and frame) and cache it as a pickle. The table is only
# rebuilt when the cached file is missing.
df_path = "../../../readouts/acdynPSF_tacs_80.pkl.gz"
if os.path.exists(df_path):
    df = pd.read_pickle(df_path)
else:
    # Layout of a TAC file path; the named fields become columns.
    tac_pattern = '{}/tacs/{sub}/acdynPSF/{task}/erosion-{erosion}/tac_{ix}'
    data = []

    for sub in tqdm(subs):
        tacs_root = (DERIVATIVES_ROOT / f"tacs/{sub}/acdynPSF")
        tacs = list(tacs_root.glob("**/tac*"))
        frame_time_middle = get_time_frames_midpoint(sub)

        for tac_organ_path in tacs:
            mu_organ, std_organ, n_organ = load_tac(tac_organ_path)
            frame_ixs = list(range(len(mu_organ)))

            # Each TAC sample must pair with exactly one frame midpoint.
            assert len(frame_ixs) == len(frame_time_middle), (
                f"{tac_organ_path}: {len(frame_ixs)} TAC samples but "
                f"{len(frame_time_middle)} frame midpoints"
            )

            # as_posix() keeps the '/'-separated pattern valid regardless of
            # the platform's path separator.
            parsed = parse(tac_pattern, tac_organ_path.as_posix())
            if parsed is None:
                # parse() returns None on a mismatch; fail with the offending
                # path instead of an AttributeError on None.
                raise ValueError(f"Unexpected TAC path layout: {tac_organ_path}")

            vals = {"pet_mu":mu_organ,"pet_std":std_organ,"n_voxels":n_organ,"frame_ix":frame_ixs, "frame_time_middle":frame_time_middle}
            vals.update(parsed.named)
            vals["seg_region_name"] = task_and_ix_to_region_name(vals["task"],vals["ix"])
            data.append(pd.DataFrame(vals))

    # Every per-curve frame is indexed from 0, so a plain concat would repeat
    # the same index labels for each curve; ignore_index gives unique labels.
    df = pd.concat(data, ignore_index=True)
    df = df.rename({"sub":"participant","task":"seg_task","ix":"seg_region_ix","erosion":"erosion_iterations"},axis="columns")
    # Voxel count -> volume. Presumably 1.65 mm isotropic voxels, with
    # mm^3 / 1000 = mL -- TODO confirm against the image header.
    df["mL"] = df.n_voxels*(1.65*1.65* 1.65) / 1000
    # The erosion level is parsed from the path as text.
    df.erosion_iterations = df.erosion_iterations.astype(int)
    # Make sure the readouts directory exists before writing.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df.to_pickle(df_path)

print("Columns:", list(df.columns),end="\n\n")
print("Segmentation tasks:", list(df.seg_task.unique()),end="\n\n")
print("N Unique regions:", df["seg_region_name"].nunique(), end="\n\n")
print("Unique erosions:", df.erosion_iterations.unique(),end="\n\n")
print("Rows:",len(df))

Columns: ['pet_mu', 'pet_std', 'n_voxels', 'frame_ix', 'frame_time_middle', 'participant', 'seg_task', 'erosion_iterations', 'seg_region_ix', 'seg_region_name', 'mL']

Segmentation tasks: ['ts_total', 'ts_tissue', 'ts_body', 'synthseg', 'synthsegparc', 'aortasegments', 'aortavois_ml-1_width-3', 'aortavois_ml-1.5_width-5', 'aortavois_ml-2_width-5', 'totalimage']

N Unique regions: 227

Unique erosions: [0 1]

Rows: 2859981


## Static Organ Means
Combines the static organ means from the acstatPSF reconstruction into a single dataframe

In [None]:
# Gather the static (acstatPSF) regional means into one table -- one row per
# TAC file -- and cache it as a pickle. The table is only rebuilt when the
# cached file is missing.
df_path = "../../../readouts/acstatPSF_means_80.pkl.gz"
if os.path.exists(df_path):
    df = pd.read_pickle(df_path)
else:
    # Layout of a TAC file path; the named fields become columns.
    tac_pattern = '{}/tacs/{sub}/acstatPSF/{task}/erosion-{erosion}/tac_{ix}'
    data = []

    for sub in tqdm(subs):
        tacs_root = (DERIVATIVES_ROOT / f"tacs/{sub}/acstatPSF")
        tacs = list(tacs_root.glob("**/tac*"))

        # One record per region file found for this participant.
        for tac_organ_path in tacs:
            mu_organ, std_organ, n_organ = load_tac(tac_organ_path)

            # as_posix() keeps the '/'-separated pattern valid regardless of
            # the platform's path separator.
            parsed = parse(tac_pattern, tac_organ_path.as_posix())
            if parsed is None:
                # parse() returns None on a mismatch; fail with the offending
                # path instead of an AttributeError on None.
                raise ValueError(f"Unexpected TAC path layout: {tac_organ_path}")

            # Assumes load_tac yields single-element values for the static
            # reconstruction, so they convert to plain scalars -- TODO confirm.
            vals = {"pet_mu":float(mu_organ),"pet_std":float(std_organ),"n_voxels":int(n_organ)}
            vals.update(parsed.named)
            vals["seg_region_name"] = task_and_ix_to_region_name(vals["task"],vals["ix"])
            data.append(vals)

    df = pd.DataFrame(data)
    df = df.rename({"sub":"participant","task":"seg_task","ix":"seg_region_ix","erosion":"erosion_iterations"},axis="columns")
    # Voxel count -> volume (presumably mm^3 / 1000 = mL).
    # NOTE(review): the third voxel dimension is 2.0 here but 1.65 in the
    # dynamic (acdynPSF) cells -- confirm the static reconstruction really has
    # a different voxel size.
    df["mL"] = df.n_voxels*(1.65*1.65* 2.0) / 1000
    # The erosion level is parsed from the path as text.
    df.erosion_iterations = df.erosion_iterations.astype(int)
    # Make sure the readouts directory exists before writing.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df.to_pickle(df_path)

print("Columns:", list(df.columns),end="\n\n")
print("Segmentation tasks:", list(df.seg_task.unique()),end="\n\n")
print("N Unique regions:", df["seg_region_name"].nunique(), end="\n\n")
print("Unique erosions:", df.erosion_iterations.unique(),end="\n\n")
print("Rows:",len(df))


Columns: ['pet_mu', 'pet_std', 'n_voxels', 'participant', 'seg_task', 'erosion_iterations', 'seg_region_ix', 'seg_region_name', 'mL']

Segmentation tasks: ['ts_total', 'synthseg', 'synthsegparc', 'ts_tissue', 'ts_body', 'totalimage']

N Unique regions: 223

Unique erosions: [0 1]

Rows: 39801


## Patlak

Computes and saves the Patlak Ki for different organ and input-function combinations

In [None]:

# Compute Patlak Ki for every (input function, region, frame count)
# combination and cache the table as a pickle. The table is only rebuilt when
# the cached file is missing.
df_path = "../../../readouts/patlak_ki_80.pkl.gz"
if os.path.exists(df_path):
    df = pd.read_pickle(df_path)
else:
    # Layout of a TAC file path; the named fields become columns.
    tac_pattern = '{}/tacs/{sub}/acdynPSF/{task}/erosion-{erosion}/tac_{ix}'
    # Values passed as the last roi_patlak argument (presumably the number of
    # frames used in the regression -- see the "n_frames_regression" column).
    frames = [2,3,4,5,6,7,8]
    ki_data = []

    # Silence warnings only while fitting, instead of installing a global
    # "ignore" filter that would hide warnings for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for sub in tqdm(subs):
            # Find all TACs of this participant.
            tacs_root = (DERIVATIVES_ROOT / f"tacs/{sub}/acdynPSF")
            tacs = list(tacs_root.glob("**/tac*"))

            # Input functions come from the "aortavois" tasks only; any path
            # containing "aorta" is excluded from the target regions.
            tacs_if = [x for x in tacs if "aortavois" in str(x)]
            tacs_organs = [x for x in tacs if "aorta" not in str(x)]

            t_middle = get_time_frames_midpoint(sub)

            # Load and label each region TAC once per participant rather than
            # once per input function and frame count. The loaded curve is
            # then shared across input functions, as it already was across
            # frame counts.
            organs = []
            for tac_organ_path in tacs_organs:
                tac_organ, _, n = load_tac(tac_organ_path)
                parsed_organ = parse(tac_pattern, tac_organ_path.as_posix())
                if parsed_organ is None:
                    # parse() returns None on a mismatch; fail with the
                    # offending path instead of an AttributeError on None.
                    raise ValueError(f"Unexpected TAC path layout: {tac_organ_path}")
                tags_organ = parsed_organ.named
                region_name = task_and_ix_to_region_name(tags_organ["task"], tags_organ["ix"])
                organs.append((tac_organ, int(n[0]), tags_organ, region_name))

            # Try all combinations of input functions and frame counts.
            for tac_if_path in tacs_if:
                tac_if, _, _ = load_tac(tac_if_path)
                parsed_if = parse(tac_pattern, tac_if_path.as_posix())
                if parsed_if is None:
                    raise ValueError(f"Unexpected TAC path layout: {tac_if_path}")
                tags_if = parsed_if.named
                # The input-function label depends only on the IF path, so it
                # is built once here instead of once per result row.
                if_tag = tags_if["task"]+"_"+task_and_ix_to_region_name(tags_if["task"], tags_if["ix"])

                for tac_organ, n_voxels, tags_organ, region_name in organs:
                    for frame in frames:
                        # Only the slope (Ki) is kept; the other three return
                        # values are not used.
                        slope, _, _, _ = roi_patlak(tac_organ,tac_if,t_middle,frame)

                        series = {"Ki":float(slope),"n_voxels":n_voxels,"n_frames_regression":frame}
                        series["if_tag"] = if_tag
                        series.update(tags_organ)
                        series["seg_region_name"] = region_name
                        ki_data.append(series)

    df = pd.DataFrame(ki_data)
    df = df.rename({"sub":"participant","task":"seg_task","ix":"seg_region_ix","erosion":"erosion_iterations"},axis="columns")
    # Voxel count -> volume. Presumably 1.65 mm isotropic voxels, with
    # mm^3 / 1000 = mL -- TODO confirm against the image header.
    df["mL"] = df.n_voxels*(1.65*1.65* 1.65) / 1000
    # The erosion level is parsed from the path as text.
    df.erosion_iterations = df.erosion_iterations.astype(int)
    # Make sure the readouts directory exists before writing.
    os.makedirs(os.path.dirname(df_path), exist_ok=True)
    df.to_pickle(df_path)

print("Columns:", list(df.columns),end="\n\n")
print("Segmentation tasks:", list(df.seg_task.unique()),end="\n\n")
print("N Unique regions:", df["seg_region_name"].nunique(), end="\n\n")
print("Unique erosions:", df.erosion_iterations.unique(),end="\n\n")
print("Unique patlak frames:", df.n_frames_regression.unique(),end="\n\n")
print("Unique input functions" , df.if_tag.unique(),end="\n\n")
print("Rows:",len(df))

Columns: ['Ki', 'n_voxels', 'n_frames_regression', 'if_tag', 'participant', 'seg_task', 'erosion_iterations', 'seg_region_ix', 'seg_region_name', 'mL']

Segmentation tasks: ['ts_total', 'ts_tissue', 'ts_body', 'synthseg', 'synthsegparc', 'totalimage']

N Unique regions: 223

Unique erosions: [0 1]

Unique patlak frames: [2 3 4 5 6 7 8]

Unique input functions ['aortavois_ml-1_width-3_aorta_ascending'
 'aortavois_ml-1_width-3_aorta_top'
 'aortavois_ml-1_width-3_aorta_descending_upper'
 'aortavois_ml-1_width-3_aorta_descending_lower'
 'aortavois_ml-1.5_width-5_aorta_ascending'
 'aortavois_ml-1.5_width-5_aorta_top'
 'aortavois_ml-1.5_width-5_aorta_descending_upper'
 'aortavois_ml-1.5_width-5_aorta_descending_lower'
 'aortavois_ml-2_width-5_aorta_ascending'
 'aortavois_ml-2_width-5_aorta_top'
 'aortavois_ml-2_width-5_aorta_descending_upper'
 'aortavois_ml-2_width-5_aorta_descending_lower']

Rows: 3347316
