In [1]:
from hedypet.utils import DERIVATIVES_ROOT, load_splits, get_time_frames_midpoint
from nifti_dynamic.utils import load_tac
from nifti_dynamic.patlak import roi_patlak
from parse import parse
from tqdm import tqdm
import warnings
import pandas as pd 
import os 

In [2]:
subs = load_splits()["all"]

In [22]:
import json
def load_json(file_path):
    with open(file_path,"r") as handle:
        d = json.load(handle)
    return d

region_names = {
    'ts_total' :load_json("/homes/hinge/Projects/hedyPET/src/hedypet/analysis/seg-total-classes.json"),
    'synthseg' :load_json("/homes/hinge/Projects/hedyPET/src/hedypet/analysis/seg-synthseg_classes.json"),
    'synthsegparc' : load_json("/homes/hinge/Projects/hedyPET/src/hedypet/analysis/seg-synthseg_classes.json"),
    'ts_tissue' :load_json("/homes/hinge/Projects/hedyPET/src/hedypet/analysis/seg-tissue-classes.json"),
    'ts_body' : {"1":"trunk","2":"extremeties"},
    'totalimage' : {"1":"body"},
}

In [None]:
if not os.path.exists(df_path := "acstatPSF_means.pkl"):
    data = []

    for sub in tqdm(subs):
        tacs_root = (DERIVATIVES_ROOT / f"tacs/{sub}/acstatPSF")
        tacs = list(tacs_root.glob("**/tac*"))

        #IF tacs not relevant for mean analysis
        tacs_if = [x for x in tacs if "aorta" in str(x)]
        tacs_organs = [x for x in tacs if x not in tacs_if]
                
        # Try all combinations of inputs functions and num_frames
        for tac_organ_path in tacs_organs:
            mu_organ, std_organ, n_organ = load_tac(tac_organ_path)
            tags = parse('{}/tacs/{sub}/acstatPSF/{task}/erosion-{erosion}/tac_{ix}',str(tac_organ_path)).named
            vals = {"mu":float(mu_organ),"std":float(std_organ),"n":int(n_organ)}
            vals.update(tags)
            vals["region"] = region_names[vals["task"]][vals["ix"]]
            data.append(vals)

    df = pd.DataFrame(data)
    df.to_pickle(df_path)
else:
    df = pd.read_pickle(df_path)

  vals = {"mu":float(mu_organ),"std":float(std_organ),"n":int(n_organ)}
100%|██████████| 100/100 [03:37<00:00,  2.18s/it]


In [24]:
from hedypet.utils import get_norm_consts

print("columns:", df.columns)
print("Unique task:", df.task.unique())
print("Unique erosion:", df.erosion.unique())
print("Available normalizations:", list(get_norm_consts("sub-000").keys()))
print("Rows:",len(df))
print("Subjects:", df["sub"].nunique())

columns: Index(['mu', 'std', 'n', 'sub', 'task', 'erosion', 'ix', 'region'], dtype='object')
Unique task: ['ts_total' 'synthseg' 'synthsegparc' 'ts_tissue' 'ts_body' 'totalimage']
Unique erosion: ['0' '1']
Available normalizations: ['suv', 'sul_janma', 'sul_james', 'sul_decazes', 'sul_auto', 'luv', 'sul_auto_decazes']
Rows: 49750
Subjects: 100


In [149]:
# Group by erosion and task, calculate mean and std of mu
from hedypet.utils import get_norm_consts

def get_normalizer(normalization_method):
    norm_dict = {sub:get_norm_consts(sub) for sub in subs}
    for k in norm_dict:
        norm_dict[k]["none"] = 1000
    return lambda x: 1000*x["mu"]/norm_dict[x["sub"]][normalization_method]


def filter_and_combine_regions(df,regions):
    group_cols = ["sub","erosion","task"]
    results = []

    for k, v in regions.items():
        mask = df.region.isin(v) if isinstance(v, list) else df.region == v
        filtered = df[mask]
        
        if isinstance(v, list):
            result = filtered.groupby(group_cols).apply(
                lambda x: pd.Series({
                    'mu': (x.mu * x.n).sum() / x.n.sum(),
                    'n': x.n.sum()
                })
            ).reset_index()
        else:
            result = filtered
        
        result['region'] = k
        results.append(result)

    return pd.concat(results, ignore_index=True)

def summarize_activity_and_volume(df,normalization="suv",ml_per_vox=1.65 * 1.65 * 2.0 / 1000):

    df["volume"] = df.n *  ml_per_vox
    df[normalization] = df.apply(get_normalizer(normalization),axis=1)
    summary_df = df.groupby(['erosion', 'region',"task"]).agg({
        normalization: ['mean', 'std', 'count'],
        'volume': ['mean','std']
    })

    # Flatten column names
    summary_df.columns = [f'{normalization}_mean', f'{normalization}_std', f'{normalization}_count', 'volume_mean', 'volume_std']
    summary_df = summary_df.reset_index()

    # Add standard error of the mean for both SUV and volume
    summary_df[f'{normalization}_sem'] = (summary_df[f'{normalization}_std'] / (summary_df[f'{normalization}_count'] ** 0.5)).round(4)
    summary_df['volume_sem'] = (summary_df['volume_std'] / (summary_df[f'{normalization}_count'] ** 0.5)).round(4)
    return summary_df


In [None]:
df_ki.tac_if_path.iloc[0].aook

AttributeError: 'PosixPath' object has no attribute 'str'

In [170]:
df_ki.tac_if_path.apply(lambda x:"aortasegments/erosion-0/tac_1" in str(x)).sum()

np.int64(542421)

In [None]:
df_ki = pd.read_pickle("patlak_ki.pkl")
().sum()

np.int64(49311)

In [181]:
import pandas as pd

regions = {
    "spleen":"spleen",
    "kidneys": ["kidney_right","kidney_left"],
    "liver":"liver",
    "stomach":"stomach",
    "lungs": ['lung_upper_lobe_left', 'lung_lower_lobe_left','lung_upper_lobe_right', 'lung_middle_lobe_right','lung_lower_lobe_right'],
    'colon':"colon",
    'subcutaneous_fat':"subcutaneous_fat",
    "muscle":"skeletal_muscle",
    'visceral_fat':"visceral_fat",
    "gray_matter":['Left-Cerebral-Cortex', 'Right-Cerebral-Cortex'], 
    "white_matter": ['Right-Cerebral-White-Matter' , 'Left-Cerebral-White-Matter'],
}

def create_latex_table(data_dfs, normalizations):
    """
    Creates table with structure:
    Region | Erosion | Volume | SUV | SUL
    liver  | eros1   | xx±yy  | xx±yy | xx±yy
           | eros2   | xx±yy  | xx±yy | xx±yy
    """
    # Get latex versions of all dataframes
    latex_dfs = {}
    for norm in normalizations:
        df = data_dfs[norm].copy()
        df[f'{norm}_latex'] = '$' + df[f'{norm}_mean'].round(2).astype(str) + r' \pm ' + df[f'{norm}_sem'].round(3).astype(str) + '$'
        df['volume_latex'] = '$' + df['volume_mean'].round(0).astype(str) + r' \pm ' + df['volume_sem'].round(0).astype(str) + '$'
        latex_dfs[norm] = df[['erosion', 'region', f'{norm}_latex', 'volume_latex']]
    
    # Merge all dataframes
    combined = latex_dfs[normalizations[0]]
    for norm in normalizations[1:]:
        combined = combined.merge(latex_dfs[norm], on=['erosion', 'region'], suffixes=('', f'_{norm}'))
    
    # FIX ROW ISSUE: Remove duplicates if multiple tasks exist
    combined = combined.drop_duplicates(subset=['erosion', 'region'])
    
    # Create LaTeX table with multirow
    latex_str = r'\begin{tabular}{|l|l|' + 'c|' * len(normalizations + ['volume']) + '}\n'
    latex_str += r'\hline' + '\n'
    
    # Header
    cols = ['Region', 'Erosion', 'Volume'] + [norm.upper() for norm in normalizations]
    header = ' & '.join(cols) + r' \\' + '\n'
    latex_str += header
    latex_str += r'\hline' + '\n'
    
    # Data rows with multirow for regions
    for region in combined.region.unique():
        region_data = combined[combined.region == region].sort_values('erosion')
        n_erosions = len(region_data)
        
        for i, (_, row_data) in enumerate(region_data.iterrows()):
            if i == 0:
                # First row: multirow region name
                region_cell = f"\\multirow{{{n_erosions}}}{{*}}{{{region.replace('_', ' ')}}}"
            else:
                # Subsequent rows: empty region cell
                region_cell = ""
            
            # Build data row
            data_row = region_cell
            data_row += f" & {row_data['erosion']}"
            data_row += f" & {row_data['volume_latex']}"
            for norm in normalizations:
                data_row += f" & {row_data[f'{norm}_latex']}"
            data_row += r' \\' + '\n'
            
            latex_str += data_row
        
        # Add horizontal line after each region group
        latex_str += r'\hline' + '\n'
    
    latex_str += r'\end{tabular}' + '\n'
    latex_str = latex_str.replace("_"," ")
    
    with open('/homes/hinge/Projects/hedyPET/manuscript/tables/table_data.tex', 'w') as f:
        f.write(latex_str)
    
    # Also return a regular DataFrame for inspection
    table_data = []
    for region in combined.region.unique():
        region_data = combined[combined.region == region].sort_values('erosion')
        for _, row_data in region_data.iterrows():
            row = [region.replace('_', ' '), row_data['erosion'], row_data['volume_latex']]
            for norm in normalizations:
                row.append(row_data[f'{norm}_latex'])
            table_data.append(row)
    
    result_df = pd.DataFrame(table_data, columns=cols)
    return result_df


df_ki = pd.read_pickle("patlak_ki.pkl")
df_ki = df_ki[(df_ki.frames==5) & (df_ki.tac_if_path.apply(lambda x: "aortasegments/erosion-1/tac_4" in str(x)))]
df_ki = df_ki.drop(columns=["tac_if_path","tac_organ_path","frames","if_tag"])
df_ki = df_ki.rename(columns={"slope":"mu"})
df_ki.mu*=1000

# Usage
df_comb = filter_and_combine_regions(df, regions)
df_ki = filter_and_combine_regions(df_ki,regions)

df_suv = summarize_activity_and_volume(df_comb, "suv")  
df_sul = summarize_activity_and_volume(df_comb, "sul_decazes")
df_ki = summarize_activity_and_volume(df_ki, "none")

data_dfs = {'suv': df_suv, 'sul_decazes': df_sul,"none":df_ki}
normalizations = ['suv', 'sul_decazes',"none"]

table = create_latex_table(data_dfs, normalizations)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result['region'] = k
  result = filtered.groupby(group_cols).apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result['region'] = k
  result = filtered.groupby(group_cols).apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result['region'] = k
  result = filtered.groupby(group_cols).apply(
 