# Functions

In [2]:
def add_missing_PID_df(orig_df, model_df):
  """Return a copy of ``model_df`` padded with a zero-volume row per missing PID.

  Every ``PID`` value present in ``orig_df`` but absent from ``model_df`` gets
  one extra row. The row reuses the first ``orig_df`` row carrying that PID as
  metadata, with ``ai_Value`` overwritten by 0 and ``SegmentAlgorithmName``
  set to the first algorithm name found in ``model_df``.

  Parameters
  ----------
  orig_df : pandas.DataFrame
    Reference table; must contain a ``PID`` column.
  model_df : pandas.DataFrame
    Table to pad; must contain ``PID`` and ``SegmentAlgorithmName`` columns.

  Returns
  -------
  pandas.DataFrame
    A new frame. Neither input is modified. When rows are appended the index
    is renumbered from 0; otherwise the result is a plain copy of ``model_df``.

  Raises
  ------
  IndexError
    If PIDs are missing but ``model_df`` has no rows to take the algorithm
    name from.
  """
  # Vectorized membership test; isin also matches NaN against NaN.
  is_missing = ~orig_df["PID"].isin(model_df["PID"])
  # Keep the first row per missing PID, in order of first appearance.
  missing_rows = orig_df.loc[is_missing].drop_duplicates(subset="PID", keep="first").copy()
  if missing_rows.empty:
    return model_df.copy()

  # Overwrite values with volume = 0.
  missing_rows["ai_Value"] = 0
  missing_rows["SegmentAlgorithmName"] = model_df["SegmentAlgorithmName"].unique()[0]

  # Single concat instead of growing the frame row by row.
  return pd.concat([model_df, missing_rows], ignore_index=True)

In [3]:
def calculate_time_index(row, df_input, studyDateColName="image_StudyDate"):
  """Label a row as the earlier ("T0") or later ("T1") study of its patient.

  The row's study date is compared with the study date of the first row in
  ``df_input`` that has the same ``image_PatientID`` but a different
  ``StudyInstanceUID``. The function assumes only two studies per PatientID.

  Parameters
  ----------
  row : mapping-like (e.g. a DataFrame row)
    Must provide ``image_PatientID``, ``StudyInstanceUID`` and the date column.
  df_input : pandas.DataFrame
    Table in which the patient's other study is looked up.
  studyDateColName : str
    Name of the column holding the study date.

  Returns
  -------
  str
    "T0" if the row is earlier than the other study, "T1" if later,
    "T" when neither comparison holds (e.g. equal dates).

  Raises
  ------
  IndexError
    If no row of another study exists for the same patient.
  """
  same_patient = df_input["image_PatientID"] == row["image_PatientID"]
  different_study = df_input["StudyInstanceUID"] != row["StudyInstanceUID"]
  reference_date = df_input.loc[same_patient & different_study, studyDateColName].values[0]
  current_date = row[studyDateColName]

  if current_date < reference_date:
    return "T0"
  if current_date > reference_date:
    return "T1"
  return "T"

In [4]:
def calculate_time_index_prostatex_inf_only(row, df_input, seriesTimecolName="image_SeriesTime"):
  """Rank a row's series time among the series times of its own study.

  All rows of ``df_input`` sharing the row's ``image_PatientID`` and
  ``StudyInstanceUID`` are collected, their unique series times are sorted in
  ascending order, and the position of the row's own series time in that
  ordering is returned as ``"T<position>"`` (``"T0"`` for the smallest).

  NOTE(review): the original inline comment said "assumes only two studies per
  PatientID", but the lookup here is restricted to the row's own study —
  confirm which assumption is intended.

  Parameters
  ----------
  row : mapping-like (e.g. a DataFrame row)
    Must provide ``image_PatientID``, ``StudyInstanceUID`` and the time column.
  df_input : pandas.DataFrame
    Table in which sibling series of the same study are looked up.
  seriesTimecolName : str
    Name of the column holding the series time.

  Returns
  -------
  str or None
    ``"T<position>"``, or ``None`` when the row's series time is not found.
  """
  in_same_study = (
    (df_input["image_PatientID"] == row["image_PatientID"])
    & (df_input["StudyInstanceUID"] == row["StudyInstanceUID"])
  )
  ordered_times = sorted(df_input.loc[in_same_study, seriesTimecolName].unique())
  target_time = row[seriesTimecolName]
  # First matching position wins; falls through to None when nothing matches.
  return next(
    (f"T{position}" for position, candidate in enumerate(ordered_times) if candidate == target_time),
    None,
  )

# Imports

In [5]:
import os
import numpy as np
import glob
import pandas as pd
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.stats.multitest import multipletests

In [6]:
import plotly.graph_objects as go
import plotly.express as px
# import colorcet as cc
import os
import plotly.io as pio

In [7]:
# #colab
# from google.colab import auth
# auth.authenticate_user()
# project_id = "idc-sandbox-003"
# os.environ["GCP_PROJECT_ID"] = project_id
# from google.cloud import bigquery
# bq_client = bigquery.Client(os.environ["GCP_PROJECT_ID"])

# Data import

In [8]:
# selection_query = f"""
# SELECT *
# FROM `idc-sandbox-003.prostate_seg_terra_mhub_v3.final_table_looker_studio`
#  """
# selection_result = bq_client.query(selection_query)
# data_new_df = selection_result.result().to_dataframe()
# data_new_df["algorithmNameCollection"] = data_new_df.apply(lambda x : x["SegmentAlgorithmName"]+"-"+x["collection_id"], axis=1)
!wget https://raw.githubusercontent.com/ImagingDataCommons/idc-prostate-mri-analysis/refs/heads/main/analysis_results/analysis_results.csv -O analysis_results.csv
data_new_df = pd.read_csv("analysis_results.csv")

data_new_df["ai_Value"] = data_new_df.apply(lambda x : x["ai_Value"]/1000 if x["ai_Value"] is not None else None, axis=1)
data_new_df["expert_Value"] = data_new_df.apply(lambda x : x["expert_Value"]/1000 if x["expert_Value"] is not None else None, axis=1)

--2024-11-08 15:38:21--  https://raw.githubusercontent.com/ccosmin97/idc-prostate-mri-analysis/refs/heads/main/analysis_results.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7256924 (6.9M) [text/plain]
Saving to: ‘analysis_results.csv’


2024-11-08 15:38:22 (73.3 MB/s) - ‘analysis_results.csv’ saved [7256924/7256924]



In [9]:
# Summarize the loaded table: column names, non-null counts and dtypes.
data_new_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4909 entries, 0 to 4908
Data columns (total 61 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   segmentationInstanceUID               4907 non-null   object 
 1   sourceSegmentedSeriesUID              4907 non-null   object 
 2   expert_QuantityCodeMeaning            4909 non-null   object 
 3   expert_QuantityCodeValue              4909 non-null   object 
 4   expert_Value                          4909 non-null   float64
 5   expert_UnitsCodeValue                 4909 non-null   object 
 6   expert_UnitsCodeMeaning               4909 non-null   object 
 7   expert_findingSiteCodeValue           4909 non-null   object 
 8   expert_findingSiteCodeMeaning         4909 non-null   object 
 9   ai_QuantityCodeMeaning                4907 non-null   object 
 10  ai_QuantityCodeValue                  4907 non-null   object 
 11  ai_QuantityCoding

# Evaluation Statistics

### Whole prostate quantitative analysis

In [None]:
# List the distinct collections present in the table.
data_new_df["collection_id"].unique()

array(['prostatex', 'prostate_mri_us_biopsy',
       'qin_prostate_repeatability'], dtype=object)

In [None]:
# List the distinct segment labels present in the table.
data_new_df["idcSegmentCodeMeaning"].unique()

array(['Prostate', 'Peripheral_zone_of_prostate',
       'Transition_zone_of_prostate', 'Peripheral_zone_of_the_prostate'],
      dtype=object)

In [None]:
# Keep the rows whose segmented property type is one of the two whole-gland
# labels, then show how many rows were selected.
temp_df = data_new_df.loc[
    data_new_df["SegmentedPropertyTypeCodeMeaning"].isin(['Prostate', 'Prostatic structure'])
]
len(temp_df)

4459

In [None]:
# Cross-check: select by idcSegmentCodeMeaning instead and show the row count,
# for comparison with the count of the property-type selection.
temp2_df = data_new_df.loc[data_new_df["idcSegmentCodeMeaning"].isin(['Prostate'])]
len(temp2_df)

4459

In [None]:
# List the collections that remain in the current selection.
temp_df["collection_id"].unique()

array(['prostatex', 'prostate_mri_us_biopsy',
       'qin_prostate_repeatability'], dtype=object)

ProstateX

In [None]:
# Per-algorithm summary for the "prostatex" rows currently held in temp_df:
# mean/std/min/max of Dice, 95% Hausdorff, AI volume and expert volume, plus
# the number of rows per algorithm, rounded to two decimals.
metrics_summary_prostatex = (
    temp_df.loc[temp_df["collection_id"] == "prostatex"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Display the per-algorithm summary table computed for the prostatex collection.
metrics_summary_prostatex

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,bamf_nnunet_mr_prostate,0.95,0.01,0.91,0.97,0.3,0.28,0.0,1.5,67.710969,32.36,18.723625,167.490781,66.392519,32.11,17.599844,167.920156,98
1,monai_prostate158,0.92,0.02,0.86,0.96,0.92,0.41,0.25,2.5,65.011331,30.7,19.729719,161.858781,66.392519,32.11,17.599844,167.920156,98
2,nnunet_prostate_task24,0.93,0.03,0.74,0.96,0.9,0.91,0.0,8.13,67.971718,30.44,22.18625,166.366906,66.392519,32.11,17.599844,167.920156,98
3,nnunet_prostate_zonal_task05,0.9,0.04,0.64,0.95,1.69,1.46,0.35,12.2,66.914626,30.1,20.170844,163.664281,66.392519,32.11,17.599844,167.920156,98


QIN-Prostate-Repeatability

In [None]:
# Per-algorithm summary for the "qin_prostate_repeatability" rows currently
# held in temp_df: mean/std/min/max of Dice, 95% Hausdorff, AI volume and
# expert volume, plus the number of rows per algorithm, rounded to two decimals.
metrics_summary_qin = (
    temp_df.loc[temp_df["collection_id"] == "qin_prostate_repeatability"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Display the per-algorithm summary table computed for the qin_prostate_repeatability collection.
metrics_summary_qin

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,bamf_nnunet_mr_prostate,0.79,0.17,0.19,0.92,4.73,4.74,0.62,18.18,39.983223,19.71,3.877832,96.642958,46.450388,28.11,19.038892,115.874483,30
1,monai_prostate158,0.83,0.05,0.7,0.9,3.55,2.38,1.5,12.07,52.783244,29.91,23.1203,124.452408,46.450388,28.11,19.038892,115.874483,30
2,nnunet_prostate_task24,0.85,0.05,0.72,0.92,2.6,1.93,1.09,9.98,54.373543,27.91,25.27432,111.598841,46.450388,28.11,19.038892,115.874483,30
3,nnunet_prostate_zonal_task05,0.57,0.27,0.0,0.87,14.82,12.88,1.81,56.11,42.76282,22.83,0.169168,88.113334,46.450388,28.11,19.038892,115.874483,30


Prostate-MRI-US-Biopsy

In [None]:
# Per-algorithm summary for the "prostate_mri_us_biopsy" rows currently held
# in temp_df: mean/std/min/max of Dice, 95% Hausdorff, AI volume and expert
# volume, plus the number of rows per algorithm, rounded to two decimals.
metrics_summary_prostate_mri_us_biopsy = (
    temp_df.loc[temp_df["collection_id"] == "prostate_mri_us_biopsy"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Show the first rows (at most five) of the prostate_mri_us_biopsy summary table.
metrics_summary_prostate_mri_us_biopsy.head()

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,bamf_nnunet_mr_prostate,0.89,0.05,0.13,0.96,1.54,1.66,0.0,24.93,50.928355,26.32,10.166936,230.466186,48.966954,26.11,10.563183,222.405063,986
1,monai_prostate158,0.88,0.07,0.12,0.95,1.77,2.61,0.33,34.99,47.462448,24.06,8.067497,193.951943,48.958933,26.1,10.563183,222.405063,987
2,nnunet_prostate_task24,0.86,0.09,0.1,0.95,2.94,6.53,0.33,56.04,51.524569,22.2,19.45762,202.835437,48.958933,26.1,10.563183,222.405063,987
3,nnunet_prostate_zonal_task05,0.79,0.16,0.0,0.94,9.17,14.33,0.47,63.17,50.593193,22.6,7.370234,215.524546,48.958933,26.1,10.563183,222.405063,987


### Peripheral zone quantitative analysis

In [None]:
# Re-point temp_df at the peripheral-zone rows; later cells that read temp_df
# therefore summarize this zone rather than the earlier selection.
temp_df = data_new_df.loc[
    data_new_df["SegmentedPropertyTypeCodeMeaning"]
    == 'Structure of peripheral glandular zone of prostate (body structure)'
]

In [None]:
# Per-algorithm summary for the "prostatex" rows currently held in temp_df:
# mean/std/min/max of Dice, 95% Hausdorff, AI volume and expert volume, plus
# the number of rows per algorithm, rounded to two decimals.
# Note: this rebinds metrics_summary_prostatex, replacing any earlier table
# stored under the same name.
metrics_summary_prostatex = (
    temp_df.loc[temp_df["collection_id"] == "prostatex"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Show the first rows (at most five) of the current prostatex summary table.
metrics_summary_prostatex.head()

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,monai_prostate158,0.74,0.08,0.46,0.88,3.27,1.88,1.21,12.79,14.661519,5.3,5.763938,33.002906,19.779091,6.98,6.54725,42.050969,98
1,nnunet_prostate_zonal_task05,0.67,0.14,0.06,0.84,4.81,3.18,1.5,20.07,12.938248,4.94,0.796156,31.919719,19.799546,7.01,6.54725,42.050969,97


In [None]:
# Per-algorithm summary for the "qin_prostate_repeatability" rows currently
# held in temp_df: mean/std/min/max of Dice, 95% Hausdorff, AI volume and
# expert volume, plus the number of rows per algorithm, rounded to two decimals.
# Note: this rebinds metrics_summary_qin, replacing any earlier table stored
# under the same name.
metrics_summary_qin = (
    temp_df.loc[temp_df["collection_id"] == "qin_prostate_repeatability"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Show the first rows (at most five) of the current qin_prostate_repeatability summary table.
metrics_summary_qin.head()

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,monai_prostate158,0.65,0.16,0.15,0.79,5.48,4.56,1.88,20.88,11.147744,3.15,5.128125,17.695422,9.8965,3.98,3.202537,18.637438,30
1,nnunet_prostate_zonal_task05,0.39,0.3,0.0,0.8,18.37,23.4,2.49,97.66,6.754592,5.51,0.00593,15.105507,10.063348,3.94,3.202537,18.637438,29


### Transition zone quantitative analysis

In [None]:
# Re-point temp_df at the transition-zone rows; later cells that read temp_df
# therefore summarize this zone rather than the earlier selection.
temp_df = data_new_df.loc[
    data_new_df["SegmentedPropertyTypeCodeMeaning"]
    == 'Structure of transition zone of prostate (body structure)'
]

In [None]:
# Per-algorithm summary for the "prostatex" rows currently held in temp_df:
# mean/std/min/max of Dice, 95% Hausdorff, AI volume and expert volume, plus
# the number of rows per algorithm, rounded to two decimals.
# Note: this rebinds metrics_summary_prostatex, replacing any earlier table
# stored under the same name.
metrics_summary_prostatex = (
    temp_df.loc[temp_df["collection_id"] == "prostatex"]
    .groupby("SegmentAlgorithmName")
    .agg(
        # Expands to mean_dice, std_dice, ..., max_expert_volume (same order
        # as writing each named aggregation out by hand).
        **{
            f"{stat}_{label}": (column, stat)
            for label, column in (
                ("dice", "dsc"),
                ("hausdorff95", "hsdff_95"),
                ("ai_volume", "ai_Value"),
                ("expert_volume", "expert_Value"),
            )
            for stat in ("mean", "std", "min", "max")
        },
        num_samples=("dsc", "size"),  # Counting number of samples
    )
    .round(2)
    .reset_index()
)

In [None]:
# Show the first rows (at most five) of the current prostatex summary table.
metrics_summary_prostatex.head()

Unnamed: 0,SegmentAlgorithmName,mean_dice,std_dice,min_dice,max_dice,mean_hausdorff95,std_hausdorff95,min_hausdorff95,max_hausdorff95,mean_ai_volume,std_ai_volume,min_ai_volume,max_ai_volume,mean_expert_volume,std_expert_volume,min_expert_volume,max_expert_volume,num_samples
0,monai_prostate158,0.85,0.07,0.59,0.94,2.17,1.06,0.35,5.29,49.950719,29.56,10.05675,145.559812,45.192203,30.09,6.134531,144.135031,98
1,nnunet_prostate_zonal_task05,0.82,0.11,0.35,0.94,2.96,2.28,0.75,15.83,54.021906,29.65,12.21775,149.196125,45.192203,30.09,6.134531,144.135031,98
