# Imports

In [None]:
import os
import glob
import shutil
import pandas as pd

In [None]:
from google.colab import auth
# Interactive Colab sign-in; presumably this is what lets the gcloud / gsutil /
# BigQuery calls further down run under the user's account — confirm.
auth.authenticate_user()

In [None]:
!apt-get install lz4

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  lz4
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 90.0 kB of archives.
After this operation, 236 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 lz4 amd64 1.9.3-2build2 [90.0 kB]
Fetched 90.0 kB in 1s (70.2 kB/s)
Selecting previously unselected package lz4.
(Reading database ... 123629 files and directories currently installed.)
Preparing to unpack .../lz4_1.9.3-2build2_amd64.deb ...
Unpacking lz4 (1.9.3-2build2) ...
Setting up lz4 (1.9.3-2build2) ...
Processing triggers for man-db (2.10.2-1) ...


# Setup

## Input and output paths definition and creation

In [None]:
ddl_terra_out_tsv_path = "https://github.com/ImagingDataCommons/idc-prostate-mri-analysis/blob/main/terra_mhub/data_tables/terra_mhub_all_collections_v3_SITK_RES.tsv" #@param{type:"string"}
local_terra_out_path = "terra_mhub_all_collections_v3_SITK_RES.tsv"
!wget -O {local_terra_out_path} {ddl_terra_out_tsv_path}

--2024-10-22 13:33:44--  https://raw.githubusercontent.com/ccosmin97/idc-prostate-mri-analysis/refs/heads/main/terra_mhub/data_tables/terra_mhub_all_collections_v3_SITK_RES.tsv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 694428 (678K) [text/plain]
Saving to: ‘terra_mhub_all_collections_v3_SITK_RES.tsv’


2024-10-22 13:33:45 (4.18 MB/s) - ‘terra_mhub_all_collections_v3_SITK_RES.tsv’ saved [694428/694428]



In [None]:
!rm -rf out_terra
#RAW ARCHIVE OUTPUTS
!mkdir -p out_terra/archives_final
#AI OUTPUTS
!mkdir -p out_terra/ai_dicom_seg
!mkdir -p out_terra/ai_dicom_sr
#IDC EXPERT SEG OUTPUTS
!mkdir -p out_terra/idc_dicom_seg
!mkdir -p out_terra/idc_dicom_sr
#IDC EXPERT ADD -- SECOND SET OUTPUTS
!mkdir -p out_terra/idc_add_dicom_seg
!mkdir -p out_terra/idc_add_dicom_sr
#QUANT EVAL OUTPUTS
!mkdir -p out_terra/quant_eval
#QUANT EVAL ADD OUTPUTS
!mkdir -p out_terra/quant_eval_add

## Processing functions

In [None]:
def create_batch_dir(base_dir, collection_id, batch_id):
  """Create (or reset) the per-batch folder under every output category.

  For each output category the directory
  ``<base_dir>/<category>/<collection_id>/<batch_id>`` is removed if it
  already exists and then re-created empty.

  Args:
    base_dir: root output folder.
    collection_id: collection identifier; converted with ``str()``.
    batch_id: batch identifier; converted with ``str()``.
  """
  for out_dir in ("ai_dicom_seg", "ai_dicom_sr",
                  "idc_dicom_seg", "idc_dicom_sr",
                  "idc_add_dicom_seg", "idc_add_dicom_sr",
                  "quant_eval", "quant_eval_add",
                  "archives_final"):
    out_path = os.path.join(base_dir, out_dir, str(collection_id), str(batch_id))
    # Pure-Python equivalents of `rm -rf` / `mkdir -p`: no shell is spawned, so
    # paths containing spaces or shell metacharacters are handled correctly.
    shutil.rmtree(out_path, ignore_errors=True)
    os.makedirs(out_path, exist_ok=True)

In [None]:
def ddl_unzip_terra_archive(archive_lz4, output_folder):
  archive_filemame = archive_lz4.split("/")[-1]
  zip_ddl_path = os.path.join(output_folder)
  !gsutil -m cp -r {archive_lz4} {zip_ddl_path}
  # !cp $archive_lz4 $zip_ddl_path
  zip_path = os.path.join(output_folder, archive_filemame)
  !lz4 -dc < $zip_path | tar xvf - -C $zip_ddl_path
  #move archive contents to output folder
  # return glob.glob(os.path.join(zip_ddl_path, "**", "*.dcm"), recursive=True)

## Load terra results data

In [None]:
# Load the tab-separated Terra output table from the local file named by local_terra_out_path.
terra_out_df = pd.read_csv(local_terra_out_path, sep="\t")

In [None]:
# Sanity check: list the columns, their non-null counts and dtypes.
terra_out_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 37 columns):
 #   Column                                     Non-Null Count  Dtype 
---  ------                                     --------------  ----- 
 0   entity:terra_mhub_all_collections_v3_id    95 non-null     int64 
 1   adcSeriesInstanceUIDs                      95 non-null     object
 2   collection_id                              95 non-null     object
 3   dicomAiCodeMeaningEval_lst                 95 non-null     object
 4   dicomAiCodeValuesEval_lst                  95 non-null     object
 5   dicomAiCodingSchemeDesignatorEval_lst      95 non-null     object
 6   dicomCodeMeaningProstate_lst               95 non-null     object
 7   dicomCodeValuesProstate_lst                95 non-null     object
 8   dicomCodingSchemeDesignatorProstate_lst    95 non-null     object
 9   dicomIdcAddCodeMeaningEval_lst             6 non-null      object
 10  dicomIdcAddCodeValuesEval_lst           

## Unzip terra archives

In [None]:
for index, row in terra_out_df.iterrows():
  collection_id = row['collection_id']
  batch_id = row['entity:terra_mhub_all_collections_v3_id']
  seriesInstanceUIDs = row['seriesInstanceUIDs']
  finalCompressedOutputFile = row['finalCompressedOutputFile']
  print(f"batch id : {batch_id}")
  print(f"seriesInstanceUIDs :{seriesInstanceUIDs}")
  print(f"finalCompressedOutputFile :{finalCompressedOutputFile}")
  create_batch_dir("out_terra", collection_id, batch_id)
  #COPY and UNZIP final archive
  ddl_unzip_terra_archive(finalCompressedOutputFile,
                          os.path.join("out_terra", "archives_final",
                                       collection_id, str(batch_id)))
  #move AI DICOM SEG/SR objects to ai_dicom_seg/ai_dicom_sr
  ## AI DICOM SEG
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                             str(collection_id), str(batch_id),
                                             "aggregated_results", "ai_results",
                                             "dicom_seg/*")):
    print(object_list)
    out_path = os.path.join("out_terra", "ai_dicom_seg", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  ## AI DICOM SR
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                             str(collection_id), str(batch_id),
                                             "aggregated_results", "ai_results",
                                             "dicom_sr/*")):
    out_path = os.path.join("out_terra", "ai_dicom_sr", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  #move IDC DICOM SEG/SR objects to idc_dicom_seg/idc_dicom_sr
  ## IDC DICOM SEG
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                             str(collection_id), str(batch_id),
                                             "aggregated_results", "idc_expert_results",
                                             "dicom_seg/*")):
    out_path = os.path.join("out_terra", "idc_dicom_seg", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  ## IDC DICOM SR
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                            str(collection_id), str(batch_id),
                                            "aggregated_results", "idc_expert_results",
                                            "dicom_sr/*")):
    out_path = os.path.join("out_terra", "idc_dicom_sr", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  #move IDC ADD DICOM SEG/SR objects to idc_add_dicom_seg/idc_add_dicom_sr
  ## IDC ADD DICOM SEG
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                             str(collection_id), str(batch_id),
                                             "aggregated_results", "idc_expert_results_add",
                                             "dicom_seg/*")):
    out_path = os.path.join("out_terra", "idc_add_dicom_seg", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  ## IDC ADD DICOM SR
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                            str(collection_id), str(batch_id),
                                            "aggregated_results", "idc_expert_results_add",
                                            "dicom_sr/*")):
    out_path = os.path.join("out_terra", "idc_add_dicom_sr", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  #move eval quantitative results to quant_eval
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                            str(collection_id), str(batch_id),
                                            "aggregated_results", "quant_eval_results/*")):
    out_path = os.path.join("out_terra", "quant_eval", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  #move eval add quantitative results to quant_eval_add
  for object_list in  glob.glob(os.path.join("out_terra", "archives_final",
                                            str(collection_id),str( batch_id),
                                            "aggregated_results", "quant_eval_results_add/*")):
    out_path = os.path.join("out_terra", "quant_eval_add", str(collection_id), str(batch_id))
    !cp -r {object_list} {out_path}
  print("...")
  print("\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
aggregated_results/papermill_logs/
aggregated_results/papermill_logs/nnunet_prostate_task24/
aggregated_results/papermill_logs/nnunet_prostate_task24/ai_mhub_seg_dicom_combination-output.ipynb
aggregated_results/papermill_logs/nnunet_prostate_task24/sr_dicom_generation-output.ipynb
aggregated_results/papermill_logs/nnunet_prostate_task24/seg_dicom_eval-output.ipynb
aggregated_results/papermill_logs/bamf_nnunet_mr_prostate/
aggregated_results/papermill_logs/bamf_nnunet_mr_prostate/ai_mhub_seg_dicom_combination-output.ipynb
aggregated_results/papermill_logs/bamf_nnunet_mr_prostate/sr_dicom_generation-output.ipynb
aggregated_results/papermill_logs/bamf_nnunet_mr_prostate/seg_dicom_eval-output.ipynb
aggregated_results/papermill_logs/idc_expert_seg/
aggregated_results/papermill_logs/idc_expert_seg/sr_dicom_generation-output.ipynb
aggregated_results/papermill_logs/idc_expert_seg/idc_seg_dicom_combination-output.ipynb
aggregated

In [None]:
# Move the raw archives out of out_terra; the zip created in the next section
# packs /content/out_terra, so this keeps the raw archives out of that zip.
!mv /content/out_terra/archives_final /content/

## Create sorted_data zip

In [None]:
!zip -r results_sorted.zip /content/out_terra

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/40/bamf_nnunet_mr_prostate/1.3.6.1.4.1.14519.5.2.1.236242567483759797890842187553264148986/1.3.6.1.4.1.14519.5.2.1.236242567483759797890842187553264148986_1.3.6.1.4.1.14519.5.2.1.236242567483759797890842187553264148986_SR.dcm (deflated 90%)
  adding: content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/40/bamf_nnunet_mr_prostate/1.3.6.1.4.1.14519.5.2.1.72886393662886314085171123358732192950/ (stored 0%)
  adding: content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/40/bamf_nnunet_mr_prostate/1.3.6.1.4.1.14519.5.2.1.72886393662886314085171123358732192950/1.3.6.1.4.1.14519.5.2.1.72886393662886314085171123358732192950_1.3.6.1.4.1.14519.5.2.1.72886393662886314085171123358732192950_SR.dcm (deflated 90%)
  adding: content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/40/bamf_nnunet_mr_prostate/1.3.6.1.4.1.14519.5.2.1.86468801022876021368602404390378084388/ (stored 0%

## Concatenate quantitative evaluation results and save to disk

In [None]:
# Accumulator for the per-file evaluation DataFrames appended by the loading loop.
quantEvalDfLst = []

In [None]:
# Reset the accumulator here so that re-running this cell does not append the
# same CSV files a second time.
quantEvalDfLst = []
eval_csv_paths = (
    glob.glob(os.path.join("out_terra", "quant_eval", "**", "*.csv"), recursive=True)
    + glob.glob(os.path.join("out_terra", "quant_eval_add", "**", "*.csv"), recursive=True)
)
for eval_csv in eval_csv_paths:
  try:
    quantEvalDfLst.append(pd.read_csv(eval_csv, index_col=False))
  except Exception as err:
    # Loading stays best-effort (one unreadable file must not abort the run),
    # but the skipped file is reported instead of being dropped silently.
    print(f"Skipping {eval_csv}: {err!r}")

In [None]:
# Fail with an explicit message when nothing was loaded.
if not quantEvalDfLst:
  raise ValueError("No quantitative evaluation CSV was loaded from "
                   "out_terra/quant_eval or out_terra/quant_eval_add")
# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each source file's own row numbers.
quantEvalDf = pd.concat(quantEvalDfLst, ignore_index=True)
quantEvalDf.to_csv("quantEvalResults.csv", index=False)

In [None]:
# Sanity check: columns, non-null counts and dtypes of the concatenated results.
quantEvalDf.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4910 entries, 0 to 23
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   refSerieUID             4910 non-null   object 
 1   ai_seg_serieUID         4910 non-null   object 
 2   idc_seg_serieUID        4910 non-null   object 
 3   aiSegmentNumber         4910 non-null   int64  
 4   idcSegmentNumber        4910 non-null   int64  
 5   idcSegmentCodeMeaning   4910 non-null   object 
 6   aiSegmentCodeMeaning    4910 non-null   object 
 7   aiSegmentCodeValue      4910 non-null   int64  
 8   aiSegmentCodingScheme   4910 non-null   object 
 9   idcSegmentCodeValue     4910 non-null   object 
 10  idcSegmentCodingScheme  4910 non-null   object 
 11  dsc                     4910 non-null   float64
 12  hsdff                   4910 non-null   float64
 13  hsdff_95                4910 non-null   float64
 14  tp                      4910 non-null   int64  

# Create/Import data to GCS buckets -> DICOM Stores -> BQ tables

## GCP/GCS variables definition

In [None]:
#@title Global parameters
# GCP project id and location passed to the gcloud / bq commands in the cells below.
project_id = "idc-sandbox-003" #@param{type:"string"}
location="us" #@param{type:"string"}

In [None]:
#@title Buckets
parent_bucket = "gs://prostate_seg_terra_v3_sitk_res_mhub" #@param{type:"string"}
# parent_bucket already carries the "gs://" scheme, so the per-category URIs are
# built by appending to it (prefixing "gs://" again would give "gs://gs://...").
ai_dicom_seg_bucket=f"{parent_bucket}/ai_dicom_seg" #@param{type:"string"}
ai_dicom_sr_bucket=f"{parent_bucket}/ai_dicom_sr" #@param{type:"string"}
idc_dicom_seg_bucket=f"{parent_bucket}/idc_dicom_seg" #@param{type:"string"}
idc_dicom_sr_bucket=f"{parent_bucket}/idc_dicom_sr" #@param{type:"string"}
# NOTE(review): the upload cell copies the local "quant_eval" folder into the
# bucket, so the "quant_eval_results" suffix here may not match — confirm.
quant_eval_bucket=f"{parent_bucket}/quant_eval_results" #@param{type:"string"}

In [None]:
#@title DICOM STORES
# Healthcare dataset id and the names of the per-type DICOM stores
# that are created inside it by the cells below.
dicom_dataset_id = "prostate_seg_terra_mhub_v3_sitk_res_dataset" #@param{type:"string"}
ai_dicom_seg_store="aiDicomSegDicomStore" #@param{type:"string"}
ai_dicom_sr_store="aiDicomSrDicomStore" #@param{type:"string"}
idc_dicom_seg_store="idcDicomSegDicomStore"#@param{type:"string"}
idc_dicom_sr_store="idcDicomSrDicomStore" #@param{type:"string"}

In [None]:
#@title BigQuery parameters
#@title test
# BigQuery dataset id followed by table names; presumably one table per
# DICOM store plus one for the quantitative results — confirm against the export cells.
bq_dataset_id = "prostate_seg_terra_mhub_v3_sitk_res" #@param{type:"string"}
bq_ai_dicom_seg = "ai_dicom_seg_table" #@param{type:"string"}
bq_ai_dicom_sr = "ai_dicom_sr_table" #@param{type:"string"}
bq_idc_dicom_seg = "idc_dicom_seg_table" #@param{type:"string"}
bq_idc_dicom_sr = "idc_dicom_sr_table"#@param{type:"string"}
bq_quant_eval = "quantitative_results_table"#@param{type:"string"}

## Set GCP project id

In [None]:
# Set gcloud's core/project property to the configured project id.
!gcloud config set project {project_id}

Updated property [core/project].


## Create DICOM dataset

In [None]:
# Delete any existing Healthcare dataset with this id so it can be re-created below.
# On a first run the dataset does not exist and gcloud reports NOT_FOUND.
!gcloud healthcare datasets delete $dicom_dataset_id --location=$location --project $project_id --quiet

[1;31mERROR:[0m (gcloud.healthcare.datasets.delete) NOT_FOUND: Dataset projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset not found. This command is authenticated as cciausu97@gmail.com which is the active account specified by the [core/account] property.


In [None]:
# Create the Healthcare dataset that will hold the DICOM stores.
!gcloud healthcare datasets create $dicom_dataset_id --location=$location --project $project_id

Create request issued for: [prostate_seg_terra_mhub_v3_sitk_res_dataset]
Created dataset [prostate_seg_terra_mhub_v3_sitk_res_dataset].


## Create DICOM Stores

In [None]:
# Per-type DICOM stores (AI SEG, AI SR, IDC SEG, IDC SR) to (re)create in the dataset.
lst_dicom_stores = [ai_dicom_seg_store,
                    ai_dicom_sr_store,
                    idc_dicom_seg_store,
                    idc_dicom_sr_store]
for dicom_store in lst_dicom_stores:
  # Delete first; when the store does not exist gcloud prints NOT_FOUND and the loop continues.
  !gcloud healthcare dicom-stores delete $dicom_store \
    --dataset=$dicom_dataset_id \
    --location=$location \
    --project=$project_id --quiet
  # Then create the store empty.
  !gcloud healthcare dicom-stores create $dicom_store \
    --dataset=$dicom_dataset_id \
    --location=$location \
    --project=$project_id --quiet

[1;31mERROR:[0m (gcloud.healthcare.dicom-stores.delete) NOT_FOUND: DICOM store: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSegDicomStore not found. This command is authenticated as cciausu97@gmail.com which is the active account specified by the [core/account] property
Created dicomStore [aiDicomSegDicomStore].
[1;31mERROR:[0m (gcloud.healthcare.dicom-stores.delete) NOT_FOUND: DICOM store: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSrDicomStore not found. This command is authenticated as cciausu97@gmail.com which is the active account specified by the [core/account] property
Created dicomStore [aiDicomSrDicomStore].
[1;31mERROR:[0m (gcloud.healthcare.dicom-stores.delete) NOT_FOUND: DICOM store: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSegDicomStore not found. This command is auth

## Create parent GCS bucket

In [None]:
# Remove the parent bucket's objects recursively, then the bucket itself.
# Both commands report a 404 error when the bucket does not exist yet.
!gcloud storage rm -r {parent_bucket} --project $project_id
!gcloud storage buckets delete {parent_bucket} --project $project_id

Removing objects:
[1;31mERROR:[0m (gcloud.storage.rm) gs://prostate_seg_terra_v3_sitk_res_mhub not found: 404.
Removing gs://prostate_seg_terra_v3_sitk_res_mhub/...
[1;31mERROR:[0m gs://prostate_seg_terra_v3_sitk_res_mhub not found: 404.


In [None]:
# Create the parent bucket that receives all sorted outputs.
!gcloud storage buckets create {parent_bucket} --location=$location --project $project_id

Creating gs://prostate_seg_terra_v3_sitk_res_mhub/...


## Copy local data to GCS buckets

In [None]:
#Move AI DICOM SEG
!gsutil -m cp -r /content/out_terra/ai_dicom_seg {parent_bucket}
#Move AI DICOM SR
!gsutil -m cp -r /content/out_terra/ai_dicom_sr {parent_bucket}
#Move IDC DICOM SEG
!gsutil -m cp -r /content/out_terra/idc_dicom_seg {parent_bucket}
#Move IDC DICOM SR
!gsutil -m cp -r /content/out_terra/idc_dicom_sr {parent_bucket}
#Move IDC ADD DICOM SEG
!gsutil -m cp -r /content/out_terra/idc_add_dicom_seg {parent_bucket}
#Move IDC ADD DICOM SR
!gsutil -m cp -r /content/out_terra/idc_add_dicom_sr {parent_bucket}
#Move eval results
!gsutil -m cp -r /content/out_terra/quant_eval {parent_bucket}
#Move eval add results
!gsutil -m cp -r /content/out_terra/quant_eval_add {parent_bucket}

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Copying file:///content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/62/monai_prostate158/1.3.6.1.4.1.14519.5.2.1.30997405570836350697128723682957492690/1.3.6.1.4.1.14519.5.2.1.30997405570836350697128723682957492690_1.3.6.1.4.1.14519.5.2.1.30997405570836350697128723682957492690_SR.dcm [Content-Type=application/dicom]...
Copying file:///content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/62/monai_prostate158/1.3.6.1.4.1.14519.5.2.1.152624899633248969237716692972911665818/1.3.6.1.4.1.14519.5.2.1.152624899633248969237716692972911665818_1.3.6.1.4.1.14519.5.2.1.152624899633248969237716692972911665818_SR.dcm [Content-Type=application/dicom]...
Copying file:///content/out_terra/ai_dicom_sr/prostate_mri_us_biopsy/62/monai_prostate158/1.3.6.1.4.1.14519.5.2.1.303901212553223345293951922331363743761/1.3.6.1.4.1.14519.5.2.1.303901212553223345293951922331363743761_1.3.6.1.4.1.14519.5.2.1.303901212553223345293951922331363743761_SR.dcm

## Import GCS bucket data into DICOM stores

In [None]:
# Re-create the combined store "dicomAllAIIdcSegSr": delete it (gcloud reports
# NOT_FOUND when it does not exist yet), then create it empty.
!gcloud healthcare dicom-stores delete dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --project=$project_id --quiet

!gcloud healthcare dicom-stores create dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --project=$project_id --quiet

[1;31mERROR:[0m (gcloud.healthcare.dicom-stores.delete) NOT_FOUND: DICOM store: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/dicomAllAIIdcSegSr not found. This command is authenticated as cciausu97@gmail.com which is the active account specified by the [core/account] property
Created dicomStore [dicomAllAIIdcSegSr].


In [None]:
#ai dicom seg
temp_gcs_uri = f'{parent_bucket}/ai_dicom_seg/**.dcm'
!gcloud healthcare dicom-stores import gcs dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id
#ai dicom sr
temp_gcs_uri = f'{parent_bucket}/ai_dicom_sr/**.dcm'
!gcloud healthcare dicom-stores import gcs dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id
#idc dicom seg
temp_gcs_uri = f'{parent_bucket}/idc_dicom_seg/**.dcm'
!gcloud healthcare dicom-stores import gcs dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id
#idc dicom sr
temp_gcs_uri = f'{parent_bucket}/idc_dicom_sr/**.dcm'
!gcloud healthcare dicom-stores import gcs dicomAllAIIdcSegSr \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [dicomAllAIIdcSegSr]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/dicomAllAIIdcSegSr
Request issued for: [dicomAllAIIdcSegSr]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/dicomAllAIIdcSegSr
Request issued for: [dicomAllAIIdcSegSr]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/dicomAllAIIdcSegSr
Request issued for: [dicomAllAIIdcSegSr]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/dicomAllAIIdcSegSr


AI DICOM SEG

In [None]:
# Import every .dcm object under ai_dicom_seg/ into the AI SEG store.
temp_gcs_uri = f'{parent_bucket}/ai_dicom_seg/**.dcm'
!gcloud healthcare dicom-stores import gcs $ai_dicom_seg_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [aiDicomSegDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSegDicomStore


AI DICOM SR

In [None]:
# Import every .dcm object under ai_dicom_sr/ into the AI SR store.
temp_gcs_uri = f'{parent_bucket}/ai_dicom_sr/**.dcm'
!gcloud healthcare dicom-stores import gcs $ai_dicom_sr_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [aiDicomSrDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSrDicomStore


IDC DICOM SEG

In [None]:
# Import every .dcm object under idc_dicom_seg/ into the IDC SEG store.
temp_gcs_uri = f'{parent_bucket}/idc_dicom_seg/**.dcm'
!gcloud healthcare dicom-stores import gcs $idc_dicom_seg_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [idcDicomSegDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSegDicomStore


IDC DICOM SR

In [None]:
# Import every .dcm object under idc_dicom_sr/ into the IDC SR store.
temp_gcs_uri = f'{parent_bucket}/idc_dicom_sr/**.dcm'
!gcloud healthcare dicom-stores import gcs $idc_dicom_sr_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [idcDicomSrDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSrDicomStore


IDC ADD DICOM SEG

In [None]:
# Import every .dcm object under idc_add_dicom_seg/.
# Note: the target is the same IDC SEG store used for idc_dicom_seg/, so both
# sets of segmentations end up in one store.
temp_gcs_uri = f'{parent_bucket}/idc_add_dicom_seg/**.dcm'
!gcloud healthcare dicom-stores import gcs $idc_dicom_seg_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [idcDicomSegDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSegDicomStore


IDC ADD DICOM SR

In [None]:
# Import every .dcm object under idc_add_dicom_sr/.
# Note: the target is the same IDC SR store used for idc_dicom_sr/, so both
# sets of structured reports end up in one store.
temp_gcs_uri = f'{parent_bucket}/idc_add_dicom_sr/**.dcm'
!gcloud healthcare dicom-stores import gcs $idc_dicom_sr_store \
  --dataset=$dicom_dataset_id \
  --location=$location \
  --gcs-uri=$temp_gcs_uri \
  --project $project_id

Request issued for: [idcDicomSrDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSrDicomStore


## Export DICOM Stores to BigQuery

## Create BigQuery dataset

In [None]:
from google.cloud import bigquery

# BigQuery client bound to the configured project.
client = bigquery.Client(project=project_id)

# Drop the dataset together with its contents; a missing dataset is not an error,
# so the message below is printed either way.
client.delete_dataset(bq_dataset_id, delete_contents=True, not_found_ok=True)

deleted_message = "Deleted dataset '{}'.".format(bq_dataset_id)
print(deleted_message)

Deleted dataset 'prostate_seg_terra_mhub_v3_sitk_res'.


In [None]:
# Create the BigQuery dataset (deleted just above if it already existed).
!bq --location=$location mk -d \
--description "dataset for terra_mhub prostate segmentation analysis" \
--project_id=$project_id \
$bq_dataset_id

Dataset 'idc-sandbox-003:prostate_seg_terra_mhub_v3_sitk_res' successfully created.


Export terra output data table to bigquery

In [None]:
from ast import literal_eval
import numpy as np
from google.cloud import bigquery

# Read the Terra output table (tab-separated) from the local path set earlier in the notebook.
tempDf = pd.read_csv(local_terra_out_path, index_col=None, sep="\t")

# Columns whose cells are parsed with literal_eval below; each of them is declared
# as a REPEATED STRING field in the BigQuery schema further down.
col_lsts = ['adcSeriesInstanceUIDs', 'dicomAiCodeMeaningEval_lst',
       'dicomAiCodeValuesEval_lst',
       'dicomAiCodingSchemeDesignatorEval_lst',
       'dicomCodeMeaningProstate_lst', 'dicomCodeValuesProstate_lst',
       'dicomCodingSchemeDesignatorProstate_lst',
       'dicomIdcAddCodeMeaningEval_lst', 'dicomIdcAddCodeValuesEval_lst',
       'dicomIdcAddCodingSchemeDesignatorEval_lst',
       'dicomIdcCodeMeaningEval_lst', 'dicomIdcCodeValuesEval_lst',
       'dicomIdcCodingSchemeDesignatorEval_lst',
       'dicomSrAiCodeMeaning_lst', 'dicomSrAiCodeValues_lst',
       'dicomSrAiCodingSchemeDesignator_lst', 'dicomSrIdcCodeMeaning_lst',
       'dicomSrIdcCodeValues_lst', 'dicomSrIdcCodingSchemeDesignator_lst',
       'evalAddCompressedOutputFile', 'evalCompressedOutputFile', 'idcAddSegSeriesInstancceUIDs', 'idcSegSeriesInstanceUIDs',
       'mhub_model_name_lst', 'mhubai_custom_config_lst',
       'mhubaiCustomSegmentAlgorithmName_lst', 'mhubCompressedOutputFile', 'seriesInstanceUIDs']

# Convert on a copy so tempDf keeps the raw strings as read from the TSV.
outTempDf = tempDf.copy()
for col_processed in outTempDf.columns.values:
  if col_processed in col_lsts:
    print(f"col_processed : {col_processed}")
    # pd.isna() recognises every missing-value marker, whereas the previous identity
    # test (`x is not np.nan`) only matched the np.nan singleton and would hand any
    # other NaN object to literal_eval, which raises on non-string input.
    outTempDf[col_processed] = outTempDf[col_processed].apply(
        lambda x: np.nan if pd.isna(x) else literal_eval(x))

# Use the notebook-wide project_id (as the dataset delete/create cells do) instead of
# a hard-coded project name, so the table lands in the dataset created above.
client = bigquery.Client(project=project_id)
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("entity:terra_mhub_all_collections_v3_id", bigquery.enums.SqlTypeNames.INT64), # create each column in Big Query along with types
        bigquery.SchemaField("adcSeriesInstanceUIDs", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("collection_id", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("dicomAiCodeMeaningEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomAiCodeValuesEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomAiCodingSchemeDesignatorEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomCodeMeaningProstate_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomCodeValuesProstate_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomCodingSchemeDesignatorProstate_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcAddCodeMeaningEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcAddCodeValuesEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcAddCodingSchemeDesignatorEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcCodeMeaningEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcCodeValuesEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomIdcCodingSchemeDesignatorEval_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrAiCodeMeaning_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrAiCodeValues_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrAiCodingSchemeDesignator_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrIdcCodeMeaning_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrIdcCodeValues_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("dicomSrIdcCodingSchemeDesignator_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("evalAddCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("evalCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("finalCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("idcAddSegSeriesInstancceUIDs", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("idcExpertAddCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("idcExpertCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("idcSegSeriesInstanceUIDs", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("mhub_model_name_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("mhubai_custom_config_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("mhubaiCustomSegmentAlgorithmName_lst", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("mhubCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        # NOTE(review): declared REPEATED but not listed in col_lsts, so this column is
        # not passed through literal_eval above — confirm which of the two is intended.
        bigquery.SchemaField("radsAiCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
        bigquery.SchemaField("radsIdcExpertAddCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("radsIdcExpertCompressedOutputFile", bigquery.enums.SqlTypeNames.STRING),
        bigquery.SchemaField("seriesInstanceUIDs", bigquery.enums.SqlTypeNames.STRING, mode='REPEATED'),
    ],
    # Rows are appended; re-running this cell without recreating the dataset duplicates them.
    write_disposition="WRITE_APPEND",
)
table_id = f'{project_id}.{bq_dataset_id}.terra_mhub_all_collections_out_table'
job = client.load_table_from_dataframe(outTempDf, table_id, job_config=job_config)
# load_table_from_dataframe only starts the job; wait for it so that a failed load
# raises here instead of going unnoticed.
job.result()
print(f"Loaded {job.output_rows} rows into {table_id}")

col_processed : adcSeriesInstanceUIDs
col_processed : dicomAiCodeMeaningEval_lst
col_processed : dicomAiCodeValuesEval_lst
col_processed : dicomAiCodingSchemeDesignatorEval_lst
col_processed : dicomCodeMeaningProstate_lst
col_processed : dicomCodeValuesProstate_lst
col_processed : dicomCodingSchemeDesignatorProstate_lst
col_processed : dicomIdcAddCodeMeaningEval_lst
col_processed : dicomIdcAddCodeValuesEval_lst
col_processed : dicomIdcAddCodingSchemeDesignatorEval_lst
col_processed : dicomIdcCodeMeaningEval_lst
col_processed : dicomIdcCodeValuesEval_lst
col_processed : dicomIdcCodingSchemeDesignatorEval_lst
col_processed : dicomSrAiCodeMeaning_lst
col_processed : dicomSrAiCodeValues_lst
col_processed : dicomSrAiCodingSchemeDesignator_lst
col_processed : dicomSrIdcCodeMeaning_lst
col_processed : dicomSrIdcCodeValues_lst
col_processed : dicomSrIdcCodingSchemeDesignator_lst
col_processed : evalAddCompressedOutputFile
col_processed : evalCompressedOutputFile
col_processed : idcAddSegSeries

## Export DICOM Stores to BigQuery tables

AI DICOM SEG

In [None]:
bq_table_path = ".".join((project_id, bq_dataset_id, bq_ai_dicom_seg))
!gcloud healthcare dicom-stores export bq $ai_dicom_seg_store \
  --location=$location \
  --dataset=$dicom_dataset_id \
  --bq-table=bq://{bq_table_path}

Request issued for: [aiDicomSegDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSegDicomStore


AI DICOM SR

In [None]:
bq_table_path = ".".join((project_id, bq_dataset_id, bq_ai_dicom_sr))
!gcloud healthcare dicom-stores export bq $ai_dicom_sr_store \
  --location=$location \
  --dataset=$dicom_dataset_id \
  --bq-table=bq://{bq_table_path}

Request issued for: [aiDicomSrDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/aiDicomSrDicomStore


IDC DICOM SEG

In [None]:
bq_table_path = ".".join((project_id, bq_dataset_id, bq_idc_dicom_seg))
!gcloud healthcare dicom-stores export bq $idc_dicom_seg_store \
  --location=$location \
  --dataset=$dicom_dataset_id \
  --bq-table=bq://{bq_table_path}

Request issued for: [idcDicomSegDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSegDicomStore


IDC DICOM SR

In [None]:
bq_table_path = ".".join((project_id, bq_dataset_id, bq_idc_dicom_sr))
!gcloud healthcare dicom-stores export bq $idc_dicom_sr_store \
  --location=$location \
  --dataset=$dicom_dataset_id \
  --bq-table=bq://{bq_table_path}

Request issued for: [idcDicomSrDicomStore]
name: projects/idc-sandbox-003/locations/us/datasets/prostate_seg_terra_mhub_v3_sitk_res_dataset/dicomStores/idcDicomSrDicomStore


Quantitative evaluation results

In [None]:
# Show the quantitative evaluation frame as this cell's output (it is defined outside this section).
# NOTE(review): the next cell loads quantEvalResults.csv, not this frame directly —
# presumably that file was written from quantEvalDf; confirm.
quantEvalDf

Unnamed: 0,refSerieUID,ai_seg_serieUID,idc_seg_serieUID,aiSegmentNumber,idcSegmentNumber,idcSegmentCodeMeaning,aiSegmentCodeMeaning,aiSegmentCodeValue,aiSegmentCodingScheme,idcSegmentCodeValue,...,dsc,hsdff,hsdff_95,tp,fp,tn,fn,sensitivity,specificity,asd
0,1.3.6.1.4.1.14519.5.2.1.3671.4754.212083125906...,1.2.276.0.7230010.3.1.3.313263360.143.17295686...,1.2.276.0.7230010.3.1.3.313263360.801.17295642...,1,4,Prostate,Prostatic_structure,41216001,SCT,T-9200B,...,0.856750,8.192882,2.500619,91991,24923,7217279,5839,0.940315,0.996559,0.00
1,1.3.6.1.4.1.14519.5.2.1.3671.4754.124813816937...,1.2.276.0.7230010.3.1.3.313263360.195.17295686...,1.2.276.0.7230010.3.1.3.313263360.447.17295641...,1,3,Prostate,Prostatic_structure,41216001,SCT,T-9200B,...,0.927317,5.999993,0.662913,192154,17021,8690620,13101,0.936172,0.998045,0.00
2,1.3.6.1.4.1.14519.5.2.1.3671.4754.155463265953...,1.2.276.0.7230010.3.1.3.313263360.247.17295686...,1.2.276.0.7230010.3.1.3.313263360.940.17295642...,1,3,Prostate,Prostatic_structure,41216001,SCT,T-9200B,...,0.459570,15.339385,8.822466,30046,14499,7239321,56166,0.348513,0.998001,0.00
3,1.3.6.1.4.1.14519.5.2.1.3671.4754.117459950571...,1.2.276.0.7230010.3.1.3.313263360.298.17295686...,1.2.276.0.7230010.3.1.3.313263360.239.17295641...,1,4,Prostate,Prostatic_structure,41216001,SCT,T-9200B,...,0.556203,27.999924,12.249967,144321,3529,6965402,226780,0.388900,0.999494,0.00
4,1.3.6.1.4.1.14519.5.2.1.3671.4754.165941479363...,1.2.276.0.7230010.3.1.3.313263360.350.17295687...,1.2.276.0.7230010.3.1.3.313263360.728.17295642...,1,3,Prostate,Prostatic_structure,41216001,SCT,T-9200B,...,0.827481,6.000004,2.341433,54471,12105,7787136,10608,0.836998,0.998448,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19,1.3.6.1.4.1.14519.5.2.1.7311.5101.166265209513...,1.2.276.0.7230010.3.1.3.313263360.477.17295668...,1.2.276.0.7230010.3.1.3.313263360.173.17295641...,2,2,Transition_zone_of_prostate,Structure_of_transition_zone_of_prostate_(body...,399384005,SCT,399384005,...,0.441981,67.083900,6.948081,9328,22315,2768782,1239,0.882748,0.992005,0.00
20,1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049...,1.2.276.0.7230010.3.1.3.313263360.367.17295668...,1.2.276.0.7230010.3.1.3.313263360.828.17295641...,1,1,Peripheral_zone_of_prostate,Structure_of_peripheral_glandular_zone_of_pros...,279706003,SCT,279706003,...,0.759743,10.259142,3.298854,14894,4024,2777350,5396,0.734056,0.998553,0.93
21,1.3.6.1.4.1.14519.5.2.1.7310.5101.260386623049...,1.2.276.0.7230010.3.1.3.313263360.367.17295668...,1.2.276.0.7230010.3.1.3.313263360.828.17295641...,2,2,Transition_zone_of_prostate,Structure_of_transition_zone_of_prostate_(body...,399384005,SCT,399384005,...,0.825391,18.282505,4.716990,28294,10697,2761399,1274,0.956913,0.996141,0.00
22,1.3.6.1.4.1.14519.5.2.1.7310.5101.107276353018...,1.2.276.0.7230010.3.1.3.313263360.748.17295668...,1.2.276.0.7230010.3.1.3.313263360.973.17295642...,1,1,Peripheral_zone_of_prostate,Structure_of_peripheral_glandular_zone_of_pros...,279706003,SCT,279706003,...,0.744647,8.893906,3.139955,8642,2501,1931031,3426,0.716109,0.998707,0.88


In [None]:
# Load the local CSV file into the table <bq_dataset_id>.<bq_quant_eval> of project_id,
# letting BigQuery infer the schema from the file (--autodetect=true).
# NOTE(review): quantEvalResults.csv is not written in this cell; it must already
# exist in the working directory when the cell runs.
path_bq_table=f"{bq_dataset_id}.{bq_quant_eval}"
path_csv="quantEvalResults.csv"
!bq load --source_format=CSV --project_id=$project_id \
--autodetect=true $path_bq_table $path_csv

Upload complete.
Waiting on bqjob_r2e9c97122eeb89bd_00000192b4a4684f_1 ... (1s) Current status: DONE   
