# Prostate segmentation evaluation on IDC collection -- Radiomics -- QIN prostate repeatability
*   Dataset : [QIN-Prostate-Repeatability]()
*   Goal : Prostate segmentation using Task24 Prostate nnU-net pre-trained model, T2 input

# Custom functions

## IDC processing

In [None]:
def parse_json_dcmqi(json_path):
  """
  Map each segment's CodeMeaning to its labelID from a dcmqi metadata JSON.

  json_path : str, path of the dcmqi JSON file; it must contain a
              "segmentAttributes" list whose items are lists of dicts

  Returns a dict {CodeMeaning : labelID}. Only the first dict of each
  "segmentAttributes" item is read. If two segments share a CodeMeaning,
  the later one overwrites the earlier one.
  """
  # Context manager so the file handle is closed as soon as parsing is done
  with open(json_path) as json_file:
    data = json.load(json_file)
  return {
      group[0]["SegmentedPropertyTypeCodeSequence"]["CodeMeaning"]: group[0]["labelID"]
      for group in data["segmentAttributes"]
  }

In [None]:
def convert_image_dcm_to_nrrd(input_path, output_path_root, target_format="nrrd", prefix=""):
  """
  Convert a folder of DICOM instances with the dcm2niix command-line tool.

  NOTE(review): despite the function name and the "nrrd" default of
  target_format, the body always runs dcm2niix and never reads
  target_format, so the output format does not depend on that argument.

  input_path : str, folder containing DICOM instances .dcm
  output_path_root : str, output folder (created when it does not exist)
  target_format : str, currently unused
  prefix : str, text inserted after "%i_" in the dcm2niix -f file name pattern

  Returns None; the converted files are written by dcm2niix into
  output_path_root.
  """
  # Create the output folder on first use (shell call through IPython)
  if not os.path.exists(output_path_root):
    !mkdir -p $output_path_root
  # -f sets the output file name pattern, -o the output folder; see the
  # dcm2niix help for the meaning of -z, -m and the %i placeholder
  !dcm2niix -z y -m y -f %i_{prefix} -o $output_path_root $input_path

In [None]:
def convert_seg_to_nii(input_path, output_path):
  """
  Convert a DICOM SEG object to NIfTI with dcmqi's segimage2itkimage.

  input_path : str, value passed to --inputDICOM
               NOTE(review): the main loop of this notebook passes the path
               of a single .dcm file here, not a folder
  output_path : str, output folder (created when it does not exist)

  Returns None; the outputs are written by segimage2itkimage into
  output_path.
  """
  # Create the output folder on first use (shell call through IPython)
  if not os.path.exists(output_path):
    !mkdir -p $output_path
  print(f'input path : {input_path}')
  print(f'output_path : {output_path}')
  # --outputType nii requests NIfTI output files
  !segimage2itkimage --inputDICOM $input_path --outputDirectory $output_path \
  --outputType nii

In [None]:
def convert_dcm_sorted(input_path,output_path, idc_df):
  """
  Convert every DICOM series found under input_path with
  convert_image_dcm_to_nrrd.

  input_path : str, root folder containing all series sorted by patid/studyUID/serieUID/sopUID.dcm
  output_path : str, root folder of where to store the converted series
  idc_df : pandas.Dataframe, IDC metadata of the series; kept for interface
           compatibility, it is not needed for the conversion itself

  For each series folder the SeriesDescription and Modality of its first
  .dcm file are printed, then the folder is converted with the
  SeriesInstanceUID (the folder name) as file name prefix.
  Entries that do not contain any .dcm file are reported and skipped.
  """
  # Without recursive=True each "**" behaves like "*", so this matches the
  # entries exactly three levels below input_path: patID/studyUID/serieUID
  for serie_folder in sorted(glob.glob(os.path.join(input_path, "**", "**", "*"))):
    path_serie_dcm_lst = glob.glob(os.path.join(serie_folder, "*.dcm"))
    if not path_serie_dcm_lst:
      # Stray file or empty folder: nothing to convert here
      print(f"No .dcm instance found in {serie_folder}, skipping")
      continue
    seriesInstanceUID = os.path.basename(serie_folder)
    # Parse the first instance once, header only: pixel data is not needed for
    # the two attributes printed below. dcmread replaces the read_file alias,
    # which recent pydicom releases no longer provide.
    header = pydicom.dcmread(path_serie_dcm_lst[0], stop_before_pixels=True)
    print(f"Serie processed : {serie_folder}")
    print(f"SeriesDescription : {header.SeriesDescription}")
    print(f"Modality : {header.Modality}")
    #convert to nii
    convert_image_dcm_to_nrrd(input_path=serie_folder,
                           output_path_root=output_path,
                           prefix=f"{seriesInstanceUID}")

In [None]:
def download_idc_data_serie_uid(idc_df, out_path, out_path_nii):
  """
  Download IDC data from gcs_urls retrieved from BigQuery, sort the files by
  patient/study/series and convert the series with convert_dcm_sorted.

  idc_df : pandas.Dataframe, contains MR series DICOM metadata and gcs_urls
           (the "gcs_url" column is read here)
  out_path : str, folder that receives the sorted DICOM tree
  out_path_nii : str, output path folder for converted NIFTI volumes MR T2

  Reads the environment variables IDC_IMG_Downloads (raw download folder,
  also holds the manifest) and IDC_IMG_Downloads_Sorted (dicomsort output).
  Returns None.
  """
  # save the list of GCS URLs into a file
  selection_manifest = os.path.join(os.environ["IDC_IMG_Downloads"], "idc_manifest.txt")
  idc_df["gcs_url"].to_csv(selection_manifest, header=False, index=False)
  # Feed the manifest to gsutil on stdin and copy every listed object into IDC_IMG_Downloads
  !cat {selection_manifest} | gsutil -m cp -I {os.environ["IDC_IMG_Downloads"]}
  # Sort the downloaded files into PatientID/StudyInstanceUID/SeriesInstanceUID/SOPInstanceUID.dcm
  !python dicomsort/dicomsort.py -k -u {os.environ["IDC_IMG_Downloads"]} {os.environ["IDC_IMG_Downloads_Sorted"]}/%PatientID/%StudyInstanceUID/%SeriesInstanceUID/%SOPInstanceUID.dcm
  # !rm -rf {os.environ["qin_prostate_rep_dicom"]+"/*"}
  # Move the sorted patient folders into out_path, then convert them from there
  in_mv = os.environ['IDC_IMG_Downloads_Sorted']+'/*'
  !mv $in_mv $out_path
  convert_dcm_sorted(input_path=out_path,
                  output_path=out_path_nii, idc_df=idc_df)

## Radiomics imports and config

This function below might have to be updated depending on the format of your dicomseg json file.

If you have a single nii file from nnUNet that holds multiple label values, this splits it into multiple segments.

This is the function to compute the features

In [None]:
def compute_pyradiomics_3D_features(ct_nifti_path,
                                    label_values,
                                    label_names,
                                    split_pred_nifti_path,
                                    nnunet_shape_features_code_mapping_df):

  """Compute the pyradiomics features of every segment mask of one image.

  Inputs:
    ct_nifti_path            : the MR nifti file
    label_values             : the label value for each of the segments from the json file
    label_names              : the corresponding label name for each of the segments
    split_pred_nifti_path    : folder holding one mask per segment, named
                               "<label value>.nii.gz"
    nnunet_shape_features_code_mapping_df : currently unused (all features
                                            are enabled); kept for interface
                                            compatibility

  Outputs:
    pandas.DataFrame with one row per segment: the columns
    'ReferencedSegment' and 'label_name' followed by every key returned by
    the extractor, with the 'original_shape_' prefix removed from the
    column names.

  Raises:
    ValueError        : label_values and label_names have different lengths,
                        or no label was given (nothing to concatenate)
    FileNotFoundError : the image or one of the segment masks is missing

  """
  label_values = list(label_values)
  label_names = list(label_names)
  # Validate the inputs up front with real exceptions (assert statements are
  # stripped when Python runs with -O)
  if len(label_values) != len(label_names):
    raise ValueError(
        f"label_values ({len(label_values)}) and label_names "
        f"({len(label_names)}) must have the same length")
  if not os.path.exists(ct_nifti_path):
    raise FileNotFoundError(f"Image file not found : {ct_nifti_path}")

  # Instantiate the extractor and modify the settings to keep the 3D shape features
  extractor = featureextractor.RadiomicsFeatureExtractor()
  extractor.settings['minimumROIDimensions'] = 3
  extractor.settings['correctMask'] = True
  extractor.enableAllFeatures()

  # Calculate features for each label and create a dataframe.
  # The loop is driven by the labels themselves rather than by the number of
  # .nii.gz files in the folder, so unrelated files there cannot push the
  # index past the end of label_values.
  df_list = []
  for label_value, label_name in zip(label_values, label_names):
    mask_path = os.path.join(split_pred_nifti_path, str(label_value) + '.nii.gz')
    print(mask_path)
    if not os.path.exists(mask_path):
      raise FileNotFoundError(f"Segment mask not found : {mask_path}")
    # Run the extractor
    result = extractor.execute(ct_nifti_path, mask_path, label=label_value) # dictionary
    row = {'ReferencedSegment': label_value,
           'label_name': label_name}
    row.update(result)
    df1 = pd.DataFrame([row])
    # change values of columns to remove original_shape_
    df1.columns = df1.columns.str.replace('original_shape_', '', regex=False)
    df_list.append(df1)

  # concat all label features
  df = pd.concat(df_list)

  return df

Get the label values and label names from the dicomseg json file. If you have multiple labels in one nifti file, call `split_nii` to split them into separate files. Then compute the radiomics features for each of the labels. `ct_nifti_path` is the MR nifti file.

# Colab

In [None]:
#colab
# Authenticate the current Colab user; presumably required by the gsutil /
# gcloud / BigQuery calls made later in this notebook (TODO confirm)
from google.colab import auth
auth.authenticate_user()

# Setup GCP Project ID

In [None]:
import os
# GCP project used by this notebook; exported as GCP_PROJECT_ID, which is
# read again when the BigQuery client is created
project_id = "idc-sandbox-003"
os.environ["GCP_PROJECT_ID"] = project_id

# Setup of the Colab VM



In the following cells we will confirm you have a GPU before doing anything else, and will install and import all the Python dependencies.

The main python packages we need to install are:
* `nnunet` - which is the [codebase for the nn-UNet framework](https://github.com/MIC-DKFZ/nnUNet) we are going to be using for the segmentation step;
* `pydicom`, a Python [package](https://github.com/pydicom/pydicom) that lets the user read, modify, and write DICOM data in an easy "pythonic" way - that we are going to use to distinguish different DICOM objects from each other.

## GPU checks

In [None]:
# check wether the Colab Instance was correctly initialized with a GPU instance
gpu_list = !nvidia-smi --list-gpus

has_gpu = False if "failed" in gpu_list[0] else True

if not has_gpu:
  print("Your Colab VM does not have a GPU - check \"Runtime > Change runtime type\"")

In [None]:
# check which model of GPU the notebook is equipped with - a Tesla K80 or T4
# T4 is the better performing of the two - and can about halve the GPU processing time

!nvidia-smi

Mon May 15 09:23:24 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Environment Setup

Here we will configure the Linux environment variables needed to run the nnU-Net pipeline.

Three main variables are needed by default to run the nnU-Net segmentation pipelines:
* `nnUNet_raw_data_base` is the path to the folder where the segmentation pipeline expects to find the data to process;
* `nnUNet_preprocessed` is the path to the folder where the preprocessed data are saved;
* `RESULTS_FOLDER` is the path to the folder storing by default the model weights and, in our case, for simplicity, the segmentation masks produced by the pipeline.

We will use the additional variable `PATH_TO_MODEL_FILE` to point to the location where the pre-trained model weights for the chosen model will be stored (more on this later).

Please notice that these variables need to be set using `os.environ[]` in Google Colab - as `!export` is not sufficient to guarantee the variables are kept from one cell to the other. For more in-depth information regarding what the nnU-Net framework uses these folders for, please visit [the dedicated nnU-Net documentation page](https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/setting_up_paths.md)

## Install command-line tools


[Plastimatch](https://plastimatch.org/index.html) is considered to be the swiss army knife of medical images processing: we will use it to convert DICOM (CT, RTSTRUCT) series to NRRD files - but it can be used for a multitude of other tasks, such as registration, resampling, cropping, and computing statistics to name a few. Plastimatch is also available as a 3DSlicer plug-in and can be used directly from the Slicer GUI.

For the sake of clarity and simplicity, we will call Plastimatch from a very simple [Python wrapper](https://github.com/denbonte/pyplastimatch) written for the occasion (unfortunately, Plastimatch does not provide an official one).

In [None]:
%%capture
!sudo apt update

!sudo apt install plastimatch

In [None]:
!echo $(plastimatch --version)

plastimatch version 1.8.0


[dcmqi](https://github.com/QIICR/dcmqi) is an open source library that can help with the conversion between imaging research formats and the standard DICOM representation for image analysis results. More specifically, you can use dcmqi to convert DICOM Segmentation objects (DICOM SEG) into research formats, such as NIfTI and NRRD.

In [None]:
%%capture
# Download the dcmqi 1.2.5 Linux release, unpack it and copy its binaries to
# /usr/local/bin so they can be called by name (segimage2itkimage is used
# later in this notebook)
!wget https://github.com/QIICR/dcmqi/releases/download/v1.2.5/dcmqi-1.2.5-linux.tar.gz
!tar zxvf dcmqi-1.2.5-linux.tar.gz
!cp dcmqi-1.2.5-linux/bin/* /usr/local/bin/

Finally, we are going to install [Subversion](https://subversion.apache.org/), a tool that will allow us to clone GitHub repositories only partially (to save time and space).

In [None]:
%%capture

!sudo apt install subversion

In [None]:
!echo $(svn --version | head -n 2)

svn, version 1.13.0 (r1867053) compiled May 12 2022, 20:47:08 on x86_64-pc-linux-gnu


## Install Python packages

In [None]:
%%capture
# Python dependencies of this notebook (installation output is hidden by %%capture)
# NOTE(review): no versions are pinned, so each run installs the latest releases
!pip install nnunet
!pip install pydicom
!pip install nibabel
!pip install dcm2niix
!pip install SimpleITK
!pip install medpy
!pip install pyradiomics

Unpack and install model we downloaded earlier (under `PATH_TO_MODEL_FILE`). This step can take about 1-2 minutes.

In [None]:
%%capture
# NOTE(review): PATH_TO_MODEL_FILE is not assigned in any cell visible in this
# notebook - confirm it is set in the environment before running this cell
!nnUNet_install_pretrained_model_from_zip $PATH_TO_MODEL_FILE

Next we set up few things to help with visualization of the segmentations later.

In [None]:
import os
import sys
import shutil
import csv
import random

import os
import glob
import csv
import json

import nibabel as nib

import time
import gdown

import json
import pprint
import numpy as np
import pandas as pd

import pydicom
import nibabel as nib
import SimpleITK as sitk
from medpy.metric.binary import asd


from radiomics import featureextractor
# from medpy.metric.binary import dc as dice_coef
# from medpy.metric.binary import hd as hausdorff_distance
# from medpy.metric.binary import asd as avg_surf_distance

# from medpy.filter.binary import largest_connected_component

# use the "tensorflow_version" magic to make sure TF 1.x is imported
# %tensorflow_version 2.x
# import tensorflow as tf
# import keras

# Report the result of the GPU check made earlier (has_gpu) instead of
# claiming a GPU unconditionally
if has_gpu:
  print("\nThis Colab instance is equipped with a GPU.")
else:
  print("\nWARNING: no GPU was detected on this Colab instance.")


This Colab instance is equipped with a GPU.


In [None]:
# PyPlastimatch - python wrapper for Plastimatch (and interactive notebook visualisation)
!svn checkout https://github.com/AIM-Harvard/pyplastimatch/trunk/pyplastimatch pyplastimatch

Checked out revision 25.


In [None]:
# dicomsort is the Python package that can sort DICOM files into a
# folder organization based on user-specified DICOM attributes
# (dicomsort/dicomsort.py is called when the IDC data is downloaded)
!git clone https://github.com/pieper/dicomsort.git

fatal: destination path 'dicomsort' already exists and is not an empty directory.


In [None]:
from pyplastimatch import pyplastimatch as pypla
from pyplastimatch.utils import viz as viz_utils
from pyplastimatch.utils import data as data_utils

# Data selection, downloading and structuring -- Conversion to DICOM

Here we want to select the collection named QIN-Prostate-Repeatability, and more particularly the two timepoints available for each patient ID, for further analysis.

In order to use data hosted by IDC effectively, you will need to utilize metadata to navigate what data is available and to select specific files that are relevant in your analysis. The main metadata table you will need for this purpose is the [`bigquery-public-data.idc_current.dicom_all`](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=idc_current&t=dicom_all&page=table) table.

This query has one row per file hosted by IDC. All of IDC data is in DICOM format, and each of the rows in this table will have all of the DICOM attributes extracted from a given file. It will also have various columns containing non-DICOM metadata, such as the name of the collection where the file is included, size of the file, and URL that can be used to retrieve that file.

To query IDC BigQuery tables, you can use one of the following approaches:
1. `%%bigquery` magic will allow you to define your query in plain SQL, and load the result of the query into a Pandas dataframe.
2. [BigQuery Python API](https://googleapis.dev/python/bigquery/latest/index.html) is more flexible in allowing you to parameterize your query.
3. [Google Cloud BigQuery console](https://console.cloud.google.com/bigquery) is very convenient for interactive query exploration of tables.
4. [`gcloud bq`](https://cloud.google.com/bigquery/docs/bq-command-line-tool) is the command line tool that comes as part of [Cloud SDK](https://cloud.google.com/sdk) and is convenient for scripting interactions from the shell. Cloud SDK is preinstalled on Colab.

In the following cells we will utilize `%%bigquery`, Python BigQuery SDK and BigQuery console for working with IDC BigQuery tables.

First, to verify that you are authenticated and that your project ID is working, let's run a test query against the IDC BigQuery table to get summary statistics about the data available in IDC.


Given `SeriesInstanceUID` value identifying the image series, we can query the IDC metadata table to get the list of files (defined by the Google Storage URLs) corresponding to this series.

All of the DICOM metadata for each of the DICOM files is available in the BigQuery table we will be querying. We will get not just the `gcs_url`, but also identifiers for the Study, Series and Instance, to better understand organization of data, and since `StudyInstanceUID` will be handy later when we get to the visualization of the data.

In [None]:
from google.cloud import bigquery
# BigQuery client bound to the project exported in GCP_PROJECT_ID
bq_client = bigquery.Client(project=os.environ["GCP_PROJECT_ID"])

Get SerieUIDs in bucket processed from nnunet

In [None]:
# Refresh the local listing of the nnU-Net DICOM SEG objects stored in the bucket.
# NOTE(review): on a fresh runtime the rm below reports a missing file;
# presumably harmless, since the redirect on the next line recreates the listing
!rm bucketUIDs_nnunet.csv
!gcloud storage ls --recursive gs://idc_qin_prostate_repeatability/model1/preds_processed_dcm/* > bucketUIDs_nnunet.csv
# One listed path per row; the first line of the listing is skipped
serieUID_nnunet_processed = pd.read_csv("bucketUIDs_nnunet.csv", names=["serieUID"], skiprows=[0])
# Keep the second "_"-separated token of each file name, without ".dcm" -
# presumably the files are named <prefix>_<SeriesInstanceUID>.dcm (TODO confirm)
seriesInstanceUID_nnunet_processed_lst = [x.split("/")[-1].split("_")[1].replace(".dcm","") for x in serieUID_nnunet_processed.serieUID.values]

In [None]:
len(seriesInstanceUID_nnunet_processed_lst)

30

Get processed series -- extracted radiomics for custom bigquery table

In [None]:
from google.cloud import bigquery
# Distinct RefSerieUID values already present in the radiomics results table,
# i.e. the series whose features were uploaded by a previous run
selection_query = f"""
    SELECT
      DISTINCT(RefSerieUID)
    FROM
      `idc-sandbox-003.qin_prostate_repeatability_serieUID.model1_preds_prostate_radiomics` as eval_table"""
selection_result = bq_client.query(selection_query)
selection_df = selection_result.result().to_dataframe()
# Array of the RefSerieUID column, used to compute the series still to process
eval_refSerieUID = selection_df.RefSerieUID.values

In [None]:
# Series that have an nnU-Net prediction in the bucket but no row yet in the
# radiomics table (set difference: bucket listing minus table content)
not_processed_RefSerieUID = set(seriesInstanceUID_nnunet_processed_lst) - set(eval_refSerieUID)

In [None]:
len(not_processed_RefSerieUID)

29

# Main loop

These define the shape features that we are extracting as well as the segments. You will likely have to create your own `nnunet_segments_code_mapping.csv` with the segments of the prostate.

In [None]:
# Download the segment code mapping CSV and store it under the local name
# that is read with pandas in the next cell
seg_rad_segment_nnUNet = "https://www.dropbox.com/s/4x2lo3ll7srw9jf/radiomics_json_segment.csv?dl=0"
out_path_mod = "nnunet_segments_code_mapping.csv"
!wget -O $out_path_mod $seg_rad_segment_nnUNet

--2023-05-15 09:24:13--  https://www.dropbox.com/s/4x2lo3ll7srw9jf/radiomics_json_segment.csv?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.85.18, 2620:100:6031:18::a27d:5112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.85.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/4x2lo3ll7srw9jf/radiomics_json_segment.csv [following]
--2023-05-15 09:24:13--  https://www.dropbox.com/s/raw/4x2lo3ll7srw9jf/radiomics_json_segment.csv
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc5e3ab8a0a65fbcdd5d1eed4afc.dl.dropboxusercontent.com/cd/0/inline/B8EjMWCYhWyN2ojK3rPSuszEQQDzC1_mO8COwxw5l0N2VOUXOmFOhwxOr5QAGuERlrqoOYn2hhGQuBMhmQQvkLkODWS6RtX3iKCxPqun6hcw6LhsUBftJAAUGPMSpMtkUAII5YKwtDNzZPemQvSezGrVtO3NAScSo01EE4ghso05-A/file# [following]
--2023-05-15 09:24:14--  https://uc5e3ab8a0a65fbcdd5d1eed4afc.dl.dropboxusercontent.com/cd/0/inline/B8EjMWCYhWyN2ojK3rPSuszEQQDzC1

In [None]:
nnunet_segments_code_mapping_df = pd.read_csv("nnunet_segments_code_mapping.csv")

In [None]:
nnunet_segments_code_mapping_df.head()

Unnamed: 0,segment,Finding_CodingSchemeDesignator,Finding_CodeValue,Finding_CodeMeaning,FindingSite_CodingSchemeDesignator,FindingSite_CodeValue,FindingSite_CodeMeaning
0,Prostate,SCT,41216001,Anatomical Structure,SCT,123037004,Prostate


In [None]:
from google.cloud import bigquery
# All dicom_all rows (one per DICOM file) of the MR series that still have to
# be processed. The SeriesInstanceUIDs are passed as an array query parameter
# instead of being formatted into the SQL text, so the query does not depend
# on Python's list repr happening to be valid SQL.
selection_query = """
    SELECT
      *
    FROM
      `bigquery-public-data.idc_v14.dicom_all` as dc_all
    WHERE
      Modality = 'MR'
    AND SeriesInstanceUID IN UNNEST(@series_uids)"""
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ArrayQueryParameter("series_uids", "STRING",
                                     sorted(not_processed_RefSerieUID)),
    ]
)
selection_result = bq_client.query(selection_query, job_config=job_config)
selection_t2_df = selection_result.result().to_dataframe()

In [None]:
import os

# Every working folder lives under the current working directory and is
# published through os.environ, where both Python code and shell commands
# can read it.
_base = os.getcwd()
os.environ.update({
    # nnU-Net predictions (DICOM SEG and converted NIfTI)
    "nnunet_preds": os.path.join(_base, "nnunet_preds"),
    "nnunet_preds_dcm": os.path.join(_base, "nnunet_preds", "dcm"),
    "nnunet_preds_nii": os.path.join(_base, "nnunet_preds", "nii"),
    # (the idc_seg / idc_seg_dcm / idc_seg_nii folders are currently disabled)
    # IDC image files (DICOM and converted NIfTI)
    "idc_data": os.path.join(_base, "idc_data"),
    "idc_data_dcm": os.path.join(_base, "idc_data", "dcm"),
    "idc_data_nii": os.path.join(_base, "idc_data", "nii"),
    # raw and sorted IDC downloads
    "IDC_Downloads": os.path.join(_base, "IDC_Downloads"),
    "IDC_Downloads_Sorted": os.path.join(_base, "IDC_Downloads_Sorted"),
    "IDC_IMG_Downloads": os.path.join(_base, "IDC_IMG_Downloads"),
    "IDC_IMG_Downloads_Sorted": os.path.join(_base, "IDC_IMG_Downloads_Sorted"),
    # evaluation outputs
    "prostatex_analysis": os.path.join(_base, "prostatex_analysis"),
    "prostatex_analysis_results": os.path.join(_base, "prostatex_analysis", "results"),
    "prostatex_analysis_verbose": os.path.join(_base, "prostatex_analysis", "results_verbose"),
})
del _base
#radiomics

In [None]:
def reset_folders():
  """
  Empty and recreate every working folder referenced by the environment.

  Every environment variable whose NAME contains "nnunet_preds", "idc",
  "IDC" or "prostatex_analysis" is treated as a folder path: whatever exists
  at that path is deleted and an empty folder is created in its place.
  Variables with an empty value are ignored.

  All deletions are done before any folder is recreated, so the result does
  not depend on whether a parent folder is visited before or after its
  sub-folders. Paths are handled in Python rather than through the shell, so
  spaces or shell metacharacters in a path are safe. A path that cannot be
  removed or created is reported and the remaining ones are still processed.
  """
  patterns = ("nnunet_preds", "idc", "IDC", "prostatex_analysis")
  # Snapshot of the matching paths taken before anything is modified
  paths = [path for key, path in list(os.environ.items())
           if path and any(pattern in key for pattern in patterns)]

  # Pass 1 : remove whatever currently exists at each path
  for path in paths:
    try:
      if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
      elif os.path.lexists(path):
        # plain file or symbolic link
        os.remove(path)
    except OSError as err:
      print(f"Could not remove {path} : {err}")

  # Pass 2 : recreate every folder, parents included
  for path in paths:
    try:
      os.makedirs(path, exist_ok=True)
    except OSError as err:
      print(f"Could not create {path} : {err}")

We process one SeriesInstanceUID at a time. The SeriesInstanceUIDs of the nnU-Net predictions stored in the GCS bucket are listed first; those that already have rows in the custom BigQuery radiomics table are then removed from that list, which leaves the set of series still to process (`not_processed_RefSerieUID`).

If that is not the desired behaviour :

- select all SeriesInstanceUIDs in the IDC MR T2 series BigQuery cell -- remove the `SeriesInstanceUID IN UNNEST(...)` filter from that query
- change not_processed_RefSerieUID below to list(selection_t2_df.SeriesInstanceUID.unique())

In [None]:
#whole process
for serieUID_current in list(not_processed_RefSerieUID):#list(selection_t2_df.SeriesInstanceUID.unique())
  #reset processing folders
  reset_folders()
  #download nnunet data
  !gsutil -m cp -r gs://idc_qin_prostate_repeatability/model1/preds_processed_dcm/*{serieUID_current}* {os.environ['nnunet_preds_dcm']}/
  ##convert to nii
  convert_seg_to_nii(input_path=glob.glob(f"{os.environ['nnunet_preds_dcm']}/*.dcm")[0], \
                    output_path=os.environ['nnunet_preds_nii'])
  #download_idc_data
  idc_data_df = selection_t2_df[selection_t2_df.SeriesInstanceUID \
                                                           == serieUID_current]
  download_idc_data_serie_uid(idc_df=idc_data_df,
                              out_path=os.environ["idc_data_dcm"],
                              out_path_nii=os.environ["idc_data_nii"])
  assert len(np.unique(idc_data_df.SeriesInstanceUID.values)) == 1
  nnunet_dic = parse_json_dcmqi(glob.glob(os.path.join(os.environ["nnunet_preds_nii"], \
                                        "**", "*.json"), recursive=True)[0])
  print(f"nnunet segment and labelIDs : {nnunet_dic}")
  # compute_pyradiomics_3D_features()
  df_features = compute_pyradiomics_3D_features(glob.glob(os.environ["idc_data_nii"]+"/*.nii.gz")[0],
                                              list(nnunet_dic.values()),
                                              list(nnunet_dic.keys()),
                                              os.environ["nnunet_preds_nii"],
                                              nnunet_segments_code_mapping_df)
  features_extract = df_features[["VoxelVolume", "MeshVolume", "Sphericity", "ReferencedSegment", "label_name"]]
  features_extract["MeshVolume"] = features_extract.MeshVolume.apply(lambda x : x[()])
  features_extract["Sphericity"] = features_extract.Sphericity.apply(lambda x : x[()])
  features_extract["RefSerieUID"] = [serieUID_current for x in range(len(features_extract))]
  features_extract["collection_id"] = ["qin_prostate_repeatability" for x in range(len(features_extract))]
  rows_to_insert = features_extract.to_dict('records')
  print(rows_to_insert)
  # upload to bigquery table
  # Construct a BigQuery client object.
  client = bigquery.Client()
  table_id = "idc-sandbox-003.qin_prostate_repeatability_serieUID.model1_preds_prostate_radiomics"
  errors = client.insert_rows_json(table_id, rows_to_insert)  # Make an API request.
  if errors == []:
      print("New rows have been added.")
  else:
      print("Encountered errors while inserting rows: {}".format(errors))