In [2]:
import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm
import scipy.ndimage
import cv2
import os
from matplotlib import pyplot as plt
import sys
import skimage
from  scipy import ndimage


# Resolve the project root: keep the parent directory when its absolute path
# contains the repository name, otherwise fall back to the working directory.
parent_dir = os.path.abspath('../')
parent_dir = (
    parent_dir
    if "UCAN-PET-CT-image-data-handling-pipeline" in parent_dir
    else os.path.abspath('./')
)

# Register the project root on the import path, but only once.
if not (parent_dir in sys.path):
    sys.path.append(parent_dir)
print("parent_dir: ", parent_dir)

from Utils import utils

# Load the main configuration through the project's utils helper.
config = utils.read_config()

# Selects which of the configured source paths to use; only 1 and 2 are recognised.
system = 1
if system not in (1, 2):
    # Unrecognised value: report it and leave the source path empty.
    source_path = ""
    print("Invalid system")
elif system == 2:
    # System 2 also sets CUDA_VISIBLE_DEVICES to "0" before resolving its path.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    source_path = config["Source"]["paths"]["source_path_system_2"]
else:
    source_path = config["Source"]["paths"]["source_path_system_1"]

env: SITK_SHOW_COMMAND='/home/andres/Downloads/Slicer-5.4.0-linux-amd64/Slicer'


In [6]:
# Load the three collage index spreadsheets. Each config entry is appended
# directly to source_path (plain string concatenation, no separator added).
# NOTE(review): the key spelling "rergession" is kept as-is; presumably it
# matches the config file - verify before renaming.
(
    collages_dataset_for_regression,
    collages_dataset_for_classification,
    collages_dataset_for_classification2,
) = (
    pd.read_excel(source_path + config[config_key])
    for config_key in (
        "collages_for_rergession_dataframe",
        "collages_for_classification_dataframe",
        "collages_for_classification_dataframe_new_diagnosis",
    )
)

### Regression Summary

In [13]:
# Regression set: distinct patient IDs vs. number of rows with a patient ID.
print("Number of patients: ", collages_dataset_for_regression["patient_ID"].nunique())
print("Number of exams: ", collages_dataset_for_regression["patient_ID"].count())
# Preview the first two rows.
collages_dataset_for_regression.head(2)

Number of patients:  1062
Number of exams:  1783


Unnamed: 0,patient_ID,scan_date,SUV_MIP,CT_MIP,SUV_bone,CT_bone,SUV_lean,CT_lean,SUV_adipose,CT_adipose,SUV_air,CT_air,patient_age
0,lpr385705046400,20140313,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,33
1,lpr415675513429,20190201,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,32


### Classification Summary

In [14]:
# Classification set: distinct patient IDs vs. number of rows with a patient ID.
print("Number of patients: ", collages_dataset_for_classification["patient_ID"].nunique())
print("Number of exams: ", collages_dataset_for_classification["patient_ID"].count())
# Preview the first two rows.
collages_dataset_for_classification.head(2)

Number of patients:  364
Number of exams:  720


Unnamed: 0,patient_ID,scan_date,SUV_MIP,CT_MIP,SUV_bone,CT_bone,SUV_lean,CT_lean,SUV_adipose,CT_adipose,SUV_air,CT_air,sex,diagnosis_groups,GT_diagnosis_label
0,npr100169878450,20130412,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,1,C81,0
1,npr100169878450,20130611,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,1,C81,0


In [21]:
# Sex classification: number of distinct patients per sex label
collages_dataset_for_classification.groupby(['sex'])['patient_ID'].nunique()

sex
0    144
1    220
Name: patient_ID, dtype: int64

In [18]:
# Sex classification: number of rows (exams) per sex label
collages_dataset_for_classification['sex'].value_counts()

sex
1    472
0    248
Name: count, dtype: int64

In [22]:
# Diagnosis classification: number of distinct patients per diagnosis group
collages_dataset_for_classification.groupby(['diagnosis_groups'])['patient_ID'].nunique()

diagnosis_groups
C81    119
C82     32
C83    132
C84     10
C85     59
C86      4
C88      8
Name: patient_ID, dtype: int64

In [19]:
# Diagnosis classification: number of rows (exams) per diagnosis group
collages_dataset_for_classification['diagnosis_groups'].value_counts()

diagnosis_groups
C81    302
C83    223
C85     97
C82     51
C84     28
C88     11
C86      8
Name: count, dtype: int64

In [23]:
# Diagnosis classification: number of distinct patients per GT_diagnosis_label value
collages_dataset_for_classification.groupby(['GT_diagnosis_label'])['patient_ID'].nunique()

GT_diagnosis_label
0    119
1    132
2    113
Name: patient_ID, dtype: int64

In [24]:
# Diagnosis classification: number of rows (exams) per GT_diagnosis_label value
collages_dataset_for_classification['GT_diagnosis_label'].value_counts()

GT_diagnosis_label
0    302
1    223
2    195
Name: count, dtype: int64

In [15]:
# New-diagnosis classification set: distinct patient IDs vs. number of rows with a patient ID.
print("Number of patients: ", collages_dataset_for_classification2["patient_ID"].nunique())
print("Number of exams: ", collages_dataset_for_classification2["patient_ID"].count())
# Preview the first two rows.
collages_dataset_for_classification2.head(2)

Number of patients:  186
Number of exams:  396


Unnamed: 0.1,Unnamed: 0,SUV_MIP,CT_MIP,SUV_bone,CT_bone,SUV_lean,CT_lean,SUV_adipose,CT_adipose,SUV_air,CT_air,unique_patient_ID_scan_date,patient_ID,scan_date,sex,diagnosis_groups,diagnosis_groups_new,GT_diagnosis_label,GT_diagnosis_label_new
0,1756,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,npr967652562138_20100730,npr967652562138,20100730,1,C81,C81.9,0,2
1,1757,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,/media/andres/T7 Shield1/UCAN_project/collages...,npr967652562138_20100930,npr967652562138,20100930,1,C81,C81.9,0,2


In [26]:
# New diagnosis labels: number of distinct patients per GT_diagnosis_label_new value
collages_dataset_for_classification2.groupby(['GT_diagnosis_label_new'])['patient_ID'].nunique()

GT_diagnosis_label_new
0    87
1    51
2    48
Name: patient_ID, dtype: int64

In [27]:
# New diagnosis labels: number of rows (exams) per GT_diagnosis_label_new value
collages_dataset_for_classification2['GT_diagnosis_label_new'].value_counts()

GT_diagnosis_label_new
0    139
1    131
2    126
Name: count, dtype: int64

In [None]:
# Regression evaluation is currently disabled; the calls are kept for reference.
#regression_metrics = utils.evaluate_best_models_all_folds(system=1, type="regression", category=None, experiment_number=3, folds_list=list(range(10)))
#print(regression_metrics)

# For classification: sex
sex_classification_metrics = utils.evaluate_best_models_all_folds(
    system=1,
    type="classification",
    category="Sex",
    experiment_number=2,
    folds_list=list(range(10)),
)
print(sex_classification_metrics)

# For classification: diagnosis groups - C81, C83, Others
diagnosis_classification_metrics = utils.evaluate_best_models_all_folds_metric_based(
    system=1,
    type="classification",
    category="Diagnosis",
    experiment_number=1,
    folds_list=list(range(10)),
)
print(diagnosis_classification_metrics)

# For classification: diagnosis groups - C83.3, C81.1, C81.9
# NOTE: this rebinds diagnosis_classification_metrics, so the experiment 1
# result above is only available through its printed output.
diagnosis_classification_metrics = utils.evaluate_best_models_all_folds_metric_based(
    system=1,
    type="classification",
    category="Diagnosis",
    experiment_number=2,
    folds_list=list(range(10)),
)
print(diagnosis_classification_metrics)