# Data Exploration

author = Caroline Magg <br>
date = 31 March 2020 <br>

___________________________________
history: <br>
2020-03-31 <br>
inspect single folder content <br>
write methods for reading structure, contour names and contour content <br>
2020-04-02 <br>
generate list of contours

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import pydicom
import vtk

In [2]:
from utils_read import read_structure,read_contour,read_contour_names,read_contour_row

In [3]:
%matplotlib inline

### Add paths and dependencies
The paths below depend on your local environment; adjust them as needed.

In [4]:
# Make the KidsBrainProject main folder (two directory levels above the
# working directory) importable.
sys.path.append(os.path.abspath(os.path.join(os.pardir, os.pardir)))

In [5]:
# Directory that holds the numbered data folders, given relative to the
# working directory. Change this to wherever the data lives on your machine.
path_data = "../../Data/" 

# Inspect single folder content

In [6]:
# Select folder number 1 below path_data and list its entries.
idx = 1
folder_name = os.path.join(path_data, str(idx))
folder = os.listdir(folder_name)
# Bare expression: the notebook displays the listing as the cell output.
folder

['CT',
 'RD.Jacks1IMPT.dcm',
 'RD.Jacks1IMPTb.dcm',
 'RS.Jacks1.dcm',
 'T1 +C 3-15-16',
 'T1 +C 3-23-16',
 'T1 +C 4-29-16',
 'T2 CISS 3-15-16',
 'T2 CUBE 4-29-16']

### CT

In [7]:
# List the 'CT' sub-folder of the selected folder and show its first five entries.
folder_ct_name = os.path.join(path_data, str(idx),'CT')
folder_ct = os.listdir(folder_ct_name)
folder_ct[:5]

['CT.1.1.dcm', 'CT.1.10.dcm', 'CT.1.100.dcm', 'CT.1.101.dcm', 'CT.1.102.dcm']

In [8]:
# Read the CT folder with the project helper read_structure (from utils_read)
# and display the length of the returned object.
df_ct = read_structure(folder_ct_name)
len(df_ct)

292

### MRI

In [9]:
# List the 'T1 +C 3-15-16' sub-folder (one of the MRI series in the selected
# folder) and show its first five entries.
folder_mri_name = os.path.join(path_data, str(idx),'T1 +C 3-15-16')
folder_mri = os.listdir(folder_mri_name)
folder_mri[:5]

['MR.1.1.dcm', 'MR.1.10.dcm', 'MR.1.100.dcm', 'MR.1.101.dcm', 'MR.1.102.dcm']

In [10]:
# Read the MRI folder with the same helper used for the CT folder and display
# the length of the returned object.
df_mri = read_structure(folder_mri_name)
len(df_mri)

292

### Contours

In [11]:
# Path of the 'RS.Jacks1.dcm' file that sits directly in the selected folder.
path_contours = os.path.join(path_data, str(idx), 'RS.Jacks1.dcm')

In [12]:
# Load the ROI names from the file at path_contours (the helper also prints
# the number of ROIs) and display how many entries were returned.
df_contours = read_contour_names(path_contours)
len(df_contours)

number of ROIs 104


104

In [13]:
# Read the contours for the ROIs listed in df_contours (the helper prints each
# ROI name and its number of contours) and display the length of the result.
contours = read_contour(path_contours, df_contours)
len(contours)

number of ROIs 104
0 Body
number of contour 293
1 Brain
number of contour 177
2 Brainstem
number of contour 51
3 BrainstemCore
number of contour 51
4 BrainstemSurf
number of contour 102
5 BrainSupraTent
number of contour 248
6 CochleaLt
number of contour 5
7 CochleaRt
number of contour 4
8 CTV
number of contour 38
9 GTV
number of contour 34
10 HippoHeadLt
number of contour 17
11 HippoHeadRt
number of contour 16
12 HippoTailLt
number of contour 21
13 HippoTailRt
number of contour 21
14 Hypothalamus
number of contour 20
15 LacrimalLt
number of contour 17
16 LacrimalRt
number of contour 19
17 LensLt
number of contour 9
18 LensRt
number of contour 8
19 MastoidLt
number of contour 36
20 MastoidRt
number of contour 37
21 NonTargetBody
number of contour 337
22 NonTargetBrain
number of contour 194
23 NPX
number of contour 16
24 OpticChiasm
number of contour 3
25 OpticNerveLt
number of contour 16
26 OpticNerveRt
number of contour 14
27 Pituitary
number of contour 4
28 zPlanBody
number of contou

5973

# Inspect first 10 folders and generate contours list

In [23]:
# Collect one ROI-name table per folder for the folders 1..10. Each table is
# read from the file in that folder whose name contains 'RS', and the tables
# are stored in contours_all in folder order.
contours_all = []
for idx in range(1, 11):
    folder_name = os.path.join(path_data, str(idx))
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # of rs_file[0] reproducible when more than one name contains 'RS'.
    rs_file = sorted(x for x in os.listdir(folder_name) if 'RS' in x)
    if not rs_file:
        # Name the offending folder instead of failing with a bare IndexError.
        raise FileNotFoundError(
            "no file with 'RS' in its name found in {}".format(folder_name))
    path_contours = os.path.join(folder_name, rs_file[0])
    contours_all.append(read_contour_names(path_contours))
# Bare expression: the notebook displays the number of tables collected.
len(contours_all)

number of ROIs 104
number of ROIs 87
number of ROIs 97
number of ROIs 95
number of ROIs 84
number of ROIs 85
number of ROIs 90
number of ROIs 94
number of ROIs 92
number of ROIs 84


10

In [101]:
# Merge the per-folder ROI tables in contours_all into one table, df_all, with
# a single row per distinct RoiName. A row keeps the values of the first table
# in which the name occurs; 'Count' lists the 1-based position in contours_all
# of every table that contains the name.
#
# Rows are gathered in plain Python containers and the frame is built once:
# DataFrame.append was removed in pandas 2.0, and appending to a list through
# a row copied out of the frame is fragile.
base_columns = ['ID', 'RoiNumber', 'RoiName', 'Count']
records_by_name = {}  # RoiName -> row dict, in order of first occurrence
row_labels = []       # original row label of each first occurrence
for folder_no, df_single in enumerate(contours_all, start=1):
    for label, row in df_single.iterrows():
        name = row['RoiName']
        record = records_by_name.get(name)
        if record is None:
            record = row.to_dict()
            record['Count'] = [folder_no]
            records_by_name[name] = record
            row_labels.append(label)
        else:
            # NOTE(review): a name repeated inside one table is recorded once
            # per occurrence, exactly as the previous implementation did.
            record['Count'].append(folder_no)

df_all = pd.DataFrame(list(records_by_name.values()), index=row_labels)
# Keep the four expected columns first (and create them even when
# contours_all is empty); any additional input columns follow.
extra_columns = [c for c in df_all.columns if c not in base_columns]
df_all = df_all.reindex(columns=base_columns + extra_columns)

In [102]:
# Number of distinct ROI names found across all inspected folders.
len(df_all)

188

In [103]:
# Display the merged table (one row per distinct ROI name).
df_all

Unnamed: 0,ID,RoiNumber,RoiName,Count
0,0,29,Body,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
1,1,1,Brain,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
2,2,28,Brainstem,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
3,3,3,BrainstemCore,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
4,4,4,BrainstemSurf,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
5,5,2,BrainSupraTent,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
6,6,6,CochleaLt,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
7,7,7,CochleaRt,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
8,8,5,CTV,"[1, 10]"
9,9,8,GTV,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"


In [104]:
# Sorted array of the distinct ROI names, handy for spotting spelling variants.
np.unique(df_all['RoiName'])

array(['ACL', 'Amydgala L', 'Amydgala LEFT', 'Amydgala R',
       'Amydgala left', 'Amydgala right', 'Amygdala left',
       'Amygdala right', 'Ant Thalamus L', 'Ant Thalamus R', 'BOS',
       'Body', 'Body-CTV', 'Brain', 'Brain-CTV', 'BrainInfratent',
       'BrainSupraTent', 'Brainstem', 'BrainstemCore', 'BrainstemSurf',
       'C INF', 'C POST', 'C1A', 'C1P', 'CG L', 'CG R', 'CTV', 'CTV1',
       'CTV2', 'Cerebel ANT YL', 'CerebelL ANT YL', 'Cerebell  ANT YL',
       'Cerebell ANT YL', 'Cerebell POST YL', 'Cerebell YL', 'Cerebellum',
       'Cerebellum Jacks', 'CerebellumAnt LT', 'Cingulum left',
       'Cingulum right', 'Circuit of papez', 'CochleaLt', 'CochleaRt',
       'Corpus callosum', 'Dentate nucleus', 'Dose 102.8[%]',
       'Dose 103.7[%]', 'Dose 103.8[%]', 'Dose 104[%]', 'Dose 105.8[%]',
       'Dose 95[%]', 'Entorhinal C L', 'Entorhinal cor L',
       'Entorhinal cor l', 'Entorhinal cor.1', 'Entorhinal cor.L',
       'Entorhinal cor.R', 'EntorhinalC L', 'EntorhinalC R',


In [105]:
# Write the merged table to 'all_unique_contours.csv' in the working
# directory; the row index is not written.
df_all.to_csv('all_unique_contours.csv',index=False)