# Radiomics Feature Extraction

Extracting features from PET and CT images.

In [1]:
import nrrd
import utils
import ioutil

import numpy as np
import pandas as pd

from feature_extraction import feature_extractor

In [2]:
# Directories holding the cropped CT images, the cropped PET images and the
# cropped masks. All three live under the same raw-image root.
_raw_images_root = './../../../data/source/raw_images/'
path_ct_imagedir = _raw_images_root + 'ct_cropped/'
path_pet_imagedir = _raw_images_root + 'pet_cropped/'
path_masksdir = _raw_images_root + 'masks_cropped/'

In [3]:
# Collect the NRRD sample paths for each modality. CT and PET use the same
# mask directory, so only the image directory varies between the two calls.
paths_ct_images, paths_pet_images = (
    ioutil.sample_paths(image_dir, path_masksdir, target_format='nrrd')
    for image_dir in (path_ct_imagedir, path_pet_imagedir)
)

## Bin Width

PET and CT bin widths for histogram computation and image quantization (gray-level discretization).

In [6]:
# Bin width for the CT samples. The second argument (130) presumably is the
# target number of gray-level bins; it matches the `ct_130` config and result
# file names used in the extraction step. TODO confirm against utils.bin_width.
utils.bin_width(paths_ct_images, 130)

12.030159642370972

In [7]:
# Bin width for the PET samples. The second argument (64) presumably is the
# target number of gray-level bins; it matches the `pet_64` config and result
# file names used in the extraction step. TODO confirm against utils.bin_width.
utils.bin_width(paths_pet_images, 64)

0.2058715146898671

## Computing features

In [12]:
# Run the feature extractor on the CT samples with the `ct_130` parameter file
# and write the result to CSV. The return value is deliberately discarded.
ct_param_file = './parameter_files/no_filtering/ct_130_config.yaml'
ct_results_file = './../../../data/radiomics_features/no_filtering/ct_130.csv'
_ = feature_extractor(
    ct_param_file,
    paths_ct_images,
    verbose=1,
    path_to_results=ct_results_file,
)

Initiated feature extraction.


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   20.5s
[Parallel(n_jobs=3)]: Done 198 out of 198 | elapsed:  1.3min finished


In [13]:
# Run the feature extractor on the PET samples with the `pet_64` parameter file
# and write the result to CSV. The return value is deliberately discarded.
pet_param_file = './parameter_files/no_filtering/pet_64_config.yaml'
pet_results_file = './../../../data/radiomics_features/no_filtering/pet_64.csv'
_ = feature_extractor(
    pet_param_file,
    paths_pet_images,
    verbose=1,
    path_to_results=pet_results_file,
)

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   20.1s
[Parallel(n_jobs=3)]: Done 198 out of 198 | elapsed:  1.3min finished


[OrderedDict([('Image',
               './../../../data/source/raw_images/pet_cropped/P002PET.nrrd'),
              ('Mask',
               './../../../data/source/raw_images/masks_cropped/P002mask.nrrd'),
              ('Patient', 2),
              ('Reader', ''),
              ('original_shape_Elongation', 0.73888201976968),
              ('original_shape_Flatness', 0.7239253889917328),
              ('original_shape_LeastAxis', 27.06052891133134),
              ('original_shape_MajorAxis', 37.38027333040584),
              ('original_shape_Maximum2DDiameterColumn', 41.97618372363071),
              ('original_shape_Maximum2DDiameterRow', 44.598206241955516),
              ('original_shape_Maximum2DDiameterSlice', 42.720018726587654),
              ('original_shape_Maximum3DDiameter', 45.617978911828175),
              ('original_shape_MinorAxis', 27.619611857912968),
              ('original_shape_Sphericity', 0.661531512608995),
              ('original_shape_SurfaceArea', 4860.055

# Postprocessing

In [14]:
# Patient identifiers, used further down as the row index of every feature table.
# NOTE(review): this assumes the order in patient_id.npy matches the row order
# of the extracted feature CSVs; nothing here verifies it, so confirm.
true_index = np.load('./../../../data/source/patient_id.npy')

In [15]:
# Load the extracted CT features; the first CSV column becomes the row index.
ct_features_csv = './../../../data/radiomics_features/no_filtering/ct_130.csv'
raw_ct = pd.read_csv(ct_features_csv, index_col=0)
raw_ct.head()

Unnamed: 0,Image,Mask,Patient,Reader,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxis,original_shape_MajorAxis,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,...,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,label
0,./../../../data/source/raw_images/ct_cropped/P...,./../../../data/source/raw_images/masks_croppe...,2,,0.738882,0.723925,27.060529,37.380273,41.976184,44.598206,...,0.270957,0.003096,0.008828,0.001663,52.686,0.004953,0.023557,0.000736,0.004306,
1,./../../../data/source/raw_images/ct_cropped/P...,./../../../data/source/raw_images/masks_croppe...,4,,0.7969,0.629917,19.845151,31.504408,38.587563,35.468296,...,0.668202,0.005037,0.012243,0.003236,1350.718517,0.001131,0.182981,0.04507,0.001133,
2,./../../../data/source/raw_images/ct_cropped/P...,./../../../data/source/raw_images/masks_croppe...,5,,0.600926,0.53514,22.515072,42.073251,46.065171,43.011626,...,0.367954,0.003778,0.009121,0.002442,219.449851,0.001491,0.082733,0.011363,0.001439,
3,./../../../data/source/raw_images/ct_cropped/P...,./../../../data/source/raw_images/masks_croppe...,8,,0.784571,0.414247,30.263897,73.057649,74.1485,80.956779,...,0.312841,0.003261,0.008661,0.001911,555.455131,0.000515,0.056019,0.004547,0.000487,
4,./../../../data/source/raw_images/ct_cropped/P...,./../../../data/source/raw_images/masks_croppe...,10,,0.69032,0.539743,19.449801,36.035312,33.286634,38.013156,...,0.994418,0.002294,0.002973,0.002124,19.483858,0.026248,0.007089,5.7e-05,0.024128,


In [16]:
# Load the extracted PET features; the first CSV column becomes the row index.
pet_features_csv = './../../../data/radiomics_features/no_filtering/pet_64.csv'
raw_pet = pd.read_csv(pet_features_csv, index_col=0)
raw_pet.head()

Unnamed: 0,Image,Mask,Patient,Reader,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxis,original_shape_MajorAxis,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,...,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,label
0,./../../../data/source/raw_images/pet_cropped/...,./../../../data/source/raw_images/masks_croppe...,2,,0.738882,0.723925,27.060529,37.380273,41.976184,44.598206,...,0.015766,0.034134,5.519092,0.000475,2.42709,0.001254,48.302361,0.061505,0.456285,
1,./../../../data/source/raw_images/pet_cropped/...,./../../../data/source/raw_images/masks_croppe...,4,,0.7969,0.629917,19.845151,31.504408,38.587563,35.468296,...,0.027938,0.039891,6.131197,0.000615,1.658665,0.002416,53.140323,0.084396,0.697578,
2,./../../../data/source/raw_images/pet_cropped/...,./../../../data/source/raw_images/masks_croppe...,5,,0.600926,0.53514,22.515072,42.073251,46.065171,43.011626,...,0.027003,0.036831,5.047617,0.000795,2.297591,0.001332,39.715808,0.057499,0.450344,
3,./../../../data/source/raw_images/pet_cropped/...,./../../../data/source/raw_images/masks_croppe...,8,,0.784571,0.414247,30.263897,73.057649,74.1485,80.956779,...,0.03237,0.026322,3.684029,0.000522,9.430757,0.000576,40.275824,0.063654,0.098864,
4,./../../../data/source/raw_images/pet_cropped/...,./../../../data/source/raw_images/masks_croppe...,10,,0.69032,0.539743,19.449801,36.035312,33.286634,38.013156,...,0.017618,0.035494,4.875052,0.000603,1.643871,0.002969,51.761539,0.057269,0.473581,


In [17]:
# Sanity check: the CT and PET tables should have the same (rows, columns).
raw_ct.shape, raw_pet.shape 

((198, 110), (198, 110))

In [18]:
# Work on copies so the raw tables stay available unchanged.
ct_feats = raw_ct.copy()
pet_feats = raw_pet.copy()

In [19]:
# Shape features are computed from image masks: PET and CT shape features are identical.
# The shape columns (names matching 'shape') are therefore taken from the CT
# table only and kept once in the combined feature matrix.
shape_features = raw_ct.filter(regex='shape')
shape_features.head()

Unnamed: 0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxis,original_shape_MajorAxis,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MinorAxis,original_shape_Sphericity,original_shape_SurfaceArea,original_shape_SurfaceVolumeRatio,original_shape_Volume
0,0.738882,0.723925,27.060529,37.380273,41.976184,44.598206,42.720019,45.617979,27.619612,0.661532,4860.055715,0.283517,17142.0
1,0.7969,0.629917,19.845151,31.504408,38.587563,35.468296,29.410882,38.704005,25.105855,0.701721,3126.087371,0.323578,9661.0
2,0.600926,0.53514,22.515072,42.073251,46.065171,43.011626,32.015621,46.454279,25.282894,0.762365,4063.633046,0.250625,16214.0
3,0.784571,0.414247,30.263897,73.057649,74.1485,80.956779,65.764732,83.4386,57.318945,0.520001,15698.615155,0.226355,69354.0
4,0.69032,0.539743,19.449801,36.035312,33.286634,38.013156,33.015148,43.150898,24.875896,0.643822,3658.448414,0.340352,10749.0


In [20]:
# (number of samples, number of shape features)
shape_features.shape

(198, 13)

In [21]:
# Index the shape features by patient identifier.
shape_features.index = pd.Index(true_index, name='patient')

In [22]:
# Keep only the feature columns, i.e. those whose name contains 'original'.
# This removes the non-feature columns (Image, Mask, Patient, Reader, label),
# taking each table from 110 to 105 columns.
ct_feats = ct_feats.filter(regex='original')
pet_feats = pet_feats.filter(regex='original')
ct_feats.shape, pet_feats.shape

((198, 105), (198, 105))

In [23]:
# Expected number of non-shape features per modality:
# 105 feature columns minus the 13 shape columns.
105 - 13

92

In [24]:
# Drop shape features from PET and CT data (they are kept once, separately, in
# `shape_features`).
# The drop rebinds the name instead of mutating in place, and uses
# errors='ignore' so that re-running this cell after the columns are already
# gone is a no-op rather than a KeyError.
shape_columns = list(shape_features.columns)
ct_feats = ct_feats.drop(columns=shape_columns, errors='ignore')
pet_feats = pet_feats.drop(columns=shape_columns, errors='ignore')
ct_feats.shape, pet_feats.shape

((198, 92), (198, 92))

In [25]:
# Ensure unique identifiers on PET and CT features by prefixing every column
# with its modality. Labels that already carry the prefix are left untouched,
# so re-running this cell does not produce 'CT CT ...' / 'PET PET ...' names.
ct_feats.columns = [
    label if str(label).startswith('CT ') else 'CT {}'.format(label)
    for label in ct_feats.columns
]
pet_feats.columns = [
    label if str(label).startswith('PET ') else 'PET {}'.format(label)
    for label in pet_feats.columns
]

In [26]:
# Index both modality tables by patient identifier so they align with the
# shape features and PET parameters when concatenated.
for modality_feats in (ct_feats, pet_feats):
    modality_feats.index = pd.Index(true_index, name='patient')
ct_feats.shape, pet_feats.shape

((198, 92), (198, 92))

In [27]:
# Load the PET parameters (to be combined with the other feature sets);
# the first CSV column becomes the row index.
pet_params_csv = './../../../data/to_analysis/pet_params.csv'
pet_params = pd.read_csv(pet_params_csv, index_col=0)
pet_params.head()

Unnamed: 0_level_0,SUVpeak,MTV,TLG
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,21.616549,7.384,124.870726
4,15.296275,3.406,41.554406
5,14.473272,7.934,86.22842
8,10.510859,26.926,205.413389
10,7.21319,6.041,32.10377


In [28]:
# (number of patients, number of PET parameters)
pet_params.shape

(198, 3)

In [29]:
# Mark the PET parameters with the 'PET ' modality prefix. Columns that already
# carry the prefix are left untouched, so re-running this cell does not produce
# 'PET PET ...' names.
pet_params.columns = [
    col if str(col).startswith('PET ') else 'PET {}'.format(col)
    for col in pet_params.columns
]
pet_params.head()

Unnamed: 0_level_0,PET SUVpeak,PET MTV,PET TLG
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,21.616549,7.384,124.870726
4,15.296275,3.406,41.554406
5,14.473272,7.934,86.22842
8,10.510859,26.926,205.413389
10,7.21319,6.041,32.10377


In [30]:
# Combine all feature sets column-wise: shape, CT, PET and the PET parameters.
# Rows are aligned on the index of each table.
feature_blocks = [shape_features, ct_feats, pet_feats, pet_params]
X = pd.concat(feature_blocks, axis='columns')
X.head()

Unnamed: 0_level_0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxis,original_shape_MajorAxis,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MinorAxis,original_shape_Sphericity,...,PET original_gldm_SmallDependenceHighGrayLevelEmphasis,PET original_gldm_SmallDependenceLowGrayLevelEmphasis,PET original_ngtdm_Busyness,PET original_ngtdm_Coarseness,PET original_ngtdm_Complexity,PET original_ngtdm_Contrast,PET original_ngtdm_Strength,PET SUVpeak,PET MTV,PET TLG
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.738882,0.723925,27.060529,37.380273,41.976184,44.598206,42.720019,45.617979,27.619612,0.661532,...,5.519092,0.000475,2.42709,0.001254,48.302361,0.061505,0.456285,21.616549,7.384,124.870726
4,0.7969,0.629917,19.845151,31.504408,38.587563,35.468296,29.410882,38.704005,25.105855,0.701721,...,6.131197,0.000615,1.658665,0.002416,53.140323,0.084396,0.697578,15.296275,3.406,41.554406
5,0.600926,0.53514,22.515072,42.073251,46.065171,43.011626,32.015621,46.454279,25.282894,0.762365,...,5.047617,0.000795,2.297591,0.001332,39.715808,0.057499,0.450344,14.473272,7.934,86.22842
8,0.784571,0.414247,30.263897,73.057649,74.1485,80.956779,65.764732,83.4386,57.318945,0.520001,...,3.684029,0.000522,9.430757,0.000576,40.275824,0.063654,0.098864,10.510859,26.926,205.413389
10,0.69032,0.539743,19.449801,36.035312,33.286634,38.013156,33.015148,43.150898,24.875896,0.643822,...,4.875052,0.000603,1.643871,0.002969,51.761539,0.057269,0.473581,7.21319,6.041,32.10377


In [31]:
# (number of patients, total number of features)
X.shape

(198, 200)

In [32]:
# Number of columns whose name contains 'CT'.
X.filter(regex='CT').columns.size

92

In [33]:
# Number of columns whose name contains 'PET'; this counts the PET texture
# features as well as the PET parameter columns.
X.filter(regex='PET').columns.size

95

In [34]:
# Number of columns whose name contains 'shape'.
X.filter(regex='shape').columns.size

13

### Redundant Features

In [35]:
# Constant columns (zero variance across patients) are treated as redundant.
has_zero_variance = X.var().eq(0.0)
redundant_cols = X.columns[has_zero_variance]
redundant_cols

Index([], dtype='object')

In [37]:
# Record which columns are removed as redundant, then remove them.
pd.Series(redundant_cols).to_csv(
    './../../../data/radiomics_features/no_filtering/errors/redundant.txt'
)
# The drop rebinds X with errors='ignore' rather than mutating in place:
# re-running the cell after the columns are gone is then a no-op, not a KeyError.
X = X.drop(columns=redundant_cols, errors='ignore')

### Missing Value Features

In [39]:
# Columns containing at least one missing value.
has_missing = X.isna().any()
missing_value_cols = X.columns[has_missing]
missing_value_cols

Index(['CT original_ngtdm_Contrast'], dtype='object')

In [40]:
# Record which columns are removed because of missing values, then remove them.
pd.Series(missing_value_cols).to_csv(
    './../../../data/radiomics_features/no_filtering/errors/missing_values.txt'
)
# The drop rebinds X with errors='ignore' rather than mutating in place:
# re-running the cell after the columns are gone is then a no-op, not a KeyError.
X = X.drop(columns=missing_value_cols, errors='ignore')

In [41]:
# Write the cleaned feature matrix (with its patient index) to the
# `to_analysis` directory.
X.to_csv('./../../../data/to_analysis/img_features.csv')