In [None]:
# Install the third-party packages this notebook depends on.
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# pull different releases than the ones used originally — consider pinning.
!pip install boto3
!pip install torchxrayvision
!pip install SimpleITK
# NOTE(review): both `radiomics` and `pyradiomics` are installed; presumably
# only pyradiomics is needed for `import radiomics` below — confirm and drop
# the redundant one.
!pip install radiomics
!pip install pyradiomics
!pip install torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# importing libraries
from time import time
import multiprocessing
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from glob import glob
import skimage
import torch
import SimpleITK as sitk
import matplotlib.pyplot as plt
# importing written classes and methods
import sys
from radiomics import featureextractor
from radiomics import firstorder, glcm, glrlm, glszm, shape, shape2D, ngtdm, gldm
import warnings
import logging
# Suppress DeprecationWarning messages so they do not flood the cell output.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Raise the threshold of the "radiomics" logger (and, through logger
# inheritance, its children) so only ERROR and above are emitted.
logger = logging.getLogger("radiomics")
logger.setLevel(logging.ERROR)
# Alternative, kept for reference: set the level for one specific class only.
# logger = logging.getLogger("radiomics.glcm")
# logger.setLevel(logging.ERROR)


In [None]:
# Folder layout of the pipeline: every stage reads from / writes to its own
# sub-folder of the MIMIC root directory.
directory_ = '/content/sample_data/MIMIC/'
directory_processed_images = directory_ + 'processed_images'
directory_filterss = directory_ + 'filters'
directory_segments = directory_ + 'segments'
directory_cleaned_targets = directory_ + 'cleaned_targets'
directory_features = directory_ + 'features'

# Create the output folder for the extracted radiomic features.
# exist_ok=True keeps the cell re-runnable when the folder already exists,
# while genuine failures (e.g. missing permissions) are raised here instead of
# being silently swallowed and resurfacing later when features are saved.
os.makedirs(directory_features, exist_ok=True)

In [None]:
# Load the cleaned targets. The .npy file stores a pickled Python object
# (hence allow_pickle=True); .item() unwraps the single-element array into
# that object (presumably a dict of targets — TODO confirm).
# NOTE(review): allow_pickle=True can execute arbitrary code when loading an
# untrusted file; only load files produced by this pipeline.
target = np.load(directory_cleaned_targets + '/new_targets.npy',allow_pickle=True).item()

In [None]:
# Collect the paths of all processed images (.npy files).
# Note: glob does not guarantee any particular ordering of the returned paths.
name_of_images = glob(directory_processed_images + '/*.npy')

In [None]:
# Display one sample image path to check the naming scheme
# (<image id>__image.npy).
name_of_images[0]

'/content/sample_data/MIMIC/processed_images/8706019e-6bd7e61c-9976b8a0-8981b2aa-880a9e3b__image.npy'

In [None]:
# Collect the paths of all filter (mask) files (.npy files).
name_of_masks = glob(directory_filterss + '/*.npy')

In [None]:
# Number of filter files found on disk.
len(name_of_masks)

243324

In [None]:

# Example of the per-image naming scheme, shown for the first processed image.
# `id_image` was previously undefined at notebook scope, so this cell raised a
# NameError on a fresh kernel; it is now derived the same way as in
# single_task_execute: the file name with the '__image.npy' suffix removed.
id_image = name_of_images[0].split('/')[-1].split('__image')[0]
mask_name = directory_filterss + '/' + id_image + '__filters.npy'
feature_name = directory_features + '/' + id_image + '__features.npy'

In [None]:
# feature radiomics module

def _features_or_nan(build_extractor, n_features):
  """Run one pyradiomics feature class and return its values as a 1-D array.

  Args:
    build_extractor: zero-argument callable that constructs the feature-class
      instance. It is invoked inside the ``try`` so that a failure while
      constructing the instance is handled exactly like a failure in
      ``execute()``.
    n_features: length of the NaN vector returned when extraction fails.

  Returns:
    ``np.array`` of the values returned by ``execute()``, or ``n_features``
    NaNs if constructing or executing the feature class raised an exception.
  """
  try:
    return np.array(list(build_extractor().execute().values()))
  except Exception:
    # Deliberately `Exception`, not a bare `except:`: a failing feature class
    # degrades to NaNs, but KeyboardInterrupt / SystemExit still propagate so
    # a long-running extraction can actually be stopped.
    return np.ones((n_features,)) * np.nan


def RadiomicsFeatures(imge, msk):
  """Extract the radiomic feature vector of one image/mask pair.

  The image and mask are passed as ``inputImage`` / ``inputMask`` to eight
  pyradiomics feature classes. Each class is run independently; if one of
  them fails, its slot in the result is filled with NaNs so that the other
  feature groups are still returned.

  Args:
    imge: image object handed to every feature class as ``inputImage``.
    msk: mask object handed to every feature class as ``inputMask``.

  Returns:
    1-D ``np.ndarray`` holding the concatenated feature groups in this order:
    first order, shape (3D), shape (2D), GLCM, GLSZM, GLRLM, NGTDM, GLDM.
    With the expected per-group counts below this is 116 values.
  """
  # Expected number of features per group; used to size the NaN fallback.
  n_fos_features = 18
  n_3d_features = 14
  n_2d_features = 9
  n_glcm_features = 24
  n_glszm_features = 16
  n_glrlm_features = 16
  n_ngtdm_features = 5
  n_gldm_features = 14

  # (constructor, fallback length) for every feature group, in output order.
  # The lambdas defer construction so it happens inside the helper's try block.
  feature_groups = (
    (lambda: firstorder.RadiomicsFirstOrder(inputImage=imge, inputMask=msk),
     n_fos_features),
    (lambda: shape.RadiomicsShape(inputImage=imge, inputMask=msk, force2D=True),
     n_3d_features),
    (lambda: shape2D.RadiomicsShape2D(inputImage=imge, inputMask=msk, force2D=True),
     n_2d_features),
    (lambda: glcm.RadiomicsGLCM(inputImage=imge, inputMask=msk,
                                symmetricalGLCM=True, weightingNorm='euclidean'),
     n_glcm_features),
    (lambda: glszm.RadiomicsGLSZM(inputImage=imge, inputMask=msk),
     n_glszm_features),
    (lambda: glrlm.RadiomicsGLRLM(inputImage=imge, inputMask=msk),
     n_glrlm_features),
    (lambda: ngtdm.RadiomicsNGTDM(inputImage=imge, inputMask=msk),
     n_ngtdm_features),
    (lambda: gldm.RadiomicsGLDM(inputImage=imge, inputMask=msk),
     n_gldm_features),
  )

  all_feature_sets = np.hstack([_features_or_nan(build, n_expected)
                                for build, n_expected in feature_groups])
  return all_feature_sets


#-------------------------------------------------------------------------------
def single_task_execute(i):
  """Extract and save the radiomic features of the i-th processed image.

  The image at ``name_of_images[i]`` and its ``<id>__filters.npy`` file are
  loaded; the filter array is indexed as ``filters[:, k, :, :]`` for
  k = 0..3, i.e. four masks per image. The features of the four masks are
  concatenated and written to ``<directory_features>/<id>__features.npy`` as
  a dict with keys ``'stacked_features'`` and ``'id_image'`` (being a dict,
  it has to be read back with ``allow_pickle=True``).

  If any of the four masks is entirely zero the image is treated as badly
  segmented and a vector of 116 * 4 NaNs is saved instead.

  Args:
    i: index into ``name_of_images``.

  Returns:
    None; the result is written to disk.
  """
  # Load the image and derive the id shared by all files of this image.
  image_path = name_of_images[i]
  imge = sitk.GetImageFromArray(np.load(image_path))
  id_image = image_path.split('/')[-1].split('__image')[0]

  # Input mask file and output feature file for this image.
  mask_name = directory_filterss + '/' + id_image + '__filters.npy'
  feature_name = directory_features + '/' + id_image + '__features.npy'

  filters = np.load(mask_name)

  # Quality check: an all-zero mask means the segmentation is unusable.
  has_empty_mask = any(np.all(filters[:, k, :, :] == 0) for k in range(4))

  if has_empty_mask:
    stacked_features = np.ones((116 * 4,)) * np.nan
  else:
    # Convert all four masks first, then extract and stack their features.
    masks = [sitk.GetImageFromArray(filters[:, k, :, :]) for k in range(4)]
    stacked_features = np.hstack([RadiomicsFeatures(imge, mask) for mask in masks])

  np.save(feature_name, {'stacked_features': stacked_features, 'id_image': id_image})
  return

In [None]:
if __name__ == '__main__':
    # Entry point of the notebook: run single_task_execute for every image,
    # distributing the work over a pool of worker processes.
    inputs = range(len(name_of_images))  # one task per processed image

    # One worker per available CPU core (os.cpu_count()). The with-block
    # guarantees the pool is shut down when the loop finishes or raises,
    # instead of leaving the worker processes alive.
    with multiprocessing.Pool(processes=os.cpu_count()) as pool:
        # imap_unordered yields as soon as any worker finishes, which keeps the
        # progress bar accurate; the tasks return nothing, results go to disk.
        for _ in tqdm(pool.imap_unordered(single_task_execute, inputs), total=len(inputs)):
            pass

  4%|▎         | 8683/243324 [14:36:42<419:26:41,  6.44s/it]FAILED: Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/radiomics/base.py", line 238, in _calculateFeatures
    yield True, feature, getattr(self, 'get%sFeatureValue' % feature)()
  File "/usr/local/lib/python3.8/dist-packages/radiomics/glcm.py", line 648, in getMCCFeatureValue
    Q_eigenValue = numpy.linalg.eigvals(Q.transpose((0, 3, 1, 2)))
  File "<__array_function__ internals>", line 180, in eigvals
  File "/usr/local/lib/python3.8/dist-packages/numpy/linalg/linalg.py", line 1050, in eigvals
    _assert_finite(a)
  File "/usr/local/lib/python3.8/dist-packages/numpy/linalg/linalg.py", line 208, in _assert_finite
    raise LinAlgError("Array must not contain infs or NaNs")
numpy.linalg.LinAlgError: Array must not contain infs or NaNs

ERROR:radiomics.glcm:FAILED: Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/radiomics/base.py", line 238, in _calculateFeat