In [1]:
# Some code modified from Pyradiomics source code

import csv
import numpy as np
import glob
import operator
import os


def write_path2csv(path_images, path_masks, path_save, csv_name="pyrad_input.csv"):
    """Write a CSV that pairs every image file with its mask file.

    The CSV has the columns ``No``, ``Image`` and ``Mask``. ``No`` holds the
    case identifier derived from the image file name, ``Image`` the image path
    as returned by ``glob`` and ``Mask`` the path built from ``path_masks``,
    the case identifier and the ``.nii.gz`` extension. The mask file is not
    checked for existence.

    Parameters
    ----------
    path_images : str
        Glob pattern selecting the image files.
    path_masks : str
        Directory holding the masks (with or without a trailing separator).
    path_save : str
        Directory the CSV is written to (with or without a trailing separator).
    csv_name : str, optional
        File name of the CSV inside ``path_save``. Defaults to
        ``"pyrad_input.csv"``; pass a different name to keep several image/mask
        pairings side by side instead of overwriting one another.

    Returns
    -------
    None
    """
    # Number of trailing characters cut from the image file name to obtain the
    # case identifier. Presumably the nnU-Net channel suffix "_0000.nii.gz"
    # (12 characters) -- TODO confirm against the dataset naming.
    suffix_len = 12

    save_fname = os.path.join(path_save, csv_name)
    # glob returns files in arbitrary order; sort so the CSV is reproducible.
    image_paths = sorted(glob.glob(path_images))

    with open(save_fname, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['No', 'Image', 'Mask'])

        for path_tmp in image_paths:
            case_id = os.path.basename(path_tmp)[:-suffix_len]
            path_mask = os.path.join(path_masks, case_id + ".nii.gz")
            writer.writerow([case_id, path_tmp, path_mask])
    return None


In [11]:
# Build the image/mask CSV using the masks stored in the labelsTs folder.
# NOTE(review): presumably labelsTs holds the reference segmentations -- confirm.
# The CSV is written to <path_save>pyrad_input.csv and overwrites any existing file of that name.
path_images = '/scratch/yiang/nnUNet/nnUNet_pred/Task102_CaOvarySeg_HK/imagesTs/*.nii.gz'
path_masks = '/scratch/yiang/nnUNet/nnUNet_pred/Task102_CaOvarySeg_HK/labelsTs/'
path_save = '/home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/'
write_path2csv(path_images, path_masks, path_save)

In [12]:
# Build the image/mask CSV using the masks stored in the op_3d_cascade folder.
# NOTE(review): presumably op_3d_cascade holds the nnU-Net 3d cascade predictions -- confirm.
# This call writes to the same pyrad_input.csv as the labelsTs cell, so whichever cell runs last wins;
# rename the file between runs if both pairings are needed.
path_images = '/scratch/yiang/nnUNet/nnUNet_pred/Task102_CaOvarySeg_HK/imagesTs/*.nii.gz'
path_masks = '/scratch/yiang/nnUNet/nnUNet_pred/Task102_CaOvarySeg_HK/op_3d_cascade/'
path_save = '/home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/'
write_path2csv(path_images, path_masks, path_save)

In [7]:
import SimpleITK as sitk
import radiomics
from radiomics import featureextractor
import collections


def save_feats(inputCSV, outputFilepath, params):
    """Extract radiomics features for every case of an input CSV, one case at a time.

    Each row of ``inputCSV`` must provide an ``Image`` and a ``Mask`` column; an
    optional ``Label`` column is converted to ``int`` when it contains only
    digits and is otherwise passed to the extractor as ``None``. For every row
    the original columns (with ``Image``/``Mask`` reduced to their base names)
    are combined with the extractor output and appended to ``outputFilepath``.

    The header row is taken from the first processed case and written once per
    call; later cases are aligned to those columns, with ``"N/A"`` for any
    column a case does not provide. The output file is opened in append mode,
    so remove an existing file before re-running to avoid mixing results.

    Parameters
    ----------
    inputCSV : str
        Path of the CSV listing the cases.
    outputFilepath : str
        Path of the CSV the feature rows are appended to.
    params : str
        Parameter specification handed to ``RadiomicsFeatureExtractor``.

    Raises
    ------
    KeyError
        If the input CSV has no ``Image`` or ``Mask`` column.
    """
    extractor = featureextractor.RadiomicsFeatureExtractor(params)
    with open(inputCSV, 'r') as inFile:
        cr = csv.DictReader(inFile, lineterminator='\n')
        flists = [row for row in cr]

    headers = None  # column order, fixed by the first processed case
    for entry in flists:
        imageFilepath = entry['Image']
        maskFilepath = entry['Mask']
        label = entry.get('Label', None)
        label = int(label) if str(label).isdigit() else None

        # DictReader fills short rows with None; such rows cannot be processed.
        if imageFilepath is None or maskFilepath is None:
            continue

        featureVector = collections.OrderedDict(entry)
        featureVector['Image'] = os.path.basename(imageFilepath)
        featureVector['Mask'] = os.path.basename(maskFilepath)
        featureVector.update(extractor.execute(imageFilepath, maskFilepath, label))

        # Re-open per case so finished cases are on disk if a later one fails.
        with open(outputFilepath, 'a') as outputFile:
            writer = csv.writer(outputFile, lineterminator='\n')
            if headers is None:
                headers = list(featureVector.keys())
                writer.writerow(headers)
            writer.writerow([featureVector.get(h, "N/A") for h in headers])


In [2]:
from __future__ import print_function

from collections import OrderedDict
import csv
from datetime import datetime
import logging
from multiprocessing import cpu_count, Pool
import os
import shutil
import threading

import SimpleITK as sitk

import radiomics
from radiomics.featureextractor import RadiomicsFeatureExtractor

# Name the current thread so that log lines using %(threadName)s show "Main"
threading.current_thread().name = 'Main'

# Parallel processing settings
HEADERS = None  # column names of the extracted features, filled in on first write
TEMP_DIR = '_TEMP'
REMOVE_TEMP_DIR = True  # delete TEMP_DIR once all results have been merged into one file
# Leave one processor free for other work, but always use at least one worker
NUM_OF_WORKERS = max(1, cpu_count() - 1)

# File locations, all relative to ROOT
ROOT = '/home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/'
INPUTCSV = os.path.join(ROOT, 'pyrad_input2.csv')
OUTPUTCSV = os.path.join(ROOT, 'feats_2.csv')
PARAMS = os.path.join(ROOT, 'exampleCT.yaml')
LOG = os.path.join(ROOT, 'log.txt')  # output log file

# Assumes the input CSV has at least 2 columns: "Image" and "Mask"
# These columns indicate the location of the image file and mask file, respectively
# Additionally, this script uses 2 additional columns: "Patient" and "Reader".
# These columns indicate the name of the patient (i.e. the image) and the reader (i.e. the segmentation). If
# these columns are omitted, a value is automatically generated ("Patient" = 1-based row index, "Reader" = "N-A").

# Assumes the following relative paths to this script:
# - Same folder (ROOT): Params.yaml (settings), input.csv (input csv file)
# Creates a log file in the root folder

# Set up logging
################

# Attach a file handler to the pyradiomics logger so that messages of level INFO and up are appended to LOG.
# NOTE(review): every execution of this cell adds a new FileHandler object to the logger, so re-running the
# cell in the same kernel presumably duplicates each line in the log file -- confirm and restart the kernel if so.
rLogger = radiomics.logger
logHandler = logging.FileHandler(filename=LOG, mode='a')  # append mode: existing log content is kept
logHandler.setLevel(logging.INFO)
# Line layout: first letter of the level name, thread name, logger name, message
logHandler.setFormatter(logging.Formatter('%(levelname)-.1s: (%(threadName)s) %(name)s: %(message)s'))
rLogger.addHandler(logHandler)


# Define filter that allows messages from specified filter and level INFO and up, and level WARNING and up from other
# loggers.
class info_filter(logging.Filter):
  """Logging filter with a relaxed threshold for one named logger.

  Records at WARNING or above pass regardless of their logger. Records whose
  logger name equals the name given at construction additionally pass from
  INFO upwards. Everything else is rejected.
  """

  def __init__(self, name):
    logging.Filter.__init__(self, name)
    # Threshold applied to records coming from any logger
    self.level = logging.WARNING

  def filter(self, record):
    """Return True when ``record`` should be emitted, False otherwise."""
    meets_general_threshold = record.levelno >= self.level
    is_info_from_named_logger = record.name == self.name and record.levelno >= logging.INFO
    return meets_general_threshold or is_info_from_named_logger


# Adding the filter to the first handler of the radiomics logger limits the info messages on the output to just those
# from radiomics.batch, but warnings and errors from the entire library are also printed to the output. This does not
# affect the amount of logging stored in the log file.
# Reconfigure the first handler registered on the pyradiomics logger.
# NOTE(review): assumes pyradiomics itself installed the console/stream handler at index 0 on import,
# before the file handler was added -- confirm for the installed pyradiomics version.
outputhandler = rLogger.handlers[0]  # Handler printing to the output
# Line layout: timestamp cut to 19 characters, thread name, logger name, message
outputhandler.setFormatter(logging.Formatter('[%(asctime)-.19s] (%(threadName)s) %(name)s: %(message)s'))
outputhandler.setLevel(logging.INFO)  # Ensures that INFO messages are being passed to the filter
# Only 'radiomics.batch' may emit INFO on this handler; other loggers need WARNING or higher
outputhandler.addFilter(info_filter('radiomics.batch'))

# Emitted at DEBUG level, which is below the INFO level set on both handlers above
logging.getLogger('radiomics.batch').debug('Logging init')


def _load_cached_case(cache_path):
  """Read the header and value row of a per-case result file; return None if it is missing or incomplete."""
  try:
    with open(cache_path, 'r') as cacheFile:
      reader = csv.reader(cacheFile)
      headers = next(reader)
      values = next(reader)
  except (OSError, StopIteration, csv.Error):
    return None
  return OrderedDict(zip(headers, values))


def run(case):
  """Extract the features of one case, re-using a cached result when one exists.

  ``case`` is one row of the input CSV (a mapping) and must contain ``Image``,
  ``Mask``, ``Patient`` and ``Reader``; ``Label`` is optional. The result of
  each case is stored as ``features_<Reader>_<Patient>.csv`` inside
  ``ROOT/TEMP_DIR``. When that file already holds a header and a value row it
  is loaded instead of re-extracting (values then come back as strings);
  a missing or incomplete file triggers a fresh extraction that overwrites it.

  Returns an ``OrderedDict`` with the case columns followed by the extracted
  features. Any exception is logged and not re-raised; in that case only the
  original case columns are returned.
  """
  ptLogger = logging.getLogger('radiomics.batch')

  feature_vector = OrderedDict(case)

  try:
    # Set the thread name to the patient name so log lines identify the case
    threading.current_thread().name = case['Patient']

    filename = r'features_' + str(case['Reader']) + '_' + str(case['Patient']) + '.csv'
    output_filename = os.path.join(ROOT, TEMP_DIR, filename)

    # The cache must be opened for reading: opening it for writing would truncate
    # the stored result and make it impossible to load.
    cached = _load_cached_case(output_filename)

    if cached is not None:
      # Output already generated, load result (prevents re-extraction in case of interrupted process)
      feature_vector = cached
      ptLogger.info('Patient %s read by %s already processed...', case['Patient'], case['Reader'])

    else:
      t = datetime.now()

      imageFilepath = case['Image']  # Required
      maskFilepath = case['Mask']  # Required
      label = case.get('Label', None)  # Optional

      # Instantiate the extractor inside the worker, configured from PARAMS
      extractor = RadiomicsFeatureExtractor(PARAMS)

      # Extract features
      feature_vector.update(extractor.execute(imageFilepath, maskFilepath, label=label))

      # Store results in a separate file per case to prevent write conflicts between workers.
      # This also allows the extraction to be interrupted: upon restarting, already processed
      # cases are found in TEMP_DIR and loaded instead of re-extracted.
      with open(output_filename, 'w') as outputFile:
        writer = csv.DictWriter(outputFile, fieldnames=list(feature_vector.keys()), lineterminator='\n')
        writer.writeheader()
        writer.writerow(feature_vector)

      delta_t = datetime.now() - t

      ptLogger.info('Patient %s read by %s processed in %s', case['Patient'], case['Reader'], delta_t)

  except Exception:
    # Deliberate best effort: a failing case must not abort the whole batch
    ptLogger.error('Feature extraction failed!', exc_info=True)

  return feature_vector


def _writeResults(featureVector):
  global HEADERS, OUTPUTCSV

  # Use the lock to prevent write access conflicts
  try:
    with open(OUTPUTCSV, 'a') as outputFile:
      writer = csv.writer(outputFile, lineterminator='\n')
      if HEADERS is None:
        HEADERS = list(featureVector.keys())
        writer.writerow(HEADERS)

      row = []
      for h in HEADERS:
        row.append(featureVector.get(h, "N/A"))
      writer.writerow(row)
  except Exception:
    logging.getLogger('radiomics.batch').error('Error writing the results!', exc_info=True)

  

In [3]:
logger = logging.getLogger('radiomics.batch')

# Ensure the entire extraction is handled on 1 thread per worker process
########################################################################

sitk.ProcessObject_SetGlobalDefaultNumberOfThreads(1)

# Load the list of cases
########################

logger.info('pyradiomics version: %s', radiomics.__version__)
logger.info('Loading CSV...')

cases = []
with open(INPUTCSV, 'r') as inFile:
  cr = csv.DictReader(inFile, lineterminator='\n')
  for row_idx, row in enumerate(cr, start=1):
    # If not included, add a "Patient" (1-based row index) and a "Reader" column.
    if 'Patient' not in row:
      row['Patient'] = row_idx
    if 'Reader' not in row:
      row['Reader'] = 'N-A'
    cases.append(row)

logger.info('Loaded %d jobs', len(cases))

# Make the directory for the per-case result files if necessary
temp_dir_path = os.path.join(ROOT, TEMP_DIR)
if not os.path.isdir(temp_dir_path):
  logger.info('Creating temporary output directory %s', temp_dir_path)
  os.mkdir(temp_dir_path)

# Start parallel processing
###########################

logger.info('Starting parralel pool with %d workers out of %d CPUs', NUM_OF_WORKERS, cpu_count())
pool = Pool(NUM_OF_WORKERS)
try:
  # Blocks until every case is processed; results keep the order of `cases`
  results = pool.map(run, cases)
finally:
  # Always release the worker processes, also when map() raises or is interrupted;
  # otherwise every execution of this cell leaves idle workers behind in the kernel.
  pool.close()
  pool.join()

try:
  # Store all results into 1 file; the columns are taken from the first case
  with open(OUTPUTCSV, mode='w') as outputFile:
    writer = csv.DictWriter(outputFile,
                            fieldnames=list(results[0].keys()),
                            restval='',
                            extrasaction='raise',  # raise error when a case contains more headers than first case
                            lineterminator='\n')
    writer.writeheader()
    writer.writerows(results)

  # Only reached when the merged file was written without error
  if REMOVE_TEMP_DIR:
    logger.info('Removing temporary directory %s (contains individual case results files)',
                temp_dir_path)
    shutil.rmtree(temp_dir_path)
except Exception:
  logger.error('Error storing results into single file!', exc_info=True)


[2022-09-12 16:08:19] (Main) radiomics.batch: pyradiomics version: v3.0.1
[2022-09-12 16:08:19] (Main) radiomics.batch: Loading CSV...
[2022-09-12 16:08:19] (Main) radiomics.batch: Loaded 84 jobs
[2022-09-12 16:08:19] (Main) radiomics.batch: Creating temporary output directory /home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/_TEMP
[2022-09-12 16:08:19] (Main) radiomics.batch: Starting parralel pool with 63 workers out of 64 CPUs
[2022-09-12 16:08:24] (47) radiomics.batch: Patient 47 read by N-A processed in 0:00:04.963739
[2022-09-12 16:08:35] (57) radiomics.batch: Patient 57 read by N-A processed in 0:00:15.413991
[2022-09-12 16:08:40] (52) radiomics.batch: Patient 52 read by N-A processed in 0:00:20.484287
[2022-09-12 16:08:40] (4) radiomics.batch: Patient 4 read by N-A processed in 0:00:20.710390
[2022-09-12 16:08:41] (9) radiomics.batch: Patient 9 read by N-A processed in 0:00:21.483450
[2022-09-12 16:08:47] (21) radiomics.batch: Patient 21 read by N-A processe

[2022-09-12 16:12:55] (68) radiomics.batch: Patient 68 read by N-A processed in 0:04:14.569271
[2022-09-12 16:21:04] (66) radiomics.batch: Patient 66 read by N-A processed in 0:12:23.692450
[2022-09-12 16:21:04] (Main) radiomics.batch: Removing temporary directory /home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/_TEMP (contains individual case results files)


In [8]:
# Sequential (single-process) extraction for the cases listed in pyrad_input1.csv.
# save_feats opens the output file in append mode, so delete an existing feats_1.csv before re-running this cell.
outPath = '/home/yiang/Documents/PycharmProjects/DL_CaOvary/manuscript/radiomics/'
inputCSV = os.path.join(outPath, 'pyrad_input1.csv')
outputFilepath = os.path.join(outPath, 'feats_1.csv')
params = os.path.join(outPath, 'CaOvary_CT.yaml')  # extractor settings file passed to RadiomicsFeatureExtractor
save_feats(inputCSV, outputFilepath, params)