<a href="https://colab.research.google.com/github/lukevrobbins/covid_19_severity_prediction/blob/main/Feature_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install PyRadiomics if not installed already
!pip install pyradiomics

Collecting pyradiomics
  Downloading pyradiomics-3.0.1-cp37-cp37m-manylinux1_x86_64.whl (188 kB)
[?25l[K     |█▊                              | 10 kB 18.3 MB/s eta 0:00:01[K     |███▌                            | 20 kB 9.6 MB/s eta 0:00:01[K     |█████▏                          | 30 kB 7.6 MB/s eta 0:00:01[K     |███████                         | 40 kB 7.1 MB/s eta 0:00:01[K     |████████▊                       | 51 kB 3.6 MB/s eta 0:00:01[K     |██████████▍                     | 61 kB 4.2 MB/s eta 0:00:01[K     |████████████▏                   | 71 kB 4.5 MB/s eta 0:00:01[K     |██████████████                  | 81 kB 3.5 MB/s eta 0:00:01[K     |███████████████▋                | 92 kB 3.9 MB/s eta 0:00:01[K     |█████████████████▍              | 102 kB 4.3 MB/s eta 0:00:01[K     |███████████████████▏            | 112 kB 4.3 MB/s eta 0:00:01[K     |████████████████████▉           | 122 kB 4.3 MB/s eta 0:00:01[K     |██████████████████████▋         | 133 kB 4.

In [None]:
# importing libraries
from __future__ import print_function
import sys
import os
import collections
import SimpleITK as sitk
from SimpleITK.SimpleITK import JoinSeries
import logging
import six
import radiomics
from radiomics import featureextractor, getFeatureClasses, getTestCase, firstorder, glcm, imageoperations, shape, glrlm, glszm
import numpy as np
import pandas as pd
import seaborn as sns
import zipfile
import PIL
from PIL import Image

In [None]:
!wget --no-check-certificate \
    "https://github.com/lukevrobbins/covid_19_severity_prediction/archive/refs/heads/Justin.zip" \
    -O "/tmp/chest-x-rays.zip"

zip_ref = zipfile.ZipFile('/tmp/chest-x-rays.zip', 'r') # Opens the zip file in read mode
zip_ref.extractall('/tmp') # Extracts the files into the /tmp folder
zip_ref.close()


--2022-02-25 18:31:35--  https://github.com/lukevrobbins/covid_19_severity_prediction/archive/refs/heads/Justin.zip
Resolving github.com (github.com)... 13.114.40.48
Connecting to github.com (github.com)|13.114.40.48|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/lukevrobbins/covid_19_severity_prediction/zip/refs/heads/Justin [following]
--2022-02-25 18:31:36--  https://codeload.github.com/lukevrobbins/covid_19_severity_prediction/zip/refs/heads/Justin
Resolving codeload.github.com (codeload.github.com)... 52.193.111.178
Connecting to codeload.github.com (codeload.github.com)|52.193.111.178|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘/tmp/chest-x-rays.zip’

/tmp/chest-x-rays.z     [  <=>               ]   2.03M  7.88MB/s    in 0.3s    

2022-02-25 18:31:37 (7.88 MB/s) - ‘/tmp/chest-x-rays.zip’ saved [2124428]



In [None]:
# Name of the test case; the image and mask file names are derived from it.
testCase = "1"

# Folder that holds the chest X-ray images and their masks.
dataDir = '/tmp/covid_19_severity_prediction-Justin/Chest X-ray Image Data sets'

# Show the folder as written and as resolved by os.path.abspath.
for label, shown in (
    ("dataDir, relative path:", dataDir),
    ("dataDir, absolute path:", os.path.abspath(dataDir)),
):
    print(label, shown)

# Resize every file in dataDir to 256 x 256, overwriting it in place.
# NOTE(review): the mask files live in the same folder, so they are resized
# with PIL's default resampling filter too - confirm that interpolating the
# label map is acceptable.
for file in os.listdir(dataDir):
    f_img = os.path.join(dataDir, file)
    if not os.path.isfile(f_img):
        continue  # sub-directories cannot be opened as images
    # Close the source file handle before the same path is overwritten.
    with Image.open(f_img) as source:
        img = source.resize((256, 256))
    img.save(f_img)

# Locate the test case's image and its label map inside dataDir.
imagePath, maskPath = (
    os.path.join(dataDir, testCase + ".jpg"),
    os.path.join(dataDir, testCase + "-mask.png"),
)

# Load both files, then stack each one into a single-slice series so that
# pyradiomics is handed "3D" inputs.
image, mask = sitk.ReadImage(imagePath), sitk.ReadImage(maskPath)
image3d, mask3d = sitk.JoinSeries(image), sitk.JoinSeries(mask)


dataDir, relative path: /tmp/covid_19_severity_prediction-Justin/Chest X-ray Image Data sets
dataDir, absolute path: /tmp/covid_19_severity_prediction-Justin/Chest X-ray Image Data sets


In [None]:
# Extractor settings. The dictionary is left empty here; examples of keys that
# could be set before building the extractor:
#   settings['binWidth'] = 25
#   settings['resampledPixelSpacing'] = None       # or [3, 3, 3] for 3x3x3mm voxels
#   settings['interpolator'] = 'sitkBSpline'
#   settings['verbose'] = True
# NOTE(review): the recorded run warns "parameter force2D must be set to True
# to enable shape2D extraction" - consider settings['force2D'] = True, since
# the inputs are single-slice series; confirm the effect on the other features.
settings = dict()
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)
print('Extraction parameters:\n\t', extractor.settings)

# Switch on the 'LoG' image type alongside 'Original'.
# NOTE(review): LoG is enabled without custom arguments (recorded output shows
# {'LoG': {}}); pyradiomics documents a 'sigma' setting for this filter -
# confirm whether any LoG features are actually produced.
extractor.enableImageTypeByName('LoG')
print()
print('Enabled filters:\n\t', extractor.enabledImagetypes)

# Reset the feature selection, then turn every feature class back on.
# To restrict the selection instead, see e.g.
#   extractor.enableFeatureClassByName('firstorder')
#   extractor.enableFeaturesByName(glcm=['Autocorrelation', 'Homogeneity1', 'SumSquares'])
extractor.disableAllFeatures()
extractor.enableAllFeatures()
print()
print('Enabled features:\n\t', extractor.enabledFeatures)


Extraction parameters:
	 {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True}

Enabled filters:
	 {'Original': {}, 'LoG': {}}

Enabled features:
	 {'firstorder': [], 'glcm': [], 'gldm': [], 'glrlm': [], 'glszm': [], 'ngtdm': [], 'shape': [], 'shape2D': []}


In [None]:
# Run the extractor on the single-slice image / mask pair.
result = extractor.execute(image3d, mask3d)

# Report the container type, then list every entry it holds.
print('Result type:', type(result))
print()
print('Calculated features')
for feature_name, feature_value in result.items():
    print('\t', feature_name, ':', feature_value)

parameter force2D must be set to True to enable shape2D extraction
  return numpy.nanmean(ac, 1)
  return numpy.nanmean(cp, 1)
  return numpy.nanmean(cs, 1)
  return numpy.nanmean(ct, 1)
  return numpy.nanmean(cont, 1)
  return numpy.nanmean(corr, (1, 2, 3))
  return numpy.nanmean(diffavg, 1)
  return numpy.nanmean(difent, 1)
  return numpy.nanmean(diffvar, 1)
  return numpy.nanmean(invDiff, 1)
  return numpy.nanmean(idm, 1)
  return numpy.nanmean(idmn, 1)
  return numpy.nanmean(idn, 1)
  return numpy.nanmean(imc1, 1)
  return numpy.nanmean(imc2, 1)
  return numpy.nanmean(inv, 1)
  return self.coefficients['ux'].mean((1, 2, 3))
  ret, rcount, out=ret, casting='unsafe', subok=False)
  return numpy.nanmean(ene, 1)
  return numpy.nanmean(ent, 1)
  return numpy.nanmean(MCC, 1).real
  return numpy.nanmean(maxprob, 1)
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  return numpy.nanmean(sumavg, 1)
  return numpy.nanmean(sumentr, 1)
  return nump

Result type: <class 'collections.OrderedDict'>

Calculated features
	 diagnostics_Versions_PyRadiomics : v3.0.1
	 diagnostics_Versions_Numpy : 1.21.5
	 diagnostics_Versions_SimpleITK : 2.1.1
	 diagnostics_Versions_PyWavelet : 1.2.0
	 diagnostics_Versions_Python : 3.7.12
	 diagnostics_Configuration_Settings : {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True}
	 diagnostics_Configuration_EnabledImageTypes : {'Original': {}, 'LoG': {}}
	 diagnostics_Image-original_Hash : 0aae883c2f350b3362b5b40383c88732e8afe37e
	 diagnostics_Image-original_Dimensionality : 3D
	 diagnostics_Image-original_Spacing : (1.0, 1.0, 1.0)
	 diagnostics_Image-original_Size : (256, 256, 1)
	 diagnostics_Image-original_Mean : 192.190322875

In [None]:
from google.colab import files
import pandas as pd
import csv
# Write the extraction result to result.csv: one header row with the feature
# names, followed by one row with the matching values.
field_names = list(result.keys())
field_value = list(result.values())
# newline="" lets the csv writer control line endings itself; without it,
# platforms that translate "\n" end up with a blank line after every row.
with open("result.csv", "w", newline="") as outfile:
    csvwriter = csv.writer(outfile)
    csvwriter.writerow(field_names)
    csvwriter.writerow(field_value)

In [None]:

 


# with open('data.csv', 'w') as f:
#     for key in result.keys():
#         f.write("%s, %s\n" % (key, result[key]))

AttributeError: ignored

In [None]:
# Folder that holds the chest X-ray images and their masks.
dataDir = '/tmp/covid_19_severity_prediction-Justin/Chest X-ray Image Data sets'

# Show the folder as written and as resolved by os.path.abspath.
for label, shown in (
    ("dataDir, relative path:", dataDir),
    ("dataDir, absolute path:", os.path.abspath(dataDir)),
):
    print(label, shown)

# Define the test case name.
# The previous loop over os.listdir(dataDir) reset count to 1 on every pass
# and then evaluated `count + ""`, which raises TypeError (int + str); the
# number is converted explicitly instead.
# TODO: if every image in dataDir should be processed, derive the names from
# the directory listing and wrap the steps below in that loop.
count = 1
CXR_name = str(count)


# Resize every file in dataDir to 256 x 256, overwriting it in place.
# NOTE(review): the mask files live in the same folder, so they are resized
# with PIL's default resampling filter too - confirm that interpolating the
# label map is acceptable.
for file in os.listdir(dataDir):
    f_img = os.path.join(dataDir, file)
    if not os.path.isfile(f_img):
        continue  # sub-directories cannot be opened as images
    # Close the source file handle before the same path is overwritten.
    with Image.open(f_img) as source:
        img = source.resize((256, 256))
    img.save(f_img)

# Locate the selected case's image and its label map inside dataDir.
imagePath, maskPath = (
    os.path.join(dataDir, CXR_name + ".jpg"),
    os.path.join(dataDir, CXR_name + "-mask.png"),
)

# Load both files; the image is stacked into a single-slice series so that
# pyradiomics is handed a "3D" input.
image, mask = sitk.ReadImage(imagePath), sitk.ReadImage(maskPath)
image3d = sitk.JoinSeries(image)