In [None]:
# Build the radiomic features dataset: run PyRadiomics on each image/mask pair
# and save one JSON record of features per slice.

In [1]:
from radiomics import featureextractor
import os
import SimpleITK as sitk
import json, codecs
import numpy as np

In [2]:
# Directory with the JSON-lines list files read by the extraction functions.
jsonPath = os.path.join('..', 'data', 'prepared for extraction', 'squares')
# Root directory of the images; each entry's 'Directory Name' is a subfolder of it.
imagesPath = os.path.join('..', 'data', 'prepared for extraction', 'images')
# Directory where the extracted feature records are written.
resultsPath = os.path.join('..', 'data', 'dataset')

In [3]:
# Maps an entry's 'Tumor Type' string to the binary 'Label' stored with each record.
# NOTE(review): judging by the Polish keys 'łagodny-inny' ("benign, other") and
# 'złośliwy-inny' ("malignant, other"), 0 appears to mean benign and 1 malignant - confirm.
types = {'AML': 0, 'pRCC': 1, 'ccRCC': 1, 'onkocytoma': 0, 'łagodny-inny': 0, 'chRCC': 1, 'złośliwy-inny': 1}

In [4]:
def cropDict(dictionary, keys=['original_']):
    """Return a new dict with only the entries whose key starts with a given prefix.

    Parameters
    ----------
    dictionary : dict
        Mapping with string keys; it is not modified.
    keys : iterable of str, optional
        Key prefixes to keep. The default list is only read, never mutated.

    Returns
    -------
    dict
        Matching entries, grouped by prefix in the order the prefixes are given
        and, within one prefix, in the order of ``dictionary``.
    """
    return {
        name: content
        for prefix in keys
        for name, content in dictionary.items()
        if name.startswith(prefix)
    }

In [5]:
def jsonifyDict(dictWithNumpyArrays):
    """Return a shallow copy of a dict with NumPy values turned into plain Python objects.

    Arrays (including 0-d arrays and ndarray subclasses) and NumPy scalars such
    as ``np.float32`` or ``np.int64`` are converted with ``tolist()``, so the
    result can be passed to ``json.dump``. All other values are kept as they are.

    Parameters
    ----------
    dictWithNumpyArrays : dict
        Mapping whose values may be NumPy arrays or scalars; it is not modified.

    Returns
    -------
    dict
        Copy of the input in which every NumPy value is a list or Python scalar.
    """
    jsonified = dictWithNumpyArrays.copy()
    # Only values of existing keys are replaced, so the dict size stays constant
    # and iterating over it while assigning is safe.
    for feature, value in jsonified.items():
        # np.generic covers NumPy scalars, most of which the json module rejects;
        # isinstance (rather than an exact type check) also accepts ndarray subclasses.
        if isinstance(value, (np.ndarray, np.generic)):
            jsonified[feature] = value.tolist()

    return jsonified

In [9]:
def extraction(fileName, overwrite=False):
    """Extract radiomic features for every entry of a JSON-lines list file.

    Each non-blank line of ``jsonPath/fileName`` is parsed as a JSON object with
    the keys 'Directory Name', 'File Name' and 'Tumor Type'. For each entry the
    image ``imagesPath/<Directory Name>/<File Name>`` and the mask
    ``imagesPath/<Directory Name>/mask_<File Name>`` are read with SimpleITK and
    passed to an extractor with the firstorder, glcm, glrlm, glszm, gldm and
    ngtdm feature classes enabled. The features whose name starts with
    'original_' are kept, the directory name, file name and numeric label
    (looked up in ``types``) are added, and the record is written as one JSON
    line to ``resultsPath/fileName``.

    Parameters
    ----------
    fileName : str
        Name of the list file inside ``jsonPath``; also used as the output name.
    overwrite : bool, optional
        If False (default) records are appended to an existing output file, so
        calling the function twice for the same file duplicates its records.
        If True the output file is truncated first.

    Raises
    ------
    KeyError
        If an entry lacks one of the expected keys or its 'Tumor Type' is not
        a key of ``types``.
    """
    filePath = os.path.join(jsonPath, fileName)
    savePath = os.path.join(resultsPath, fileName)

    extractor = featureextractor.RadiomicsFeatureExtractor()
    extractor.disableAllFeatures()
    for featureClass in ('firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm'):
        extractor.enableFeatureClassByName(featureClass)

    # Fail-safe: make sure the output directory exists before any expensive work.
    os.makedirs(resultsPath, exist_ok=True)

    # The tumor-type names contain non-ASCII characters, so the input is decoded
    # explicitly as UTF-8 instead of with the platform default encoding.
    # The input is opened first so a missing list file never touches the output.
    with open(filePath, 'r', encoding='utf-8') as file, \
            open(savePath, 'w' if overwrite else 'a') as saveFile:
        for line in file:
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads.
                continue
            entry = json.loads(line)

            imagePath = os.path.join(imagesPath, entry['Directory Name'], entry['File Name'])
            maskPath = os.path.join(imagesPath, entry['Directory Name'], 'mask_' + entry['File Name'])

            image = sitk.ReadImage(imagePath)
            mask = sitk.ReadImage(maskPath)

            result = extractor.execute(image, mask)
            result = cropDict(result)
            result = jsonifyDict(result)
            result['Directory Name'] = entry['Directory Name']
            result['File Name'] = entry['File Name']
            result['Label'] = types[entry['Tumor Type']]

            json.dump(result, saveFile)
            saveFile.write('\n')
            # Flush per record so finished work survives an interrupted run.
            saveFile.flush()

In [None]:
# Extract features for the entries listed in this file; records go to a file of the same name in resultsPath.
extraction('mvp_etap4_all_3_test.json')

In [None]:
# Extract features for the entries listed in this file; records go to a file of the same name in resultsPath.
extraction('mvp_etap4_one_3_train.json')

In [None]:
# Extract features for the entries listed in this file; records go to a file of the same name in resultsPath.
extraction('mvp_etap4_all_3_train.json')

In [6]:
def extractionWithExtraSlices(fileName, overwrite=False):
    """Extract radiomic features for every listed slice and its existing neighbours.

    Each non-blank line of ``jsonPath/fileName`` is parsed as a JSON object with
    the keys 'Directory Name', 'File Name' and 'Tumor Type'. The listed image
    and its mask (``mask_<File Name>`` in the same directory) are processed
    first. Then the slice number is taken from the file name by removing
    '_anonymized.dcm' and every 'T', and the files ``T<n>_anonymized.dcm`` for
    n = number-1, number-2, number+1, number+2 are processed, in that order,
    when they exist on disk. Neighbouring slices are extracted with the mask of
    the listed slice.

    For every processed slice the features whose name starts with 'original_'
    are kept, the directory name, slice file name and numeric label (looked up
    in ``types``) are added, and the record is written as one JSON line to
    ``resultsPath/extra_slices_<fileName>``.

    Parameters
    ----------
    fileName : str
        Name of the list file inside ``jsonPath``.
    overwrite : bool, optional
        If False (default) records are appended to an existing output file, so
        calling the function twice for the same file duplicates its records.
        If True the output file is truncated first.

    Raises
    ------
    KeyError
        If an entry lacks one of the expected keys or its 'Tumor Type' is not
        a key of ``types``.
    ValueError
        If the slice number cannot be parsed from an entry's 'File Name'.
    """
    filePath = os.path.join(jsonPath, fileName)
    savePath = os.path.join(resultsPath, 'extra_slices_' + fileName)

    extractor = featureextractor.RadiomicsFeatureExtractor()
    extractor.disableAllFeatures()
    for featureClass in ('firstorder', 'glcm', 'glrlm', 'glszm', 'gldm', 'ngtdm'):
        extractor.enableFeatureClassByName(featureClass)

    def writeRecord(saveFile, image, mask, entry, sliceName):
        # Extract features for one slice, annotate them and write one JSON line.
        record = jsonifyDict(cropDict(extractor.execute(image, mask)))
        record['Directory Name'] = entry['Directory Name']
        record['File Name'] = sliceName
        record['Label'] = types[entry['Tumor Type']]
        json.dump(record, saveFile)
        saveFile.write('\n')
        # Flush per record so finished work survives an interrupted run.
        saveFile.flush()

    # Fail-safe: make sure the output directory exists before any expensive work.
    os.makedirs(resultsPath, exist_ok=True)

    # The tumor-type names contain non-ASCII characters, so the input is decoded
    # explicitly as UTF-8 instead of with the platform default encoding.
    # The input is opened first so a missing list file never touches the output.
    with open(filePath, 'r', encoding='utf-8') as file, \
            open(savePath, 'w' if overwrite else 'a') as saveFile:
        for line in file:
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads.
                continue
            entry = json.loads(line)

            sliceDir = os.path.join(imagesPath, entry['Directory Name'])
            image = sitk.ReadImage(os.path.join(sliceDir, entry['File Name']))
            mask = sitk.ReadImage(os.path.join(sliceDir, 'mask_' + entry['File Name']))
            writeRecord(saveFile, image, mask, entry, entry['File Name'])

            name = entry['File Name']
            name = name.replace('_anonymized.dcm', '')
            name = name.replace('T', '')
            number = int(name)

            for n in (number-1, number-2, number+1, number+2):
                newName = 'T' + str(n) + '_anonymized.dcm'
                newPath = os.path.join(sliceDir, newName)
                if os.path.isfile(newPath):
                    # The neighbour has no mask of its own here; reuse the listed slice's mask.
                    writeRecord(saveFile, sitk.ReadImage(newPath), mask, entry, newName)
            

In [None]:
# Extract the listed slices plus their existing neighbours; records go to 'extra_slices_' + this name in resultsPath.
extractionWithExtraSlices('mvp_etap4_all_3_test.json')

In [None]:
# Extract the listed slices plus their existing neighbours; records go to 'extra_slices_' + this name in resultsPath.
extractionWithExtraSlices('mvp_etap4_one_3_train.json')

In [None]:
# Extract the listed slices plus their existing neighbours; records go to 'extra_slices_' + this name in resultsPath.
extractionWithExtraSlices('mvp_etap4_all_3_train.json')