In [None]:
from os import path
import os
import radiomics
from radiomics import featureextractor
import pandas as pd
import pickle
from copy import copy
import pydicom
from collections import defaultdict
import nibabel as nib
import numpy as np

# Open the mri & mask dict
Contains both UMCG and Radboud path strings.

In [None]:
# LOAD
# Read the pickled MRI/mask dictionary from the working directory.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that come from a trusted source.
with open("20_mask_dict.pickle", "rb") as pkl_handle:
    output = pickle.load(pkl_handle)

In [None]:
# Shallow copy: the outer container is new, but the nested entries are still
# shared with `output`.
mri_mask_list_copy = copy(output)

In [None]:
# Display the copied container for a quick visual check
mri_mask_list_copy

# Interpolation experiment
Using parameter files and a non-normalized ADC.

In [None]:
# Collect every file below ./Radiomics_settings whose name starts with "int"
setting_list = [
    os.path.join(folder, candidate)
    for folder, _subfolders, candidates in os.walk(os.path.normpath("./Radiomics_settings"), topdown=True)
    for candidate in candidates
    if candidate.startswith("int")
]

In [None]:
# T2 settings without the '2D' or 'ANISO' tag in their path
t2_list_3d = [
    path for path in setting_list
    if '_t2_' in path and not ('2D' in path or 'ANISO' in path)
]
t2_list_3d

In [None]:
# T2 settings that carry the 'ANISO' tag but not the '2D' tag
t2_list_3d_aniso = [
    path for path in setting_list
    if 'ANISO' in path and '_t2_' in path and '2D' not in path
]
t2_list_3d_aniso

In [None]:
# T2 settings that carry the '2D' tag
t2_list_2d = [
    path for path in setting_list
    if '2D' in path and '_t2_' in path
]
t2_list_2d

In [None]:
# DWI settings without the '2D' or 'ANISO' tag in their path
dwi_list_3d = [
    path for path in setting_list
    if '_dwi_' in path and not ('2D' in path or 'ANISO' in path)
]
dwi_list_3d

In [None]:
# DWI settings that carry the 'ANISO' tag but not the '2D' tag
dwi_list_3d_aniso = [
    path for path in setting_list
    if 'ANISO' in path and '_dwi_' in path and '2D' not in path
]
dwi_list_3d_aniso

In [None]:
# DWI settings that carry the '2D' tag
dwi_list_2d = [
    path for path in setting_list
    if '2D' in path and '_dwi_' in path
]
dwi_list_2d

## T2-Weighted

In [None]:
def radiomics_calc_t2(file_mask_combo_dict, params_file):
    """Extract radiomics features for every T2 image/mask pair in the supplied dict.

    Parameters
    ----------
    file_mask_combo_dict : dict
        Maps an identifier string to an iterable of pairs whose first element
        is the image path and whose second element is the mask path.
        Identifiers containing "mark" are skipped, as are images whose path
        does not contain "t2" (case-insensitive).
    params_file : str
        Parameter file handed to ``RadiomicsFeatureExtractor``.

    Returns
    -------
    tuple
        ``(df_result, problem_list)``. ``df_result`` holds one row per
        successful extraction (feature columns plus 'Details', 'Anon_ID' and
        'file_name') and is an empty DataFrame when nothing was extracted.
        ``problem_list`` holds the ``(image_path, mask_path)`` pairs whose
        extraction raised.
    """
    # Bookkeeping entries the extractor returns next to the features; they are
    # removed from every result before it is stored.
    keys_remove = tuple(
        prefix + suffix
        for prefix, suffixes in (
            ('general_info_', (
                'BoundingBox', 'EnabledImageTypes', 'GeneralSettings', 'ImageHash',
                'ImageSpacing', 'MaskHash', 'NumpyVersion', 'PyWaveletVersion',
                'SimpleITKVersion', 'Version', 'VolumeNum', 'VoxelNum')),
            ('diagnostics_Versions_', (
                'PyRadiomics', 'Numpy', 'SimpleITK', 'PyWavelet', 'Python')),
            ('diagnostics_Configuration_', ('Settings', 'EnabledImageTypes')),
            ('diagnostics_Image-original_', (
                'Hash', 'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-original_', (
                'Hash', 'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass')),
            ('diagnostics_Image-interpolated_', (
                'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-interpolated_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-corrected_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
        )
        for suffix in suffixes
    )

    dataframe_list = []
    problem_list = []
    # Built once, on the first image that needs it, instead of once per mask.
    extractor = None

    # Dictionary keys are used as identifiers and can also be used to find the correct masks
    for keys in file_mask_combo_dict.keys():
        print(keys)
        if "mark" in keys:
            continue
        for mr_and_mask in file_mask_combo_dict[keys]:
            mr_file_path = mr_and_mask[0]
            mask_file_path = mr_and_mask[1]

            # Take the selected sequence only
            if 't2' not in mr_file_path.lower():
                continue

            # Mask file name for the 'Details' column; accept both Windows and
            # POSIX separators so the directory part is always stripped.
            detail_id1 = max(mask_file_path.rfind("\\"), mask_file_path.rfind("/"))
            id_string = mask_file_path[detail_id1 + 1:]

            if extractor is None:
                extractor = radiomics.featureextractor.RadiomicsFeatureExtractor(params_file)

            try:
                feature_vector = extractor.execute(mr_file_path, mask_file_path)

                for k in keys_remove:
                    feature_vector.pop(k, None)

                temporary_df = pd.DataFrame(feature_vector, columns=feature_vector.keys(), index=[keys])
                temporary_df['Details'] = id_string
                temporary_df['Anon_ID'] = keys
                temporary_df['file_name'] = mr_file_path
                dataframe_list.append(temporary_df)
            except Exception as exc:
                # Best effort: remember the failing pair and continue, but say why
                # it failed. KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Feature extraction failed for", mr_file_path, "-", repr(exc))
                problem_list.append((mr_file_path, mask_file_path))

    # pd.concat raises on an empty list, so hand back an empty frame instead.
    if not dataframe_list:
        return (pd.DataFrame(), problem_list)

    return (pd.concat(dataframe_list), problem_list)

In [None]:
for current_set in [t2_list_3d, t2_list_3d_aniso, t2_list_2d]:
    for t2_settings in current_set:
        # Settings name = file name without directory and extension. os.path is
        # used so this also works when the path separator is not a backslash.
        current_settings = os.path.splitext(os.path.basename(t2_settings))[0]
        print(current_settings)

        # run feature calculation with current parameter file
        result_df_t2, error_check_list_t2 = radiomics_calc_t2(mri_mask_list_copy, t2_settings)

        # Surface failed extractions instead of silently discarding them
        if error_check_list_t2:
            print(len(error_check_list_t2), "image/mask pairs failed for", current_settings)

        file_name_t2 = "RADIOMICS_DATA_20mmDLM_" + current_settings
        print(file_name_t2)
        result_df_t2.to_csv(file_name_t2 + '.csv')

## ADC & DWI

In [None]:
def radiomics_calc_dwi3(file_mask_combo_dict, params_file_dwi):
    """Extract radiomics features for every image/mask pair in the supplied dict.

    No sequence filter is applied here: every pair under an identifier that
    does not contain "mark" is processed.

    Parameters
    ----------
    file_mask_combo_dict : dict
        Maps an identifier string to an iterable of pairs whose first element
        is the image path and whose second element is the mask path.
    params_file_dwi : str
        Parameter file handed to ``RadiomicsFeatureExtractor``.

    Returns
    -------
    tuple
        ``(df_result, problem_list)``. ``df_result`` holds one row per
        successful extraction (feature columns plus 'Details', 'Anon_ID' and
        'file_name') and is an empty DataFrame when nothing was extracted.
        ``problem_list`` holds the ``(image_path, mask_path)`` pairs whose
        extraction raised.
    """
    # Bookkeeping entries the extractor returns next to the features; they are
    # removed from every result before it is stored.
    keys_remove = tuple(
        prefix + suffix
        for prefix, suffixes in (
            ('general_info_', (
                'BoundingBox', 'EnabledImageTypes', 'GeneralSettings', 'ImageHash',
                'ImageSpacing', 'MaskHash', 'NumpyVersion', 'PyWaveletVersion',
                'SimpleITKVersion', 'Version', 'VolumeNum', 'VoxelNum')),
            ('diagnostics_Versions_', (
                'PyRadiomics', 'Numpy', 'SimpleITK', 'PyWavelet', 'Python')),
            ('diagnostics_Configuration_', ('Settings', 'EnabledImageTypes')),
            ('diagnostics_Image-original_', (
                'Hash', 'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-original_', (
                'Hash', 'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass')),
            ('diagnostics_Image-interpolated_', (
                'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-interpolated_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-corrected_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
        )
        for suffix in suffixes
    )

    dataframe_list = []
    problem_list = []
    # Built once, on the first image that needs it, instead of once per mask.
    extractor = None

    # Dictionary keys are used as identifiers and can also be used to find the correct masks
    for keys in file_mask_combo_dict.keys():
        print(keys)
        if "mark" in keys:
            continue
        for mr_and_mask in file_mask_combo_dict[keys]:
            mr_file_path = mr_and_mask[0]
            mask_file_path = mr_and_mask[1]

            # Mask file name for the 'Details' column; accept both Windows and
            # POSIX separators so the directory part is always stripped.
            detail_id1 = max(mask_file_path.rfind("\\"), mask_file_path.rfind("/"))
            id_string = mask_file_path[detail_id1 + 1:]

            if extractor is None:
                extractor = radiomics.featureextractor.RadiomicsFeatureExtractor(params_file_dwi)

            try:
                feature_vector = extractor.execute(mr_file_path, mask_file_path)

                for k in keys_remove:
                    feature_vector.pop(k, None)

                temporary_df = pd.DataFrame(feature_vector, columns=feature_vector.keys(), index=[keys])
                temporary_df['Details'] = id_string
                temporary_df['Anon_ID'] = keys
                temporary_df['file_name'] = mr_file_path
                dataframe_list.append(temporary_df)
            except Exception as exc:
                # Best effort: remember the failing pair and continue, but say why
                # it failed. KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Feature extraction failed for", mr_file_path, "-", repr(exc))
                problem_list.append((mr_file_path, mask_file_path))

    # pd.concat raises on an empty list, so hand back an empty frame instead.
    if not dataframe_list:
        return (pd.DataFrame(), problem_list)

    return (pd.concat(dataframe_list), problem_list)

In [None]:
# dwi datasets
for current_set in [dwi_list_3d, dwi_list_3d_aniso, dwi_list_2d]:
    for dwi_settings in current_set:
        # Settings name = file name without directory and extension. os.path is
        # used so this also works when the path separator is not a backslash.
        current_settings_adc = os.path.splitext(os.path.basename(dwi_settings))[0]
        print(current_settings_adc)

        # run feature calculation with current parameter file
        result_df_dwi, error_check_list_dwi = radiomics_calc_dwi3(mri_mask_list_copy, dwi_settings)

        # Surface failed extractions instead of silently discarding them
        if error_check_list_dwi:
            print(len(error_check_list_dwi), "image/mask pairs failed for", current_settings_adc)

        file_name_adc = "RADIOMICS_DATA_20mmDLM_" + current_settings_adc
        print(file_name_adc)
        result_df_dwi.to_csv(file_name_adc + '.csv')

# All Sequences
## For the comparison 

In [None]:
def radiomics_calc(file_mask_combo_dict, params_file):
    """Extract radiomics features for every image/mask pair in the supplied dict.

    Every pair under an identifier that does not contain "mark" is processed,
    regardless of sequence.

    Parameters
    ----------
    file_mask_combo_dict : dict
        Maps an identifier string to an iterable of pairs whose first element
        is the image path and whose second element is the mask path.
    params_file : str
        Parameter file handed to ``RadiomicsFeatureExtractor``.

    Returns
    -------
    tuple
        ``(df_result, problem_list)``. ``df_result`` holds one row per
        successful extraction (feature columns plus 'Details', 'Anon_ID' and
        'file_name') and is an empty DataFrame when nothing was extracted.
        ``problem_list`` holds the ``(image_path, mask_path)`` pairs whose
        extraction raised.
    """
    # Bookkeeping entries the extractor returns next to the features; they are
    # removed from every result before it is stored.
    keys_remove = tuple(
        prefix + suffix
        for prefix, suffixes in (
            ('general_info_', (
                'BoundingBox', 'EnabledImageTypes', 'GeneralSettings', 'ImageHash',
                'ImageSpacing', 'MaskHash', 'NumpyVersion', 'PyWaveletVersion',
                'SimpleITKVersion', 'Version', 'VolumeNum', 'VoxelNum')),
            ('diagnostics_Versions_', (
                'PyRadiomics', 'Numpy', 'SimpleITK', 'PyWavelet', 'Python')),
            ('diagnostics_Configuration_', ('Settings', 'EnabledImageTypes')),
            ('diagnostics_Image-original_', (
                'Hash', 'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-original_', (
                'Hash', 'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass')),
            ('diagnostics_Image-interpolated_', (
                'Spacing', 'Size', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-interpolated_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
            ('diagnostics_Mask-corrected_', (
                'Spacing', 'Size', 'BoundingBox', 'VoxelNum', 'VolumeNum',
                'CenterOfMassIndex', 'CenterOfMass', 'Mean', 'Minimum', 'Maximum')),
        )
        for suffix in suffixes
    )

    dataframe_list = []
    problem_list = []
    # Built once, on the first image that needs it, instead of once per mask.
    extractor = None

    # Dictionary keys are used as identifiers and can also be used to find the correct masks
    for keys in file_mask_combo_dict.keys():
        print(keys)
        if "mark" in keys:
            continue
        for mr_and_mask in file_mask_combo_dict[keys]:
            mr_file_path = mr_and_mask[0]
            mask_file_path = mr_and_mask[1]

            # Mask file name for the 'Details' column; accept both Windows and
            # POSIX separators so the directory part is always stripped.
            detail_id1 = max(mask_file_path.rfind("\\"), mask_file_path.rfind("/"))
            id_string = mask_file_path[detail_id1 + 1:]

            if extractor is None:
                extractor = radiomics.featureextractor.RadiomicsFeatureExtractor(params_file)

            try:
                feature_vector = extractor.execute(mr_file_path, mask_file_path)

                for k in keys_remove:
                    feature_vector.pop(k, None)

                temporary_df = pd.DataFrame(feature_vector, columns=feature_vector.keys(), index=[keys])
                temporary_df['Details'] = id_string
                temporary_df['Anon_ID'] = keys
                temporary_df['file_name'] = mr_file_path
                dataframe_list.append(temporary_df)
            except Exception as exc:
                # Best effort: remember the failing pair and continue, but say why
                # it failed. KeyboardInterrupt/SystemExit are no longer swallowed.
                print("Feature extraction failed for", mr_file_path, "-", repr(exc))
                problem_list.append((mr_file_path, mask_file_path))

    # pd.concat raises on an empty list, so hand back an empty frame instead.
    if not dataframe_list:
        return (pd.DataFrame(), problem_list)

    return (pd.concat(dataframe_list), problem_list)

In [None]:
# Settings files without resampling, one 2D and one 3D variant
gen_set = [
    "./Radiomics_settings/2D_noresampling.yml",
    "./Radiomics_settings/3D_noresampling.yml",
]

for i in gen_set:
    # Extract the features with this settings file
    result_df, error_check_list = radiomics_calc(mri_mask_list_copy, i)

    # Output name = settings file name without directory and ".yml"
    name = i.rsplit("/", 1)[-1]
    name_final = name.replace(".yml", "")
    file_name = "[Radiomics_data]_20mmDLM_" + name_final

    result_df.to_csv(file_name + '.csv')

In [None]:
# Open general 2D radiomics settings
general_settings = "./Radiomics_settings/2D_noresampling_comparison.yml"

# Run feature calculation
result_df, error_check_list = radiomics_calc(mri_mask_list_copy, general_settings)

# Define filename and save feature dataframe to csv
# NOTE(review): the other outputs in this notebook use "20mmDLM" - confirm that
# "18mmDLM" is intended here.
file_name = "[Radiomics_data]_18mmDLM_2D_noresampling_comparison"
result_df.to_csv(file_name + '.csv')

# For easy readability also create an Excel file of the same result.
# This used to export the leftover T2 frame (`result_df_t2`) under
# `file_name_t2`; it now writes the frame computed in this cell. Passing the
# path directly also avoids ExcelWriter.save(), which was removed in pandas 2.0.
result_df.to_excel(file_name + '.xlsx', sheet_name='Sheet1')

## Sorting the features

In [None]:
from collections import OrderedDict

class PreML:
    """Select rows of a feature table by image type and label their columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature table; the selection methods read its 'file_name' column.
    search1 : str
        Pattern matched against 'file_name' (case-insensitive, with the
        pattern semantics of pandas ``Series.str.contains``).
    image_name : str
        Suffix that ``rename_clm`` appends to column names.
    """

    # Columns that rename_clm leaves untouched.
    _SKIP_COLUMNS = ('Anon_ID', 'SeriesDescription', 'B-Value', 'DCM_header_path')

    def __init__(self, dataframe, search1, image_name):
        self.df = dataframe
        self.ss = search1
        self.img = image_name

    def _matches(self, pattern):
        """Boolean mask of rows whose 'file_name' contains ``pattern``.

        ``na=False`` makes missing file names count as "no match" instead of
        producing NaN entries that cannot be used as a boolean mask.
        """
        return self.df['file_name'].str.contains(pattern, case=False, na=False)

    def select_image_type(self):
        """Return the rows whose 'file_name' matches the first search pattern."""
        return self.df.loc[self._matches(self.ss)]

    def select_image_type_negative(self):
        """Same selection as ``select_image_type``.

        Despite the name, no rows are excluded here; the method is kept so
        existing callers keep working.
        """
        return self.select_image_type()

    def select_image_type_multi(self, search2):
        """Return the rows whose 'file_name' matches both the first pattern and ``search2``."""
        return self.df[self._matches(self.ss) & self._matches(search2)]

    def select_image_type_multi_negative(self, search2):
        """Same selection as ``select_image_type_multi``.

        Despite the name, no rows are excluded here; the method is kept so
        existing callers keep working.
        """
        return self.select_image_type_multi(search2)

    def rename_clm(self, selected_df):
        """Return a copy of ``selected_df`` with "-<image_name>" appended to each column name.

        The identifier/metadata columns in ``_SKIP_COLUMNS`` keep their names.
        ``selected_df`` itself is not modified.
        """
        mapping = {
            clm: clm + "-" + self.img
            for clm in selected_df.columns
            if clm not in self._SKIP_COLUMNS
        }
        # One rename call for all columns; returns a new frame.
        return selected_df.rename(columns=mapping)
    

In [None]:
# Identifiers of the 2D setting variants (every entry ends in "_2D")
names_2d = ['int1_t2_0_5_2D','int1_t2_0_35_2D','int1_dwi_1_37_2D','int1_dwi_2_2D',
        'int2_t2_0_5_2D','int2_t2_0_35_2D','int2_dwi_1_37_2D','int2_dwi_2_2D',
        'int3_t2_0_5_2D','int3_t2_0_35_2D','int3_dwi_1_37_2D','int3_dwi_2_2D',
        'int10_t2_0_5_2D','int10_t2_0_35_2D','int10_dwi_1_37_2D','int10_dwi_2_2D',
        'int1_t2_0_8_2D','int1_dwi_2_5_2D', 'int2_t2_0_8_2D','int2_dwi_2_5_2D',
        'int3_t2_0_8_2D','int3_dwi_2_5_2D', 'int10_t2_0_8_2D','int10_dwi_2_5_2D']

In [None]:
# Identifiers of the setting variants without the "_2D" suffix
names = ['int1_t2_0_5','int1_t2_0_35','int1_dwi_1_37','int1_dwi_2',
        'int2_t2_0_5','int2_t2_0_35','int2_dwi_1_37','int2_dwi_2',
        'int3_t2_0_5','int3_t2_0_35','int3_dwi_1_37','int3_dwi_2',
        'int10_t2_0_5','int10_t2_0_35','int10_dwi_1_37','int10_dwi_2',
        'int1_t2_0_8','int1_dwi_2_5', 'int2_t2_0_8','int2_dwi_2_5',
        'int3_t2_0_8','int3_dwi_2_5', 'int10_t2_0_8','int10_dwi_2_5']

In [None]:
# NOTE(review): this rebinds `names` to a single entry, so any cell that runs
# after it only sees 'int1_dwi_1_37' - confirm this override is intentional
# and not a leftover from a partial re-run.
names = ['int1_dwi_1_37']

In [None]:
# One entry per image type: (column suffix, first file_name pattern, optional
# second pattern). The list order is the merge order and therefore the column
# order of the saved CSV.
t2_selections = [
    ("T2-tra", "t2", "tra"),
    ("T2-cor", "t2", "cor"),
    ("T2-sag", "t2", "sag"),
]
dwi_selections = [
    ("DWI-b50", "=b-0=|=b-50=|=b-100=", None),
    ("DWI-b400", "=b-400=|=b-500=", None),
    ("DWI-b800", "=b-800=|=b-1000=|=b-750=", None),
    ("DWI-b1400", "=b-2000=|=b-1400=|=b-1500=", None),
    ("ADC", "adc", None),
]

for name in names:
    print(name)
    name1 = './/non_sorted_radiomics/resampled/3D/[Radiomics_data]_20mmDLM_3D_'+ name +'.csv'

    # Import the calculated features in the form of a csv file with pandas.
    # on_bad_lines='skip' (pandas >= 1.3) replaces error_bad_lines=False, which
    # was removed in pandas 2.0.
    test_features = pd.read_csv(name1, on_bad_lines='skip', sep=',')

    # Work on a real copy so the assignments below never touch test_features
    test_data = test_features.copy()
    # Backslashes become '=' so the '=b-50='-style patterns can match path parts;
    # regex=False keeps the single backslash a literal on every pandas version.
    test_data['file_name'] = test_data.file_name.str.replace('\\', '=', regex=False)
    del test_data["diagnostics_Image-original_Dimensionality"]
    del test_data['Unnamed: 0']
    test_data = test_data.dropna()
    print(test_data.shape)

    selections = t2_selections if "_t2_" in name else dwi_selections

    # Select each image type, suffix its columns, keep the first row per
    # Anon_ID, and inner-join everything on Anon_ID.
    sorted_features = None
    for image_label, search1, search2 in selections:
        selector = PreML(test_data, search1, image_label)
        if search2 is None:
            selected = selector.select_image_type_negative()
        else:
            selected = selector.select_image_type_multi(search2)
        unique_rows = selector.rename_clm(selected).drop_duplicates(subset=['Anon_ID'])
        if sorted_features is None:
            sorted_features = unique_rows
        else:
            sorted_features = sorted_features.merge(unique_rows, on='Anon_ID')

    # save as csv, with any square brackets stripped from the file name
    name4 = ('SRDart4_20mmDLM_' + name + '.csv').replace('[', '').replace(']', '')
    sorted_features.to_csv(name4)
    print(sorted_features.shape)


# no resampling 

In [None]:
file_path = "./non_sorted_radiomics/noresampling/3D/[Radiomics_data]_20mmDLM_3D_noresampling.csv"

# Import the calculated features in the form of a csv file with pandas.
# on_bad_lines='skip' (pandas >= 1.3) replaces error_bad_lines=False, which was
# removed in pandas 2.0.
test_features = pd.read_csv(file_path, on_bad_lines='skip', sep=',')

# Work on a real copy so the assignments below never touch test_features
test_data = test_features.copy()
# Backslashes become '=' so the '=b-50='-style patterns can match path parts;
# regex=False keeps the single backslash a literal on every pandas version.
test_data['file_name'] = test_data.file_name.str.replace('\\', '=', regex=False)
del test_data["diagnostics_Image-original_Dimensionality"]
del test_data['Unnamed: 0']
test_data = test_data.dropna()
print(test_data.shape)

# One entry per image type: (column suffix, first file_name pattern, optional
# second pattern). The list order is the merge order and therefore the column
# order of the saved CSV.
selections = [
    ("T2-sag", "t2", "sag"),
    ("T2-cor", "t2", "cor"),
    ("T2-tra", "t2", "tra"),
    ("DWI-b50", "=b-0=|=b-50=|=b-100=", None),
    ("DWI-b400", "=b-400=|=b-500=", None),
    ("DWI-b800", "=b-800=|=b-1000=|=b-750=", None),
    ("DWI-b1400", "=b-2000=|=b-1400=|=b-1500=", None),
    ("ADC", "adc", None),
]

# Select each image type, suffix its columns, keep the first row per Anon_ID,
# and inner-join everything on Anon_ID.
sorted_features = None
for image_label, search1, search2 in selections:
    selector = PreML(test_data, search1, image_label)
    if search2 is None:
        selected = selector.select_image_type_negative()
    else:
        selected = selector.select_image_type_multi(search2)
    unique_rows = selector.rename_clm(selected).drop_duplicates(subset=['Anon_ID'])
    if sorted_features is None:
        sorted_features = unique_rows
    else:
        sorted_features = sorted_features.merge(unique_rows, on='Anon_ID')
print(sorted_features.shape)

sorted_features.to_csv("SRDart4_20mmDLM_3D_noresampling.csv")

# Merge datasets

In [None]:
def csv_path_seeker(dir_path, int_set):
    """Read every CSV below ``dir_path`` whose full path contains ``int_set``.

    Parameters
    ----------
    dir_path : str
        Directory that is searched recursively.
    int_set : str
        Substring that must occur in the joined path (directory part included)
        of a CSV file for it to be read.

    Returns
    -------
    list of pandas.DataFrame
        One frame per matching file, read with the first column as index, in
        sorted traversal order. Empty when nothing matches.
    """
    df_list = []
    for current_dir, sub_dirs, files in os.walk(os.path.normpath(dir_path), topdown=True):
        # Sorting in place (topdown walk) makes the traversal order deterministic
        sub_dirs.sort()
        for file_ in sorted(files):
            if not file_.endswith(".csv"):
                continue
            full_path = os.path.join(current_dir, file_)
            if int_set in full_path:
                df_list.append(pd.read_csv(full_path, index_col=0))

    return df_list

In [None]:
# Setting identifiers whose sorted CSV files are combined below.
# NOTE(review): the 3D entry 'int1_dwi_1_37' is absent although its 2D
# counterpart and the other 3D variants are listed - confirm this is intended.
names = [
    'int1_t2_0_5_2D', 'int1_t2_0_35_2D', 'int1_dwi_1_37_2D', 'int1_dwi_2_2D',
    'int2_t2_0_5_2D', 'int2_t2_0_35_2D', 'int2_dwi_1_37_2D', 'int2_dwi_2_2D',
    'int3_t2_0_5_2D', 'int3_t2_0_35_2D', 'int3_dwi_1_37_2D', 'int3_dwi_2_2D',
    'int10_t2_0_5_2D', 'int10_t2_0_35_2D', 'int10_dwi_1_37_2D', 'int10_dwi_2_2D',
    'int1_t2_0_8_2D', 'int1_dwi_2_5_2D', 'int2_t2_0_8_2D', 'int2_dwi_2_5_2D',
    'int3_t2_0_8_2D', 'int3_dwi_2_5_2D', 'int10_t2_0_8_2D', 'int10_dwi_2_5_2D',
    'int1_t2_0_5', 'int1_t2_0_35', 'int1_dwi_2',
    'int2_t2_0_5', 'int2_t2_0_35', 'int2_dwi_1_37', 'int2_dwi_2',
    'int3_t2_0_5', 'int3_t2_0_35', 'int3_dwi_1_37', 'int3_dwi_2',
    'int10_t2_0_5', 'int10_t2_0_35', 'int10_dwi_1_37', 'int10_dwi_2',
    'int1_t2_0_8', 'int1_dwi_2_5', 'int2_t2_0_8', 'int2_dwi_2_5',
    'int3_t2_0_8', 'int3_dwi_2_5', 'int10_t2_0_8', 'int10_dwi_2_5',
]

# Turn each identifier into the path of its sorted CSV; identifiers containing
# "2D" live in the 2D folder, all others in the 3D folder.
total_names = []
for name in names:
    dimension_dir = "2D" if "2D" in name else "3D"
    name = "./sorted_radiomics/resampled/" + dimension_dir + "/SRDart4_20mmDLM_" + name + ".csv"
    total_names.append(name)

In [None]:
# Collect the Anon_ID column of every sorted feature file
dfs = []
for filename in total_names:
    dfs.append(pd.read_csv(filename, index_col=0)["Anon_ID"])

In [None]:
# Display the collected Anon_ID columns
dfs

In [None]:
from functools import reduce
# Chain pd.merge over all collected columns; merge defaults to an inner join,
# so only Anon_ID values present in every entry of `dfs` remain.
df = reduce(lambda df1,df2: pd.merge(df1,df2,on='Anon_ID'), dfs)

In [None]:
# Write the merged Anon_ID table to the working directory
df.to_csv("Total_ANON_ID_list.csv")

In [None]:
# Unfiltered ("uf") feature table of one setting variant, first column as index
all_data_uf =  pd.read_csv("./sorted_radiomics/resampled/2D/SRDart4_20mmDLM_int2_t2_0_8_2D.csv", index_col=0)

In [None]:
# Keep only the rows whose Anon_ID appears in the saved identifier list
filter_df = pd.read_csv("./Total_ANON_ID_list.csv", index_col=0)
all_data = all_data_uf[all_data_uf.Anon_ID.isin(filter_df.Anon_ID)]

In [None]:
# Display the filtered feature table
all_data

# Gather MRI settings

In [None]:
class MRSettings:
    """Collect MR acquisition settings from DICOM headers into pandas DataFrames.

    The instance wraps a dictionary that maps an identifier to a list of
    entries whose first element is the path of an MR image file. A ``.dcm``
    file found under the folder of that image supplies the header fields.
    """

    def __init__(self, information_dictionary):
        """Store the identifier -> [(mr_path, ...), ...] dictionary."""
        self.dict = information_dictionary

    @staticmethod
    def _split_path(path_string):
        """Split a path string at its last separator into (parent, leaf).

        Both backslash and forward slash count as separators, so the path
        strings can be parsed regardless of the operating system the notebook
        runs on. Without any separator the parent is the empty string.
        """
        cut = max(path_string.rfind("\\"), path_string.rfind("/"))
        if cut < 0:
            return "", path_string
        return path_string[:cut], path_string[cut + 1:]

    def dicom_path_list(self, current_path):
        """
        Due to use of nifti files some header info is lost, still this is easily obtained by searching for a dicom.

        Returns the paths of all files ending in ".dcm" found under
        ``current_path`` (sub-folders included), in os.walk order.
        """
        return [
            os.path.join(dir_path, file_)
            for dir_path, _sub_dirs, files in os.walk(os.path.normpath(current_path), topdown=True)
            for file_ in files
            if file_.endswith(".dcm")
        ]

    def dicom_load(self, dicom_path):
        """Read the acquisition settings of one DICOM file.

        Returns a tuple, in this order: repetition time, echo time, flip
        angle, SAR, dB/dt, value of tag (0018,0089), echo train length,
        percent sampling, value of tag (0018,0094), pixel bandwidth, the four
        acquisition-matrix entries, value of tag (0018,0083), spacing between
        slices, value of tag (0018,1312) as a string, and the first
        pixel-spacing entry. All values except the last two are floats.

        A header element that is absent makes pydicom raise (AttributeError
        for keyword access, KeyError for tag access).
        """
        # Only header elements are used, so the pixel data is not loaded.
        dm = pydicom.dcmread(dicom_path, stop_before_pixels=True)

        rt = float(dm.RepetitionTime)
        et = float(dm.EchoTime)
        flip = float(dm.FlipAngle)
        sar = float(dm.SAR)
        dbdt = float(dm.dBdt)
        phase = float(dm[0x0018, 0x0089].value)
        echotrain = float(dm.EchoTrainLength)
        psamp = float(dm.PercentSampling)
        phasefov = float(dm[0x0018, 0x0094].value)
        PixBW = float(dm.PixelBandwidth)
        aqm0, aqm1, aqm2, aqm3 = [float(x) for x in dm.AcquisitionMatrix][:4]
        nex = float(dm[0x0018, 0x0083].value)
        sbs = float(dm.SpacingBetweenSlices)
        direct = str(dm[0x0018, 0x1312].value)
        x_pix = dm.PixelSpacing[0]
        return (rt, et, flip, sar, dbdt, phase, echotrain, psamp, phasefov, PixBW,
                aqm0, aqm1, aqm2, aqm3, nex, sbs, direct, x_pix)

    def dicom_model(self, dicom_path):
        """Return (manufacturer model name, magnetic field strength) of a DICOM file."""
        # Only header elements are used, so the pixel data is not loaded.
        dm = pydicom.dcmread(dicom_path, stop_before_pixels=True)
        model = str(dm.ManufacturerModelName)
        mag = float(dm.MagneticFieldStrength)
        return model, mag

    def series_cleaner(self, series_name):
        """Map a raw series description to a normalized series label.

        * contains "t2" (case-insensitive): "T2-tra", "T2-cor" or "T2-sag"
          depending on the orientation token found; plain "T2" when none of
          the tokens is present (this case used to raise UnboundLocalError).
        * contains "adc" (case-insensitive) and no "b-": "ADC".
        * anything else: "DWI-" followed by the name with "-" removed.
        """
        lowered = series_name.lower()
        if 't2' in lowered:
            # Checked in the order tra, cor, sag so that a name containing
            # several tokens resolves exactly as before, when later checks
            # overwrote earlier ones.
            for token, label in (("tra", "T2-tra"), ("cor", "T2-cor"), ("sag", "T2-sag")):
                if token in lowered:
                    return label
            return "T2"
        if 'adc' in lowered and 'b-' not in series_name:
            return "ADC"
        return "DWI-" + series_name.replace("-", "")

    def dicom_dict(self):
        """Pick one DICOM file per ADC/T2 entry of the wrapped dictionary.

        Returns a defaultdict(list) mapping each dictionary key to the first
        ``.dcm`` path found under the folder of every MR file whose path
        contains "adc" or "t2" (case-insensitive). Keys containing "mark" are
        skipped. Entries whose folder holds no ``.dcm`` file are skipped with
        a warning instead of failing with an IndexError.
        """
        import warnings  # local import: only needed for the skip notice below

        dicom_set = defaultdict(list)

        # Dictionary keys are used as identifiers and can also be used to find the correct masks
        for keys in self.dict.keys():
            if "mark" in keys:
                continue
            for mr_and_mask in self.dict[keys]:
                mr_file_path = mr_and_mask[0]
                lowered = mr_file_path.lower()
                # Decide before touching the file system: other sequences are
                # never used, so walking their folders would be wasted work.
                if "adc" not in lowered and "t2" not in lowered:
                    continue

                # Drop the image file name to get the folder that is searched.
                folder_path, _ = self._split_path(mr_file_path)
                found = self.dicom_path_list(folder_path)
                if not found:
                    warnings.warn(
                        "No .dcm file found under %r (key %r); entry skipped." % (folder_path, keys)
                    )
                    continue
                dicom_set[keys].append(found[0])

        return dicom_set

    def _settings_frame(self, dicoms, target_series, include_flip_angle, add_scanner_columns):
        """Build the settings table for one normalized series label.

        ``dicoms`` maps an identifier to DICOM paths. The name of the folder
        that directly contains a DICOM file is treated as its series
        description. Every path whose cleaned series label equals
        ``target_series`` contributes one row indexed by its identifier.
        ``include_flip_angle`` adds the FlipAngle column;
        ``add_scanner_columns`` appends Model, MagneticField and Anon_ID.
        Returns an empty DataFrame when nothing matches.
        """
        frames = []
        for subject_id, dicom_paths in dicoms.items():
            for dicom_path in dicom_paths:
                series_dir, _ = self._split_path(dicom_path)
                _, series_name = self._split_path(series_dir)
                series_final = self.series_cleaner(series_name)
                if series_final != target_series:
                    continue

                (rt, et, flip, sar, dbdt, phase, echotrain, psamp, phasefov, PixBW,
                 aqm0, aqm1, aqm2, aqm3, nex, sbs, direct, x_pix) = self.dicom_load(dicom_path)

                # Matrix entry times pixel spacing, scaled by 0.1
                # (presumably mm -> cm; TODO confirm the intended unit).
                if direct == "ROW":
                    fov = (aqm1 * x_pix) * 0.1
                elif direct == "COL":
                    fov = (aqm0 * x_pix) * 0.1
                else:
                    # Unknown direction: record a missing value instead of
                    # reusing the value of the previous file.
                    fov = float("nan")

                suffix = "-" + series_final
                row = {"RepetitionTime" + suffix: rt, "EchoTime" + suffix: et}
                if include_flip_angle:
                    row["FlipAngle" + suffix] = flip
                # Column labels (including their spelling) are kept unchanged
                # because later analysis may select columns by these names.
                row.update({
                    "SAR" + suffix: sar,
                    "dbdt" + suffix: dbdt,
                    "NumPhaseSteps" + suffix: phase,
                    "EchoTrainLenght" + suffix: echotrain,
                    "PercentSampling" + suffix: psamp,
                    "PercentPhasefov" + suffix: phasefov,
                    "PixelBandwidth" + suffix: PixBW,
                    "Matrix_0" + suffix: aqm0,
                    "Matrix_1" + suffix: aqm1,
                    "Matrix_2" + suffix: aqm2,
                    "Matrix_3" + suffix: aqm3,
                    "NEX" + suffix: nex,
                    "SpacingBetweenSlices" + suffix: sbs,
                    "PhaseDirection" + suffix: direct,
                    "FieldOfFiew" + suffix: fov,
                })
                temp_df = pd.DataFrame(row, columns=list(row), index=[subject_id])

                if add_scanner_columns:
                    model, mag = self.dicom_model(dicom_path)
                    temp_df['Model'] = model
                    temp_df['MagneticField'] = mag
                    temp_df['Anon_ID'] = subject_id
                frames.append(temp_df)

        if not frames:
            # pd.concat refuses an empty list; an empty table keeps the
            # combined result usable when a series type is absent.
            return pd.DataFrame()
        return pd.concat(frames, axis=0)

    def dataframe_creator_ADC(self, dicoms):
        """Settings table of the "ADC" series (no FlipAngle column; adds Model, MagneticField, Anon_ID)."""
        return self._settings_frame(dicoms, "ADC", include_flip_angle=False, add_scanner_columns=True)

    def dataframe_creator_t2c(self, dicoms):
        """Settings table of the "T2-cor" series."""
        return self._settings_frame(dicoms, "T2-cor", include_flip_angle=True, add_scanner_columns=False)

    def dataframe_creator_t2s(self, dicoms):
        """Settings table of the "T2-sag" series."""
        return self._settings_frame(dicoms, "T2-sag", include_flip_angle=True, add_scanner_columns=False)

    def dataframe_creator_t2t(self, dicoms):
        """Settings table of the "T2-tra" series."""
        return self._settings_frame(dicoms, "T2-tra", include_flip_angle=True, add_scanner_columns=False)

    def dataframe_combinator(self, dicoms):
        """Concatenate the ADC, T2-cor, T2-sag and T2-tra tables column-wise (aligned on the index)."""
        df_adc = self.dataframe_creator_ADC(dicoms)
        df_t2c = self.dataframe_creator_t2c(dicoms)
        df_t2s = self.dataframe_creator_t2s(dicoms)
        df_t2t = self.dataframe_creator_t2t(dicoms)

        return pd.concat([df_adc, df_t2c, df_t2s, df_t2t], axis=1)

In [None]:
# Wrap the copied MRI/mask dictionary (loaded from the pickle at the top of
# the notebook) in the settings helper.
MRset = MRSettings(mri_mask_list_copy)

In [None]:
# Collect, per dictionary key, one DICOM path for each ADC/T2 entry.
relevant_dicoms = MRset.dicom_dict()

In [None]:
# Build the combined settings table: ADC, T2-cor, T2-sag and T2-tra columns
# concatenated side by side.
df_settings = MRset.dataframe_combinator(relevant_dicoms)

In [None]:
# Show the combined settings table.
df_settings