In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
import os
import pydicom
import SimpleITK as sitk
from radiomics import featureextractor


def load_dicom_series(directory):
    """Read the DICOM series stored in ``directory`` as one SimpleITK image.

    Args:
        directory: Folder that holds the slices of the series.

    Returns:
        The image produced by ``sitk.ImageSeriesReader.Execute()``.
    """
    series_reader = sitk.ImageSeriesReader()
    # Let the reader discover the slice files of the series, then hand the
    # resulting file list straight back to the same reader.
    series_reader.SetFileNames(series_reader.GetGDCMSeriesFileNames(directory))
    return series_reader.Execute()

def load_dicom_file(file_path):
    """Read a single image file with SimpleITK.

    Args:
        file_path: Path of the file to read (used here for segmentation files).

    Returns:
        The image returned by ``sitk.ReadImage``.
    """
    loaded_image = sitk.ReadImage(file_path)
    return loaded_image

def resample_image_to_reference(image, reference_image):
    """Resample ``image`` onto the voxel grid of ``reference_image``.

    The output takes its spacing, size, direction and origin from
    ``reference_image``. Nearest-neighbour interpolation and a
    default-constructed ``sitk.Transform()`` are used.

    Args:
        image: Image to resample (the segmentation mask in this notebook).
        reference_image: Image whose grid defines the output geometry.

    Returns:
        The result of ``sitk.ResampleImageFilter.Execute(image)``.
    """
    grid_filter = sitk.ResampleImageFilter()
    grid_filter.SetReferenceImage(reference_image)
    grid_filter.SetInterpolator(sitk.sitkNearestNeighbor)
    grid_filter.SetTransform(sitk.Transform())

    # Copy every geometry attribute of the reference explicitly.
    # NOTE(review): SetReferenceImage presumably already copies these four
    # values, which would make the explicit calls redundant - confirm before
    # removing them.
    geometry = (
        (grid_filter.SetOutputSpacing, reference_image.GetSpacing),
        (grid_filter.SetSize, reference_image.GetSize),
        (grid_filter.SetOutputDirection, reference_image.GetDirection),
        (grid_filter.SetOutputOrigin, reference_image.GetOrigin),
    )
    for apply_setting, read_setting in geometry:
        apply_setting(read_setting())

    return grid_filter.Execute(image)

# Input tables for the notebook.
# NOTE(review): the diagnosis path is absolute and machine-specific, while the
# patient table is read relative to the working directory - consider deriving
# both from one configurable data directory.
PATIENT_PATHS_CSV = 'df_patients.csv'
DIAGNOSIS_CSV = 'C:/Users/Gabriel/OneDrive/Dokumente/GitHub/Medical_image/diagnosis.csv'

paths_df = pd.read_csv(PATIENT_PATHS_CSV)
df_of_noduls = pd.read_csv(DIAGNOSIS_CSV)

In [11]:
# Patient ids listed in the TCIA diagnosis spreadsheet.
pat_list = pd.read_excel("tcia-diagnosis-data-2012-04-20.xls")['TCIA Patient ID'].tolist()

# Keep only the rows whose patient id is in pat_list.
# A new frame is built in one chain instead of calling drop/reset_index with
# inplace=True on the filtered slice. errors='ignore' makes the cell safe to
# re-run: once 'Unnamed: 0' is gone, a plain drop would raise KeyError.
paths_df = (
    paths_df[paths_df['Patient'].isin(pat_list)]
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .reset_index(drop=True)
)
paths_df

Unnamed: 0,Patient,Path_ct,Path_seg
0,LIDC-IDRI-0068,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
1,LIDC-IDRI-0072,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
2,LIDC-IDRI-0088,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
3,LIDC-IDRI-0090,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
4,LIDC-IDRI-0091,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
...,...,...,...
137,LIDC-IDRI-0994,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
138,LIDC-IDRI-1002,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
139,LIDC-IDRI-1004,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...
140,LIDC-IDRI-1010,G:/Dokumente/MedicalIM/Lung_Cancer/manifest-16...,['G:/Dokumente/MedicalIM/Lung_Cancer_Seg/manif...


In [12]:
def _first_file_in(directory):
    """Return ``'<directory>/<name>'`` for the first entry of ``directory``.

    Entries are sorted by name before picking, because ``os.listdir`` returns
    them in arbitrary order; sorting makes the choice reproducible.

    Raises:
        FileNotFoundError: If ``directory`` contains no entries.
    """
    entries = sorted(os.listdir(directory))
    if not entries:
        raise FileNotFoundError(f'No segmentation file found in {directory}')
    return f'{directory}/{entries[0]}'


# One inner list per row of paths_df: the segmentation file chosen from each
# directory in that row's "Path_seg" cell. The cell holds the string form of a
# Python list of directories, hence ast.literal_eval.
list_of_seg_paths = [
    [_first_file_in(seg_dir) for seg_dir in ast.literal_eval(raw_seg_dirs)]
    for raw_seg_dirs in paths_df["Path_seg"]
]



In [13]:
# Initialize the PyRadiomics feature extractor from the parameter file.
# NOTE(review): absolute, machine-specific path - consider making it configurable.
params = 'C:/Users/Gabriel/OneDrive/Dokumente/GitHub/Medical_image/git_extraction/params/qa4iqi_params.yml'
extractor = featureextractor.RadiomicsFeatureExtractor(params)

# Parallel result lists: one entry per (patient, segmentation) pair.
list_radiomics = []
list_patient_names = []
list_node_names = []

# Guard against stale notebook state: list_of_seg_paths must have been built
# from the current paths_df, otherwise patients and masks would be mis-paired.
assert len(list_of_seg_paths) == len(paths_df), (
    "list_of_seg_paths is out of sync with paths_df; re-run the cell that builds it"
)

# Iterate positionally with zip instead of looking rows up by integer label
# (paths_df["Path_ct"][ct]), which only works while the index is exactly 0..n-1.
cases = zip(paths_df["Patient"], paths_df["Path_ct"], list_of_seg_paths)
for counter, (patient_id, ct_dir, seg_files) in enumerate(cases, start=1):
    # The CT series is loaded once per patient and reused for all of its masks.
    image = load_dicom_series(ct_dir)
    for seg, seg_file in enumerate(seg_files):
        mask = load_dicom_file(seg_file)
        # Put the mask on the CT grid before extracting features.
        resampled_mask = resample_image_to_reference(mask, image)
        # label=255 is the mask value passed to PyRadiomics as the region of interest.
        features = extractor.execute(image, resampled_mask, label=255)
        list_patient_names.append(patient_id)
        list_node_names.append(f'Node: {seg}')
        list_radiomics.append(features)
    print(f'Patient {counter} finished')



Patient 1 finished
Patient 2 finished
Patient 3 finished
Patient 4 finished
Patient 5 finished
Patient 6 finished
Patient 7 finished
Patient 8 finished
Patient 9 finished
Patient 10 finished
Patient 11 finished
Patient 12 finished
Patient 13 finished
Patient 14 finished
Patient 15 finished
Patient 16 finished
Patient 17 finished
Patient 18 finished
Patient 19 finished
Patient 20 finished
Patient 21 finished
Patient 22 finished
Patient 23 finished
Patient 24 finished
Patient 25 finished
Patient 26 finished
Patient 27 finished
Patient 28 finished
Patient 29 finished
Patient 30 finished
Patient 31 finished
Patient 32 finished
Patient 33 finished
Patient 34 finished
Patient 35 finished
Patient 36 finished
Patient 37 finished
Patient 38 finished
Patient 39 finished
Patient 40 finished
Patient 41 finished
Patient 42 finished
Patient 43 finished
Patient 44 finished
Patient 45 finished
Patient 46 finished
Patient 47 finished
Patient 48 finished
Patient 49 finished
Patient 50 finished
Patient 5

In [None]:
import pandas as pd
from collections import OrderedDict
import json
import numpy as np

# Function to convert OrderedDict to a serializable format
def convert_to_serializable(odict):
    """Return a copy of ``odict`` whose values ``json.dumps`` can encode.

    NumPy arrays and NumPy scalars (e.g. ``np.int64``, ``np.float32``) are
    converted to plain Python objects. Dicts, lists and tuples are walked
    recursively so NumPy values nested inside them are converted as well.
    All other values are passed through unchanged.

    Args:
        odict: Mapping of feature name to value (e.g. a PyRadiomics result).

    Returns:
        An ``OrderedDict`` with the same keys in the same order.
    """
    def _to_builtin(value):
        # .tolist() exists on both arrays and NumPy scalars and yields
        # plain Python lists / numbers.
        if isinstance(value, (np.ndarray, np.generic)):
            return value.tolist()
        if isinstance(value, dict):
            return OrderedDict((key, _to_builtin(item)) for key, item in value.items())
        if isinstance(value, tuple):
            return tuple(_to_builtin(item) for item in value)
        if isinstance(value, list):
            return [_to_builtin(item) for item in value]
        return value

    return OrderedDict((key, _to_builtin(value)) for key, value in odict.items())

# Example data - replace this with your actual data
#list_patient_names = ['Patient1', 'Patient2']
#list_node_names = ['Node1', 'Node2']
#list_radiomics = [OrderedDict([('key1', np.array([1, 2, 3])), ('key2', 'value2')]), OrderedDict([('key3', 'value3'), ('key4', np.array([4, 5, 6]))])]

# Store each feature dict as a JSON string so it survives the CSV round-trip.
DF_Radiomics = pd.DataFrame({
    "Patient": list_patient_names,
    "Node": list_node_names,
    "Radiomics": [json.dumps(convert_to_serializable(radiomics), ensure_ascii=False) for radiomics in list_radiomics]
})

# Build the patient -> diagnosis lookup once instead of re-filtering
# df_of_noduls for every row. setdefault keeps the first occurrence per
# patient, matching the earlier `.tolist()[0]` behavior.
label_by_patient = {}
for patient_id, diagnosis in zip(
    df_of_noduls["TCIA Patient ID"],
    df_of_noduls["Diagnosis at the Patient Level"],
):
    label_by_patient.setdefault(patient_id, diagnosis)

# Fail with an explicit message when a patient has no diagnosis entry
# (a per-row lookup would only give a bare IndexError without the patient id).
missing_patients = sorted(set(DF_Radiomics["Patient"]) - set(label_by_patient))
if missing_patients:
    raise KeyError(f"No diagnosis entry for patients: {missing_patients}")

labs_list = [label_by_patient[patient_id] for patient_id in DF_Radiomics["Patient"]]

DF_Radiomics["Patient_label"] = labs_list

# Save to CSV
DF_Radiomics.to_csv("DF_Radiomics_all.csv", index=False)

In [None]:
# Display the assembled table: Patient, Node, JSON-encoded Radiomics, Patient_label.
DF_Radiomics

Unnamed: 0,Patient,Node,Radiomics,Patient_label
0,LIDC-IDRI-0068,Node: 0,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
1,LIDC-IDRI-0068,Node: 1,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
2,LIDC-IDRI-0068,Node: 2,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
3,LIDC-IDRI-0068,Node: 3,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
4,LIDC-IDRI-0068,Node: 4,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
5,LIDC-IDRI-0068,Node: 5,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
6,LIDC-IDRI-0068,Node: 6,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
7,LIDC-IDRI-0068,Node: 7,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
8,LIDC-IDRI-0068,Node: 8,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3
9,LIDC-IDRI-0068,Node: 9,"{""diagnostics_Versions_PyRadiomics"": ""0+unknow...",3


In [None]:



# Inspect the patient-level diagnosis labels, one entry per DF_Radiomics row.
labs_list


[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

In [None]:


# Reload the saved table and decode each JSON string in the "Radiomics"
# column back into an OrderedDict (object_pairs_hook builds the dict from
# the decoded key/value pairs in order).
def _decode_features(raw_json):
    return json.loads(raw_json, object_pairs_hook=OrderedDict)


DF_Radiomics = pd.read_csv("DF_Radiomics_all.csv")
DF_Radiomics['Radiomics'] = DF_Radiomics['Radiomics'].map(_decode_features)

In [None]:
# Print every entry of the first row's feature dict as "name: value".
first_row_features = DF_Radiomics['Radiomics'][0]
for feature_name in first_row_features:
    print(f"{feature_name}: {first_row_features[feature_name]}")

diagnostics_Versions_PyRadiomics: 0+unknown
diagnostics_Versions_Numpy: 1.26.2
diagnostics_Versions_SimpleITK: 2.1.1.2
diagnostics_Versions_PyWavelet: 1.2.0
diagnostics_Versions_Python: 3.10.13
diagnostics_Configuration_Settings: OrderedDict([('minimumROIDimensions', 2), ('minimumROISize', None), ('normalize', False), ('normalizeScale', 1), ('removeOutliers', None), ('resampledPixelSpacing', None), ('interpolator', 'sitkBSpline'), ('preCrop', False), ('padDistance', 5), ('distances', [1]), ('force2D', False), ('force2Ddimension', 0), ('resegmentRange', None), ('label', 255), ('additionalInfo', True), ('binWidth', 25), ('weightingNorm', None)])
diagnostics_Configuration_EnabledImageTypes: OrderedDict([('Original', OrderedDict())])
diagnostics_Image-original_Hash: bea2c9750ea59a0bebb6d3bd63ffacc40fcf6a28
diagnostics_Image-original_Dimensionality: 3D
diagnostics_Image-original_Spacing: [0.683594, 0.683594, 1.25]
diagnostics_Image-original_Size: [512, 512, 261]
diagnostics_Image-original_M