In [1]:
import numpy as np
import pandas as pd
import pydicom as dicom
import matplotlib.pylab as plt
import glob
import SimpleITK as sitk
import re
import pydicom
import os

In [7]:
def checkDistortedImg(vol_img, ptype='mean', angle=90):
    projection = {'sum': sitk.SumProjection,
                  'mean': sitk.MeanProjection,
                  'std': sitk.StandardDeviationProjection,
                  'min': sitk.MinimumProjection,
                  'max': sitk.MaximumProjection}
    paxis = 0

    rotation_axis = [0, 0, 1]
    rotation_angles = np.linspace(-1 / 2 * np.pi, 1 / 2 * np.pi, int(180.0 / angle))  # 15.0 degree

    rotation_center = vol_img.TransformContinuousIndexToPhysicalPoint(
        [(index - 1) / 2.0 for index in vol_img.GetSize()])

    rotation_transform = sitk.VersorRigid3DTransform()
    rotation_transform.SetCenter(rotation_center)

    # Compute bounding box of rotating volume and the resampling grid structure
    image_indexes = list(zip([0, 0, 0], [sz - 1 for sz in vol_img.GetSize()]))
    image_bounds = []
    for i in image_indexes[0]:
        for j in image_indexes[1]:
            for k in image_indexes[2]:
                image_bounds.append(vol_img.TransformIndexToPhysicalPoint([i, j, k]))

    all_points = []
    for angle in rotation_angles:
        rotation_transform.SetRotation(rotation_axis, angle)
        all_points.extend([rotation_transform.TransformPoint(pnt) for pnt in image_bounds])

    all_points = np.array(all_points)
    min_bounds = all_points.min(0)
    max_bounds = all_points.max(0)

    new_spc = [np.min(vol_img.GetSpacing())] * 3
    new_sz = [int(sz / spc + 0.5) for spc, sz in zip(new_spc, max_bounds - min_bounds)]

    for angle in rotation_angles:
        rotation_transform.SetRotation(rotation_axis, angle)
        resampled_image = sitk.Resample(image1=vol_img,
                                        size=new_sz,
                                        transform=rotation_transform,
                                        interpolator=sitk.sitkLinear,
                                        outputOrigin=min_bounds,
                                        outputSpacing=new_spc,
                                        outputDirection=[1, 0, 0, 0, 1, 0, 0, 0, 1],
                                        defaultPixelValue=-20,
                                        # HU unit for air in CT, possibly set to 0 in other cases
                                        outputPixelType=vol_img.GetPixelID())
        proj_image = projection[ptype](resampled_image, paxis)
        extract_size = list(proj_image.GetSize())
        extract_size[paxis] = 0
        sitk.Extract(proj_image, extract_size)


def outputDistortedImg(df):
    pid = os.getpid()
    ppid = os.getppid()
    start = time.time()
    print("PPID %s->%s Started on %s" % (ppid, pid, str(datetime.now())))

    exception_lst = []

    for _, row in df.iterrows():
        reader = sitk.ImageSeriesReader()
        dicom_names = reader.GetGDCMSeriesFileNames(row['directory'])
        reader.SetFileNames(dicom_names)
        vol_img = reader.Execute()
        try:
            checkDistortedImg(vol_img)
        except:Mem:       32522512    11526808     2814172       60464    18181532    20466524
            print(row["directory"])
            exception_lst.append(row['directory'])

    end = time.time()

    print("PPID %s Completed in %s" % (os.getpid(), round((end - start) / 60, 2)))

    return exception_lst

In [11]:
dataset = pd.read_excel("/media/andres/T7 Shield/ucan_lymfom/Excel_files/06_11_2023/Final_Selected_exams_from_U-CAN-Lymfom.xlsx")

# distorted_file_to_check = "/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr207978513481-20190903/CT-20190903-122850-3.6_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-0.976562mm"
# distorted_file_to_check = "/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr207978513481-20190903/CT-20190903-122850-3.6_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-3.000000mm"

for file in dataset["directory"]:
    print(file)
    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(file)
    reader.SetFileNames(dicom_names)
    vol_img = reader.Execute()
    checkDistortedImg(vol_img=vol_img)

/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210628/CT-20210628-114331-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_3_ax-3.000000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210628/PT-20210628-112746-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210407/CT-20210407-105418-3.12_WB_F18_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_0.6_ax-0.625000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210407/PT-20210407-104228-3.12_WB_F18_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210212/CT-20210212-130055-3.12_WB_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_3_ax-3.000000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999612862346-20210212/PT-20210212-124236-3.12_WB_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr9995612758

ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000997159



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999470312453-20220201/PT-20220201-142500-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999062108241-20190308/CT-20190308-110945-6.3_UAS-WB-FDG-3D_CT_VENFAS-WB_VEN_AX-0.949219mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr999062108241-20190308/PT-20190308-104217-UAS-WB-FDG-3D_CT_VENFAS-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr998272047145-20210701/PT-20210701-144502-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr998272047145-20210701/CT-20210701-150132-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_3_ax-3.000000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr997877056847-20120103/CT-20120103-095149-6.3_UAS-WB-FDG-3D-STANDARD_CT_RECON-3.750000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr997877056847-20120103/PT-20120103-095404-UAS-WB-FDG-3D-UAS-WB

ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000997368



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr994849838028-20210310/PT-20210310-101248-3.12_WB_FDG_VENFAS__THORAX_INANDAD-QCFX-S_400_M.Free-2.8000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr994849838028-20210310/CT-20210310-103202-3.12_WB_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00498753



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr993867426231-20180326/PT-20180326-110931-UAS-WB-FDG-3D_CT_VENFAS-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr993867426231-20180326/CT-20180326-113432-6.3_UAS-WB-FDG-3D_CT_VENFAS-WB_VEN_AX-0.949219mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr992367836483-20181031/PT-20181031-150153-UAS-WB-FDG-3D_CT_VENFAS-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr992367836483-20181031/CT-20181031-152948-6.3_UAS-WB-FDG-3D_CT_VENFAS-WB_VEN_AX-0.949219mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr991811682588-20160804/PT-20160804-120758-UAS-WB-FDG-3D_VENFASTHORAX_INSP_20160120-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr991811682588-20160804/CT-20160804-124256-6.6_UAS-WB-FDG-3D_VENFASTHORAX_INSP_20160120-WB_VEN_AX-0.949219mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr991043996626-20200605/CT-20200605-100800-3.17_WB_FDG_CT_THORAX_INANDAD-WB_Venfas_3_ax-3

ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000997368



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr991043996626-20200605/PT-20200605-095011-3.17_WB_FDG_CT_THORAX_INANDAD-QCFX-S_400-2.7900mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20190126/PT-20190126-102901-3.5_WB_FDG_CT_VENFAS-QCFX-S_400-2.7900mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20190126/CT-20190126-104518-3.5_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00498684



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20180518/PT-20180518-100700-6.39_WB_FDG_CT_VENFAS-WB_AC_FDG_VPFX-S-2.7900mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20180518/CT-20180518-102113-6.39_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000996875



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20161227/PT-20161227-113237-UAS-WB-FDG-3D_CT_VENFAS-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20161227/CT-20161227-120308-6.3_UAS-WB-FDG-3D_CT_VENFAS-WB_VEN_AX-0.949219mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20160819/CT-20160819-132246-6.1_UAS-WB-FDG-3D-STANDARD_CT_RECON-3.750000mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988957180883-20160819/PT-20160819-132426-UAS-WB-FDG-3D-WB-FDG-AC-3.2700mm
/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988864921432-20220428/PT-20220428-120103-3.8_WB_F18_FDG_VENFAS-QCFX-S_400_M.Free-2.8000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:141.608



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988864921432-20220428/CT-20220428-121324-3.8_WB_F18_FDG_VENFAS-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:163.837



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988864921432-20220125/PT-20220125-104104-3.8_WB_F18_FDG_VENFAS-QCFX-S_400_M.Free-2.8000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:105.002



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988864921432-20220125/CT-20220125-105521-3.8_WB_F18_FDG_VENFAS-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:616.943



/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr988864921432-20210721/CT-20210721-124711-3.11_WB_F18_FDG_VENFAS__THORAX_INANDAD-WB_Venfas_3_ax-3.000000mm


ImageSeriesReader (0x7f001c34ef20): Non uniform sampling or missing slices detected,  maximum nonuniformity:804.996



RuntimeError: Exception thrown in SimpleITK ResampleImageFilter_Execute: /tmp/SimpleITK-build/ITK-prefix/include/ITK-5.3/itkImportImageContainer.hxx:191:
Failed to allocate memory for image.

In [6]:
print(exception_lst)
len(exception_lst)

NameError: name 'exception_lst' is not defined

In [None]:
source_path = "/media/andres/T7 Shield/ucan_lymfom"
destination_path = os.path.join(source_path, "metadata.xlsx")

directory_list = list()
for root, dirs, files in os.walk(source_path, topdown=False):
    for name in dirs:
        directory_list.append(os.path.join(root, name))

In [None]:
 dataset = pd.DataFrame(directory_list, columns=['directory'])
 countfiles_selected = {"directory": [], "count":[]}

In [None]:
for index, row in dataset.iterrows():
    count = 0
    for path in os.listdir(row["directory"]):
        if os.path.isfile(os.path.join(row["directory"], path)):
            count += 1
            
    countfiles_selected["directory"].append(row["directory"])
    countfiles_selected["count"].append(count)


In [None]:
countfiles_selected_df = pd.DataFrame.from_dict(countfiles_selected)
exams_with_distorted_images_file = countfiles_selected_df[countfiles_selected_df["count"] < 179].reset_index()
print(f"Number of exams with ones file: {exams_with_distorted_images_file.shape}")
exams_with_distorted_images_file.head(2)

In [None]:
    exams_with_distorted_images_file[['source_directory', 'patient_directory', 'PET-CT_info']] = exams_with_distorted_images_file['directory'].str.rsplit(pat='/', n=2, expand=True)
    exams_with_distorted_images_file["patient_directory"].nunique()

In [None]:
exams_with_distorted_images_file.to_excel("/media/andres/T7 Shield/ucan_lymfom/Excel_files/06_11_2023/exams_with_distorted_images_file.xlsx")

In [None]:
dataset.shape

In [None]:
dataset = dataset[~dataset.directory.isin(exams_with_distorted_images_file.directory)]

In [None]:
dataset.shape

In [None]:
dataset.to_excel("/media/andres/T7 Shield/ucan_lymfom/Excel_files/06_11_2023/data_ready_for_filtering.xlsx")

In [None]:
dataset_test = pd.read_excel("/media/andres/T7 Shield/ucan_lymfom/Excel_files/06_11_2023/data_ready_for_filtering.xlsx")

In [None]:
type(dataset_test)

In [None]:
dataset_test.columns