In [9]:
import glob
import os
import re
import time
from datetime import datetime

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import pydicom
import pydicom as dicom
import SimpleITK as sitk

In [1]:
def checkDistortedImg(vol_img, ptype='mean', angle=90):
    """Rotate a 3D volume about its z axis, resample it and project every view.

    The function runs the whole rotate -> resample -> project -> extract
    pipeline and throws the resulting 2D images away. Nothing is returned; the
    only observable outcome is whether an exception is raised along the way.

    Parameters
    ----------
    vol_img : SimpleITK.Image
        Volume to process. Its size must have three components.
    ptype : str, default 'mean'
        Projection to apply: one of 'sum', 'mean', 'std', 'min', 'max'.
    angle : float, default 90
        Controls the number of views: ``int(180 / angle)`` rotation angles are
        spread evenly over [-pi/2, +pi/2] with both ends included. The default
        therefore gives two views (-90 and +90 degrees). Because both ends are
        included, the actual step between views is ``180 / (n_views - 1)``
        degrees, not ``angle``.

    Raises
    ------
    ValueError
        If ``angle`` is not in the interval (0, 180].
    KeyError
        If ``ptype`` is not one of the supported projection names.
    Exception
        Whatever SimpleITK raises while transforming, resampling or projecting.
    """
    # Validate before touching SimpleITK so a bad argument fails fast with a
    # clear message instead of a ZeroDivisionError or an empty-array reduction.
    if not 0 < angle <= 180:
        raise ValueError("angle must be in the interval (0, 180] degrees, got %r" % (angle,))

    projection = {'sum': sitk.SumProjection,
                  'mean': sitk.MeanProjection,
                  'std': sitk.StandardDeviationProjection,
                  'min': sitk.MinimumProjection,
                  'max': sitk.MaximumProjection}
    # Look the filter up now: an unknown ptype raises KeyError before any
    # resampling work is done rather than after the first resample.
    project = projection[ptype]
    paxis = 0

    rotation_axis = [0, 0, 1]
    n_views = int(180.0 / angle)
    rotation_angles = np.linspace(-np.pi / 2, np.pi / 2, n_views)

    volume_size = vol_img.GetSize()
    rotation_center = vol_img.TransformContinuousIndexToPhysicalPoint(
        [(sz - 1) / 2.0 for sz in volume_size])

    rotation_transform = sitk.VersorRigid3DTransform()
    rotation_transform.SetCenter(rotation_center)

    # Physical coordinates of the eight corner voxels of the volume.
    image_bounds = [vol_img.TransformIndexToPhysicalPoint([i, j, k])
                    for i in (0, volume_size[0] - 1)
                    for j in (0, volume_size[1] - 1)
                    for k in (0, volume_size[2] - 1)]

    # Bounding box of the corners under every rotation; it defines a single
    # resampling grid that is reused for all views.
    # The loop variable is deliberately not called `angle`, so the parameter
    # is no longer overwritten.
    all_points = []
    for theta in rotation_angles:
        rotation_transform.SetRotation(rotation_axis, theta)
        all_points.extend(rotation_transform.TransformPoint(pnt) for pnt in image_bounds)

    all_points = np.array(all_points)
    min_bounds = all_points.min(0)
    max_bounds = all_points.max(0)

    # Isotropic output grid using the smallest input spacing.
    new_spc = [np.min(vol_img.GetSpacing())] * 3
    new_sz = [int(extent / spc + 0.5) for spc, extent in zip(new_spc, max_bounds - min_bounds)]

    for theta in rotation_angles:
        rotation_transform.SetRotation(rotation_axis, theta)
        resampled_image = sitk.Resample(image1=vol_img,
                                        size=new_sz,
                                        transform=rotation_transform,
                                        interpolator=sitk.sitkLinear,
                                        outputOrigin=min_bounds,
                                        outputSpacing=new_spc,
                                        outputDirection=[1, 0, 0, 0, 1, 0, 0, 0, 1],
                                        # Fill value for samples that fall outside the input volume.
                                        # NOTE(review): the previous comment called this the HU value
                                        # of air, but air in CT is about -1000 HU - confirm that -20
                                        # is the intended value.
                                        defaultPixelValue=-20,
                                        outputPixelType=vol_img.GetPixelID())
        proj_image = project(resampled_image, paxis)
        # Size 0 along the projection axis collapses that axis, giving a 2D image.
        extract_size = list(proj_image.GetSize())
        extract_size[paxis] = 0
        # The extracted image is intentionally discarded; only a raised
        # exception matters to the caller.
        sitk.Extract(proj_image, extract_size)


def outputDistortedImg(df):
    """Run ``checkDistortedImg`` on every DICOM series listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'directory' column; each value is passed to
        ``ImageSeriesReader.GetGDCMSeriesFileNames`` as a series directory.

    Returns
    -------
    list
        The 'directory' values for which ``checkDistortedImg`` raised an
        exception, in input order.

    Notes
    -----
    Only the check itself is guarded. An error raised while reading a series
    still propagates and aborts the loop, exactly as before.
    """
    pid = os.getpid()
    ppid = os.getppid()
    start = time.time()
    print("PPID %s->%s Started on %s" % (ppid, pid, str(datetime.now())))

    exception_lst = []

    # Only the 'directory' column is needed, so iterate over it directly.
    for directory in df['directory']:
        reader = sitk.ImageSeriesReader()
        dicom_names = reader.GetGDCMSeriesFileNames(directory)
        reader.SetFileNames(dicom_names)
        vol_img = reader.Execute()
        try:
            checkDistortedImg(vol_img)
        except Exception:
            # `Exception` instead of a bare `except:` so KeyboardInterrupt and
            # SystemExit can still stop a long batch run.
            exception_lst.append(directory)

    end = time.time()

    # The value printed here is this process's own PID; elapsed time is in minutes.
    print("PPID %s Completed in %s" % (os.getpid(), round((end - start) / 60, 2)))

    return exception_lst

In [32]:
# Single series directory used to reproduce the problem interactively.
# NOTE(review): hard-coded absolute path on an external drive - this cell only
# runs on a machine where that drive is mounted.
distorted_file_to_check = "/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr207978513481-20190903/CT-20190903-122850-3.6_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-0.976562mm"
# distorted_file_to_check = "/media/andres/T7 Shield/ucan_lymfom/ASPTCTX0001_npr207978513481-20190903/CT-20190903-122850-3.6_WB_FDG_CT_VENFAS-WB_Venfas_3_ax-3.000000mm"

# Read every DICOM file of the series in that directory into one SimpleITK volume.
reader = sitk.ImageSeriesReader()
dicom_names = reader.GetGDCMSeriesFileNames(distorted_file_to_check)
reader.SetFileNames(dicom_names)
vol_img = reader.Execute()
# Run the check with its defaults (ptype='mean', angle=90); an exception here
# is the only signal the function gives.
checkDistortedImg(vol_img=vol_img)

In [46]:
# Root of the dataset and the spreadsheet path derived from it.
source_path = "/media/andres/T7 Shield/ucan_lymfom"
destination_path = os.path.join(source_path, "metadata.xlsx")

# Gather the full path of every sub-directory below the root. The walk is
# bottom-up (topdown=False), so nested directories are listed before their parents.
directory_list = []
for root, dirs, files in os.walk(source_path, topdown=False):
    directory_list.extend(os.path.join(root, name) for name in dirs)

In [47]:
 # One row per discovered directory, in a single 'directory' column.
 dataset = pd.DataFrame(data=directory_list, columns=["directory"])
 # Two parallel lists (directory path, number of files) filled by the counting cell.
 countfiles_selected = dict(directory=[], count=[])

In [48]:
# Count the regular files directly inside each directory (sub-directories are
# not counted and the count is not recursive).
# The accumulator is rebuilt here so that re-running this cell on its own
# replaces the previous result instead of appending a second copy of every row.
countfiles_selected = {"directory": [], "count": []}
for directory in dataset["directory"]:
    # scandir yields entries that can answer is_file() without building a
    # joined path for each name; like os.path.isfile it follows symlinks.
    with os.scandir(directory) as entries:
        count = sum(1 for entry in entries if entry.is_file())

    countfiles_selected["directory"].append(directory)
    countfiles_selected["count"].append(count)


In [49]:
# Tabulate the per-directory file counts.
countfiles_selected_df = pd.DataFrame(countfiles_selected)

# Short-list directories holding fewer than 3 files. reset_index() keeps the
# original row label as an extra 'index' column.
# NOTE(review): the variable name and the message say "one file" while the
# filter is "< 3" (which also matches directories with 0 or 2 files) - confirm
# which is intended.
few_files_mask = countfiles_selected_df["count"].lt(3)
exams_with_one_file = countfiles_selected_df.loc[few_files_mask].reset_index()
print(f"Number of exams with ones file: {exams_with_one_file.shape}")
exams_with_one_file.head(2)

Number of exams with ones file: (6645, 3)


Unnamed: 0,index,directory,count
0,0,/media/andres/T7 Shield/ucan_lymfom/ASPTCTX000...,1
1,4,/media/andres/T7 Shield/ucan_lymfom/ASPTCTX000...,1


In [50]:
    # Split each path from the right on '/', at most twice, and store the three
    # resulting parts under the column names below.
    path_parts = exams_with_one_file['directory'].str.rsplit(pat='/', n=2, expand=True)
    exams_with_one_file[['source_directory', 'patient_directory', 'PET-CT_info']] = path_parts
    # Number of distinct values in the middle part among the short-listed rows.
    exams_with_one_file["patient_directory"].nunique()

1877

In [59]:
# Write the short-listed directories to a spreadsheet.
# NOTE(review): the output location is a hard-coded absolute path, and the
# `destination_path` variable defined earlier is not used here - confirm which
# location is intended.
exams_with_one_file.to_excel("/media/andres/T7 Shield/Excel_Files_for_UCAN/Excel_files/01_11_2023/exams_with_one_file.xlsx")

In [60]:
# (rows, columns) of the directory table.
# NOTE(review): this cell's execution count (60) is later than that of the
# filtering cell below (57), so the recorded output may already reflect the
# filtered frame - re-run top to bottom to confirm.
dataset.shape

(10642, 1)

In [57]:
# Keep only the directories that were not short-listed in exams_with_one_file.
short_listed = dataset["directory"].isin(exams_with_one_file["directory"])
dataset = dataset.loc[~short_listed]

In [58]:
# (rows, columns) of the directory table after the short-listed directories were removed.
dataset.shape

(10642, 1)

In [None]:
# NOTE(review): neither USERS nor EXCLUDE is defined anywhere in the visible
# notebook, and this cell has no execution count. It looks like a pasted
# reference for the `~isin` filtering pattern applied to `dataset` above.
# Unless those names are defined elsewhere, running it raises NameError and
# breaks "Restart & Run All" - confirm and remove if it is a leftover.
USERS[~USERS.email.isin(EXCLUDE.email)]