In [1]:
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

In [2]:
# Every path below lives under the same BONBID2023 validation root, so the root
# is spelled out once and the individual folders are derived from it.
# NOTE(review): this is an absolute, user-specific path; adjust it per machine.
_val_root = "/Users/ananyashukla/Desktop/Brown@Research/lesion_segmentation/data_dir/BONBID2023_Val"

# Input folders (names suggest the original ADC, Z-ADC and label volumes).
adc_dir_path = f"{_val_root}/1ADC_ss"
zadc_dir_path = f"{_val_root}/2Z_ADC"
label_dir_path = f"{_val_root}/3LABEL"

# Output folders (names suggest the destinations for the extracted slices).
adc_dest_path = f"{_val_root}/ADC"
zadc_dest_path = f"{_val_root}/Z_ADC"
label_dest_path = f"{_val_root}/LABEL"

# image = sitk.ReadImage(image_path)
# print(sitk.GetArrayFromImage(image).shape)
# print(image.GetSize())
# print(image.GetSpacing())
# print(image.GetOrigin())
# print(image.GetDirection())
# print(image.GetPixelIDTypeAsString())
# print(image.GetMetaDataKeys())
# print(image.GetDepth())
# print(image.GetWidth())
# print(image.GetHeight())
# print(image.GetDimension())

# for key in image.GetMetaDataKeys():
#     print(key + ": " + image.GetMetaData(key))

In [3]:
def extract_mha_file(file_path:str) -> tuple:
    """Read an MHA file and return its voxel data together with basic geometry.

    Args:
        file_path: Path of the image file handed to ``sitk.ReadImage``.

    Returns:
        A 4-tuple ``(image_array, spacing, direction, origin)`` where
        ``image_array`` is the result of ``sitk.GetArrayFromImage`` and the
        other three are the image's spacing, direction and origin, each
        reversed element-wise (``[::-1]``). ``direction`` is reversed as the
        flat sequence returned by ``GetDirection``.

    Other metadata stored in the file is not returned.
    """
    volume = sitk.ReadImage(file_path)
    voxels = sitk.GetArrayFromImage(volume)
    # The geometry tuples are flipped so their order lines up with the numpy
    # array's axis order (SimpleITK reports x, y, z; the array is indexed z, y, x).
    geometry = (volume.GetSpacing(), volume.GetDirection(), volume.GetOrigin())
    flipped_spacing, flipped_direction, flipped_origin = (item[::-1] for item in geometry)
    return voxels, flipped_spacing, flipped_direction, flipped_origin

def save_slices(dest_dir,image_id,image_array,category = '') -> None:
    """Save every slice along axis 0 of ``image_array`` as its own ``.npy`` file.

    Files are named ``{image_id}_{CATEGORY}_slice_{i}.npy`` where ``CATEGORY``
    is ``category`` upper-cased and ``i`` is the index along axis 0. With the
    default empty category the name contains a double underscore
    (``{image_id}__slice_{i}.npy``); this is kept so existing files still match.

    Args:
        dest_dir: Directory that receives the files. It is created (including
            parents) if it does not exist yet.
        image_id: Identifier placed at the start of each file name.
        image_array: Array whose first axis enumerates the slices.
        category: Optional tag inserted (upper-cased) into the file name.
    """
    # np.save does not create missing directories, so make sure the target exists.
    os.makedirs(dest_dir, exist_ok=True)
    tag = category.upper()
    for index, slice_2d in enumerate(image_array):
        np.save(os.path.join(dest_dir, f"{image_id}_{tag}_slice_{index}.npy"), slice_2d)

def reassemble_to_3d(folder_path, uid) -> np.ndarray:
    """Load one patient's 2D ``.npy`` slices and stack them into a 3D array.

    A file belongs to the patient when it ends in ``.npy`` and
    ``extract_id(file_name) == uid``. Slices are ordered by the integer that
    follows the last underscore in the file name (before the first ``.``),
    i.e. the ``{i}`` in ``..._slice_{i}.npy``.

    Note that the match is on patient ID only: if ``folder_path`` holds slices
    of several categories for the same ID, all of them are stacked together.

    Args:
        folder_path: Directory containing the slice files.
        uid: Patient ID string as returned by ``extract_id``.

    Returns:
        The slices stacked along a new first axis.

    Raises:
        ValueError: If no slice file for ``uid`` exists in ``folder_path``.
    """
    def slice_index(file_name):
        # "010_ADC_slice_12.npy" -> 12
        return int(file_name.split('.')[0].split('_')[-1])

    # The extension is checked first so unrelated files (CSV, hidden files, ...)
    # are never handed to np.load.
    slice_files = sorted(
        (name for name in os.listdir(folder_path)
         if name.endswith('.npy') and extract_id(name) == uid),
        key=slice_index,
    )
    if not slice_files:
        # np.stack([]) would fail with an unhelpful "need at least one array" message.
        raise ValueError(f"No .npy slices found for patient ID {uid!r} in {folder_path!r}")

    return np.stack([np.load(os.path.join(folder_path, name)) for name in slice_files])

def extract_id(file_name:str) -> str:
    """Return the patient ID embedded in ``file_name``.

    The ID is assumed to be the first all-digit token among the
    underscore-separated pieces of the text that precedes the first ``-``.
    For example ``"MGHNICU_010-VISIT_01-ADC_ss"`` yields ``"010"``.

    Returns ``None`` when no such token exists.
    """
    leading_part = file_name.partition('-')[0]
    digit_tokens = (token for token in leading_part.split('_') if token.isdigit())
    return next(digit_tokens, None)

def calculate_volume_percentage(adc_image:np.ndarray, label_image:np.ndarray, adc_min:float = 1, adc_max:float = 3400) -> float:
    """Return the lesion volume as a percentage of the brain volume.

    Brain voxels are those whose ADC value lies in ``[adc_min, adc_max]``
    (inclusive); lesion voxels are those where ``label_image == 1``. The two
    counts are taken independently of each other.

    Args:
        adc_image: ADC intensities.
        label_image: Lesion mask; voxels equal to 1 count as lesion.
        adc_min: Lowest ADC value still counted as brain (default 1).
        adc_max: Highest ADC value still counted as brain (default 3400).

    Returns:
        ``lesion_voxels / brain_voxels * 100`` as a Python float, or ``nan``
        when no voxel falls inside the brain range (the ratio is undefined).
    """
    adc = np.asarray(adc_image)
    labels = np.asarray(label_image)

    brain_voxels = int(np.count_nonzero((adc >= adc_min) & (adc <= adc_max)))
    if brain_voxels == 0:
        # Avoid a divide-by-zero warning and an inf/nan that depends on the lesion count.
        return float("nan")

    lesion_voxels = int(np.count_nonzero(labels == 1))
    return lesion_voxels / brain_voxels * 100

def split_files_gen_csv(source_dir:str, dest_dir:str, category:str, gen_csv:bool=False, adc_dir = None)->None:
    """Split every ``.mha`` volume in ``source_dir`` into 2D ``.npy`` slices.

    Each volume is read with ``extract_mha_file`` and its slices are written to
    ``dest_dir`` by ``save_slices`` under the patient ID from ``extract_id``.
    Files that do not end in ``.mha`` are ignored.

    When ``gen_csv`` is true, ``{dest_dir}/metadata.csv`` is written with one
    row per volume: patient ID, the three array dimensions, the lesion
    percentage and the three spacings. Array axes 0, 1 and 2 are labelled
    axial, coronal and sagittal. The lesion percentage treats the volume being
    split as the label mask and needs the matching ADC slices from ``adc_dir``.

    Args:
        source_dir: Directory containing the ``.mha`` volumes.
        dest_dir: Output directory; created if missing.
        category: Tag passed to ``save_slices`` for the slice file names.
        gen_csv: Whether to write ``metadata.csv``.
        adc_dir: Directory of already-extracted ADC ``.npy`` slices, rebuilt
            with ``reassemble_to_3d``. If ``None`` while ``gen_csv`` is true,
            the CSV is still written but "Lesion Percentage" is left as NaN.

    Raises:
        ValueError: If a ``.mha`` file name contains no patient ID.
    """
    columns = ["Patient ID","Axial Slices", "Coronal Slices", "Sagittal Slices", "Lesion Percentage","Axial Spacing", "Coronal Spacing", "Sagittal Spacing"]
    # Rows are collected in a list and turned into a DataFrame once at the end.
    records = []

    os.makedirs(dest_dir, exist_ok=True)

    # Sorted so the CSV row order does not depend on the filesystem listing order.
    for file in sorted(os.listdir(source_dir)):
        if not file.endswith('.mha'):
            continue

        uid = extract_id(file)
        if uid is None:
            # Without an ID every such volume would be saved as "None_..." and
            # overwrite the previous one.
            raise ValueError(f"Could not extract a patient ID from file name {file!r}")

        image_array, spacing, _direction, _origin = extract_mha_file(os.path.join(source_dir, file))

        save_slices(dest_dir,uid,image_array,category)

        if not gen_csv:
            continue

        num_axial, num_coronal, num_sagittal = image_array.shape
        spacing_axial, spacing_coronal, spacing_sagittal = spacing

        if adc_dir is not None:
            # Rebuild this patient's 3D ADC volume and use the current volume as the mask.
            adc_array = reassemble_to_3d(adc_dir, uid)
            volume = calculate_volume_percentage(adc_array, image_array)
        else:
            volume = np.nan

        records.append([uid, num_axial, num_coronal, num_sagittal, volume, spacing_axial, spacing_coronal, spacing_sagittal])

    if gen_csv:
        meta_df = pd.DataFrame(records, columns=columns)
        meta_df.to_csv(os.path.join(dest_dir, "metadata.csv"), index=False)

In [None]:
# Load one volume and show its array shape and geometry, one value per line.
# NOTE(review): `image_path` is not assigned in any cell visible above (it only
# appears in commented-out code) - define it before running this cell.
image_array, spacing, direction, origin = extract_mha_file(image_path)
print(image_array.shape, spacing, direction, origin, sep="\n")

In [None]:
# Write the slices of the loaded volume to the scratch folder, using the full
# file stem (MGHNICU_010-VISIT_01-ADC_ss) as the image ID and no category.
save_slices(
    dest_dir='/Users/amograo/Research_Projects/DL_HIE_2024/test_saving',
    image_id='MGHNICU_010-VISIT_01-ADC_ss',
    image_array=image_array,
)

In [None]:
# Check which patient ID extract_id recovers from each file in the scratch folder.
files = os.listdir('/Users/amograo/Research_Projects/DL_HIE_2024/test_saving')
ids = list(map(extract_id, files))
print(ids)

In [None]:
# Rebuild the 3D volume for patient "010" from the slices in the scratch folder.
new_3d = reassemble_to_3d(
    folder_path='/Users/amograo/Research_Projects/DL_HIE_2024/test_saving',
    uid='010',
)

In [None]:
# Round-trip check: the reassembled volume should match what was loaded.
print(new_3d.shape, new_3d.dtype, sep="\n")
print(image_array.dtype)
print(np.array_equal(new_3d, image_array))
# NOTE(review): `image` is not assigned in any cell visible above (it only
# appears in commented-out code) - define it before running this line.
print(np.array_equal(new_3d, sitk.GetArrayFromImage(image)))

In [None]:
# Confirm that iterating over the volume yields the same 2D slices as explicit
# indexing along the first axis; one True/False line is printed per slice.
#
# To eyeball the slices instead, plot them one at a time:
#     for i in range(image_array.shape[0]):
#         plt.imshow(image_array[i,:,:])
#         plt.show()
#         plt.close()

# Slices obtained by plain iteration over the first axis.
l = list(image_array)

# Slices obtained by explicit indexing.
k = [image_array[i,:,:] for i in range(image_array.shape[0])]

for iterated_slice, indexed_slice in zip(l, k):
    print(np.array_equal(iterated_slice, indexed_slice))

In [None]:
"050".isdigit()