In [42]:
import os
import shutil
import pandas as pd
import nibabel as nib
import SimpleITK as sitk
import pydicom
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle as pkl
def dcm2nii(path_read, path_save):
    '''
    ## Convert Dicom Series Files to a Single NII File
    ### Args:
        path_read: The file folder containing dicom series files(No other files exits)
        path_save: The path you save the .nii/.nii.gz data file
    '''
    # GetGDCMSeriesIDs读取序列号相同的dcm文件
    series_id = sitk.ImageSeriesReader.GetGDCMSeriesIDs(path_read)
    # GetGDCMSeriesFileNames读取序列号相同dcm文件的路径，series[0]代表第一个序列号对应的文件
    series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(path_read, series_id[0])
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(series_file_names)
    image3d = series_reader.Execute()
    sitk.WriteImage(image3d, path_save)

def rename_dicom(dataroot='./DICOMs'):
    for root,dirs,files in os.walk(dataroot):
        # if len(dirs)==0:
        if True:
            for i in range(len(files)):
                if files[i]=='DICOMDIR.dcm':
                    os.remove(os.path.join(root,files[i]))
                    print(f'{os.path.join(root,files[i])} deleted')
                if not files[i].endswith('.dcm'):
                    os.rename(os.path.join(root,files[i]),os.path.join(root,f'{files[i]}.dcm'))
 

0

In [3]:
def resample_volume(Origin='NII.nii.gz', volume=None, interpolator = sitk.sitkLinear, new_spacing = [1.7708333730698,1.7708333730698,1],output='Resampled.nii'):
    '''
    ## Resample MRI files to specified spacing, here we define spacing=[1.7708333730698,1.7708333730698,1]
    ### Args:
        Origin: The Original MRI file, MUST NOT BE DICOM!!!!!
        interpolater: The method of resampling
        new_spacing: The spacing we want to set
        output: The Output path of resampled MRI data
    ### Return:
        resample_image: The resampled MRI data
    '''
    if volume is None:
        volume = sitk.ReadImage(Origin)
    original_spacing = volume.GetSpacing()
    original_size = volume.GetSize()
    new_size = [int(round(osz*ospc/nspc)) for osz,ospc,nspc in zip(original_size, original_spacing, new_spacing)]
    resampled_image = sitk.Resample(volume,
                                    new_size,
                                    sitk.Transform(), 
                                    interpolator,
                                    volume.GetOrigin(), 
                                    new_spacing, volume.GetDirection(), 
                                    0,
                                    volume.GetPixelID())
    if output!='':
        sitk.WriteImage(resampled_image,output)
    return resampled_image

## Map resolution from nifti to nifti

In [29]:
def resample_dataset2(spacing=[0.994,1.826],datapath='E:/VST_fusion/dataset'):
    error=[]
    for space in spacing:
        for roots,dirs,files in os.walk(datapath):
            if len(dirs)==0:
                files=exclude_seg_files(os.listdir(roots))
                for i in range(len(files)):
                    data_path=os.path.join(roots,files[i])
                    pre_data=sitk.ReadImage(data_path)
                    data_spacing=list(pre_data.GetSpacing())
                    data_spacing[0]=space
                    data_spacing[1]=space
                    new_data=resample_volume(Origin=data_path,new_spacing=data_spacing,output='')
                    if len(sitk.GetArrayFromImage(new_data))!=25 and 'LGE' not in data_path:
                        print(f'frame resample error: {data_path} {sitk.GetArrayFromImage(new_data).shape}')
                        error.append(data_path)
                    else:
                        save_path=roots.replace('nifti',f'nifti_spacing_{space}')
                        save_path=save_path.replace('dataset',f'dataset_spacing_{space}')
                        # save_path=save_path.replace('E:','F:')
                        if not os.path.exists(save_path):
                            os.makedirs(save_path)
                        save_path=os.path.join(save_path,files[i])
                        sitk.WriteImage(new_data,save_path)
                        print(f'{save_path} saved')
    return error
error=resample_dataset2(datapath='/Users/airskcer/Downloads/NP_nifti-20230109/',spacing=[0.994,1.826])
# error=resample_dataset2(datapath='/Users/airskcer/Downloads/RCM_nifti-20230107/',spacing=[0.994,1.826])

/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_13.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_mid.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_down.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_11.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_15.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_17.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_up.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11183087_LI_CHANG_AN/slice_18.nii.gz saved
/Users/airskcer/Downloads/NP_nifti_spacing_0.994-20230109/SAX_data/11028695_DAI_XIN_HUA/slice_mid.nii.gz saved
/Users

In [14]:
def check_spacings(root='/data/Airscker/VST_fusion_dataset_M1/Dataset/Abnormal_nifti_spacing_1.826'):
    pats = []
    for roots, dirs, files in os.walk(root):
        if len(dirs) == 0:
            pats.append(roots)
    bar = tqdm(range(len(pats)))
    error = []
    for i in bar:
        target = pats[i].replace('1.826', '0.994')
        if not os.path.exists(target):
            error.append(pats[i])
        else:
            pat_num = len(os.listdir(pats[i]))
            target_num = len(os.listdir(target))
            if pat_num != target_num:
                error.append(f'{pats[i]} {pat_num} {target_num}')
    print(len(error))
    for i in range(len(error)):
        print(error[i])
check_spacings(root='/Users/airskcer/Downloads/HCM_nifti_spacing_1.826-10s/')

100%|██████████| 40/40 [00:00<00:00, 17527.39it/s]

0





## check slices' data size, if every patient's slices' size isn't uniformed, error occurs

In [10]:
def check_size(dataroot='/Users/airskcer/Downloads/DCM_nifti-20230107/'):
    error={}
    folders=[]
    for roots,dirs,files in os.walk(dataroot):
        if len(dirs)==0:
            folders.append(roots)
    bar=tqdm(range(len(folders)))
    for i in bar:
        files=exclude_seg_files(os.listdir(folders[i]))
        if len(files)==1:
            continue
        sizes=[]
        for j in range(len(files)):
            data=sitk.ReadImage(os.path.join(folders[i],files[j]))
            sizes.append(sitk.GetArrayFromImage(data).shape)
        sizes=np.array(sizes)
        if np.sum(np.std(sizes,axis=0))!=0:
            error[folders[i]]=sizes
    for key in error:
        print(key,error[key])
    return error
error=check_size(dataroot='/Users/airskcer/Downloads/LVNC_nifti-20221224/')

100%|██████████| 1316/1316 [02:04<00:00, 10.57it/s]


In [None]:
def error_process(error_map: dict):
    """
    The error_process function is used to find the files that have different shapes and move them into a new folder.
    
    
    :param error_map: dict: Store the error information
    :return: The file names and the shape of the files that have an error
    :doc-author: Trelent
    """
    for path in error_map.keys():
        map={}
        files=exclude_seg_files(os.listdir(path))
        for i in range(len(files)):
            map[files[i]]=np.array(list(sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(path,files[i]))).shape))
        shapes=np.array(list(map.values()))
        new_shape=np.unique(shapes,axis=0)
        error_shape=None
        if len(files)>2:
            for i in range(len(new_shape)):
                count=0
                for j in range(len(shapes)):
                    if np.all(shapes[j]==new_shape[i]):
                        count+=1
                if count==1:
                    error_shape=new_shape[i]
        else:
            continue
        if error_shape is not None:
            for key in map:
                if np.all(error_shape==map[key]):
                    print(path,key,error_shape)
                    # if 'SAX_LGE_data' in path:
                    #     target = path.replace('SAX_LGE_data', '4CH_LGE_data')
                        # try:
                        #     os.makedirs(target)
                        # except:
                        #     pass
                        # try:
                        #     shutil.move(os.path.join(path,key),target)
                        # except:
                        #     print(f'{target} already exists')
                    # os.remove(os.path.join(path,key))
error_process(error_map=error)


/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_data/11143330_XU_DONG_XI slice_8.nii.gz [ 25 162 192]
/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_data/11470714_LIU_LI slice_8.nii.gz [  5 192 192]
/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_data/11555140_LIU_FANG_FANG slice_8.nii.gz [ 25 162 192]
/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_data/11850433_ZHANG_XIN slice_1.nii.gz [ 25 352 352]
/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_data/11313413_QIAN_XIAO_LIN slice_12.nii.gz [ 25 168 192]
/Users/airskcer/Downloads/LVNC_nifti-20221224/4CH_data/12108320_CHEN_HAO 12108320_CHEN_HAO_4.nii.gz [ 35 164 208]
/Users/airskcer/Downloads/LVNC_nifti-20221224/4CH_data/11133538_QIAN_XIAO_LIN 11133538_QIAN_XIAO_LIN_8.nii.gz [ 25 162 192]
/Users/airskcer/Downloads/LVNC_nifti-20221224/SAX_LGE_data/12108320_CHEN_HAO 12108320_CHEN_HAO_2.nii.gz [  5 256 256]
/Users/airskcer/Downloads/LVNC_nifti-20221224/4CH_LGE_data/12108320_CHEN_HAO 12108320_CHEN_HAO_3.nii.gz [  1 256 256]
/Use

## Resample Every Patient's Data

In [3]:
import os
import pydicom
import shutil
import numpy as np
def resample_dataset(dataroot='./DICOMs',output='./New_dicom/'):
    """Resample the dataset to a new directory .

    Args:
        dataroot (str, optional):The root path of all patients' dicom files. Defaults to './DICOMs'.
        output (str, optional):The output path of resampled dicom dataset. Defaults to './New_dicom/'.
    """
    error=[]
    count=0
    for paths,dirs,files in os.walk(dataroot):
        # if len(dirs)==0:
        if True:
            for i in range(len(files)):
                try:
                    info=pydicom.dcmread(os.path.join(paths,files[i]))
                    id=info['0010','0020'].value
                    name=str(info['0010','0010'].value).replace(' ','_')
                    mod=str(info['0008','103e'].value).replace(':','_')
                    date=str(info['0008','0020'].value)
                    ui=str(info['0008','0018'].value)
                except:
                    print(f'Basic info missed: {os.path.join(paths,files[i])}')
                    continue
                if name!='':
                    savepath=os.path.join(output,f'{id}_{name}',date,mod).replace('*','_').replace('?','_').replace('>','_').replace('<','_')
                    if not os.path.exists(savepath):
                        os.makedirs(savepath)
                    count+=1
                    try:
                        shutil.copyfile(os.path.join(paths,files[i]),os.path.join(savepath,f'{ui}.dcm'))
                    except:
                        print(f"Error occured when copying {os.path.join(paths,files[i])} to {os.path.join(savepath,f'{ui}.dcm')}")
                    # os.remove(os.path.join(paths,files[i]))
                else:
                    error.append(os.path.join(paths,files[i]))
        # for i in range(len(error)):
        #     print(error[i])
    return error
resample_dataset(dataroot='/Users/airskcer/Downloads/HCM_20230112/',output='/Users/airskcer/Downloads/HCM_Resampled_dicom-20230112')
# resample_dataset(dataroot='/Users/airskcer/Downloads/RCM_20230107/',output='/Users/airskcer/Downloads/RCM_Resampled_dicom-20230107')
# resample_dataset(dataroot='E:/BaiduNetdiskDownload/HHD/',output='F:/resampled_dicom/HHD_Resampled_dicom-20221217')

Basic info missed: /Users/airskcer/Downloads/HCM_20230112/.DS_Store
Basic info missed: /Users/airskcer/Downloads/HCM_20230112/.DS_Store.dcm
Basic info missed: /Users/airskcer/Downloads/HCM_20230112/20230108-HCM(425例图+425例信息) 王家鑫/.DS_Store.dcm
Basic info missed: /Users/airskcer/Downloads/HCM_20230112/20230108-HCM(425例图+425例信息) 王家鑫/2018.4th n=146/FU_ZHI_QIANG_046Y/Win10 (C) - ¿ì½Ý·½Ê½.lnk.dcm
Basic info missed: /Users/airskcer/Downloads/HCM_20230112/HCM yk2/.DS_Store.dcm
Basic info missed: /Users/airskcer/Downloads/HCM_20230112/HCM yk1/.DS_Store.dcm


[]

## Save dicom data as nifti data, must be done AFTER dataset resample
For SAX data, bigger slice number means lower position(at the most time)

In [None]:
def save_nifti(dataroot='./New_dicom/',output='./',fps=25,thres=4):
    """Convert dicom data files into nifti format, and resample its resolution
    Args:
        dataroot (str, optional): The root path of resampled data. Defaults to './New_dicom/'.
        output (str, optional): The output path of generated nfiti data. Defaults to './'.
        fps (int, optional): The fps of a single slice(SAX/LAX4CH). Defaults to 25.
    """
    for roots,dirs,files in os.walk(dataroot):
        if len(dirs)==0:
            slice_map={}
            file_info={}
            for i in range(len(files)):
                info=pydicom.dcmread(os.path.join(roots,files[i]))
                file_info[files[i]]=info
                sn=info.SeriesNumber
                if sn not in list(slice_map.keys()):
                    slice_map[sn]=[files[i]]
                else:
                    slice_map[sn].append(files[i])
            slices=list(slice_map.keys())
            for i in range(len(slices)):
                dcms=slice_map[slices[i]]
                pats_name=str(file_info[dcms[0]]['0010','0010'].value).replace(' ','_')
                pats_id=file_info[dcms[0]]['0010','0020'].value
                filetag=str(file_info[dcms[0]]['0008','103e'].value)
                print(f'filetag:{filetag}')
                if ('BH' in filetag or '4CH' in filetag or '4ch' in filetag or '4 CH' in filetag or 'TFE' in filetag or 'SecondaryCapture' in filetag or '1'==filetag) and ('SA' not in filetag and 'sa' not in filetag and 'shot' not in filetag and '8slices' not in filetag and 'LVOT' not in filetag):
                    if len(dcms)%fps==0:
                        temp_folder=os.path.join(output,'4ch_temp')
                        savepath=os.path.join(output,'4CH_data',f'{pats_id}_{pats_name}')
                        filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                    else:
                        temp_folder=os.path.join(output,'4ch_lge_temp')
                        savepath=os.path.join(output,'4CH_LGE_data',f'{pats_id}_{pats_name}')
                        filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                    for j in range(len(dcms)):
                        if not os.path.exists(temp_folder):
                            os.makedirs(temp_folder)
                        shutil.copyfile(os.path.join(roots,dcms[j]),os.path.join(temp_folder,dcms[j]))
                    if not os.path.exists(savepath):
                        os.makedirs(savepath)
                    # else:
                    #     os.remove(os.path.join(savepath,os.listdir(savepath)[0]))
                    try:
                        dcm2nii(temp_folder,os.path.join(savepath,filename))
                    except:
                        pass
                    # resample_volume(Origin=os.path.join(savepath,filename),output=os.path.join(savepath,filename))
                    print(f'{filename} saved into {savepath}')
                    shutil.rmtree(temp_folder)
                elif 'SA' in filetag or 'sa' in filetag or '2'==filetag or 'shot' in filetag or 'PSIR_TFE 8slices'==filetag:
                    """
                    Bigger slice ID, lower z-axis position
                    """
                    if len(dcms)%fps==0:
                        temp_folder=os.path.join(output,'sax_temp')
                        savepath=os.path.join(output,'SAX_data',f'{pats_id}_{pats_name}')
                        filename=f'slice_{slices[i]}.nii.gz'
                    else:
                        temp_folder=os.path.join(output,'sax_lge_temp')
                        savepath=os.path.join(output,'SAX_LGE_data',f'{pats_id}_{pats_name}')
                        filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                    for j in range(len(dcms)):
                        if not os.path.exists(temp_folder):
                            os.makedirs(temp_folder)
                        shutil.copyfile(os.path.join(roots,dcms[j]),os.path.join(temp_folder,dcms[j]))
                    if not os.path.exists(savepath):
                        os.makedirs(savepath)
                    try:
                        dcm2nii(temp_folder,os.path.join(savepath,filename))
                    except:
                        pass
                    # resample_volume(Origin=os.path.join(savepath,filename),output=os.path.join(savepath,filename))
                    print(f'{filename} saved into {savepath}')
                    shutil.rmtree(temp_folder)
    temps=['4ch_temp','4ch_lge_temp','sax_temp','sax_lge_temp']
    for i in range(len(temps)):
        try:
            shutil.rmtree(os.path.join(output,temps[i]))
        except: 
            pass

# save_nifti(dataroot='./test2/HCM_new/12162262_WANG_JIAN_LIANG/',output='./test2/HCM_nfiti')
# save_nifti(dataroot='F:/resampled_dicom/DCM_Resampled_dcm-20221129/12093484_MA_QUAN_BAO/20220105/PS_PSMDE SPGR SA/',output='E:/nifti_original_space/DCM_nifti-20221129/SAX_LGE_data/',fps=25,thres=4)
# save_nifti(dataroot='E:/BaiduNetdiskDownload/NP_Resampled_dicom-20221212/12083851_WANG_WEN_HOU/20211210/Cine FIESTA SA/',output='E:/BaiduNetdiskDownload/NP_nifti-20221212/',fps=25,thres=4)
# fix_seriesnum(root=r'E:\BaiduNetdiskDownload\CAD_Resampled_dicom\12027538_LI_BAO_MING\20210309\Cine FIESTA SA',threshold=30)
# save_nifti2(dataroot='E:/BaiduNetdiskDownload/ARVC_2_Resampled_dicom/11589224_ZHANG_MING_HAO/20160126/B-TFE_BH/',output='E:/VST_fusion/ARVC_2_nifti/',fps=30,thres=50)
# save_nifti2(dataroot='E:/BaiduNetdiskDownload/RCM_Resampled_dicom-20221216/11894742_SUN_JIAN_LIN/20210705/4CH_LGE/',output='E:/BaiduNetdiskDownload/RCM_nifti-20221216',fps=25,thres=4)
# save_nifti2(dataroot='E:/BaiduNetdiskDownload/HCM_Resampled_dicom/12066703_CHEN_BAI_GANG/20210810/PSIR_TFE 8slices_LGE/',output='E:/BaiduNetdiskDownload/HCM_nifti',fps=25,thres=4)
# save_nifti2(dataroot='E:/BaiduNetdiskDownload/HCM_Resampled_dcm/12145504_SUN_QING_HE/20220808/cine_trufi_retro_4ch/',output='E:/BaiduNetdiskDownload/HCM_nifti',fps=25)

## Save CINE

In [20]:
def save_cine(dataroot='./New_dicom/',output='./',fps=25,thres=60):
    """Convert dicom data files into nifti format, and resample its resolution
    Args:
        dataroot (str, optional): The root path of resampled data. Defaults to './New_dicom/'.
        output (str, optional): The output path of generated nfiti data. Defaults to './'.
        fps (int, optional): The fps of a single slice(SAX/LAX4CH). Defaults to 25.
    """
    error=[]
    for roots,dirs,files in os.walk(dataroot):
        if len(dirs)==0:
            try:
                slice_map={}
                file_info={}
                files=exclude_seg_files(files)
                for i in range(len(files)):
                    info=pydicom.dcmread(os.path.join(roots,files[i]))
                    file_info[files[i]]=info
                    sn=info.SeriesNumber
                    if sn not in list(slice_map.keys()):
                        slice_map[sn]=[files[i]]
                    else:
                        slice_map[sn].append(files[i])
                slices=list(slice_map.keys())
                for i in range(len(slices)):
                    dcms=slice_map[slices[i]]
                    pats_name=str(file_info[dcms[0]]['0010','0010'].value).replace(' ','_')
                    pats_id=file_info[dcms[0]]['0010','0020'].value
                    filetag=str(file_info[dcms[0]]['0008','103e'].value)
                    print(filetag)
                    # if ('BH' in filetag or '4CH' in filetag or '4 CH' in filetag or 'TFE' in filetag or 'SecondaryCapture' in filetag or '1'==filetag) and ('SA' not in filetag and 'sa' not in filetag and 'shot' not in filetag and '8slices' not in filetag and 'LVOT' not in filetag):
                    if len(files)<thres and 'LVOT' not in filetag:
                        print(f'fps: {len(dcms)}')
                        # if len(dcms)%fps==0:
                        if len(dcms)>=10:
                            temp_folder=os.path.join(output,'4ch_temp')
                            savepath=os.path.join(output,'4CH_data',f'{pats_id}_{pats_name}')
                            filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                            # continue
                        else:
                            # temp_folder=os.path.join(output,'4ch_lge_temp')
                            # savepath=os.path.join(output,'4CH_LGE_data',f'{pats_id}_{pats_name}')
                            # filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                            continue
                        for j in range(len(dcms)):
                            if not os.path.exists(temp_folder):
                                os.makedirs(temp_folder)
                            shutil.copyfile(os.path.join(roots,dcms[j]),os.path.join(temp_folder,dcms[j]))
                        if not os.path.exists(savepath):
                            os.makedirs(savepath)
                        try:
                        # if True:
                            dcm2nii(temp_folder,os.path.join(savepath,filename))
                            # resample_volume(Origin=os.path.join(savepath,filename),output=os.path.join(savepath,filename))
                            print(f'{filename} saved into {savepath}')
                        except:
                            pass
                        shutil.rmtree(temp_folder)
                    # elif 'SA' in filetag or 'sa' in filetag or '2'==filetag or 'shot' in filetag or 'PSIR_TFE 8slices'==filetag:
                    elif len(files)>=thres:
                        print(f'fps: {len(dcms)}')
                        """
                        Bigger slice ID, lower z-axis position
                        """
                        # if len(dcms)%fps==0:
                        # if len(dcms)>=25:
                        if True:
                            temp_folder=os.path.join(output,'sax_temp')
                            savepath=os.path.join(output,'SAX_data',f'{pats_id}_{pats_name}')
                            filename=f'slice_{slices[i]}.nii.gz'
                            # continue
                        else:
                            # temp_folder=os.path.join(output,'sax_lge_temp')
                            # savepath=os.path.join(output,'SAX_LGE_data',f'{pats_id}_{pats_name}')
                            # filename=f'{pats_id}_{pats_name}_{slices[i]}.nii.gz'
                            continue
                        for j in range(len(dcms)):
                            if not os.path.exists(temp_folder):
                                os.makedirs(temp_folder)
                            shutil.copyfile(os.path.join(roots,dcms[j]),os.path.join(temp_folder,dcms[j]))
                        if not os.path.exists(savepath):
                            os.makedirs(savepath)
                        try:
                            dcm2nii(temp_folder,os.path.join(savepath,filename))
                            # resample_volume(Origin=os.path.join(savepath,filename),output=os.path.join(savepath,filename))
                            print(f'{filename} saved into {savepath}')
                        except:
                            pass 
                        shutil.rmtree(temp_folder)
            except:
                print(f'unknow error occured with {roots}')
                error.append(roots)
    temps=['4ch_temp','4ch_lge_temp','sax_temp','sax_lge_temp']
    for i in range(len(temps)):
        try:
            shutil.rmtree(os.path.join(output,temps[i]))
        except:
            pass
    return error
cine_error=save_cine(dataroot='/Users/airskcer/Downloads/HCM_Resampled_dicom-20230112/',output='/Users/airskcer/Downloads/HCM_nifti-20230112/',fps=25,thres=80)
# save_cine(dataroot='/Users/airskcer/Downloads/RCM_Resampled_dicom-20230107/11124028_SHI_KE_GANG/',output='/Users/airskcer/Downloads/RCM_nifti-20230107/',fps=25,thres=60)

In [None]:
# path='/Users/airskcer/Downloads/check/'
# pats=os.listdir(path)
# pats=exclude_seg_files(pats)
# for i in range(len(pats)):
#     id=int(pats[i].split('.jpg')[0].split('_')[-1])
#     pat_name=pats[i].split('.jpg')[0].replace(f'_{id}','')
#     save_cine(dataroot=f'/Users/airskcer/Downloads/HCM_Resampled_dicom-20230102/{pat_name}',output='/Users/airskcer/Downloads/HCM_nifti-20230102/',thres=60)
# for roots,dirs,files in os.walk('/Users/airskcer/Downloads/HCM_nifti-20230102/4CH_data/'):
#     if len(dirs)==0:
#         files=exclude_seg_files(files)
#         for i in range(len(files)):
#             if files[i].replace('.nii.gz','.jpg') in pats:
#                 print(files[i])
#                 os.remove(os.path.join(roots,files[i]))

## Check data lost

In [21]:
nifti_path='/Users/airskcer/Downloads/HCM_nifti-20230112/4CH_data/'
dcm_path='/Users/airskcer/Downloads/HCM_Resampled_dicom-20230112/'
dcms=os.listdir(dcm_path)
niftis=os.listdir(nifti_path)
error=[]
for i in range(len(niftis)):
    if niftis[i].startswith('._'):
        os.remove(os.path.join(nifti_path,niftis[i]))
print(len(niftis),len(dcms))
for i in range(len(dcms)):
    if dcms[i] not in niftis:
        print(dcms[i])
        error.append(os.path.join(dcm_path,dcms[i]))
# print(error)11

672 679
11935261_PEI_YUN_PENG
11868487_LIU_FENG_LAN
12104331_ZHONG_HUI
12096792_HOU_XIAO_LI
12103375_MA_YI_FEN
11865661_WU_CUI_LAN
11266381_LUO_CHANG_SHENG


In [18]:
ds_store(root='/Users/airskcer/Downloads/')
del_empty(root='/Users/airskcer/Downloads/')

## Divide 4CH_LGE and SAX_LGE into different folders

In [8]:
def fix_4ch_lge(path=r'E:\BaiduNetdiskDownload\ARVC_1\11235202_HE_HUI',fps=25,thres=4):
    for roots,dirs,files in os.walk(path):
        if len(dirs)==0:
            size={}
            if len(files)%fps!=0 and len(files)<50:
                for i in range(len(files)):
                    sd=pydicom.dcmread(os.path.join(roots,files[i])).pixel_array.shape
                    if sd in size.keys():
                        size[sd].append(os.path.join(roots,files[i]))
                    else:
                        size[sd]=[os.path.join(roots,files[i])]
                # print(size)
                # if len(size.keys())>1:
                #     print(roots)
                if len(size.keys())==2:
                    savepath=roots.replace(roots.split('/')[-1],'4CH_LGE')
                    # print(savepath)
                    try:
                        os.makedirs(savepath)
                    except:
                        pass
                    f_key=None
                    num=len(files)
                    for key in size.keys():
                        if len(size[key])<num:
                            f_key=key
                            num=len(size[key])
                    for i in range(len(size[f_key])): 
                        shutil.move(size[key][i],savepath)
                        print(f'{size[key][i]} moved to {savepath}')
# for i in range(len(error)):
#     save_nifti2(dataroot=error[i],output='E:/BaiduNetdiskDownload/ARVC_1_nifti',fps=25,thres=4)
# fix_4ch_lge(path='E:/BaiduNetdiskDownload/CHD_Resampled_dicom-20221210/')
fix_4ch_lge(path='/Users/airskcer/Downloads/HCM_Resampled_dicom-20230112/')
# fix_4ch_lge(path='/Users/airskcer/Downloads/RCM_Resampled_dicom-20230107/')


AttributeError: Unable to convert the pixel data: one of Pixel Data, Float Pixel Data or Double Float Pixel Data must be present in the dataset

## Get spacing info

In [27]:
import pandas as pd
from tqdm import tqdm

def get_data(path='./test2/HCM_new/'):
    """
    The get_data function returns a list of dictionaries, where each dictionary contains the patient's name, 
    patient ID number, and vendor. The function takes one argument: path to the directory containing all patients' 
    data folders. It will return a list of dictionaries with each dictionary containing information about one patient.
    
    :param path='./test2/HCM_new/': Specify the path to the directory containing all of your patients' dicom files
    :return: A list of dictionaries
    """
    pats=os.listdir(path)
    all_data=[]
    for i in range(len(pats)):
        pat_path=os.path.join(path,pats[i])
        pat_data={}
        for roots,dirs,files in os.walk(pat_path):
            if len(dirs)==0:
                for i in range(len(files)):
                    info=pydicom.dcmread(os.path.join(roots,files[i]))
                    pat_data['Name']=str(info['0010','0010'].value)
                    pat_data['ID']=info['0010','0020'].value
                    if len(files)%25==0:
                        pat_data['Vendor_cine']=str(info['0008','0070'].value)
                        if len(files)>100:
                            try:
                                pat_data['SAX_cine_z_spacing']=float(info['0018','0088'].value)
                            except:
                                pass
                            try:
                                pat_data['SAX_cine_xy_spacing']=list(info['0028','0030'].value)
                            except:
                                pass
                        else:
                            try:
                                pat_data['4CH_cine_z_spacing']=float(info['0018','0088'].value)
                            except:
                                pass
                            try:
                                pat_data['4CH_cine_xy_spacing']=list(info['0028','0030'].value)
                            except:
                                pass
                    # elif len(files):
                    else:
                        pat_data['Vendor_LGE']=info['0008','0070'].value
                        if len(files)>5:
                            try:
                                pat_data['SAX_LGE_z_spacing']=float(info['0018','0088'].value)
                            except:
                                pass
                            try:
                                pat_data['SAX_LGE_xy_spacing']=list(info['0028','0030'].value)
                            except:
                                pass
                        else:
                            try:
                                pat_data['4CH_LGE_z_spacing']=float(info['0018','0088'].value)
                            except:
                                pass
                            try:
                                pat_data['4CH_LGE_xy_spacing']=list(info['0028','0030'].value)
                            except:
                                pass
        print(pat_data)
        all_data.append(pat_data)
    return all_data
def get_info_from_nifti(nifti_path='E:/BaiduNetdiskDownload/DCM_nifti/',check_mode=True):
    niftis=['4CH_data','4CH_LGE_data','SAX_data','SAX_LGE_data']
    avail_pats=[]
    for i in range(len(niftis)):
        try:
            pats=os.listdir(os.path.join(nifti_path,niftis[i]))
            if len(pats)>len(avail_pats):
                avail_pats=pats
        except:
            pass
    print(f'{len(avail_pats)} patients available')
    all_info=[]
    
    bar=tqdm(range(len(avail_pats)),mininterval=1)
    for i in bar:
        pat_info=dict(name=avail_pats[i])
        for j in range(len(niftis)):
            sizes = []
            try:
                pat_folder=os.path.join(nifti_path,niftis[j],avail_pats[i])
                pat_files=exclude_seg_files(os.listdir(pat_folder))
                if check_mode:
                    for k in range(len(pat_files)):
                        data=sitk.ReadImage(os.path.join(pat_folder,pat_files[k]))
                        sizes.append(list(data.GetSpacing()))
                else:
                    data = sitk.ReadImage(os.path.join(pat_folder, pat_files[0]))
                spacing=data.GetSpacing()
                pat_info[f'{niftis[j]}_xyz']=spacing
            except:
                pass
            if check_mode:
                sizes=np.array(sizes)
                if np.all(np.std(sizes,axis=0))!=0:
                    print(pat_folder,sizes)
        all_info.append(pat_info)
        # break
    return all_info
# all_info=[]
# mods=os.listdir('E:/nifti_original_space/')
# for i in range(len(mods)):
#     folder=os.path.join('E:/nifti_original_space/',mods[i])
#     if os.path.isdir(folder):
#         print(folder)
#         niftis=['4CH_data','4CH_LGE_data','SAX_data','SAX_LGE_data']
#         avail_pats=[]
#         for j in range(len(niftis)):
#             try:
#                 pats=os.listdir(os.path.join(folder,niftis[j]))
#                 if len(pats)>len(avail_pats):
#                     avail_pats=pats
#             except:
#                 pass
        # all_info+=get_info_from_nifti(nifti_path=folder)
        # all_info+=find_error(nifti_path=folder,niftis=avail_pats)
# all_info=get_info_from_nifti(nifti_path='/Volumes/SSD/HHD_nifti-20221217/')
all_info=get_info_from_nifti(nifti_path='/Users/airskcer/Downloads/NP_nifti-20230109/',check_mode=False)
# all_info=find_error(nifti_path='E:/BaiduNetdiskDownload/RCM_nifti-20221216/',niftis=os.listdir('E:/BaiduNetdiskDownload/RCM_Resampled_dicom-20221216'))
# hcm_data1=get_data(path='E:/BaiduNetdiskDownload/HCM_Resampled_dcm/')
# cad_data=get_data(path='./test2/CAD_new/')
# all_data=pd.DataFrame(hcm_data+cad_data)
# all_data.to_excel('spacing_info.xlsx',index=False)
# data=pd.DataFrame(get_data(path='./test4/DCM_new/'))
# data.to_excel('./test4/spacing_info.xlsx',index=False)

151 patients available


100%|██████████| 151/151 [00:04<00:00, 33.13it/s]


In [28]:
pd.DataFrame(all_info).to_csv('./fuwai_dataset/NP_20230109/spacing_info.csv',index=False)

## Print all meteadata of a single dicom file or all dicom files in a folder

In [None]:
def dicom_metadata(path):
    if os.path.isdir(path):
        for roots,dirs,files in os.walk(path):
            if len(dirs)==0:
                for i in range(len(files)):
                    if files[i].endswith('.dcm'):
                        info=pydicom.dcmread(os.path.join(roots,files[i]))
                        print(info)
    elif path.endswith('.dcm'):
        info=pydicom.dcmread(path)
        print(info)
    return info
# dicom_metadata('./New_dicom/11892346_ZHANG_YING/20190528/Cine FIESTA SA/')

In [None]:
dicom_metadata('F:/resampled_dicom/ARVC_1_Resampled_dicom/11097334_FENG_ZHEN/20110117/4CH_LGE/')