In [1]:
import os
import glob
from pathlib import Path
import json
import pandas as pd
import numpy as np
import nibabel as nib
from PIL import Image
import matplotlib.pyplot as plt

In [19]:
wd = Path.cwd().parent
data_dir = '/project/dane2/wficai/pda'
os.listdir(data_dir)

In [20]:
os.listdir(data_dir)

FileNotFoundError: [Errno 2] No such file or directory: '/zfs/wficai/xray/xray/'

In [7]:
metadata_glob = data_dir + '/Images/*/*.json'
nifti_glob = data_dir + '/Images/*/*.nii'

In [8]:
csv_metadata_path = (wd / 'data/metadata.csv').resolve().as_posix()
csv_metadata_path

'/home/xiaofey/xray/xray-master/data/metadata.csv'

In [9]:
csv_nifti_path = (wd / 'data/nifti.csv').resolve().as_posix()

In [10]:
png_export_path = (wd / 'data/exported_pngs').resolve().as_posix()
jpg_export_path = (wd / 'data/exported_jpgs').resolve().as_posix()
png_export_path, jpg_export_path

('/home/xiaofey/xray/xray-master/data/exported_pngs',
 '/home/xiaofey/xray/xray-master/data/exported_jpgs')

## Utilities

In [11]:
def clean_pid(pid):
    an_num = pid.split(' ')[-1].replace('(', '').replace(')', '')
    pid_clean = 'AN' + f"{an_num.zfill(5)}"
    
    return pid_clean
    
def next_power_of_2(x):  
    return 1 if x == 0 else 2**(x - 1).bit_length()

## Load metadata

In [12]:
meta_data_files = glob.glob(metadata_glob, recursive=True)

In [13]:
meta_data_files

[]

In [14]:

len(meta_data_files)

0

In [11]:
def read_json(filepath):
    with open(filepath, 'r') as fp:
        record = json.load(fp)
    
    record['FULL_PATH'] = filepath.replace('\\', '/')
        
    # add metadata embedded in filepath
    path_split = filepath.split('/')
    record['PATIENT_ID'] = clean_pid(path_split[-2])
    record['FILENAME_VIEW'] = path_split[-1].split('.json')[0]
    
    return record

read_json(meta_data_files[2])

{'Modality': 'CR',
 'Manufacturer': 'Philips',
 'ManufacturersModelName': 'DigitalDiagnost',
 'InstitutionName': 'Greenville Memorial Hospital',
 'InstitutionalDepartmentName': 'E.R. X-Ray',
 'InstitutionAddress': '801 Grove Road GREENVILLE, SC 29605 US',
 'DeviceSerialNumber': '963334482925',
 'StationName': 'GMHER1',
 'BodyPartExamined': 'SHOULDER',
 'ProcedureStepDescription': 'XR SHOULDER 2+ VW LEFT',
 'SoftwareVersions': '3.1.2\\PMS81.101.1.1 GXR GXRIM9.1',
 'SeriesDescription': 'AP External Rot',
 'ProtocolName': 'Shoulder L',
 'ImageType': ['ORIGINAL', 'PRIMARY'],
 'SeriesNumber': 2,
 'AcquisitionTime': '17:55:20.000000',
 'XRayExposure': 25,
 'ImageOrientationPatientDICOM': [0, 0, 0, 0, 0, 0],
 'ConversionSoftware': 'dcm2niix',
 'ConversionSoftwareVersion': 'v1.0.20201102',
 'FULL_PATH': '/zfs/wficai/xray/xray//Images/AN (121)/Shoulder_L_2.json',
 'PATIENT_ID': 'AN00121',
 'FILENAME_VIEW': 'Shoulder_L_2'}

In [12]:
meta_data = pd.DataFrame([read_json(fp) for fp in meta_data_files])

In [13]:
meta_data

Unnamed: 0,Modality,Manufacturer,ManufacturersModelName,InstitutionName,InstitutionalDepartmentName,InstitutionAddress,DeviceSerialNumber,StationName,BodyPartExamined,ProcedureStepDescription,...,XRayExposure,ImageOrientationPatientDICOM,ConversionSoftware,ConversionSoftwareVersion,FULL_PATH,PATIENT_ID,FILENAME_VIEW,RawImage,AcquisitionNumber,ImageComments
0,CR,Philips,DigitalDiagnost,Greenville Memorial Hospital,E.R. X-Ray,"801 Grove Road GREENVILLE, SC 29605 US",963334482925,GMHER1,SHOULDER,XR SHOULDER 2+ VW LEFT,...,26.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,AN00121,Shoulder_L_1,,,
1,CR,Philips,DigitalDiagnost,Greenville Memorial Hospital,E.R. X-Ray,"801 Grove Road GREENVILLE, SC 29605 US",963334482925,GMHER1,SHOULDER,XR SHOULDER 2+ VW LEFT,...,10.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,AN00121,Shoulder_L_3,,,
2,CR,Philips,DigitalDiagnost,Greenville Memorial Hospital,E.R. X-Ray,"801 Grove Road GREENVILLE, SC 29605 US",963334482925,GMHER1,SHOULDER,XR SHOULDER 2+ VW LEFT,...,25.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,AN00121,Shoulder_L_2,,,
3,,Canon,CXDI Control Software NE,Oconee Memorial Hospital,,,1B000458,555-NE-5230028,HUMERUS,,...,8.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (308)/Humerus...,AN00308,Humerus_LAT_2,False,,
4,,Canon,CXDI Control Software NE,Oconee Memorial Hospital,,,1B000458,555-NE-5230028,HUMERUS,,...,8.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (308)/Humerus...,AN00308,Humerus_AP_1,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,CR,Philips,digital DIAGNOST,Greer Memorial Hospital,Radiology,,08.02.432,Digital Diagnost,SHOULDER,ABMH SHOULDER AP/LAT/AXILARYL,...,40.0,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (135)/Shoulde...,AN00135,Shoulder_Y_view_3,False,25858.0,Shoulder Y view//////
1144,CR,Canon,DRX-1,"GREENVILLE HEALTH SYSTEM, GREER CAMPUS",UNKNOWN,"830 S BUNCOMBE RD GREER, SC",000775,GRMHDRX3,SCAPULA,XR SHOULDER 2+ VW RIGHT,...,,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (176)/AXIAL_3...,AN00176,AXIAL_3,False,1.0,
1145,CR,Canon,DRX-1,"GREENVILLE HEALTH SYSTEM, GREER CAMPUS",UNKNOWN,"830 S BUNCOMBE RD GREER, SC",000775,GRMHDRX3,SCAPULA,XR SHOULDER 2+ VW RIGHT,...,,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (176)/AP_EXT_...,AN00176,AP_EXT_ROT_2,False,1.0,
1146,CR,Canon,DRX-1,"GREENVILLE HEALTH SYSTEM, GREER CAMPUS",UNKNOWN,"830 S BUNCOMBE RD GREER, SC",000775,GRMHDRX3,SCAPULA,XR SHOULDER 2+ VW RIGHT,...,,"[0, 0, 0, 0, 0, 0]",dcm2niix,v1.0.20201102,/zfs/wficai/xray/xray//Images/AN (176)/AP_INT_...,AN00176,AP_INT_ROT_1,False,1.0,


In [14]:
meta_data.to_csv(csv_metadata_path, index=None)

## Load images

In [15]:
nifti_files = [fp.replace('\\', '/') for fp in glob.glob(nifti_glob, recursive=True)]

In [16]:
len(nifti_files)

1386

In [1]:
def read_nifti(filepath):
    path_split = filepath.split('/')
    record = dict()
    record['PATIENT_ID'] = clean_pid(path_split[-2])
    record['FULL_PATH'] = filepath
    record['FILENAME_VIEW'] = path_split[-1].split('.nii')[0]
    
    #load the nifti image
    img_nib = nib.load(filepath)
    img = np.asarray(img_nib.dataobj)
    record['MIN'] = img.min()
    record['MAX'] = img.max()
    record['DATA_TYPE'] = img.dtype
    record['WIDTH'] = img.shape[-3]
    record['HEIGHT'] = img.shape[-2]
    record['NUM_CHANNELS'] = img.shape[-1]
    
    # determine if image represents a ROI
    record['ROI'] = 'ROI' in record['FILENAME_VIEW'].split('_')[-1]
    
    # Format image for saving
    img = img.astype(np.float32)
    img = (img - record['MIN']) / (record['MAX'] - record['MIN'])
    img = img.squeeze().T[-1::-1]
    
    # save png and jpg
    save_path_jpg =  jpg_export_path + f"/{record['PATIENT_ID']}_{record['FILENAME_VIEW']}.jpg"
    record['JPG_PATH'] = save_path_jpg
    plt.imsave(save_path_jpg, img, cmap='Greys_r')
    
    return record

In [18]:
nifti_files[0]

'/zfs/wficai/xray/xray//Images/AN (121)/Shoulder_L_1.nii'

In [19]:
nifti_meta_data = []
for ix, fp in enumerate(nifti_files):
    try:
        print(f'Processing image {ix+1} / {len(nifti_files)}', end='\r')
        nifti_meta_data.append(read_nifti(fp))
    except Exception as e:
        print(e)

nifti_meta_data = pd.DataFrame(nifti_meta_data)   

Third dimension must be 3 or 4
Third dimension must be 3 or 4
Processing image 1386 / 1386

In [20]:
nifti_meta_data

Unnamed: 0,PATIENT_ID,FULL_PATH,FILENAME_VIEW,MIN,MAX,DATA_TYPE,WIDTH,HEIGHT,NUM_CHANNELS,ROI,JPG_PATH
0,AN00121,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,Shoulder_L_1,0,32767,int16,2566,1779,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1,AN00121,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,Shoulder_L_3_ROI1,0,1,uint8,1057,1605,1,True,/home/xiaofey/xray/xray-master/data/exported_j...
2,AN00121,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,Shoulder_L_2,0,32767,int16,2153,1844,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
3,AN00121,/zfs/wficai/xray/xray//Images/AN (121)/Shoulde...,Shoulder_L_3,0,29119,int16,1057,1605,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
4,AN00308,/zfs/wficai/xray/xray//Images/AN (308)/Humerus...,Humerus_AP_1,0,4095,int16,3240,2774,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
...,...,...,...,...,...,...,...,...,...,...,...
1379,AN00135,/zfs/wficai/xray/xray//Images/AN (135)/Shoulde...,Shoulder_Y_view_3,0,1023,int16,2518,3001,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1380,AN00176,/zfs/wficai/xray/xray//Images/AN (176)/AP_5.nii,AP_5,371,4095,int16,1769,2079,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1381,AN00176,/zfs/wficai/xray/xray//Images/AN (176)/AXIAL_3...,AXIAL_3,749,4095,int16,1863,1546,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1382,AN00176,/zfs/wficai/xray/xray//Images/AN (176)/AP_INT_...,AP_INT_ROT_1,482,4095,int16,1870,1755,1,False,/home/xiaofey/xray/xray-master/data/exported_j...


## save

In [21]:
nifti_meta_data.to_csv(csv_nifti_path, index=None)

In [22]:
csv_nifti_path

'/home/xiaofey/xray/xray-master/data/nifti.csv'