In [1]:
import json
import os

import numpy as np
import pandas as pd
from PIL import Image

In [2]:
# Input: label export (JSON) that is opened and parsed into `data` below.
roi_data_path = '../data/roi_export-2022-09-20T01 40 52.606Z.json' 
# Input: CSV of per-image metadata, read into `df_nifti` and joined on `external_id`.
nifti_meta_path = '../data/nifti.csv'
# Output: directory prefix the cropped images are saved under (note the trailing slash).
cropped_image_path = '../data/cropped_images/'
# Output: CSV that the merged label + image metadata table is written to.
cropped_meta_path = '../data/cropped_image_metadata.csv'

# Parse label JSON objects

In [3]:
# Load the label export; `data` is indexed and iterated as a list of records below.
# JSON text is UTF-8 by specification, so pin the encoding instead of inheriting
# the platform default, which is not UTF-8 on every system.
with open(roi_data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [4]:
roi_data_path


'../data/roi_export-2022-09-20T01 40 52.606Z.json'

In [5]:
data[0]

{'ID': 'cl6bg0tybail807zngn1b2qxb',
 'DataRow ID': 'cl66q51xv10yt075bct7wfefq',
 'Labeled Data': 'https://fast-videos.s3.us-east-2.amazonaws.com/phf/cobra_prelim_set/AN00001_Y_VIEW_4.jpg',
 'Label': {'objects': [{'featureId': 'cl6bg0zib00013s6gt29fdw2g',
    'schemaId': 'cl62esinv16is07152vpv1y33',
    'color': '#1CE6FF',
    'title': 'ROI',
    'value': 'roi',
    'bbox': {'top': 401, 'left': 505, 'height': 996, 'width': 863},
    'instanceURI': 'https://api.labelbox.com/masks/feature/cl6bg0zib00013s6gt29fdw2g?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjanIyYzk4MXhrN29kMDc5NzM0M2ZtaXN5Iiwib3JnYW5pemF0aW9uSWQiOiJjanIyYzk3cmRxN3RyMDczNzI1bDYxY29lIiwiaWF0IjoxNjYzNjM4MDUyLCJleHAiOjE2NjYyMzAwNTJ9.p3SsCyEzh51ZAWvMUjkP54mw4gKpn39NuAmYcw468Xs'}],
  'classifications': [{'featureId': 'cl6nvkvow00032869zcod0eeb',
    'schemaId': 'cl6nvcrzg0g0p08z71ayt4rm7',
    'scope': 'global',
    'title': 'View',
    'value': 'view',
    'answer': {'featureId': 'cl6nvkvow000228699hr5qvel',
   

In [18]:
data[0]['Label']

{'objects': [{'featureId': 'cl6bg0zib00013s6gt29fdw2g',
   'schemaId': 'cl62esinv16is07152vpv1y33',
   'color': '#1CE6FF',
   'title': 'ROI',
   'value': 'roi',
   'bbox': {'top': 401, 'left': 505, 'height': 996, 'width': 863},
   'instanceURI': 'https://api.labelbox.com/masks/feature/cl6bg0zib00013s6gt29fdw2g?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjanIyYzk4MXhrN29kMDc5NzM0M2ZtaXN5Iiwib3JnYW5pemF0aW9uSWQiOiJjanIyYzk3cmRxN3RyMDczNzI1bDYxY29lIiwiaWF0IjoxNjYzNjM4MDUyLCJleHAiOjE2NjYyMzAwNTJ9.p3SsCyEzh51ZAWvMUjkP54mw4gKpn39NuAmYcw468Xs'}],
 'classifications': [{'featureId': 'cl6nvkvow00032869zcod0eeb',
   'schemaId': 'cl6nvcrzg0g0p08z71ayt4rm7',
   'scope': 'global',
   'title': 'View',
   'value': 'view',
   'answer': {'featureId': 'cl6nvkvow000228699hr5qvel',
    'schemaId': 'cl6nvcrzg0g0s08z77um3fvw4',
    'title': 'Y',
    'value': 'y'}}],
 'relationships': []}

In [19]:
def parse_roi(lab):
    """Flatten one exported label record into a flat dict (one DataFrame row).

    Parameters
    ----------
    lab : dict
        A single record of the label export. ``'External ID'`` is required.
        ``'Label'`` is expected to be a dict holding ``'objects'`` (each with a
        ``'bbox'`` dict) and ``'classifications'``; any of these may be absent,
        in which case the corresponding fields are simply not emitted.

    Returns
    -------
    dict
        Always contains ``external_id``. Additionally contains:

        * ``bbox_<key>`` for every key of the first object's ``bbox``
          (e.g. ``bbox_top``, ``bbox_left``, ``bbox_height``, ``bbox_width``);
        * ``<answer value>: True`` for each answer of the ``special_notes``
          classification;
        * ``note`` with the answer of the ``optional_notes`` classification.

    Raises
    ------
    KeyError
        If ``lab`` has no ``'External ID'`` entry.
    """
    result = {'external_id': lab['External ID']}

    # A record without a usable label payload (missing, empty, or not a dict)
    # is treated like a record with no objects and no classifications.
    label = lab.get('Label')
    if not isinstance(label, dict):
        label = {}

    # get bounding box: the first object wins when several were drawn
    objects = label.get('objects') or []
    if len(objects) > 1:
        print("Warning: more than one bounding box. Using the first box.")
    bbox = objects[0].get('bbox') if objects else None

    if bbox is not None:
        for k, v in bbox.items():
            result[f'bbox_{k}'] = v

    # get classifications; other classification values are ignored
    for c in label.get('classifications') or []:
        kind = c.get('value')
        if kind == 'special_notes':
            # checklist: every ticked answer becomes its own True flag
            for a in c.get('answers') or []:
                result[a['value']] = True
        elif kind == 'optional_notes' and 'answer' in c:
            result['note'] = c['answer']

    return result

In [20]:
# Flatten every exported label into one record, then build the table in one call.
roi_records = list(map(parse_roi, data))
df = pd.DataFrame(roi_records)
df

Unnamed: 0,external_id,bbox_top,bbox_left,bbox_height,bbox_width,note,poor_quality,not_a_phf_view,brightness_flipped
0,AN00001_Y_VIEW_4,401.0,505.0,996.0,863.0,,,,
1,AN00010__1001,277.0,688.0,631.0,393.0,,,,
2,AN00010__1002,1098.0,642.0,844.0,825.0,"bad quality image, drop",True,,
3,AN00010__1004,809.0,353.0,347.0,409.0,"not the shoulder, drop",,True,
4,AN00010__1005,693.0,1087.0,349.0,389.0,"Not shoulder, skip",,True,
...,...,...,...,...,...,...,...,...,...
1141,AN00099_Shoulder_R_2,588.0,813.0,532.0,454.0,,,,
1142,AN00099_Shoulder_R_3,69.0,1491.0,521.0,455.0,,,,
1143,AN00001_AXILLARY_3,1363.0,1834.0,726.0,698.0,,,,True
1144,AN00001_EXT_ROTATION_2,726.0,677.0,796.0,721.0,,,,True


In [21]:
# impute False flags
# parse_roi only ever records a flag as True, so a missing value means the
# flag was not set for that label.
for flag in ('brightness_flipped', 'poor_quality', 'not_a_phf_view'):
    if flag not in df.columns:
        # No label in this export carried the flag, so the column was never created.
        df[flag] = False
    else:
        # Go through the nullable 'boolean' dtype so the result is a genuine
        # bool column on every pandas version, instead of relying on fillna()
        # implicitly downcasting an object column.
        df[flag] = df[flag].astype('boolean').fillna(False).astype(bool)

In [22]:
# Keep only the labels whose bounding box is fully specified.
bbox_columns = ['bbox_top', 'bbox_left', 'bbox_height', 'bbox_width']
has_full_bbox = df[bbox_columns].notna().all(axis=1)
df = df[has_full_bbox]

In [23]:
df[~df.note.isna()]

Unnamed: 0,external_id,bbox_top,bbox_left,bbox_height,bbox_width,note,poor_quality,not_a_phf_view,brightness_flipped
2,AN00010__1002,1098.0,642.0,844.0,825.0,"bad quality image, drop",True,False,False
3,AN00010__1004,809.0,353.0,347.0,409.0,"not the shoulder, drop",False,True,False
4,AN00010__1005,693.0,1087.0,349.0,389.0,"Not shoulder, skip",False,True,False
5,AN00100_AXILLARY_4,462.0,1043.0,614.0,389.0,image rotation makes box angle odd,False,False,False
12,AN00102_Shoulder_R_2,418.0,1000.0,596.0,542.0,low quality image,True,False,False
31,AN00108_Shoulder_L_3,396.0,473.0,521.0,514.0,hard to make out,True,False,False
54,AN00116_Shoulder_R_1,665.0,783.0,536.0,490.0,strange shapes appear,False,False,False
55,AN00116_Shoulder_R_2,797.0,743.0,470.0,465.0,strange shapes appear,False,False,False
59,AN00117_AP_SHOULDER_5,375.0,708.0,1010.0,829.0,poor contrast,True,False,True
60,AN00117_AP_SHOULDER_6,595.0,568.0,504.0,520.0,low contrast,True,False,True


In [24]:
df.brightness_flipped.value_counts()

False    755
True     361
Name: brightness_flipped, dtype: int64

In [25]:
df.poor_quality.value_counts()

False    1044
True       72
Name: poor_quality, dtype: int64

In [26]:
df.not_a_phf_view.value_counts()

False    1112
True        4
Name: not_a_phf_view, dtype: int64

# Join on image data

In [27]:
# Read the image metadata and derive the join key used by the label table:
# external_id = "<PATIENT_ID>_<FILENAME_VIEW>".
df_nifti = pd.read_csv(nifti_meta_path).assign(
    external_id=lambda meta: meta['PATIENT_ID'] + '_' + meta['FILENAME_VIEW']
)

In [28]:
# Attach the image metadata to every ROI label.
# Check coverage explicitly first: a bare row-count comparison after an inner
# merge can pass when rows dropped for lack of a match are offset by rows
# duplicated through repeated keys, and it cannot say which ids are at fault.
missing_ids = sorted(set(df['external_id']) - set(df_nifti['external_id']), key=str)
assert not missing_ids, (
    "Data loss when merging nifti metadata with ROI data. ROI data should be a subset of nifti metadata. "
    f"{len(missing_ids)} external_id(s) have no metadata, e.g. {missing_ids[:5]}"
)

df_wide = df.merge(df_nifti, how='inner', on='external_id')

# Every label has a match at this point, so a differing row count can only
# mean that some external_id occurs more than once in the nifti metadata.
assert len(df_wide) == len(df), (
    "Row count changed when merging nifti metadata with ROI data: "
    "some external_id values are duplicated in the nifti metadata."
)

In [29]:
df_wide.head()

Unnamed: 0,external_id,bbox_top,bbox_left,bbox_height,bbox_width,note,poor_quality,not_a_phf_view,brightness_flipped,PATIENT_ID,FULL_PATH,FILENAME_VIEW,MIN,MAX,DATA_TYPE,WIDTH,HEIGHT,NUM_CHANNELS,ROI,JPG_PATH
0,AN00001_Y_VIEW_4,401.0,505.0,996.0,863.0,,False,False,False,AN00001,/zfs/wficai/xray/xray//Images/AN (1)/Y_VIEW_4.nii,Y_VIEW_4,1139,26177,int16,2328,2928,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1,AN00010__1001,277.0,688.0,631.0,393.0,,False,False,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1001.nii,_1001,5398,16383,int16,2142,1855,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
2,AN00010__1002,1098.0,642.0,844.0,825.0,"bad quality image, drop",True,False,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1002.nii,_1002,6035,11921,int16,2119,2300,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
3,AN00010__1004,809.0,353.0,347.0,409.0,"not the shoulder, drop",False,True,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1004.nii,_1004,4875,16383,int16,1571,2499,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
4,AN00010__1005,693.0,1087.0,349.0,389.0,"Not shoulder, skip",False,True,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1005.nii,_1005,0,16383,int16,2630,1477,1,False,/home/xiaofey/xray/xray-master/data/exported_j...


In [30]:
df_wide

Unnamed: 0,external_id,bbox_top,bbox_left,bbox_height,bbox_width,note,poor_quality,not_a_phf_view,brightness_flipped,PATIENT_ID,FULL_PATH,FILENAME_VIEW,MIN,MAX,DATA_TYPE,WIDTH,HEIGHT,NUM_CHANNELS,ROI,JPG_PATH
0,AN00001_Y_VIEW_4,401.0,505.0,996.0,863.0,,False,False,False,AN00001,/zfs/wficai/xray/xray//Images/AN (1)/Y_VIEW_4.nii,Y_VIEW_4,1139,26177,int16,2328,2928,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1,AN00010__1001,277.0,688.0,631.0,393.0,,False,False,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1001.nii,_1001,5398,16383,int16,2142,1855,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
2,AN00010__1002,1098.0,642.0,844.0,825.0,"bad quality image, drop",True,False,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1002.nii,_1002,6035,11921,int16,2119,2300,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
3,AN00010__1004,809.0,353.0,347.0,409.0,"not the shoulder, drop",False,True,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1004.nii,_1004,4875,16383,int16,1571,2499,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
4,AN00010__1005,693.0,1087.0,349.0,389.0,"Not shoulder, skip",False,True,False,AN00010,/zfs/wficai/xray/xray//Images/AN (10)/_1005.nii,_1005,0,16383,int16,2630,1477,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1111,AN00099_Shoulder_R_2,588.0,813.0,532.0,454.0,,False,False,False,AN00099,/zfs/wficai/xray/xray//Images/AN (99)/Shoulder...,Shoulder_R_2,0,30823,int16,2931,1614,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1112,AN00099_Shoulder_R_3,69.0,1491.0,521.0,455.0,,False,False,False,AN00099,/zfs/wficai/xray/xray//Images/AN (99)/Shoulder...,Shoulder_R_3,0,29055,int16,2163,1022,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1113,AN00001_AXILLARY_3,1363.0,1834.0,726.0,698.0,,False,False,True,AN00001,/zfs/wficai/xray/xray//Images/AN (1)/AXILLARY_...,AXILLARY_3,10244,26573,int16,2928,2328,1,False,/home/xiaofey/xray/xray-master/data/exported_j...
1114,AN00001_EXT_ROTATION_2,726.0,677.0,796.0,721.0,,False,False,True,AN00001,/zfs/wficai/xray/xray//Images/AN (1)/EXT_ROTAT...,EXT_ROTATION_2,1421,27093,int16,2928,2328,1,False,/home/xiaofey/xray/xray-master/data/exported_j...


In [38]:
from PIL import Image, ImageOps

def crop_image(row):
    """Crop one image to its bounding box, optionally invert it, and save it.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'JPG_PATH'`` (source image path), ``'bbox_left'``,
        ``'bbox_top'``, ``'bbox_width'``, ``'bbox_height'`` and
        ``'brightness_flipped'`` (truthy -> the crop is inverted).

    Returns
    -------
    str
        Path of the saved crop: the source file name placed under the
        module-level ``cropped_image_path`` directory.
    """
    # read in image
    # NOTE(review): the prefix rewrite presumably compensates for the images
    # having been exported under a different directory layout - confirm.
    source_path = row['JPG_PATH'].replace('/Documents/Code/', '/PycharmProjects/')

    # crop box as (left, top, right, bottom)
    left = row['bbox_left']
    top = row['bbox_top']
    box = (left, top, left + row['bbox_width'], top + row['bbox_height'])

    # Make sure the destination exists so that save() does not fail on a
    # fresh checkout; join() is indifferent to a trailing slash on the prefix.
    if cropped_image_path:
        os.makedirs(cropped_image_path, exist_ok=True)
    save_path = os.path.join(cropped_image_path, os.path.basename(row['JPG_PATH']))

    # The context manager closes the source file deterministically; all work
    # on the image happens while it is still open.
    with Image.open(source_path) as img:
        cropped = img.crop(box)

        # reverse negatives
        if row['brightness_flipped']:
            cropped = ImageOps.invert(cropped)

        # save cropped image
        cropped.save(save_path)

    return save_path

In [40]:
# Crop every usable image and record where each crop was written.
# Start from an empty (all-NaN) object column so that skipped rows stay
# missing, the column exists even when no row qualifies, and re-running the
# cell does not keep stale paths.
df_wide['CROPPED_IMAGE_PATH'] = pd.Series(index=df_wide.index, dtype='object')

n_images = len(df_wide)
for n_done, (i, row) in enumerate(df_wide.iterrows(), start=1):
    # Progress counts positions in the frame actually being iterated, rather
    # than index labels measured against a different frame's length.
    print(f"Processing image {n_done}/{n_images}.", end='\r')

    # only process if a valid phf view of good quality
    if not (row['poor_quality'] or row['not_a_phf_view']):
        df_wide.loc[i, 'CROPPED_IMAGE_PATH'] = crop_image(row)

Processing image 1116/1116.

In [41]:
df_wide.head()

Unnamed: 0,external_id,bbox_top,bbox_left,bbox_height,bbox_width,note,poor_quality,not_a_phf_view,brightness_flipped,PATIENT_ID,...,FILENAME_VIEW,MIN,MAX,DATA_TYPE,WIDTH,HEIGHT,NUM_CHANNELS,ROI,JPG_PATH,CROPPED_IMAGE_PATH
0,AN00001_Y_VIEW_4,401.0,505.0,996.0,863.0,,False,False,False,AN00001,...,Y_VIEW_4,1139,26177,int16,2328,2928,1,False,/home/xiaofey/xray/xray-master/data/exported_j...,../data/cropped_images/AN00001_Y_VIEW_4.jpg
1,AN00010__1001,277.0,688.0,631.0,393.0,,False,False,False,AN00010,...,_1001,5398,16383,int16,2142,1855,1,False,/home/xiaofey/xray/xray-master/data/exported_j...,../data/cropped_images/AN00010__1001.jpg
2,AN00010__1002,1098.0,642.0,844.0,825.0,"bad quality image, drop",True,False,False,AN00010,...,_1002,6035,11921,int16,2119,2300,1,False,/home/xiaofey/xray/xray-master/data/exported_j...,
3,AN00010__1004,809.0,353.0,347.0,409.0,"not the shoulder, drop",False,True,False,AN00010,...,_1004,4875,16383,int16,1571,2499,1,False,/home/xiaofey/xray/xray-master/data/exported_j...,
4,AN00010__1005,693.0,1087.0,349.0,389.0,"Not shoulder, skip",False,True,False,AN00010,...,_1005,0,16383,int16,2630,1477,1,False,/home/xiaofey/xray/xray-master/data/exported_j...,


In [42]:
# Write the merged table (labels, image metadata and CROPPED_IMAGE_PATH) without
# the row index. `index` is documented as a bool, so pass False explicitly
# rather than relying on None being treated as falsy.
df_wide.to_csv(cropped_meta_path, index=False)

In [43]:
cropped_meta_path

'../data/cropped_image_metadata.csv'