In [1]:
import pydicom
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import os
from labelbox import Client
from labelbox.schema.data_row_metadata import DataRowMetadataKind

In [2]:
home_dir = os.path.expanduser('~')
data_dir = f'{home_dir}/Data/screws/dicom_data' # Change this to the directory where you have the dicom files
dicom_files = glob(f'{data_dir}/*/*/*/*')
processed_dir = f'{home_dir}/Data/screws/processed'

In [None]:
# Convert DICOM files to JPEG

for d in dicom_files:
    print("examining file....")
    print("*****************************")

    parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(d)))
    parent_dir_name = os.path.basename(parent_dir) #This is the CD number that corresponds to the spreadsheet data
    
    # print(f"Parent directory 3 levels up: {parent_dir_name}")

    try:
        dataset = pydicom.dcmread(d, force=True)

        if 'PixelData' in dir(dataset):
            print("PixelData is present")
        elif 'FloatPixelData' in dir(dataset):
            print("FloatPixelData is present")
        elif 'DoubleFloatPixelData' in dir(dataset):
            print("DoubleFloatPixelData is present")
        else:
            print("NO PIXEL DATA FOUND! Skipping...")
            continue

        # attributes = [attr for attr in dir(dataset) if not attr.startswith('__')]
        # print(attributes)

        # for attr in attributes:
        #     try:
        #         value = getattr(dataset, attr)
        #         print(f"{attr}: {value}")
        #     except Exception as e:
        #         print(f"Error getting value of {attr}: {e}")

        # patient_id = dataset.PatientID
        # accession_number = dataset.AccessionNumber

        # print("Patient ID:", patient_id)
        # print("Accession Number: ", accession_number)

        output_dir = os.path.join(processed_dir, parent_dir_name)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
            fname = os.path.join(output_dir, '00.jpg')

        else:
            files = glob(os.path.join(output_dir, '*.jpg'))
            fname = os.path.join(output_dir, str(len(files)).zfill(2) + '.jpg')
            print("Saving new image to ", fname)

        print("getting pixel array...")
        pixel_array = dataset.pixel_array

        print("normalizing pixel values...")

        pixel_array = pixel_array - pixel_array.min()
        pixel_array = pixel_array / pixel_array.max()

        pixel_array *= 255

        pixel_array = pixel_array.astype('uint8')

        print("converting to image...")
        img = Image.fromarray(pixel_array)

        print("saving image...")
        print(fname)
        img.save(fname)

        print("SUCCESS")


    except Exception as e:
        print('Error: ', d, parent_dir, e)

In [6]:
# Upload images to Labelbox

try:

    print("preparing images for Labelbox...")

    client = Client(api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJjbHVqeWF3MDQwMDFyMDcyZGFzOTRnb2ZvIiwib3JnYW5pemF0aW9uSWQiOiJjbGcybTdkeDUwMjFyMDcwNjF1a3diZWJmIiwiYXBpS2V5SWQiOiJjbHYxNnR3bXIwMTNhMDd1NjBtbzVnN3pyIiwic2VjcmV0IjoiMmNmYjA2YWVhYWJkNGRiNjc1Y2FmY2YzMzM1ZDhmNjkiLCJpYXQiOjE3MTMxOTk2MzcsImV4cCI6MjM0NDM1MTYzN30.WA-I984wLDZ0MiiCOYai1D7y9YfwZ51dy2CSZ_6xQN8")

    # dataset = client.get_dataset("clv2tk6u600730769qijzr1lx")

    dataset = client.get_dataset("clwjmo0ae00170728bg4oe22v")

    metadata_ontology = client.get_data_row_metadata_ontology()

    # CREATE METADATA SCHEMA ON FIRST UPLOAD

    # metadata_schema = metadata_ontology.create_schema(name="accession_num", kind=DataRowMetadataKind.string)

    # You can create other metadata schema kinds, just change kind to be one of: 
    # DataRowMetadataKind.number, DataRowMetadataKind.datetime, DataRowMetadataKind.embedding
    # And update import statements to include


    # GET METADATA SCHEMA ON SUBSEQUENT UPLOADS
    metadata_schema = metadata_ontology.get_by_name("accession_num")

    # check the schema 
    print(metadata_schema)

    schema_id = metadata_schema.uid

    assets = []

    for accession_number in os.listdir(processed_dir):

        print("processing directory: ", accession_number)
        
        data_dir = os.path.join(processed_dir, accession_number)
        
        if os.path.isdir(data_dir):
            for image_file in os.listdir(data_dir):
                if image_file.endswith('.jpg'):
                    image_path = os.path.join(data_dir, image_file)

                    data = {
                        "row_data": image_path,
                        "global_key": accession_number + "_" + image_file,
                        "metadata_fields": [{
                            "schema_id": schema_id, "value": accession_number
                        }]
                    }

                    assets.append(data)
    
    print("Files processed...uploading to Labelbox...")

    task = dataset.create_data_rows(assets)
    task.wait_till_done()
    print(task.errors)

except Exception as e:
    print("Failed to upload images to Labelbox: ", e)

preparing images for Labelbox...
uid='clv2tom3h0a0907127zad0g76' name='accession_num' reserved=False kind=<DataRowMetadataKind.string: 'CustomMetadataString'> options=None parent=None
processing directory:  112978882
processing directory:  111531385
processing directory:  111788637
processing directory:  109158286
processing directory:  111151238
processing directory:  4581460
processing directory:  112365001
processing directory:  110482817
processing directory:  111514039
processing directory:  111539386
processing directory:  111157084
processing directory:  11875222
processing directory:  .DS_Store
processing directory:  111886001
processing directory:  112011215
processing directory:  112569420
processing directory:  109114246
processing directory:  109843208
processing directory:  109248549
processing directory:  4561561
processing directory:  109723396
processing directory:  109606533
processing directory:  113027680
processing directory:  111155710
processing directory:  112042