In [1]:
import numpy as np
import cv2
import pandas as pd
import os
import labelbox
import json
import urllib.request

In [7]:
LB_API_KEY = '<API_KEY>'
PROJECT_NUMBER = "clv2u76f5064n071jbjoab056"
TASK_ID = "clw0up26r029z07ybceb8c8oh" # Update task number based on labelbox export

OUTPUT_FILE_PATH = os.path.join(os.path.expanduser("~"), "Data/output/labelbox.json")
OUTPUT_IMAGE_DIR = os.path.join(os.path.expanduser("~"), "Data/output/cropped_images")
LABEL_DATA = os.path.join(os.path.expanduser("~"), "Data/manufacturers.csv")

client = labelbox.Client(api_key = LB_API_KEY)

export_task = labelbox.ExportTask.get_task(client, TASK_ID)

export_task.get_stream(converter=labelbox.FileConverter(file_path=OUTPUT_FILE_PATH)).start()

xray_count = 0

screw_count = 0

# Create log file, clear if exists
with open('errors.txt', 'w'):
    pass

def log_to_file(error_message):
    with open('errors.txt', 'a') as f:
        f.write(error_message + '\n')

# Retrieving the original images from labelbox storage requires only the provided
# image URL, no special headers. 
def get_image_noheaders(image_url):

  with urllib.request.urlopen(image_url) as url:
        s = url.read()
  # Convert the downloaded bytes to a numpy array
  arr = np.asarray(bytearray(s), dtype=np.uint8)
  # Decode the numpy array as an image
  img = cv2.imdecode(arr, -1)

  return img


# Retrieving the mask image requires inclusion of provided headers in request
def get_image_with_headers(url):
  req = urllib.request.Request(url, headers=client.headers)

  # Download the image, convert it to a NumPy array, and then read it into OpenCV format
  resp = urllib.request.urlopen(req)
  image = np.asarray(bytearray(resp.read()), dtype="uint8")
  image = cv2.imdecode(image, cv2.IMREAD_COLOR)
  
  return image


def find_manufacturer(accession_number):

  df = pd.read_csv(LABEL_DATA)
  row = df.loc[df['Accession Number'] == accession_number]

  if not row.empty:
      # Get the value from the 'Manufacturer' column
      manufacturer = row['Implant Manufacturer'].values[0] if not pd.isna(row['Implant Manufacturer'].values[0]) else "No manufacturer data"
  else:
      print("NO ACCESSION NUMBER MATCH")
      manufacturer = "No Accession Number Match"
      
  return manufacturer


def crop_to_mask(image_url, mask_url, mask_color):
  try:

    mask = get_image_with_headers(mask_url)
    image = get_image_noheaders(image_url)
    
    # reverse rgb to bgr for np
    bgr_color = mask_color[::-1]

    # Find the coordinates of the mask color pixels
    coords = np.where((mask == bgr_color).all(axis=2))

    # Find the minimum and maximum x and y values
    ymin, ymax = np.min(coords[0]), np.max(coords[0])
    xmin, xmax = np.min(coords[1]), np.max(coords[1])

    # Add padding to the bounding box
    padding = 20 
    xmin = max(0, xmin - padding)
    ymin = max(0, ymin - padding)
    xmax = min(mask.shape[1], xmax + padding)
    ymax = min(mask.shape[0], ymax + padding)

    # return the crop of the original image
    return image[ymin:ymax, xmin:xmax]


  except Exception as e:
    line_number = e.__traceback__.tb_lineno
    error_message = f"Error - Image Cropper: {e}, Image: {image_url}, Line: {line_number}"
    print(error_message)
    log_to_file(error_message)


def json_stream_handler(output):

  # Labelbox provides a composite mask image where each individual mask is a different color.
  # Get all of the RGB mask colors in a given image and loop through them to crop each individual mask out.

  try:

    data = json.loads(output.json_str)

    image_id = data['data_row']["global_key"]

    # For debuging problem images...
    # if image_id in ["117373809_00.jpg"]:
    #    print(data)

    accession_number = data["metadata_fields"][0]["value"]
    image_url = data["data_row"]["row_data"]  
    masks = data["projects"][PROJECT_NUMBER]["labels"][0]["annotations"]["objects"]
    mask_url = masks[0]["composite_mask"]["url"]
    mask_colors = [item["composite_mask"]["color_rgb"] for item in masks]
    # dataset_id = data["data_row"]["details"]["dataset_id"]
    # row_id = data["data_row"]["id"]

    # Make manufacturer directory if it doesnt exist
    manufacturer = find_manufacturer(accession_number)
    manufacturer_dir = os.path.join(OUTPUT_IMAGE_DIR, manufacturer)
    os.makedirs(manufacturer_dir, exist_ok=True)

    # Make accession number directory if it doesn't exist
    accession_dir = os.path.join(manufacturer_dir, accession_number)
    os.makedirs(accession_dir, exist_ok=True)
    
    for i, color in enumerate(mask_colors):
        
        # Strip the file extension from image_id and add a counter
        image_id_base = os.path.splitext(image_id)[0]
        image_id_new = f"{image_id_base}_{i}.jpg"
        # print(image_id_new)

        img_path = os.path.join(accession_dir, image_id_new)

        cropped_image = crop_to_mask(image_url, mask_url, color)

        cv2.imwrite(img_path, cropped_image)
        # print("Image saved...")
        global screw_count
        screw_count += 1
    
    # Logs
    global xray_count
    xray_count += 1
    print('**********************')
    print("Manufacturer: ", manufacturer)
    print("Accession Number: ", accession_number)
    print("xray count: ", xray_count)
    print("screw count: ", screw_count)

  except Exception as e:
    line_number = e.__traceback__.tb_lineno
    error_message = f"Error - Stream Handler: {e}, Image: {image_id}, Line: {line_number}"
    print(error_message)
    log_to_file(error_message)


# Stream the file to stdout
export_task.get_stream().start(stream_handler=json_stream_handler)
log_to_file("xray count: " + xray_count)
log_to_file("Screw count: " + screw_count)
print("stream handler finished")

**********************
Manufacturer:  Stryker Spine
Accession Number:  113292680
xray count:  1
screw count:  14
**********************
Manufacturer:  Stryker Spine
Accession Number:  115213728
xray count:  2
screw count:  18
**********************
Manufacturer:  Medtronic Sofamor Danek Inc
Accession Number:  116788885
xray count:  3
screw count:  37
**********************
Manufacturer:  Orthofix Inc
Accession Number:  114949278
xray count:  4
screw count:  39
**********************
Manufacturer:  Medtronic Sofamor Danek Inc
Accession Number:  116788885
xray count:  5
screw count:  47
**********************
Manufacturer:  Stryker Spine
Accession Number:  115213728
xray count:  6
screw count:  55
**********************
Manufacturer:  Stryker Spine
Accession Number:  115213728
xray count:  7
screw count:  63
**********************
Manufacturer:  Medtronic Sofamor Danek Inc
Accession Number:  116788885
xray count:  8
screw count:  69
**********************
Manufacturer:  Orthofix Inc
Acce

TypeError: log_to_file() takes 1 positional argument but 2 were given