# PUMA Data Loader

## Imports

In [8]:
# Misc
import os

# For processing images
from PIL import Image, ImageDraw

# Matplotlib
import matplotlib.pyplot as plt
# For manually adding legend entries
import matplotlib.patches as mpatches

# For parsing GeoJSON
import json
from shapely.geometry import shape
from shapely.geometry.polygon import Polygon

import numpy as np

## Data

Data should be placed in the `data` folder. The expected folder names are:

- `01_training_dataset_geojson_nuclei`
- `01_training_dataset_geojson_tissue`
- `01_training_dataset_tif_ROIs`

In [None]:
# Dataset locations (relative paths)
NUCLEI_DIR = 'data/01_training_dataset_geojson_nuclei'
TISSUE_DIR = 'data/01_training_dataset_geojson_tissue'
IMAGE_DIR = 'data/01_training_dataset_tif_ROIs'

# Polygon class labels.
# Every annotation label maps to one of three codes, each a distinct single
# bit: Tumor (bit 0), TIL (bit 1) and Other (bit 2).
CLASS = {
    # Tumor
    'nuclei_tumor': 1 << 0,
    # TIL
    **dict.fromkeys(
        ('nuclei_lymphocyte', 'nuclei_plasma_cell'),
        1 << 1,
    ),
    # Other
    **dict.fromkeys(
        (
            'nuclei_endothelium',
            'nuclei_apoptosis',
            'nuclei_stroma',
            'nuclei_histiocyte',
            'nuclei_melanophage',
            'nuclei_neutrophil',
            'nuclei_epithelium',
        ),
        1 << 2,
    ),
}

# Load one ROI image; its dimensions define the size of the mask canvas.
img_path = os.path.join(IMAGE_DIR, 'training_set_primary_roi_001.tif')
img = Image.open(img_path)
width, height = img.size

# Create blank greyscale image to draw polygons on.
# Pixel values on this canvas are the class codes from CLASS (0 stays
# background); they are NOT display intensities.
img_poly = Image.new(mode="L", size=(width, height))
draw = ImageDraw.Draw(img_poly)

json_path = os.path.join(NUCLEI_DIR, 'training_set_primary_roi_001_nuclei.geojson')
with open(json_path, encoding='utf-8') as f:
    geojson = json.load(f)

# Maps every label seen in the annotation file to its annotated color
legend = {}

for feature in geojson['features']:
    geometry = shape(feature['geometry'])
    label = feature['properties']['classification']["name"]
    color = feature['properties']['classification']["color"]

    legend[label] = color

    # Single greyscale value written for every pixel of this nucleus
    class_code = CLASS[label]

    # Treat Polygon and MultiPolygon uniformly as a list of simple polygons.
    # Other geometry types are ignored, as before.
    if geometry.geom_type == 'Polygon':
        polygons = [geometry]
    elif geometry.geom_type == 'MultiPolygon':
        polygons = list(geometry.geoms)
    else:
        polygons = []

    for poly in polygons:
        # Both outline and fill must be the class code: the canvas is mode
        # "L", so an RGB/hex color here would be stored as an arbitrary grey
        # level that matches none of the class masks built from this image.
        draw.polygon(
            list(poly.exterior.coords),
            outline=class_code,
            fill=class_code,
        )

# Turn the class-encoded greyscale canvas into an RGB picture for display
img_array = np.array(img_poly)

# One boolean mask per class code (all "Other" labels share the code that
# 'nuclei_endothelium' maps to in CLASS)
nuclei_tumor_mask = img_array == CLASS["nuclei_tumor"]
nuclei_lymphocyte_mask = img_array == CLASS["nuclei_lymphocyte"]
nuclei_endothelium_mask = img_array == CLASS["nuclei_endothelium"]

# Start from black and saturate one channel per class:
# channel 0 (red) = Tumor, channel 1 (green) = TIL, channel 2 (blue) = Other
img_array_rgb = np.zeros((height, width, 3), dtype=np.uint8)
class_masks = (
    nuclei_tumor_mask,
    nuclei_lymphocyte_mask,
    nuclei_endothelium_mask,
)
for channel, class_mask in enumerate(class_masks):
    img_array_rgb[..., channel][class_mask] = 0xFF

# Legend entries are created by hand, using the same channel colors as above
legend_spec = (
    ("#ff0000", "Tumor"),
    ("#00ff00", "TIL"),
    ("#0000ff", "Other"),
)
patch = [
    mpatches.Patch(color=hex_color, label=class_name)
    for hex_color, class_name in legend_spec
]

# Left: decoded class mask with legend; right: the original ROI image
fig, axs = plt.subplots(1, 2, figsize=(16, 8))
axs[0].imshow(img_array_rgb)
axs[1].imshow(img)
axs[0].legend(handles=patch)