In [None]:
# System
import sys
import os
import json

# Misc
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch


# ML
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

from shapely.geometry import shape

# Augmentations
import albumentations as A

# Custom
sys.path.append(os.path.abspath(".."))
from hover_net.dataloader.dataset import get_dataloader
from hover_net.datasets.puma_dataset import PumaDataset

In [None]:
# Locations of the training ROIs / nuclei annotations and the patch geometry
# used for the example figure.
IMAGE_PATH = '../data/01_training_dataset_tif_ROIs'
GEOJSON_PATH = '../data/01_training_dataset_geojson_nuclei'
PATCH_SIZE = 512
BATCH_SIZE = 1

# The network input and the mask use the same square patch shape.
patch_shape = (PATCH_SIZE, PATCH_SIZE)

dataset = PumaDataset(
    image_path=IMAGE_PATH,
    geojson_path=GEOJSON_PATH,
    input_shape=patch_shape,
    mask_shape=patch_shape,
    run_mode="test",
    augment=True,
)

# Fetch sample 7 and report its shapes before any cropping is applied.
img, ann = PumaDataset.load_data(dataset, 7)
print(
    "Pre-augmentation",
    f"Image shape: {img.shape}",
    f"Annotation shape: {ann.shape}",
    sep="\n",
)

# Centre-crop image and annotation together to PATCH_SIZE x PATCH_SIZE.
aug = A.Compose([A.CenterCrop(height=PATCH_SIZE, width=PATCH_SIZE)])
augmented = aug(image=img, mask=ann)
img, ann = augmented["image"], augmented["mask"]

# Colour lookup for the integer values 0..3 of the plotted annotation channel.
# Value 0 is drawn white and gets no legend entry; values 1..3 pair up with
# class_labels in order.
colors = ['white', 'teal', 'lightblue', 'lightgreen']
class_labels = ['Tumor', 'TILs', 'Other']
cmap = ListedColormap(colors)

# Band edges halfway between the integers, so each value 0..3 falls into its
# own contour band.
levels = [-0.5, 0.5, 1.5, 2.5, 3.5]

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(img, interpolation='none')

# NOTE(review): channel 1 is presumably the per-pixel class map -- confirm
# against PumaDataset.
ann_image = ann[:, :, 1]
extent = (0, ann.shape[1], 0, ann.shape[0])
contour_fill = ax[1].contourf(
    ann_image,
    levels=levels,
    cmap=cmap,
    alpha=0.8,
    extent=extent,
)
contour_lines = ax[1].contour(
    ann_image,
    levels=levels,
    cmap=cmap,
    linewidths=2,
    alpha=1,
    extent=extent,
)
# Contours are drawn with row 0 at the bottom; flip the axis so the panel has
# the same orientation as the imshow panel on the left.
ax[1].invert_yaxis()

# Both panels get a title and have all ticks and tick labels removed.
for panel, panel_title in zip(ax, ("Image", "Ground Truth")):
    panel.set_title(panel_title, fontsize=16)
    panel.tick_params(
        axis='both',
        which='both',
        bottom=False,
        left=False,
        labelbottom=False,
        labelleft=False,
    )

ax[1].set_aspect('equal', 'box')

# One legend patch per labelled class; colors[0] is skipped on purpose.
legend_elements = [
    Patch(facecolor=color, label=label)
    for color, label in zip(colors[1:], class_labels)
]
fig.legend(
    handles=legend_elements,
    loc='lower center',
    ncol=len(legend_elements),
    fontsize=16,
    bbox_to_anchor=(0.5, -0.1),  # Adjust this value to move the legend up or down
    frameon=True
)

fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists
# (e.g. on a fresh checkout without a figures/ folder).
example_figure_path = "../figures/dataset_example.pdf"
os.makedirs(os.path.dirname(example_figure_path), exist_ok=True)
fig.savefig(example_figure_path, bbox_inches='tight')

In [None]:
GEOJSON_PATH = '../data/01_training_dataset_geojson_nuclei'
geojsons = os.listdir(GEOJSON_PATH)

# Polygon counts per cohort ("primary" / "metastatic") and per class bucket.
# Re-created on every run so the cell is idempotent.
nuclei_count = {
    cohort: {"tumor": 0, "tils": 0, "other": 0}
    for cohort in ("primary", "metastatic")
}

# Classification name -> bucket. Any name not listed here counts as "other".
label_to_bucket = {
    "nuclei_tumor": "tumor",
    "nuclei_lymphocyte": "tils",
    "nuclei_plasma_cell": "tils",
}

for file in geojsons:
    file_path = os.path.join(GEOJSON_PATH, file)
    # Skip hidden entries and sub-directories (e.g. .ipynb_checkpoints,
    # .DS_Store); they are not annotation files and would fail to parse.
    if file.startswith(".") or not os.path.isfile(file_path):
        continue

    # The cohort is derived from the file name: anything without
    # "metastatic" in its name is treated as primary.
    cohort = "metastatic" if "metastatic" in file else "primary"

    with open(file_path, encoding="utf-8") as f:
        geojson = json.load(f)

    for feature in geojson["features"]:
        geometry = shape(feature["geometry"])
        label = feature["properties"]["classification"]["name"]

        # Only plain polygons are counted; other geometry types are ignored.
        if geometry.geom_type != "Polygon":
            continue

        nuclei_count[cohort][label_to_bucket.get(label, "other")] += 1

In [None]:
# Class buckets (keys of nuclei_count[...]) and their display names, in the
# same order.
categories = ['tumor', 'tils', 'other']
titles = ['Tumor', 'TILs', 'Other']
primary_counts = [nuclei_count["primary"][category] for category in categories]
metastatic_counts = [nuclei_count["metastatic"][category] for category in categories]

# The pie charts plot the same per-class counts.
primary_sizes = list(primary_counts)
metastatic_sizes = list(metastatic_counts)

# One colour per category, shared by both pie charts and the legend.
colors = ['teal', 'lightblue', 'lightgreen']

fig, ax = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: primary cohort, right panel: metastatic cohort.
pie_panels = (
    (ax[0], primary_sizes, 'Primary Nuclei Distribution'),
    (ax[1], metastatic_sizes, 'Metastatic Nuclei Distribution'),
)
for panel, sizes, panel_title in pie_panels:
    panel.pie(sizes, autopct='%1.1f%%', startangle=90, colors=colors)
    panel.set_title(panel_title, fontsize=16, y=0.92)

# Single shared legend at the bottom centre. The handles are built explicitly
# so each label is tied to its colour instead of relying on the order in
# which the wedges were added to the axes.
legend_handles = [
    Patch(facecolor=color, label=title)
    for color, title in zip(colors, titles)
]
fig.legend(
    handles=legend_handles,
    loc="lower center",
    fontsize=16,
    ncol=len(legend_handles),
    bbox_to_anchor=(0.5, 0),
)

fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists.
distribution_figure_path = "../figures/dataset_distribution.pdf"
os.makedirs(os.path.dirname(distribution_figure_path), exist_ok=True)
fig.savefig(distribution_figure_path, bbox_inches='tight')

In [None]:
# Totals per cohort: sum over all class buckets of that cohort.
total_primary_nuclei, total_metastatic_nuclei = (
    sum(nuclei_count[cohort].values()) for cohort in ("primary", "metastatic")
)
print(f"Total primary nuclei: {total_primary_nuclei}")
print(f"Total metastatic nuclei: {total_metastatic_nuclei}")
print(f"Total: {total_primary_nuclei + total_metastatic_nuclei}")

# Totals per class bucket: primary plus metastatic.
total_tumor, total_tils, total_other = (
    nuclei_count["primary"][bucket] + nuclei_count["metastatic"][bucket]
    for bucket in ("tumor", "tils", "other")
)

print(f"Total tumor: {total_tumor}")
print(f"Total TILs: {total_tils}")
print(f"Total other: {total_other}")