In [None]:
from wholeslidedata.annotation.wholeslideannotation import WholeSlideAnnotation
from wholeslidedata.image.wholeslideimage import WholeSlideImage
from wholeslidedata.annotation.types import PolygonAnnotation as Polygon
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
from tqdm import tqdm

import cv2

from py.helpers import get_outlines, get_area, get_patch, get_sub_areas, patch_empty, concat_one, BARRET_ROOT
import os

# Register the OpenSlide binaries so the DLLs can be located (for openslide).
os.add_dll_directory(r'C:\Program Files\openslide-win64\bin')

# Dataset locations, all resolved relative to BARRET_ROOT.
LANS_DIR = os.path.join(BARRET_ROOT, 'LANS_001-923')
LANS_BIOP_ROOT = os.path.join(BARRET_ROOT, 'p53_experiment_luuk_biopsy level_no HE')
# Biopsy-level folders; the cells below iterate over them in this order.
LANS_BIOP_DIRS = [
    os.path.join(LANS_BIOP_ROOT, subfolder)
    for subfolder in (
        'P53_score_high_consensus',
        'RBE_nummers Hans',
        'RBE_nummers Onno',
    )
]

In [None]:
# For every biopsy directory, collect the sorted case names for which both a
# .tiff image and an .xml annotation file are present.
casenames = {}
for d in LANS_BIOP_DIRS:
    entries = os.listdir(d)
    tiffs = [name for name in entries if name.endswith('.tiff')]
    xmls = [name for name in entries if name.endswith('.xml')]
    print(f'{d}: {len(tiffs)} tiffs, {len(xmls)} xmls')

    # A case name is everything before the first dot of the file name.
    xml_names = [name.split('.')[0] for name in xmls]
    tiff_names = [name.split('.')[0] for name in tiffs]

    # Keep only the names that occur in both lists.
    both = set(xml_names) & set(tiff_names)
    print(f'Both: {len(both)}')

    casenames[d] = sorted(both)

In [None]:
def get_all_annotated_cases(dir):
    """Index the slide and annotation files of one directory by case name.

    The case name is the part of a file name before its first dot. Every file
    in ``dir`` contributes a key, so files that are neither ``.tiff`` nor
    ``.xml`` yield an entry whose paths are both ``None``.

    Args:
        dir: Path of the directory to scan (not recursive).

    Returns:
        dict mapping case name -> ``{"wsi": path or None, "wsa": path or None}``,
        where ``wsi`` is the ``.tiff`` file and ``wsa`` the ``.xml`` file.
    """
    filepaths = {}
    for f in os.listdir(dir):
        case = f.split('.')[0]
        entry = filepaths.setdefault(case, {"wsi": None, "wsa": None})

        # Match on the real extension: a substring test would also register
        # files such as "case.tiff.bak" as the slide image.
        if f.endswith(".tiff"):
            typ = "wsi"
        elif f.endswith(".xml"):
            typ = "wsa"
        else:
            continue

        entry[typ] = os.path.join(dir, f)
    return filepaths

In [None]:
# Per biopsy directory: case name -> {"wsi": image path, "wsa": annotation path}.
cases = {biop_dir: get_all_annotated_cases(biop_dir) for biop_dir in LANS_BIOP_DIRS}

In [None]:
def plot_sub_areas(wsi, sub_areas, area_labels=[], save_path="", spacing=2.0, figsize_factor=2, show_emptiness=False):
    """Draw a grid of patches read from a whole-slide image.

    Args:
        wsi: Object exposing ``get_patch(*area, spacing)``.
        sub_areas: List of rows; each row is a list of areas that are unpacked
            into ``wsi.get_patch``. Rows may differ in length; cells without an
            area are left blank.
        area_labels: Optional flat sequence of titles, one per area, in
            row-major order of ``sub_areas``. It is only read, never mutated.
            Areas beyond the end of the sequence get no title.
        save_path: When non-empty, the figure is written to this path and then
            closed; otherwise it is left open for display.
        spacing: Spacing forwarded to ``wsi.get_patch``.
        figsize_factor: Figure size per grid cell (width and height).
        show_emptiness: When true, the mean of each patch is written onto its
            cell, in red if the mean exceeds 223.

    Returns:
        None. Nothing is drawn when ``sub_areas`` contains no areas.
    """
    nrows = len(sub_areas)
    # Size the grid for the longest row so no area of a ragged grid is dropped.
    ncols = max((len(row) for row in sub_areas), default=0)
    if nrows == 0 or ncols == 0:
        return

    # squeeze=False always yields a 2-D axes array, so single-row,
    # single-column and 1x1 grids are indexed the same way as full grids.
    fig, ax = plt.subplots(nrows, ncols, figsize=(ncols*figsize_factor, nrows*figsize_factor), squeeze=False)

    label_index = 0  # running position in the flat, row-major label sequence
    for i, row in enumerate(sub_areas):
        for j in range(ncols):
            cell = ax[i, j]

            if j < len(row):
                sub_patch = wsi.get_patch(*row[j], spacing)

                if show_emptiness:
                    patch_mean = sub_patch.mean()
                    color = "red" if patch_mean > 223 else "black"
                    cell.text(105, 128, f"{patch_mean:.2f}", c=color)
                if label_index < len(area_labels):
                    cell.set_title(area_labels[label_index])
                label_index += 1
                cell.imshow(sub_patch)

            cell.axis("off")
    if save_path:
        fig.savefig(save_path, bbox_inches="tight")
        plt.close(fig)


# def save_all_sub_areas_plots(spacing, root=ROOT):
#     for casename, case in tqdm(get_all_annotated_cases(root).items()):
#         for coupe, paths in case.items():
#             outlines = get_outlines(WholeSlideAnnotation(paths["wsa"]))
#             for biopsy_nr, outline in enumerate(outlines):
#                 plot_sub_areas(
#                     WholeSlideImage(paths["wsi"]), 
#                     get_sub_areas(get_area(outline, spacing), spacing=spacing), 
#                     save_path=os.path.join(ROOT, "visualisation", f"sub_areas_{casename}_{biopsy_nr}_{coupe}.png"),
#                     spacing=spacing)

# save_all_sub_areas_plots(2, root=ROOT_ADJACENT)

Example of patching

In [None]:
# Demo: split the first biopsy of one case into sub areas and plot them.
spacing = 2.0

# Use the first case of the first biopsy directory as the example.
d = LANS_BIOP_DIRS[0]
casename = casenames[d][0]
casepaths = cases[d][casename]
outlines = get_outlines(WholeSlideAnnotation(casepaths["wsa"])) # biopsy outlines
area = get_area(outlines[0], spacing) # biopsy area
# NOTE(review): `spacing` is not forwarded here, so get_sub_areas presumably
# falls back to its own default -- confirm that it matches `spacing` above.
sub_areas = get_sub_areas(area) # sub areas (patches) of biopsy area
wsi = WholeSlideImage(casepaths["wsi"]) # whole slide image
# plot_sub_areas is called without `spacing`, so it reads patches at its default of 2.0.
plot_sub_areas(wsi, sub_areas, show_emptiness=True)

Example of showing biopsies

In [None]:
# Demo: show every annotated biopsy of one case in a two-row grid.
spacing = 4.0

d = LANS_BIOP_DIRS[0]
casename = casenames[d][2]
print("Dir: ", d)
print("Case: ", casename)
casepaths = cases[d][casename]
wsa = WholeSlideAnnotation(casepaths["wsa"])

# One label per annotation.
labels = [annotation.label.name for annotation in wsa.annotations]

# Outline -> area for every biopsy on the slide.
outlines = get_outlines(wsa)
areas = [get_area(biopsy_outline, spacing) for biopsy_outline in outlines]

# Split the areas over two rows; the first row takes the extra one when the count is odd.
half_areas_len = (len(areas) + 1) // 2
areas = [areas[:half_areas_len], areas[half_areas_len:]]

wsi = WholeSlideImage(casepaths["wsi"])
plot_sub_areas(wsi, areas, figsize_factor=5, area_labels=labels, spacing=spacing)

Gather data on biopsies

In [None]:
spacing = 4.0

# One record per annotated biopsy, keyed "<casename>_b<index>".
# NOTE(review): the key does not include the directory. If the same case name
# occurs in more than one directory, the record from the later directory
# silently replaces the earlier one -- confirm this is intended.
biopsies = {}

for d in LANS_BIOP_DIRS:
    print("Dir: ", d)
    # basename handles whichever path separator the platform uses, unlike a
    # manual split on backslashes.
    dir_name = os.path.basename(d)
    for casename in tqdm(casenames[d]):
        casepaths = cases[d][casename]
        wsa = WholeSlideAnnotation(casepaths["wsa"])

        labels = [a.label.name for a in wsa.annotations] # biopsy labels

        outlines = get_outlines(wsa) # biopsy outlines

        for b, label in enumerate(labels):
            outline = outlines[b]
            area = get_area(outline, spacing)

            # NOTE(review): elsewhere in this notebook `area` is unpacked
            # positionally into wsi.get_patch(*area, spacing). Check against
            # get_patch's parameter order that area[2] really is the height
            # and area[3] the width; "pixels" (their product) is unaffected.
            biopsies[f"{casename}_b{b}"] = {
                "dir": dir_name,
                "casename": casename,
                "height": int(area[2]),
                "width": int(area[3]),
                "label": label,
            }


Analyze biopsy distribution

Largest biopsy (label: wildtype): 21376 x 19328 pixels = 413,155,328 pixels at a spacing of 0.25 µm/pixel.

The same biopsy at a spacing of 1.0 µm/pixel: 5344 x 4832 pixels = 25,822,208 pixels.

In [None]:
df = pd.DataFrame.from_dict(biopsies, orient="index")
df["pixels"] = df["height"] * df["width"]

# Sort by pixels, largest biopsy first
df = df.sort_values(by=["pixels"], ascending=False)

display(df)

# Bar chart of the number of biopsies per label.
# Names and heights both come from the same value_counts() result: pairing
# unique() (order of appearance) with value_counts() (ordered by frequency)
# can attach counts to the wrong label.
label_counts = df["label"].value_counts()

plt.figure(figsize=(5,3))
plt.bar(list(label_counts.index), label_counts.values)
plt.xticks(rotation=90)
plt.ylabel("biopsies")

# Display counts on top of bars
for i, v in enumerate(label_counts.values):
    plt.text(i-0.2, v+1, str(v))
plt.show()

# Display rows with label none
none_labels = df[df["label"] == "none"]
# Sort by casename
none_labels = none_labels.sort_values(by=["casename"])
# Only unique casenames
none_labels = none_labels.drop_duplicates(subset=["casename"])
display(none_labels)

Create dataset

In [None]:
spacing = 4.0

# Output layout: <destination>/biopsies_s<spacing>/<casename>_b<index>.png
# plus <destination>/biopsy_labels.csv with one row per biopsy.
destination = os.path.join(LANS_BIOP_ROOT, "dataset_fullsize")
dest_biop = os.path.join(destination, f"biopsies_s{spacing}")
os.makedirs(dest_biop, exist_ok=True)

biopsy_records = {}   # biopsy name -> metadata row
errors = []           # paths of slides that could not be opened
error_details = {}    # slide path -> reason it could not be opened
write_failures = []   # PNG paths that cv2 reported it could not write

for d in LANS_BIOP_DIRS:
    print("Dir: ", d)
    for casename in tqdm(casenames[d]):
        casepaths = cases[d][casename]
        wsa = WholeSlideAnnotation(casepaths["wsa"])

        labels = [a.label.name for a in wsa.annotations] # biopsy labels

        outlines = get_outlines(wsa) # biopsy outlines
        try:
            wsi = WholeSlideImage(casepaths["wsi"]) # whole slide image
        except Exception as exc:
            # Catch Exception rather than everything, so interrupting this
            # long loop still works, and keep the reason for later diagnosis.
            errors.append(casepaths["wsi"])
            error_details[casepaths["wsi"]] = repr(exc)
            continue

        for b, label in enumerate(labels):
            outline = outlines[b]
            area = get_area(outline, spacing)

            # NOTE(review): the biopsy name does not include the directory.
            # If a case name occurs in several directories, the later record
            # replaces the earlier one and the PNG of the first directory is
            # kept (see the exists-check below) -- confirm this is intended.
            biopsy_name = f"{casename}_b{b}"
            # NOTE(review): `area` is unpacked positionally into get_patch
            # below; check against get_patch's parameter order that area[2]
            # is the height and area[3] the width.
            biopsy_records[biopsy_name] = {
                "dir": d,
                "casename": casename,
                "height": int(area[2]),
                "width": int(area[3]),
                "label": label,
            }

            biopsy_path = os.path.join(dest_biop, f"{biopsy_name}.png")
            # Skip images written by an earlier run.
            if os.path.exists(biopsy_path):
                continue

            # Save biopsy with cv2
            biopsy = wsi.get_patch(*area, spacing)

            # NOTE(review): cv2.imwrite interprets a 3-channel array as BGR.
            # If get_patch returns RGB, the saved PNG has red and blue swapped
            # for any reader other than cv2 -- confirm what the consumers of
            # this dataset expect before changing it.
            # imwrite signals failure through its return value, not an exception.
            if not cv2.imwrite(biopsy_path, biopsy):
                write_failures.append(biopsy_path)

print("Errors: ", len(errors), errors)
if error_details:
    print("Error details: ", error_details)
if write_failures:
    print("Write failures: ", len(write_failures), write_failures)

biopsy_df = pd.DataFrame.from_dict(biopsy_records, orient="index")
biopsy_df["pixels"] = biopsy_df["height"] * biopsy_df["width"]

# Save to csv
biopsy_df.to_csv(os.path.join(destination, "biopsy_labels.csv"))

Errors (slides that could not be opened as a whole-slide image):

['L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-01089_P53-III_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02039_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02039_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02110_P53-IX_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02110_P53-VII_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02110_P53-X_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02313_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02323_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02323_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02665_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02666_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-02903_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-04030_P53-III_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-04863_P53-VI_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-06938_P53-XI_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-50101_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-50136_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-50151_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Hans\\RBET18-50151_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-01089_P53-III_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-02039_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-02039_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-02110_P53-IX_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-02110_P53-VII_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-02110_P53-X_BIG.tiff',
...
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-06938_P53-XI_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-50101_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-50136_P53-I_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-50151_P53-II_BIG.tiff',
 'L:\\\\basic\\\\divg\\\\PATH-COMPAI\\\\datasets\\\\Barrett\\\\p53_experiment_luuk_biopsy level_no HE\\RBE_nummers Onno\\RBET18-50151_P53-I_BIG.tiff']

Every failing file listed above has the `_BIG` suffix, so the problem is clearly specific to the `_BIG` tiffs.