In [None]:
from wholeslidedata.annotation.wholeslideannotation import WholeSlideAnnotation
from wholeslidedata.image.wholeslideimage import WholeSlideImage
from wholeslidedata.annotation.types import PolygonAnnotation as Polygon
from matplotlib import pyplot as plt
import numpy as np
import os
import pandas as pd
from tqdm import tqdm

import cv2

from py.helpers import get_outlines, get_area, get_patch, get_sub_areas, patch_empty, concat_one, BARRET_ROOT
import os

# Make the OpenSlide binaries discoverable. os.add_dll_directory only exists on Windows,
# and the install location below is hard-coded for this machine.
os.add_dll_directory(r'C:\Program Files\openslide-win64\bin') # for openslide

# Dataset locations; every path is derived from BARRET_ROOT (imported from py.helpers).
LANS_DIR = os.path.join(BARRET_ROOT, 'LANS_001-923')
LANS_BIOP_ROOT = os.path.join(BARRET_ROOT, 'p53_biopsy-level_no-HE_Luuk', '_Luuk')
LANS_BIOP_DIR = os.path.join(LANS_BIOP_ROOT, 'Slidescape_ASAP')
BOLERO_DIR = os.path.join(BARRET_ROOT, 'BOLERO', 'P53 Bolero')
# Folder holding the xml annotation files that the cells below read and rewrite.
PATHXL_DIR = os.path.join(LANS_BIOP_ROOT, 'PATHXL')

In [None]:
# Get all xml files from PATHXL.
# PATHXL_DIR is used directly instead of a variable called `dir`, which would shadow the
# builtin dir() for every later cell. The list is sorted because os.listdir returns
# entries in arbitrary order, and `xml_files` / `wsas` should be reproducible between runs.
xml_files = sorted(
    os.path.join(PATHXL_DIR, f) for f in os.listdir(PATHXL_DIR) if f.endswith('.xml')
)

# Read the whole slide annotations from the xml files.
# Files that cannot be read are reported and skipped, so `wsas` may be shorter than `xml_files`.
wsas = []
for xml_file in tqdm(xml_files):
    try:
        wsa = WholeSlideAnnotation(xml_file)
        wsas.append(wsa)
    except Exception as e:
        print(f'Error reading {xml_file}: {e}')

Check whether annotations are in order

In [None]:
import re

# Scan the raw text of every annotation XML and verify the numbering of its annotations.
# The pattern matches name="annotation {number}" regardless of letter case.
order_pattern = re.compile(r'name="annotation (\d+)"', re.IGNORECASE)

for file in tqdm(xml_files):
    with open(file, 'r') as f:
        xml = f.read()

    # Annotation numbers in the order they appear in the file
    annotations = [int(number) for number in order_pattern.findall(xml)]
    sorted_annotations = sorted(annotations)

    # Nothing to validate when the file holds no numbered annotations
    if not annotations:
        print(f"No annotations found for {file}")
        continue

    # The numbers must be ascending ...
    if annotations != sorted_annotations:
        print(f"Annotations are not in order for {file}: {annotations}")
    # ... and numbering must start at 1
    if annotations[0] != 1:
        print(f"First annotation is not 1 for {file}: {annotations}")

In [None]:
# According to the original note, ann.todict() has the keys:
# 'coordinates', 'label.name', 'color', 'annotation_id', 'annotation_type'
# [[a.todict() for a in wsa.annotations] for wsa in wsas[:5]]

# Tally how often each label name occurs over all annotations of all slides
all_labels = {}
for wsa in wsas:
    for ann in wsa.annotations:
        label = ann.label.name
        all_labels[label] = all_labels.get(label, 0) + 1
all_labels

In [None]:
from xml.etree import ElementTree as ET
from xml.dom import minidom
import numpy as np

class AnnotationType:
    """String constants written to the ``Type`` attribute of an annotation (see get_annotation_dict)."""
    RECTANGLE = 'Rectangle'
    POLYGON = 'Polygon'

class AnnotationGroup:
    """
    Names of the annotation groups.

    Used as the ``Name`` of each ``Group`` in the xml template and as the
    ``PartOfGroup`` value of individual annotations.
    """
    WILD_TYPE = 'WildType'
    OVER_EXPRESSION = 'OverExpression'
    NULL_MUTATION = 'NullMutation'
    DOUBLE_CLONES = 'DoubleClones'
    NO_CONSENSUS = 'NoConsensus'
    EXCLUDE = 'exclude'

# Default annotation color (hex string); also the color given to the NoConsensus group in the template.
DEFAULT_COLOR = "#F4FA58"

            
def prettify(elem):
    """
    Serializes an Element into an indented, human-readable xml string.

    Parameters:
        elem: Element
            xml element to serialize

    Returns:
        string
            The xml text, indented with one tab per nesting level
    """
    # Round-trip through minidom, which provides the pretty printer
    serialized = ET.tostring(elem, 'utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="\t")


###############################################################################
# CONVERSION
###############################################################################
def xml_to_dict(tree):
    """
    Converts an xml tree to a dictionary.

    Parameters:
        tree: ElementTree
            xml tree

    Returns:
        dictionary
            Dictionary representation of the xml tree, of the form
            {root_tag: {"attrib": {...}, "children": [...]}}
    """
    return _xml_to_dict(tree.getroot())

def _xml_to_dict(element):
    """
    Converts an xml element to a dictionary.

    The result has a single key (the element's tag) whose value holds:
        "attrib":   a copy of the element's attributes (only if it has any)
        "children": a list with one dictionary per child element (only if it has any)
    Element text is intentionally not captured.

    Parameters:
        element: Element
            xml element

    Returns:
        dictionary
            Dictionary representation of the xml element
    """
    body = {}
    if element.attrib:
        # Copy the attributes: storing element.attrib itself would make every edit of the
        # dictionary silently modify the parsed xml tree as well.
        body["attrib"] = dict(element.attrib)
    children = list(element)
    if children:
        body["children"] = [_xml_to_dict(child) for child in children]
    return {element.tag: body}

def dict_to_xml(d):
    """
    Converts a dictionary to an xml tree.

    Parameters:
        d: dictionary
            Dictionary with exactly one key (the root tag), in the format produced by xml_to_dict

    Returns:
        ElementTree
            xml tree representation of the dictionary
    """
    assert isinstance(d, dict) and len(d) == 1
    tag, body = next(iter(d.items()))
    root = ET.Element(tag)
    _dict_to_xml(body, root)
    return ET.ElementTree(root)

def _dict_to_xml(d, root):
    """
    Converts a dictionary to an xml element.

    Recognized keys of d are "attrib" (attribute mapping), "text" (element text) and
    "children" (list of single-key dictionaries, one per child element).

    Parameters:
        d: dictionary
            Dictionary
        root: Element
            xml element to add the dictionary to
    """
    if "attrib" in d:
        # Give the element its own copy: assigning d["attrib"] directly would make the
        # element and the source dictionary share one mapping, so changing either one
        # would change the other.
        root.attrib = dict(d["attrib"])
    if "text" in d:
        root.text = d["text"]
    for child in d.get("children", []):
        tag, body = next(iter(child.items()))
        _dict_to_xml(body, ET.SubElement(root, tag))




###############################################################################
# TEMPLATES
###############################################################################
def get_xml_template_dict():
    """
    Returns an xml template dictionary.

    The template has an empty 'Annotations' node (first child of 'ASAP_Annotations') and an
    'AnnotationGroups' node (second child) with one 'Group' per annotation group. A fresh
    dictionary is built on every call, so callers can append to it freely.

    Returns:
        xml_template_dict: dictionary
            Xml template dictionary
    """
    group_colors = [
        (AnnotationGroup.WILD_TYPE, '#64fe2e'),
        (AnnotationGroup.OVER_EXPRESSION, '#aaaa00'),
        (AnnotationGroup.NULL_MUTATION, '#0000ff'),
        (AnnotationGroup.DOUBLE_CLONES, '#ff0000'),
        (AnnotationGroup.NO_CONSENSUS, DEFAULT_COLOR),
        (AnnotationGroup.EXCLUDE, '#000000'),
    ]
    groups = [
        {'Group': {
            'attrib': {'Name': group, 'PartOfGroup': 'None', 'Color': color},
            # The child tag is 'Attributes' (it was previously misspelled 'Attibutes'),
            # matching the <Attributes/> element in the example xml of this file.
            'children': [{'Attributes': {}}],
        }}
        for group, color in group_colors
    ]
    xml_template_dict = {
        'ASAP_Annotations': {"children": [
                {'Annotations': {"children": []}},
                {'AnnotationGroups': {"children": groups}}
            ]
        }
    }
    return xml_template_dict


def get_annotation_dict(contour, annotation_type=AnnotationType.POLYGON, annotation_group="None", color=DEFAULT_COLOR, name=None):
    """
    Returns an annotation dictionary for a contour.

    Parameters:
        contour: numpy array
            Contour, iterated as a sequence of (x, y) pairs
        annotation_type: string
            Annotation type
        annotation_group: string
            Annotation group
        color: string
            Color of the annotation
        name: string or None
            Name of the annotation. When None, a name of the form 'x{cx}y{cy}' is
            generated from the integer-truncated mean of the contour points.

    Returns:
        annotation_dict: dictionary
            Annotation dictionary with the keys 'attrib' and 'children'
    """
    if name is None:
        # The centroid is only needed for the generated name, so it is not computed
        # (and cannot emit numpy warnings for an empty contour) when a name is supplied.
        center = np.mean(contour, axis=0).astype(np.int32)
        name = f'x{center[0]}y{center[1]}'

    # One 'Coordinate' node per contour point, numbered in contour order
    coordinates = [
        {'Coordinate': {
                'attrib': {
                    'Order': str(i),
                    'X': str(x),
                    'Y': str(y)
                }
            }
        }
        for i, (x, y) in enumerate(contour)
    ]
    annotation_dict = {'attrib': {
            'Name': name,
            'Type': annotation_type,
            'PartOfGroup': annotation_group,
            'Color': color,
        },
        'children': [{'Coordinates': {'children': coordinates}}]
    }
    return annotation_dict




###############################################################################
# SAVING
###############################################################################
def map_contour_to_slide_coordinates(contour, spacing, process_spacing, area_box):
    """Map a contour from scanned-area coordinates (at `spacing`) to slide coordinates.

    The contour is scaled by 4 * spacing and shifted by the first two entries of
    area_box scaled by 4 * process_spacing.
    No correction for the cut_patch_margin is applied, so set it to 0.
    """
    area_offset = np.array(area_box[:2])[None,:] * 4 * process_spacing
    scaled_contour = contour * 4 * spacing
    return scaled_contour + area_offset


def add_contour_to_xml_dict(xml_dict, mapped_contour, annotation_group="None"):
    """Append mapped_contour as an 'Annotation' to xml_dict and return the dictionary.

    A falsy xml_dict (None or empty) is replaced by a fresh template first.
    """
    xml_dict = xml_dict or get_xml_template_dict()
    annotation_nodes = xml_dict['ASAP_Annotations']['children'][0]['Annotations']['children']
    annotation_nodes.append(
        {'Annotation': get_annotation_dict(mapped_contour, annotation_group=annotation_group)}
    )
    return xml_dict




###############################################################################
# EXAMPLES
###############################################################################
"""
        <Annotation Name="Annotation 0" Type="Rectangle" PartOfGroup="NullMutation" Color="#000000">
            <Coordinates>
                <Coordinate Order="0" X="112178" Y="29089"/>
                <Coordinate Order="1" X="122881" Y="29089"/>
                <Coordinate Order="2" X="122881" Y="22312.0996"/>
                <Coordinate Order="3" X="112178" Y="22312.0996"/>
            </Coordinates>
        </Annotation>

        <Annotation Name="Annotation 3" Type="Polygon" PartOfGroup="NullMutation" Color="#F4FA58">
            <Coordinates>
                <Coordinate Order="0" X="117203.203" Y="23387.2188"/>
                <Coordinate Order="1" X="116342.508" Y="23862.0859"/>
                <Coordinate Order="2" X="115541.172" Y="24425.9902"/>
                ...
                <Coordinate Order="42" X="118835.562" Y="23179.4648"/>
                <Coordinate Order="43" X="118271.656" Y="23387.2188"/>
                <Coordinate Order="44" X="117737.43" Y="23387.2188"/>
            </Coordinates>
        </Annotation>
"""
"""
<ASAP_Annotations>
    <Annotations>
    </Annotations>
    <AnnotationGroups>
        <Group Name="WildType" PartOfGroup="None" Color="#64fe2e">
            <Attributes/>
        </Group>
        <Group Name="OverExpression" PartOfGroup="None" Color="#aaaa00">
            <Attributes/>
        </Group>
        <Group Name="NullMutation" PartOfGroup="None" Color="#0000ff">
            <Attributes/>
        </Group>
        <Group Name="DoubleClones" PartOfGroup="None" Color="#ff0000">
            <Attributes/>
        </Group>
        <Group Name="exclude" PartOfGroup="None" Color="#000000">
            <Attributes/>
        </Group>
    </AnnotationGroups>
</ASAP_Annotations>
"""

In [None]:
# Re-check the annotation numbering, this time from the parsed xml structure, and verify
# that WholeSlideAnnotation yields the annotations in the same order as the xml file.

# Largest allowed difference (in coordinate units) between a coordinate read from the xml
# and the same coordinate as reported by WholeSlideAnnotation.
COORDINATE_TOLERANCE = 1.0

for file in tqdm(xml_files):
    xml_dict = xml_to_dict(ET.parse(file))

    # The <Annotations> node is either the first or the second child of the root
    annotation_idx = 0
    if 'Annotations' not in xml_dict['ASAP_Annotations']['children'][annotation_idx]:
        annotation_idx = 1
    # An <Annotations> node without children has no 'children' key; treat it as empty so
    # the "No annotations found" report below is reached instead of a KeyError.
    xml_annotations = xml_dict['ASAP_Annotations']['children'][annotation_idx]['Annotations'].get('children', [])

    annotations = []        # annotation numbers, in file order
    first_coordinates = []  # first (X, Y) of every annotation in file order, None if it has no coordinates
    for ann in xml_annotations:
        node = ann['Annotation']
        name = node['attrib']['Name'].replace('Annotation ', '')
        annotations.append(int(name))

        first_coordinate = None
        for child in node.get('children', []):
            coordinate_nodes = child.get('Coordinates', {}).get('children', [])
            if coordinate_nodes:
                attrib = coordinate_nodes[0]['Coordinate']['attrib']
                first_coordinate = (float(attrib['X']), float(attrib['Y']))
                break
        first_coordinates.append(first_coordinate)

    # Check if the annotations are in order
    sorted_annotations = sorted(annotations)
    if annotations != sorted_annotations:
        print(f"Annotations are not in order for {file}: {annotations}")
    if len(annotations) == 0:
        print(f"No annotations found for {file}")
        continue
    if annotations[0] != 1:
        print(f"First annotation is not 1 for {file}: {annotations}")

    # Sanity check that when opening by wsa, the first coordinate of every annotation is the
    # same as the first coordinate of the annotation at the same position in the xml.
    # (The previous version compared the wsa coordinate with itself, so it could never fail.)
    wsa = WholeSlideAnnotation(file)
    for i, ann in enumerate(wsa.annotations):
        if i >= len(annotations):
            print(f"More annotations in wsa than in xml for {file}")
            break
        expected = first_coordinates[i]
        if expected is None:
            # Nothing to compare against for an annotation without coordinates in the xml
            continue
        x, y = ann.coordinates[0]
        if abs(x - expected[0]) > COORDINATE_TOLERANCE or abs(y - expected[1]) > COORDINATE_TOLERANCE:
            print(f"First coordinate of annotation {i} in wsa is not the same as in xml for {file}")

In [None]:
import pandas as pd

# Load the slide-level table; it is a semicolon-separated csv
labels_csv_path = os.path.join(LANS_BIOP_ROOT, 'p53_slide+biopsy.csv')
slide_labels = pd.read_csv(labels_csv_path, sep=';')

# Map every slide name (case_nr without '.xml') to its list of (code, number) biopsy tuples.
# Each entry of the 'biopsies' column is split into its first two characters and the
# integer that follows them; parentheses are stripped before splitting on commas.
biopsies_by_slide = {}
for case_nr, biopsy_field in zip(slide_labels['case_nr'], slide_labels['biopsies']):
    slide = case_nr.replace('.xml', '')
    entries = biopsy_field.replace(')', '').replace('(', '').split(',')
    biopsies = [(entry[:2], int(entry[2:])) for entry in entries]
    biopsies_by_slide[slide] = biopsies

    # # Check which studies they're from
    # if slide in [os.path.basename(f).replace('.xml', '') for f in xml_files]:
    #     print(slide, row['Study'], row['nr_raters'], row['biopsies'])

In [None]:
# Check if all xml_files have RL in their file name.
# Only the base name is inspected: testing the full path would let an 'RL' that happens to
# occur in a directory name hide a file that lacks it.
for file in xml_files:
    if 'RL' not in os.path.basename(file):
        print(f"RL not in {file}")

In [None]:
# Compare the slides that have an xml file with the slides that have labels
xml_slides = {os.path.basename(f).replace('.xml', '') for f in xml_files}
label_slides = set(biopsies_by_slide)

if xml_slides != label_slides:
    # Slides with an annotation file but without a row in the labels table
    unlabeled_slides = xml_slides - label_slides
    print(f"Slides in xml_files that are not in slide_labels: {len(unlabeled_slides)}")
    display(sorted(unlabeled_slides))
    # print(f"Slides in slide_labels that are not in xml_files: {len(label_slides - xml_slides)}")
    # display(sorted(label_slides - xml_slides))

In [None]:
# Check for every xml file whether there are as many annotations as there are biopsies in biopsies_by_slide
for file in xml_files:
    slide = os.path.basename(file).replace('.xml', '')
    biopsies = biopsies_by_slide.get(slide)
    if biopsies is None:
        # A slide without labels is reported instead of aborting the whole check with a KeyError
        print(f"No biopsies found in biopsies_by_slide for {file}")
        continue
    wsa = WholeSlideAnnotation(file)
    if len(wsa.annotations) != len(biopsies):
        print(f"Number of annotations does not match number of biopsies for {file}")

In [None]:
# Go through all xml files, and add the annotations from the wsa to the xml_dict template, and overwrite the xml file.
# NOTE: this cell rewrites the xml files in place.

# Two-letter codes from the labels table -> annotation group. Built once instead of once
# per annotation; codes that are not listed fall back to NoConsensus.
GROUP_BY_CODE = {
    'WT': AnnotationGroup.WILD_TYPE,
    'OE': AnnotationGroup.OVER_EXPRESSION,
    'NM': AnnotationGroup.NULL_MUTATION,
    'DC': AnnotationGroup.DOUBLE_CLONES
}
# Biopsies whose number (second element of the tuple) is below this value are put in NoConsensus
MIN_CONCORDANCE = 70

for xml_file in tqdm(xml_files):
    case_nr = os.path.basename(xml_file).replace('.xml', '')
    wsa = WholeSlideAnnotation(xml_file)

    # Validate before touching the file: these two situations used to raise a KeyError /
    # IndexError halfway through the run, after earlier files had already been overwritten.
    biopsies = biopsies_by_slide.get(case_nr)
    if biopsies is None:
        print(f"No biopsies found in biopsies_by_slide for {xml_file}, file left unchanged")
        continue
    if len(wsa.annotations) > len(biopsies):
        print(f"More annotations than biopsies for {xml_file}, file left unchanged")
        continue

    # Start from a fresh template and add one annotation per wsa annotation
    xml_dict = get_xml_template_dict()
    annotation_nodes = xml_dict['ASAP_Annotations']['children'][0]['Annotations']['children']
    for i, ann in enumerate(wsa.annotations):
        code, concordance = biopsies[i]
        if concordance < MIN_CONCORDANCE:
            group = AnnotationGroup.NO_CONSENSUS
        else:
            group = GROUP_BY_CODE.get(code, AnnotationGroup.NO_CONSENSUS)

        # Get the annotation dict; annotations are numbered from 1
        ann_dict = get_annotation_dict(ann.coordinates, annotation_group=group, name=f'Annotation {i+1}')
        # Add the annotation to the xml_dict
        annotation_nodes.append({'Annotation': ann_dict})

    # Convert the xml_dict to an xml tree
    tree = dict_to_xml(xml_dict)

    # Write the xml tree to the xml file
    tree.write(xml_file)
    # print(prettify(tree.getroot()))
    # print(prettify(tree.getroot()))