# libraries & functions

In [1]:
import sys
sys.path.append('utility_box/')
from cpath import WSI
import load

In [2]:
from shapely_utils import make_valid, GeometryCollection, Polygon, mapping, MultiPolygon

In [3]:
import geojson
from pathlib import Path
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET

In [4]:
def get_geojson_contour(X, Y):
    """Pair up x and y values into a list of ``[x, y]`` points.

    Args:
        X: iterable of x values.
        Y: iterable of y values, consumed in step with ``X``.

    Returns:
        A list of two-element lists ``[x, y]``. Pairing stops at the end of
        the shorter input (``zip`` semantics).
    """
    return [[x_val, y_val] for x_val, y_val in zip(X, Y)]

def get_geojson_mPolyfeature(polys, groupName='Annotation', color=(246, 122, 73)):
    """Wrap the exterior rings of several polygons in one GeoJSON MultiPolygon feature.

    Args:
        polys: iterable of objects exposing ``.exterior.coords``
            (presumably shapely Polygons; confirm against callers).
        groupName: stored, formatted as a string, under the ``name`` property.
        color: stored unchanged under the ``color`` property.

    Returns:
        A ``geojson.Feature`` whose geometry is a ``geojson.MultiPolygon``.
        Each input polygon contributes exactly one ring (its exterior); only
        the first two coordinate columns are used, cast with ``astype(int)``.
    """
    rings = []
    for polygon in polys:
        # Convert the exterior once, keep the x/y columns, and cast to int.
        exterior_xy = np.array(polygon.exterior.coords)[:, :2].astype(int)
        # Each polygon is a list of rings; only the exterior ring is emitted.
        rings.append([exterior_xy.tolist()])

    feature_properties = {
        'objectType': 'annotation',
        'name': f"{groupName}",
        'color': color,
    }
    return geojson.Feature(
        geometry=geojson.MultiPolygon(rings),
        properties=feature_properties,
    )

In [6]:
import numpy as np

In [7]:
def parse_asap_annotations(xml_file):
    """Read every ``Annotation`` element of an XML file into a plain dict.

    Args:
        xml_file: anything ``xml.etree.ElementTree.parse`` accepts
            (a path or a file object).

    Returns:
        A list with one dict per ``Annotation`` element found anywhere in the
        document, in document order:

        * ``"name"``: the element's ``Name`` attribute (``None`` if absent),
        * ``"group"``: the element's ``PartOfGroup`` attribute (``None`` if absent),
        * ``"coordinates"``: list of ``(x, y)`` tuples taken from the ``X``/``Y``
          attributes of every nested ``Coordinate`` element, each value parsed
          as float and rounded down with ``np.floor``.
    """
    document_root = ET.parse(xml_file).getroot()

    def _floored_point(coordinate_node):
        # Values stay floating point; np.floor only removes the fraction.
        floored_x = np.floor(float(coordinate_node.get("X")))
        floored_y = np.floor(float(coordinate_node.get("Y")))
        return (floored_x, floored_y)

    return [
        {
            "name": annotation_node.get("Name"),
            "group": annotation_node.get("PartOfGroup"),
            "coordinates": [
                _floored_point(coordinate_node)
                for coordinate_node in annotation_node.findall(".//Coordinate")
            ],
        }
        for annotation_node in document_root.findall(".//Annotation")
    ]

# CAMELYON17

In [8]:
# CAMELYON17 dataset root; the annotation and slide folders sit directly under it.
path = Path('PublicDatasets/CAMELYON17')
annotations_folder = path / 'annotations'
slides_folder = path / 'images'

In [9]:
# Convert every CAMELYON17 annotation file into two artefacts named after the
# annotation file's stem:
#   * {path}/tumor_geoms/<stem>.pkl                - list of {'geom', 'group'} dicts
#   * {path}/tumor_geojson_masks/<stem>.geojson    - the same geometries as GeoJSON
for annotation_path in tqdm(list(annotations_folder.iterdir())):

    annotations = parse_asap_annotations(annotation_path)

    # Slide belonging to this annotation: same stem, .tif, in the images folder.
    # Built from the stem instead of str.replace('annotations', 'images') on the
    # whole path, which would also rewrite an 'annotations' substring inside the
    # file name and so change the output file names.
    slide_path = slides_folder / f"{annotation_path.stem}.tif"

    geom_dicts = []
    for annotation in annotations:
        # Same guard as the CAMELYON16 loop: skip annotations with fewer than
        # 4 points so one degenerate annotation cannot abort the whole run.
        # NOTE(review): this also drops 3-point annotations - kept for parity
        # with CAMELYON16; confirm that is the intended threshold.
        if len(annotation["coordinates"]) < 4:
            continue

        # Everything outside the 'Tumor' group is labelled 'Hole'.
        group = 'Tumor' if annotation['group'] == 'Tumor' else 'Hole'
        # buffer(0) presumably repairs invalid rings - confirm in shapely_utils.
        geom = Polygon(annotation["coordinates"]).buffer(0)
        geom_dicts.append({'geom': geom, 'group': group})

    load.save_pickle(f'{path}/tumor_geoms/{slide_path.stem}.pkl', geom_dicts)

    geojson_features = []
    for geom in geom_dicts:
        geojson_feature = geojson.Feature(geometry=mapping(geom['geom']))
        geojson_feature['properties']["label"] = geom['group']
        geojson_features.append(geojson_feature)

    geojson_feature_collection = geojson.FeatureCollection(geojson_features)
    load.save_geojson(f'{path}/tumor_geojson_masks/{slide_path.stem}.geojson', geojson_feature_collection)

  0%|          | 0/50 [00:00<?, ?it/s]

File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_051_node_2.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_089_node_3.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_061_node_4.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_009_node_1.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_052_node_1.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_024_node_2.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_017_node_1.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON17/tumor_geojson_masks/patient_015_node_1.geojson' created successfully.
File Saved
GeoJSON file 'PublicD

# CAMELYON16

In [10]:
# CAMELYON16 dataset root; the annotation and slide folders sit directly under it.
path = Path('PublicDatasets/CAMELYON16')
annotations_folder = path / 'annotations'
slides_folder = path / 'images'

In [11]:
# Convert every CAMELYON16 annotation file into a pickled list of
# {'geom', 'group'} dicts and a GeoJSON FeatureCollection, both named after
# the matching slide's stem.
for idx, annotation_path in enumerate(tqdm(list(annotations_folder.iterdir()))):

    annotations = parse_asap_annotations(annotation_path)

    # Mirror the annotation path into the images folder and swap in the slide extension.
    updated_path = Path(str(annotation_path.with_suffix('')).replace('annotations', 'images'))
    slide_path = Path(f"{updated_path}.tif")

    # Keep only annotations with at least 4 points; anything outside the
    # 'Tumor' group is labelled 'Hole'.
    # buffer(0) presumably repairs invalid rings - confirm in shapely_utils.
    geom_dicts = [
        {
            'geom': Polygon(record["coordinates"]).buffer(0),
            'group': 'Tumor' if record['group'] == 'Tumor' else 'Hole',
        }
        for record in annotations
        if len(record["coordinates"]) >= 4
    ]

    load.save_pickle(f'{path}/tumor_geoms/{slide_path.stem}.pkl',geom_dicts)

    # One GeoJSON feature per geometry, carrying its group under "label".
    geojson_features = [
        geojson.Feature(
            geometry=mapping(entry['geom']),
            properties={"label": entry['group']},
        )
        for entry in geom_dicts
    ]

    geojson_feature_collection = geojson.FeatureCollection(geojson_features)
    load.save_geojson(f'{path}/tumor_geojson_masks/{slide_path.stem}.geojson', geojson_feature_collection)

  0%|          | 0/160 [00:00<?, ?it/s]

File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/test_084.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/tumor_091.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/tumor_102.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/test_079.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/tumor_108.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/test_105.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/tumor_103.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/test_001.geojson' created successfully.
File Saved
GeoJSON file 'PublicDatasets/CAMELYON16/tumor_geojson_masks/tumor_087.geojson' created successful