# Imports

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

%reload_ext autoreload
%autoreload 2
#%matplotlib notebook
%matplotlib inline

from typing import List, Tuple, Union, Sequence

import sys
import os

#https://github.com/FAU-DLM/wsi_processing_pipeline
sys.path.append("../")
sys.path.append("../wsi_processing_pipeline/")
sys.path.append("../wsi_processing_pipeline/tile_extraction")
sys.path.append("../wsi_processing_pipeline/shared")
import wsi_processing_pipeline
import tile_extraction
import preprocessing
import postprocessing
import shared
from wsi_processing_pipeline.shared import roi
from wsi_processing_pipeline.tile_extraction import tiles, util, slide, filter
from wsi_processing_pipeline.preprocessing import *
import wsi_processing_pipeline.preprocessing.files_getter
import wsi_processing_pipeline.preprocessing.name_getter
from wsi_processing_pipeline.preprocessing.name_getter import NameGetter
from wsi_processing_pipeline.preprocessing.tile_image_block import TileImage, label_tl_image, tile_image, TileTransform
from wsi_processing_pipeline.preprocessing.tile_image_block import show_batch, TileImageBlock
from shared.patient_manager import PatientManager
from shared.enums import DatasetType, EvaluationLevel

sys.path.append('../models-pytorch/pretrained-models.pytorch')
import pretrainedmodels
from pretrainedmodels import *


import fastai
from fastai.vision.all import *
from typing import Dict
import pandas
import pandas as pd
import numpy as np
import os
import torch
torch.backends.cudnn.benchmark=True
import torchvision
from torchvision.models import *
from torchsummary import summary
from functools import partial, update_wrapper
from tqdm import tqdm_notebook as tqdm
import matplotlib.image as mpimg
import shutil

# to fix python OSError: broken data stream when reading image file
# https://stackoverflow.com/questions/42462431/oserror-broken-data-stream-when-reading-image-file
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import sklearn
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
from tqdm.notebook import tqdm

import pathlib
from pathlib import Path
Path.ls = lambda x: [p for p in list(x.iterdir()) if '.ipynb_checkpoints' not in p.name]


from concurrent.futures import as_completed, ProcessPoolExecutor
from tqdm import tqdm
import json

#against DecompressionBombWarning
Image.MAX_IMAGE_PIXELS = 10000000000   

# Root directory of the dataset; the paths below are all derived from it.
PATH = Path('/home/Deep_Learner/private/datasets/urothel_ca')
# Directory that is scanned further down for the *.mrxs slide files and their directories.
PATH_WSIS = PATH/'Neue_Scans_Urothelkarzinome'
# Directory that is scanned further down for the per-slide annotation (.json) files.
PATH_ROIS = PATH/'Rois'
# Excel data sheet belonging to the dataset.
PATH_EXCEL_SHEET = PATH/'Data_Sheet_MIER_22112021.xlsx'


# Fixed seed for reproducibility. NOTE: only NumPy's global random generator is seeded here.
seed = 42
np.random.seed(seed)

# Data

In [3]:
# Every entry directly inside PATH_WSIS whose file extension is exactly '.mrxs'.
wsi_mrxs_paths = [entry for entry in Path.ls(PATH_WSIS) if entry.suffix == '.mrxs']
len(wsi_mrxs_paths)

239

In [4]:
# Every directory directly inside PATH_WSIS.
# The check uses is_dir() instead of "has no file extension": an extension-less regular
# file is not a directory, and a directory whose name contains a dot still is one.
wsi_dir_paths = [p for p in PATH_WSIS.ls() if p.is_dir()]
len(wsi_dir_paths)

239

In [5]:
# All entries of the ROI directory (no suffix filter is applied here).
json_paths = Path.ls(PATH_ROIS)
len(json_paths)

233

## rename files and directories

### json paths

In [6]:
p = json_paths[0]; p

Path('/home/Deep_Learner/private/datasets/urothel_ca/Rois/MIER_237_M4.json')

In [7]:
new_name = p.name.replace('%20', '_');new_name

'MIER_237_M4.json'

In [8]:
new_p = p.parent/new_name; new_p

Path('/home/Deep_Learner/private/datasets/urothel_ca/Rois/MIER_237_M4.json')

In [9]:
p.replace(new_p)

In [10]:
# Replace URL-encoded blanks ('%20') in the annotation file names with underscores and
# rebuild `json_paths`, so that the list refers to the files as they are named afterwards
# (the previous entries would point to names that no longer exist on disk).
renamed_json_paths = []
for p in json_paths:
    new_p = p.with_name(p.name.replace('%20', '_'))
    if new_p != p:
        # NOTE: Path.replace overwrites an already existing target without asking.
        p.replace(new_p)
    renamed_json_paths.append(new_p)
json_paths = renamed_json_paths

### mrxs files

In [11]:
# Replace blanks in the .mrxs file names with underscores and rebuild `wsi_mrxs_paths`,
# so that the list refers to the files as they are named afterwards
# (the previous entries would point to names that no longer exist on disk).
renamed_wsi_mrxs_paths = []
for p in wsi_mrxs_paths:
    new_p = p.with_name(p.name.replace(' ', '_'))
    if new_p != p:
        # NOTE: Path.replace overwrites an already existing target without asking.
        p.replace(new_p)
    renamed_wsi_mrxs_paths.append(new_p)
wsi_mrxs_paths = renamed_wsi_mrxs_paths

### corresponding mrxs dirs

In [44]:
# Replace blanks in the directory names with underscores and rebuild `wsi_dir_paths`,
# so that the list refers to the directories as they are named afterwards
# (the previous entries would point to names that no longer exist on disk).
renamed_wsi_dir_paths = []
for p in wsi_dir_paths:
    new_p = p.with_name(p.name.replace(' ', '_'))
    if new_p != p:
        p.replace(new_p)
    renamed_wsi_dir_paths.append(new_p)
wsi_dir_paths = renamed_wsi_dir_paths

## wsi path to json file mapping

In [12]:
wsi_mrxs_paths[0].stem

'MIER_140_M17'

In [13]:
def find_json_path(mrxs_path:pathlib.Path,
                   candidate_json_paths:Sequence[pathlib.Path]=None)->Union[pathlib.Path, None]:
    """
    Looks up the json file that has the same stem (file name without extension) as the given path.

    Arguments:
        mrxs_path: path whose stem is searched for
        candidate_json_paths: paths to search through; if None, the notebook-level
                              list `json_paths` is used

    Returns:
        The first candidate with an identical stem, or None if there is no such candidate.
    """
    if(candidate_json_paths is None):
        # fall back to the notebook-level list so that existing calls keep working
        candidate_json_paths = json_paths
    stem = mrxs_path.stem
    return next((jp for jp in candidate_json_paths if jp.stem == stem), None)

In [14]:
# Pair every .mrxs path with the result of the json lookup (None if nothing was found) ...
mrxs_json_pairs = [(mrxs_path, find_json_path(mrxs_path=mrxs_path)) for mrxs_path in wsi_mrxs_paths]
# ... and split the pairs into the successful matches and the unmatched .mrxs paths.
wsi_mrxs_to_json_path = {mrxs_path: json_path
                         for mrxs_path, json_path in mrxs_json_pairs
                         if json_path is not None}
wsi_mrxs_paths_without_corresponding_json_file = [mrxs_path
                                                  for mrxs_path, json_path in mrxs_json_pairs
                                                  if json_path is None]

In [15]:
len(wsi_mrxs_to_json_path)

233

In [16]:
len(wsi_mrxs_paths_without_corresponding_json_file)

6

In [17]:
wsi_mrxs_paths_without_corresponding_json_file

[Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_122_TUR1.mrxs'),
 Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_153_TUR1.mrxs'),
 Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_205_A.mrxs'),
 Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_119_D.mrxs'),
 Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_106_M18.mrxs'),
 Path('/home/Deep_Learner/private/datasets/urothel_ca/Neue_Scans_Urothelkarzinome/MIER_118_B.mrxs')]

## filter out stroma rois

In [18]:
jp = json_paths[0];jp

Path('/home/Deep_Learner/private/datasets/urothel_ca/Rois/MIER_237_M4.json')

In [19]:
class __PolygonHelper:
    """
    Minimal container for one polygon read from an annotation file.
    """
    def __init__(self, level:int, vertices:Sequence[Tuple[float, float]]):
        # stored as given; get_polygons_from_json always passes level=0
        # (presumably the slide level the coordinates refer to - TODO confirm)
        self.level = level
        # the polygon's vertices, one (x, y) pair per vertex
        self.vertices = vertices


def _polygon_rings_to_vertex_arrays(polygon_coordinates)->list:
    """
    Converts the coordinates of ONE polygon (a list of rings, each ring a list of [x, y] points)
    into a list of vertex arrays, one array per ring.
    """
    rings_array = np.array(polygon_coordinates, dtype=object).squeeze()
    if(len(rings_array.shape) == 2 and rings_array.shape[1] == 2):
        # exactly one ring: after squeezing, the array already is the (n_vertices, 2) vertex list
        return [rings_array]
    # several rings: every ring becomes a vertex array of its own
    return [np.array(ring).squeeze() for ring in rings_array]


def get_polygons_from_json(json_path:pathlib.Path,
                           excluded_classifications:Sequence[str]=("Tumorstroma",))->List[__PolygonHelper]:
    """
    Reads the json file and returns a list of __PolygonHelper objects.
    This should be a specialized function for the specific structure of your json files.

    The file is expected to contain a list of annotations, each with a "geometry" entry
    ("type" and "coordinates") and a "properties" entry. Annotations whose
    properties -> classification -> name is listed in `excluded_classifications` are skipped.
    Annotations without a classification are kept.

    Arguments:
        json_path: path to json file
        excluded_classifications: classification names whose annotations are skipped
                                  (default: only "Tumorstroma")

    Returns:
        List of __PolygonHelper objects (all with level=0), one per polygon ring

    Raises:
        AssertionError: if an annotation's geometry type is neither 'Polygon' nor 'MultiPolygon'
    """
    with open(json_path, encoding="utf-8") as json_file:
        annotations = json.load(json_file)

    polygons = []
    for annotation in annotations:
        # an annotation may carry no classification at all; it then cannot be an excluded one
        classification = (annotation.get("properties") or {}).get("classification") or {}
        classification_name = classification.get("name")
        if(classification_name is not None and classification_name in excluded_classifications):
            continue

        geometry_type = annotation["geometry"]["type"]
        coordinates = annotation["geometry"]["coordinates"]
        ##QuPath produces Polygons and Multipolygons
        ##(see difference here: https://gis.stackexchange.com/questions/225368/understanding-difference-between-polygon-and-
        ##multipolygon-for-shapefiles-in-qgis/225373)
        if(geometry_type == 'MultiPolygon'):
            #a MultiPolygon is a list of polygons, which are handled individually
            coordinates_per_polygon = coordinates
        elif(geometry_type == 'Polygon'):
            coordinates_per_polygon = [coordinates]
        else:
            #explicit raise instead of `assert False`: also active when asserts are disabled (-O)
            raise AssertionError(f'unsupported geometry type: {geometry_type!r}')

        for polygon_coordinates in coordinates_per_polygon:
            for vertices in _polygon_rings_to_vertex_arrays(polygon_coordinates):
                polygons.append(__PolygonHelper(level=0, vertices=vertices))
    return polygons

## get rois for each wsi path

In [20]:
#no stroma rois
# Container for the per-slide tumor rois; it is still empty after this cell.
wsi_mrxs_path_to_tumor_rois = {}
# NOTE(review): the function is called without its required `json_path` argument, so this
# cell raises the TypeError recorded below. Neither the function's return value nor whether
# it skips stroma rois is visible from this notebook - TODO pass a json path and fill the dict.
roi.get_list_of_RegionOfInterestPolygon_from_json()

TypeError: get_list_of_RegionOfInterestPolygon_from_json() missing 1 required positional argument: 'json_path'