In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Standard library imports
import os
import time

# Third-party imports
import cv2
import geojson
import numpy as np
import openslide
import pandas as pd

# Local application/library specific imports
from utils_data import (adjust_artery_coords_by_boundry, cnt_polygon_test, clean_coord, 
                        get_border_of_ann, get_border_of_cnt, get_cnts_inside)
from utils_measure import measure_thickness
from utils_measure_vis import plot_artery_ann, save_img_helper

In [2]:
DIR_WSI = "/Users/jinzhou/Desktop/Paper_MIA/data/wsi"
DIR_ANN = "/Users/jinzhou/Desktop/Paper_MIA/data/ann_geojson"
DIR_SAVE_RESULTS = "/Users/jinzhou/Desktop/Paper_MIA/results"
DIR_SAVE_FIGURE = "/Users/jinzhou/Desktop/Paper_MIA/results/figures"

In [3]:
def artery_analysis(slide, ann_outer, ann, slide_boundries, wsi_id, artery_id, df):
    # get coordinates of arteries/media_outer_boundries
    cnt_outer = ann_outer["geometry"]["coordinates"]
    # based on outer, get middle and inner insides/intersecs
    cnts_middle = get_cnts_inside(ann, cnt_outer, target="Intima")
    cnts_inner = get_cnts_inside(ann, cnt_outer, target="Lumen")
    if len(cnts_inner) == 0 or len(cnts_inner) == 0:
        print("No Intima/Media Contour: ", artery_id)
        return df
    # adjust coords based on slide boundry
    (cnt_outer, cnts_middle, cnts_inner) = \
        adjust_artery_coords_by_boundry(cnt_outer, cnts_middle,
            cnts_inner, slide_boundries)
    
    # adjust coords based on artery boundry
    artery_boundry = get_border_of_cnt(cnt_outer, border=50)
    (cnt_outer, cnts_middle, cnts_inner) = \
        adjust_artery_coords_by_boundry(cnt_outer, cnts_middle,
            cnts_inner, artery_boundry) 
    
    (xmin, xmax, ymin, ymax) = artery_boundry
    
    curr_slide = slide[ymin:ymax, xmin:xmax].copy()
    curr_slide = plot_artery_ann(curr_slide, cnt_outer, cnts_middle, cnts_inner)
    
    (h, w) = curr_slide.shape[:2]
    curr_ann = np.zeros((h, w, 3), np.uint8)
    curr_ann = plot_artery_ann(curr_ann, cnt_outer, cnts_middle, cnts_inner)
        
#     dir_save = os.path.join(DIR_SAVE_FIGURE, wsi_id, artery_id)
    
    area_lumen = np.sum([cv2.contourArea(cnt) for cnt in cnts_inner])
    area_intima = np.sum([cv2.contourArea(cnt) for cnt in cnts_middle]) - area_lumen    
    area_media = cv2.contourArea(cnt_outer) - area_intima - area_lumen
    
    max_lumen = 0

    for idx_inner, cnt_inner in enumerate(cnts_inner):
        for idx_middle, cnt_middle in enumerate(cnts_middle):
            if cnt_polygon_test(cnt_inner, cnt_middle):
                # radial lines intersecs with excludde should be discarded
                exclude = (cnts_middle[:idx_middle] + cnts_middle[idx_middle+1:] + 
                           cnts_inner[:idx_inner] + cnts_inner[idx_inner+1:])
                
                curr_area_intima = cv2.contourArea(cnt_middle)
                curr_area_lumen = cv2.contourArea(cnt_inner)
                if curr_area_lumen >= max_lumen:
                    max_lumen = curr_area_lumen
                else:
                    continue
                thick_media, thick_intima=measure_thickness(
                    cnt_outer, cnt_middle, cnt_inner, 
                    wsi_id=wsi_id, artery_id=artery_id, 
                    angle_width=15, exclude=exclude, 
                    vis=curr_ann, dir_parent=DIR_SAVE_FIGURE)

                row = {'WSI_ID':wsi_id, 'Artery_ID':artery_id, 
                       'Thickness_Media':thick_media, 'Thickness_Intima':thick_intima,
                       'Area_Media': area_media, 'Area_Intima': area_intima, 'Area_Lumen': area_lumen,
                       'Curr_Area_Intima': curr_area_intima, 'Curr_Area_Lumen': curr_area_lumen}
                df = df.append(row, ignore_index=True)
    
    return df

In [4]:
def wsi_analysis(slide, ann, slide_boundries, wsi_id, df):
    
    # get ann for arteries/media_outer_boundries
    ann_outers = [ann_i for ann_i in ann if "name" in ann_i["properties"]]
    print("Number of arteries in {} is {}".format(wsi_id, len(ann_outers)))
    for i, ann_outer in enumerate(ann_outers):
        artery_id = ann_outer["properties"]["name"]
        print(artery_id)
        # artery-level analysis
        df = artery_analysis(slide, ann_outer, ann, slide_boundries, wsi_id, artery_id, df)
    return df

In [5]:
start_time = time.time()
df = pd.DataFrame(columns = ['WSI_ID', 'Artery_ID', 'Thickness_Media', 'Thickness_Intima', 
                            'Area_Media', 'Area_Intima', 'Area_Lumen', 'Curr_Area_Intima', 'Curr_Area_Lumen'])
path_to_save = os.path.join(DIR_SAVE_RESULTS, "thickness.json")

for wsi in os.listdir(DIR_WSI):
    if (not wsi.endswith(".ndpi")) and (not wsi.endswith(".svs")):
        continue
    start_time_wsi = time.time()
    path_wsi = os.path.join(DIR_WSI, wsi)
    wsi_id = ".".join(wsi.split(".")[:-1])
    
    if wsi_id not in ["KL-0023191_L03_TRIS_272723", "11_26609_000_011_L02_TRI", "12_26609_022_015 L2 TRI", 
                     "11_26609_009_008 L10 TRI"]: continue

    print("Working on Case: ", wsi_id)
    path_ann = os.path.join(DIR_ANN, wsi_id+".geojson")
    
    with open(path_ann) as f:
        exported = geojson.load(f)
        ann = exported['features']
    
    for i in range(len(ann)):
        coords_raw = ann[i]['geometry']['coordinates']
        ann[i]['geometry']['coordinates'] = clean_coord(coords_raw)
    
    # find the area including arteries, make it easier to plot
    xmin, xmax, ymin, ymax = get_border_of_ann(ann)
    
    # open slide with formats of ndpi or svs
    slide_0 = openslide.OpenSlide(path_wsi)

    # fetch levels[0] res of whole WSI with only the area including arteries
    slide = slide_0.read_region((xmin, ymin), 0, (xmax-xmin, ymax-ymin))
    slide = np.asarray(slide)
    print("Fetched Shape",slide.shape)
    # origin slide is in RGBA format, convert it to RGB and save to model data dir
    slide = cv2.cvtColor(slide, cv2.COLOR_RGBA2RGB)
    # WSI-level analysis
    df = wsi_analysis(slide, ann, (xmin, xmax, ymin, ymax), wsi_id, df)
    print(wsi_id, " costs, ", time.time() - start_time_wsi)
    print()

print("In total, it costs ", time.time() - start_time)

Working on Case:  12_26609_022_015 L2 TRI
Fetched Shape (9195, 14790, 4)
Number of arteries in 12_26609_022_015 L2 TRI is 10
A04
A01
A02
A03
A10
A11
A12
A07
A05
A06
12_26609_022_015 L2 TRI  costs,  19.978446006774902

Working on Case:  KL-0023191_L03_TRIS_272723
Fetched Shape (14601, 4439, 4)
Number of arteries in KL-0023191_L03_TRIS_272723 is 8
A01
A02
A03
A04
A05
A06
A07
A08
KL-0023191_L03_TRIS_272723  costs,  14.392180919647217

Working on Case:  11_26609_009_008 L10 TRI
Fetched Shape (41284, 41284, 4)
Number of arteries in 11_26609_009_008 L10 TRI is 55
A01
A02
A03
A04
A06
A07
A08
A09
A10
A11
A12
A13
A14
A15
A16
A17
A18
A19
A20
A21
A22
A23
A24
A25
A27
A26
A28
A29
A30
A31
A32
A33
A34
A35
A36
A37
A38
A39
A40
A41
A42
A44
A43
A45
A46
A47
A48
A49
A50
A51
A52
A53
A54
A55
A05
11_26609_009_008 L10 TRI  costs,  161.1004900932312

Working on Case:  11_26609_000_011_L02_TRI
Fetched Shape (8317, 10905, 4)
Number of arteries in 11_26609_000_011_L02_TRI is 25
A14
A15
A16
A07
A17
A19
A18
A20
A22


In [6]:
# df.to_json(path_to_save, orient="records", lines=True)