In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import necessary packages

import os
import time

import geojson
import numpy as np
import openslide
import pandas as pd

from data_utils import *
from vis_utils import *
from measure_utils import *

In [None]:
# Root of the project's data/results tree. Set the PAPER_MIA_ROOT environment
# variable before starting the kernel to run on another machine; when it is
# unset, the original location is used and every path below is unchanged.
DIR_ROOT = os.environ.get("PAPER_MIA_ROOT", "/Users/jinzhou/Desktop/Paper_MIA")

# Whole-slide images (.ndpi / .svs) to analyse.
DIR_WSI = os.path.join(DIR_ROOT, "data", "wsi")
# GeoJSON annotation exports, one "<wsi_id>.geojson" per slide.
DIR_ANN = os.path.join(DIR_ROOT, "data", "ann_geojson")
# Output location of the thickness table.
DIR_SAVE_RESULTS = os.path.join(DIR_ROOT, "results")
# Output location for per-artery figures.
DIR_SAVE_FIGURE = os.path.join(DIR_SAVE_RESULTS, "figures")

In [None]:
def approx_cnt(cnt):
    """Simplify a contour and return its vertices as rows of two values.

    The contour is passed to ``cv2.approxPolyDP`` with a fixed tolerance
    (epsilon) of 1 and ``closed=True``; the result is reshaped to ``(-1, 2)``.

    NOTE(review): ``cv2`` is not imported in this notebook's import cell; it is
    presumably re-exported by one of the ``*_utils`` star imports -- confirm.
    """
    # Fixed epsilon of 1 (a perimeter-relative tolerance would be
    # 0.001 * cv2.arcLength(cnt, True), which is not used here).
    return cv2.approxPolyDP(cnt, 1, True).reshape(-1, 2)

def get_cnt_idx_w_largest_area(cnts):
    """Return the index of the contour in ``cnts`` with the largest area.

    Areas come from ``cv2.contourArea``. Only an area strictly greater than
    the best seen so far (starting from 0) replaces the current pick, so ties
    keep the earliest contour, and 0 is returned when ``cnts`` is empty or no
    contour has a positive area.
    """
    best_idx, best_area = 0, 0
    for idx, contour in enumerate(cnts):
        area = cv2.contourArea(contour)
        if area > best_area:
            best_idx, best_area = idx, area
    return best_idx


def artery_analysis(slide, ann_outer, ann, slide_boundries, wsi_id, artery_id, df):
    """Measure media and intima thickness for one artery and add the results to ``df``.

    Parameters
    ----------
    slide : array
        Image of the fetched slide region, indexed as ``slide[y, x]``.
    ann_outer : dict
        Annotation feature of the artery's outer boundary; its
        ``["geometry"]["coordinates"]`` entry is read.
    ann : list
        All annotation features of the slide; searched (via ``get_cnts_inside``)
        for "Intima", "Lumen" and "Hyalin" contours belonging to this artery.
    slide_boundries : tuple
        Boundary of the fetched slide region, forwarded to
        ``adjust_artery_coords_by_boundry``.
    wsi_id, artery_id : str
        Identifiers written to the result rows and used for the figure directory.
    df : pandas.DataFrame
        Table the measurements are added to. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus one row per intima contour for which
        ``cnt_polygon_test(cnt_inner, cnt_middle)`` is truthy. When no row is
        produced (including the case of an artery without any lumen contour),
        ``df`` itself is returned.
    """
    # Coordinates of the artery's outer (media) boundary.
    cnt_outer_raw = ann_outer["geometry"]["coordinates"]
    cnt_outer = clean_coord(cnt_outer_raw)
    # Based on the outer boundary, collect the intima, lumen and hyalin contours.
    cnts_middle = get_cnts_inside(ann, cnt_outer, target="Intima")
    cnts_inner = get_cnts_inside(ann, cnt_outer, target="Lumen")
    cnts_hyalin = get_cnts_inside(ann, cnt_outer, target="Hyalin")
    # NOTE(review): hyalin presence is only printed; the rows built below carry
    # no "Hyalin" value -- confirm whether it should be recorded.
    if len(cnts_hyalin):
        print("Hyalin Here")

    # Adjust coordinates based on the slide boundary.
    (cnt_outer, cnts_middle, cnts_inner) = adjust_artery_coords_by_boundry(
        cnt_outer, cnts_middle, cnts_inner, slide_boundries)

    # Adjust coordinates based on the artery's own boundary (50 px border).
    artery_boundry = get_border_of_cnt(cnt_outer, border=50)
    (cnt_outer, cnts_middle, cnts_inner) = adjust_artery_coords_by_boundry(
        cnt_outer, cnts_middle, cnts_inner, artery_boundry)

    # Crop the artery out of the slide and draw its annotations on the crop.
    (xmin, xmax, ymin, ymax) = artery_boundry
    curr_slide = slide[ymin:ymax, xmin:xmax].copy()
    curr_slide = plot_artery_ann(curr_slide, cnt_outer, cnts_middle, cnts_inner)

    # Blank canvas of the same size, handed to measure_thickness as ``vis``.
    (h, w) = curr_slide.shape[:2]
    curr_ann = np.zeros((h, w, 3), np.uint8)

    rows = []
    if len(cnts_inner) == 0:
        # Without a lumen contour there is nothing to measure against; report
        # it instead of failing the whole batch with an IndexError.
        print("No Lumen annotation found for artery {} in {}; "
              "skipping thickness measurement".format(artery_id, wsi_id))
    else:
        # The lumen with the largest area is the reference inner contour.
        idx_inner = get_cnt_idx_w_largest_area(cnts_inner)
        cnt_inner = cnts_inner[idx_inner]

        dir_save = os.path.join(DIR_SAVE_FIGURE, wsi_id, artery_id)

        for idx_middle, cnt_middle in enumerate(cnts_middle):
            if not cnt_polygon_test(cnt_inner, cnt_middle):
                continue

            # Radial lines intersecting any excluded contour should be discarded:
            # every other intima contour and every other lumen contour.
            exclude = (cnts_middle[:idx_middle] + cnts_middle[idx_middle+1:]
                       + cnts_inner[:idx_inner] + cnts_inner[idx_inner+1:])
            thick_media, thick_intima = measure_thickness(
                cnt_outer, cnt_middle, cnt_inner, angle_width=15,
                exclude=exclude, vis=curr_ann, dir_save=dir_save)
            rows.append({'WSI_ID': wsi_id, 'Artery_ID': artery_id,
                         'Thickness_Media_Abs': thick_media,
                         'Thickness_Intima_Abs': thick_intima})

    if rows:
        # DataFrame.append was removed in pandas 2.0; add all rows with a single concat.
        df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

    # To persist the figures instead of only displaying them:
    #     save_img(curr_slide, DIR_SAVE_FIGURE, wsi_id, artery_id)
    #     save_img(curr_ann, DIR_SAVE_FIGURE, wsi_id, artery_id+'_ann')
    imshow_k_in_row([curr_slide, curr_ann])
    return df

def wsi_analysis(slide, ann, slide_boundries, wsi_id, df):
    """Run ``artery_analysis`` on every named artery annotation of one slide.

    An annotation feature counts as an artery outer boundary when its
    ``"properties"`` contain a ``"name"`` key; that name is used as the artery
    ID. The number of arteries and each artery ID are printed. ``df`` is
    threaded through every ``artery_analysis`` call and the final result is
    returned.
    """
    # Only features carrying a "name" are treated as arteries.
    named_features = [feature for feature in ann if "name" in feature["properties"]]
    print("Number of arteries in {} is {}".format(wsi_id, len(named_features)))

    for feature in named_features:
        artery_id = feature["properties"]["name"]
        print(artery_id)
        # Artery-level analysis; each call returns the updated table.
        df = artery_analysis(slide, feature, ann, slide_boundries, wsi_id, artery_id, df)
    return df

In [None]:
# Batch driver: for every slide in DIR_WSI, load its GeoJSON annotations, fetch
# the level-0 region covering all annotations, run the WSI-level analysis and
# rewrite the accumulated thickness table after each slide.
start_time = time.time()
df = pd.DataFrame(columns = ['WSI_ID', 'Artery_ID', 'Thickness_Media_Abs', 'Thickness_Intima_Abs', "Hyalin"])
# Create the output directory up front so the first to_json call cannot fail
# after an expensive slide has already been processed.
os.makedirs(DIR_SAVE_RESULTS, exist_ok=True)
path_to_save = os.path.join(DIR_SAVE_RESULTS, "thickness.json")

# Slides excluded from the analysis by ID.
wsi_ids_to_skip = {"015_TRI_235805", "16-032-02_TRI_11628_copy", "16-010-02_TRI_11632_copy"}

# Sorted so slides are processed (and rows written) in a reproducible order;
# os.listdir alone returns entries in arbitrary order.
for wsi in sorted(os.listdir(DIR_WSI)):
    if not wsi.endswith((".ndpi", ".svs")):
        continue
    start_time_wsi = time.time()
    path_wsi = os.path.join(DIR_WSI, wsi)
    # File name without its final extension.
    wsi_id = wsi.rsplit(".", 1)[0]
    if wsi_id in wsi_ids_to_skip:
        continue
    # Slides whose ID starts with "2" are skipped as well.
    if wsi_id.startswith("2"):
        continue

    print("Working on Case: ", wsi_id)
    path_ann = os.path.join(DIR_ANN, wsi_id+".geojson")

    with open(path_ann) as f:
        exported = geojson.load(f)
    ann = exported['features']

    # find the area including arteries, make it easier to plot
    xmin, xmax, ymin, ymax = get_border_of_ann(ann)

    # open slide with formats of ndpi or svs; the context manager closes the
    # slide handle once the region has been read, even if reading fails
    with openslide.OpenSlide(path_wsi) as slide_0:
        levels = slide_0.level_dimensions
        # fetch levels[0] res of whole WSI with only the area including arteries
        region_rgba = slide_0.read_region((xmin, ymin), 0, (xmax-xmin, ymax-ymin))
    slide = np.asarray(region_rgba)
    print("fetched shape",slide.shape)
    # origin slide is in RGBA format, convert it to RGB
    slide = cv2.cvtColor(slide, cv2.COLOR_RGBA2RGB)
    # WSI-level analysis
    df = wsi_analysis(slide, ann, (xmin, xmax, ymin, ymax), wsi_id, df)
    # Rewrite the full table after every slide so partial results survive a crash.
    df.to_json(path_to_save, orient="records", lines=True)
    print(wsi_id, " costs, ", time.time() - start_time_wsi)
    print()

print("In total, it costs ", time.time() - start_time)