In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# import necessary packages

import time
import geojson
import numpy as np
import pandas as pd
import openslide

from data_utils import *
from vis_utils import *
from measure_utils import *

In [2]:
DIR_WSI = "/Users/jinzhou/Desktop/Paper_MIA/data/wsi"
DIR_ANN = "/Users/jinzhou/Desktop/Paper_MIA/data/ann_geojson"
DIR_SAVE_RESULTS = "/Users/jinzhou/Desktop/Paper_MIA/results"
DIR_SAVE_FIGURE = "/Users/jinzhou/Desktop/Paper_MIA/results/figures"

In [3]:
def artery_analysis(slide, ann_outer, ann, slide_boundries, wsi_id, artery_id, df):
    # get coordinates of arteries/media_outer_boundries
    cnt_outer = ann_outer["geometry"]["coordinates"]
    # based on outer, get middle and inner insides/intersecs
    cnts_middle = get_cnts_inside(ann, cnt_outer, target="Intima")
    cnts_inner = get_cnts_inside(ann, cnt_outer, target="Lumen")
    if len(cnts_inner) == 0 or len(cnts_inner) == 0:
        print("No Intima/Media Contour: ", artery_id)
        return df
    # adjust coords based on slide boundry
    (cnt_outer, cnts_middle, cnts_inner) = \
        adjust_artery_coords_by_boundry(cnt_outer, cnts_middle,
            cnts_inner, slide_boundries)
    
    # adjust coords based on artery boundry
    artery_boundry = get_border_of_cnt(cnt_outer, border=50)
    (cnt_outer, cnts_middle, cnts_inner) = \
        adjust_artery_coords_by_boundry(cnt_outer, cnts_middle,
            cnts_inner, artery_boundry) 
    
    
    (xmin, xmax, ymin, ymax) = artery_boundry
    
    curr_slide = slide[ymin:ymax, xmin:xmax].copy()
    curr_slide = plot_artery_ann(curr_slide, cnt_outer, cnts_middle, cnts_inner)
    
    (h, w) = curr_slide.shape[:2]
    curr_ann = np.zeros((h, w, 3), np.uint8)
    curr_ann = plot_artery_ann(curr_ann, cnt_outer, cnts_middle, cnts_inner)
    
#     idx_inner = get_cnt_idx_w_largest_area(cnts_inner)
#     cnt_inner = cnts_inner[idx_inner]
    
    dir_save = os.path.join(DIR_SAVE_FIGURE, wsi_id, artery_id)
    
    
    area_lumen = np.sum([cv2.contourArea(cnt) for cnt in cnts_inner])
    area_intima = np.sum([cv2.contourArea(cnt) for cnt in cnts_middle]) - area_lumen    
    area_media = cv2.contourArea(cnt_outer) - area_intima - area_lumen
    thick_media = []
    thick_intima = []
    for idx_inner, cnt_inner in enumerate(cnts_inner):
        for idx_middle, cnt_middle in enumerate(cnts_middle):
            if cnt_polygon_test(cnt_inner, cnt_middle):
                # radial lines intersecs with excludde should be discarded
                exclude = (cnts_middle[:idx_middle] + cnts_middle[idx_middle+1:] + 
                           cnts_inner[:idx_inner] + cnts_inner[idx_inner+1:])
                thick_media_sub, thick_intima_sub=measure_thickness(
                    cnt_outer, cnt_middle, cnt_inner, angle_width=15, exclude=exclude, 
                    vis=curr_ann, dir_save=dir_save)
                thick_media = thick_media+thick_media_sub
                thick_intima = thick_intima+thick_intima_sub
                row = {'WSI_ID':wsi_id, 'Artery_ID':artery_id, 
                       'Thickness_Media':thick_media, 'Thickness_Intima':thick_intima,
                       'Area_Media': area_media, 'Area_Intima': area_intima, 'Area_Lumen': area_lumen}
                df = df.append(row, ignore_index=True)
#             break
#     save_img(curr_slide, DIR_SAVE_FIGURE, wsi_id, artery_id) 
    save_img(curr_ann, DIR_SAVE_FIGURE, wsi_id, artery_id+'_ann') 
#     imshow_k_in_row([curr_slide, curr_ann])
    return df

In [4]:
def wsi_analysis(slide, ann, slide_boundries, wsi_id, df):
    
    # get ann for arteries/media_outer_boundries
    ann_outers = [ann_i for ann_i in ann if "name" in ann_i["properties"]]
    print("Number of arteries in {} is {}".format(wsi_id, len(ann_outers)))
    for i, ann_outer in enumerate(ann_outers):
        artery_id = ann_outer["properties"]["name"]
#         if artery_id!="A47":continue
        print(artery_id)
        # artery-level analysis
        df = artery_analysis(slide, ann_outer, ann, slide_boundries, wsi_id, artery_id, df)
    return df

In [5]:
start_time = time.time()
df = pd.DataFrame(columns = ['WSI_ID', 'Artery_ID', 'Thickness_Media', 'Thickness_Intima', 
                            'Area_Media', 'Area_Intima', 'Area_Lumen'])
path_to_save = os.path.join(DIR_SAVE_RESULTS, "thickness.json")

for wsi in os.listdir(DIR_WSI):
    if (not wsi.endswith(".ndpi")) and (not wsi.endswith(".svs")):
        continue
    start_time_wsi = time.time()
    path_wsi = os.path.join(DIR_WSI, wsi)
    wsi_id = ".".join(wsi.split(".")[:-1])
#     if wsi_id != "2_6063_A_0045151": continue
#     if wsi_id in ["015_TRI_235805", "16-032-02_TRI_11628_copy", "16-010-02_TRI_11632_copy"]:
#         continue
#     if wsi_id.startswith("2"): continue

    print("Working on Case: ", wsi_id)
    path_ann = os.path.join(DIR_ANN, wsi_id+".geojson")
    
    with open(path_ann) as f:
        exported = geojson.load(f)
        ann = exported['features']
    
    for i in range(len(ann)):
        coords_raw = ann[i]['geometry']['coordinates']
        ann[i]['geometry']['coordinates'] = clean_coord(coords_raw)
    
    # find the area including arteries, make it easier to plot
    xmin, xmax, ymin, ymax = get_border_of_ann(ann)
    
    # open slide with formats of ndpi or svs
    slide_0 = openslide.OpenSlide(path_wsi)

    # fetch levels[0] res of whole WSI with only the area including arteries
    slide = slide_0.read_region((xmin, ymin), 0, (xmax-xmin, ymax-ymin))
    slide = np.asarray(slide)
    print("Fetched Shape",slide.shape)
    # origin slide is in RGBA format, convert it to RGB and save to model data dir
    slide = cv2.cvtColor(slide, cv2.COLOR_RGBA2RGB)
    # WSI-level analysis
    df = wsi_analysis(slide, ann, (xmin, xmax, ymin, ymax), wsi_id, df)
    print(wsi_id, " costs, ", time.time() - start_time_wsi)
    print()

df.to_json(path_to_save, orient="records", lines=True)
print("In total, it costs ", time.time() - start_time)

Working on Case:  12_26609_022_015 L2 TRI
Fetched Shape (9195, 14790, 4)
Number of arteries in 12_26609_022_015 L2 TRI is 10
A04
A01
A02
A03
A10
A11
A12
A07
A05
A06
12_26609_022_015 L2 TRI  costs,  15.005650997161865

Working on Case:  11_26609_020_006 A15 TRI
Fetched Shape (31498, 47835, 4)
Number of arteries in 11_26609_020_006 A15 TRI is 33
A02
A03
A01
A04
A05
A06
A07
A08
A09
A10
A11
A12
A13
A14
A15
A16
A17
A19
A18
A20
A21
A22
A23
A24
A25
A26
A27
A29
A30
A31
A32
A33
A28
11_26609_020_006 A15 TRI  costs,  85.77194690704346

Working on Case:  11_26609_098_004_L11 TRI
Fetched Shape (11821, 16034, 4)
Number of arteries in 11_26609_098_004_L11 TRI is 8
A01
A03
A02
A05
A04
A06
A07
A08
11_26609_098_004_L11 TRI  costs,  12.283273220062256

Working on Case:  2_4841_A_0045207
Fetched Shape (21520, 16631, 4)
Number of arteries in 2_4841_A_0045207 is 11
A01
A02
A03
A04
A05
A06
A07
A08
A09
A10
A11
2_4841_A_0045207  costs,  29.479982137680054

Working on Case:  2_6128_A_0037277
Fetched Shape (1840

A172
A173
A174
A175
A176
A177
A178
A179
A180
A181
A182
A183
A184
A185
A186
A187
A188
A189
A190
A191
A192
A193
A194
A195
A196
A197
A198
A199
A200
A201
A202
A203
A204
A205
A206
A207
A208
A209
A210
A211
A212
A213
A214
A215
A216
A217
A218
A219
A220
A221
A222
A223
A224
A225
A226
A227
A228
A229
A230
A231
A232
A233
A234
A235
A236
A237
A238
A239
A240
A241
A242
A243
A244
A245
A246
A247
A248
A249
A250
A251
A252
A253
A254
A255
A256
A257
A258
A259
A260
A261
A262
A263
A264
A265
A266
A60
A110
A05
A31
16-010-02_TRI_11632_copy  costs,  1454.370735168457

Working on Case:  12_26609_020_018 L05 A1 TRI
Fetched Shape (27758, 20337, 4)
Number of arteries in 12_26609_020_018 L05 A1 TRI is 18
A01
A02
A05
A09
A10
A11
A07
A06
A14
A18
A16
A15
A12
A13
A17
A08
A04
A03
12_26609_020_018 L05 A1 TRI  costs,  41.89551591873169

In total, it costs  2884.0508069992065


In [6]:
df.shape

(753, 7)