In [1]:
import sys  
sys.path.insert(0, '../py')
from graviti import *

import json
import numpy as np
from skimage.draw import polygon
from skimage import io
from matplotlib import pyplot as plt
import glob
import pandas as pd
import os
from scipy.sparse import coo_matrix
from skimage.measure import label, regionprops#, regionprops_table
import math

In [2]:
# Directory holding the per-patch polygon CSV files of the slide named in the path.
# NOTE(review): this is an absolute, machine-specific path — adjust it before running elsewhere.
sample = '/home/garner1/Downloads/TCGA-O1-A52J-01Z-00-DX1.26F6ECCA-D614-4950-98E6-4D76E82F71B4.svs/luad_polygon/TCGA-O1-A52J-01Z-00-DX1.26F6ECCA-D614-4950-98E6-4D76E82F71B4.svs'
# glob.glob returns matches in arbitrary order; sorting makes the patch order
# (and therefore the row order of everything derived from it) reproducible.
patchlist = sorted(glob.glob(os.path.join(sample, '*.csv')))

In [None]:
# Morphometric features extracted for every nucleus. The two centroid entries are
# read from the region's 'centroid' tuple; every other name is looked up directly
# on the region returned by regionprops.
features = ['centroid_x','centroid_y','area','eccentricity','orientation','perimeter','solidity']

# Pixel accumulators read by the mask-assembly cell further down. Nothing in this
# cell fills them (the per-pixel accumulation is disabled to avoid building a very
# large mask), so they stay empty.
x_list = []
y_list = []

records = []  # one dict of morphometrics per nucleus; turned into `data` once, after the loop
cells = 0     # number of polygons read from non-empty patches
counter = 0   # number of non-empty patches processed so far
for filename in patchlist:  # loop over patches
    df = pd.read_csv(filename)
    # `not`, rather than `~`: on a Python bool `~` yields -2 or -1, both truthy,
    # so a `~df.empty` test can never skip an empty patch.
    if df.empty:
        continue

    cells += df['Polygon'].shape[0]  # update cell count
    counter += 1
    print(counter)

    for cell in df['Polygon']:  # loop over cells in patch
        # The polygon is serialized as 'x0:y0:x1:y1:...' wrapped in one leading and
        # one trailing character, which are stripped before parsing.
        coords = np.fromstring(cell[1:-1], dtype=float, sep=':')
        n_vertices = coords.size // 2  # a trailing unpaired value is ignored
        if n_vertices < 3:
            continue  # fewer than three vertices cannot enclose any pixel
        poly = coords[:2 * n_vertices].reshape(n_vertices, 2)  # columns: x, y

        # Centre the polygon, size the mask to its bounding box, then shift the
        # polygon so that its minimum corner sits at the mask origin.
        poly -= poly.mean(axis=0)
        extent = np.ceil(poly.max(axis=0) - poly.min(axis=0)).astype(int)
        mask = np.zeros(tuple(extent), dtype=int)
        poly -= poly.min(axis=0)
        idx0, idx1 = polygon(poly[:, 0], poly[:, 1], mask.shape)  # pixels inside the polygon
        mask[idx0, idx1] = 1
        if not mask.any():
            continue  # degenerate polygon: no region to measure

        # TODO: morphometric features should be computed excluding peripheral cells.

        # NOTE(review): the `coordinates` keyword is deprecated/removed in newer
        # scikit-image releases — confirm against the installed version.
        regions = regionprops(label(mask), coordinates='rc')
        region = regions[0]  # only the first labelled component is measured
        # The centroid is expressed in the coordinates of this nucleus' own mask
        # (origin at the polygon's bounding-box corner), not in slide coordinates.
        centroid = region['centroid']
        records.append({
            name: centroid[0] if name == 'centroid_x'
            else centroid[1] if name == 'centroid_y'
            else region[name]
            for name in features
        })

# Build the table once: growing a DataFrame row by row is quadratic, and
# DataFrame.append no longer exists in pandas >= 2.0.
data = pd.DataFrame(records, columns=features)

In [None]:
def _rasterize_nucleus(cell):
    """Rasterize one serialized polygon into a tight binary mask.

    `cell` is a string of the form 'x0:y0:x1:y1:...' wrapped in one leading and
    one trailing character. Returns an int array whose shape is the polygon's
    bounding box (rounded up) with ones inside the polygon, or None when the
    polygon is degenerate (fewer than three vertices, or no pixel covered).
    """
    coords = np.fromstring(cell[1:-1], dtype=float, sep=':')
    n_vertices = coords.size // 2  # a trailing unpaired value is ignored
    if n_vertices < 3:
        return None
    poly = coords[:2 * n_vertices].reshape(n_vertices, 2)  # columns: x, y

    # Centre the polygon, size the mask to its bounding box, then shift the
    # polygon so that its minimum corner sits at the mask origin.
    poly -= poly.mean(axis=0)
    extent = np.ceil(poly.max(axis=0) - poly.min(axis=0)).astype(int)
    mask = np.zeros(tuple(extent), dtype=int)
    poly -= poly.min(axis=0)
    idx0, idx1 = polygon(poly[:, 0], poly[:, 1], mask.shape)  # pixels inside the polygon
    mask[idx0, idx1] = 1
    return mask if mask.any() else None


def process_patch_of_polygons(filename,features): # given the patch filename, generate morphometrics
    """Compute per-nucleus morphometrics for one patch CSV and pickle them.

    Parameters
    ----------
    filename : str
        Path to a patch CSV with a 'Polygon' column of serialized polygons.
    features : list of str
        Names of the morphometrics to store. 'centroid_x' and 'centroid_y' map
        to the two entries of the region's 'centroid'; any other name is looked
        up directly on the region returned by regionprops.

    Returns
    -------
    None
        The resulting DataFrame (one row per measurable nucleus, columns in the
        order of `features`) is written to `filename + '.morphometrics.pkl'`.
        An empty patch produces an empty table.

    Notes
    -----
    Centroids are expressed in the coordinates of each nucleus' own mask
    (origin at the polygon's bounding-box corner), not in slide coordinates.
    Degenerate polygons are skipped instead of raising IndexError.
    """
    df = pd.read_csv(filename)
    records = []  # one dict per nucleus; converted to a DataFrame once at the end
    # `not`, rather than `~`: on a Python bool `~` yields -2 or -1, both truthy.
    if not df.empty:
        for cell in df['Polygon']:  # loop over cells in patch
            mask = _rasterize_nucleus(cell)
            if mask is None:
                continue

            # NOTE(review): the `coordinates` keyword is deprecated/removed in newer
            # scikit-image releases — confirm against the installed version.
            regions = regionprops(label(mask), coordinates='rc')
            region = regions[0]  # only the first labelled component is measured
            centroid = region['centroid']
            records.append({
                name: centroid[0] if name == 'centroid_x'
                else centroid[1] if name == 'centroid_y'
                else region[name]
                for name in features
            })

    # Build the table once: growing a DataFrame row by row is quadratic, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    data = pd.DataFrame(records, columns=features)
    data.to_pickle(filename+'.morphometrics.pkl')
    return

In [3]:
import timeit
import multiprocessing
from joblib import Parallel, delayed
from datetime import datetime
from tqdm import tqdm

# Morphometrics stored for every nucleus (see process_patch_of_polygons).
features = ['centroid_x','centroid_y','area','eccentricity','orientation','perimeter','solidity']

num_cores = multiprocessing.cpu_count() # numb of cores
# One job per patch file. No emptiness filter is applied here: the previous
# `if ~pd.read_csv(filename).empty` test was always truthy (`~` on a bool yields
# -2 or -1), so it excluded nothing while parsing every CSV serially in the
# parent process. The worker reads the file itself and writes an empty table
# for a patch without polygons.
output = Parallel(n_jobs=num_cores)(
    delayed(process_patch_of_polygons)(filename, features) for filename in tqdm(patchlist)
    )

  7%|▋         | 29/400 [01:47<20:02,  3.24s/it]

KeyboardInterrupt: 

In [None]:
# Report how many polygons were read and the shape of the morphometrics table.
print(f'The number of cells is: {cells}')
print(data.shape)

In [None]:
# Turn the two pixel-coordinate lists into column vectors and release the lists
# before joining, so that lists and the joined array never coexist in memory.
# Expects x_list / y_list to have been populated by an earlier cell.
xx = np.asarray(x_list)[:, np.newaxis]
yy = np.asarray(y_list)[:, np.newaxis]
del x_list, y_list
arr = np.concatenate((xx, yy), axis=1)  # one (x, y) row per pixel
del xx, yy
# Recentre on the integer part of the mean, then shift so the smallest
# coordinate along each axis becomes (approximately) zero.
arr -= arr.mean(axis=0).astype(int)
mini = arr.min(axis=0)
arr -= mini.astype(int)  # subtract the min to translate the mask

In [None]:
# Round the shifted coordinates to the nearest integer indices.
row = np.rint(arr[:, 0]).astype(int)
col = np.rint(arr[:, 1]).astype(int)
# Sparse boolean matrix with one stored entry per listed (row, col) pixel.
mtx = coo_matrix((np.ones(len(row)), (row, col)), dtype=bool)

In [None]:
# Drop the dense coordinate array and the index vectors; `mtx` was already built from them.
del arr, row, col

In [None]:
FOV_SIZE = 4000     # side length, in matrix cells, of each square field of view
FOV_EXTENT = 20000  # tiles start at 0, FOV_SIZE, 2*FOV_SIZE, ... below this bound

# Convert to CSR once: the conversion does not depend on the tile, and COO
# matrices do not support the row/column selection used below.
mtx_csr = mtx.tocsr()
for r0 in range(0, FOV_EXTENT, FOV_SIZE):
    c0 = r0  # column offset equals row offset: tiles run along the main diagonal
    select_row = np.arange(r0, r0 + FOV_SIZE)
    select_col = np.arange(c0, c0 + FOV_SIZE)
    fov = mtx_csr[select_row, :][:, select_col]

    plt.figure(figsize=(10,10))
    # toarray() gives a plain ndarray (todense() returns the discouraged np.matrix).
    io.imshow(fov.toarray(),cmap='gray')
    plt.show()

In [None]:
# Display the (row, col) index arrays of the nonzero entries of the sparse mask.
mtx.nonzero()