In [1]:
# Standard library
import glob
import os
import os.path
from os import path
import pickle
import sys
import warnings

# Third-party / project. The relative order of these lines is kept exactly as
# in the original cell: the star imports can re-export names, and the explicit
# imports that follow them must keep taking precedence.
import numpy as np
import h5py
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.graph_objs import *
import plotly.express as px
import hdbscan
import pandas as pd
import umap
from graviti import *
import networkx as nx
from scipy import sparse, linalg
warnings.filterwarnings('ignore')

In [2]:
dirname = '/home/garner1/Work/pipelines/WSI-analysis/SG/pipeline/data/id_52'  #sys.argv[1] # the directory where features.npz files are located
sample = '52' #sys.argv[2]  # the sample id

counter = 0
for f in glob.glob(dirname+'/*features.npz'): # for every fov
    counter += 1
    if counter == 1:            # set up the data arrays
        data = np.load(f,allow_pickle=True)
        fov = data['fov']
        xy = data['centroids']
        morphology = data['morphology']
    else:                       # update the data arrays
        data = np.load(f,allow_pickle=True)
        fov = np.vstack((fov,data['fov']))
        xy = np.vstack((xy, data['centroids']))
        morphology = np.vstack((morphology, data['morphology']))

# Create dataframes
df_fov = pd.DataFrame(data=fov, columns=['fov_row','fov_col'])
df_xy = pd.DataFrame(data=xy, columns=['cx','cy'])
df_morphology = pd.DataFrame(data=morphology, columns=['area','perimeter','solidity','eccentricity','circularity','mean_intensity','std_intensity'])

# Concatenate all dataframes
df = pd.concat([df_fov,df_xy, df_morphology],axis=1)

# filter by percentiles in morphologies (hardcoded in function filtering)
fdf = filtering(df) # .sample(n=100000)

In [5]:
# Display the column labels of the concatenated (unfiltered) dataframe.
df.columns

Index(['fov_row', 'fov_col', 'cx', 'cy', 'area', 'perimeter', 'solidity',
       'eccentricity', 'circularity', 'mean_intensity', 'std_intensity'],
      dtype='object')

In [10]:
# Get the positions of centroids
pos = fdf[fdf.columns[2:4]].to_numpy()
nn = 10 # number of nearest neighbor in umap
print('Building the UMAP graph')

filename = '../py/'+str(sample)+'.graph.npz' # the adj sparse matrix
A = sparse.load_npz(filename) if path.exists(filename) else None
# Only trust the cached matrix when it has one row per centroid; a cache
# written for a different (e.g. differently filtered) table is rebuilt.
graph_rebuilt = A is None or A.shape[0] != pos.shape[0]
if graph_rebuilt:
    print('Creating the graph')
    A = space2graph(pos,nn)
    sparse.save_npz(filename, A)
else:
    print('The graph already exists')

filename = '../py/'+str(sample)+'.graph.pickle'    # the networkx obj
G = None
# A cached network is only reused when the adjacency matrix it was derived
# from was itself reused and the node count still matches.
if path.exists(filename) and not graph_rebuilt:
    # nx.read_gpickle was removed in NetworkX 3.0; it was a thin wrapper around
    # pickle, so files it wrote load with pickle directly.
    # NOTE: unpickling executes arbitrary code; only load caches you created.
    with open(filename, 'rb') as fh:
        G = pickle.load(fh)
    if G.number_of_nodes() != A.shape[0]:
        G = None

if G is not None:
    print('The network already exists')
else:
    print('Creating the network')
    # from_scipy_sparse_matrix was removed in NetworkX 3.0 in favour of
    # from_scipy_sparse_array; use whichever this installation provides.
    if hasattr(nx, 'from_scipy_sparse_array'):
        G = nx.from_scipy_sparse_array(A, edge_attribute='weight')
    else:
        G = nx.from_scipy_sparse_matrix(A, edge_attribute='weight')
    with open(filename, 'wb') as fh:
        pickle.dump(G, fh, pickle.HIGHEST_PROTOCOL)

Building the UMAP graph
The graph already exists
The network already exists


In [30]:
def mm(A,times):
    """Left-multiply A by itself `times` times.

    Parameters
    ----------
    A : square matrix-like object exposing ``.dot`` (numpy array or scipy
        sparse matrix).
    times : int
        Number of matrix products to perform. ``times=1`` gives ``A.dot(A)``,
        ``times=2`` gives ``A.dot(A.dot(A))``, and so on.

    Returns
    -------
    The (times+1)-th power of A. For ``times == 0`` no product is performed
    and A itself is returned (the same object, not a copy).

    Raises
    ------
    ValueError
        If ``times`` is negative.
    """
    if times < 0:
        raise ValueError('times must be a non-negative integer')
    # Starting from A makes the zero-product case well defined; the original
    # left M unbound when times <= 0.
    M = A
    for _ in range(times):
        M = A.dot(M)
    return M

In [71]:
def smoothed_covd(A,fdf,r):
    """Weighted covariance descriptor of the morphology around every node.

    For each row of ``M = mm(A, r)`` the non-zero entries select the
    neighbouring nodes and supply their weights; the weighted covariance
    matrix of the neighbours' morphological features is computed and its upper
    triangle (diagonal included) is flattened into one descriptor row.

    Parameters
    ----------
    A : square sparse adjacency matrix whose i-th row/column corresponds to
        the i-th row (by position) of ``fdf``.
    fdf : pandas.DataFrame containing the seven morphology columns listed in
        ``feature_cols`` below.
    r : int
        Forwarded to ``mm`` as the number of matrix products.

    Returns
    -------
    numpy.ndarray of shape ``(A.shape[0], 28)`` (28 = 7*8/2). Rows whose node
    has fewer than two weighted neighbours are left at zero, because a
    covariance is undefined there.
    """
    # The intensity spread column is named 'std_intensity' in the table; the
    # previous 'cov_intensity' label does not exist and raised a KeyError.
    feature_cols = ['area','perimeter','solidity','eccentricity','circularity','mean_intensity','std_intensity']
    features = fdf[feature_cols].to_numpy() # extracted once, indexed by position below
    n_features = features.shape[1]
    iu1 = np.triu_indices(n_features) # the indices of the upper triangular part

    # CSR gives each row's column indices and values as contiguous slices, so
    # no full-length mask has to be rebuilt for every row.
    M = sparse.csr_matrix(mm(A,r))
    descriptor = np.zeros((A.shape[0], n_features*(n_features+1)//2))
    for row_ID in range(A.shape[0]):
        start, stop = M.indptr[row_ID], M.indptr[row_ID+1]
        neighbours = M.indices[start:stop]
        weights = M.data[start:stop]
        stored_nonzero = weights != 0 # drop explicitly stored zeros
        neighbours, weights = neighbours[stored_nonzero], weights[stored_nonzero]
        if neighbours.size < 2:
            continue
        # Rows of `features` are selected by neighbour position (the column
        # indices of M), weighted by the corresponding entries of M.
        C = np.cov(features[neighbours],rowvar=False,aweights=weights)
        descriptor[row_ID,:] = C[iu1]
    return descriptor

In [None]:
# Build the covariance descriptors with r=1 and preview the first three rows.
descriptor = smoothed_covd(A, fdf, r=1)
descriptor[:3]

In [None]:
def new_smoothing(W,data,radius):
    """Average `data` over successive steps of a random walk on W.

    W is turned into a row-stochastic matrix S (each row divided by the sum of
    the absolute values of its entries; all-zero rows stay zero). The result
    is the mean of ``data, S.data, S^2.data, ..., S^radius.data``.

    Parameters
    ----------
    W : square sparse (or dense) weight matrix with one row per row of `data`.
    data : array-like of shape (n_nodes, n_features). It is not modified.
    radius : int
        Number of diffusion steps. ``radius == 0`` returns a float copy of
        `data`.

    Returns
    -------
    numpy.ndarray with the same shape as `data`.

    Raises
    ------
    ValueError
        If ``radius`` is negative.

    Notes
    -----
    The previous version never added the diffused values to the running sum,
    so it returned the input divided by ``radius``. The average over steps
    0..radius implemented here is the reading suggested by its running-sum
    and division pattern.
    NOTE(review): confirm this is the intended definition.
    """
    if radius < 0:
        raise ValueError('radius must be a non-negative integer')

    # Row-stochastic matrix, built with scipy so the function does not depend
    # on a `normalize` helper that is not imported in this notebook.
    W = sparse.csr_matrix(W)
    row_norms = np.asarray(abs(W).sum(axis=1), dtype=float).ravel()
    inverse_norms = np.divide(1.0, row_norms, out=np.zeros_like(row_norms), where=row_norms > 0)
    S = sparse.diags(inverse_norms).dot(W)

    current = np.asarray(data, dtype=float)
    total = current.copy() # copy so the caller's array is never modified in place
    for _ in range(radius):
        current = S.dot(current)
        total += current
    return total / (radius + 1)


In [1]:
# Smooth the feature columns of the filtered table and rebuild a dataframe
# that keeps the first four columns of `fdf` unchanged.
print('Smooth the morphology')
# Passed to smoothing() as its third argument.
radius = 10000
# Every column of fdf from the fifth onward, as a numpy array.
data = fdf[fdf.columns[4:]].to_numpy()
# NOTE(review): `smoothing` is not defined in the visible cells; presumably it
# comes from `from graviti import *`. Confirm, and confirm whether it modifies
# `data` in place.
smooth_data = smoothing(A,data,radius)
# Wrap the result with the same column labels and index as the columns it was taken from.
new_fdf = pd.DataFrame(data=smooth_data,columns=fdf.columns[4:],index=fdf.index)
# Rebinds `df`, which an earlier cell assigned to the unfiltered concatenated
# table; after this line `df` has the rows of `fdf` instead.
df = pd.concat([fdf[fdf.columns[:4]],new_fdf],axis=1)