In [1]:
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skan import skeleton_to_csgraph, _testdata, draw, Skeleton, summarize

sys.path.insert(0, os.path.abspath('../'))
from src.lib import centroids as centutils

plt.rcParams['figure.figsize'] = [15, 15]

  @numba.jitclass(csr_spec)


In [2]:
def get_skeleton_data(sk_img):
    sk_img_gray = cv2.cvtColor(sk_img, cv2.COLOR_BGR2GRAY)
    pixel_graph, coordinates, degrees = skeleton_to_csgraph(sk_img_gray)
    branch_data = summarize(Skeleton(sk_img_gray))
    return degrees, branch_data

In [19]:
def get_area_features(nuclei_img, jbin_img):
    centroids = centutils.get_nuclei_centroids(nuclei_img)
    centroids = centutils.clean_centroids(centroids, jbin_img)
    centroids, moments = centutils.get_moments_cells(centroids, jbin_img)
    
    ret = {}
    
    cell_area = 0
    total_area = jbin_img.shape[0] * jbin_img.shape[1]
    
    for m in moments:
        cell_area += m['m00']
        
    white_area = len(jbin_img[np.where(jbin_img != 0)]) 
    print(white_area)
    ret['cell_area_ratio'] = cell_area / total_area
    ret['white_area_ratio'] = white_area / total_area
    
    return ret

In [28]:
def get_skeleton_features(sk_img):
    degrees, branch_data = get_skeleton_data(sk_img)
    
    ret = {}
    
    # End-to-end
    e2e_data = branch_data[branch_data['branch-type'] == 0]
    ret['e2e_n'] = len(e2e_data)
    ret['e2e_distance_mean'] = e2e_data['branch-distance'].mean()
    ret['e2e_distance_std'] = e2e_data['branch-distance'].std()
    ret['e2e_eu_distance_mean'] = e2e_data['euclidean-distance'].mean()
    ret['e2e_eu_distance_std'] = e2e_data['euclidean-distance'].std()
    ret['e2e_distance_ratio_mean'] = (e2e_data['euclidean-distance'] / e2e_data['branch-distance']).mean()
    ret['e2e_distance_ratio_std'] = (e2e_data['euclidean-distance'] / e2e_data['branch-distance']).std()
    
    # Junction-to-end
    j2e_data = branch_data[branch_data['branch-type'] == 1]
    ret['j2e_n'] = len(j2e_data)
    ret['j2e_distance_mean'] = j2e_data['branch-distance'].mean()
    ret['j2e_distance_std'] = j2e_data['branch-distance'].std()
    ret['j2e_eu_distance_mean'] = j2e_data['euclidean-distance'].mean()
    ret['j2e_eu_distance_std'] = j2e_data['euclidean-distance'].std()
    ret['j2e_distance_ratio_mean'] = (j2e_data['euclidean-distance'] / j2e_data['branch-distance']).mean()
    ret['j2e_distance_ratio_std'] = (j2e_data['euclidean-distance'] / j2e_data['branch-distance']).std()
    
    # Junction-to-junction
    j2j_data = branch_data[branch_data['branch-type'] == 2]
    ret['j2j_n'] = len(j2j_data)
    ret['j2j_distance_mean'] = j2j_data['branch-distance'].mean()
    ret['j2j_distance_std'] = j2j_data['branch-distance'].std()
    ret['j2j_eu_distance_mean'] = j2j_data['euclidean-distance'].mean()
    ret['j2j_eu_distance_std'] = j2j_data['euclidean-distance'].std()
    ret['j2j_distance_ratio_mean'] = (j2j_data['euclidean-distance'] / j2j_data['branch-distance']).mean()
    ret['j2j_distance_ratio_std'] = (j2j_data['euclidean-distance'] / j2j_data['branch-distance']).std()

    # Nodes
    nodes = degrees[np.where(degrees > 2)]
    ret['nodes_n'] = len(nodes)
    ret['nodes_max'] = nodes.max()
    ret['nodes_mean'] = nodes.mean()
    ret['nodes_std'] = nodes.std()
    
    return ret

In [29]:
FILENAMES = ['ACTB_B2_CA', 'ACTB_B2_NO', 'ACTB_B2_NS', 'ACTN4_L2_NO', 
         'ACTN4_L3_NO', 'ACTR3_C3_NO', 'ADAM9_C2_NO', 'ADD3_A2_CA',
         'AKAP5_B2_CA', 'AKT1_B1_CA', 'AKT1_B2_CA', 'NegsiRNA_A2_NS',
         'NegsiRNA_A2b_CA', 'NegsiRNA_A2b_NO', 'NegsiRNA_A2b_NS', 'NegsiRNA_A3_CA',
         'NegsiRNA_A3_NO', 'NegsiRNA_A3_NS', 'NegsiRNA_B1_CA', 'NegsiRNA_B1_NO']


def main():
    ft_dict = {}
    for name in FILENAMES:
        print("###################################################")
        print(name)
        nuclei_path = '../data/{name}n.tif'.format(name=name)
        jbin_path = '../data/{name}j-BI.tif'.format(name=name)
        jsk_path = '../data/{name}j-SK.tif'.format(name=name)
        j_path = '../data/{name}j.tif'.format(name=name)

        nuclei_img = cv2.imread(nuclei_path)
        jbin_img = cv2.imread(jbin_path)
        jsk_img = cv2.imread(jsk_path)
        j_img = cv2.imread(j_path)

        features = {}
        area_ft = get_area_features(nuclei_img, jbin_img)
        sk_ft = get_skeleton_features(jsk_img)
        
        features.update(area_ft)
        features.update(sk_ft)
        
        ft_dict[name] = features
    return ft_dict
    
ft = main()

###################################################
ACTB_B2_CA
1983906
###################################################
ACTB_B2_NO
2037924
###################################################
ACTB_B2_NS
2784597
###################################################
ACTN4_L2_NO
933585
###################################################
ACTN4_L3_NO
1869606
###################################################
ACTR3_C3_NO
2428842
###################################################
ADAM9_C2_NO
1992966
###################################################
ADD3_A2_CA
1729608
###################################################
AKAP5_B2_CA
2248413
###################################################
AKT1_B1_CA
1324197
###################################################
AKT1_B2_CA
2853879
###################################################
NegsiRNA_A2_NS
1766124
###################################################
NegsiRNA_A2b_CA
2201271
###################################################
NegsiRNA_A2b

In [30]:
len(ft)

20

In [41]:
name = 'ACTB_B2_NO'
nuclei_path = '../data/{name}n.tif'.format(name=name)
jbin_path = '../data/{name}j-BI.tif'.format(name=name)
jsk_path = '../data/{name}j-SK.tif'.format(name=name)
j_path = '../data/{name}j.tif'.format(name=name)

nuclei_img = cv2.imread(nuclei_path)
jbin_img = cv2.imread(jbin_path)
jsk_img = cv2.imread(jsk_path)
j_img = cv2.imread(j_path)

_, branch_data = get_skeleton_data(jsk_img)

In [44]:
branch_data[branch_data['branch-type'] == 0]

Unnamed: 0,skeleton-id,node-id-src,node-id-dst,branch-distance,branch-type,mean-pixel-value,stdev-pixel-value,image-coord-src-0,image-coord-src-1,image-coord-dst-0,image-coord-dst-1,coord-src-0,coord-src-1,coord-dst-0,coord-dst-1,euclidean-distance
1076,653,26435,26650,6.0,0,1.0,0.0,1012.0,0.0,1018.0,0.0,1012.0,0.0,1018.0,0.0,6.0


In [74]:
import pandas as pd
df = pd.DataFrame.from_dict(ft, orient='index')

In [94]:
cleaned_df = df.drop(columns=['e2e_distance_mean', 'e2e_distance_std', 
                              'e2e_eu_distance_mean', 'e2e_eu_distance_std', 
                              'e2e_distance_ratio_mean', 'e2e_distance_ratio_std'])

In [95]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

In [103]:
data_a = cleaned_df.to_numpy()
data_norm = normalize(data_a, axis=0)
pca = PCA(n_components=10)

In [104]:
pca.fit(data_norm)

PCA(n_components=10)

In [105]:
data_norm.shape

(20, 21)

In [106]:
data_norm

array([[0.21982905, 0.21573097, 0.17817416, 0.18697341, 0.25764603,
        0.2576734 , 0.25726874, 0.25839072, 0.22320695, 0.21996179,
        0.19407465, 0.23742384, 0.24264712, 0.23622889, 0.2403572 ,
        0.22319137, 0.21505365, 0.19034048, 0.21223818, 0.22334537,
        0.22022911],
       [0.21269611, 0.22160492, 0.02227177, 0.19370653, 0.21487156,
        0.1962254 , 0.21552304, 0.19837105, 0.2243994 , 0.16717285,
        0.19352173, 0.24944396, 0.25131734, 0.24822622, 0.24786125,
        0.22286673, 0.20836892, 0.1930868 , 0.25468582, 0.22340875,
        0.22378607],
       [0.20225304, 0.30279853, 0.04454354, 0.1802403 , 0.26914837,
        0.2271798 , 0.27043901, 0.23118823, 0.22423635, 0.16814376,
        0.19462757, 0.24227628, 0.21873676, 0.24406942, 0.2210883 ,
        0.22468262, 0.18432343, 0.18970672, 0.21223818, 0.22349552,
        0.21753505],
       [0.08079919, 0.10151852, 0.11135885, 0.16780993, 0.20760319,
        0.23333233, 0.20668958, 0.23560934, 0.2231836

In [107]:
pca.explained_variance_ratio_

array([0.55199858, 0.23757577, 0.09407713, 0.0564265 , 0.02674689,
       0.01411455, 0.00768569, 0.00494133, 0.00378534, 0.00121107])

In [108]:
pca.singular_values_

array([0.73911274, 0.48488949, 0.30512918, 0.23631062, 0.16269655,
       0.11818848, 0.0872134 , 0.06992999, 0.06120604, 0.03461991])

In [None]:
pca_array = pca.transform(data_norm)
fig, ax = plt.subplots(1)
ax.set_xlabel('Principal component 1')
ax.set_ylabel('Principal component 2')
plt.scatter(pca_array[:, 0], pca_array[:, 1])

labels = list(df.index)

for index, label in enumerate(labels):
    ax.text(pca_array[index, 0] + 10, pca_array[index, 1] + 10, label, fontsize=12)
    