In [1]:
import numpy as np
import umap

import cdlib
from cdlib import algorithms
import networkx as nx
from  scipy import sparse

import warnings
warnings.filterwarnings('ignore')

In [3]:
def build_graph(XYZ, n_neighbors=5, seed=42):
    """Build a weighted neighbourhood graph from coordinates using UMAP.

    Parameters
    ----------
    XYZ : array-like
        Data passed as the first argument of
        ``umap.umap_.fuzzy_simplicial_set``. In this notebook the caller
        passes one row per locus with its (x, y, z) coordinates.
    n_neighbors : int, default 5
        Neighbourhood size used by UMAP. This parameter has to be
        fine-tuned for the data at hand.
    seed : int, default 42
        Seed of the ``numpy.random.RandomState`` handed to UMAP.

    Returns
    -------
    The graph (weighted adjacency) produced by ``fuzzy_simplicial_set``.
    """
    result = umap.umap_.fuzzy_simplicial_set(
        XYZ,
        n_neighbors=n_neighbors,
        random_state=np.random.RandomState(seed=seed),
        metric='l2',
        metric_kwds={},
        knn_indices=None,
        knn_dists=None,
        angular=False,
        set_op_mix_ratio=1.0,
        local_connectivity=2.0,
        verbose=False,
    )
    # Depending on the umap-learn version, fuzzy_simplicial_set returns either
    # the graph alone or a tuple whose first element is the graph
    # (followed by sigmas, rhos, ...). Always hand back only the graph.
    if isinstance(result, tuple):
        return result[0]
    return result

def build_communities(adj):
    """Detect communities in a weighted graph with the Leiden algorithm.

    Parameters
    ----------
    adj : scipy sparse matrix
        Weighted adjacency matrix of the graph (as returned by
        ``build_graph``).

    Returns
    -------
    list of lists
        One list of nodes per detected community.
    """
    # networkx >= 3.0 removed from_scipy_sparse_matrix in favour of
    # from_scipy_sparse_array; use whichever this installation provides.
    from_sparse = getattr(nx, 'from_scipy_sparse_array', None) or nx.from_scipy_sparse_matrix
    g = from_sparse(adj)  # networkx graph with a 'weight' attribute on every edge

    # Edge weights, listed in the same order as g.edges().
    weights = [d['weight'] for (_, _, d) in g.edges(data=True)]

    # Find communities; in this example we use the Leiden algorithm.
    # TODO: check whether the algorithm is stochastic; if so, fix its random generator.
    leiden_coms = algorithms.leiden(g, weights=weights)
    return leiden_coms.communities

In [164]:
# Create random 3D structures:
# 100 structures, each made of 1000 loci (bins) with random (x, y, z) coordinates.
# Array layout: structures[locus, coordinate, structure]
# The generator is seeded so that Restart & Run All reproduces the same data.
rng = np.random.default_rng(42)
structures = rng.uniform(-100, 100, (1000, 3, 100))
max_numb_str = 10  # only the first max_numb_str structures are processed below
numb_str = structures.shape[2]   # number of structures
numb_loci = structures.shape[0]  # number of loci per structure

In [165]:
# Turn each selected structure into a weighted graph (built with UMAP in
# this example) and keep the communities found on it, keyed by structure index.
comm = dict()
selected_structures = range(numb_str)[:max_numb_str]
for structure in selected_structures:
    # (x, y, z) coordinates of every locus in this structure
    XYZ = structures[..., structure]
    # weighted graph of the structure
    adj = build_graph(XYZ)
    # communities of that graph
    comm[structure] = build_communities(adj)

In [166]:
# The matrix P is the analog of the Hi-C matrix:
# P[i, j] = number of processed structures in which loci i and j
#           belong to the same community.

# networkx >= 3.0 removed to_scipy_sparse_matrix; fall back to its replacement
# when the old function is not available.
_to_sparse = getattr(nx, "to_scipy_sparse_matrix", None) or nx.to_scipy_sparse_array

P = None
for structure in range(numb_str)[:max_numb_str]:  # for each structure
    # Start from a graph holding every locus so that the nodelist below is
    # always valid, even if a structure yields no community at all.
    G = nx.Graph()
    G.add_nodes_from(range(numb_loci))
    for community in comm[structure]:
        # loci of the same community are pairwise connected
        G.update(nx.complete_graph(community))

    co_occurrence = _to_sparse(G, nodelist=range(numb_loci))
    # The first structure defines P, the following ones are added to it.
    P = co_occurrence if P is None else P + co_occurrence

In [186]:
# Use NMF to decompose the V matrix, similarly to what is done in
# word x document decomposition.
# The matrix V is a (#pairs x #structures) matrix:
# V[(i, j), s] = 1 if the pair of loci (i, j) occurs in structure s.
# A pair (i, j) is stored at row i * numb_loci + j with i < j, so the same
# pair always maps to the same row whatever orientation the edge was built in.

pair_rows = []       # per structure: flat row index of every co-occurring pair
structure_cols = []  # per structure: matching column (structure) indices

for structure in range(numb_str)[:max_numb_str]:  # for each structure
    # Union of the complete graphs of all communities of this structure.
    G = nx.Graph()
    for community in comm[structure]:
        G.update(nx.complete_graph(community))

    if G.number_of_edges() == 0:
        continue  # nothing co-occurs in this structure

    edges = np.array(list(G.edges), dtype=np.int64)  # one (node_i, node_j) per row
    node_i = edges.min(axis=1)  # canonical orientation: node_i < node_j
    node_j = edges.max(axis=1)
    pair_rows.append(np.ravel_multi_index((node_i, node_j), (numb_loci, numb_loci)))
    structure_cols.append(np.full(len(edges), structure, dtype=np.int64))

# Assemble the sparse matrix once instead of re-concatenating it per structure.
rows = np.concatenate(pair_rows) if pair_rows else np.empty(0, dtype=np.int64)
cols = np.concatenate(structure_cols) if structure_cols else np.empty(0, dtype=np.int64)
data = np.ones(len(rows), dtype=np.int8)  # 1 if the loci pair is in the structure
V = sparse.coo_matrix((data, (rows, cols)),
                      shape=(numb_loci*numb_loci, max_numb_str),
                      dtype=np.int8)

In [187]:
from sklearn.decomposition import NMF

# Non-negative factorisation of V with two components:
# W holds one row per row of V, H one column per column of V.
nmf_settings = dict(n_components=2, init='nndsvd', random_state=0)
model = NMF(**nmf_settings)
W = model.fit_transform(V)
H = model.components_

In [188]:
# Shapes of the factorised matrix and of its two factors, in the order V, W, H.
print(*(matrix.shape for matrix in (V, W, H)))

(1000000, 10) (1000000, 2) (2, 10)


In [200]:
# Recover the (node_i, node_j) loci pair encoded by a row index of V:
# rows of V enumerate the cells of a (numb_loci x numb_loci) grid.
row_V_matrix = 19088
loci_grid = (numb_loci, numb_loci)
np.unravel_index([row_V_matrix], loci_grid)

(array([19]), array([88]))