In [2]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
#from pandas_profiling import ProfileReport
from sklearn.decomposition import NMF

In [3]:
# Location of the network file; it is parsed as an adjacency list by
# nx.read_adjlist in a later cell.
network_file_path = "../Data/Network_files/KIRP_GeneNet.txt"

In [5]:
# Parse the adjacency-list file into an undirected nx.Graph.
network = nx.read_adjlist(network_file_path, create_using=nx.Graph)

In [6]:
# Location of the CSV matrix that is loaded into `binary_matrix` in a later cell.
# NOTE(review): "cna" in the file name presumably means copy-number alteration — confirm.
binary_matrix_path = "../Data/Sm_files/KIRP_cna_mat_new.csv"

In [7]:
# Load the matrix; the first CSV column becomes the row index.
binary_matrix = pd.read_csv(binary_matrix_path, index_col = 0)

In [8]:
# Set of the matrix's column labels, used for the overlap with network nodes.
# NOTE(review): despite the name, the frame displayed later in this notebook has
# gene symbols as columns and TCGA sample IDs as rows — confirm and consider renaming.
patient_cols = set(binary_matrix.columns)

In [9]:
# Column labels of the matrix that also appear as nodes in the network.
common_cols = patient_cols.intersection(network.nodes)

In [10]:
# Keep only the matrix columns that are also network nodes. pandas does not
# accept a set as a column indexer (and a set has no stable order), so select
# with a list that follows the matrix's own column order.
modified_binary_mat = binary_matrix[[col for col in binary_matrix.columns if col in common_cols]]

In [11]:
# Network nodes that have no column in the matrix, in network node order.
diff_cols = [node for node in network.nodes if node not in common_cols]

In [12]:
# Zero-filled columns for the network nodes that are missing from the matrix.
# Building the frame from the scalar 0 yields integer columns directly, instead
# of creating an all-NaN object-dtype frame and relying on fillna to downcast it.
x = pd.DataFrame(0, index=modified_binary_mat.index, columns=diff_cols)

In [13]:
# Column-wise concatenation of the overlapping matrix columns and the
# zero-filled columns, so `result` has one column for every network node.
result = pd.concat([modified_binary_mat, x], axis=1)

In [14]:
def normalize_network(network, symmetric_norm=False):
    """Return the degree-normalized adjacency matrix of ``network`` as a dense array.

    Parameters
    ----------
    network : networkx graph
        Graph whose adjacency matrix (as given by ``nx.adjacency_matrix``) is
        normalized. Rows/columns follow the graph's node order.
    symmetric_norm : bool, default False
        If True, return ``D^-1/2 . A . D^-1/2`` where ``D`` is the diagonal
        matrix of column sums of ``A``. If False, divide every column of ``A``
        by its column sum and return the transpose of that matrix.

    Returns
    -------
    numpy.ndarray
        Square float array with one row/column per node.

    Notes
    -----
    A node whose column sum is zero (e.g. an isolated node) gets a scaling
    factor of 0 instead of a division by zero, so its row and column in the
    result are all zeros rather than NaN/inf.
    """
    adj_array = np.array(nx.adjacency_matrix(network).todense(), dtype=float)
    # Column sums of the adjacency matrix (the node degrees for an undirected,
    # unweighted graph).
    degree = adj_array.sum(axis=0)
    has_edges = degree != 0
    if symmetric_norm:
        inv_sqrt_degree = np.zeros_like(degree)
        inv_sqrt_degree[has_edges] = 1.0 / np.sqrt(degree[has_edges])
        # Scaling rows and then columns is the same as D . A . D with a
        # diagonal D, without building two dense matrix products.
        adj_array_norm = inv_sqrt_degree[:, None] * adj_array * inv_sqrt_degree[None, :]
    else:
        # `where` broadcasts over columns: column j is divided by degree[j],
        # and columns with a zero sum keep the zeros from `out`.
        scaled = np.divide(adj_array, degree, out=np.zeros_like(adj_array), where=has_edges)
        adj_array_norm = scaled.T
    return adj_array_norm

In [15]:
def fast_random_walk(alpha, binary_mat, subgraph_norm, prop_data_prev):
    """Propagate ``binary_mat`` over one normalized subgraph in closed form.

    Computes ``(1 - alpha) * binary_mat . inv(I - alpha * subgraph_norm)`` and
    appends the resulting columns to the right of ``prop_data_prev``.

    Parameters
    ----------
    alpha : float
        Propagation coefficient used in both factors of the formula above.
    binary_mat : array of shape (rows, nodes)
        Starting values; its column count sets the size of the identity matrix.
    subgraph_norm : array of shape (nodes, nodes)
        Normalized adjacency matrix of the subgraph.
    prop_data_prev : array of shape (rows, k)
        Previously propagated columns to extend.

    Returns
    -------
    numpy.ndarray
        ``prop_data_prev`` with the newly propagated columns concatenated along
        axis 1.
    """
    node_count = binary_mat.shape[1]
    restart_part = (1 - alpha) * binary_mat
    walk_operator = np.identity(node_count) - alpha * subgraph_norm
    propagated = np.dot(restart_part, np.linalg.inv(walk_operator))
    return np.concatenate((prop_data_prev, propagated), axis=1)

In [16]:
def network_propagation(network, binary_matrix, alpha=0.7, symmetric_norm=False, verbose=True):
    """Propagate the rows of ``binary_matrix`` over ``network``, one connected component at a time.

    Each connected component is normalized with ``normalize_network`` and
    propagated with ``fast_random_walk``; the per-component results are joined
    column-wise.

    Parameters
    ----------
    network : networkx graph
        Graph whose nodes correspond to column labels of ``binary_matrix``.
    binary_matrix : pandas.DataFrame
        One row per sample, one column per node. Nodes without a column, and
        missing values, are treated as 0.
    alpha : float, default 0.7
        Propagation coefficient passed to ``fast_random_walk``.
    symmetric_norm : bool, default False
        Passed to ``normalize_network``.
    verbose : bool, default True
        Print a start message and the elapsed time.

    Returns
    -------
    pandas.DataFrame
        Propagated values indexed like ``binary_matrix``, with one column per
        network node ordered component by component. A network without nodes
        yields a frame with no columns.
    """
    # Begin network propagation
    starttime = time.time()
    if verbose:
        print('Performing network propagation with alpha:', alpha)

    # Zero-width "previous result" handed to fast_random_walk so that it returns
    # just the columns of the current component.
    no_previous = np.zeros((binary_matrix.shape[0], 0))
    prop_data_node_order = []
    prop_blocks = []
    # Propagate every connected component independently.
    for component in nx.connected_components(network):
        subgraph = network.subgraph(component)
        subgraph_nodes = list(subgraph.nodes)
        prop_data_node_order.extend(subgraph_nodes)
        # reindex selects this component's columns without transposing the whole
        # matrix, and yields NaN (then 0) for nodes that have no column.
        binary_matrix_filt = np.array(binary_matrix.reindex(columns=subgraph_nodes).fillna(0))
        subgraph_norm = normalize_network(subgraph, symmetric_norm=symmetric_norm)
        prop_blocks.append(fast_random_walk(alpha, binary_matrix_filt, subgraph_norm, no_previous))

    # Join all components once instead of re-copying a growing array per component.
    prop_data = np.concatenate(prop_blocks, axis=1) if prop_blocks else no_previous

    # Return propagated result as dataframe
    prop_data_df = pd.DataFrame(data=prop_data, index=binary_matrix.index, columns=prop_data_node_order)
    if verbose:
        print('Network Propagation Complete:', time.time()-starttime, 'seconds')
    return prop_data_df


In [17]:
# Propagate the node-aligned matrix over the network using the function's
# defaults (alpha=0.7, symmetric_norm=False, verbose=True).
propagated_matrix = network_propagation(network, result)

Performing network propagation with alpha: 0.7
Network Propagation Complete: 6.511929273605347 seconds


In [18]:
# Display the matrix that was passed to network_propagation.
result

Unnamed: 0,KLHL33,IGFL3,ASPG,C1orf127,DOCK2,CGNL1,KCNN4,MYT1L,RBFOX3,APOC1,...,ASCL5,CST9,ASCL4,BIVM,CLRN2,NBEAL1,CST11,NFE4,EGFL8,BTBD19
TCGA-2K-A9WE,0,0,0,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-2Z-A9J1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-2Z-A9J2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-2Z-A9J3,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-2Z-A9J5,0,0,0,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-Y8-A898,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-Y8-A8RY,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-Y8-A8RZ,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TCGA-Y8-A8S0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Display the propagated values returned by network_propagation.
propagated_matrix

Unnamed: 0,REM1,JPH2,DUSP26,GLP1R,GRIN1,ARHGEF15,MUC6,CYP3A4,GPR12,RAB6B,...,CLRN2,NBEAL1,CST11,NFE4,EGFL8,RBFOX3,CEACAM4,BTBD19,TMEM26,RAD51AP2
TCGA-2K-A9WE,0.108047,0.052858,0.046964,0.155900,0.199010,0.083105,0.073291,0.156810,0.115275,0.045153,...,0.001485,0.002305,0.003914,0.000463,0.001115,0.302258,0.000579,0.000771,0.000502,0.000496
TCGA-2Z-A9J1,0.036814,0.026403,0.017269,0.048349,0.077312,0.028487,0.032372,0.049381,0.035761,0.014967,...,0.000245,0.000747,0.001021,0.000152,0.000395,0.301920,0.000299,0.000216,0.000286,0.000252
TCGA-2Z-A9J2,0.064909,0.028901,0.023875,0.078673,0.103647,0.044039,0.057776,0.089060,0.060724,0.023487,...,0.000891,0.001456,0.002167,0.000227,0.000735,0.302073,0.000367,0.000381,0.000178,0.000250
TCGA-2Z-A9J3,0.070039,0.035521,0.027887,0.089069,0.125478,0.049721,0.060101,0.098701,0.069907,0.026573,...,0.000939,0.001539,0.002519,0.000270,0.000791,0.302127,0.000385,0.000414,0.000377,0.000312
TCGA-2Z-A9J5,0.129827,0.050855,0.048845,0.164742,0.200987,0.083184,0.080776,0.200142,0.115761,0.043945,...,0.001396,0.001961,0.004069,0.000479,0.001190,0.302333,0.000504,0.001588,0.001146,0.301530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-Y8-A898,0.036814,0.026403,0.017269,0.048349,0.077312,0.028487,0.032372,0.049381,0.035761,0.014967,...,0.000245,0.000747,0.001021,0.000152,0.000395,0.301920,0.000299,0.000216,0.000286,0.000252
TCGA-Y8-A8RY,0.031133,0.019195,0.014311,0.037327,0.055022,0.022424,0.030122,0.039451,0.029744,0.011965,...,0.000198,0.000677,0.000709,0.000109,0.000343,0.301867,0.000282,0.000184,0.000225,0.000181
TCGA-Y8-A8RZ,0.139383,0.024282,0.027006,0.098760,0.108715,0.047555,0.044265,0.116987,0.086982,0.024422,...,0.001002,0.000905,0.002586,0.000235,0.001017,0.002844,0.000196,0.001089,0.000187,0.000201
TCGA-Y8-A8S0,0.036814,0.026403,0.017269,0.048349,0.077312,0.028487,0.032372,0.049381,0.035761,0.014967,...,0.000245,0.000747,0.001021,0.000152,0.000395,0.301920,0.000299,0.000216,0.000286,0.000252


In [20]:
# Write the propagated matrix to CSV; the row index is written as the first column.
propagated_matrix.to_csv("../Data/smoothed_CNV_new.csv")