#Find Density of Nodes

This script uses the gene interaction network and the list of mutations to compute a density score for every gene appearing in the list of mutations. Because this computation takes a long time, it should not be re-run: the output is already in the git repository and had to be manually curated.

In [19]:
%pylab inline
import pandas as pd
import numpy as np
import os
import sklearn as sk
from sklearn.cluster import AffinityPropagation
from sklearn import datasets
from sklearn import metrics
from multiprocessing import Pool
import pickle

Populating the interactive namespace from numpy and matplotlib


In [20]:
# Load the three inputs used by the cells below; each CSV uses its first
# column as the index.
mr = pd.read_csv('mr.csv', index_col=0)
# NOTE(review): unpickling can execute arbitrary code -- only load edges.p
# from a trusted source.  The context manager closes the file handle, which
# the previous bare open() call left open.
with open("edges.p", 'rb') as edges_file:
    edges = pickle.load(edges_file)
mutations = pd.read_csv('mutations.csv', index_col=0)

In [21]:
def calculate_density(graph, start_node, dataframe):
    """Score the mutation density in the neighbourhood of ``start_node``.

    A breadth-first search is run from ``start_node``.  Every gene reached
    at distance 1 adds ``0.2 * counts`` and every gene at distance 2 adds
    ``0.1 * counts`` to the score, where ``counts`` is taken from the row of
    ``dataframe`` whose ``Entrez_Gene_Id`` equals that gene.  Genes that
    match zero rows or more than one row contribute nothing, and the start
    node itself (distance 0) is never counted.  The search stops as soon as
    a node at distance 3 is dequeued.

    Parameters
    ----------
    graph : dict
        Adjacency mapping ``node -> iterable of neighbouring nodes``.
    start_node : hashable
        Node to start from.
    dataframe : pandas.DataFrame
        Must provide the columns ``Entrez_Gene_Id`` and ``counts``.

    Returns
    -------
    number or str
        The accumulated density, or the string ``'NaN'`` when ``start_node``
        is not a key of ``graph``.

    Side effects
    ------------
    Prints ``"<start_node> <result>"`` once per call.
    """
    # Function-scope import so the cell does not depend on edits elsewhere.
    from collections import deque

    if start_node not in graph:
        print("%s NaN" % (start_node,))
        return 'NaN'

    # Weight applied to a gene's mutation count, keyed by BFS distance.
    weights = {1: .2, 2: .1}

    # Index the mutation counts once instead of filtering the whole
    # dataframe again for every visited node.
    counts_by_gene = {}
    for gene, count in zip(dataframe['Entrez_Gene_Id'].tolist(),
                           dataframe['counts'].tolist()):
        counts_by_gene.setdefault(gene, []).append(count)

    # The distance dict doubles as the "discovered" set; lookups are O(1)
    # rather than a linear list.index() scan per access.
    distance = {start_node: 0}
    queue = deque([start_node])
    density = 0
    while queue:
        curr = queue.popleft()
        proximity = distance[curr]
        # BFS dequeues in non-decreasing distance, so the first node at
        # distance 3 means every node at distance <= 2 has been handled.
        if proximity == 3:
            break
        gene_counts = counts_by_gene.get(curr, ())
        if len(gene_counts) == 1 and proximity in weights:
            density += weights[proximity] * gene_counts[0]
        # A neighbour that is not itself a key of the graph is treated as
        # having no outgoing edges instead of raising.
        for neighbor in graph.get(curr, ()):
            if neighbor not in distance:
                distance[neighbor] = proximity + 1
                queue.append(neighbor)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("%s %s" % (start_node, density))
    return density

In [22]:
# map() hands calculate_density one element from each list per call, so the
# shared graph and mutation table are repeated once per gene (the lists hold
# references to the same objects, not copies).
gene_list = list(mutations.Entrez_Gene_Id)
edges_list = [edges] * len(gene_list)
mutations_list = [mutations] * len(gene_list)
# Only one slice of the gene list is scored per run; another slice tried
# previously was [2799:3001].
# list(...) forces evaluation: on Python 3 a bare map() is lazy and would
# compute and print nothing.
denisity = list(map(calculate_density, edges_list[:1000], gene_list[:1000], mutations_list[:1000]))

673 1338.3
2312 137.1
4893 1787.3
113146 NaN
94025 14.3
7273 1038.1
140453 102.4
7038 314.6
3265 1877.2
285464 100.8
727897 NaN
996 1065.2
284802 115.3
463 700.7
94137 2.8
1770 183.5
84033 196.3
388697 255.9
115294 207.1
2153 981.6
4288 1335.7
6708 414.5
58508 108.2
114784 151.1
6261 575.7
4297 538.5
83481 314.0
85301 NaN
23345 413.5
4588 NaN
7011 247.4
4586 NaN
5339 772.8
2904 618.2
7148 178.4
2051 1061.5
23524 675.4
774 1088.4
11200 1437.7
9774 527.1
3709 1265.0
143 218.2
5781 1337.8
23195 1147.1
5314 325.8
353333 267.6
1964 1491.9
2125 602.2
1769 40.6
3845 1846.8
84059 174.9
388698 192.9
2483 117.5
83872 136.4
55814 572.9
394263 NaN
115701 35.0
1176 626.2
79026 352.8
285643 NaN
472 827.9
23405 1684.5
64072 325.4
26974 361.4
50509 268.1
4300 421.3
545 1233.2
1767 627.5
11214 737.6
6996 716.8
10985 474.0
8913 47.4
89795 651.1
57619 1035.7
2318 622.3
25878 568.3
1114 104.6
4624 1229.6
84809 NaN
8239 471.8
7716 52.5
54535 211.6
6840 294.3
6560 106.8
9509 48.3
7301 755.1
84498 328.1
338 

In [14]:
# Count the non-null entries in each column of the mutations table.
mutations.count()

Entrez_Gene_Id     10271
counts             10271
Perc_Bi_Allelic    10271
Perc_Missense      10271
Perc_Nonsense      10271
dtype: int64

In [18]:
# Display the Entrez gene ID column of the mutations table.
mutations.Entrez_Gene_Id

0              673
1             2312
2             4893
3           113146
4            94025
5             7273
6           140453
7             7038
8             3265
9           285464
10          727897
11             996
12          284802
13             463
14           94137
15            1770
16           84033
17          388697
18          115294
19            2153
20            4288
21            6708
22           58508
23          114784
24            6261
25            4297
26           83481
27           85301
28           23345
29            4588
           ...    
10241       347613
10242       112609
10243         5590
10244       284110
10245       139341
10246        51280
10247        26056
10248    100287932
10249        79966
10250         5558
10251       646575
10252        26024
10253        51412
10254         8634
10255         9632
10256       163227
10257       441131
10258       221393
10259         5100
10260       339345
10261       140688
10262       