### Begin Weighting PCnet

In [1]:
import pandas as pd
import numpy as np
import ndex2
import networkx as nx
import scipy.sparse

#### Adjacency Matrix - GIANT

In [2]:
# Read nwnet (GIANT with HGNC symbols) from a networkx gpickle file.
# NOTE(review): gpickle is pickle underneath, so only load files that come
# from a trusted source.
# The `with` block closes the handle on exit; no explicit close() is needed.
with open('nwnet', 'rb') as f:
    nwnet = nx.read_gpickle(f)

In [3]:
# Mapping of every GIANT node to the value of its "name" attribute
nw_nodedict = nx.get_node_attributes(nwnet, "name")
# Flat list containing just those names
nw_nodelist = [*nw_nodedict.values()]

#### Update PCnet

In [5]:
# Download PCnet from the NDEx server as a NiceCX network.
# NOTE(review): the account credentials are hard-coded and the server URL is
# plain http; confirm this is the intended shared account and consider
# reading credentials from the environment instead.
niceCx = ndex2.create_nice_cx_from_server(
    server='http://ndexbio.org',
    username='scratch',
    password='scratch',
    uuid='f93f402c-86d4-11e7-a10d-0ac135e8bacf',
)

# Convert the NiceCX network into a networkx graph
pcnet = niceCx.to_networkx()

In [6]:
# Mapping of every PCnet node to the value of its "name" attribute
pc_nodedict = nx.get_node_attributes(pcnet, "name")
# Flat list containing just those names
pc_nodelist = [*pc_nodedict.values()]

In [7]:
# How many PCnet node names are NOT present in GIANT?
# The set difference has no defined order (string hashing varies between
# interpreter runs), so sort it: the table, and any file written from it,
# then comes out the same on every run.
pc_missing_names = sorted(set(pc_nodelist) - set(nw_nodelist))
pc_not_there = pd.DataFrame(pc_missing_names)
len(pc_not_there)

949

In [8]:
# Save the not_there list so the names can be checked against NCBI for
# updated IDs (the network is 1 year old, so there may have been changes).

# NOTE(review): sep='\n' is unusual; pc_not_there appears to hold a single
# column, in which case the separator is never written and the file is simply
# one name per line. Confirm before changing it.
# The `with` block closes the handle on exit; no explicit close() is needed.
with open('pc_not_there.txt', 'w') as f:
    pc_not_there.to_csv(f, sep='\n', header=False, index=False)

In [9]:
# rentrez (in R) was used to generate the following file (see notebook);
# it updates the Old HGNC ID to the New HGNC ID for each of the not_there genes.

# Load the space-separated, header-less file.
# The `with` block closes the handle on exit; no explicit close() is needed.
with open('pc_now_there.csv', 'r') as f:
    pc_now_there = pd.read_csv(f, sep=' ', header=None)

# check file: preview the first three rows
pc_now_there.head(3)

Unnamed: 0,0,1
0,LINC00238,CCDC196
1,ZNF295,ZBTB21
2,CHURC1-FNTB,CHURC1-FNTB


In [10]:
# Build the renaming dict from the two columns of pc_now_there:
# key = str() of the first column, value = the matching second-column entry.
# When a key occurs more than once the last row wins, exactly as with
# row-by-row assignment.
# (No file handle is involved in this cell, so nothing needs closing here.)
h_en = dict(zip(map(str, pc_now_there.iloc[:, 0]), pc_now_there.iloc[:, 1]))

# number of distinct keys in the mapping
len(h_en)

844

In [11]:
# Remember each node's current key (node -> itself) so it survives relabelling
pc_nodeids = {node: node for node in pcnet.nodes()}

# Store those keys as the node attribute 'id'.
# Keyword arguments are used on purpose: networkx 1.x takes (G, name, values)
# while 2.x takes (G, values, name), so a positional call only works on one of
# the two major versions.
nx.set_node_attributes(pcnet, name='id', values=pc_nodeids)

# Rename nodes with their current names.
# relabel_nodes returns a relabelled copy here, so the result is re-assigned.
pcnet = nx.relabel_nodes(pcnet, pc_nodedict)

In [12]:
# Apply the h_en renaming to the graph: every node whose label is a key of
# h_en gets the mapped label. A relabelled copy is returned and re-bound
# to pcnet.
pcnet = nx.relabel_nodes(pcnet, mapping=h_en)

In [13]:
# Convert node names to numbers (starting at 1) and store the current node
# names as attribute "name".
# convert_node_labels_to_integers returns a NEW graph and leaves its argument
# untouched, so the result must be assigned back; otherwise pcnet keeps its
# string labels and the conversion is silently lost.
# NOTE: not idempotent. Re-running this cell would overwrite "name" with the
# integer labels, so run it once per fresh kernel.
pcnet = nx.convert_node_labels_to_integers(
    pcnet, first_label=1, ordering='default', label_attribute='name'
)

<networkx.classes.graph.Graph at 0x7fbbfa18fe10>

In [14]:
# Write pcnet (updated HGNC) to a networkx gpickle file.
# The `with` block closes the handle on exit; no explicit close() is needed.
with open('pcnet', 'wb') as f:
    nx.write_gpickle(pcnet, f)