# Creating a data-driven network

This example shows how to create a data-driven network and then add annotations (node attributes) to it.

In [1]:
import os
import pickle
import sys

sys.path.append("../..")

%matplotlib inline
from IPython.display import display
import networkx as nx

In [2]:
from exp_data import exp_data
from magine.networks.utils import add_attribute_to_network
from magine.networks.network_generator import build_network

First, create the list of seed species (`sig_measured`) and the list of background species (`measured`) for the network.

In [3]:
# Background set (all measured species ids) and seed set (ids taken from the
# `.sig` view of the species data), built together in a single assignment.
measured, sig_measured = (
    set(exp_data.species.id_list),
    set(exp_data.species.sig.id_list),
)

Now we will create the network. We pass the seed and background lists to `build_network`, together with flags that turn on all of the network databases. We also trim source/sink nodes (optional), which cleans up dangling nodes that are not in our seed or background lists.

In [4]:
# Databases used to expand the network; every one of them is switched on.
expansion_databases = {
    'use_biogrid': True,   # expand with biogrid
    'use_hmdb': True,      # expand with hmdb
    'use_reactome': True,  # expand with reactome
    'use_signor': True,    # expand with signor
}

# Build the network from the seed species, with all measured species as the
# background; source and sink nodes that were not measured are removed.
network = build_network(
    seed_species=sig_measured,
    all_measured_list=measured,
    trim_source_sink=True,
    save_name='Data/cisplatin_network',
    **expansion_databases
)

Trimming network
Network has 15841 nodes and 184279 edges
Found 8513 of 15777 seed species in network
Found 10556 of 23725 background species in network


Now that the network has been generated, we will add attributes to its nodes so that they can be used when visualizing the network.

In [5]:
# Annotate the network twice, in this order: first with the seed species
# ('seed'), then with the background species ('measured').
# NOTE(review): 'red' / 'blue' are presumably the values assigned to nodes
# inside / outside the given list -- confirm against add_attribute_to_network.
for attr_label, node_ids in (('seed', sig_measured), ('measured', measured)):
    network = add_attribute_to_network(
        network, node_ids, attr_label, 'red', 'blue'
    )

In [6]:
# Unpack the two values returned by get_measured_by_datatype(). `m` is a
# dictionary whose keys are the 'source' of the data and whose values are
# lists of species.
# NOTE(review): `sig_m` is presumably the same mapping restricted to the
# significant species -- confirm against exp_data.
m, sig_m = exp_data.get_measured_by_datatype()

In [7]:
# Add one attribute per 'source' of data, using the species measured in it.
for source_name, source_species in m.items():
    # The attribute name is the source name with '_' and '-' removed.
    cleaned_name = source_name.replace('_', '').replace('-', '')
    network = add_attribute_to_network(
        network, source_species, cleaned_name, 'red', 'blue'
    )
    

In [8]:
# Add one 'sample<id>' attribute per sample. Sample ids and the per-sample
# species collections are paired by position.
sample_ids = exp_data.species.sig.sample_ids
species_by_sample = exp_data.species.sig.by_sample
for sample_id, sample_species in zip(sample_ids, species_by_sample):
    sample_attr = 'sample{}'.format(sample_id)
    network = add_attribute_to_network(
        network, sample_species, sample_attr, 'red', 'blue'
    )

In [9]:
save_name = 'cisplatin_network_w_attributes'
# Both output files share the same base path inside the Data directory.
output_base = os.path.join('Data', save_name)

# write to GML for cytoscape or other program
nx.write_gml(network, output_base + '.gml')

# write a pickle for fast loading in python
# nx.write_gpickle was removed in NetworkX 3.0. It was a thin wrapper around
# pickle.dump using the highest protocol, so calling pickle directly produces
# the same kind of file and works with both old and new NetworkX releases.
# Reload with pickle.load on a file opened in 'rb' mode; only unpickle files
# from a trusted source.
with open(output_base + '.p', 'wb') as pickle_file:
    pickle.dump(network, pickle_file, pickle.HIGHEST_PROTOCOL)