# Supplemental Notebook - Interactive Hierarchy Creation

This notebook generates hierarchy networks in HCX format so that they can be explored interactively in Cytoscape Web ([web.cytoscape.org](https://web.cytoscape.org)).

### Set Up

In [3]:
import os
import sys

import matplotlib.pyplot as plt
import ndex2
import ndex2 as ndex
import ndex2.client
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from ndex2.cx2 import NetworkXToCX2NetworkFactory, CX2Network,  PandasDataFrameToCX2NetworkFactory, CX2NetworkXFactory

In [4]:
cwd = os.getcwd()
sys.path.append(os.path.join(cwd, '../carva'))
from network_utils import *
from geneset_utils import *
from hierarchy_utils import *

In [5]:
from getpass import getpass
user = getpass('Username:')
password = getpass('Password:')
client = ndex2.client.Ndex2(username=user, password=password)

## Load the subnetworks

In [None]:
subnetworks = {'autism spectrum disorder': '94590325-4ed4-11f0-a218-005056ae3c32',
 'Alzheimer disease': 'a25cd00e-4ed4-11f0-a218-005056ae3c32',
 'bipolar disorder': 'a454f6a0-4ed4-11f0-a218-005056ae3c32'}

**Note:** it is currently unclear which network is which. The entry `'autism spectrum disorder': '94590325-4ed4-11f0-a218-005056ae3c32'` comes from the `uuid_list.txt` files and is actually a network of 'all' (`z*>3`, coloc network) created by S3.

In [6]:
#shachar note: i will need to load the networks manually from local instead of from remote

G = nx.read_gpickle(os.path.join(cwd, '../out/net_out/lupus_lupus_net1.gpickle'))

In [7]:
print(type(G))

<class 'networkx.classes.graph.Graph'>


Below is some experimenting with the other graph format, made by create subnetwork

In [None]:
G_other = nx.read_gpickle(os.path.join(cwd, '../out/net_out/lupus_lupus_net1_all.gpickle'))


In [None]:
g_cx_other = ndex2.create_nice_cx_from_networkx(G_other)
first_node_id_other, _ = next(iter(g_cx_other.get_nodes()))
node_attributes_other = g_cx_other.get_node_attributes(first_node_id_other)

In [None]:
display(node_attributes_other)

In [None]:
#G_cx = G #.to_networkx()


In [None]:
asd_parentG= load_network(uuid=subnetworks['autism spectrum disorder'], use_password=True,ndex_password=password, ndex_user=user)
bip_parentG=load_network(uuid=subnetworks['bipolar disorder'], use_password=True,ndex_password=password, ndex_user=user)
azd_parentG=load_network(uuid=subnetworks['Alzheimer disease'], use_password=True,ndex_password=password, ndex_user=user)

## Create hierarchy via community detection

Example for creation of network hierarchy using HiDeF

In [None]:
uuid = '3cc84672-1653-11f0-9806-005056ae3c32'
outdir = '/output/directory/'

In [None]:
# The credentials entered in the Set Up section are stored in `user` / `password`; pass them
# with the same keyword names as the other load_network calls in this notebook.
G_cx = load_network(uuid, use_password=True, return_cx=True, ndex_user=user, ndex_password=password)

#### name attribute problem
Apparently, the nodes in my network are missing a "name" attribute.

I will assign it manually, using the GeneID attribute (Entrez ID) instead.

In [None]:
for n, data in G.nodes(data=True):
    if 'name' not in data:
        # use a biologically meaningful label if available
        G.nodes[n]['name'] = data.get('GeneID', str(n))

In [None]:
factory = NetworkXToCX2NetworkFactory()
G_cx = factory.get_cx2network(G)

In [None]:
G_cx.set_name("pcnet2.2_lupus_2")

In [None]:
name = G_cx.get_name()
print(name)

In [None]:
# For the first few nodes
print(type(G))
print(len(list(G.nodes(data=True))))
list(G.nodes(data=True))[:5]

In [None]:
#shachar note: changed the returned object on heir utils side, will now convert back to networkx here
#G_hier = create_hierarchy(G_cx, verbose=True)
CX_hier = create_hierarchy(G_cx, verbose=True)

factory = CX2NetworkXFactory()
G_hier_multidigraph = factory.get_graph(CX_hier) 
G_hier = nx.Graph(G_hier_multidigraph)

hier_df = create_hier_df(G_hier)

In [None]:
# old conversion
factory = CX2NetworkXFactory()
G_hier = factory.get_graph(CX_hier)

In [None]:

print(type(G_hier))
print(G_hier.name)
print(len(G_hier.nodes(data=True)))
list(G_hier.nodes(data=True))[1]

### shachar note: adding uploading the hierarchy network
this will enable to run different algorithms than the native one carva runs
this might solve the problem with the rest of the pipeline not fitting the output of the algorithm - missing CD_Community_Pval etc.

In [None]:
id_str = upload_network(G_hier, G_hier.name+'_Hierarchy_only', username=user, password=password, template=None)
print(id_str)

For good measure, let's also upload the base net and run the whole process there.

In [None]:
id_str = upload_network(G, G.name+'_base_only', username=user, password=password, template=None)

In [None]:
# Inspect the attribute dictionary of the first node of the CX2 hierarchy.
print(type(CX_hier))
count = 0
count0 = 0
# CX_hier is the object returned by create_hierarchy in an earlier cell
for node_id, node_object in CX_hier.get_nodes().items():
    
    # the node's attributes are read from its 'v' entry
    attributes_dict = node_object.get('v', {})
    
    #print(f"Node ID: {node_id}")
    #print(f"Attributes: {attributes_dict}")
    display(attributes_dict)
    # NOTE(review): this unconditional break stops the loop after the first node, so the
    # counting code below never runs and the final print always shows "0 0".
    break
    if attributes_dict['CD_AnnotatedMembers_Pvalue'] == 0:
        count0 += 1
    count+=1
    
    # Example of getting a specific attribute ('name')
    name = attributes_dict.get('name')
    #print(f"Name: {name}\n")
print(count, count0)

In [None]:
#attempting to fix mismatch by converting from cx2 to cx1
G_cx1 = ndex2.create_nice_cx_from_networkx(CX2NetworkXFactory().get_graph(G_cx))

In [None]:
#trying to udnerstand network naming conventions
print(type(G_cx1))
first_node_id, _ = next(iter(G_cx1.get_nodes()))
#node_attributes = G_cx1.get_node_attribute(first_node_id, 'GeneClass')
node_attributes = G_cx1.get_node_attributes(first_node_id)
display(node_attributes)

Back to the pipeline.
The gene-fractions function was changed to the new one, adjusted for the new network format.

In [None]:
#hier_df = add_seed_gene_fractions(hier_df, G_cx) # was G_cx
hier_df = add_seed_gene_fractions_new_format(hier_df, G_cx1)

In [None]:
display(hier_df.head())

In [None]:
outdir = "../out/net_out/lupus"

In [None]:
hier_df_annot = name_hierarchy_systems(hier_df, outdir=outdir, gene_col='SymbolList', write=True, hier_name=name)

In [None]:
#hier_df_annot['GO_Name'] = hier_df_annot['GO_Name'].astype(str)

In [None]:
display(hier_df_annot.head())

In [None]:
display(hier_df_annot["GO_Name"].tolist())

In [None]:
write_nx_hierarchy(G_hier, hier_df_annot, outdir, name)

In [None]:
G_out2 = add_annotations_to_hierarchy(G_hier, hier_df)

In [None]:
print(type(G_out2))
list(G_out2.nodes(data=True))[1]

In [None]:
# upload cx doesnt seem to accept outdir anymore
upload_cx_hierarchy(G_hier, hier_df_annot, name, user, password)#, annot_cols=['GO_Name', 'rare', 'common', 'shared', 'rare_z', 'common_z', 'shared_z', 'rc_ratio','CD_MemberList', 
                                    #'SymbolList', 'CD_MemberList_Size','CD_MemberList_LogSize', 'HiDeF_persistence', 'CD_AnnotatedMembers', 'CD_AnnotatedMembers_Size', 'CD_AnnotatedMembers_Size'])

In [None]:
# NOTE(review): stale call -- `username` is not assigned in any visible cell (the login cell
# stores the account name in `user`), and the previous cell notes that upload_cx_hierarchy
# no longer accepts outdir. Prefer the call in the previous cell.
upload_cx_hierarchy(G_hier, hier_df_annot, outdir, name, username, password)

In [None]:
list(G_hier.nodes(data=True))[:1]

Note: using `hier_df_annot` makes the labels go away; using `hier_df` doesn't.

In [None]:
# Again, instead of uploading the hierarchy online, we save it locally.
# G_out2 is the annotated hierarchy returned by add_annotations_to_hierarchy above; the name
# G_out used here before is not assigned in any visible cell.
print(type(G_out2))
nx.write_gpickle(G_out2, os.path.join(outdir, f'{name}_Hierarchy2.gpickle'))

In [None]:
# Preview the annotated hierarchy (G_out2 from add_annotations_to_hierarchy); `G_out` is not
# assigned in any visible cell.
list(G_out2.nodes(data=True))[:5]

### Offline Treatment
Apparently, in order to get some of the columns, you need to do extra analysis outside of this notebook.

To avoid a KeyError on `node_data = node_data.loc[:, ['CD_MemberList', 'CD_AnnotatedMembers_Pvalue', 'CD_CommunityName', 'CD_MemberList_LogSize', 'CD_AnnotatedMembers_SourceTerm', 'CD_AnnotatedMembers_SourceDB']]`:

you will need to upload the previous net, do "Run Community Detection" using CDAP in Cytoscape/NDEx, and then create a new network on which the clustering was performed.

### Load Pre-computed Hierarchies

In [None]:
# NOTE(review): a later cell loads `asd_uuid`, which is not assigned in any visible cell, and
# `lupus_uuid` is reassigned in the next cell -- confirm which hierarchy each UUID refers to.
lupus_uuid = '4c4ffe8a-3cc5-11f0-a469-005056ae3c32'
azd_uuid = '44345758-3cc5-11f0-a469-005056ae3c32'
bip_uuid = '3f551676-3cc5-11f0-a469-005056ae3c32'

In [2]:
lupus_uuid = "e1a15afa-ba30-11f0-a218-005056ae3c32" #"70c44c45-ba2e-11f0-a218-005056ae3c32" #"0bbf51af-b982-11f0-a218-005056ae3c32"

In [None]:
asdG = load_network(uuid=asd_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

In [None]:
azdG = load_network(uuid=azd_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

In [None]:
bipG = load_network(uuid=bip_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

In [None]:
lupusG = nx.read_gpickle(os.path.join(cwd, '../out/net_out/lupus/pcnet2.2_lupus_Hierarchy.gpickle'))

In [8]:
lupusG = load_network(uuid=lupus_uuid, ndex_password=password, ndex_user=user, verbose=True, use_password=True,
                return_cx=False)

Network Name:pcnet2.2_lupus_step1-carva_step2-cdap
Number of nodes: 56
Number of edges: 58


In [None]:
path = str(os.path.join(outdir , "pcnet2.2_lupus_2_step1-carva_step2-carva.cx"))

lupus_cx = ndex2.create_nice_cx_from_file(path)


In [None]:
lupusG = lupus_cx.to_networkx()

In [14]:
list(lupusG.nodes(data=True))[:2]

[('C128706714',
  {'CD_MemberList': '3586 11009 6774 29949',
   'CD_AnnotatedMembers_Size': '2',
   'CD_AnnotatedMembers_Pvalue': '4.7613107361530627E-7',
   'CD_AnnotatedMembers': '3586 6774',
   'CD_CommunityName': 'FGFR4 p G388R signaling',
   'CD_AnnotatedMembers_Overlap': '0.4',
   'HiDeF_persistence': '8',
   'HCX::isRoot': 'false',
   'CD_AnnotatedMembers_SourceTerm': 'WP:WP5428',
   'CD_MemberList_LogSize': '2.0',
   'HCX::members': ['3586', '11009', '6774', '29949'],
   'CD_Labeled': 'true',
   'CD_AnnotatedAlgorithm': 'Annotated by gProfiler [Docker: coleslawndex/cdgprofilergenestoterm:0.3.0] {{--organism=hsapiens, --maxpval=0.00001, --minoverlap=0.05, --maxgenelistsize=5000}} via CyCommunityDetection Cytoscape App (1.12.1)',
   'selected': 'true',
   'CD_NonAnnotatedMembers': '11009 29949',
   'CD_AnnotatedMembers_SourceDB': 'WP',
   'CD_MemberList_Size': '4'}),
 ('C128706701',
  {'CD_MemberList': '29126 3120 3118 80380 55824',
   'CD_AnnotatedMembers_Size': '4',
   'CD_Anno

In [None]:
all_nodes_data = G_out.nodes.data()
print(all_nodes_data)

Suspicion: some GO_Name values for the nodes are just the node id (a number), while others are actual GO terms, e.g. regulation of immune response.

In [10]:
def load_hierarchy_info(G):
    """Collect the per-community annotations stored on a hierarchy graph.

    Args:
        G: Graph whose ``G.nodes(data=True)`` yields ``(community_id, attribute_dict)``
            pairs. Each CD_* attribute named in ``column_names`` below must be present
            on at least one node.

    Returns:
        tuple: ``(node_data, gene_dict)``. ``node_data`` is a DataFrame indexed by community
        id with the columns ``Genes, Pvalue, Name, LogSize, SourceTerm, SourceDB``.
        ``gene_dict`` maps each community id to its member ids as a list of ints
        (an empty list when the community has no member list).

    Raises:
        KeyError: if a required CD_* attribute is absent from every node; the message
            lists the missing attributes.
        ValueError: if a member id is not an integer.
    """
    # node attribute -> column name used by the rest of the notebook
    column_names = {
        'CD_MemberList': 'Genes',
        'CD_AnnotatedMembers_Pvalue': 'Pvalue',
        'CD_CommunityName': 'Name',
        'CD_MemberList_LogSize': 'LogSize',
        'CD_AnnotatedMembers_SourceTerm': 'SourceTerm',
        'CD_AnnotatedMembers_SourceDB': 'SourceDB',
    }
    node_data = pd.DataFrame({n: data for n, data in G.nodes(data=True)}).T

    # Fail with a message naming the missing attributes instead of a bare label error.
    missing = [col for col in column_names if col not in node_data.columns]
    if missing:
        raise KeyError(f'Hierarchy nodes are missing required attribute(s): {missing}')

    node_data = node_data.loc[:, list(column_names)].rename(columns=column_names)

    gene_dict = {}
    for comm, genes in zip(node_data.index, node_data.Genes):
        # split() without an argument returns [] for an empty string, whereas split(' ')
        # returned [''] and int('') raised ValueError. Non-string cells (NaN for nodes
        # without a member list) are treated as empty.
        members = genes.split() if isinstance(genes, str) else []
        gene_dict[comm] = [int(x) for x in members]
    return node_data, gene_dict

In [11]:
lupus_df, lupus_genes = load_hierarchy_info(lupusG)

In [17]:
lupus_parentG = G
display(list(lupus_parentG.nodes(data=True))[:2])

[(100507650,
  {'GeneID': '100507650',
   'represents': 'ncbigene:100507650',
   'NPS_R': -1.1150704446291682,
   'NPS_C': 5.543708452301536,
   'NPS_RC': -6.181625448802352,
   'GeneClass': 'common',
   'COLOC Gene': 0,
   'InputGene': True}),
 (2052,
  {'GeneID': '2052',
   'represents': 'ncbigene:2052',
   'HGNC': 'MAP9',
   'NPS_R': -0.0712179709662565,
   'NPS_C': 2.933210439243172,
   'NPS_RC': -0.2088972958999407,
   'GeneClass': 'common',
   'COLOC Gene': 0,
   'InputGene': True})]

In [None]:
asd_df, asd_genes = load_hierarchy_info(asdG)
azd_df, azd_genes = load_hierarchy_info(azdG)
bip_df, bip_genes = load_hierarchy_info(bipG)

## Construct the HCX Object

In [12]:
def get_cx2_networks(hierG, parentG, comm_df):
    """Build CX2 versions of a hierarchy graph and of its parent interaction graph.

    The parent graph is converted directly. The hierarchy network is built from the
    edge list of ``hierG``; every resulting node then receives a ``CD_MemberList``
    string attribute taken from the ``Genes`` column of ``comm_df``, looked up by the
    node's ``name`` attribute.

    Args:
        hierG: networkx hierarchy graph.
        parentG: networkx parent (interaction) graph.
        comm_df: DataFrame indexed by community name with a ``Genes`` column.

    Returns:
        tuple: ``(hier_net, parent_net)`` CX2 networks.
    """
    parent_factory = NetworkXToCX2NetworkFactory()
    frame_factory = PandasDataFrameToCX2NetworkFactory()
    parent_net = parent_factory.get_cx2network(parentG)

    edge_table = nx.to_pandas_edgelist(hierG)
    hier_net = frame_factory.get_cx2network(
        edge_table,
        source_field='source',
        target_field='target',
    )

    for node_id in hier_net.get_nodes():
        # the node's 'name' attribute is used as the row label in comm_df
        community = hier_net.get_node(node_id).get('v', {}).get('name')
        member_genes = comm_df.loc[community]['Genes']
        hier_net.add_node_attribute(node_id, 'CD_MemberList', member_genes, datatype='string')

    return hier_net, parent_net

In [18]:
lupus_hier, lupus_parent = get_cx2_networks(lupusG, lupus_parentG, lupus_df)

In [None]:
asd_hier, asd_parent = get_cx2_networks(asdG, asd_parentG, asd_df)
azd_hier, azd_parent = get_cx2_networks(azdG, azd_parentG, azd_df)
bip_hier, bip_parent = get_cx2_networks(bipG, bip_parentG, bip_df)

In [19]:
def get_hcx(hier_net, parent_net, parent_uuid, hier_name):
    """Add the HCX attributes to a CX2 hierarchy network, modifying it in place.

    Sets the network-level schema attributes, flags every node with ``HCX::isRoot`` and
    converts each node's space-separated ``CD_MemberList`` into the integer list
    ``HCX::members``.

    Args:
        hier_net: CX2 hierarchy network; modified in place and returned.
        parent_net: Not used by this function; kept so existing calls keep working.
        parent_uuid: Value stored as ``HCX::interactionNetworkUUID``.
        hier_name: Name given to the hierarchy network.

    Returns:
        The same ``hier_net`` object.

    Raises:
        ValueError: if a ``CD_MemberList`` entry is not an integer.
    """
    hier_net.add_network_attribute('ndexSchema', 'hierarchy_v0.1', datatype='string')
    hier_net.add_network_attribute('HCX::modelFileCount', '2', datatype='integer')
    hier_net.set_name(hier_name)
    hier_net.add_network_attribute('HCX::interactionNetworkUUID', parent_uuid, datatype='string')

    # A root is any node that is never the target of an edge.
    targets = {edge_obj['t'] for edge_obj in hier_net.get_edges().values()}

    for node_id in list(hier_net.get_nodes().keys()):
        is_root = node_id not in targets
        hier_net.add_node_attribute(node_id, 'HCX::isRoot', str(is_root).lower(), datatype='boolean')

        member_list = hier_net.get_node(node_id).get('v', {}).get('CD_MemberList') or ''
        # split() without an argument yields [] for a missing/empty member list; the former
        # split(' ') yielded [''] and int('') raised ValueError for such nodes.
        member_ids = [int(member) for member in member_list.split()]
        hier_net.add_node_attribute(node_id, 'HCX::members', member_ids, datatype='list_of_integer')
    return hier_net

In [20]:
lupus_HCX = get_hcx(lupus_hier, lupus_parent, '288f9830-ba29-11f0-a218-005056ae3c32', hier_name='lupus Hierarchy HCX')

In [None]:
asd_HCX = get_hcx(asd_hier, asd_parent, 'ccd5e0d3-31ac-11f0-a469-005056ae3c32', hier_name='ASD Hierarchy HCX')
azd_HCX = get_hcx(azd_hier, azd_parent, 'cd0ad385-31ac-11f0-a469-005056ae3c32', hier_name='AZD Hierarchy HCX')
bip_HCX = get_hcx(bip_hier, bip_parent, 'cd515269-31ac-11f0-a469-005056ae3c32', hier_name='BIP Hierarchy HCX')

### Upload hierarchies

In [21]:
client.save_new_cx2_network(lupus_HCX.to_cx2(), visibility='PRIVATE')

'https://www.ndexbio.org/v3/networks/d628718b-baef-11f0-a218-005056ae3c32'

In [None]:
client.save_new_cx2_network(asd_HCX.to_cx2(), visibility='PRIVATE')

In [None]:
client.save_new_cx2_network(azd_HCX.to_cx2(), visibility='PRIVATE')

In [None]:
client.save_new_cx2_network(bip_HCX.to_cx2(), visibility='PRIVATE')

## Add hierarchy annotations

In [106]:
def annotate_and_clean_hierarchy(hier_df, parentG):
    """Add gene-class fractions, mean NPS scores and gene symbols to each community.

    Args:
        hier_df: DataFrame indexed by community id with the columns ``Genes``
            (space-separated node ids), ``Pvalue``, ``Name`` and ``SourceDB``.
            The caller's frame is not modified.
        parentG: Graph whose ``nodes(data=True)`` yields ``(node_id, attrs)`` pairs keyed by
            integer node id. Each member's attrs must provide ``GeneClass``, ``NPS_C``,
            ``NPS_R`` and ``NPS_RC``; missing ``HGNC`` entries are filled in place by
            ``add_hgnc``.

    Returns:
        DataFrame: ``hier_df`` without ``Pvalue``/``SourceDB``, with the cleaned ``Name``, a
        ``logp`` column (-log10 of the p-value) and, per community, the columns
        ``shared_fraction, rare_fraction, common_fraction, network_fraction, NPSc, NPSr,
        NPSrc, c_vs_r, HGNC``.

    Raises:
        KeyError: if a member gene is not a node of ``parentG`` or lacks a required attribute.
        AssertionError: if the four class fractions of a community do not sum to 1.
    """
    hier_df = hier_df.copy()  # keep the caller's frame untouched
    hier_df['logp'] = hier_df.Pvalue.apply(lambda x: -1 * np.log10(float(x)))
    hier_df['Name'] = clean_names(hier_df['Name'].values)
    hier_df = hier_df.drop(columns=['Pvalue', 'SourceDB'])

    # Plain {node_id: attrs} dict: lookups by integer id are less error-prone than going
    # through the node view.
    node_data_dict = dict(parentG.nodes(data=True))

    # Fill in missing symbols once, up front; this was previously re-run for every community.
    if len(hier_df) > 0:
        add_hgnc(node_data_dict)

    # GeneClass value -> output column, in output column order
    class_to_fraction = {
        'shared': 'shared_fraction',
        'rare': 'rare_fraction',
        'common': 'common_fraction',
        'Network': 'network_fraction',
    }
    # node attribute -> output column for the mean NPS scores
    score_columns = {'NPS_C': 'NPSc', 'NPS_R': 'NPSr', 'NPS_RC': 'NPSrc'}

    comm_features = {}
    for comm in hier_df.index.values:
        genes = hier_df.at[comm, 'Genes'].split(' ')
        members = [node_data_dict[int(n)] for n in genes]
        features = {}

        # gene-class fractions
        gene_classes = [member['GeneClass'] for member in members]
        for gene_class, column in class_to_fraction.items():
            n_in_class = gene_classes.count(gene_class)
            features[column] = n_in_class / len(genes) if n_in_class else 0
        # Compare with a tolerance: e.g. 0.3 + 0.6 + 0.1 is not exactly 1 in floating point.
        total_fraction = sum(features[column] for column in class_to_fraction.values())
        assert np.isclose(total_fraction, 1), 'Fractions do not add up to ...'

        # mean NPS scores
        for data_key, nps_name in score_columns.items():
            features[nps_name] = np.mean(np.array([float(member[data_key]) for member in members]))
        features['c_vs_r'] = features['NPSc'] / (features['NPSc'] + features['NPSr']) - 0.5

        # gene symbols; str() guards against a non-string fallback id stored as the symbol
        features['HGNC'] = ' '.join(str(member['HGNC']) for member in members)

        comm_features[comm] = features

    comm_df = pd.DataFrame.from_dict(comm_features, orient='index')
    return hier_df.join(comm_df)
    
def clean_names(names):
    """Abbreviate common words in community names and capitalise the first letter.

    Args:
        names: Iterable of community names.

    Returns:
        list: One entry per input, in the same order. Empty strings and non-string
        entries (e.g. NaN for an unnamed community) are returned unchanged.
    """
    replace = {'calcium': 'Ca', 'Calcium':'Ca', 'regulation':'reg.', 'Regulation':'Reg.', 
          'activity': 'activ.', 'organization':'org.', '(none)': 'NA'}
    names_out = []
    for name in names:
        if not isinstance(name, str):
            # nothing to abbreviate; pass the value through instead of raising AttributeError
            names_out.append(name)
            continue
        for before, after in replace.items():
            name = name.replace(before, after)
        # name[:1] instead of name[0] so an empty name does not raise IndexError
        names_out.append(name[:1].capitalize() + name[1:])
    return names_out


def add_hgnc(node_data_dict):
    """Fill in missing ``HGNC`` symbols from each node's Entrez ``GeneID``, in place.

    Nodes whose ``HGNC`` entry is absent or empty and that carry a ``GeneID`` are looked up
    in a single batched mygene.info query. When no symbol comes back for a node, its
    GeneID (as a string) is stored as the symbol instead.

    Args:
        node_data_dict: Mapping of node id -> attribute dict. The attribute dicts are
            modified in place; nodes that already have a symbol, or that have no GeneID,
            are left untouched.

    Returns:
        None.
    """
    # --- Step 1: collect the nodes that need a symbol, with their GeneID as a string ---
    # The translation map below is keyed by the 'query' field of each result, so GeneIDs
    # are normalised to str here to make the lookup independent of how they were stored.
    pending = {}
    for node_key, attrs in node_data_dict.items():
        gene_id = attrs.get('GeneID')
        if not attrs.get('HGNC') and gene_id:
            pending[node_key] = str(gene_id)

    if not pending:
        return

    # Imported lazily: only needed when at least one symbol has to be looked up.
    import mygene

    # --- Step 2: batch query mygene.info with the unique ids ---
    ids_to_query = list(set(pending.values()))
    print(f"Querying mygene.info for {len(ids_to_query)} Entrez IDs...")
    mg = mygene.MyGeneInfo()
    # scopes='entrezgene': the ids sent are Entrez ids; fields='symbol': return the gene symbol
    results = mg.querymany(ids_to_query, scopes='entrezgene', fields='symbol', species='human')
    print("...query complete.")

    # --- Step 3: build the {entrez id: symbol} translation map ---
    entrez_to_hgnc_map = {}
    for res in results:
        query_id = res.get('query')
        hgnc_symbol = res.get('symbol')
        if query_id and hgnc_symbol:
            entrez_to_hgnc_map[str(query_id)] = hgnc_symbol

    # --- Step 4: update the node attribute dicts, falling back to the GeneID ---
    for node_key, gene_id in pending.items():
        node_data_dict[node_key]['HGNC'] = entrez_to_hgnc_map.get(gene_id, gene_id)


In [107]:
lupus_out = annotate_and_clean_hierarchy(lupus_df.copy(), lupus_parentG.copy())

Input sequence provided is already in string format. No operation performed
Input sequence provided is already in string format. No operation performed


Querying mygene.info for 221 Entrez IDs...
...query complete.


In [108]:
outdir = "../out/net_out/lupus"

In [109]:
lupus_out.to_csv(os.path.join(outdir, 'lupus_hier_info.tsv'), sep='\t')

In [None]:
asd_out = annotate_and_clean_hierarchy(asd_df.copy(), asd_parentG.copy())

In [None]:
azd_out = annotate_and_clean_hierarchy(azd_df.copy(), azd_parentG.copy())

In [None]:
bip_out = annotate_and_clean_hierarchy(bip_df.copy(), bip_parentG.copy())

In [None]:
asd_out.to_csv('~/Data/Transfer/RVC/figures/NPD/ASD_hier_info.tsv', sep='\t')
azd_out.to_csv('~/Data/Transfer/RVC/figures/NPD/AZD_hier_info.tsv', sep='\t')
bip_out.to_csv('~/Data/Transfer/RVC/figures/NPD/BIP_hier_info.tsv', sep='\t')