In [19]:
pip install ndex2

Note: you may need to restart the kernel to use updated packages.


In [1]:
import ndex2
from ndex2.nice_cx_network import NiceCXNetwork
import json
import pandas as pd
from collections import defaultdict
import os

In [2]:
UUID = "95bc75d5-d1d1-11ee-8a40-005056ae23aa"
server = "http://public.ndexbio.org"

nice_cx = ndex2.create_nice_cx_from_server(server, uuid=UUID)

#To download .cx as a local file
#client = ndex2.client.Ndex2()
#cx_stream = client.get_network_as_cx_stream(UUID)
#with open('network.cx', 'wb') as f:
#    for chunk in cx_stream:
#        f.write(chunk)

In [22]:
#To download .cx as a local file
client = ndex2.client.Ndex2()
cx_stream = client.get_network_as_cx_stream(UUID)
with open('network.cx', 'wb') as f:
    for chunk in cx_stream:
        f.write(chunk)

In [23]:
#Extract edge list

edges = []
for edge_id, edge_data in nice_cx.get_edges():
    source = nice_cx.get_node(edge_data['s'])['n']
    target = nice_cx.get_node(edge_data['t'])['n']
    edges.append({'geneA': source, 'geneB': target})

edge_list= pd.DataFrame(edges)
edge_list.head()
print (f"Identified {edge_list.shape[0]} edges from the network")


Identified 36842 edges from the network


In [24]:
# Node Attributes
nodes = nice_cx.nodes
node_attrs = nice_cx.nodeAttributes

attr_by_node_id = defaultdict(dict)

for node_id, attr_list in node_attrs.items():
    for attr in attr_list:
        attr_name = attr['n']
        attr_value = attr['v']
        attr_by_node_id[node_id][attr_name] = attr_value

nodes_combined = []
for node_id, node_data in nodes.items():
    node_id = node_id
    name = node_data.get('n')
    represents = node_data.get('r', None)
    bait_str = attr_by_node_id[node_id].get('bait', None)
    antibody = attr_by_node_id[node_id].get('antibody', None)
    
    if bait_str == 'true':
        bait = True
    elif bait_str == 'false':
        bait = False
    else:
        bait = None
    
    nodes_combined.append({
        'node_id': node_id,
        'name': name,
        'represents': represents,
        'bait': bait,
        'antibody': antibody
    })

node_attrs_list = pd.DataFrame(nodes_combined)
print(f"Node attributes of {node_attrs_list.shape[0]} genes identified")


Node attributes of 7543 genes identified


In [None]:
# Original code:

import re
import csv
import logging
import mygene
from tqdm import tqdm

from cellmaps_ppidownloader.exceptions import CellMapsPPIDownloaderError

logger = logging.getLogger(__name__)


class GeneQuery(object):
    """
    Gets information about genes from mygene
    """

    def __init__(self, mygeneinfo=mygene.MyGeneInfo()):
        """
        Constructor
        """
        self._mg = mygeneinfo

    def querymany(self, queries, species=None,
                  scopes=None,
                  fields=None):
        """
        Simple wrapper that calls MyGene querymany
        returning the results

        :param queries: list of gene ids/symbols to query
        :type queries: list
        :param species:
        :type species: str
        :param scopes:
        :type scopes: str
        :param fields:
        :type fields: list
        :return: dict from MyGene usually in format of
        :rtype: list
        """
        mygene_out = self._mg.querymany(queries,
                                        scopes=scopes,
                                        fields=fields,
                                        species=species)
        return mygene_out

    def get_symbols_for_genes(self, genelist=None,
                              scopes='_id'):
        """
        Queries for genes via GeneQuery() object passed in via
        constructor

        :param genelist: genes to query for valid symbols and ensembl ids
        :type genelist: list
        :param scopes: field to query on _id for gene id, ensemble.gene
                       for ENSEMBLE IDs
        :type scopes: str
        :return: result from mygene which is a list of dict objects where
                 each dict is of format:

                 .. code-block::

                     { 'query': 'ID',
                       '_id': 'ID', '_score': #.##,
                       'ensembl': { 'gene': 'ENSEMBLEID' },
                       'symbol': 'GENESYMBOL' }
        :rtype: list
        """
        res = self.querymany(genelist,
                             species='human',
                             scopes=scopes,
                             fields=['ensembl.gene', 'symbol'])
        return res


class GeneNodeAttributeGenerator(object):
    """
    Base class for GeneNodeAttribute Generator
    """

    def __init__(self):
        """
        Constructor
        """
        pass

    @staticmethod
    def add_geneids_to_set(gene_set=None,
                           ambiguous_gene_dict=None,
                           geneid=None):
        """
        Examines **geneid** passed in and if a comma exists
        in value split by comma and assume multiple genes.
        Adds those genes into **gene_set** and add entry
        to **ambiguous_gene_dict** with key set to each gene
        name and value set to original **geneid** value

        :param gene_set: unique set of genes
        :type gene_set: set
        :param geneid: name of gene or comma delimited string of genes
        :type geneid: str
        :return: genes found in **geneid** or None if **gene_set**
                 or **geneid** is ``None``
        :rtype: list
        """
        if gene_set is None:
            return None
        if geneid is None:
            return None

        split_str = re.split('\W*,\W*', geneid)
        gene_set.update(split_str)
        if ambiguous_gene_dict is not None:
            if len(split_str) > 1:
                for entry in split_str:
                    ambiguous_gene_dict[entry] = geneid
        return split_str

    def get_gene_node_attributes(self):
        """
        Should be implemented by subclasses

        :raises NotImplementedError: Always
        """
        raise NotImplementedError('Subclasses should implement')
    
class NdexGeneNodeAttributeGenerator(GeneNodeAttributeGenerator):
    """
    Creates APMS Gene Node Attributes table and edgelist table from NDEx network data (.cx)
    """

    def __init__(self, apms_edgelist=None,
                 genequery=GeneQuery(), uuid, ndexserver):
        """
        Constructor

        :param apms_edgelist: list of dict elements where each
                              dict is of format:

                              .. code-block::

                                  {'Bait': VAL,
                                   'Prey': VAL,
                                   'logOddsScore': VAL,
                                   'FoldChange.x': VAL,
                                   'BFDR.x': VAL}
        :type apms_edgelist: list
        :param genequery:
        """
        super().__init__()
        self._raw_apms_edgelist = apms_edgelist
        self._apms_edgelist = None
        self._genequery = genequery
        self.uuid = uuid
        self.ndexserver = "http://public.ndexbio.org"
        self.node_attrs_list = self.get_gene_node_attributes ()


    @staticmethod
    def get_apms_edgelist_from_tsvfile(tsvfile=None,
                                       bait_col='Bait',
                                       prey_col='Prey',
                                       bfdr_col=None,
                                       foldchange_col=None,
                                       foldchange_cutoff=0.0,
                                       bfdr_maxcutoff=0.05):
        """
        Generates list of dicts by parsing TSV file specified
        by **tsvfile** with the
        format header column and corresponding values:

        .. code-block::

            Bait\tPrey\tBFDR.x\tFoldChange.x

        .. note::

           If BFDR.x column does not exist, no BFDR filtering will occur
           Same goes if FoldChange.x column does not exist

        :param tsvfile: Path to TSV file with above format
        :type tsvfile: str
        :param bait_col: Name of bait column
        :type bait_col: str
        :param prey_col: Name of prey column
        :type prey_col: str
        :param bfdr_col: Name of BFDR aka false discovery rate column
                         If ``None`` no BFDR filtering will occur
        :type bfdr_col: str
        :param foldchange_col: Name of FoldChange column
                               If ``None`` no FoldChange filtering will occur
        :type foldchange_col: str
        :param foldchange_cutoff: Foldchange cutoff. Only keep rows with
                                  values greater then this value.
                                  If this value is ``None`` no filtering
                                  will occur
        :type foldchange_cutoff: float
        :param bfdr_maxcutoff: BFDR cutoff. Only keep rows with BFDR
                               less then or equal to this value.
                               If this value is ``None`` no filtering will
                               occur
        :type bfdr_maxcutoff: float
        :return: list of dicts, with each dict of format:

                 .. code-block::

                      {'Bait': VAL,
                       'Prey': VAL}
        :rtype: list
        """
        edgelist = []
        with open(tsvfile, 'r') as f:
            reader = csv.DictReader(f, delimiter='\t')
            for row in reader:
                if bfdr_col is not None and bfdr_col in row \
                    and row[bfdr_col] > bfdr_maxcutoff:
                    continue
                if foldchange_col is not None and foldchange_col in row \
                    and row[foldchange_col] <= foldchange_cutoff:
                    continue
                edgelist.append({'Bait': row[bait_col],
                                 'Prey': row[prey_col]})
        return edgelist

    def _get_unique_set_from_raw_edgelist(self, colname=None):
        """
        Given a column name **colname** extract unique set of values from
        raw apms edgelist passed in via constructor

        :return:
        :rtype: set
        """
        col_set = set()
        for entry in self._raw_apms_edgelist:
            col_set.add(entry[colname])
        return col_set

    def _get_baits_to_ensemblsymbolmap(self):
        """
        Get unique set of bait names from raw apms edgelist
        and query mygene to get symbols and ensembl gene ids

        :return: original bait name to mapped to tuple
                 (id, symbol, ensembl gene id)
        :rtype: dict
        """
        bait_set = self._get_unique_set_from_raw_edgelist('Bait')
        res = self._genequery.get_symbols_for_genes(list(bait_set),
                                                    scopes='symbol')
        bait_to_id = {}
        for entry in res:
            bait_to_id[entry['query']] = (entry['_id'],
                                          entry['symbol'],
                                          entry['ensembl']['gene'])
        return bait_to_id

    def _get_prey_to_ensemblsymbolmap(self):
        """
        Get unique set of prey names from raw apms edgelist
        and query mygene to get symbols and ensembl gene ids

        :return: original bait name to mapped to tuple
                 (id, symbol, ensembl gene id)
        :rtype: dict
        """
        prey_set = self._get_unique_set_from_raw_edgelist('Prey')
        res = self._genequery.get_symbols_for_genes(list(prey_set),
                                                    scopes='uniprot')
        prey_to_id = {}
        for entry in res:
            ensemblstr = ''
            if 'ensembl' not in entry:
                logger.error(str(entry) + ' no ensembl found')
                continue
            if isinstance(entry['ensembl'], list):
                ensemblstr += ';'.join([g['gene'] for g in entry['ensembl']])
            else:
                ensemblstr = entry['ensembl']['gene']
            prey_to_id[entry['query']] = (entry['_id'],
                                          entry['symbol'],
                                          ensemblstr)
        return prey_to_id

    def get_apms_edgelist(self):
        """
        Gets apms edgelist

        :return:
        :rtype: list
        """
        if self._apms_edgelist is not None:
            return self._apms_edgelist

        # we need to generate this list
        baits_to_idmap = self._get_baits_to_ensemblsymbolmap()

        prey_set = self._get_unique_set_from_raw_edgelist('Prey')

        prey_to_idmap = self._get_prey_to_ensemblsymbolmap()
        self._apms_edgelist = []
        for row in self._raw_apms_edgelist:
            if row['Bait'] not in baits_to_idmap:
                logger.warning('Bait ' + str(row['Bait']) + ' not in map. Skipping')
                continue
            if row['Prey'] not in prey_to_idmap:
                logger.warning('Prey ' + str(row['Prey'] + ' not in map. Skipping'))
                continue
            bait_tuple = baits_to_idmap[row['Bait']]
            prey_tuple = prey_to_idmap[row['Prey']]
            self._apms_edgelist.append({'GeneID1': bait_tuple[0],
                                        'Symbol1': bait_tuple[1],
                                        'Ensembl1': bait_tuple[2],
                                        'GeneID2': prey_tuple[0],
                                        'Symbol2': prey_tuple[1],
                                        'Ensembl2': prey_tuple[2]})
        return self._apms_edgelist

    def _get_apms_bait_set(self):
        """
        Gets unique set of baits

        :return:
        :rtype: set
        """
        bait_set = set()
        for entry in self._apms_baitlist:
            bait_set.add(entry['GeneID'])
        return bait_set

    def get_gene_node_attributes(self):
        """
        Gene gene node attributes which is output as a list of
        dicts in this format:

        .. code-block::

            { 'GENEID': { 'name': 'GENESYMBOL',
                          'represents': 'ensemble:ENSEMBLID1;ENSEMBLID2..',
                          'ambiguous': 'ALTERNATE GENEs',
                          'bait': True or False}
            }



        :return: (list of dicts containing gene node attributes,
                  list of str describing any errors encountered)
        :rtype: tuple
        """
        self.get_apms_edgelist()
        errors = []
        gene_node_attrs = {}
        for i in ['1', '2']:
            if i == '1':
                bait = True
            else:
                bait = False
            for x in self._apms_edgelist:
                if x['GeneID' + i] in gene_node_attrs:
                    continue
                ensemblstr = 'ensembl:' + x['Ensembl' + i]
                gene_node_attrs[x['GeneID' + i]] = {'name': x['Symbol' + i],
                                                    'represents': ensemblstr,
                                                    'ambiguous': '',
                                                    'bait': bait}

        return gene_node_attrs, errors


NameError: name 'GeneNodeAttributeGenerator' is not defined

In [None]:
def get_gene_node_attributes(nice_cx):
    nodes = nice_cx.nodes
    node_attrs = nice_cx.nodeAttributes

    attr_by_node_id = defaultdict(dict)
    for node_id,attr_list in node_attrs.items():
        for attr in attr_list:
            attr_name = attr['n']
            attr_value = attr['v']
            attr_by_node_id[node_id][attr_name] = attr_value

    gene_node_attrs = {}
    errors = []

    for node_id, node_data in nodes.items():
        node_id = node_id
        name = node_data.get('n')

        if name is None:
            errors.append(f"Node {node_id} has no 'name'")
            continue
        
        represents = node_data.get('r', None)
        bait = attr_by_node_id[node_id].get('bait', None)
        antibody = attr_by_node_id[node_id].get('antibody', None)
        ambiguous = attr_by_node_id[node_id].get('ambiguous', None)


        gene_node_attrs[str(node_id)] = {
            'name': name,
            'represents': represents,
            'ambiguous' : ambiguous,
            'bait': bait,
            'antibody': antibody
            
        }
    return gene_node_attrs, errors

In [69]:
test_list, errors = get_gene_node_attributes(nice_cx)
test_list

{635898: {'name': 'TMEM88',
  'represents': None,
  'ambiguous': None,
  'bait': 'true',
  'antibody': None},
 635852: {'name': 'COL10A1',
  'represents': None,
  'ambiguous': None,
  'bait': 'true',
  'antibody': None},
 635816: {'name': 'NT5DC3',
  'represents': 'ensembl:ENSG00000111696',
  'ambiguous': None,
  'bait': 'false',
  'antibody': 'HPA041634'},
 635680: {'name': 'CYCS',
  'represents': 'ensembl:ENSG00000172115',
  'ambiguous': None,
  'bait': 'true',
  'antibody': 'CAB004222'},
 635458: {'name': 'PHF20',
  'represents': 'ensembl:ENSG00000025293',
  'ambiguous': None,
  'bait': 'true',
  'antibody': 'HPA029620'},
 635454: {'name': 'ZNF580',
  'represents': 'ensembl:ENSG00000213015',
  'ambiguous': None,
  'bait': 'false',
  'antibody': 'HPA054058'},
 635420: {'name': 'WTIP',
  'represents': None,
  'ambiguous': None,
  'bait': 'false',
  'antibody': None},
 635416: {'name': 'ADD2',
  'represents': 'ensembl:ENSG00000075340',
  'ambiguous': None,
  'bait': 'true',
  'antibody

In [48]:
nodes_comb = get_gene_node_attributes(nice_cx)

In [49]:
nodes_comb

[{'name': 'TMEM88',
  'represents': None,
  'ambiguous': None,
  'bait': True,
  'antibody': None},
 {'name': 'COL10A1',
  'represents': None,
  'ambiguous': None,
  'bait': True,
  'antibody': None},
 {'name': 'NT5DC3',
  'represents': 'ensembl:ENSG00000111696',
  'ambiguous': None,
  'bait': False,
  'antibody': 'HPA041634'},
 {'name': 'CYCS',
  'represents': 'ensembl:ENSG00000172115',
  'ambiguous': None,
  'bait': True,
  'antibody': 'CAB004222'},
 {'name': 'PHF20',
  'represents': 'ensembl:ENSG00000025293',
  'ambiguous': None,
  'bait': True,
  'antibody': 'HPA029620'},
 {'name': 'ZNF580',
  'represents': 'ensembl:ENSG00000213015',
  'ambiguous': None,
  'bait': False,
  'antibody': 'HPA054058'},
 {'name': 'WTIP',
  'represents': None,
  'ambiguous': None,
  'bait': False,
  'antibody': None},
 {'name': 'ADD2',
  'represents': 'ensembl:ENSG00000075340',
  'ambiguous': None,
  'bait': True,
  'antibody': 'HPA034510'},
 {'name': 'SLC25A39',
  'represents': None,
  'ambiguous': None

In [28]:
def get_apms_edgelist (nice_cx, node_attrs_list):

    node_attrs_by_id = {}
    for _, row in node_attrs_list.iterrows():
        node_attrs_by_id[row['node_id']] = row.to_dict()

    edges = nice_cx.edges
    edge_attrs = nice_cx.edgeAttributes

    attr_by_edge_id = defaultdict(dict)
    for edge_id, attr_list in edge_attrs.items():
        for attr in attr_list:
            attr_name = attr['n']
            attr_value = attr['v']
            if attr_name == 'name':
                continue
            attr_by_edge_id[edge_id][attr_name] = attr_value

    edge_list = []
    for edge_id, edge_data in edges.items():
        source = edge_data.get('s')
        target = edge_data.get('t')

        source_info = node_attrs_by_id.get(source, {})
        target_info = node_attrs_by_id.get(target, {})

        #ensembl1_raw = source_info.get('represents')
        #ensembl1 = ensembl1_raw[len('ensembl:'):] if ensembl1_raw and ensembl1_raw.startswith('ensembl:') else ensembl1_raw

        #ensembl2_raw = target_info.get('represents')
        #ensembl2 = ensembl2_raw[len('ensembl:'):] if ensembl2_raw and ensembl2_raw.startswith('ensembl:') else ensembl2_raw

        edge_dict = {
            'geneA': str(source_info.get('node_id')), 
            'symbolA': source_info.get('name'),
            #'ensemblA': ensembl1,
            'geneB': str(target_info.get('node_id')),
            'symbolB': target_info.get('name'),
            #'ensemblB': ensembl2,
        }

        edge_dict.update(attr_by_edge_id.get(edge_id, {}))
        edge_list.append(edge_dict)
    

    return edge_list


In [29]:
apms_edgelist = get_apms_edgelist(nice_cx, nodes_comb)

In [30]:
apms_edgelist

[{'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '551222',
  'symbolB': 'SLC30A1'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '568644',
  'symbolB': 'TNPO3'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '553938',
  'symbolB': 'SLC12A4'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '552516',
  'symbolB': 'MYO6'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '551198',
  'symbolB': 'SLC39A10'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555820',
  'symbolB': 'SLC12A6'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555224',
  'symbolB': 'VPS18'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '548774',
  'symbolB': 'TMEM160'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555242',
  'symbolB': 'CUEDC1'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555116',
  'symbolB': 'CYRIB'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '562860',
  'symbolB': 'UFSP2'},
 {'geneA': '635898',


In [None]:
class NdexGeneNodeAttributeGenerator(GeneNodeAttributeGenerator):
    """
    Creates APMS Gene Node Attributes table and edgelist table from NDEx network data (.cx)
    """

    def __init__(self, apms_edgelist=None,
                 genequery=GeneQuery(), uuid, ndexserver):
        """
        Constructor

        :param apms_edgelist: list of dict elements where each
                              dict is of format:

                              .. code-block::

                                  {'Bait': VAL,
                                   'Prey': VAL,
                                   'logOddsScore': VAL,
                                   'FoldChange.x': VAL,
                                   'BFDR.x': VAL}
        :type apms_edgelist: list
        :param genequery:
        """
        super().__init__()
        self._raw_apms_edgelist = apms_edgelist
        self._apms_edgelist = None
        self._genequery = genequery
        self.uuid = uuid
        self.ndexserver = "http://public.ndexbio.org"
        self.node_attrs_list = self.get_gene_node_attributes ()
    
    def get_gene_node_attributes(self, nice_cx):
        nodes = nice_cx.nodes
        node_attrs = nice_cx.nodeAttributes

        attr_by_node_id = defaultdict(dict)
        for node_id,attr_list in node_attrs.items():
            for attr in attr_list:
                attr_name = attr['n']
                attr_value = attr['v']
                attr_by_node_id[node_id][attr_name] = attr_value

        nodes_combined = []
        for node_id, node_data in nodes.items():
            node_id = node_id
            name = node_data.get('n')
            represents = node_data.get('r', None)
            bait_str = attr_by_node_id[node_id].get('bait', None)
            antibody = attr_by_node_id[node_id].get('antibody', None)

            if bait_str == 'true':
                bait = True
            elif bait_str == 'false':
                bait = False
            else:
                bait = None

            nodes_combined.append({
                'node_id': node_id,
                'name': name,
                'represents': represents,
                'bait': bait,
                'antibody': antibody
            })
        
        node_attrs_list = pd.DataFrame(nodes_combined)

        return node_attrs_list
    
    
    def get_apms_edgelist (self, nice_cx, node_attrs_list):

        node_attrs_by_id = {}
        for _, row in node_attrs_list.iterrows():
            node_attrs_by_id[row['node_id']] = row.to_dict()

        edges = nice_cx.edges
        edge_attrs = nice_cx.edgeAttributes

        attr_by_edge_id = defaultdict(dict)
        for edge_id, attr_list in edge_attrs.items():
            for attr in attr_list:
                attr_name = attr['n']
                attr_value = attr['v']
                if attr_name == 'name':
                    continue
                attr_by_edge_id[edge_id][attr_name] = attr_value

        edge_list = []
        for edge_id, edge_data in edges.items():
            source = edge_data.get('s')
            target = edge_data.get('t')

            source_info = node_attrs_by_id.get(source, {})
            target_info = node_attrs_by_id.get(target, {})

            ensembl1_raw = source_info.get('represents')
            ensembl1 = ensembl1_raw[len('ensembl:'):] if ensembl1_raw and ensembl1_raw.startswith('ensembl:') else ensembl1_raw

            ensembl2_raw = target_info.get('represents')
            ensembl2 = ensembl2_raw[len('ensembl:'):] if ensembl2_raw and ensembl2_raw.startswith('ensembl:') else ensembl2_raw

            edge_dict = {
                'geneA': source_info.get('node_id'), 
                'symbolA': source_info.get('name'),
                'ensemblA': ensembl1,
                'geneB': target_info.get('node_id'),
                'symbolB': target_info.get('name'),
                'ensemblB': ensembl2,
            }

            edge_dict.update(attr_by_edge_id.get(edge_id, {}))
            edge_list.append(edge_dict)
        
        edgelist = pd.DataFrame(edge_list)

        return edgelist  

In [None]:
#original
def get_apms_baitlist_from_tsvfile(tsvfile=None,
                                       symbol_col=BAITLIST_GENE_SYMBOL,
                                       geneid_col=BAITLIST_GENE_ID,
                                       numinteractors_col=BAITLIST_NUM_INTERACTORS):
        """
        Generates list of dicts by parsing TSV file specified
        by **tsvfile** with the
        format header column and corresponding values:

        .. code-block::

            GeneSymbol\tGeneID\t# Interactors

        :param tsvfile: Path to TSV file with above format
        :type tsvfile: str
        :return: list of dicts, with each dict of format:

                 .. code-block::

                      { 'GeneSymbol': VAL,
                        'GeneID': VAL,
                        'NumIteractors': VAL }
        :rtype: list
        """
        edgelist = []
        if tsvfile is not None:
            with open(tsvfile, 'r') as f:
                reader = csv.DictReader(f, delimiter='\t')
                for row in reader:
                    edgelist.append({'GeneSymbol': row[symbol_col],
                                     'GeneID': row[geneid_col],
                                     'NumInteractors': row[numinteractors_col]})
        return edgelist

In [53]:
def get_baitlist(uuid=None):

    nice_cx = ndex2.create_nice_cx_from_server("http://public.ndexbio.org", uuid=uuid)
    nodes = nice_cx.nodes
    node_attrs = nice_cx.nodeAttributes
    edges = nice_cx.edges

    attr_by_node_id = defaultdict(dict)
    for node_id, attr_list in node_attrs.items():
        for attr in attr_list:
            attr_by_node_id[node_id][attr['n']] = attr['v']

    adjacency = defaultdict(set)
    for edge_id, edge_data in edges.items():
        source = edge_data['s']
        target = edge_data['t']
        adjacency[source].add(target)
        adjacency[target].add(source)

    baitlist = []

    for node_id, node_data in nodes.items():
        is_bait = attr_by_node_id[node_id].get('bait', '').lower() == 'true'
        if is_bait:
            gene_symbol = node_data.get('n')
            gene_id = str(node_id)
            num_interactors = len(adjacency[node_id])
            baitlist.append({
                'GeneSymbol': gene_symbol,
                'GeneID': gene_id,
                'NumInteractors': num_interactors
            })

    return baitlist


In [54]:
apms_baitlist = get_baitlist(uuid="95bc75d5-d1d1-11ee-8a40-005056ae23aa")

In [55]:
apms_baitlist

[{'GeneSymbol': 'TMEM88', 'GeneID': '635898', 'NumInteractors': 40},
 {'GeneSymbol': 'COL10A1', 'GeneID': '635852', 'NumInteractors': 22},
 {'GeneSymbol': 'CYCS', 'GeneID': '635680', 'NumInteractors': 4},
 {'GeneSymbol': 'PHF20', 'GeneID': '635458', 'NumInteractors': 5},
 {'GeneSymbol': 'ADD2', 'GeneID': '635416', 'NumInteractors': 1},
 {'GeneSymbol': 'IMPDH1', 'GeneID': '635296', 'NumInteractors': 52},
 {'GeneSymbol': 'VSIG1', 'GeneID': '635006', 'NumInteractors': 139},
 {'GeneSymbol': 'SYCE3', 'GeneID': '634946', 'NumInteractors': 12},
 {'GeneSymbol': 'P4HA3', 'GeneID': '634460', 'NumInteractors': 61},
 {'GeneSymbol': 'DHCR24', 'GeneID': '634348', 'NumInteractors': 17},
 {'GeneSymbol': 'ESRRG', 'GeneID': '634208', 'NumInteractors': 11},
 {'GeneSymbol': 'FYN', 'GeneID': '634146', 'NumInteractors': 29},
 {'GeneSymbol': 'ISX', 'GeneID': '634130', 'NumInteractors': 7},
 {'GeneSymbol': 'TGIF2', 'GeneID': '633898', 'NumInteractors': 31},
 {'GeneSymbol': 'PARP12', 'GeneID': '633228', 'NumIn

In [None]:
def get_apms_edgelist(self, nice_cx):
    nodes = nice_cx.nodes
    edges = nice_cx.edges
    edge_attrs = nice_cx.edgeAttributes

    attr_by_edge_id = defaultdict(dict)
    for edge_id, attr_list in edge_attrs.items():
        for attr in attr_list:
            attr_name = attr['n']
            attr_value = attr['v']
            if attr_name == 'name':
                continue
            attr_by_edge_id[edge_id][attr_name] = attr_value

    self._apms_edgelist = []
    for edge_id, edge_data in edges.items():
        source = edge_data.get('s')
        target = edge_data.get('t')

        source_info = nodes.get(source, {})
        target_info = nodes.get(target, {})

        edge_dict = {
            'GeneID1': str(source),  
            'Symbol1': source_info.get('n'),  
            'GeneID2': str(target),
            'Symbol2': target_info.get('n'),
        }

        edge_dict.update(attr_by_edge_id.get(edge_id, {}))
        self._apms_edgelist.append(edge_dict)

    return self._apms_edgelist


In [38]:
get_apms_edgelist2(nice_cx)

[{'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '551222',
  'symbolB': 'SLC30A1'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '568644',
  'symbolB': 'TNPO3'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '553938',
  'symbolB': 'SLC12A4'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '552516',
  'symbolB': 'MYO6'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '551198',
  'symbolB': 'SLC39A10'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555820',
  'symbolB': 'SLC12A6'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555224',
  'symbolB': 'VPS18'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '548774',
  'symbolB': 'TMEM160'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555242',
  'symbolB': 'CUEDC1'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '555116',
  'symbolB': 'CYRIB'},
 {'geneA': '635898',
  'symbolA': 'TMEM88',
  'geneB': '562860',
  'symbolB': 'UFSP2'},
 {'geneA': '635898',
