In [29]:
import sys
import pandas as pd
import requests
import json
from pandas.io.json import json_normalize

# Expansion of nodes using Monarch APIs
* https://api.monarchinitiative.org/api/#/
* https://scigraph-ontology.monarchinitiative.org/scigraph/docs/#/

In [30]:
# input substance
metabolite = 'CHEBI:506227'

# output files
path = 'glcnac-human-expansion/glcnac_human'

## Graph queries (SciGraph)

In [31]:
# api address
api = 'https://scigraph-ontology.monarchinitiative.org/scigraph'
endpoint = '/graph'

In [32]:
# get neighbors (JSON content)
r = requests.get('{}{}/neighbors/{}'.format(api,endpoint,metabolite))
r.headers

{'Connection': 'keep-alive', 'Date': 'Wed, 07 Jun 2017 01:01:40 GMT', 'Content-Length': '880', 'Content-Type': 'application/json', 'Server': 'nginx/1.10.0 (Ubuntu)', 'Vary': 'Accept, Accept-Encoding', 'Content-Encoding': 'gzip', 'Cache-Control': 'no-transform, max-age=7200'}

In [33]:
r.status_code

200

In [34]:
# Read results 
r.json()

{'edges': [{'meta': {'lbl': ['isDefinedBy']},
   'obj': 'OBO:upheno/monarch.owl',
   'pred': 'isDefinedBy',
   'sub': 'CHEBI:506227'},
  {'meta': {'isDefinedBy': ['http://purl.obolibrary.org/obo/upheno/monarch.owl'],
    'lbl': ['subClassOf']},
   'obj': 'CHEBI:59640',
   'pred': 'subClassOf',
   'sub': 'CHEBI:506227'},
  {'meta': {'convenience': [True],
    'equivalentOriginalNodeSource': ['_:72df5bf9bf084b8edfbacd665df547ca'],
    'lbl': ['has input'],
    'owlType': ['operand']},
   'obj': 'CHEBI:506227',
   'pred': 'RO:0002233',
   'sub': 'GO:0097316'},
  {'meta': {'convenience': [True],
    'equivalentOriginalNodeSource': ['_:55af255a394966ac0299ddc7621feb8d'],
    'lbl': ['has input'],
    'owlType': ['operand']},
   'obj': 'CHEBI:506227',
   'pred': 'RO:0002233',
   'sub': 'GO:0097315'},
  {'meta': {'convenience': [True],
    'lbl': ['has input'],
    'owlType': ['subClassOf']},
   'obj': 'CHEBI:506227',
   'pred': 'RO:0002233',
   'sub': 'GO:0097315'},
  {'meta': {'isDefinedBy'

In [35]:
neighbors_df = json_normalize(r.json(), 'nodes')
neighbors_df.head()

Unnamed: 0,id,lbl,meta
0,GO:0097316,cellular response to N-acetyl-D-glucosamine,{'http://www.w3.org/2000/01/rdf-schema#label':...
1,CHEBI:76969,bacterial metabolite,{'http://www.w3.org/2000/01/rdf-schema#label':...
2,OBO:upheno/monarch.owl,,"{'types': ['Ontology', 'cliqueLeader', 'Node']}"
3,CHEBI:59640,N-acetylglucosamine,{'http://www.w3.org/2000/01/rdf-schema#label':...
4,GO:0097315,response to N-acetyl-D-glucosamine,{'http://www.w3.org/2000/01/rdf-schema#label':...


In [36]:
# Analyze neighbours' types
neighbors_df.id.unique()
print('nodes: {}'.format(len(neighbors_df.id.unique())))

nodes: 7


In [37]:
neighbors_df[['id','lbl']]
# the 4 CHEBIs are outgoing nodes in the ChEBI ontology (has_role, is_a)

Unnamed: 0,id,lbl
0,GO:0097316,cellular response to N-acetyl-D-glucosamine
1,CHEBI:76969,bacterial metabolite
2,OBO:upheno/monarch.owl,
3,CHEBI:59640,N-acetylglucosamine
4,GO:0097315,response to N-acetyl-D-glucosamine
5,CHEBI:506227,N-acetyl-D-glucosamine
6,CHEBI:21601,N-acetyl-D-hexosamine


In [38]:
neighbors_df['node_type'] = neighbors_df.id.apply(lambda x: x.split(':')[0])
neighbors_df.node_type.value_counts()

CHEBI    4
GO       2
OBO      1
Name: node_type, dtype: int64

In [39]:
# conclusion: neighbors are outgoing nodes (parents) in CHEBI, GO annotation, provenance. I will use GO only:
# <go> <has_input, RO:0002233, subclassOf> <metabolite>

In [40]:
# subset go terms
go_df = neighbors_df[['id', 'lbl']]
go_df = go_df[go_df['id'].str.contains('GO:')]
go_df.head()

Unnamed: 0,id,lbl
0,GO:0097316,cellular response to N-acetyl-D-glucosamine
4,GO:0097315,response to N-acetyl-D-glucosamine


In [41]:
go_df.to_csv('{}_go_neighbors_monarch.tsv'.format(path), sep='\t', index=False, header=True)

In [42]:
# Filters
# Filter by interaction_type. BUT what are the strings per interaction_type???

In [43]:
# get reachable nodes (JSON content)
r = requests.get('{}{}/reachablefrom/{}'.format(api,endpoint,metabolite))
r.headers

{'Connection': 'keep-alive', 'Date': 'Wed, 07 Jun 2017 01:01:41 GMT', 'Content-Length': '337', 'Content-Type': 'application/json', 'Server': 'nginx/1.10.0 (Ubuntu)', 'Vary': 'Accept, Accept-Encoding', 'Content-Encoding': 'gzip', 'Cache-Control': 'no-transform, max-age=7200'}

In [44]:
r.status_code

200

In [45]:
# Read results 
r.json()
reach_df = json_normalize(r.json(), 'nodes')
reach_df.head()

Unnamed: 0,id,lbl,meta
0,OBO:upheno/monarch.owl,,"{'types': ['Ontology', 'cliqueLeader', 'Node']}"
1,CHEBI:59640,N-acetylglucosamine,{'http://www.w3.org/2000/01/rdf-schema#label':...
2,CHEBI:76969,bacterial metabolite,{'http://www.w3.org/2000/01/rdf-schema#label':...
3,CHEBI:21601,N-acetyl-D-hexosamine,{'http://www.w3.org/2000/01/rdf-schema#label':...
4,_:a087bd0cac3136e13077700e06486036,,"{'types': ['someValuesFrom', 'cliqueLeader', '..."


In [46]:
reach_df.id.unique()
print('nodes: {}'.format(len(reach_df.id.unique())))

nodes: 5


In [47]:
reach_df['node_type'] = reach_df.id.apply(lambda x: x.split(':')[0])
reach_df.node_type.value_counts()

CHEBI    3
_        1
OBO      1
Name: node_type, dtype: int64

In [48]:
# conclusion: three nodes reachabable, which are the chebi outgoing nodes (same above)

## Edge Queries (Monarch)

In [78]:
# api address
api = 'https://api.monarchinitiative.org/api'
endpoint = '/bioentity'

In [50]:
# get substance info
r = requests.get('{}{}/substance/{}'.format(api,endpoint,metabolite))
r.headers
r.status_code

200

In [51]:
r.json()

{'categories': None,
 'consider': None,
 'deprecated': None,
 'description': None,
 'id': None,
 'inchi': None,
 'inchi_key': None,
 'label': None,
 'replaced_by': None,
 'smiles': None,
 'synonyms': None,
 'target_associations': None,
 'taxon': {'id': None, 'label': None},
 'types': None,
 'xrefs': None}

In [79]:
# get substance exposures TODO
r = requests.get('{}{}/substance/{}/exposures/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 {'evidence_graph': {'edges': None, 'nodes': None},
  'evidence_types': None,
  'id': None,
  'object': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'object_extension': None,
  'provided_by': None,
  'publications': None,
  'qualifiers': None,
  'relation': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'types': None},
  'slim': None,
  'subject': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'subject_extension': None,
  'type': None})

In [80]:
# get substance interactions TODO
r = requests.get('{}{}/substance/{}/interactions/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 {'evidence_graph': {'edges': None, 'nodes': None},
  'evidence_types': None,
  'id': None,
  'object': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'object_extension': None,
  'provided_by': None,
  'publications': None,
  'qualifiers': None,
  'relation': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'types': None},
  'slim': None,
  'subject': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'subject_extension': None,
  'type': None})

In [90]:
# get substance pathways
data = {'use_compact_associations':'true'} 
r = requests.get('{}{}/substance/{}/participant_in/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 [{'evidence_graph': {'edges': None, 'nodes': None},
   'evidence_types': None,
   'id': None,
   'object': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': 'CHEBI:506227',
    'label': 'N-acetyl-D-glucosamine',
    'replaced_by': None,
    'synonyms': None,
    'taxon': {'id': None, 'label': None},
    'types': None,
    'xrefs': None},
   'object_extension': None,
   'provided_by': None,
   'publications': None,
   'qualifiers': None,
   'relation': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': None,
    'label': None,
    'replaced_by': None,
    'synonyms': None,
    'types': None},
   'slim': None,
   'subject': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': 'GO:0097315',
    'label': 'response to N-acetyl-D-glucosamine',
    'replaced_by': None,
    'synonyms': None,
    'taxon': {'id': None, 'label': None},
    

In [108]:
r_list = r.json()
for edge in r_list:
    print(edge['subject']['id'],edge['subject']['label'])
    print(edge['object']['id'],edge['object']['label'])

GO:0097315 response to N-acetyl-D-glucosamine
CHEBI:506227 N-acetyl-D-glucosamine
CHEBI:506227 N-acetyl-D-glucosamine
CHEBI:76969 bacterial metabolite
CHEBI:506227 N-acetyl-D-glucosamine
OBO:upheno/monarch.owl None
GO:0097316 cellular response to N-acetyl-D-glucosamine
CHEBI:506227 N-acetyl-D-glucosamine
GO:0097315 response to N-acetyl-D-glucosamine
CHEBI:506227 N-acetyl-D-glucosamine
CHEBI:506227 N-acetyl-D-glucosamine
CHEBI:59640 N-acetylglucosamine
CHEBI:506227 N-acetyl-D-glucosamine
CHEBI:21601 N-acetyl-D-hexosamine


In [84]:
# get substance roles
r = requests.get('{}{}/substance/{}/roles/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 [{'evidence_graph': {'edges': None, 'nodes': None},
   'evidence_types': None,
   'id': None,
   'object': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': 'CHEBI:76969',
    'label': 'bacterial metabolite',
    'replaced_by': None,
    'synonyms': None,
    'taxon': {'id': None, 'label': None},
    'types': None,
    'xrefs': None},
   'object_extension': None,
   'provided_by': None,
   'publications': None,
   'qualifiers': None,
   'relation': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': None,
    'label': None,
    'replaced_by': None,
    'synonyms': None,
    'types': None},
   'slim': None,
   'subject': {'categories': None,
    'consider': None,
    'deprecated': None,
    'description': None,
    'id': 'CHEBI:506227',
    'label': 'N-acetyl-D-glucosamine',
    'replaced_by': None,
    'synonyms': None,
    'taxon': {'id': None, 'label': None},
    'types': None

In [85]:
# get substance-substance interactions TODO
r = requests.get('{}{}/substance/{}/substances/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 {'evidence_graph': {'edges': None, 'nodes': None},
  'evidence_types': None,
  'id': None,
  'object': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'object_extension': None,
  'provided_by': None,
  'publications': None,
  'qualifiers': None,
  'relation': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'types': None},
  'slim': None,
  'subject': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'subject_extension': None,
  'type': None})

In [86]:
# get substance targets TODO
r = requests.get('{}{}/substance/{}/targets/'.format(api,endpoint,metabolite))
r.status_code, r.json()

(200,
 {'evidence_graph': {'edges': None, 'nodes': None},
  'evidence_types': None,
  'id': None,
  'object': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'object_extension': None,
  'provided_by': None,
  'publications': None,
  'qualifiers': None,
  'relation': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'types': None},
  'slim': None,
  'subject': {'categories': None,
   'consider': None,
   'deprecated': None,
   'description': None,
   'id': None,
   'label': None,
   'replaced_by': None,
   'synonyms': None,
   'taxon': {'id': None, 'label': None},
   'types': None,
   'xrefs': None},
  'subject_extension': None,
  'type': None})

In [87]:
# get substance indications
r = requests.get('{}{}/substance/{}/treats/'.format(api,endpoint,metabolite))
r.status_code, r.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# Expansion of nodes using Reactome APIs
##### Provides an API for pathway over-representation and expression analysis as well as species comparison tool, so better query the web?
* http://www.reactome.org/AnalysisService/
* http://www.reactome.org/content/query?
* http://reactomews.oicr.on.ca:8080/ReactomeRESTfulAPI/ReactomeRESTFulAPI.html

In [59]:
# input substance
metabolite = 'CHEBI:506227'

In [60]:
# get pathways from input genes
# 
r = requests.post('http://reactomews.oicr.on.ca:8080/ReactomeRESTfulAPI/RESTfulWS/queryHitPathways', data='CEP192', headers={'Content-Type':'application/json'})
r.status_code
r.headers
r.json()

[{'dbId': 2565942,
  'displayName': 'Regulation of PLK1 Activity at G2/M Transition',
  'hasDiagram': False,
  'isInDisease': False,
  'isInferred': False,
  'schemaClass': 'Pathway',
  'species': [{'dbId': 48887,
    'displayName': 'Homo sapiens',
    'schemaClass': 'Species'}],
  'speciesName': 'Homo sapiens',
  'stableIdentifier': {'dbId': 3200709,
   'displayName': 'R-HSA-2565942.1',
   'schemaClass': 'StableIdentifier'}},
 {'dbId': 8854518,
  'displayName': 'AURKA Activation by TPX2',
  'hasDiagram': False,
  'isInDisease': False,
  'isInferred': False,
  'releaseStatus': 'NEW',
  'schemaClass': 'Pathway',
  'species': [{'dbId': 48887,
    'displayName': 'Homo sapiens',
    'schemaClass': 'Species'}],
  'speciesName': 'Homo sapiens',
  'stableIdentifier': {'dbId': 8858734,
   'displayName': 'R-HSA-8854518.1',
   'schemaClass': 'StableIdentifier'}},
 {'dbId': 5620912,
  'displayName': 'Anchoring of the basal body to the plasma membrane',
  'hasDiagram': False,
  'isInDisease': Fals

In [61]:
# get infor for glcnac
# 
r = requests.post('http://reactomews.oicr.on.ca:8080/ReactomeRESTfulAPI/RESTfulWS/queryById/DatabaseObject', data='R-ALL-2855066', headers={'Content-Type':'application/json'})
r.status_code
r.headers
r.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [76]:
# get pathways from input genes
# 
r = requests.post('http://reactomews.oicr.on.ca:8080/ReactomeRESTfulAPI/RESTfulWS/pathwaysForEntities', data='R-ALL-2855066', headers={'Content-Type':'application/json'})
r.status_code
r.headers
r.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Reactome contains: Proteins (40) with glcnac as a component (not retrievable from the api), complex (41), pw (24) 
# reactions (93), set (7). 

## Query Wikidata for Knowldege.Bio

In [62]:
# api address:
api = 'https://query.wikidata.org/sparql'

# output files
path = 'glcnac_human_expansion/glcnac_human'

In [63]:
def generate_table(header, results):
    df = {}
    for res_d in results:
        for head in header:
            df[head] = []
        
    for res_d in results:
        for head in header:
            try:
                value = res_d[head]['value']
            except:
                value = 'NA'
            if value.startswith('http'):
                namespace, value = value.rsplit('/', 1)
            aux = df[head]
            aux.append(value)
            df[head] = aux
            
    try:
        results_df = pd.DataFrame.from_dict(df)
    except e:
        print(e)
        print(df)
        
    results_df = results_df[header]
    return results_df

In [64]:
# get Uniprot (enzymes from hmdb):

# get input_list
input_df = pd.read_table('/home/nuria/workspace/ngly1_hg/glcnac_human_expansion/glcnac_human_uniprot_enzymes_hmdb.csv')
input_df['id'] = input_df.protein_id.apply(lambda x: '"' + str(x.split(':')[1]) + '"')
input_l = list(input_df['id'])
input_s = ' '.join(input_l)
input_s

'"Q9Y223" "P51606" "O60909" "P15291" "P07686" "P06865" "Q9UJ70" "Q9BZP6" "Q9UK23" "Q13231" "O60512" "O60513" "P54802" "P61626" "Q8WZA1" "Q6UWQ5" "Q7Z4W2" "Q96QH8" "O75951" "P36222" "Q15782" "Q8WVB3" "O60502"'

In [65]:
# query
query = """SELECT DISTINCT ?id ?item ?itemLabel (group_concat(distinct ?itemaltLabel; separator="|") as ?altLabel) ?itemDesc
WHERE
{
  {?item wdt:P352 ?id .} # uniprot id
  values ?id {""" + input_s + """}
  OPTIONAL{
  ?item skos:altLabel ?itemaltLabel .
    FILTER(LANG(?itemaltLabel) = "en")
  ?item schema:description ?itemDesc .
    FILTER(LANG(?itemDesc) = "en")
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
group by ?item ?id ?itemLabel ?itemDesc"""

In [66]:
r = requests.post(api, data={'query': query}, headers={'Accept':'application/sparql-results+json'})
r.json()

{'head': {'vars': ['id', 'item', 'itemLabel', 'altLabel', 'itemDesc']},
 'results': {'bindings': [{'altLabel': {'type': 'literal',
     'value': 'LYZL1|lysozyme D1|lysozyme-like protein 1'},
    'id': {'type': 'literal', 'value': 'Q6UWQ5'},
    'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q21122346'},
    'itemDesc': {'type': 'literal',
     'value': 'mammalian protein found in Homo sapiens',
     'xml:lang': 'en'},
    'itemLabel': {'type': 'literal',
     'value': 'Lysozyme like 1',
     'xml:lang': 'en'}},
   {'altLabel': {'type': 'literal',
     'value': 'HEXA|N-acetyl-beta-glucosaminidase subunit alpha|hexosaminidase A (alpha polypeptide)|beta-hexosaminidase subunit alpha|beta-N-acetylhexosaminidase subunit alpha|hexosaminidase subunit A'},
    'id': {'type': 'literal', 'value': 'P06865'},
    'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q21120577'},
    'itemDesc': {'type': 'literal',
     'value': 'mammalian protein found in Homo

In [67]:
header_l = r.json()['head']['vars']
results_l = r.json()['results']['bindings']
df = generate_table(header_l, results_l)

# merge input with response
input_df = input_df[['protein_id']]
input_df['id'] = input_df.protein_id.apply(lambda x: x.split(':')[1])
output_df = input_df.merge(df)
output_df = output_df[['protein_id', 'item', 'itemLabel', 'altLabel', 'itemDesc']]
output_df.head(2)

Unnamed: 0,protein_id,item,itemLabel,altLabel,itemDesc
0,UniProt:Q9Y223,Q21106661,Glucosamine (UDP-N-acetyl)-2-epimerase/N-acety...,GNE|UDP-GlcNAc-2-epimerase/ManAc kinase|N-acyl...,mammalian protein found in Homo sapiens
1,UniProt:P51606,Q22677897,Renin binding protein,AGE|GlcNAc 2-epimerase|N-acetyl-D-glucosamine ...,mammalian protein found in Homo sapiens


In [68]:
len(output_df)

23

In [69]:
output_df.to_csv('{}_uniprot_enzymes_hmdb_concept_kb.tsv'.format(path), sep='\t', index=False, header=True)

In [70]:
# get Uniprot (targets (enzymes) from chembl):


# get input_list
input_df = pd.read_table('/home/nuria/workspace/ngly1_hg/glcnac_human_expansion/glcnac_human_uniprot_targets_chembl.csv')
input_df['id'] = input_df.protein_id.apply(lambda x: '"' + str(x.split(':')[1]) + '"')
input_l = list(input_df['id'])
input_s = ' '.join(input_l)
input_s

'"P00811" "P27695" "O75874" "Q6P4F1"'

In [71]:
# query
query = """SELECT DISTINCT ?id ?item ?itemLabel (group_concat(distinct ?itemaltLabel; separator="|") as ?altLabel) ?itemDesc
WHERE
{
  {?item wdt:P352 ?id .} # uniprot id
  values ?id {""" + input_s + """}
  OPTIONAL{
  ?item skos:altLabel ?itemaltLabel .
    FILTER(LANG(?itemaltLabel) = "en")
  ?item schema:description ?itemDesc .
    FILTER(LANG(?itemDesc) = "en")
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
group by ?item ?id ?itemLabel ?itemDesc"""

In [72]:
r = requests.post(api, data={'query': query}, headers={'Accept':'application/sparql-results+json'})
r.json()

{'head': {'vars': ['id', 'item', 'itemLabel', 'altLabel', 'itemDesc']},
 'results': {'bindings': [{'altLabel': {'type': 'literal',
     'value': 'IDH1|IDP|Cytosolic NADP-isocitrate dehydrogenase|NADP(+)-specific ICDH|isocitrate dehydrogenase 1 (NADP+), soluble|isocitrate dehydrogenase [NADP] cytoplasmic|epididymis luminal protein 216|epididymis secretory protein Li 26|NADP-dependent isocitrate dehydrogenase, cytosolic|NADP-dependent isocitrate dehydrogenase, peroxisomal|oxalosuccinate decarboxylase'},
    'id': {'type': 'literal', 'value': 'O75874'},
    'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q21116029'},
    'itemDesc': {'type': 'literal',
     'value': 'mammalian protein found in Homo sapiens',
     'xml:lang': 'en'},
    'itemLabel': {'type': 'literal',
     'value': 'Isocitrate dehydrogenase (NADP(+)) 1, cytosolic',
     'xml:lang': 'en'}},
   {'altLabel': {'type': 'literal', 'value': 'ampC|b4150'},
    'id': {'type': 'literal', 'value': 'P00811'},
  

In [73]:
header_l = r.json()['head']['vars']
results_l = r.json()['results']['bindings']
df = generate_table(header_l, results_l)

# merge input with response
input_df = input_df[['protein_id']]
input_df['id'] = input_df.protein_id.apply(lambda x: x.split(':')[1])
output_df = input_df.merge(df)
output_df = output_df[['protein_id', 'item', 'itemLabel', 'altLabel', 'itemDesc']]
output_df.head(2)

Unnamed: 0,protein_id,item,itemLabel,altLabel,itemDesc
0,UniProt:P00811,Q24138857,"Penicillin-binding protein; beta-lactamase, in...",ampC|b4150,microbial protein found in Escherichia coli st...
1,UniProt:P27695,Q22676734,Apurinic/apyrimidinic endodeoxyribonuclease 1,AP endonuclease 1|APEN|APEX nuclease|REF-1|DNA...,mammalian protein found in Homo sapiens


In [74]:
len(output_df)

4

In [75]:
output_df.to_csv('{}_uniprot_targets_chembl_concept_kb.tsv'.format(path), sep='\t', index=False, header=True)