## API Calls to Obtain GO Terms from UniProt

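This notebook queries the UniProt REST API to retrieve the Gene Ontology (GO) term identifiers annotated for each UniProt accession in the IDRome database, and stores the result as a pickled dictionary mapping accession to a list of GO IDs.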

Authors: Nicolas Jonsson and Giulio Tesei

Contact: giulio.tesei@bio.ku.dk

In [None]:
import urllib.parse
import urllib.request
import json
import pandas as pd
import numpy as np

def uniprot_api(uniprot):
    """Fetch the Gene Ontology (GO) term IDs cross-referenced by a UniProtKB entry.

    Parameters
    ----------
    uniprot : str
        UniProtKB accession used to build the request URL
        ``https://rest.uniprot.org/uniprotkb/<accession>.json``.

    Returns
    -------
    list of str
        The ``"id"`` of every item in the entry's ``uniProtKBCrossReferences``
        whose ``"database"`` is ``"GO"``, in the order they appear.
        ``["unknown"]`` if the entry has no such cross-reference (including
        when the cross-reference section is missing or malformed).

    Raises
    ------
    urllib.error.URLError
        If the HTTP request fails (``HTTPError`` for error status codes).
        Network failures are intentionally not mapped to ``"unknown"`` so
        they cannot be mistaken for "entry has no GO annotation".
    ValueError
        If the response body is not valid JSON.
    """
    url_template_uniprot = "https://rest.uniprot.org/uniprotkb/{}.json"
    # Percent-encode the accession so stray characters (spaces, '/', '?')
    # cannot change the path or query of the request URL.
    url_uniprot = url_template_uniprot.format(
        urllib.parse.quote(str(uniprot), safe="")
    )

    # Read the payload and release the connection before doing any parsing.
    with urllib.request.urlopen(url_uniprot) as link:
        data_uniprot = json.loads(link.read().decode())

    cross_refs = None
    if isinstance(data_uniprot, dict):
        cross_refs = data_uniprot.get('uniProtKBCrossReferences')
    if not isinstance(cross_refs, list):
        cross_refs = []

    # Skip malformed items individually instead of aborting the whole scan,
    # so one bad cross-reference cannot hide the GO IDs that follow it.
    go_list = [
        ref["id"]
        for ref in cross_refs
        if isinstance(ref, dict) and ref.get("database") == "GO" and "id" in ref
    ]

    # Callers expect a non-empty list; "unknown" marks "no GO annotation".
    return go_list if go_list else ["unknown"]

In [None]:
# Read the IDRome table from the working directory; the first CSV column
# becomes the DataFrame index. Later cells read its `UniProt_ID` column.
df_idrome = pd.read_csv(
    'IDRome_DB.csv',
    index_col=0,
)

In [None]:
# Load the previously saved accession -> GO-ID-list mapping and turn it into
# a plain dict so it can be updated key by key.
# NOTE(review): unpickling executes arbitrary code; only load trusted files.
# NOTE(review): this reads from 'go_analysis/' while the last cell writes to
# 'idr_selection/' — confirm the differing directories are intended.
GO_terms = (
    pd.Series(pd.read_pickle('go_analysis/uniprot_dict.pkl'))
    .to_dict()
)

In [None]:
# Query UniProt once per distinct accession in the IDRome table.
# Accessions already present in GO_terms (loaded from the cache, or fetched
# by an earlier partial run of this cell) are skipped, so re-running the cell
# after an interruption or a network error resumes instead of starting over.
# To force a refresh of an entry, delete its key from GO_terms first.
for i, uniprot in enumerate(df_idrome.UniProt_ID.unique()):
    if uniprot not in GO_terms:
        GO_terms[uniprot] = uniprot_api(uniprot)
    # Compact progress trace: "0-1-2-..."
    print(i, end='-')

In [None]:
# Persist the accession -> GO-ID-list mapping as a pickled pandas Series.
# The target directory 'idr_selection/' must already exist.
go_terms_series = pd.Series(GO_terms)
go_terms_series.to_pickle('idr_selection/uniprot_dict.pkl')