In [1]:
import requests
import time
import json

POLLING_INTERVAL = 3
API_URL = "https://rest.uniprot.org"


def check_response(response):
    try:
        response.raise_for_status()
    except requests.HTTPError:
        print(response.json())
        raise

def submit_id_mapping(from_db, toDB, ids):
    response = requests.post(
        f"{API_URL}/idmapping/run", 
        data={"from": from_db, "to": toDB, "ids": ids},
    )
    check_response(response)
    return response.json()["jobId"]


def get_id_mapping_results(job_id):
    while True:
        r = requests.get(f"{API_URL}/idmapping/status/{job_id}")
        r.raise_for_status()
        job = r.json()
        if "jobStatus" in job:
            if job["jobStatus"] == "RUNNING":
                print(f"Retrying in {POLLING_INTERVAL}s")
                time.sleep(POLLING_INTERVAL)
            else:
                raise Exception(job["jobStatus"])
        else:
            return job


job_id = submit_id_mapping(
    from_db="UniProtKB_AC-ID", 
    toDB="UniProtKB", 
    ids=["P05067", "P12345"]
)

results = get_id_mapping_results(job_id)


In [2]:
results

{'results': [{'from': 'P05067',
   'to': {'entryType': 'UniProtKB reviewed (Swiss-Prot)',
    'primaryAccession': 'P05067',
    'secondaryAccessions': ['B2R5V1',
     'B4DII8',
     'D3DSD1',
     'D3DSD2',
     'D3DSD3',
     'P09000',
     'P78438',
     'Q13764',
     'Q13778',
     'Q13793',
     'Q16011',
     'Q16014',
     'Q16019',
     'Q16020',
     'Q6GSC0',
     'Q8WZ99',
     'Q9BT38',
     'Q9UC33',
     'Q9UCA9',
     'Q9UCB6',
     'Q9UCC8',
     'Q9UCD1',
     'Q9UQ58'],
    'uniProtkbId': 'A4_HUMAN',
    'entryAudit': {'firstPublicDate': '1987-08-13',
     'lastAnnotationUpdateDate': '2024-03-27',
     'lastSequenceUpdateDate': '1991-11-01',
     'entryVersion': 310,
     'sequenceVersion': 3},
    'annotationScore': 5.0,
    'organism': {'scientificName': 'Homo sapiens',
     'commonName': 'Human',
     'taxonId': 9606,
     'lineage': ['Eukaryota',
      'Metazoa',
      'Chordata',
      'Craniata',
      'Vertebrata',
      'Euteleostomi',
      'Mammalia',
      

In [18]:
for idx, entry in enumerate(results['results']):
    slocs = set()
    for annotation in entry['to']['comments']:
        type = annotation['commentType']
        if type == ('SUBCELLULAR LOCATION'):
            for sloc in annotation['subcellularLocations']:
                slocs.add(sloc['location']['value'])
    print(idx, entry['from'], slocs)

0 P05067 {'Golgi apparatus', 'Cell projection, growth cone', 'Perikaryon', 'Cell membrane', 'Nucleus', 'Cell surface', 'Endoplasmic reticulum', 'Secreted', 'Membrane', 'Early endosome', 'Cytoplasm', 'Membrane, clathrin-coated pit', 'Cytoplasmic vesicle'}
1 P12345 {'Cell membrane', 'Mitochondrion matrix'}


In [55]:
from collections import defaultdict
FEATURES = {'Modified residue', 'Natural variant'}

annotations = defaultdict(set)
variants = defaultdict(set)
for idx, entry in enumerate(results['results']):
    sequence = entry['to']['sequence']['value']
    for feature in entry['to']['features']:
        start = feature['location']['start']['value']
        end = feature['location']['end']['value']
        ftype = feature['type']
        if ftype in FEATURES:
            for k, res in enumerate(range(start, end+1)):
                resname = sequence[res-1]
                key = (entry['from'], f'{resname}{res}')
                # Get residue annotations
                annotations[key].add(ftype)
                # Get variants
                if ftype == 'Natural variant':
                    orseq = feature['alternativeSequence']['originalSequence']
                    altseqs = feature['alternativeSequence']['alternativeSequences']
                    for v in altseqs:
                        variants[key].add((orseq, v[k]))
variants

defaultdict(set,
            {('P05067', 'E501'): {('E', 'K')},
             ('P05067', 'E665'): {('E', 'D')},
             ('P05067', 'K670'): {('KM', 'NL')},
             ('P05067', 'M671'): {('KM', 'NL')},
             ('P05067', 'D678'): {('D', 'N')},
             ('P05067', 'A692'): {('A', 'G')},
             ('P05067', 'E693'): {('E', 'G'), ('E', 'K'), ('E', 'Q')},
             ('P05067', 'D694'): {('D', 'N')},
             ('P05067', 'L705'): {('L', 'V')},
             ('P05067', 'A713'): {('A', 'T'), ('A', 'V')},
             ('P05067', 'T714'): {('T', 'A'), ('T', 'I')},
             ('P05067', 'V715'): {('V', 'M')},
             ('P05067', 'I716'): {('I', 'V')},
             ('P05067', 'V717'): {('V', 'F'),
              ('V', 'G'),
              ('V', 'I'),
              ('V', 'L')},
             ('P05067', 'L723'): {('L', 'P')}})

In [37]:
print('uniprot_id,resid,annotations')
for k,v in INFO.items():
    if k[1] > 260:
        print(f'{k[0]},{k[1]},{";".join(v)}')

uniprot_id,resid,annotations
P05067,261,Topological domain;Compositional bias;Region;Chain
P05067,262,Topological domain;Modified residue;Compositional bias;Region;Chain
P05067,263,Topological domain;Compositional bias;Region;Chain
P05067,264,Topological domain;Region;Chain
P05067,265,Topological domain;Region;Chain
P05067,266,Topological domain;Region;Chain
P05067,267,Topological domain;Compositional bias;Region;Chain
P05067,268,Topological domain;Compositional bias;Region;Chain
P05067,269,Topological domain;Compositional bias;Region;Chain
P05067,270,Topological domain;Compositional bias;Region;Chain
P05067,271,Topological domain;Compositional bias;Region;Chain
P05067,272,Topological domain;Compositional bias;Region;Chain
P05067,273,Topological domain;Compositional bias;Region;Chain
P05067,274,Topological domain;Compositional bias;Region;Chain
P05067,275,Topological domain;Compositional bias;Region;Chain
P05067,276,Topological domain;Compositional bias;Region;Chain
P05067,277,Topologi