### Testing notebook

In [None]:
import requests
import time
import json

# Seconds to sleep between two consecutive job-status polls.
POLLING_INTERVAL = 3
# Base URL of the UniProt REST service; endpoint paths are appended to it.
API_URL = "https://rest.uniprot.org"


def check_response(response):
    """Raise if *response* carries an HTTP error status.

    Before the error is re-raised its payload is printed to help debugging:
    the decoded JSON body when there is one, otherwise the raw response text.

    Args:
        response: A ``requests.Response``-like object.

    Raises:
        requests.HTTPError: Propagated unchanged from ``raise_for_status()``.
    """
    try:
        response.raise_for_status()
    except requests.HTTPError:
        try:
            print(response.json())
        except ValueError:
            # Error bodies are not always JSON (e.g. an HTML gateway page);
            # a decode failure here must not replace the HTTPError.
            print(response.text)
        raise

def submit_id_mapping(from_db, toDB, ids):
    """Submit an ID-mapping job to UniProt and return its job id.

    Args:
        from_db: Name of the source database (sent as the ``from`` field).
        toDB: Name of the target database (sent as the ``to`` field).
        ids: Identifiers to map, either an iterable of strings or a single
            already comma-separated string.

    Returns:
        The ``jobId`` value of the service's JSON reply.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    # The endpoint documents ``ids`` as one comma-separated form field; a
    # list would be form-encoded by requests as repeated ``ids=...`` fields.
    joined_ids = ids if isinstance(ids, str) else ",".join(ids)
    response = requests.post(
        f"{API_URL}/idmapping/run",
        data={"from": from_db, "to": toDB, "ids": joined_ids},
    )
    check_response(response)
    return response.json()["jobId"]


def get_id_mapping_results(job_id):
    """Poll an ID-mapping job until it is done and return the final payload.

    The status endpoint is queried every ``POLLING_INTERVAL`` seconds. A reply
    without a ``jobStatus`` key is treated as the finished result and is
    returned as-is.

    Args:
        job_id: Job identifier as returned by ``submit_id_mapping``.

    Returns:
        The decoded JSON payload of the first reply lacking ``jobStatus``.

    Raises:
        requests.HTTPError: If a status request fails.
        RuntimeError: If the job reports a status other than NEW or RUNNING.
    """
    # NEW (job accepted but not started yet) must be waited out just like
    # RUNNING; UniProt's reference client treats both as "still in progress".
    pending_statuses = ("NEW", "RUNNING")
    while True:
        response = requests.get(f"{API_URL}/idmapping/status/{job_id}")
        check_response(response)
        job = response.json()
        if "jobStatus" not in job:
            return job
        status = job["jobStatus"]
        if status not in pending_statuses:
            raise RuntimeError(
                f"ID mapping job {job_id} ended with status {status}"
            )
        print(f"Retrying in {POLLING_INTERVAL}s")
        time.sleep(POLLING_INTERVAL)


# Submit a mapping job for two accessions (UniProtKB_AC-ID -> UniProtKB),
# then block until the service hands back the finished payload.
job_id = submit_id_mapping("UniProtKB_AC-ID", "UniProtKB", ["P05067", "P12345"])
results = get_id_mapping_results(job_id)


In [None]:
# For every mapped entry, print its index, the queried id and the set of
# subcellular location names found in its comments.
for idx, entry in enumerate(results['results']):
    slocs = set()
    # NOTE(review): 'comments' and 'subcellularLocations' are read with an
    # empty default on the assumption that the service may omit them for
    # some entries/comments — confirm against the UniProtKB JSON format.
    for annotation in entry['to'].get('comments', ()):
        # Deliberately not named `type`: at cell level that assignment would
        # overwrite the builtin for the rest of the notebook session.
        comment_type = annotation['commentType']
        if comment_type != 'SUBCELLULAR LOCATION':
            continue
        for sloc in annotation.get('subcellularLocations', ()):
            slocs.add(sloc['location']['value'])
    print(idx, entry['from'], slocs)

In [None]:
from collections import defaultdict

# Feature types whose residues are recorded below.
FEATURES = {'Modified residue', 'Natural variant'}

# (queried id, '<residue letter><1-based position>') -> set of feature types
annotations = defaultdict(set)
# same key -> set of (original sequence, substituted residue) pairs
variants = defaultdict(set)
for entry in results['results']:
    sequence = entry['to']['sequence']['value']
    # NOTE(review): assumes 'features' may be absent for some entries — confirm.
    for feature in entry['to'].get('features', ()):
        ftype = feature['type']
        # Filter first so irrelevant feature types are never inspected.
        if ftype not in FEATURES:
            continue
        start = feature['location']['start']['value']
        end = feature['location']['end']['value']
        if not (isinstance(start, int) and isinstance(end, int)):
            # NOTE(review): presumably a position can be null when it is
            # unknown; such features cannot be expanded per residue — confirm.
            continue
        orseq, altseqs = '', ()
        if ftype == 'Natural variant':
            # NOTE(review): defaults assume deletion-style variants may omit
            # these fields — confirm against the UniProtKB JSON format.
            alt_info = feature.get('alternativeSequence', {})
            orseq = alt_info.get('originalSequence', '')
            altseqs = alt_info.get('alternativeSequences', ())
        for k, res in enumerate(range(start, end + 1)):
            resname = sequence[res - 1]  # positions are 1-based
            key = (entry['from'], f'{resname}{res}')
            # Get residue annotations
            annotations[key].add(ftype)
            # Get variants
            for v in altseqs:
                # An alternative sequence can be shorter than the span it
                # replaces; positions beyond its end have no substitute.
                if k < len(v):
                    variants[key].add((orseq, v[k]))
variants

In [None]:
# Print a CSV listing of the annotated residues located after position 260.
# Keys of `annotations` are (queried id, '<residue letter><position>'), so the
# numeric position is parsed out of the label before it is compared.
# NOTE(review): this assumes `annotations` is the mapping this cell was meant
# to report on — confirm.
print('uniprot_id,resid,annotations')
for (uniprot_id, resid), ftypes in annotations.items():
    if int(resid[1:]) > 260:
        # Sorted so the joined column is stable between runs (sets of str
        # have no fixed iteration order across interpreter sessions).
        print(f'{uniprot_id},{resid},{";".join(sorted(ftypes))}')