In [10]:
import json
import requests
import tqdm

# Load the target records. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it to the garbage collector, and the
# explicit encoding keeps the read independent of the platform default.
with open("data_target.json", encoding="utf-8") as f:
    data = json.load(f)

In [8]:
def query_uniport_to_ENSP(protein_name):
    """Look up the human ENSP identifier for a UniProt search term.

    Sends ``protein_name`` as a query to the UniProtKB REST search endpoint and
    scans the cross-references of the returned entries for a STRING identifier
    that starts with ``9606.`` (the NCBI taxonomy ID for human).

    Args:
        protein_name: Query string passed to UniProt, e.g. an accession such
            as ``"P00734"``.

    Returns:
        The first matching STRING identifier with the leading ``9606.``
        removed (e.g. ``"ENSP00000308541"``), or ``None`` when the request
        fails or no such cross-reference is present in the results.
    """
    human_prefix = "9606."
    try:
        # `params` lets requests URL-encode the query; the timeout stops one
        # stalled connection from hanging a long batch of lookups.
        response = requests.get(
            "https://rest.uniprot.org/uniprotkb/search",
            params={"query": protein_name, "format": "json"},
            timeout=30,
        )
        response.raise_for_status()
        results = response.json().get("results", [])
    except requests.exceptions.RequestException as e:
        # Best-effort lookup: report which query failed and carry on.
        print(f"UniProt lookup failed for {protein_name}: {e}")
        return None

    for entry in results:
        for xref in entry.get("uniProtKBCrossReferences", []):
            if xref.get("database") != "STRING":
                continue
            string_id = xref.get("id")
            # Match on the taxon prefix only: a plain substring test would also
            # accept non-human IDs that merely contain "9606", and would crash
            # on a cross-reference that has no id at all.
            if isinstance(string_id, str) and string_id.startswith(human_prefix):
                return string_id[len(human_prefix):]
    return None

# Collect the distinct UniProt accessions referenced by any record, in order of
# first appearance. dict.fromkeys de-duplicates in linear time (the list
# membership test it replaces was quadratic), and missing accessions (None)
# are filtered out up front rather than with list.remove, which raises
# ValueError when no record contains a None.
uniprots = list(dict.fromkeys(
    u
    for d in data
    for u in d["target_uniprot"]
    if u is not None
))
uniprots

['P00734',
 'P00533',
 'O75015',
 'P02745',
 'P02746',
 'P02747',
 'P08637',
 'P12314',
 'P12318',
 'P01589',
 'P14784',
 'P31785',
 'P01375',
 'P01374',
 'P31994',
 'P31995',
 'P30968',
 'P48551',
 'P17181',
 'P00747',
 'P02671',
 'P02679',
 'P05121',
 'Q02643',
 'P19235',
 'Q03405',
 'P05120',
 'P05154',
 'P98164',
 'Q9Y5Y6',
 'P14543',
 'P22888',
 'P30988',
 'Q99062',
 'P15509',
 'P26951',
 'P32927',
 'P34741',
 'P13727',
 'P16473',
 'P00742',
 'P00740',
 'P04275',
 'O14832',
 'P07307',
 'P11021',
 'P27797',
 'P27824',
 'P49257',
 'Q07954',
 'Q8NI22',
 'P14778',
 'Q92637',
 'P01024',
 'P0C0L4',
 'P0C0L5',
 'P01031',
 'P06213',
 'P08069',
 'P16870',
 'P48745',
 'Q16270',
 'P05452',
 'P05787',
 'P07355',
 'P23945',
 'P15260',
 'P38484',
 'P30518',
 'P37288',
 'P47901',
 'P13726',
 'P10646',
 'P38435',
 'P08709',
 'Q14626',
 'P21802',
 'P11362',
 'P98160',
 'P47871',
 'O95838',
 'P43220',
 'P63027',
 'P23763',
 'Q8N9I0',
 'P12319',
 'Q01362',
 'O60603',
 'P02452',
 'P02458',
 'P02461',

In [9]:
# Resolve every UniProt accession to its ENSP identifier (None when the lookup
# finds nothing). Each accession costs one HTTP request, so this cell is slow.
#
# The mapping starts empty rather than pre-seeded with {} placeholders: if the
# loop is interrupted, unresolved accessions are simply absent, so later code
# fails loudly with a KeyError instead of treating a leftover {} as a valid ID.
uniprot_to_ENSP = {}
for u in tqdm.tqdm(uniprots):
    uniprot_to_ENSP[u] = query_uniport_to_ENSP(u)
uniprot_to_ENSP

100%|██████████| 2813/2813 [55:48<00:00,  1.19s/it] 


{'P00734': 'ENSP00000308541',
 'P00533': 'ENSP00000275493',
 'O75015': 'ENSP00000433642',
 'P02745': 'ENSP00000363773',
 'P02746': 'ENSP00000313967',
 'P02747': 'ENSP00000363770',
 'P08637': 'ENSP00000356946',
 'P12314': 'ENSP00000358165',
 'P12318': 'ENSP00000271450',
 'P01589': 'ENSP00000369293',
 'P14784': 'ENSP00000216223',
 'P31785': 'ENSP00000363318',
 'P01375': 'ENSP00000398698',
 'P01374': 'ENSP00000403495',
 'P31994': 'ENSP00000351497',
 'P31995': None,
 'P30968': 'ENSP00000226413',
 'P48551': 'ENSP00000343957',
 'P17181': 'ENSP00000270139',
 'P00747': 'ENSP00000308938',
 'P02671': 'ENSP00000498441',
 'P02679': 'ENSP00000336829',
 'P05121': 'ENSP00000223095',
 'Q02643': 'ENSP00000320180',
 'P19235': 'ENSP00000222139',
 'Q03405': 'ENSP00000339328',
 'P05120': 'ENSP00000401645',
 'P05154': 'ENSP00000333203',
 'P98164': 'ENSP00000496870',
 'Q9Y5Y6': 'ENSP00000278742',
 'P14543': 'ENSP00000264187',
 'P22888': 'ENSP00000294954',
 'P30988': 'ENSP00000352561',
 'Q99062': 'ENSP0000036

In [16]:
# For every record keep its dg_id plus the ENSP identifiers of its targets.
# Targets with no accession (None) or with no ENSP mapping are dropped.
new_data = [
    {
        "dg_id": record["dg_id"],
        "target_ENSP": [
            uniprot_to_ENSP[accession]
            for accession in record["target_uniprot"]
            if accession is not None and uniprot_to_ENSP[accession] is not None
        ],
    }
    for record in data
]

# Persist the converted records as pretty-printed JSON.
with open("data_target_ENSP.json", "w") as out_file:
    json.dump(new_data, out_file, indent=2)