In [5]:
import pandas as pd
import numpy as np
import sys
import os
from rdkit import Chem
from rdkit.Chem import MACCSkeys
import pubchempy as pcp
import urllib.parse
import urllib.request
import re
import time

In [None]:
print(os.getcwd())
os.chdir('../../')
print(os.getcwd())

In [6]:
#extracting drug-target mapping from TTD
#these will go into config_file
dataset_dir = "datasets/"
processed_drug_feature_path = dataset_dir + 'processed/drug/'
raw_drug_target_mapping_file = dataset_dir + 'ttd/' + 'P1-07-Drug-TargetMapping.csv'
drug_pubchemID_mapping_file =  dataset_dir + 'ttd/' + 'P1-03-TTD_crossmatching_manually_removed_metadata.txt'
target_uniprot_mapping_file = dataset_dir + 'ttd/' + 'P2-01-TTD_uniprot_all_manually_removed_metadata.txt'
# target_sequence_mapping_file = dataset_dir + 'ttd/' + 'P2-06-TTD_sequence_all_manually_removed_metadata.txt'
# target_kegg_pathway_mapping_file = dataset_dir + 'ttd/' + 'P4-01-Target-KEGGpathway_all_manually_removed_metadata.txt'

processed_drug_target_mapping_file=processed_drug_feature_path+'drug_target_map_TTD.tsv'

uniprot_id_ac_mapping_path = dataset_dir + 'uniprot_id_ac_map.tsv'
ttd_drug_name_cid_pubchempy_pkl_file = processed_drug_feature_path + 'ttd_drug_name_cid_pubchempy_file.pkl'
drug_TTDID_to_pubchem_id_file = processed_drug_feature_path + 'TTD_drug_id_to_pubchem_cid_map.tsv'
manually_extracted_uniprot_id_ac_mapping_path = dataset_dir + 'manually_extracted_uniprot_id_ac_map.tsv'


In [7]:
# analyse number of drugIDs per drugname
df = pd.read_csv(raw_drug_target_mapping_file)
df = df[['Drug_Name','DrugID']].groupby('Drug_Name')['DrugID'].unique().reset_index()
df['number_of_drugids'] = df['DrugID'].apply(lambda x: len(x))
df = df[df['number_of_drugids']>1]
print('%d drugnames have more than one drugids associated'%(df.shape[0]))
# print(df)

# print(df.columns)
# print(df.nunique())

49 drugnames have more than one drugids associated


In [8]:
df = pd.read_csv(raw_drug_target_mapping_file)
df = df[['Drug_Name','DrugID']].groupby('DrugID')['Drug_Name'].unique().reset_index()
df['number_of_drugnames'] = df['Drug_Name'].apply(lambda x: len(x))
df = df[df['number_of_drugnames']>1]
print('%d drugids have more than one drugnames associated'%(df.shape[0]))



0 drugids have more than one drugnames associated


In [9]:
def get_cid_from_drug_name(drug_name):
    for i in range(0,5):
        try:
            compounds = pcp.get_compounds(drug_name, 'name')    
            if(len(compounds)>0):
            #return the first compound from the search result
                print(drug_name,compounds[0].cid )
                return compounds[0].cid
            else:
                print('None' , drug_name)
                return None
        except pcp.PubChemHTTPError:
            print('PubChemHTTPError', drug_name)
            time.sleep(0.3)
            continue
    return None

In [10]:
def drug_name_cid_mapping_from_pubchempy(drug_names_list,ttd_drug_name_cid_pubchempy_pkl_file):
    drug_name_to_pubchem_cid_smiles_df = pd.DataFrame({'drug_name' : drug_name_list})

    drug_name_to_pubchem_df['pubchem_cid'] = pd.Series(str)       

    for drug_name in drug_name_list:
        pubchem_cid = get_cid_from_drug_name(drug_name)
        if(pubchem_cid != None):
            drug_name_to_pubchem_df.at[drug_name,'pubchem_cid']=pubchem_cid

    drug_name_to_pubchem_df.dropna(inplace=True)
    drug_name_to_pubchem_df.to_pickle(ttd_drug_name_cid_pubchempy_pkl_file)
    print(drug_name_to_pubchem_df.shape)

In [11]:
#drug TTDID to pubchem ID mapping
def drug_TTDID_to_pubchem_ID_map():
    drug_pubechemID_mapping_df = pd.read_csv(drug_pubchemID_mapping_file, sep='\t', header=None)

    #drugid-drugname mapping df
    drug_Name_df = drug_pubechemID_mapping_df[drug_pubechemID_mapping_df[1].\
                                                isin(['DRUGNAME'])][[0,2]].\
                                                rename(columns={0:'DrugID', 2:'Drug_Name'})\
                                                .set_index('DrugID')
    print(drug_Name_df.shape)

    #drugid-pubchemid mapping df
    drug_PUBCHCID_df=drug_pubechemID_mapping_df[drug_pubechemID_mapping_df[1].\
                                                isin(['PUBCHCID'])][[0,2]].\
                                                rename(columns={0:'DrugID', 2:'cIds'}).set_index('DrugID')
    print(drug_PUBCHCID_df.shape)
    
    processed_drug_pubechemID_mapping_df = drug_Name_df.join([drug_PUBCHCID_df]).reset_index()
    print(processed_drug_pubechemID_mapping_df.shape)
    
    no_cid_mapping_df = processed_drug_pubechemID_mapping_df\
                    [processed_drug_pubechemID_mapping_df['cIds'].isnull()]
    drug_names_without_cid = set(no_cid_mapping_df['Drug_Name'])
    print('drug_names_without_cid',len(drug_names_without_cid))
    
    #for some drugnames pubchemid is not readily available in TTD. So, extract those pubchemid using pubchempy
    for index,row in processed_drug_pubechemID_mapping_df.iterrows():
        if row['Drug_Name'] in(drug_names_without_cid):
#             row['cIds'] = get_cid_from_drug_name(row['Drug_Name'])
            processed_drug_pubechemID_mapping_df.at[index,'cIds'] = get_cid_from_drug_name(row['Drug_Name'])
    
    #still no cid
    no_cid_mapping_df = processed_drug_pubechemID_mapping_df\
                    [processed_drug_pubechemID_mapping_df['cIds'].isnull()]
    drug_names_without_cid = set(no_cid_mapping_df['Drug_Name'])
    print('drug_names_without_cid',len(drug_names_without_cid))

    #dropping the rows which contain drugs for which we do not have any cIds mapped to it.
    processed_drug_pubechemID_mapping_df.dropna(axis = 0, inplace=True)
    print(processed_drug_pubechemID_mapping_df.shape)
    
    print(processed_drug_pubechemID_mapping_df.nunique())
    
    return processed_drug_pubechemID_mapping_df

# x= drug_TTDID_to_pubchem_ID_map()

In [13]:
force_run=False
if(os.path.exists(drug_TTDID_to_pubchem_id_file) and force_run==False):
    print('reading from already existing TTD_ID to pubchem_id map file')
else:
    drug_TTDID_to_pubchem_id_df = drug_TTDID_to_pubchem_ID_map()
    
    os.makedirs(os.path.dirname(drug_TTDID_to_pubchem_id_file), exist_ok=True)
    drug_TTDID_to_pubchem_id_df.to_csv(drug_TTDID_to_pubchem_id_file, sep='\t', index=False)
    
drug_TTDID_to_pubchem_id_df = pd.read_csv(drug_TTDID_to_pubchem_id_file, sep='\t')


# See if single pubchemid is being mapped to multiple drug names
print('\nSee if single pubchemid is being mapped to multiple drug names\n')
df_1 = drug_TTDID_to_pubchem_id_df.copy()
df_1 = df_1[['Drug_Name','cIds']].groupby('cIds')['Drug_Name'].unique().reset_index()
df_1['number_of_drugnames'] = df_1['Drug_Name'].apply(lambda x: len(x))
df_1 = df_1[df_1['number_of_drugnames']>1]
print(df_1.shape)
print(df_1)

# See if single drugname is being mapped to multiple pubchemids
print('\nSee if single drugname is being mapped to multiple pubchemids')
df_1 = drug_TTDID_to_pubchem_id_df.copy()
df_1 = df_1[['Drug_Name','cIds']].groupby('Drug_Name')['cIds'].unique().reset_index()
df_1['number_of_cIds'] = df_1['cIds'].apply(lambda x: len(x))
df_1 = df_1[df_1['number_of_cIds']>1]
print(df_1.shape)
print(df_1)


# print(df.columns)
# print(df.nunique())

reading from already existing TTD_ID to pubchem_id map file

See if single pubchemid is being mapped to multiple drug names

(218, 3)
           cIds                                          Drug_Name  \
49     10025615                                 [MN-201, CARD-024]   
227    10103168                                [GT-2203, VUF-5296]   
283    10127622                                [ARRY-886, AZD6244]   
720    10331844                             [Napabucasin, BBI-608]   
1048   10473088                        [[3H]resolvin E1, RX-10001]   
...         ...                                                ...   
22045   9859866  [4-methyl-2-oxo-2H-chromen-7-yl sulfamate, COU...   
22094   9865375                                  [HT-0712, OX-914]   
22206   9875671                                    [GM604, GM-602]   
22344   9891761                             [AVE-5997EF, AVE-5997]   
22431   9905233                                 [CMI-977, MLN-977]   

       number_of_drugname

In [14]:
force_run = False
#convert uniprot ID into uniprot accession ID.
def uniprot_ID_to_accession_ID_map(target_uniprot_mapping_df):
    if(os.path.exists(uniprot_id_ac_mapping_path) and force_run==False):
        print('reading from existing uniprot id to accession id mapping file')
    
    else:
        query_str = ' '.join(list(target_uniprot_mapping_df['UniprotID']))
        #get rid of multiple presence of one uniprotid. In this way I preevented one id being mapped to 
        #different ac_ids.
        query_str = ' '.join(set(query_str.split(' ')))
        print("hello there")

        url = 'https://www.uniprot.org/uploadlists/'

        params = {
        'from': 'ACC+ID',
        'to': 'ACC',
        'format': 'tab',
        'query': query_str
        }

        data = urllib.parse.urlencode(params)
        data = data.encode('utf-8')
        req = urllib.request.Request(url, data)
        with urllib.request.urlopen(req) as f:
            response = f.read()
        uniprotid_to_ac = response.decode('utf-8')

        #parse the reponse string
        uniprotid_to_ac_list = list(filter(None, uniprotid_to_ac.split('\n')))
        # print(uniprotid_to_ac_list)

        id_list = []
        ac_list = []

        for element in uniprotid_to_ac_list:
            element = element.split('\t')
            id_list.append(element[0])
            ac_list.append(element[1])


        #remove the first row containg 'From' 'To'
        id_list.pop(0)
        ac_list.pop(0)

        uniprot_id_to_ac_map_df = pd.DataFrame({'uniprot_id': id_list,
                        'uniprot_ac': ac_list})
        
        os.makedirs(os.path.dirname(uniprot_id_ac_mapping_path), exist_ok=True)
        uniprot_id_to_ac_map_df.to_csv(uniprot_id_ac_mapping_path, index = False, sep = '\t')
        
    uniprot_id_to_ac_map_df = pd.read_csv(uniprot_id_ac_mapping_path, index_col = 'uniprot_id', sep = '\t')
    return uniprot_id_to_ac_map_df

In [15]:
#target to uniprot mapping. This is uniprot id, not uniport accession id.
#Here protein from other organism than human is also present
target_uniprot_mapping_df = pd.read_csv(target_uniprot_mapping_file, sep='\t', header = None)
#extract the rows containing 'UNIPROID' in column 1
target_uniprot_mapping_df = target_uniprot_mapping_df[target_uniprot_mapping_df[1].isin(['UNIPROID'])][[0,2]].\
                            rename(columns={0:'TargetID',2:'UniprotID'})
#filter out non-human protein targets. Keep only human protein targets.
target_uniprot_mapping_df = target_uniprot_mapping_df[target_uniprot_mapping_df['UniprotID'].\
                                                      str.contains('HUMAN')]


#one targetid might be associated with multiple semicolon separated uniprot ids(contd) 
# Need processing to get uniprot_ac from uniprot_ids
target_uniprot_mapping_df['UniprotID'] = target_uniprot_mapping_df['UniprotID'].astype(str).\
                                         apply(lambda x: ' '.join(list(filter(None,re.split('[;/\- ]', x)))))

query_str = ' '.join(list(target_uniprot_mapping_df['UniprotID']))
#get rid of multiple presence of one uniprotid
query_str = set(query_str.split(' '))
print('number of unique uniprot id: ', len(query_str))

#now replace 'UniprotID' in target_uniprot_mapping_df with 'uniprot_ac' from uniprot_id_to_ac_map_df.
uniprot_id_to_ac_map_df = uniprot_ID_to_accession_ID_map(target_uniprot_mapping_df)
#could not get uniprot_ac for 11 proteins using the above function, so I extracted them manually.Now incorporate those
#manually extracted uniprot_id to uniprot_ac map
df = pd.read_csv(manually_extracted_uniprot_id_ac_mapping_path, index_col = 'uniprot_id', sep = '\t')
uniprot_id_to_ac_map_df= pd.concat([uniprot_id_to_ac_map_df,df], axis = 0)
print('uniprot_id_to_ac_map_df: ', uniprot_id_to_ac_map_df.nunique())



# if one target id is mapped to multiple uniprot_ids but I could not found uniprot_ac to all but for atleast one
# then did not look further. Just kept the found ac ids.
#add UniprotAC column
target_uniprot_mapping_df['UniprotAC']=pd.Series(dtype=str)
for index, row in target_uniprot_mapping_df.iterrows():
    uniprot_ids = row['UniprotID'].split(' ')
#     if(len(uniprot_ids)>1):
#         print(uniprot_ids)
    uniprot_acs = []
    for x in uniprot_ids:
        if x in uniprot_id_to_ac_map_df.index:
            ac = uniprot_id_to_ac_map_df.at[x,'uniprot_ac'] 
#             print(ac)
            uniprot_acs.append(ac)
#         else: 
#             uniprot_acs.append('None')
#             print(x, 'NO') #could not retrieve uniprot_ac_id for these(in total 6)
    s = ','.join(set(uniprot_acs))
    print(s)
    target_uniprot_mapping_df.at[index,'UniprotAC'] = s
        
print(target_uniprot_mapping_df.shape)
print(target_uniprot_mapping_df.head)




number of unique uniprot id:  2920
reading from existing uniprot id to accession id mapping file
uniprot_id_to_ac_map_df:  uniprot_ac    2910
dtype: int64
P10451
P01135
P29279
Q5VT25
Q9H490
P09917
Q9UL62
Q00987
Q00653
P05112
P20393
Q16653
P30532
O60235
P00519
P02585
Q9GZT4
Q5VWK5
O00398
O95477
Q96IZ0
Q9NRM0
Q99956
P46721
P53779
P48431
Q92918
P21217
Q14654
P41279
P25024
P49019
Q02156
P34130
Q9UPP1
P17643
Q86U86
O75460
O00308
E9PEK4
O75602
Q9NY57
P11274,P00519
Q9Y5Z0
O15393
Q99816
Q86TM6
Q8TBC4,Q13564
P78417
P57773
O95470
P04040
P29590
Q5VTY9
P18825
P05556
P20151
Q04912
Q08499
P15848
P48507
Q14213
P06746,Q7Z5Q5,Q9UHN1,P56282,O75417,Q9HCU8,P28340,P54098
Q13950
Q5JUK3
Q9Y275
Q86XT9
P10912
O95399
Q9BQI3
P54762
Q9Y2I7
Q13423
P06729
P35499
P35462
O14684
Q9NPD5
Q96GD0
P02787
O15392
P35228
Q14832
P10914
P30989
P51677
O60706
P11532
P01574
Q9P2D3
Q9NPH3
Q9H3Y6
P01116
O60603,Q15399
Q9Y4D2
P78395
P51812
P01589
Q14210
P24394
Q12772
P35354,P23219
Q9GZS9
Q14061
Q03426
P39900
P57771
P60484
P20823
Q8NI5

Q16665
P43694
P15056,P04049,P10398
Q9Y2C9
Q16348
P18428
Q9UK79
Q7LBE3
Q12797
P51170,P51172,P51168,P37088
P53396
Q13133
P04049
Q9Y6Y9
P15559
Q13496
Q08752
P03956
Q9Y2U5
P07550
Q86UE4
P52757
Q14872
P12318,P31994,P31995
Q9H4A3
P27816
P08519
Q96GU1
P21452
Q9Y219
Q04917
O15492
P41146
P08034
O00253
Q9HAB3
P02741
P30874
P20333
O14795
P54802
Q13613
Q9Y5C1
O14733
Q92806
Q96KQ7
P18848
P13497
P22748
P25100
P36897
Q9BYP7
P49662
P43628
P09210
P16234
P13010
P04035
Q92876
Q9HC73
Q8NHY2
Q12884
P0C0S5
O75832
Q15126
Q96KS0
Q9H2X6
O00574
O15537
Q9Y5Y7
O15178
O43525
P14780
P03950
P06744
P36955
Q96DU3
P08727
Q8IY34
P43490
O95180
Q9NQB0
P02686
P13866
P31350
P43627
P32297,Q15822
P28072
Q99884
Q9H093
Q9NR82
P78348
Q8IZI9
P16104
P35813
Q9NR61
O75311,P23415,P23416,P48167
P17301
P16860
Q9NWZ3
P07333
P0DJI8
Q9NPD7
Q16665
Q9ULZ9
P13569
Q9HAU0
Q05469
P02753
P35790
Q15759
Q12866
Q15822
P23510
P56696
P17707
Q9BZV2
O75751
O15303
Q01959
Q99436
Q13976,Q13237
P09936
O15054
P50454
O15164
P54753
Q9HC97
P48449
Q8TDY4
O14773

P49674
P51813
(3035, 3)
<bound method NDFrame.head of       TargetID    UniprotID UniprotAC
1       T00032   OSTP_HUMAN    P10451
5       T00033   TGFA_HUMAN    P01135
13      T00039   CTGF_HUMAN    P29279
17      T00075  MRCKA_HUMAN    Q5VT25
21      T00088   PIGU_HUMAN    Q9H490
...        ...          ...       ...
13873   T99912   KYNU_HUMAN    Q16719
13877   T99948  PD1L1_HUMAN    Q9NZQ7
13881   T99954   PI2R_HUMAN    P43119
13885   T99989   KC1E_HUMAN    P49674
13889   T99990    BMX_HUMAN    P51813

[3035 rows x 3 columns]>


In [20]:
drug_target_mapping_df = pd.read_csv(raw_drug_target_mapping_file)
# print(drug_target_mapping_df.nunique())
# print(target_uniprot_mapping_df.nunique())


#add target UniprotAC and UniprotIDs
#set index to join
drug_target_mapping_df = drug_target_mapping_df.set_index('TargetID').\
                        join([target_uniprot_mapping_df.set_index('TargetID')]).reset_index()\
                        [['DrugID', 'UniprotAC','UniprotID','TargetID']]
#not every target id has uniprot ids associated with it in given TTD file
drug_target_mapping_df.dropna(inplace = True)
# print(drug_target_mapping_df.columns)
print(drug_target_mapping_df.shape)



#DrugID_to_target_uniprot_mapping
drugID_target_uniprot_mapping_grouped = drug_target_mapping_df[['DrugID','UniprotAC']]\
                                        .groupby('DrugID')['UniprotAC'].\
                                        apply(lambda x: ','.join(x.astype(str))).reset_index()
print(drugID_target_uniprot_mapping_grouped)
print(drugID_target_uniprot_mapping_grouped.columns)



(38297, 4)
       DrugID             UniprotAC
0      D00AAN                P49841
1      D00AAU         Q92731,P03372
2      D00ABO  P11362,P00519,P36888
3      D00ABW                P10275
4      D00ACC         P41146,Q9NXE4
...       ...                   ...
27200  D0ZZ4S                P28223
27201  D0ZZ4Z  P41595,P28223,P28335
27202  D0ZZ6N                Q16881
27203  D0ZZ8F                Q8NER1
27204  D0ZZ9M                P11229

[27205 rows x 2 columns]
Index(['DrugID', 'UniprotAC'], dtype='object')


In [17]:
drug_TTDID_to_pubchem_id_df.set_index('DrugID', inplace=True)


In [29]:
#add drug pbubchem id column


print(drug_TTDID_to_pubchem_id_df.shape)
print(drugID_target_uniprot_mapping_grouped.shape)

print('common drugid',len(set(drug_TTDID_to_pubchem_id_df.index.unique()).\
              intersection(set(drugID_target_uniprot_mapping_grouped['DrugID']))  ))
print(('common drugid',set(drug_TTDID_to_pubchem_id_df.index.unique()).\
              difference(set(drugID_target_uniprot_mapping_grouped['DrugID']))  ))

processed_drug_target_mapping_df = drug_TTDID_to_pubchem_id_df.\
                join([drugID_target_uniprot_mapping_grouped.set_index('DrugID')])


processed_drug_target_mapping_df = processed_drug_target_mapping_df.\
                rename(columns={'Drug_Name':'drug_name','UniprotAC': 'uniprot_id','cIds': 'pubchem_cid' })
processed_drug_target_mapping_df = processed_drug_target_mapping_df.reset_index()[['pubchem_cid','drug_name','uniprot_id']]
processed_drug_target_mapping_df.dropna(inplace=True)

print(processed_drug_target_mapping_df.shape)


(23135, 2)
(27205, 2)
common drugid 17956
('common drugid', {'D00PSG', 'D0J5HG', 'D06UOL', 'D0IA7R', 'D0OT1A', 'D0A2SA', 'D01HVM', 'D0CG9E', 'D01AZP', 'D0N3JX', 'D0E8JO', 'D0W7KD', 'D0Y4QG', 'D0L0SP', 'D07XDS', 'D0YL0L', 'D0NW9G', 'D0WT5M', 'D0Y4AZ', 'D08FQX', 'D05MXM', 'D02GVH', 'D0VR9K', 'D0QL5M', 'D0X7KZ', 'D04APR', 'D08LLS', 'D0HS3X', 'D00UTD', 'D03JAO', 'D0B7HJ', 'D01DSP', 'D0E0XV', 'D0Q4VE', 'D0O4JH', 'D00HUZ', 'D07WGN', 'D09DNJ', 'D0S1YN', 'D03AYQ', 'D0OY0X', 'D0E6VR', 'D03GKP', 'D07QOG', 'D0Z5EJ', 'D0SO4Z', 'D0A2HR', 'D05TOJ', 'D0Q3RF', 'D0V7XC', 'D02JYY', 'D0V8ZD', 'D0K9BF', 'D04RLY', 'D0I1BA', 'D0C6HH', 'D04VJH', 'D0W3GR', 'D0PE9T', 'D05QUL', 'D0B2BY', 'D0V8WE', 'D0P3BL', 'D0O5KG', 'D01VCW', 'D0CT9C', 'D0M1MZ', 'D0T5SG', 'D0D5CY', 'D0U9OQ', 'D07MCA', 'D0X3XE', 'D0M7GB', 'D03KUB', 'D0N8MN', 'D09PKU', 'D0YO2N', 'D0B8QA', 'D07XHJ', 'D0X7HM', 'D01SLZ', 'D07JVS', 'D0XW9L', 'D09CIX', 'D0LA0P', 'D0O1LW', 'D0Y4NJ', 'D0C2FJ', 'D02OAV', 'D0H1GJ', 'D0K4QT', 'D06OSM', 'D06BKP', 'D01XUG',

In [19]:
# in one row keep just one uniprot_id. So if one drug tragets two proteins/uniprot_ids, 
# create two rows in dataframe for that drug. Before this part of code multiple target for one drug was kept
# in one row in comma separated manner
processed_drug_target_mapping_df_with_one_uniprot_per_row = processed_drug_target_mapping_df\
                                [~(processed_drug_target_mapping_df['uniprot_id'].str.contains(','))]
processed_drug_target_mapping_df_with_mult_uniprot_per_row = processed_drug_target_mapping_df\
                                [processed_drug_target_mapping_df['uniprot_id'].str.contains(',')]
print(processed_drug_target_mapping_df_with_mult_uniprot_per_row.head())

rows = []
for i, row in processed_drug_target_mapping_df_with_mult_uniprot_per_row.iterrows():
    s = set(filter(None, row['uniprot_id'].split(',')))
    print(row['pubchem_cid'], s)
    for uniprot in s:
        r = {}
        r['pubchem_cid'] = row['pubchem_cid']
        r['drug_name'] = row['drug_name']
        r['uniprot_id'] = uniprot
        rows.append(r)
temp_df = pd.DataFrame(rows)
final_drug_target_mapping_df = pd.concat([processed_drug_target_mapping_df_with_one_uniprot_per_row,\
                                         temp_df], axis=0)

print(final_drug_target_mapping_df)

os.makedirs(os.path.dirname(processed_drug_target_mapping_file), exist_ok=True)
final_drug_target_mapping_df.to_csv(processed_drug_target_mapping_file, index=False, sep='\t')


  pubchem_cid                                   drug_name  \
1      100424  3-[1-ethyl-2-(3-hydroxyphenyl)butyl]phenol   
2    11427553                                     KW-2449   
3     9844019                                      ND1251   
4      644213          2-chloro-5-nitro-N-phenylbenzamide   
5   102371197                                     [3H]BSP   

                    uniprot_id  
1                Q92731,P03372  
2         P11362,P00519,P36888  
3                P41146,Q9NXE4  
4         P37231,P19793,P37231  
5  P46721,Q9NPD5,O94956,Q9NYB5  
100424 {'P03372', 'Q92731'}
11427553 {'P36888', 'P00519', 'P11362'}
9844019 {'P41146', 'Q9NXE4'}
644213 {'P19793', 'P37231'}
102371197 {'P46721', 'O94956', 'Q9NPD5', 'Q9NYB5'}
5281727 {'P11274', 'Q07869', 'P00519'}
1897797 {'P23141', 'P30304'}
15124 {'P41595', 'P28335', 'P28223'}
71529770 {'Q9Y463', 'Q13627'}
71388 {'P19838', 'Q04864', 'Q01201', 'Q04206', 'Q00653'}
16204411 {'P30542', 'P0DMS8', 'P29274'}
5908088 {'P31749', 'P07949'

11708275 {'P41595', 'P28335', 'P28223'}
44355268 {'P43235', 'P07858'}
201590 {'P31645', 'Q01959'}
9883372 {'Q13224', 'Q05586'}
16167454 {'Q969V1', 'Q99705'}
286 {'P17174', 'P35520'}
11515063 {'P18507', 'P14867'}
11613121 {'P05106', 'P06756'}
124477 {'P34903', 'P31644', 'P47869', 'P18507', 'P14867'}
10398175 {'P35367', 'P28223'}
208951 {'P14416', 'P08908'}
452548 {'P11388', 'Q02880'}
52949342 {'P29475', 'P35228'}
46232032 {'P22303', 'P06276'}
46229072 {'P31639', 'P13866'}
444108 {'P49354', 'P49356'}
462919 {'P00915', 'Q16790', 'P00918'}
15552206 {'P29475', 'P29474', 'P35228'}
12263801 {'P31645', 'P23975'}
2993172 {'P21728', 'P21917', 'P21918'}
73755161 {'P25090', 'P21462'}
9804450 {'P41145', 'P41143', 'P35372'}
157918 {'P31645', 'P28223'}
261000 {'P06401', 'P10275'}
148203 {'P08253', 'P03956', 'P09237'}
44421938 {'P22303', 'P06276'}
44285245 {'P30559', 'P47901', 'P37288', 'P30518'}
24691160 {'P46098', 'P23975', 'P31645', 'Q12809'}
5280666 {'P47989', 'Q16678'}
3100 {'P35367', 'P50135'}
5

108143 {'P10826', 'P10276'}
5311007 {'Q14330', 'P21554'}
11334759 {'P03372', 'Q92731'}
73977920 {'P30542', 'P0DMS8'}
9803433 {'Q9NZ42', 'Q92542', 'Q96BI3', 'Q8WW43', 'P49768'}
1641 {'Q9UBS5', 'O75899'}
11849871 {'P30542', 'P29275', 'P29274'}
11427204 {'P56524', 'Q9UBN7', 'Q13547'}
11438116 {'P30542', 'P0DMS8', 'P29274'}
11245073 {'P30542', 'P0DMS8'}
14372 {'P31645', 'Q01959', 'P23975'}
18070083 {'P27338', 'P21397'}
151075 {'P35354', 'P23219'}
9812715 {'P32297', 'Q15822'}
44427184 {'P41145', 'P41143', 'P35372'}
44436392 {'P41143', 'P35372'}
21774960 {'P04054', 'P14555'}
44403231 {'P35346', 'P30874'}
9841284 {'P30542', 'P0DMS8', 'P29275'}
5281426 {'Q16790', 'P24385'}
44396734 {'P03372', 'Q92731'}
46189893 {'P31645', 'P23975'}
16202727 {'P30542', 'P0DMS8', 'P29274'}
5311066 {'P62508', 'P11474', 'O95718'}
46883536 {'Q9NZ42', 'Q92542', 'Q96BI3', 'Q8WW43', 'P49768'}
24760995 {'P47211', 'O43603'}
44408515 {'P41968', 'Q01726', 'P32245'}
24827761 {'P22303', 'P06276'}
11160546 {'Q16790', 'P00918

15870567 {'P28222', 'P28221'}
9961920 {'P11388', 'Q02880'}
44291858 {'Q9HBA0', 'Q8NER1'}
45028567 {'P00915', 'Q9ULX7', 'P00918', 'Q16790', 'O43570'}
44341260 {'P08684', 'P10635'}
25172871 {'P98170', 'Q13489'}
5097016 {'Q9UBS5', 'O75899'}
11241513 {'P27338', 'P21397'}
13953710 {'O60885', 'Q15059', 'P25440'}
11529633 {'P18507', 'P34903', 'P14867', 'P47870'}
107838 {'P28223', 'P18089', 'P27338', 'P18825', 'P21397', 'P08913'}
44352202 {'P08913', 'P18825', 'P18089'}
44408687 {'Q13224', 'Q12879', 'Q05586'}
9996409 {'Q9NZ42', 'Q92542', 'Q96BI3', 'Q8WW43', 'P49768'}
6539788 {'P11511', 'P15538', 'P05093'}
11341869 {'P00519', 'P12931', 'P00533'}
10244811 {'P16234', 'P09619'}
10223146 {'O00398', 'O95136'}
17759555 {'P41279', 'P08254'}
10198700 {'P28222', 'P28221'}
25022340 {'O15392', 'Q13490', 'Q13489', 'Q13075', 'Q96CA5', 'P98170'}
44414735 {'P31645', 'P23975'}
51045 {'P07550', 'P08588'}
16757571 {'P42345', 'P48736'}
447077 {'P12931', 'P11274', 'P16234', 'P00519', 'P09619', 'P11362', 'P06239'}
3

6255 {'O43451', 'P10253', 'Q14697', 'Q8TET4'}
3015863 {'P35367', 'P28223'}
446849 {'P03372', 'Q92731'}
247839 {'P62508', 'P11474', 'O95718'}
644210 {'P41143', 'P35372'}
10250021 {'P35354', 'P23219'}
9903 {'Q96RI1', 'P15692', 'P04626'}
46933593 {'P00915', 'Q16790', 'O43570', 'P00918'}
6166 {'P04150', 'P08235'}
9829012 {'P30556', 'P50052'}
91972012 {'P04637', 'Q00987'}
25129843 {'P22303', 'P06276'}
23657507 {'P06239', 'P07948', 'P42681', 'P43405'}
119387 {'P35354', 'P23219'}
72163809 {'Q86V86', 'P68400', 'O43781', 'Q9H2X6'}
16006492 {'P34969', 'P28221', 'P08908', 'P28335', 'P47898'}
11973927 {'P14416', 'P08908'}
44452426 {'P34972', 'P21554'}
44584680 {'P21728', 'P14416'}
127993 {'P30542', 'P0DMS8', 'P29274'}
3654103 {'P28223', 'P50406', 'P08908', 'P28222', 'P28335', 'P30939', 'P41595', 'P28566', 'P28221'}
15071517 {'Q9NZ42', 'Q92542', 'Q96BI3', 'Q8WW43', 'P49810', 'P49768'}
135242184 {'P29275', 'P29274'}
10407950 {'Q13936', 'Q01668'}
44424810 {'P41145', 'P41143', 'P35372'}
44427178 {'P41

71556703 {'P51813', 'P00533', 'P51451', 'P42681', 'Q15303', 'Q06187', 'P04626', 'P42680'}
44342905 {'Q92769', 'Q13547'}
18728 {'P10275'}
11520765 {'P32238', 'P32239', 'P41143', 'P35372'}
18516979 {'P06493', 'P35968'}
68678 {'P08913', 'P18825', 'P18089'}
5689 {'P34972', 'P21554'}
4878 {'P12931', 'O43353', 'P02741', 'Q13882', 'P00519', 'P06239'}
91754554 {'P37023', 'Q13873', 'P36897', 'Q04771'}
3127 {'P31645', 'Q01959', 'P23975'}
24754344 {'P08684', 'P15538'}
46926513 {'P49759', 'Q13627', 'Q9Y463', 'Q9HAZ1', 'P49760'}
153997 {'P21439', 'P08183'}
5315502 {'P03372', 'Q92731'}
9907093 {'P42338', 'P48736'}
10948394 {'P03372', 'Q92731'}
44269000 {'P32238', 'P32239'}
44475843 {'P35354', 'P23219'}
9919495 {'P49354', 'P49356', 'P53609'}
24995604 {'P34972', 'P21554'}
40539 {'Q14416', 'Q13255', 'Q14831', 'P41594', 'Q04609', 'P39086', 'O00222', 'Q14832'}
44563958 {'P47211', 'O43603'}
9908268 {'Q9H228', 'O95977', 'P21453', 'Q99500'}
234562 {'P00915', 'Q9ULX7', 'Q16790', 'O43570'}
107780 {'P28222', '

4850 {'P14416', 'P35462', 'P18825', 'P08908', 'P21917', 'P41595'}
338 {'P00915', 'P00918', 'P23219', 'O43570', 'P23280', 'P22748'}
44429064 {'P41145', 'P41143', 'P35372'}
1328 {'P30542', 'P29275'}
22995178 {'P30542', 'P29275', 'P29274'}
481747 {'P24941', 'P11802'}
11476171 {'P06213', 'P08069'}
44559100 {'P50406', 'P28335', 'P34969', 'P41595'}
10033891 {'P06493', 'P14635'}
148091 {'P11388', 'Q02880'}
16203666 {'P0DMS8', 'P29274'}
10417809 {'P00742', 'P00734'}
9937534 {'P30550', 'P28336', 'P32247'}
10696098 {'P07477', 'P00734'}
3072540 {'P08913', 'P18825', 'P18089'}
11556976 {'P32238', 'P32239', 'P41143', 'P35372'}
25161177 {'P15056', 'P04049', 'P10398'}
9823787 {'Q02779', 'P35916', 'Q16584', 'P06241', 'P04629', 'P80192'}
44391650 {'P00915', 'Q16790', 'P00918'}
4671 {'Q9Y2T6', 'P34972', 'Q8TDV5'}
18428457 {'P29475', 'P35228'}
3295 {'P00915', 'P00918'}
44543268 {'Q01959', 'P23975'}
41744 {'P11388', 'Q02880'}
11740439 {'P30542', 'P0DMS8', 'P29274'}
10473365 {'P11511', 'P05093'}
24866614 {'

11652153 {'P30542', 'P0DMS8', 'P29274'}
11743895 {'P30542', 'P0DMS8', 'P29274'}
10993014 {'P35462', 'P46098'}
9828551 {'P43235', 'P25774'}
16759925 {'P19838', 'Q04864', 'Q01201', 'Q04206', 'Q00653'}
108094 {'P18089', 'P35368', 'P18825', 'P35348', 'P08913', 'P25100'}
10126984 {'P15090', 'Q01469'}
15984632 {'P43115', 'P35408'}
24816375 {'P04150', 'P08235', 'P06401', 'P10275'}
24768351 {'P35372', 'P41143', 'P25103'}
11637090 {'P20248', 'P50750', 'P24941'}
5210 {'P31645', 'P23975'}
10402251 {'P16234', 'P09619'}
13904719 {'P31645', 'P23975'}
5230 {'Q9UBS5', 'O75899'}
11641503 {'P41143', 'P35372'}
9884807 {'P45983', 'P45984', 'P53779'}
91827372 {'Q9NPI1', 'Q9H8M2'}
11450197 {'P30542', 'P0DMS8', 'P29274'}
14014889 {'P31645', 'P23975'}
10186367 {'P13612', 'P05556'}
25886893 {'Q13258', 'P43115', 'P35408', 'P43119', 'P21731', 'P43116'}
14896613 {'P30542', 'P29275', 'P29274'}
9925999 {'P31645', 'P23975'}
73349109 {'O43614', 'O43613'}
44631912 {'Q00534', 'P11802'}
46184986 {'P07333', 'Q99062'}
468

25151352 {'P36888', 'P10721', 'Q99062'}
10710647 {'P30542', 'P29275'}
6472 {'P11229', 'P08172'}
44395768 {'Q9ULX7', 'O43570'}
9896267 {'P27815', 'Q08499', 'Q07343'}
56847486 {'Q12866', 'P30530', 'Q06418'}
25044690 {'P00915', 'Q9ULX7', 'P00918', 'Q16790', 'O43570'}
10192763 {'P03372', 'Q92731'}
45264052 {'P17252', 'P05129', 'Q02156', 'P05771', 'Q05513', 'Q04759', 'Q05655'}
470 {'P48066', 'P30531'}
167250 {'P0DP24', 'P0DP25', 'P10828', 'P10599', 'P29474', 'P0DP23'}
16131438 {'P25090', 'P25089'}
44449257 {'P30542', 'P29274'}
11303119 {'P41145', 'P41143', 'P35372'}
163839 {'P50406', 'P34969'}
55917 {'O00408', 'O76074', 'Q14432'}
24860514 {'Q13133', 'P55055'}
97463 {'Q16790', 'P00918', 'P22748'}
46877881 {'P41968', 'P32245'}
667490 {'Q06203', 'P20839'}
5288811 {'Q9UGM1', 'P36544', 'Q9GZZ6'}
16203665 {'P30542', 'P0DMS8', 'P29274'}
6539794 {'P15538', 'P05093'}
46901616 {'P15538', 'P05093'}
44426814 {'P41146', 'P41145', 'P41143', 'P35372'}
11521761 {'P30542', 'P0DMS8', 'P29274'}
441298 {'P0991

14595509 {'P28223', 'P18089', 'P27338', 'P18825', 'P21397', 'P08913'}
11627859 {'Q13133', 'P55055'}
67161540 {'P10721', 'P35916', 'P17948', 'P12931', 'P16234', 'Q8IVH8', 'P51812', 'P36888', 'P09619', 'P11362', 'P35968'}
2733079 {'P57771', 'P49798'}
60680 {'P25105', 'P26992'}
9943996 {'Q92769', 'Q9UBN7', 'P56524', 'Q13547', 'Q969S8', 'Q9BY41'}
3074331 {'P32238', 'P32239'}
4541 {'P41595', 'P48547', 'P28223', 'P28335'}
50796983 {'P55201', 'O15164'}
9906198 {'P41145', 'P41143', 'P35372'}
8974 {'P30542', 'P0DMS8', 'P29275'}
44427183 {'P41145', 'P41143', 'P35372'}
6438330 {'P49354', 'P49356'}
75124304 {'O60885', 'Q15059', 'P25440'}
445555 {'O00757', 'P09467'}
10202642 {'P06239', 'P36897', 'Q16539'}
10726859 {'P29475', 'P35228'}
1797 {'P28222', 'P28221'}
44624064 {'P22303', 'P06276'}
73755193 {'Q14416', 'Q14832'}
44316376 {'P30411', 'P46663'}
208907 {'P22303', 'P27338', 'P21397'}
2752461 {'Q02763', 'P14635', 'P17252', 'P05129', 'P24864', 'Q02156', 'P00533', 'P11802', 'P06493', 'Q00534', 'P243

25066467 {'Q02763', 'P00519'}
3353976 {'P29475', 'P29474', 'P35228'}
44454650 {'Q92769', 'Q13547'}
44434950 {'P41145', 'P41143', 'P35372'}
441480 {'P04746', 'P10253', 'Q14697', 'Q8TET4'}
46901613 {'P15538', 'P05093'}
22146595 {'P30550', 'P28336', 'P32247'}
16737137 {'P41145', 'P41143', 'P35372'}
6228 {'Q9UNI1', 'P00797'}
11335419 {'P14416', 'P35462', 'P21728', 'P21917', 'P21918'}
5284583 {'Q7L0J3', 'P46098'}
445245 {'P11217', 'P09848'}
164014 {'P35354', 'P23219'}
10944016 {'P28221', 'P08908'}
10149311 {'O43525', 'O43526'}
177284 {'P11388', 'Q02880'}
11614928 {'P32238', 'P32239', 'P41143', 'P35372'}
9576789 {'P04150', 'P17707'}
45487451 {'P31645', 'Q01959'}
44592118 {'P23975', 'Q01959', 'P31645', 'Q12809'}
9950486 {'P34972', 'P21554'}
5384876 {'P11388', 'Q02880'}
24801186 {'P20248', 'P50750'}
4567175 {'P08913', 'P18825', 'P18089'}
44418325 {'P34972', 'P21554'}
44416159 {'P41146', 'P35372'}
25154731 {'P00915', 'P00918', 'P22748'}
46871648 {'P22303', 'P06276'}
14802537 {'P32238', 'P32239'

44418303 {'P34972', 'P21554'}
9804992 {'Q13547', 'O60674'}
53323295 {'Q9UBS5', 'O75899'}
46224516 {'Q9H9B1', 'Q96KQ7'}
123895 {'P29475', 'P29474', 'P35228'}
10243546 {'P16234', 'P09619'}
44400605 {'P30874', 'P31391'}
24774602 {'P41968', 'Q01726'}
91827340 {'P18825', 'P18089'}
11450099 {'P24941', 'P24864'}
54686376 {'P0DMV9', 'P0DMV8', 'P11142'}
104999 {'P34903', 'Q99928', 'P31644', 'P47869'}
46203139 {'P0DMV9', 'P0DMV8'}
9906282 {'P04150', 'P08235', 'P06401', 'P10275'}
9887925 {'P28702', 'Q96RI1', 'P19793', 'P48443'}
183134 {'P30556', 'P50052'}
5154691 {'P45983', 'P12931', 'O43353', 'P17612', 'P06239'}
11635371 {'P20701', 'P05107'}
3071731 {'P11388', 'Q02880'}
439750 {'P01008', 'P03897', 'P48507'}
5330524 {'P10721', 'Q16539', 'P12931', 'P00533', 'P16234', 'P00519', 'P09619', 'P11362', 'P06239'}
11281011 {'Q9UBS5', 'Q99928', 'O75899'}
5289555 {'P29475', 'P29474', 'P35228'}
6442328 {'Q9Y271', 'Q9NS75'}
23689269 {'P51164', 'P20648'}
2244 {'P04035', 'P35354', 'P23219'}
45141807 {'Q01959', 

9847835 {'P55210', 'P55211'}
44578696 {'P34972', 'P21554'}
9995893 {'P28222', 'P08908'}
24803184 {'P08684', 'P10635'}
13279027 {'P31645', 'Q01959'}
3038 {'P00915', 'P00918'}
44407137 {'P22303', 'P06276'}
44186618 {'P17252', 'Q05655'}
9549301 {'P50613', 'Q00534', 'P49841'}
6369390 {'P22303', 'P06276'}
11672904 {'P31645', 'Q01959', 'P23975'}
58539171 {'P33981', 'Q5S007'}
44430009 {'P41146', 'P41145', 'P41143', 'P35372'}
44388022 {'P30874', 'P35346', 'P32745', 'P31391', 'P30872'}
5328819 {'P41594', 'P00533'}
24748204 {'Q96GD4', 'O14965'}
1355 {'P34969', 'P28223', 'P50406', 'P28335', 'P41595', 'P28566', 'P28221'}
9964781 {'P30542', 'P29274'}
10154248 {'Q9UBS5', 'O75899'}
24824620 {'P05106', 'P06756'}
5803 {'P10827', 'P10828'}
1253361 {'O60885', 'P55201'}
42617983 {'P34972', 'P21554'}
780643 {'P27815', 'Q01959', 'Q07343'}
44408484 {'P32238', 'P32239', 'P41143', 'P35372'}
46703929 {'P00915', 'Q9ULX7', 'P00918', 'Q16790', 'O43570'}
15887953 {'P29475', 'P29474', 'P35228'}
5311501 {'P34972', 'P

10399068 {'P22303', 'P06276'}
11636164 {'P05106', 'P06756'}
25192477 {'P08684', 'P10635', 'P14061'}
46843772 {'Q13470', 'Q13164', 'O00444', 'Q8N568'}
264430 {'P16083', 'P15559'}
10302451 {'P00519', 'P12931'}
11414445 {'P03372', 'Q92731'}
18646248 {'P27338', 'P21397'}
443295 {'P25090', 'P21462'}
11580305 {'P18507', 'P14867', 'P47870'}
164710 {'P14416', 'Q01959'}
73755067 {'Q8TDV5', 'Q9Y2T6', 'Q9Y5S1'}
54682505 {'Q13224', 'Q12879', 'Q05586'}
5311194 {'P41146', 'P46098'}
10198431 {'P41595', 'P28335', 'P28223'}
16747727 {'P41145', 'P41143', 'P35372'}
16095112 {'P56524', 'Q13547'}
11521736 {'P18507', 'P14867', 'P47870'}
25333445 {'P21728', 'P14416', 'P21917', 'P35462'}
4650 {'P22303', 'P06276'}
10416322 {'O95180', 'O43497'}
44413811 {'Q9NZ42', 'Q92542', 'Q96BI3', 'Q8WW43', 'P49810', 'P49768'}
4762 {'Q01959', 'P23975'}
11245786 {'P35968', 'P17948', 'P35916'}
44368903 {'P24530', 'P25101'}
101163123 {'P11388', 'Q02880'}
5311190 {'P21917', 'P14416', 'P35462', 'P08908'}
44320997 {'P30559', 'P479

56599293 {'P08069', 'Q9UM73'}
25050224 {'P41143', 'P35372'}
60182 {'P32238', 'Q15761'}
11461586 {'P34972', 'P21554'}
3964633 {'Q14416', 'O15303', 'Q14831', 'O00222'}
46232684 {'P22303', 'P06276'}
44421948 {'P22303', 'P06276'}
44418371 {'P34972', 'P21554'}
9803446 {'P27815', 'Q08499', 'Q07343'}
16109441 {'P30559', 'P47901', 'P37288', 'P30518'}
10358392 {'P28222', 'P08908'}
10760609 {'P30542', 'P0DMS8'}
72950888 {'O00329', 'P48736'}
11536481 {'P00915', 'Q16790', 'P00918'}
11210506 {'P31645', 'Q01959', 'P23975'}
2728532 {'P21917', 'P14416', 'P35462'}
5311053 {'P34995', 'Q13258', 'P43088', 'P43115', 'P21731'}
24823110 {'P05106', 'P06756'}
415628 {'P25100', 'P35348', 'P08908'}
189081 {'P29475', 'P29474', 'P35228'}
9851770 {'P35354', 'P23219'}
5311203 {'P34903', 'P31644', 'P47869', 'P18507', 'P14867'}
5388063 {'Q16678', 'P00519', 'P08183'}
11543610 {'P18507', 'P14867', 'P47870'}
44520901 {'P00915', 'P00918'}
52941982 {'P22303', 'P06276'}
24995524 {'Q9UQB9', 'Q96GD4', 'O14965'}
5328940 {'P005

16082947 {'P41143', 'P35372'}
44418316 {'P34972', 'P21554'}
10296414 {'P28222', 'P28221', 'P08908'}
4212 {'P11388', 'Q02880'}
46864283 {'Q9NRM7', 'O75116', 'Q15208', 'Q13464'}
17513 {'Q9Y478', 'P54619', 'Q13131'}
71657455 {'O75874'}
44359599 {'P49354', 'P49356'}
45483407 {'P31645', 'Q01959', 'P08908', 'P23975'}
11524350 {'P14416', 'P28223', 'P08908'}
44437883 {'P56524', 'Q9UBN7', 'Q13547'}
10046856 {'O43614', 'O43613'}
11040230 {'P32238', 'P32239', 'P41143', 'P35372'}
44418913 {'P41143', 'P35372'}
46704077 {'P00915', 'Q9ULX7', 'P00918', 'Q16790', 'O43570'}
11536774 {'P30542', 'P0DMS8'}
5566 {'P0DP25', 'P0DP23', 'P0DP24'}
11431898 {'P41145', 'P41143', 'P35372'}
1204 {'P36544', 'P17787', 'Q15822', 'P08912'}
44418914 {'P41143', 'P35372'}
43227726 {'Q01959', 'P23975'}
3466480 {'P27338', 'P21397'}
438981 {'P06493', 'P49841', 'Q00535'}
10335601 {'P34903', 'P31644', 'P47869', 'P18507', 'P14867'}
10354670 {'P28223', 'P08908'}
119182 {'P31350', 'Q7LG56', 'P23921'}
5311080 {'P41145', 'P41143', '

56840673 {'P30550', 'P28336', 'P32247'}
54613196 {'P19835', 'P08246'}
2703 {'Q99572', 'Q02156'}
10141039 {'P43115', 'P43119', 'P35408'}
44584683 {'P21728', 'P14416'}
16736880 {'P41145', 'P41143', 'P35372'}
5353399 {'P49354', 'P49356'}
9953255 {'P30542', 'P0DMS8', 'P29275'}
16720954 {'P31645', 'Q01959'}
9942427 {'P43681', 'P17787'}
44563951 {'P47211', 'O43603'}
6538761 {'P14635', 'P06493', 'P49841', 'Q00535'}
3033 {'P35354', 'P23219'}
22636319 {'P28221', 'P28223', 'P08908'}
21345766 {'P41145', 'P35372'}
127597 {'P18507', 'P14867', 'P47870'}
14742545 {'O95264', 'P46098'}
25139473 {'P27338', 'P21397'}
65937 {'P32238', 'P32239'}
62518 {'P41145', 'P41143', 'P35372'}
44418308 {'P34972', 'P21554'}
10797392 {'P08913', 'P18825', 'P18089'}
44467662 {'P41145', 'P41143', 'P35372'}
44561207 {'P31645', 'Q01959'}
2396 {'O00141', 'P14635', 'P45983', 'Q16539', 'P05129', 'P31749', 'P49841', 'Q02156', 'P06493', 'O14757', 'Q05513', 'P28482', 'Q15759', 'P06239', 'P53778', 'Q05655', 'Q13464'}
44418281 {'P05

52947851 {'P43115', 'P43116', 'P35408'}
148195 {'P49354', 'P49356'}
93004 {'P62136', 'Q15172'}
132814 {'P05106', 'P08514'}
44264453 {'P27338', 'P21397'}
100801 {'P03372', 'Q92731'}
16131448 {'P41146', 'P35372'}
44563198 {'P27815', 'Q07343', 'O76074', 'Q14432'}
21976428 {'P30542', 'P29274'}
9998777 {'P16050', 'P18054'}
44589165 {'P11245', 'P18440'}
3058747 {'P14416', 'P08908'}
216239 {'P10721', 'P35968', 'P09619', 'P00533'}
44418725 {'P03372', 'Q92731'}
10515839 {'P08913', 'P18825', 'P18089'}
690399 {'P18507', 'P14867', 'P47870'}
483477 {'P06746', 'Q7Z5Q5', 'Q9UHN1', 'P56282', 'O75417', 'Q9HCU8', 'P28340', 'P54098'}
10644450 {'P11802', 'P00533'}
11656380 {'P05106', 'P06756'}
44572473 {'P30542', 'P29275', 'P29274'}
10087225 {'P18405', 'P31213'}
9947999 {'P31645', 'Q01959', 'P23975'}
44420667 {'P41145', 'P41143', 'P35372'}
21804 {'P22303', 'P21554'}
44186666 {'P29275', 'P29274'}
108002 {'P24530', 'P25101'}
9930789 {'P41595', 'P28335', 'P28223'}
56649450 {'P42356', 'P42336'}
10083051 {'P16

9965141 {'Q92769', 'Q9UBN7', 'P56524', 'Q13547', 'Q969S8', 'Q9BY41'}
9797370 {'P12931', 'P16234', 'P00519', 'P06241', 'P09619', 'P06239'}
46222244 {'P24941', 'P24864'}
135398658 {'P41440', 'Q96NT5'}
5330258 {'P24864', 'P11802', 'P20248', 'P22607', 'Q00534', 'P24941', 'P11362', 'P21802', 'P22455'}
10782198 {'P31645', 'Q01959'}
44388094 {'P32745', 'P30872'}
9912381 {'P07711', 'P43235', 'P07858', 'P25774'}
11638264 {'P03372', 'Q92731'}
23661626 {'P27338', 'P21397'}
79599 {'P00915', 'P00918'}
10029385 {'P49841', 'P49840'}
67136 {'P00915', 'Q16790', 'P23280', 'P00918'}
42617985 {'P34972', 'P21554'}
448545 {'P49354', 'P49356'}
23643664 {'P14416', 'P28223', 'P34969', 'P08908'}
387042 {'P11274', 'P00519'}
44447548 {'P30542', 'P0DMS8', 'P29275'}
44452650 {'P34972', 'P21554'}
44560113 {'Q13224', 'Q12879'}
154575 {'P27815', 'Q08499', 'Q07343'}
442688 {'Q14534', 'P00742', 'P00734'}
25262792 {'Q96B36', 'Q9BPZ7', 'Q8TB45', 'P42345', 'Q9BVC4', 'Q8N122'}
44342825 {'Q92769', 'Q9UBN7', 'P56524', 'Q13547