This notebook takes the cleaned data exported by the DID parser and writes it to Wikidata.

In [1]:
from wikidataintegrator import wdi_core, wdi_login
from wikidataintegrator.ref_handlers import update_retrieved_if_new_multiple_refs
import pandas as pd
from pandas import read_csv
import requests
from tqdm.notebook import trange, tqdm
import ipywidgets 
import widgetsnbextension

In [2]:
from datetime import datetime
import copy
def create_reference():
    """Build the reference attached to every statement written by this notebook.

    Returns:
        A flat list of four reference snaks, in this order: "stated in" (P248)
        for Q70116865, "retrieved" (P813), "stated in" (P248) for Q21008030,
        "retrieved" (P813). Both "retrieved" snaks carry today's date.
    """
    # Day-precision timestamp taken from the local clock at call time.
    retrieved_on = datetime.now().strftime("+%Y-%m-%dT00:00:00Z")

    reference_snaks = []
    for source_qid in ("Q70116865", "Q21008030"):
        reference_snaks.append(
            wdi_core.WDItemID(value=source_qid, prop_nr="P248", is_reference=True)
        )
        reference_snaks.append(
            wdi_core.WDTime(retrieved_on, prop_nr="P813", is_reference=True)
        )
    return reference_snaks

In [None]:
## Login for Scheduled bot
# Credentials are taken from scheduled_bots.local when that module is importable,
# otherwise from the WDUSER / WDPASS environment variables.
import os  # required by the environment-variable fallback below

print("Logging in...")
try:
    from scheduled_bots.local import WDUSER, WDPASS
except ImportError:
    if "WDUSER" in os.environ and "WDPASS" in os.environ:
        WDUSER = os.environ['WDUSER']
        WDPASS = os.environ['WDPASS']
    else:
        raise ValueError("WDUSER and WDPASS must be specified in local.py or as environment variables")

# Create the session object that the later cells pass to item.write(login).
login = wdi_login.WDLogin(WDUSER, WDPASS)

In [3]:
"""
print("Logging in...")
import wdi_user_config ## Credentials stored in a wdi_user_config file
login_dict = wdi_user_config.get_credentials()
login = wdi_login.WDLogin(login_dict['WDUSER'], login_dict['WDPASS'])
"""

Logging in...
https://www.wikidata.org/w/api.php
Successfully logged in as Gtsulab


## Update WD with statements derived from WDID

In [5]:
## Unit test-- write a statement
# Writes a single P2175 statement (one drug item -> one phenotype item) to check
# that the reference builder, the item engine and the login all work together.

drug_qid = 'Q407125'
phen_qid = 'Q18968110'
reference = create_reference()

# The claim receives its own deep copy of the reference list.
claim = wdi_core.WDItemID(
    value=phen_qid,
    prop_nr="P2175",
    references=[copy.deepcopy(reference)],
)
statement = [claim]

item = wdi_core.WDItemEngine(
    wd_item_id=drug_qid,
    data=statement,
    append_value="P2175",
    global_ref_mode='CUSTOM',
    ref_handler=update_retrieved_if_new_multiple_refs,
)
item.write(login)


'Q407125'

In [11]:
# Wikidata property ID used for each relation type written by this notebook.
predicate_map = dict(
    may_prevent='P4954',
    may_treat='P2175',
    causes='P1542',
)

# Labels of the Wikidata properties relevant here, with the direction each one is used in.
wd_properties = dict([
    ('P4954', 'may prevent'),                # applied to drug entities, referencing disease entities
    ('P2175', 'medical condition treated'),  # applied to drug entities, referencing disease entities
    ('P1696', 'drug used for treatment'),    # applied to disease entities, referencing drug entities
    ('P1542', 'has effect'),                 # applied to drug entities, referencing disease entities
    ('P828', 'has cause'),                   # applied to disease entities, referencing drug entities
])

### Write treatment statements

In [35]:
# Load the "may treat" table and keep one row per unique (drug, phenotype) QID pair.
may_treats = read_csv('results/may_treat.tsv',delimiter='\t',header=0, index_col=0)
treatment_statement_info = (
    may_treats[['drug_cas_wdid','phen_cui_wdid']]
    .drop_duplicates(keep='first')
    .reset_index()
)

# Single source of truth for the property, used for both the claim and append_value.
treat_property = predicate_map['may_treat']

# Iterate over the QID pairs directly; tqdm needs `total` because zip() has no length.
treatment_pairs = zip(treatment_statement_info['drug_cas_wdid'],
                      treatment_statement_info['phen_cui_wdid'])
for drug_qid, phen_qid in tqdm(treatment_pairs, total=len(treatment_statement_info)):
    reference = create_reference()
    statement = [wdi_core.WDItemID(value=phen_qid, prop_nr=treat_property,
                                   references=[copy.deepcopy(reference)])]
    item = wdi_core.WDItemEngine(wd_item_id=drug_qid, data=statement, append_value=treat_property,
                       global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
    item.write(login)

0 Q407125 Q18968110
1 Q407125 Q653971
2 Q407125 Q223591
3 Q407125 Q1495657
4 Q407125 Q4003020
5 Q1087888 Q41861
6 Q179731 Q653971
7 Q179731 Q223591
8 Q179731 Q202837
9 Q179731 Q605259
10 Q179731 Q3505252
11 Q409251 Q936382
12 Q409251 Q736715
13 Q23767 Q18968110
14 Q23767 Q653971
15 Q23767 Q223591
16 Q23767 Q1495657
17 Q23767 Q936382
18 Q23767 Q736715
19 Q23767 Q4003020
20 Q1052672 Q18975220
21 Q1052672 Q101896
22 Q1052672 Q223254
23 Q1052672 Q3776920
24 Q1052672 Q11564537
25 Q1052672 Q186470
26 Q1052672 Q55779861
27 Q2329715 Q12206
28 Q27132403 Q2480013
29 Q417174 Q180762
30 Q415563 Q938107
31 Q415563 Q1494682
32 Q415563 Q647630
33 Q5405160 Q205764
34 Q5405160 Q938107
35 Q5515257 Q916280
36 Q27122277 Q506652
37 Q27122277 Q18553923
38 Q27114666 Q5526839
39 Q426386 Q422225
40 Q121874 Q35805
41 Q121874 Q114953
42 Q121874 Q767327
43 Q121874 Q34879
44 Q121874 Q3991731
45 Q390305 Q1890194
46 Q390305 Q16499
47 Q27107089 Q471521
48 Q27107089 Q101991
49 Q27107089 Q81938
50 Q27107089 Q281289
51 

795 Q2465218 Q187255
796 Q2465218 Q81938
797 Q418445 Q4862390
798 Q418445 Q81938
799 Q418445 Q980709
800 Q418817 Q18554460
801 Q418817 Q1411740
802 Q418817 Q264118
803 Q418817 Q18556020
804 Q418817 Q1891209
805 Q418817 Q1898141
806 Q418817 Q324464
807 Q418817 Q7845637
808 Q414762 Q18556697
809 Q554297 Q179945
810 Q411159 Q18968110
811 Q411159 Q653971
812 Q411159 Q223591
813 Q411159 Q4003020
814 Q411159 Q218712
815 Q421301 Q2895302
816 Q421301 Q852376
817 Q420644 Q41861
818 Q418928 Q38404
819 Q418928 Q180913
820 Q418928 Q12174
821 Q419724 Q762713
822 Q2357007 Q3296793
823 Q407541 Q81938
824 Q415122 Q55136018
825 Q424167 Q181391
826 Q424167 Q1620594
827 Q848264 Q281490
828 Q421381 Q56002
829 Q5462356 Q189588
830 Q411478 Q627368
831 Q411478 Q3705799
832 Q411478 Q977090
833 Q411478 Q868137
834 Q411478 Q627625
835 Q411478 Q18967011
836 Q238490 Q259626
837 Q238490 Q273510
838 Q238490 Q2726043
839 Q238490 Q18967011
840 Q2697578 Q4684750
841 Q421920 Q3505252
842 Q27106662 Q268667
843 Q27106662

1566 Q55186498 Q18558125
1567 Q58375 Q170082
1568 Q58375 Q41112
1569 Q416507 Q356033
1570 Q416507 Q208414
1571 Q907219 Q131755
1572 Q907219 Q7140388
1573 Q2601832 Q183134
1574 Q2601832 Q2458539
1575 Q2601832 Q55779861
1576 Q415366 Q2032041
1577 Q415366 Q12152
1578 Q423538 Q852163
1579 Q423538 Q2006818
1580 Q423538 Q21504918
1581 Q423538 Q117060
1582 Q423538 Q1433212
1583 Q423538 Q574360
1584 Q423538 Q1132120
1585 Q1758380 Q34879
1586 Q1758380 Q55779861
1587 Q757058 Q189331
1588 Q757058 Q1132120
1589 Q413772 Q4797546
1590 Q413772 Q18555343
1591 Q413772 Q7170410
1592 Q413772 Q1053824
1593 Q413772 Q574360
1594 Q413772 Q746001
1595 Q413772 Q377978
1596 Q413840 Q11085
1597 Q415238 Q5609817
1598 Q415238 Q926462
1599 Q424312 Q6066379
1600 Q424312 Q281490
1601 Q221174 Q131755
1602 Q221174 Q7140388
1603 Q221174 Q81938
1604 Q419948 Q193889
1605 Q419948 Q152234
1606 Q419948 Q41861
1607 Q419948 Q736715
1608 Q419948 Q504790
1609 Q419948 Q857667
1610 Q2193376 Q3108586
1611 Q411457 Q66793701
1612 Q34

### Write "may prevent" statements

In [39]:
# Load the "may prevent" table and keep one row per unique (drug, phenotype) QID pair.
may_prevent = read_csv('results/may_prevent.tsv',delimiter='\t',header=0)
prevent_statement_info = (
    may_prevent[['drug_cas_wdid','phen_cui_wdid']]
    .drop_duplicates(keep='first')
    .reset_index()
)

# Single source of truth for the property, used for both the claim and append_value.
prevent_property = predicate_map['may_prevent']

# Iterate over the QID pairs directly; tqdm needs `total` because zip() has no length.
prevent_pairs = zip(prevent_statement_info['drug_cas_wdid'],
                    prevent_statement_info['phen_cui_wdid'])
for drug_qid, phen_qid in tqdm(prevent_pairs, total=len(prevent_statement_info)):
    reference = create_reference()
    statement = [wdi_core.WDItemID(value=phen_qid, prop_nr=prevent_property,
                                   references=[copy.deepcopy(reference)])]
    item = wdi_core.WDItemEngine(wd_item_id=drug_qid, data=statement, append_value=prevent_property,
                       global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
    item.write(login)

HBox(children=(IntProgress(value=0, max=139), HTML(value='')))




### Write cause statements

In [38]:
# Load the "cause" table and keep one row per unique (drug, phenotype) QID pair.
causes = read_csv('results/cause.tsv',delimiter='\t',header=0)
cause_statement_info = (
    causes[['drug_cas_wdid','phen_cui_wdid']]
    .drop_duplicates(keep='first')
    .reset_index()
)

# Single source of truth for the property, used for both the claim and append_value.
cause_property = predicate_map['causes']

# Iterate over the QID pairs directly; tqdm needs `total` because zip() has no length.
cause_pairs = zip(cause_statement_info['drug_cas_wdid'],
                  cause_statement_info['phen_cui_wdid'])
for drug_qid, phen_qid in tqdm(cause_pairs, total=len(cause_statement_info)):
    reference = create_reference()
    statement = [wdi_core.WDItemID(value=phen_qid, prop_nr=cause_property,
                                   references=[copy.deepcopy(reference)])]
    item = wdi_core.WDItemEngine(wd_item_id=drug_qid, data=statement, append_value=cause_property,
                       global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
    item.write(login)

HBox(children=(IntProgress(value=0, max=48), HTML(value='')))




## Abstract the task into a bot

In [7]:
# Generic version of the three per-relation cells: one pass per relation type,
# reading results/<type>.tsv and building one item update per unique QID pair.
statement_types = ['cause','may_prevent','may_treat']
property_dict = {'cause':'P1542','may_prevent':'P4954','may_treat':'P2175'}
filelocation = 'results/'

drug_qid_column_to_use = 'drug_cas_wdid'
phen_qid_column_to_use = 'phen_cui_wdid'

# NOTE(review): the original cell built each item but never called item.write(),
# so nothing was pushed. That behaviour is kept as the default; set this to True
# to actually write the statements to Wikidata.
WRITE_TO_WIKIDATA = False

for each_did_type in statement_types:
    statement_filename = filelocation+each_did_type+'.tsv'
    triples = read_csv(statement_filename,delimiter='\t',header=0)
    triples_clean = (
        triples[[drug_qid_column_to_use,phen_qid_column_to_use]]
        .drop_duplicates(keep='first')
        .reset_index()
    )
    did_property = property_dict[each_did_type]

    # Iterate over the QID pairs directly; tqdm needs `total` because zip() has no length.
    qid_pairs = zip(triples_clean[drug_qid_column_to_use], triples_clean[phen_qid_column_to_use])
    for drug_qid, phen_qid in tqdm(qid_pairs, total=len(triples_clean)):
        reference = create_reference()
        statement = [wdi_core.WDItemID(value=phen_qid, prop_nr=did_property,
                                       references=[copy.deepcopy(reference)])]
        item = wdi_core.WDItemEngine(wd_item_id=drug_qid, data=statement, append_value=did_property,
                           global_ref_mode='CUSTOM', ref_handler=update_retrieved_if_new_multiple_refs)
        if WRITE_TO_WIKIDATA:
            item.write(login)


HBox(children=(IntProgress(value=0, max=48), HTML(value='')))

cause Q27106156 Q10860644 P1542
cause Q27106282 Q10860644 P1542
cause Q27107584 Q10860644 P1542
cause Q27129640 Q10860644 P1542
cause Q27107612 Q10860644 P1542
cause Q27133236 Q1981368 P1542
cause Q27108171 Q10860644 P1542
cause Q413717 Q10860644 P1542
cause Q416905 Q10860644 P1542
cause Q5102983 Q10860644 P1542
cause Q5103226 Q10860644 P1542
cause Q3294630 Q10860644 P1542
cause Q258591 Q1981368 P1542
cause Q223600 Q1981368 P1542
cause Q5198674 Q10860644 P1542
cause Q421274 Q10860644 P1542
cause Q210402 Q10860644 P1542
cause Q1073333 Q10860644 P1542
cause Q421301 Q1981368 P1542
cause Q418011 Q114953 P1542
cause Q412189 Q1981368 P1542
cause Q423912 Q10860644 P1542
cause Q417222 Q1981368 P1542
cause Q162867 Q10860644 P1542
cause Q3292273 Q10860644 P1542
cause Q1060922 Q10860644 P1542
cause Q424297 Q10860644 P1542
cause Q20817188 Q10860644 P1542
cause Q189522 Q10860644 P1542
cause Q420685 Q1981368 P1542
cause Q415744 Q1981368 P1542
cause Q411887 Q1981368 P1542
cause Q194406 Q114953 P1542


HBox(children=(IntProgress(value=0, max=139), HTML(value='')))

may_prevent Q407125 Q1495657 P4954
may_prevent Q23767 Q1495657 P4954
may_prevent Q27132403 Q110315 P4954
may_prevent Q390305 Q1890194 P4954
may_prevent Q409199 Q794086 P4954
may_prevent Q184630 Q794086 P4954
may_prevent Q421259 Q81938 P4954
may_prevent Q207442 Q18558194 P4954
may_prevent Q2314 Q194290 P4954
may_prevent Q375613 Q2425407 P4954
may_prevent Q147101 Q44727 P4954
may_prevent Q410358 Q5420019 P4954
may_prevent Q176533 Q81938 P4954
may_prevent Q411064 Q270421 P4954
may_prevent Q3617574 Q220570 P4954
may_prevent Q3617574 Q55286710 P4954
may_prevent Q4781812 Q9294051 P4954
may_prevent Q621834 Q186889 P4954
may_prevent Q621834 Q127076 P4954
may_prevent Q18216 Q1209150 P4954
may_prevent Q18216 Q12152 P4954
may_prevent Q18216 Q81938 P4954
may_prevent Q592802 Q1129105 P4954
may_prevent Q1530958 Q81938 P4954
may_prevent Q422745 Q81938 P4954
may_prevent Q4890814 Q186889 P4954
may_prevent Q4890814 Q127076 P4954
may_prevent Q4918914 Q35869 P4954
may_prevent Q422212 Q35869 P4954
may_prev

HBox(children=(IntProgress(value=0, max=1730), HTML(value='')))

may_treat Q407125 Q18968110 P2175
may_treat Q407125 Q653971 P2175
may_treat Q407125 Q223591 P2175
may_treat Q407125 Q1495657 P2175
may_treat Q407125 Q4003020 P2175
may_treat Q1087888 Q41861 P2175
may_treat Q179731 Q653971 P2175
may_treat Q179731 Q223591 P2175
may_treat Q179731 Q202837 P2175
may_treat Q179731 Q605259 P2175
may_treat Q179731 Q3505252 P2175
may_treat Q409251 Q936382 P2175
may_treat Q409251 Q736715 P2175
may_treat Q23767 Q18968110 P2175
may_treat Q23767 Q653971 P2175
may_treat Q23767 Q223591 P2175
may_treat Q23767 Q1495657 P2175
may_treat Q23767 Q936382 P2175
may_treat Q23767 Q736715 P2175
may_treat Q23767 Q4003020 P2175
may_treat Q1052672 Q18975220 P2175
may_treat Q1052672 Q101896 P2175
may_treat Q1052672 Q223254 P2175
may_treat Q1052672 Q3776920 P2175
may_treat Q1052672 Q11564537 P2175
may_treat Q1052672 Q186470 P2175
may_treat Q1052672 Q55779861 P2175
may_treat Q2329715 Q12206 P2175
may_treat Q27132403 Q2480013 P2175
may_treat Q417174 Q180762 P2175
may_treat Q415563 Q93

may_treat Q407972 Q544006 P2175
may_treat Q407972 Q186889 P2175
may_treat Q407972 Q170082 P2175
may_treat Q407972 Q47790 P2175
may_treat Q407972 Q127076 P2175
may_treat Q425289 Q152234 P2175
may_treat Q425289 Q41861 P2175
may_treat Q425289 Q504790 P2175
may_treat Q3294630 Q2006818 P2175
may_treat Q3294630 Q21504918 P2175
may_treat Q3294630 Q1433212 P2175
may_treat Q3294630 Q81938 P2175
may_treat Q3294630 Q574360 P2175
may_treat Q139347 Q389735 P2175
may_treat Q139347 Q736715 P2175
may_treat Q139347 Q18556020 P2175
may_treat Q139347 Q179945 P2175
may_treat Q139347 Q18555315 P2175
may_treat Q193166 Q18557946 P2175
may_treat Q419468 Q917620 P2175
may_treat Q419468 Q5609817 P2175
may_treat Q419468 Q926462 P2175
may_treat Q418201 Q727028 P2175
may_treat Q258591 Q1097957 P2175
may_treat Q409492 Q18968110 P2175
may_treat Q409492 Q653971 P2175
may_treat Q409492 Q223591 P2175
may_treat Q409492 Q4003020 P2175
may_treat Q409492 Q218712 P2175
may_treat Q193978 Q1474877 P2175
may_treat Q193978 Q134

KeyboardInterrupt: 