In [1]:
import os

import psycopg2

# Connection settings for the PostgreSQL database queried by this notebook.
# Each value is read from the environment variable of the same name, falling
# back to an empty string, so that real credentials never have to be typed
# into (and saved with) the notebook itself.
pg_cred = {
    "POSTGRES_DB": os.environ.get("POSTGRES_DB", ""),
    "POSTGRES_HOST": os.environ.get("POSTGRES_HOST", ""),
    "POSTGRES_PORT": os.environ.get("POSTGRES_PORT", ""),
    "POSTGRES_USER": os.environ.get("POSTGRES_USER", ""),
    "POSTGRES_PW": os.environ.get("POSTGRES_PW", ""),
}

# Pass the settings as keyword arguments instead of formatting them into a
# hand-quoted DSN string: a value containing a single quote or a backslash
# (most likely the password) would otherwise produce a malformed DSN.
conn = psycopg2.connect(
    dbname=pg_cred["POSTGRES_DB"],
    user=pg_cred["POSTGRES_USER"],
    host=pg_cred["POSTGRES_HOST"],
    port=pg_cred["POSTGRES_PORT"],
    password=pg_cred["POSTGRES_PW"],
)
print('connected')

connected


In [2]:
# Names of the database relations the experiments read from.
# mv_cause_2: only causes with at least 3 occurrences
mv_cause = "mv_cause_2"
mv_ddi_gen = "mv_ddi_gen"
mv_ddi_function = "mv_ddi_function"

# Unfiltered (pmid, subject_cui, object_cui) selections.
query_cause = "SELECT pmid, subject_cui, object_cui FROM " + mv_cause
query_ddi_gen = "SELECT pmid, subject_cui, object_cui FROM " + mv_ddi_gen + " "

# DDI gene relation, split by direction. The semantic-type filters keep rows
# whose object is 'gngm'/'aapp' and whose subject is 'clnd'/'phsu'/'sbst'
# (suffix _dg), or the mirrored combination (suffix _gd).
query_ddi_gen_1_dg = (
    "SELECT pmid, subject_cui, object_cui FROM {} WHERE "
    "(object_semtype = 'gngm' OR object_semtype = 'aapp') AND "
    "(subject_semtype = 'clnd' OR subject_semtype = 'phsu' OR subject_semtype ='sbst')"
).format(mv_ddi_gen)
query_ddi_gen_2_gd = (
    "SELECT pmid, subject_cui, object_cui FROM {} WHERE "
    "(subject_semtype = 'gngm' OR subject_semtype = 'aapp') AND "
    "(object_semtype = 'clnd' OR object_semtype = 'phsu' OR object_semtype ='sbst')"
).format(mv_ddi_gen)

# DDI function relation: the same _dg filter as above, plus a _gf filter whose
# subject is 'gngm'/'aapp' and whose object is one of the listed function /
# disorder semantic types.
query_ddi_function_1_dg = (
    "SELECT pmid, subject_cui, object_cui FROM {} WHERE "
    "(object_semtype = 'gngm' OR object_semtype = 'aapp') AND "
    "(subject_semtype = 'clnd' OR subject_semtype = 'phsu' OR subject_semtype ='sbst')"
).format(mv_ddi_function)
query_ddi_function_2_gf = (
    "SELECT pmid, subject_cui, object_cui FROM {} WHERE "
    "(subject_semtype = 'gngm' OR subject_semtype = 'aapp') AND "
    "(object_semtype = 'biof' OR object_semtype = 'phsf' OR object_semtype ='orgf' OR object_semtype = 'menp' "
    "OR object_semtype = 'ortf' OR object_semtype = 'celf' OR object_semtype ='moft' OR object_semtype = 'genf' "
    "OR object_semtype = 'patf' OR object_semtype = 'dsyn' OR object_semtype = 'mobd' OR object_semtype = 'neop' "
    "OR object_semtype = 'comd' OR object_semtype = 'emod')"
).format(mv_ddi_function)

# Queries for the facts that derived results are verified against.
query_cause_correct = "SELECT pmid, subject_cui, object_cui FROM " + mv_cause
query_ddi_correct = "SELECT '0', d1, d2 FROM mv_ddi_correct"

# Usage of Knowledge Graph 

In [3]:
from model.knowledgegraph import kg_derive_facts, kg_verify_facts, load_kg_facts

## Cause Experiment

In [4]:
# Knowledge-graph cause experiment: load the cause facts, derive new facts
# from the subject/object indexes, then count how many derived facts are
# confirmed by the loaded subject index.
print('load facts from semmeddb and building index...')
cause_idx_subjects, cause_idx_objects = load_kg_facts(conn, query_cause)

print('deriving new facts...')
cause_results, cause_res_amounts = kg_derive_facts(
    cause_idx_subjects,
    cause_idx_objects,
)

print('verifying facts in semmeddb...')
print(query_cause_correct)
cause_correct = kg_verify_facts(cause_idx_subjects, cause_results)
print('{} of {} derived facts are correct'.format(cause_correct, cause_res_amounts))

# Drop the references to the large structures so they can be reclaimed.
cause_idx_subjects = cause_idx_objects = cause_results = None

# Gold Standard: 95037 of 7978099 derived facts are correct (verified by semmed)

load facts from semmeddb and building index...
deriving new facts...
verifying facts in semmeddb...
SELECT pmid, subject_cui, object_cui FROM mv_cause_2
95037 of 7978099 derived facts are correct


## DDI Gene Experiment

In [5]:
# Knowledge-graph DDI gene experiment. Only the object index of the _dg query
# and the subject index of the _gd query are kept; the other half of each
# returned pair is discarded.
print('load facts from semmeddb and building index...')
_, ddi_gen_idx_objects = load_kg_facts(conn, query_ddi_gen_1_dg)
ddi_gen_idx_subjects, _ = load_kg_facts(conn, query_ddi_gen_2_gd)

print('deriving new facts...')
ddi_gen_results, ddi_gen_res_amounts = kg_derive_facts(
    ddi_gen_idx_subjects,
    ddi_gen_idx_objects,
)
print('{} results'.format(ddi_gen_res_amounts))

# The reference interactions come from mv_ddi_correct (see query_ddi_correct).
print('loading correct interactions from semmed...')
ddi_gen_idx_subjects_correct, _ = load_kg_facts(conn, query_ddi_correct)

print('verifying facts in semmeddb...')
ddi_gen_correct = kg_verify_facts(ddi_gen_idx_subjects_correct, ddi_gen_results)
print('derived {} of {} derived facts are correct'.format(ddi_gen_correct, ddi_gen_res_amounts))

# Drop the references to the large structures so they can be reclaimed.
ddi_gen_idx_subjects = ddi_gen_idx_objects = None
ddi_gen_results = ddi_gen_idx_subjects_correct = None

# 55370 of 753899 derived facts are correct (verified by semmeddb)

load facts from semmeddb and building index...
deriving new facts...
753899 results
loading correct interactions from semmed...
verifying facts in semmeddb...
derived 55370 of 753899 derived facts are correct


## DDI Function Experiment

In [6]:
# Knowledge-graph DDI function experiment, done in two derivation steps:
#   1. drug-gene facts are combined with gene-function facts, keeping the
#      object index of the derived facts (compute_o_idx=True);
#   2. that object index is combined with itself to derive the final facts.
# The final facts are then verified against mv_ddi_correct.
print('load facts from semmeddb and building index...')
_, ddi_f_dg_idx_o = load_kg_facts(conn, query_ddi_function_1_dg)
ddi_f_gf_idx_s, _ = load_kg_facts(conn, query_ddi_function_2_gf)


print('deriving new facts drug-gene-function ...')
# With compute_o_idx=True the call returns three values; only the object
# index and the count are used here.
_, ddi_f_t1_df_o, t1_len = kg_derive_facts(ddi_f_gf_idx_s, ddi_f_dg_idx_o, compute_o_idx=True)
print('{} facts derived'.format(t1_len))

print('deriving new facts drug-gene-function-gene-drug ...')
# drug is in both cases the object
ddi_f_res, ddi_f_res_len = kg_derive_facts(ddi_f_t1_df_o, ddi_f_t1_df_o)
print('{} facts derived'.format(ddi_f_res_len))

print('loading correct interactions from semmed...')
ddi_gen_idx_subjects_correct, _ = load_kg_facts(conn, query_ddi_correct)

print('verifying facts in semmeddb...')
ddi_f_correct = kg_verify_facts(ddi_gen_idx_subjects_correct, ddi_f_res)
print('{} of {} derived facts are correct'.format(ddi_f_correct, ddi_f_res_len))


# Drop the references to the large structures so they can be reclaimed.
# The intermediate index is named ddi_f_t1_df_o; clearing a differently
# spelled name would leave it alive.
ddi_f_dg_idx_o = None
ddi_f_gf_idx_s = None
ddi_f_t1_df_o = None
ddi_f_res = None
ddi_gen_idx_subjects_correct = None

#  148346 of 18685424 derived facts are correct (verified by semmeddb)
# NOTE(review): the last recorded run of this cell printed 18685416 as the
# total - confirm which figure is current.

load facts from semmeddb and building index...
deriving new facts drug-gene-function ...
6831917 facts derived
deriving new facts drug-gene-function-gene-drug ...
18685416 facts derived
loading correct interactions from semmed...
verifying facts in semmeddb...
148346 of 18685416 derived facts are correct


# Library Graph

In [7]:
from model.librarygraph import derive_facts_with_context, load_lg_facts
from model.experiments import do_cause_experiment_with_context, do_ddi_gene_experiment_with_context, do_ddi_function_experiment_with_context

## Cause Experiment


In [8]:
# Library-graph cause experiment: the per-pmid index drives the derivation,
# and the knowledge-graph subject index of the same query is passed along
# for the verification step.
print('load facts from semmeddb and building index...')
idx_pmid = load_lg_facts(conn, query_cause)

print('load facts from semmeddb and building index...')
cause_idx_subjects, _ = load_kg_facts(conn, query_cause)

do_cause_experiment_with_context(idx_pmid, cause_idx_subjects)

# Remove the large indexes from the namespace once the experiment is done.
del idx_pmid, cause_idx_subjects

# Gold Standard: 5544 of 11478 derived facts are correct (verified by semmed)

load facts from semmeddb and building index...
load facts from semmeddb and building index...
deriving new facts...
11478 facts derived
verifying facts in semmeddb...
5544 of 11478 derived facts are correct


## DDI Gene Experiment


In [9]:
# Library-graph DDI gene experiment: per-pmid indexes for the _dg and _gd
# queries are combined, and the result is verified against the interactions
# loaded from mv_ddi_correct.
print('load facts from semmeddb and building index...')
idx_pmid_dg = load_lg_facts(conn, query_ddi_gen_1_dg)
idx_pmid_gd = load_lg_facts(conn, query_ddi_gen_2_gd)
print('loading correct interactions from semmed...')
ddi_gen_idx_subjects_correct, _ = load_kg_facts(conn, query_ddi_correct)


do_ddi_gene_experiment_with_context(idx_pmid_dg, idx_pmid_gd, ddi_gen_idx_subjects_correct)

# Remove every large index this cell created, including the reference
# interactions, matching the cleanup of the DDI function experiment cell.
del idx_pmid_dg
del idx_pmid_gd
del ddi_gen_idx_subjects_correct
# 909 of 1311 derived facts are correct (verified by semmeddb)

load facts from semmeddb and building index...
loading correct interactions from semmed...
deriving new facts...
1311 facts derived
verifying facts in semmeddb...
derived 909 of 1311 derived facts are correct


## DDI Function Experiment

In [10]:
# Library-graph DDI function experiment: per-pmid indexes for the _dg and _gf
# queries are combined, and the result is verified against the interactions
# loaded from mv_ddi_correct.
print('load facts from semmeddb and building index...')
idx_pmid_dg = load_lg_facts(conn, query_ddi_function_1_dg)
idx_pmid_gf = load_lg_facts(conn, query_ddi_function_2_gf)

print('loading correct interactions from semmed...')
ddi_gen_idx_subjects_correct, _ = load_kg_facts(conn, query_ddi_correct)

do_ddi_function_experiment_with_context(
    idx_pmid_dg,
    idx_pmid_gf,
    ddi_gen_idx_subjects_correct,
)

# Remove the large indexes from the namespace once the experiment is done.
del idx_pmid_dg, idx_pmid_gf, ddi_gen_idx_subjects_correct

# 1352 of 2138 derived facts are correct (verified by semmeddb)

load facts from semmeddb and building index...
loading correct interactions from semmed...
deriving new facts drug-gene-function-gene-drug ...
2138 facts derived
verifying facts in semmeddb...
1352 of 2138 derived facts are correct
