In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import math
import os
import sys
import logging
mf_module_path = os.path.abspath(os.path.join('../python'))
if mf_module_path not in sys.path:
    sys.path.append(mf_module_path)
import mf
import mf_random
import hpoutil
import synergy_tree
import networkx as nx
import obonet
import pickle

In [2]:
dir_path = '../../../data/mf_regarding_diseases/primary_only/'

In [25]:
mf_file_name = 'df_mf_ratio_labHpo_labHpo_038.csv'
conditional_mf = pd.read_csv(dir_path + mf_file_name).loc[:, ['P1', 'P2', 'mf_P1P2_given_diag', 'P1_label', 'P2_label']]

synergy_file_name = 'df_synergy_labHpo_labHpo_038.csv'
synergy = pd.read_csv(dir_path + synergy_file_name).loc[:, ['P1', 'P2', 'synergy', 'P1_label', 'P2_label']]

In [26]:
percentile = 0.01
N = len(synergy)
synergy_top = synergy.iloc[np.arange(math.floor(N * percentile)), :]

In [27]:
synergy_top_with_conditional_mf = synergy_top.merge(conditional_mf.loc[:, ['P1', 'P2', 'mf_P1P2_given_diag']], on = ['P1', 'P2'])

In [28]:
nodes = set()
for labHpo in synergy_top_with_conditional_mf.P1:
    nodes.add(labHpo)
for labHpo in synergy_top_with_conditional_mf.P2:
    nodes.add(labHpo)
print(len(nodes))

51


In [29]:
synergy_top_with_conditional_mf.head()

Unnamed: 0,P1,P2,synergy,P1_label,P2_label,mf_P1P2_given_diag
0,HP:0020062,HP:0020063,0.004039,Decreased hemoglobin concentration,Increased hemoglobin concentration,0.010987
1,HP:0012419,HP:0500164,0.003415,Hyperoxemia,Abnormal blood carbon dioxide level,0.277064
2,HP:0001941,HP:0012419,0.003409,Acidosis,Hyperoxemia,0.072473
3,HP:0002151,HP:0012419,0.003409,Increased serum lactate,Hyperoxemia,0.072473
4,HP:0012417,HP:0012419,0.003316,Hypocapnia,Hyperoxemia,0.149472


In [49]:
conditional_mf_graph = nx.Graph()

for i in np.arange(len(synergy_top_with_conditional_mf)):
    P1 = synergy_top_with_conditional_mf.iloc[i, 0]
    P2 = synergy_top_with_conditional_mf.iloc[i, 1]
    mf = synergy_top_with_conditional_mf.iloc[i, 5]
    conditional_mf_graph.add_edge(P1, P2, mf=mf)

hpo = hpoutil.HPO('/Users/zhangx/git/human-phenotype-ontology/hp.obo')


In [50]:
print(len(conditional_mf_graph.nodes))

51


In [60]:
trimmed = synergy_tree.trim_edges(conditional_mf_graph, hpo.graph, 0.8)
print(len(trimmed.nodes))

32


In [43]:
retained = []
for i in np.arange(len(synergy_top_with_conditional_mf)):
    P1 = synergy_top_with_conditional_mf.iloc[i, 0]
    P2 = synergy_top_with_conditional_mf.iloc[i, 1]
    retained.append((P1, P2) in trimmed.edges)
    
print(retained)

[True, True, False, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, False, False, False, False, True, False, True, True, True, False, True, False, False, False, False, True, True, True, False, True, False, True, True, True, False, False, False, False, True, False, False, False, False, False, True, True, True, True, True, False, True, False, False, True, False, False, False, True, False, True, False, True, True, True, False, False]


In [44]:
synergy_top_with_conditional_mf.loc[retained, :].sort_values(by='mf_P1P2_given_diag', ascending=False).head(10)

Unnamed: 0,P1,P2,synergy,P1_label,P2_label,mf_P1P2_given_diag
1,HP:0012419,HP:0500164,0.003415,Hyperoxemia,Abnormal blood carbon dioxide level,0.277064
25,HP:0003074,HP:0012419,0.001563,Hyperglycemia,Hyperoxemia,0.188102
5,HP:0012419,HP:0032368,0.002864,Hyperoxemia,Acidemia,0.174678
11,HP:0020063,HP:0031850,0.00185,Increased hemoglobin concentration,Abnormal hematocrit,0.170272
14,HP:0020058,HP:0020063,0.001824,Abnormal red blood cell count,Increased hemoglobin concentration,0.169761
9,HP:0012418,HP:0012419,0.002441,Hypoxemia,Hyperoxemia,0.163692
4,HP:0012417,HP:0012419,0.003316,Hypocapnia,Hyperoxemia,0.149472
33,HP:0012419,HP:0020062,0.001456,Hyperoxemia,Decreased hemoglobin concentration,0.132853
69,HP:0011015,HP:0020063,0.001249,Abnormal blood glucose concentration,Increased hemoglobin concentration,0.114673
22,HP:0002901,HP:0012419,0.001633,Hypocalcemia,Hyperoxemia,0.087484


In [45]:
synergy_top_with_conditional_mf.sort_values(by='mf_P1P2_given_diag', ascending=False).head(15)

Unnamed: 0,P1,P2,synergy,P1_label,P2_label,mf_P1P2_given_diag
1,HP:0012419,HP:0500164,0.003415,Hyperoxemia,Abnormal blood carbon dioxide level,0.277064
25,HP:0003074,HP:0012419,0.001563,Hyperglycemia,Hyperoxemia,0.188102
5,HP:0012419,HP:0032368,0.002864,Hyperoxemia,Acidemia,0.174678
11,HP:0020063,HP:0031850,0.00185,Increased hemoglobin concentration,Abnormal hematocrit,0.170272
14,HP:0020058,HP:0020063,0.001824,Abnormal red blood cell count,Increased hemoglobin concentration,0.169761
9,HP:0012418,HP:0012419,0.002441,Hypoxemia,Hyperoxemia,0.163692
7,HP:0004360,HP:0012419,0.002563,Abnormality of acid-base homeostasis,Hyperoxemia,0.160065
4,HP:0012417,HP:0012419,0.003316,Hypocapnia,Hyperoxemia,0.149472
33,HP:0012419,HP:0020062,0.001456,Hyperoxemia,Decreased hemoglobin concentration,0.132853
68,HP:0011014,HP:0020063,0.001249,Abnormal glucose homeostasis,Increased hemoglobin concentration,0.114673


In [39]:
import importlib
importlib.reload(synergy_tree)

<module 'synergy_tree' from '/Users/zhangx/git/MIMIC_HPO/src/main/python/synergy_tree.py'>

In [36]:
'HP:0011014' in nx.ancestors(hpo.graph, 'HP:0003074')

False

In [38]:
list(nx.ancestors(hpo.graph, 'HP:0011014'))

['HP:0012734',
 'HP:0000825',
 'HP:0001985',
 'HP:0031883',
 'HP:0008255',
 'HP:0001943',
 'HP:0008189',
 'HP:0011998',
 'HP:0012051',
 'HP:0004914',
 'HP:0001952',
 'HP:0100651',
 'HP:0005959',
 'HP:0000877',
 'HP:0000857',
 'HP:0002173',
 'HP:0008283',
 'HP:0009800',
 'HP:0040270',
 'HP:0001988',
 'HP:0008205',
 'HP:0000855',
 'HP:0004904',
 'HP:0000842',
 'HP:0001998',
 'HP:0000831',
 'HP:0001953',
 'HP:0011015',
 'HP:0001958',
 'HP:0004924',
 'HP:0005978',
 'HP:0000819',
 'HP:0003074']

In [64]:
nx.get_node_attributes(hpo.graph, 'name')

{'HP:0000001': 'All',
 'HP:0000002': 'Abnormality of body height',
 'HP:0000003': 'Multicystic kidney dysplasia',
 'HP:0000005': 'Mode of inheritance',
 'HP:0000006': 'Autosomal dominant inheritance',
 'HP:0000007': 'Autosomal recessive inheritance',
 'HP:0000008': 'Abnormality of female internal genitalia',
 'HP:0000009': 'Functional abnormality of the bladder',
 'HP:0000010': 'Recurrent urinary tract infections',
 'HP:0000011': 'Neurogenic bladder',
 'HP:0000012': 'Urinary urgency',
 'HP:0000013': 'Hypoplasia of the uterus',
 'HP:0000014': 'Abnormality of the bladder',
 'HP:0000015': 'Bladder diverticulum',
 'HP:0000016': 'Urinary retention',
 'HP:0000017': 'Nocturia',
 'HP:0000019': 'Urinary hesitancy',
 'HP:0000020': 'Urinary incontinence',
 'HP:0000021': 'Megacystis',
 'HP:0000022': 'Abnormality of male internal genitalia',
 'HP:0000023': 'Inguinal hernia',
 'HP:0000024': 'Prostatitis',
 'HP:0000025': 'Functional abnormality of male internal genitalia',
 'HP:0000026': 'Male hypogo

In [1]:
!ls

Disease_prediction_with_machine_learning.ipynb	mutual_info.ipynb
EDA.ipynb					mutual_info_archive.ipynb
interpretation.ipynb				trimming.ipynb


In [2]:
!java -version

java version "1.8.0_144"
Java(TM) SE Runtime Environment (build 1.8.0_144-b01)
Java HotSpot(TM) 64-Bit Server VM (build 25.144-b01, mixed mode)
