 # Human Metabolome Database (HMDB)

 Author: Moshe Silverstein <br/>
 Date: 11-17 <br/>
 Data Source: http://www.hmdb.ca/

 Reviewer: Charles Dai <br>
 Updated: 6-20

In [4]:
# appyter init
# Must run before the `%%appyter` cells further down. The lambda holds the
# builtin `globals` as a default argument and calls it, so appyter receives
# this notebook's global namespace.
from appyter import magic
magic.init(lambda _=globals: _())

In [5]:
import sys
import os
from datetime import date

import numpy as np
import pandas as pd
import itertools
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
%matplotlib inline

import harmonizome.utility_functions as uf
import harmonizome.lookup as lookup

In [6]:
# from clustergrammer_widget import *
# net = Network(clustergrammer_widget)

In [7]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell executes, so edits to the harmonizome helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

 ### Notebook Information

In [8]:
# Record the run date and interpreter version next to the results.
print('This notebook was run on:', date.today(), '\nPython version:', sys.version)

'3.6.9 (default, Apr 18 2020, 01:56:04) \n[GCC 8.4.0]'

 # Initialization

 ### Load Mapping Dictionaries

In [9]:
# Fetch the two lookup tables used later in this notebook:
# `symbol_lookup` is passed to uf.mapgenesymbols and `geneid_lookup` to
# uf.createGeneList.
symbol_lookup, geneid_lookup = lookup.get_lookups()

Gathering sources: 100%|██████████| 3/3 [00:17<00:00,  5.91s/it]


 ### Output Path

In [10]:
# Base name prepended to every file this notebook saves.
output_name = 'hmdb'

# Directory that collects all generated files.
path = 'Output/HMDB'
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling makedirs().
os.makedirs(path, exist_ok=True)

In [11]:
%%appyter hide_code
{% do SectionField(
    name='data',
    title='Load Data',
    subtitle='Upload Files from the Human Metabolome Database',
) %}

 # Load Data

In [41]:
%%appyter code_exec

tree = ET.iterparse({{FileField(
    constraint='.*\.xml$',
    name='all_metabolites', 
    label='All Metabolites (XML)', 
    default='Input/HMDB/hmdb_metabolites.xml',
    section='data')
}})

```python

tree = ET.iterparse('Input/HMDB/hmdb_metabolites.xml')
```

 # Pre-process Data

 ## Get Relevant Data

In [42]:
# Collect, for every <metabolite> record, its name and the gene names of its
# associated proteins. `metabolites[i]` and `genes[i]` describe the same record.
#
# NOTE: `tree` is the one-shot iterator returned by ET.iterparse in the
# "Load Data" cell. Once this loop has consumed it, re-running this cell alone
# yields empty lists -- re-run the loading cell first.

# XML namespace that qualifies every tag in the HMDB export.
HMDB_NS = '{http://www.hmdb.ca}'

metabolites = []
genes = []

for _, elem in tree:
    # iterparse reports 'end' events by default, so a metabolite element is
    # complete (all children parsed) by the time it is seen here.
    if elem.tag != HMDB_NS + 'metabolite':
        continue

    metabolites.append(elem.find(HMDB_NS + 'name').text)

    # A record without a <protein_associations> section simply has no genes;
    # previously this raised AttributeError and aborted the whole parse.
    associations = elem.find(HMDB_NS + 'protein_associations')
    proteins = associations.iter(HMDB_NS + 'protein') if associations is not None else ()

    gene_names = []
    for protein in proteins:
        gene_elem = protein.find(HMDB_NS + 'gene_name')
        # A missing <gene_name> is recorded as None, the same value an empty
        # <gene_name/> element produces.
        gene_names.append(gene_elem.text if gene_elem is not None else None)
    genes.append(gene_names)

    # Release the processed subtree to keep memory bounded on the large file.
    elem.clear()

In [43]:
# One row per metabolite: its name plus the list of gene names collected for it.
df = pd.DataFrame({'Genes': genes, 'Metabolites': metabolites})
df.head()

Unnamed: 0,Genes,Metabolites
0,"[CNDP1, PRMT3]",1-Methylhistidine
1,"[SMS, DHPS, ABP1, AOC3, AOC2, AMD1, ODC1]","1,3-Diaminopropane"
2,"[PDHB, PDHA1, PDHA2, BCKDHB, BCKDHA, OGDH, CTH...",2-Ketobutyric acid
3,"[DLD, SULT2B1, UGT1A1, LDHAL6A, LDHB, LDHC, LD...",2-Hydroxybutyric acid
4,"[COMT, UGT2B28, UGT2B4, UGT1A4, UGT2B10, UGT2B...",2-Methoxyestrone


 ## Split Gene Lists

In [44]:
# Give every (gene, metabolite) pair its own row, then key the table by gene.
df = df.explode('Genes').set_index('Genes')
df.head()

Unnamed: 0_level_0,Metabolites
Genes,Unnamed: 1_level_1
CNDP1,1-Methylhistidine
PRMT3,1-Methylhistidine
SMS,"1,3-Diaminopropane"
DHPS,"1,3-Diaminopropane"
ABP1,"1,3-Diaminopropane"


In [45]:
# Number of (gene, metabolite) rows after splitting the gene lists.
df.shape

(955300, 1)

 # Filter Data

 ## Map Gene Symbols to Up-to-date Approved Gene Symbols

In [46]:
# Map the gene-symbol index through `symbol_lookup`. The row count shown below
# is lower than before the call, so symbols without a mapping are presumably
# dropped -- confirm in harmonizome.utility_functions.
df = uf.mapgenesymbols(df, symbol_lookup)
df.shape

100%|██████████| 955300/955300 [00:01<00:00, 801426.55it/s]


(858107, 1)

 # Analyze Data

 ## Create Binary Matrix

In [47]:
# Turn the gene/metabolite pairs into a boolean table; in the preview below the
# rows are gene symbols and the columns are metabolite names.
binary_matrix = uf.createBinaryMatrix(df)
binary_matrix.head()

Unnamed: 0,"(+)-(1R,2R)-1,2-Diphenylethane-1,2-diol",(+)-12a-Hydroxypachyrrhizone,(+)-7-Isojasmonic acid CoA,(+)-Catechin 6-C-glucoside,(+)-Catechin 8-C-glucoside,(+)-Galeon,(+)-Limonene,(+)-trans-Carveol,(-)-trans-Carveol,"(13E)-11a-Hydroxy-9,15-dioxoprost-13-enoic acid",(1E)-1-(2-hydroxy-4-methoxyphenyl)pent-1-en-3-one,(1E)-1-(4-hydroxyphenyl)pent-1-en-3-one,(1E)-1-(4-methoxyphenyl)pent-1-en-3-ol,(1E)-4-hydroxy-1-(4-methoxyphenyl)pent-1-en-3-one,(1E)-5-hydroxy-1-(4-methoxyphenyl)pent-1-en-3-one,(1E)-5-phenyl-1-(3-phenyloxiran-2-yl)pent-1-en-3-one,"(1R)-Glutathionyl-(2R)-hydroxy-1,2-dihydronaphthalene","(1R)-Hydroxy-(2R)-glutathionyl-1,2-dihydronaphthalene","(1R,12S,16Z,24E,26E,28Z,32S)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,13,18-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,15,18-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,19-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-30-methoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,21-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,23-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexat
riaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,30-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19-methoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,35-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-15-(hydroxymethyl)-19,30-dimethoxy-17,21,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-17-(hydroxymethyl)-19,30-dimethoxy-15,21,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4λ⁵-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,4,10,14,20-hexone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29-pentamethyl-35-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,29,35-pentamethyl-23-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,23,29,35-pentamethyl-21-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy
-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-17,21,23,29,35-pentamethyl-15-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-21-(hydroxymethyl)-19,30-dimethoxy-15,17,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-23-(hydroxymethyl)-19,30-dimethoxy-15,17,21,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-29-(hydroxymethyl)-19,30-dimethoxy-15,17,21,23,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-35-(hydroxymethyl)-19,30-dimethoxy-15,17,21,23,29-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-hydroxy-3-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[2-hydroxy-1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[3-(4-hydroxy-3-methoxycyclohexyl)prop-1-en-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone",...,myo-Inositol,naringenin-7-O-glucuronide,nicotinate beta-D-ribonucleotide,o-Tolyl salicylate,p-Anisic acid,p-Cresol 
glucuronide,p-Hydroxyphenylacetic acid,p-Octopamine,p-Synephrine,"phenyl 2,3-dihydroxybenzoate","phenyl 2,5-dihydroxybenzoate",sn-glycero-3-Phosphoethanolamine,"trans,cis-Lauro-2,6-dienoyl-CoA","trans-1,2-Dihydrobenzene-1,2-diol",trans-2-Enoyl-OPC4-CoA,trans-2-Enoyl-OPC6-CoA,trans-2-Enoyl-OPC8-CoA,trans-2-Hexenoyl-CoA,"trans-2-Methyl-5-isopropylhexa-2,5-dienoyl-CoA","trans-3,3',4',5,5',7-Hexahydroxyflavanone","trans-3,4-Dihydro-3,4-dihydroxy-7,12-dimethylbenz[a]anthracene",trans-3-Chloro-2-propene-1-ol,trans-3-Chloroacrylic acid,trans-3-Chloroallyl aldehyde,trans-3-Decenoyl-CoA,trans-3-Hexenoyl-CoA,trans-3-Hydroxycotinine glucuronide,trans-4-Carboxymethylenebut-2-en-4-olide,"trans-5,6-Dihydro-5,6-dihydroxy-7,12-dimethylbenz[a]anthracene",trans-Cinnamic acid,trans-Cinnamyl alcohol,trans-Dec-2-enoic acid,trans-Dodec-2-enoic acid,trans-Ferulic acid,trans-Octadec-2-enoyl-CoA,trans-Tetra-dec-2-enoic acid,xi-Norepinephrine,"{2-[4-(3-ethyl-2,3-diphenyloxiran-2-yl)phenoxy]ethyl}(methyl)amine","{2-[4-(4-chloro-1,2-diphenylbut-1-en-1-yl)phenoxy]ethyl}(methyl)amine","{[6-({5-[(6-{[(2-carboxyacetyl)oxy]methyl}-3,4,5-trihydroxyoxan-2-yl)oxy]-7-hydroxy-2-(4-oxocyclohexa-2,5-dien-1-ylidene)-2H-chromen-3-yl}oxy)-3,4,5-trihydroxyoxan-2-yl]methyl}[1-hydroxy-3-(4-hydroxyphenyl)prop-2-en-1-ylidene]oxidanium"
A1CF,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
A2M,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
A4GALT,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
AACS,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
AADAC,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [48]:
# (number of genes, number of metabolites)
binary_matrix.shape

(5213, 24504)

In [49]:
# Save the binary matrix with base name '<output_name>_binary_matrix'.
# `compression='npz'` and `dtype=np.uint8` are forwarded to the helper;
# presumably the values are cast to uint8 and written as a compressed .npz
# under `path` -- confirm in harmonizome.utility_functions.
uf.saveData(binary_matrix, path, output_name + '_binary_matrix', 
            compression='npz', dtype=np.uint8)

 ## Create Gene List

In [50]:
# Build the gene table from the binary matrix and `geneid_lookup`; the preview
# below shows one row per gene with columns GeneSym and GeneID.
gene_list = uf.createGeneList(binary_matrix, geneid_lookup)
gene_list.head()

100%|██████████| 5213/5213 [00:00<00:00, 269398.32it/s]


Unnamed: 0,GeneSym,GeneID
0,A1CF,29974
1,A2M,2
2,A4GALT,53947
3,AACS,65985
4,AADAC,13


In [51]:
# Row count should match the number of rows in binary_matrix.
gene_list.shape

(5213, 2)

In [52]:
# Save the gene table with base name '<output_name>_gene_list'. The helper is
# asked for a gzip-compressed 'tsv' without the DataFrame index (the index is
# just the 0..n-1 row counter seen in the preview).
uf.saveData(gene_list, path, output_name + '_gene_list',
            ext='tsv', compression='gzip', index=False)

 ## Create Attribute List

In [53]:
# Build the attribute (metabolite) table from the binary matrix; the preview
# below lists metabolite names only.
attribute_list = uf.createAttributeList(binary_matrix)
attribute_list.head()

"(+)-(1R,2R)-1,2-Diphenylethane-1,2-diol"
(+)-12a-Hydroxypachyrrhizone
(+)-7-Isojasmonic acid CoA
(+)-Catechin 6-C-glucoside
(+)-Catechin 8-C-glucoside


In [54]:
# The recorded output has zero data columns, so the metabolite names
# presumably live in the index rather than in a column.
attribute_list.shape

(24504, 0)

In [55]:
# Save the attribute table with base name '<output_name>_attribute_list' as a
# gzip-compressed 'tsv'. Unlike the gene list, `index=False` is not passed here.
uf.saveData(attribute_list, path, output_name + '_attribute_list',
            ext='tsv', compression='gzip')

 ## Create Gene and Attribute Set Libraries

In [56]:
# Write the "up" gene-set library derived from the binary matrix, using base
# name '<output_name>_gene_up_set'. Presumably one gene set per metabolite,
# written under `path` -- confirm in harmonizome.utility_functions.
uf.createUpGeneSetLib(binary_matrix, path, output_name + '_gene_up_set')

100%|██████████| 19797/19797 [00:01<00:00, 16920.70it/s]


In [57]:
# Write the "up" attribute-set library derived from the binary matrix, using
# base name '<output_name>_attribute_up_set'. Presumably one metabolite set per
# gene, written under `path` -- confirm in harmonizome.utility_functions.
uf.createUpAttributeSetLib(binary_matrix, path, 
                           output_name + '_attribute_up_set')

100%|██████████| 5213/5213 [00:00<00:00, 5402.17it/s]


 ## Create Attribute Similarity Matrix

In [58]:
# Metabolite-by-metabolite similarity using the 'jaccard' metric. The binary
# matrix is transposed so that metabolites become the rows handed to the helper.
# `sparse=True` is forwarded as-is; its effect is defined in
# harmonizome.utility_functions (not visible here).
attribute_similarity_matrix = uf.createSimilarityMatrix(binary_matrix.T, 'jaccard', sparse=True)
attribute_similarity_matrix.head()

Unnamed: 0,"(+)-(1R,2R)-1,2-Diphenylethane-1,2-diol",(+)-12a-Hydroxypachyrrhizone,(+)-7-Isojasmonic acid CoA,(+)-Catechin 6-C-glucoside,(+)-Catechin 8-C-glucoside,(+)-Galeon,(+)-Limonene,(+)-trans-Carveol,(-)-trans-Carveol,"(13E)-11a-Hydroxy-9,15-dioxoprost-13-enoic acid",(1E)-1-(2-hydroxy-4-methoxyphenyl)pent-1-en-3-one,(1E)-1-(4-hydroxyphenyl)pent-1-en-3-one,(1E)-1-(4-methoxyphenyl)pent-1-en-3-ol,(1E)-4-hydroxy-1-(4-methoxyphenyl)pent-1-en-3-one,(1E)-5-hydroxy-1-(4-methoxyphenyl)pent-1-en-3-one,(1E)-5-phenyl-1-(3-phenyloxiran-2-yl)pent-1-en-3-one,"(1R)-Glutathionyl-(2R)-hydroxy-1,2-dihydronaphthalene","(1R)-Hydroxy-(2R)-glutathionyl-1,2-dihydronaphthalene","(1R,12S,16Z,24E,26E,28Z,32S)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,13,18-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,15,18-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,19-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-30-methoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,21-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,23-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexat
riaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,30-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19-methoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18,35-trihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-15-(hydroxymethyl)-19,30-dimethoxy-17,21,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-17-(hydroxymethyl)-19,30-dimethoxy-15,21,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4λ⁵-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,4,10,14,20-hexone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29-pentamethyl-35-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,29,35-pentamethyl-23-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,23,29,35-pentamethyl-21-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy
-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-17,21,23,29,35-pentamethyl-15-methylidene-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-21-(hydroxymethyl)-19,30-dimethoxy-15,17,23,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-23-(hydroxymethyl)-19,30-dimethoxy-15,17,21,29,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-29-(hydroxymethyl)-19,30-dimethoxy-15,17,21,23,35-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-35-(hydroxymethyl)-19,30-dimethoxy-15,17,21,23,29-pentamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[1-hydroxy-3-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[2-hydroxy-1-(4-hydroxy-3-methoxycyclohexyl)propan-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone","(1R,16Z,24E,26E,28Z)-1,18-dihydroxy-12-[3-(4-hydroxy-3-methoxycyclohexyl)prop-1-en-2-yl]-19,30-dimethoxy-15,17,21,23,29,35-hexamethyl-11,36-dioxa-4-azatricyclo[30.3.1.0⁴,⁹]hexatriaconta-16,24,26,28-tetraene-2,3,10,14,20-pentone",...,myo-Inositol,naringenin-7-O-glucuronide,nicotinate beta-D-ribonucleotide,o-Tolyl salicylate,p-Anisic acid,p-Cresol 
glucuronide,p-Hydroxyphenylacetic acid,p-Octopamine,p-Synephrine,"phenyl 2,3-dihydroxybenzoate","phenyl 2,5-dihydroxybenzoate",sn-glycero-3-Phosphoethanolamine,"trans,cis-Lauro-2,6-dienoyl-CoA","trans-1,2-Dihydrobenzene-1,2-diol",trans-2-Enoyl-OPC4-CoA,trans-2-Enoyl-OPC6-CoA,trans-2-Enoyl-OPC8-CoA,trans-2-Hexenoyl-CoA,"trans-2-Methyl-5-isopropylhexa-2,5-dienoyl-CoA","trans-3,3',4',5,5',7-Hexahydroxyflavanone","trans-3,4-Dihydro-3,4-dihydroxy-7,12-dimethylbenz[a]anthracene",trans-3-Chloro-2-propene-1-ol,trans-3-Chloroacrylic acid,trans-3-Chloroallyl aldehyde,trans-3-Decenoyl-CoA,trans-3-Hexenoyl-CoA,trans-3-Hydroxycotinine glucuronide,trans-4-Carboxymethylenebut-2-en-4-olide,"trans-5,6-Dihydro-5,6-dihydroxy-7,12-dimethylbenz[a]anthracene",trans-Cinnamic acid,trans-Cinnamyl alcohol,trans-Dec-2-enoic acid,trans-Dodec-2-enoic acid,trans-Ferulic acid,trans-Octadec-2-enoyl-CoA,trans-Tetra-dec-2-enoic acid,xi-Norepinephrine,"{2-[4-(3-ethyl-2,3-diphenyloxiran-2-yl)phenoxy]ethyl}(methyl)amine","{2-[4-(4-chloro-1,2-diphenylbut-1-en-1-yl)phenoxy]ethyl}(methyl)amine","{[6-({5-[(6-{[(2-carboxyacetyl)oxy]methyl}-3,4,5-trihydroxyoxan-2-yl)oxy]-7-hydroxy-2-(4-oxocyclohexa-2,5-dien-1-ylidene)-2H-chromen-3-yl}oxy)-3,4,5-trihydroxyoxan-2-yl]methyl}[1-hydroxy-3-(4-hydroxyphenyl)prop-2-en-1-ylidene]oxidanium"
"(+)-(1R,2R)-1,2-Diphenylethane-1,2-diol",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
(+)-12a-Hydroxypachyrrhizone,0.0,1.0,0.0,0.25,0.25,1.0,0.0,0.0,0.0,0.0,1.0,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.5,0.0,0.0,0.5,0.166667,0.166667,0.0,0.0,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,1.0,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0
(+)-7-Isojasmonic acid CoA,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009804,0.0,0.0,0.0,0.0,0.0
(+)-Catechin 6-C-glucoside,0.0,0.25,0.0,1.0,1.0,0.25,0.0,0.0,0.0,0.0,0.25,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,...,0.0,0.5,0.0,0.0,0.2,0.111111,0.25,0.0,0.0,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.25,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0
(+)-Catechin 8-C-glucoside,0.0,0.25,0.0,1.0,1.0,0.25,0.0,0.0,0.0,0.0,0.25,0.5,0.5,0.5,0.5,0.0,0.0,0.0,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.25,...,0.0,0.5,0.0,0.0,0.2,0.111111,0.25,0.0,0.0,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.25,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Save the metabolite similarity matrix with base name
# '<output_name>_attribute_similarity_matrix'. The helper is told the matrix is
# symmetric and to use float32 values with 'npz' compression.
uf.saveData(attribute_similarity_matrix, path,
            output_name + '_attribute_similarity_matrix', 
            compression='npz', symmetric=True, dtype=np.float32)

In [None]:
# net.load_df(attribute_similarity_matrix.iloc[:,:].copy())
# net.filter_N_top('row', rank_type='sum', N_top=300)
# net.cluster()
# net.widget()

 ## Create Gene Similarity Matrix

In [None]:
# Gene-by-gene similarity using the 'jaccard' metric. The binary matrix is
# passed untransposed, so genes are the rows handed to the helper.
gene_similarity_matrix = uf.createSimilarityMatrix(binary_matrix, 'jaccard', sparse=True)
gene_similarity_matrix.head()

In [None]:
# Save the gene similarity matrix with base name
# '<output_name>_gene_similarity_matrix', using the same options as the
# attribute similarity matrix (npz, symmetric, float32).
uf.saveData(gene_similarity_matrix, path, 
            output_name + '_gene_similarity_matrix',
            compression='npz', symmetric=True, dtype=np.float32)

 ## Create Gene-Attribute Edge List

In [None]:
# Write the gene-attribute edge list with base name
# '<output_name>_gene_attribute_edge_list', built from the binary matrix plus
# the attribute and gene tables created earlier. Presumably one edge per True
# cell of the matrix -- confirm in harmonizome.utility_functions.
uf.createGeneAttributeEdgeList(binary_matrix, attribute_list, gene_list, 
                               path, output_name + '_gene_attribute_edge_list')

 # Create Downloadable Save File

In [None]:
# Bundle the output directory for download. Presumably this produces the
# ./output_archive.zip that the markdown link below points to -- confirm in
# harmonizome.utility_functions.
uf.createArchive(path)

 ### Link to download output files: [click here](./output_archive.zip)