# Build disease-phenotype network from Orphanet Data

To get data from Orphanet: https://www.orphadata.com/data/xml/en_product4.xml

The following code extracts Disease-Phenotype associations from Orphanet and creates:
* an association file containing OrphaCode, disease name, HPO ids and HPO terms in file **`data/OrphaDisease_HPO_extract.tsv`**
* a multiplex network with OrphaCode and HPO ids in file **`network/multiplex/Orpha/OrphaDisease_HPO.tsv`**

In [46]:
import xml.etree.ElementTree as ET
import csv

In [47]:
# Parse the Orphanet XML export from the local data directory and keep a
# handle on its root element, which is what gets iterated to find disorders.
tree = ET.parse("../data/en_product4.xml")
root = tree.getroot()

In [48]:
# Extract every disorder/HPO association from the parsed XML (`root`) and write
# it to two tab-separated files:
#   * '../data/OrphaDisease_HPO_extract.tsv': OrphaCode, disease name,
#     HPO id and HPO term (one row per association);
#   * '../network/multiplex/Orpha/OrphaDisease_HPO.tsv': OrphaCode and HPO id
#     only, the network file for MultiXrank.
#
# Both files are managed by a single `with` statement so they are flushed and
# closed even if an entry raises part-way through the loop (e.g. a missing
# child element makes `.find()` return None). The encoding is pinned to UTF-8
# so that non-ASCII characters in names/terms are written identically on every
# platform instead of depending on the locale's default encoding.
with open('../data/OrphaDisease_HPO_extract.tsv', 'w', newline='',
          encoding='utf-8') as info_file, \
     open('../network/multiplex/Orpha/OrphaDisease_HPO.tsv', 'w', newline='',
          encoding='utf-8') as net_file:
    info_writer = csv.writer(info_file, delimiter='\t')
    net_writer = csv.writer(net_file, delimiter='\t')

    # Iterate over disorders and, for each one, over its HPO annotations
    for disorder in root.iter('Disorder'):
        # Prefix the code so that disease identifiers read "ORPHA:<code>"
        orpha_code = "ORPHA:" + disorder.find('OrphaCode').text
        orpha_name = disorder.find('Name').text
        for hpo in disorder.iter("HPO"):
            hpo_id = hpo.find("HPOId").text
            hpo_term = hpo.find("HPOTerm").text
            info_writer.writerow([orpha_code, orpha_name, hpo_id, hpo_term])
            net_writer.writerow([orpha_code, hpo_id])