In [None]:
import pandas as pd
import xmltodict
from tqdm import tqdm
from IPython.display import display, HTML

def runProcess():
    """
    main method for distribution_of_Orphanet_RD_by_preferential_parent notebook

    Parses the two Orphadata XML files, counts the active disorders attached
    to each preferential parent, writes the resulting table to an Excel file
    and displays it as an HTML table.
    :return: None
    """
    print('--> Computing data ...')

    ## Select active entities in Orphadata Rare diseases and alignment product (product1)
    xml_data_cross = getData('../xml/en_product1.xml')
    actives_entities_cross = getActivesEntities(xml_data_cross)

    ## Select disorders
    disorders_cross, disorders_orphacode = getDisorders(actives_entities_cross)

    ## Get data from the Orphadata linearisation of RD product (product7)
    xml_data_linear = getData('../xml/en_product7.xml')
    dict_preferential_parents_entities = getEntitiesWithPreferentialParent(xml_data_linear, disorders_orphacode)
    ## Filter disorders with preferential parent
    dict_preferential_parents, dict_filtered_preferential_parents = filterDisorderWithPreferentialParent(
        disorders_cross, dict_preferential_parents_entities)
    dict_results = generateMatrixResults(dict_preferential_parents, dict_filtered_preferential_parents)

    ## Outputs
    ## build and sort the table first, so the output file is only opened
    ## once the data to write is ready
    myDataframe = pd.DataFrame(dict_results)
    myDataframe = myDataframe.sort_values(by=['Orphanet classification name'], ascending=True)
    ## open Excel file
    with pd.ExcelWriter('../output_tables/6_distribution_of_Orphanet_RD_by_preferential_parent.xlsx',
                        engine='xlsxwriter') as writer:
        myDataframe.to_excel(writer, index=False)
    ## display table in stdout as HTML table
    display(HTML(myDataframe.to_html(index=False)))


def getData(xmlfile):
    """
    Load an XML file and convert it to a dict with the xmltodict package
    :param xmlfile: path of the XML file, read as ISO-8859-1 text
    :return: xml parsed as dict
    """
    with open(xmlfile, mode="r", encoding='ISO-8859-1') as xml_handle:
        raw_xml = xml_handle.read()
    # the whole content is already in memory, so parsing happens after the file is closed
    return xmltodict.parse(raw_xml)


def getActivesEntities(xml_data):
    """
    Keep the entities that do not carry the flag value '8192' (inactive entities)
    :param xml_data: parsed XML (dict) exposing ["JDBOR"]["DisorderList"]["Disorder"]
    :return: list of the entities without the '8192' flag, in input order
    """
    kept_entities = []
    for candidate in tqdm(xml_data["JDBOR"]["DisorderList"]["Disorder"]):
        flags = candidate['DisorderFlagList']['DisorderFlag']
        # a lone flag arrives as a dict: wrap it so both shapes share one code path
        if isinstance(flags, dict):
            flags = [flags]
        flag_values = [flag['Value'] for flag in flags]
        if '8192' not in flag_values:
            kept_entities.append(candidate)
    return kept_entities


def getDisorders(actives_entities):
    """
    Select the entities whose DisorderGroup id is '36547' (disorders)
    :param actives_entities: iterable of entity dicts with 'DisorderGroup' and 'OrphaCode' keys
    :return: (list of selected entities, list of their OrphaCodes), both in input order
    """
    selected = [
        candidate
        for candidate in tqdm(actives_entities)
        if candidate['DisorderGroup']['@id'] == '36547'
    ]
    selected_orphacodes = [candidate['OrphaCode'] for candidate in selected]
    return selected, selected_orphacodes


def getEntitiesWithPreferentialParent(xml_data, disorders_orphacode):
    """
    Filter entities with a preferential parent

    Only entities whose OrphaCode is listed in ``disorders_orphacode`` are
    considered, and a preferential parent that is one of the not involved
    classifications below is ignored.
    :param xml_data: parsed XML (dict) exposing ["JDBOR"]["DisorderList"]["Disorder"]
    :param disorders_orphacode: iterable of the OrphaCodes (hashable) to keep
    :return: dict_preferential_parents_entities, mapping each kept OrphaCode to
        {'Preferential parent': <parent name>, 'OrphaCode': <parent OrphaCode>}
    """
    ### Not involved classifications Ids
    notInvolvedClassifId = frozenset((
        '98053',  ### Rare genetic disease
        '52662',  ### Rare teratologic disease
        '280342',  ### Rare systemic or rheumatological disease of childhood
        '565779',  ### Rare disorder potentially indicated for transplant
        '616874',  ### Rare disorder without a determined diagnosis after full investigation
    ))
    ### Membership is tested once per entity: build the lookup set a single time
    wanted_orphacodes = set(disorders_orphacode)
    dict_preferential_parents_entities = {}
    for entity in tqdm(xml_data["JDBOR"]["DisorderList"]["Disorder"]):
        if entity['OrphaCode'] not in wanted_orphacodes:
            continue
        association_list = entity['DisorderDisorderAssociationList']
        if association_list['@count'] == '0':
            continue
        associations = association_list['DisorderDisorderAssociation']
        ### xmltodict yields a dict for a single child element and a list for
        ### several: normalise so that both shapes are handled
        if isinstance(associations, dict):
            associations = [associations]
        for association in associations:
            if association['DisorderDisorderAssociationType']['Name']['#text'] != "Preferential parent":
                continue
            target_disorder = association['TargetDisorder']
            if target_disorder['OrphaCode'] in notInvolvedClassifId:
                continue
            dict_preferential_parents_entities[entity['OrphaCode']] = {
                'Preferential parent': target_disorder['Name']['#text'],
                'OrphaCode': target_disorder['OrphaCode']
                }
            ### keep the first qualifying preferential parent of the entity
            break
    return dict_preferential_parents_entities


def filterDisorderWithPreferentialParent(actives_disorders, dict_preferential_parents_entities):
    """
    Group the disorders by their preferential parent

    Disorders that have no entry in ``dict_preferential_parents_entities``
    are skipped.
    :param actives_disorders: iterable of disorder dicts, each with an 'OrphaCode' key
    :param dict_preferential_parents_entities: dict mapping a disorder OrphaCode to
        {'Preferential parent': <parent name>, 'OrphaCode': <parent OrphaCode>}
    :return: dict_preferential_parents (parent OrphaCode -> parent name),
        dict_filtered_preferential_parents (parent OrphaCode -> list of the
        OrphaCodes of the disorders attached to it)
    """
    dict_filtered_preferential_parents = {}
    dict_preferential_parents = {}
    for disorder in tqdm(actives_disorders):
        disorder_orpha = disorder['OrphaCode']
        parent_entry = dict_preferential_parents_entities.get(disorder_orpha)
        if parent_entry is None:
            # no preferential parent recorded for this disorder: nothing to count
            continue
        preferential_parent_label = parent_entry['Preferential parent']
        preferential_parent_orpha = parent_entry['OrphaCode']
        dict_filtered_preferential_parents.setdefault(preferential_parent_orpha, []).append(disorder_orpha)
        dict_preferential_parents[preferential_parent_orpha] = preferential_parent_label
    return dict_preferential_parents, dict_filtered_preferential_parents


def generateMatrixResults(dict_preferential_parents, dict_filtered_preferential_parents):
    """
    Build the two result columns: parent name and number of attached disorders
    :param dict_preferential_parents: dict mapping a parent OrphaCode to its name
    :param dict_filtered_preferential_parents: dict mapping a parent OrphaCode to the
        list of OrphaCodes attached to it
    :return: dict_results, a dict of two parallel lists keyed by column title
    """
    classification_names = []
    disorder_counts = []
    for parent_code, attached_codes in tqdm(dict_filtered_preferential_parents.items()):
        classification_names.append(dict_preferential_parents[parent_code])
        disorder_counts.append(len(attached_codes))
    return {
        'Orphanet classification name': classification_names,
        'Number of RD by preferential parent': disorder_counts,
    }


# Entry point: run the whole process when this file is executed directly
if __name__ == '__main__':
    runProcess()