<h1>Genotype–phenotype correlation at codon 1740 of SETD2</h1>
<p>Generate phenopackets from the data reported in <a href="https://pubmed.ncbi.nlm.nih.gov/32710489/">Rabin et al. (2020), Genotype-phenotype correlation at codon 1740 of SETD2</a>.</p>

In [None]:
# Notebook-wide imports and display configuration.
# NOTE(review): php, MessageToDict/MessageToJson, Parse/ParseDict, np and
# PhenopacketTable are not referenced in the visible cells — confirm before pruning.
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # never truncate cell text: the free-text cells must be read in full to build the custom maps
from collections import defaultdict
import numpy as np
import pyphetools
# The wildcard import supplies the pyphetools classes used without a prefix in
# the cells below (HpoParser, MetaData, the *ColumnMapper classes, CohortEncoder, Individual).
from pyphetools.creation import *
from pyphetools.output import PhenopacketTable
# Record the library version in the notebook output.
print(f"pyphetools version {pyphetools.__version__}")

In [None]:
# Publication the phenopackets are derived from.
pmid = "PMID:32710489"
title = "Genotype-phenotype correlation at codon 1740 of SETD2"

# One parser instance provides both the concept recognizer handed to every
# column mapper below and the HPO version string stored in the metadata.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()

metadata = MetaData(
    created_by="ORCID:0000-0002-0736-9199",
    pmid=pmid,
    pubmed_title=title,
)
metadata.default_versions_with_hpo(version=hpo_version)

In [None]:
# Load the supplementary spreadsheet; the path is relative to the notebook's working directory.
# NOTE(review): judging by the file name this is Supplementary Table 1 of the paper — confirm.
df = pd.read_excel("input/RabinSupplementaryTable1-SETD2.xlsx")

In [None]:
# Display the table exactly as loaded, before it is transposed in the next cell.
df

In [None]:
# Convert to row-based layout: after transposing, each row is one column of
# the spreadsheet (one patient), which is the orientation the encoder consumes.
dft = df.transpose()

# The first transposed row holds the spreadsheet's first column (the feature
# names). Promote it to the header, then remove it from the data rows.
dft.columns = dft.iloc[0]
# Rebind instead of mutating in place; the stray no-op `dft.index` expression
# that used to follow this line has been removed.
dft = dft.drop(dft.index[0])

# Expose the row labels as a regular column so the encoder can be pointed at
# it by name (individual_column_name='patient_id').
dft['patient_id'] = dft.index
dft.head()

In [None]:
# Registry of column name -> mapper; filled cell by cell below and finally passed to CohortEncoder.
# NOTE(review): the ColumnMapper default factory only fires when an unregistered key is looked up;
# the visible cells only ever assign — confirm whether a plain dict would do.
column_mapper_d = defaultdict(ColumnMapper)

In [None]:
# Prenatal complications: text fragment -> HPO label to assign.
# Keys are copied verbatim (note the leading newline in '\nIUGR').
prenatal_custom_map = {
    'agenesis of the corpus callosum': 'Agenesis of corpus callosum',
    '\nIUGR': 'Intrauterine growth retardation',
    'small cerebellum': 'Cerebellar hypoplasia',
    'vsd': 'Ventricular septal defect',
    'pre-eclampsia': 'Preeclampsia',
    'right dysplastic multi cystic kidney': 'Multicystic kidney dysplasia',
}
# Fragments passed as the mapper's exclusion set (maternal findings and an unspecific term).
excluded = {'maternal asthma', 'heart defect', 'maternal cholelithiasis'}
prenatalMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=prenatal_custom_map,
    excluded_set=excluded,
)
# Optional check: prenatalMapper.preview_column(dft['Prenatal complications'])
column_mapper_d['Prenatal complications'] = prenatalMapper

In [None]:
# Development: pin the one phrase that should map to the identically named HPO label.
development_column = 'Development'
dev_custom_map = {
    'Severe global developmental delay': 'Severe global developmental delay',
}
devMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=dev_custom_map,
)
# Optional check: devMapper.preview_column(dft[development_column])
column_mapper_d[development_column] = devMapper

In [None]:
# Walking milestone: each free-text answer is mapped to a walking-related HPO label.
walking_column = 'Walking independently'
walking_custom_map = {
    'No': 'Inability to walk',
    'No; wheelchair bound at 10 years': 'Inability to walk',
    'No at 3.5 years and could not stand at 3.5 years': 'Delayed ability to walk',
    'No at 13 years': 'Inability to walk',
    'Able to take a few steps at 7 years': 'Inability to walk',
    'No at 6 years': 'Inability to walk',
}
walkingMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=walking_custom_map,
)
# Optional check: walkingMapper.preview_column(dft[walking_column])
column_mapper_d[walking_column] = walkingMapper

In [None]:
# Sitting milestone: every answer in this column describes a failure or delay
# in sitting, so every entry maps to the sitting term.
# Two entries had been copied from the walking map and still assigned walking
# terms ('Delayed ability to walk', 'Inability to walk'); they now agree with
# the rest of this map.
# NOTE(review): confirm against the 'Sitting independently' column that
# 'Delayed ability to sit' is the intended term for those two answers.
sitting_custom_map = {
    'No': 'Delayed ability to sit',
    'at 2.5 years': 'Delayed ability to sit',
    'No at 3.5 years and could not stand at 3.5 years': 'Delayed ability to sit',
    'No at 10 years': 'Delayed ability to sit',
    'Attempting to sit at 6 years': 'Delayed ability to sit',
    'No at 6 years': 'Delayed ability to sit',
}
sittingMapper = CustomColumnMapper(concept_recognizer=hpo_cr, custom_map_d=sitting_custom_map)
#sittingMapper.preview_column(dft['Sitting independently'])
column_mapper_d['Sitting independently'] = sittingMapper

In [None]:
# Speech: free-text answer -> HPO label.
# Keys are kept exactly as written (including 'Cccasional'); presumably they
# mirror the spreadsheet text — verify before "correcting" any spelling.
speech_column = 'speech'
speech_custom_map = {
    'At 16 months making sounds': 'Delayed speech and language development',
    'No speech; only babbling at 10 years': 'Absent speech',
    'No speech; only cooing at 3.5 years': 'Delayed speech and language development',
    'Cccasional vocalizations at 7 years': 'Absent speech',
    '15 months had single words; 4 years 6 months spoke in short sentences with pronunciation difficulties':
        'Delayed speech and language development',
}
speechMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=speech_custom_map,
)
# Optional check: speechMapper.preview_column(dft[speech_column])
column_mapper_d[speech_column] = speechMapper

In [None]:
# Fontanelle / skull: one phrase needs an explicit HPO label.
skull_column = 'Fontanelle/ skull'
skull_map = {
    'Metopic ridge': 'Prominent metopic ridge',
}
skullMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=skull_map,
)
skullMapper.preview_column(dft[skull_column])
column_mapper_d[skull_column] = skullMapper

In [None]:
# Columns that each describe a single facial feature.
# Column header (spelled as in this notebook's table) -> [HPO label, HPO id].
items = {
    'midface hypoplasia/maxillary hypoplasia': ["Midface retrusion", "HP:0011800"],
    'wide nasal bridge': ['Wide nasal bridge', 'HP:0000431'],
    'broad nasal tip': ['Broad nasal tip', 'HP:0000455'],
    'Low hanging columella': ['Low hanging columella', 'HP:0009765'],
    'upslanted palbebral fissures': ['Upslanted palpebral fissure', 'HP:0000582'],
    'narrow/short palbebral fissures': ['Short palpebral fissure', 'HP:0012745'],
    'Periorbital fullness': ['Periorbital fullness', 'HP:0000629'],
    'arched eyebrows': ['Highly arched eyebrow', 'HP:0002553'],
    'hypertelorism': ['Hypertelorism', 'HP:0000316'],
    'micrognathia': ['Micrognathia', 'HP:0000347'],
}
# 'Present' is passed as the observed marker and 'no' as the excluded marker.
item_column_mapper_d = hpo_cr.initialize_simple_column_maps(
    column_name_to_hpo_label_map=items,
    observed='Present',
    excluded='no',
)
# Register the generated per-column mappers in the shared registry.
column_mapper_d.update(item_column_mapper_d.items())

In [None]:
# Hands and feet: no custom map, only the shared concept recognizer is supplied.
# The header string (sic, 'malfromations') is used verbatim for both lookups.
hands_column = 'Minor malfromations of hands and feet'
handsMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
handsMapper.preview_column(dft[hands_column])
column_mapper_d[hands_column] = handsMapper

In [None]:
# Ear malformations: text fragment -> HPO label.
ears_column = 'Malformations of the ears'
ears_d = {
    'low set': "Low-set ears",
    'Attached ear-lobes': "Attached earlobe",
    'earlobes attached to side': "Attached earlobe",
}
# Unspecific wording passed as the exclusion set. The name `excluded` is kept
# because a later cell reads this variable.
excluded = {'malformed ears'}

earsMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=ears_d,
    excluded_set=excluded,
)
earsMapper.preview_column(dft[ears_column])
column_mapper_d[ears_column] = earsMapper

In [None]:
# Other malformations: text fragment -> HPO label. Keys are verbatim
# (note the leading space in ' low set nipples' and the spelling 'Synophyrs').
other_d = {
    'down turned corners of the mouth': 'Downturned corners of mouth',
    ' low set nipples': 'Low-set nipples',
    'Inverted': 'Inverted nipples',
    'Synophyrs': 'Synophrys',
}
# This cell used to pick up whatever `excluded` happened to hold from an
# earlier cell (the 'Malformations of the ears' cell when run top to bottom).
# The same content is now stated explicitly, so the result no longer depends
# on execution order.
# NOTE(review): confirm 'malformed ears' is really meant to be excluded in this column.
other_excluded = {'malformed ears'}
otherMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=other_d,
    excluded_set=other_excluded,
)
#otherMapper.preview_column(dft['Other malformations'])
column_mapper_d['Other malformations'] = otherMapper

In [None]:
# Ophthalmology: no custom map, only the shared concept recognizer is supplied.
eye_column = 'Ophthalmology'
eyeMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
eyeMapper.preview_column(dft[eye_column])
column_mapper_d[eye_column] = eyeMapper

In [None]:
# Audiology: hearing-loss wording -> HPO hearing-impairment label.
audiology_column = 'Audiology'
ear_d = {
    '(mixed) hearing loss': 'Mixed hearing impairment',
    'Mixed hearing loss': 'Mixed hearing impairment',
    'Severe mixed hearing loss': 'Mixed hearing impairment',
    'Conductive hearing loss': 'Conductive hearing impairment',
    'Sensorineural hearing loss': 'Sensorineural hearing impairment',
}
earMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=ear_d,
)
earMapper.preview_column(dft[audiology_column])
column_mapper_d[audiology_column] = earMapper

In [None]:
# Endocrine: no custom map, only the shared concept recognizer is supplied.
# The preview used to run on earMapper (the audiology mapper, which carries the
# hearing-loss custom map) before endoMapper existed. It now previews with the
# mapper that is actually registered for this column.
endoMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
endoMapper.preview_column(dft['Endocrine'])
column_mapper_d['Endocrine'] = endoMapper

In [None]:
# Respiratory: the key is kept verbatim (sic, 'trachemalacea').
respiratory_column = 'Respiratory'
respiratory_d = {
    'trachemalacea': "Tracheomalacia",
}
respMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=respiratory_d,
)
respMapper.preview_column(dft[respiratory_column])
column_mapper_d[respiratory_column] = respMapper

In [None]:
# Cardiac: abbreviations and wording -> HPO label.
cardiac_column = 'Cardiac'
cord_d = {
    'PFO': 'Patent foramen ovale',
    'VSD': 'Ventricular septal defect',
    'transverse arch hypoplasia': 'Hypoplastic aortic arch',
    'LVOT': 'Left ventricular outflow tract obstruction',
    'PDA': 'Patent ductus arteriosus',
    'DORV': 'Double outlet right ventricle',
    'Persistent LSVC': 'Persistent left superior vena cava',
    'ASD': 'Atrial septal defect',
}
corMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=cord_d,
)
# Optional check: corMapper.preview_column(dft[cardiac_column])
column_mapper_d[cardiac_column] = corMapper

In [None]:
# Gastrointestinal: the three abbreviations 'GTT', 'GGT' and 'PEG' are all
# mapped to 'Feeding difficulties'.
gi_column = 'Gastrointestinal'
gi_d = {
    'GTT': 'Feeding difficulties',
    'GGT': 'Feeding difficulties',
    'PEG': 'Feeding difficulties',
    'reflux': 'Gastroesophageal reflux',
}
giMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=gi_d,
)
giMapper.preview_column(dft[gi_column])
column_mapper_d[gi_column] = giMapper

In [None]:
# Renal / urinary tract: no custom map, only the shared concept recognizer is supplied.
renal_column = 'Renal / urinary tract'
guMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
# Optional check: guMapper.preview_column(dft[renal_column])
column_mapper_d[renal_column] = guMapper

In [None]:
# Genital: no custom map, only the shared concept recognizer is supplied.
genital_column = 'Genital'
genitalMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
genitalMapper.preview_column(dft[genital_column])
column_mapper_d[genital_column] = genitalMapper

In [None]:
# Skeletal: no custom map, only the shared concept recognizer is supplied.
skeletal_column = 'Skeletal'
skelMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
skelMapper.preview_column(dft[skeletal_column])
column_mapper_d[skeletal_column] = skelMapper

In [None]:
# Neuromuscular: no custom map, only the shared concept recognizer is supplied.
neuromuscular_column = 'Neuromuscular'
nMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
nMapper.preview_column(dft[neuromuscular_column])
column_mapper_d[neuromuscular_column] = nMapper

In [None]:
# Neuroimaging: report wording -> HPO label.
imaging_column = 'Neuroimaging'
imaging_d = {
    'small pons': 'Hypoplasia of the pons',
    'Dandy Walker malformation': 'Dandy-Walker malformation',
    'hypoplasia of cerebellar vermis': 'Cerebellar vermis hypoplasia',
    'corpus callosum is thinned': 'Thin corpus callosum',
    'Thin CC': 'Thin corpus callosum',
    'generalised atrophy particularly brainstem': 'Brainstem atrophy',
}
imagingMapper = CustomColumnMapper(
    concept_recognizer=hpo_cr,
    custom_map_d=imaging_d,
)
imagingMapper.preview_column(dft[imaging_column])
column_mapper_d[imaging_column] = imagingMapper

<h3>Variants</h3>
<p>The individuals in this study have one of two distinct variants.</p>

In [None]:
# Settings shared by both variants: assembly, transcript the c. notation
# refers to, and the genotype assigned when none is given.
genome = 'hg38'
transcript = 'NM_014159.7'
default_genotype = 'heterozygous'

varMapper = VariantColumnMapper(
    assembly=genome,
    column_name='Variant',
    transcript=transcript,
    default_genotype=default_genotype,
)

In [None]:
# Variant object for c.5218C>T, which the variant_map cell below keys as p.(Arg1740Trp).
# NOTE(review): map_cell presumably resolves the HGVS string on the transcript configured above — confirm.
variant_5218 = varMapper.map_cell('c.5218C>T')

In [None]:
# Variant object for c.5219G>A, which the variant_map cell below keys as p.(Arg1740Gln).
# NOTE(review): map_cell presumably resolves the HGVS string on the transcript configured above — confirm.
variant_5219 = varMapper.map_cell('c.5219G>A')

In [None]:
# Protein-level label -> resolved variant object; handed to CohortEncoder as
# its variant dictionary.
# NOTE(review): the keys presumably match the text of the 'Variant' column — confirm.
variant_map = {
    "p.(Arg1740Trp)": variant_5218,
    'p.(Arg1740Gln)': variant_5219,
}
#varMapper.set_variant_symbol_dictionary(variant_map)

In [None]:
# The two diagnoses used in this cohort, as id/label records.
rabin_omim = "OMIM:620155"
rabin_label = "Rabin-Pappas syndrome"
rabin = {"id": rabin_omim, 'label': rabin_label}
idd_ad70_omim = "OMIM:620157"
idd_ad70_label = "Intellectual developmental disorder, autosomal dominant 70"
idd_ad70 = {"id": idd_ad70_omim, 'label': idd_ad70_label}

# Patient id -> diagnosis: "Group 1 Patient 1..12" get Rabin-Pappas syndrome,
# "Group 2 Patient 1..3" get IDD, autosomal dominant 70.
disease_map = {f"Group 1 Patient {n}": rabin for n in range(1, 13)}
disease_map.update({f"Group 2 Patient {n}": idd_ad70 for n in range(1, 4)})

# Demographics: no age column is mapped; sex is read from the 'Sex' column.
ageMapper = AgeColumnMapper.not_provided()
sexMapper = SexColumnMapper(
    male_symbol='Male',
    female_symbol='Female',
    column_name='Sex',
)
individual_column_name = 'patient_id'

# Assemble the encoder from the transposed table, the column mappers
# registered above and the variant lookup, then attach the per-patient diagnoses.
encoder = CohortEncoder(
    df=dft,
    hpo_cr=hpo_cr,
    column_mapper_d=column_mapper_d,
    individual_column_name=individual_column_name,
    agemapper=ageMapper,
    sexmapper=sexMapper,
    variant_dictionary=variant_map,
    metadata=metadata,
    pmid=pmid,
)
encoder.set_disease_dictionary(disease_map)

In [None]:
# Run the encoder configured above.
# NOTE(review): presumably returns one Individual per row of dft — confirm.
individuals = encoder.get_individuals()

In [None]:
# Write the encoded individuals as phenopackets into a directory relative to the notebook.
output_directory = "phenopackets"
ga4gh_metadata = metadata.to_ga4gh()
Individual.output_individuals_as_phenopackets(
    individual_list=individuals,
    pmid=pmid,
    metadata=ga4gh_metadata,
    outdir=output_directory,
)