# ACADM
[Medium-chain acyl-CoA dehydrogenase deficiency (ACADMD)](https://omim.org/entry/201450) is caused by homozygous or compound heterozygous mutation in the medium-chain acyl-CoA dehydrogenase gene (ACADM).
Here we ingest a dataset reported in [Tucci S, et al. (2021) Genotype and residual enzyme activity in medium-chain acyl-CoA dehydrogenase (MCAD) deficiency: Are predictions possible? J Inherit Metab Dis.](https://pubmed.ncbi.nlm.nih.gov/33580884/). Limited clinical information is provided, but we can collect data on genotypes and enzyme activity to show how GPSEA can be used to perform tests of association of genotypes with numerical measurements using a t test.

In [2]:
from pyphetools.creation import Measurements, VariantManager, IntergenicVariant, StructuralVariant, OntologyTerms, MetaData
from pyphetools.pp import *
import pandas as pd
import os
from google.protobuf.json_format import MessageToJson
import pyphetools
# Print the installed pyphetools version so the notebook output records it.
print(f"pyphetools version {pyphetools.__version__}")

pyphetools version 0.9.109


In [3]:
# Read the Excel table into a DataFrame and preview its first rows.
excel_path = "input/Tucci_ACADM_data.xlsx"
df = pd.read_excel(excel_path)
df.head()

Unnamed: 0,Individual,Allel1,Amino Acid Change,Exon,Allel2,Amino Acid Change.1,Exon.1,MCAD Activity%,VLCAD Activity,AC C8 [µmol/L]b,AC C6 [µmol/L]b,AC C10 [µmol/L]b,AC others [µmol/L] or ratio
0,1,c.985A>G,p.K329E,11.0,c.985A>G,p.K329E,11,0.0,72.0,,,,
1,2,c.985A>G,p.K329E,11.0,c.985A>G,p.K329E,11,0.0,71.0,,,,
2,3,c.985A>G,p.K329E,11.0,c.985A>G,p.K329E,11,0.0,82.0,,,,C8/C10 17.2; C8/C12 24.1
3,4,c.985A>G,p.K329E,11.0,c.985A>G,p.K329E,11,0.0,88.0,11.86 (ref. <0.269),192163),1.23 (ref. <0.403),C10:1 0.46 (ref.<0.106); C6/C2 0.13 (ref.<0.00...
4,5,c.985A>G,p.K329E,11.0,c.985A>G,p.K329E,11,0.0,79.0,,,,


### Remove rows without data
Some of the rows either have no variant data or no MCAD activity data.
We also filter out rows that do not describe biallelic variants, e.g., one of the columns is WT.

In [4]:
n_rows_1 = len(df)

# Build one boolean mask instead of filtering three times in a row.
# A row is kept only if it has a non-blank MCAD activity value and both
# allele columns are non-blank and are not marked 'ns' or 'WT'.
keep = df['MCAD Activity%'].astype(str).str.strip().replace('nan', '') != ''
for allele_column in ('Allel1', 'Allel2'):
    cleaned = df[allele_column].astype(str).str.strip().replace('nan', '')
    keep &= ~df[allele_column].isin(['ns', 'WT']) & (cleaned != '')
df = df[keep]

n_rows_2 = len(df)
print(f"Filtered from {n_rows_1} to {n_rows_2}")


Filtered from 460 to 115


In [5]:
# Display the last rows of the current DataFrame.
df.tail()

Unnamed: 0,Individual,Allel1,Amino Acid Change,Exon,Allel2,Amino Acid Change.1,Exon.1,MCAD Activity%,VLCAD Activity,AC C8 [µmol/L]b,AC C6 [µmol/L]b,AC C10 [µmol/L]b,AC others [µmol/L] or ratio
197,198,c.127G>A,p.E43K,3.0,c.985A>G,p.K329E,11,33.0,,0.78 (ref.<0.19),0.48 (ref.<0.19),0.45 (ref.<0.31),
204,205,c.653C>A,p.A218D,8.0,c.985A>G,p.K329E,11,34.0,90.0,,,,
208,209,c.455A>G,p.E152G,6.0,c.985A>G,p.K329E,11,35.0,81.0,0.98 (ref. 0-0.28),,,C8/C16 0.28
214,215,c.127G>A,p.E43K,3.0,c.985A>G,p.K329E,11,36.0,96.0,,,,
222,223,c.985A>G,p.K329E,11.0,c.1091T>C,p.I364T,11,38.0,72.0,,,,


# MCAD activity
[LOINC:74892-1](https://loinc.org/74892-1) refers to Medium-chain Acyl CoA dehydrogenase [Enzymatic activity/mass] in Fibroblast.
The following function takes the activity level in the "MCAD Activity%" column and creates a corresponding Phenopacket Measurement.


In [6]:
def mcad_measurement(row):
    """Create a percent measurement for the MCAD activity of one table row.

    Args:
        row: A mapping-like row (for example a ``pandas.Series``) that has a
            "MCAD Activity%" entry.

    Returns:
        The object returned by ``Measurements.percent`` for LOINC:74892-1, or
        ``None`` when the value is the placeholder "na" or cannot be parsed
        as an integer (a message is printed in the latter case).

    Raises:
        ValueError: If the value is a string that ends with a space.
    """
    value = row["MCAD Activity%"]
    loinc_code = "LOINC:74892-1"
    loinc_label = "Medium-chain Acyl CoA dehydrogenase [Enzymatic activity/mass] in Fibroblast"
    if value == "na":
        return None
    if isinstance(value, str) and value.endswith(" "):
        raise ValueError(f"Malformed MCAD: {value}")
    # Guard only the conversion, so that a ValueError raised while building
    # the measurement is not misreported as a parse failure.
    # Note that int() truncates any fractional part of a float value.
    try:
        concentration = int(value)
    except ValueError:
        print(f"Could not parse \"{value}\"")
        return None
    return Measurements.percent(code=loinc_code,
                                label=loinc_label,
                                concentration=concentration)


In [8]:
# Gene identifiers and transcript handed to the VariantManager below.
acadm_symbol = "ACADM"
acadm_id = "HGNC:89"
acadm_MANE_transcript = "NM_000016.6"

# The manager reads the individual and allele columns of df; variant_d is the
# lookup that row_to_phenopacket later queries by allele string.
vmanager = VariantManager(
    df=df,
    individual_column_name="Individual",
    allele_1_column_name="Allel1",
    allele_2_column_name="Allel2",
    gene_symbol=acadm_symbol,
    gene_id=acadm_id,
    transcript=acadm_MANE_transcript,
)
variant_d = vmanager.get_variant_d()

In [9]:
from pyphetools.pp.v202 import MetaData
# Citation of the source publication and the identifier passed as created_by.
created_by = "ORCID:0000-0002-5648-2155"
PMID = "PMID:33580884"
title = "Genotype and residual enzyme activity in medium-chain acyl-CoA dehydrogenase (MCAD) deficiency: Are predictions possible?"

# include_loinc=True is requested because the measurements use a LOINC code.
metadata = MetaData.metadata_for_pmid(
    pmid=PMID,
    citation_title=title,
    created_by=created_by,
    include_loinc=True,
)


In [11]:
from pyphetools.pp.v202 import *


def row_to_phenopacket(row:pd.Series):
    """Build a Phenopacket for one row of the filtered table.

    The row's two allele strings are looked up in the module-level
    ``variant_d``. Identical alleles yield a single homozygous variant;
    different alleles yield two heterozygous variants. The MCAD activity
    measurement is attached when ``mcad_measurement`` returns one.

    Args:
        row: A table row with "Individual", "Allel1", "Allel2" and
            "MCAD Activity%" entries.

    Returns:
        The ``Phenopacket`` built for this individual.

    Raises:
        ValueError: If an allele of the row has no entry in ``variant_d``.
    """
    individual_id = str(row["Individual"])
    phenopacket_id = "PMID_33580884_individual_{}".format(individual_id)
    i = Individual(id=individual_id)

    allele_1 = row["Allel1"]
    allele_2 = row["Allel2"]
    if allele_1 == allele_2:
        alleles = [allele_1]
        make_allelic_state = OntologyTerms.homozygous
    else:
        alleles = [allele_1, allele_2]
        make_allelic_state = OntologyTerms.heterozygous

    ## create one genomic interpretation per variant
    interpretation_list = list()
    for allele in alleles:
        var = variant_d.get(allele)
        if var is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError(f"No variant found for allele \"{allele}\" of individual {individual_id}")
        var_obj = var.to_variant_interpretation_202()
        # A fresh allelic-state term is created for every variant.
        var_obj.variation_descriptor.allelic_state = make_allelic_state()
        genomic_interpretation = GenomicInterpretation(subject_or_biosample_id=individual_id,
                                                       interpretation_status=GenomicInterpretation.InterpretationStatus.CAUSATIVE,
                                                       call=var_obj)
        interpretation_list.append(genomic_interpretation)

    ## Disease is always OMIM:201450 for this cohort.
    diseaseClass = OntologyClass(id="OMIM:201450", label="Acyl-CoA dehydrogenase, medium chain, deficiency of")
    disease = Disease(term=diseaseClass)
    diagnosis = Diagnosis(disease=diseaseClass, genomic_interpretations=interpretation_list)
    interpretation = Interpretation(id=individual_id, progress_status=Interpretation.ProgressStatus.SOLVED, diagnosis=diagnosis)

    measurements = list()
    m = mcad_measurement(row=row)
    # mcad_measurement returns None for "na" or unparsable values; skip those
    # so the measurement list never contains None.
    if m is not None:
        measurements.append(m)
    ppkt = Phenopacket(id=phenopacket_id,
                       subject=i,
                       diseases=[disease],
                       measurements=measurements,
                       interpretations=[interpretation],
                       meta_data=metadata)
    return ppkt

In [12]:
# Build one message per remaining row; these are what MessageToJson
# serializes below.
phenopacket_list = [row_to_phenopacket(row).to_message() for _, row in df.iterrows()]

outdir = "phenopackets"
os.makedirs(outdir, exist_ok=True)

written = 0
json_list = list()
for ppkt in phenopacket_list:
    json_string = MessageToJson(ppkt)
    # File name is the phenopacket id with spaces replaced by underscores.
    outpth = os.path.join(outdir, ppkt.id.replace(" ", "_") + ".json")
    with open(outpth, "wt") as fh:
        fh.write(json_string)
    json_list.append(json_string)
    written += 1
print(f"We output {written} GA4GH phenopackets to the directory {outdir}")

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
2.0
3.0
3.0
3.0
3.0
4.0
4.0
4.0
4.0
4.0
4.0
5.0
5.0
7.0
8.0
9.0
9.0
10.0
10.0
11.0
13.0
14.0
14.0
14.0
15.0
15.0
15.0
16.0
16.0
16.0
17.0
17.0
18.0
18.0
18.0
18.0
19.0
19.0
20.0
20.0
20.0
20.0
20.0
20.0
20.0
21.0
21.0
21.0
21.0
22.0
24.0
24.0
27.0
28.0
28.0
28.0
28.0
29.0
32.0
33.0
34.0
35.0
36.0
38.0
We output 115 GA4GH phenopackets to the directory phenopackets
