# ASPM
[primary microcephaly-5 (MCPH5)](https://omim.org/entry/608716) is caused by homozygous or compound heterozygous mutation in the ASPM gene.

In [4]:
import gpsea
from gpsea.analysis.predicate import PatientCategories
from gpsea.preprocessing import load_phenopacket_folder
from gpsea.preprocessing import configure_caching_cohort_creator
from gpsea.model import FeatureType, VariantEffect
from gpsea.view import CohortViewable
from gpsea.preprocessing import UniprotProteinMetadataService
from gpsea.model.genome import GRCh38
from gpsea.preprocessing import VVMultiCoordinateService
from gpsea.view import ProteinVisualizable, ProteinVisualizer, ProteinViewable
import hpotk
import gpsea

from IPython.display import display, HTML

store = hpotk.configure_ontology_store()
hpo = store.load_minimal_hpo(release='v2024-08-13')
print(f'Loaded HPO v{hpo.version}')
print(f"Using gpsea version {gpsea.__version__}")


Loaded HPO v2024-08-13
Using gpsea version 0.4.1.dev0


The ASPM gene is the human ortholog of the Drosophila melanogaster 'abnormal spindle' gene (asp), which is essential for normal mitotic spindle function in embryonic neuroblasts. The mouse gene Aspm is expressed specifically in the primary sites of prenatal cerebral cortical neurogenesis ([OMIM:605481](https://omim.org/entry/605481)).

In [5]:
gene_symbol = 'ASPM'
mane_tx_id = 'NM_018136.5'
mane_protein_id = 'NP_060606.3' # abnormal spindle-like microcephaly-associated protein isoform 1

# Load phenopackets

The following code loads the 22 phenopackets available from release 0.1.19 of phenopacket-store.

In [6]:
from ppktstore.registry import configure_phenopacket_registry

phenopacket_store_release = '0.1.19'  # Update, if necessary
registry = configure_phenopacket_registry()

with registry.open_phenopacket_store(release=phenopacket_store_release) as ps:
    phenopackets = tuple(ps.iter_cohort_phenopackets(gene_symbol))

print(f'Loaded {len(phenopackets)} phenopackets')

Loaded 22 phenopackets


In [7]:
from gpsea.preprocessing import configure_caching_cohort_creator, load_phenopackets

cohort_creator = configure_caching_cohort_creator(hpo)

cohort, qc = load_phenopackets(
    phenopackets=phenopackets, 
    cohort_creator=cohort_creator,
)
print(f'Loaded {len(cohort)} individuals')

Individuals Processed: 100%|██████████| 22/22 [00:20<00:00,  1.07individuals/s]
Loaded 22 individuals


In [8]:
qc.summarize()

Validated under none policy


In [9]:
from gpsea.view import CohortViewable

cv = CohortViewable(hpo)
report = cv.process(cohort, transcript_id=mane_tx_id)
display(HTML(report))

HPO Term,ID,Seen in n individuals
Microcephaly,HP:0000252,22
Intellectual disability,HP:0001249,18
Short stature,HP:0004322,16
Decreased body weight,HP:0004325,14
Hyperactivity,HP:0000752,7
Aggressive behavior,HP:0000718,3
Self-injurious behavior,HP:0100716,2
Motor delay,HP:0001270,1
Opisthotonus,HP:0002179,1
Simplified gyral pattern,HP:0009879,1

Count,Variant key,Variant Name,Protein Variant,Variant Class
5,1_197117875_197117875_G_A,c.3979C>T,p.Arg1327Ter,STOP_GAINED
4,1_197143563_197143564_TC_T,c.688del,p.Glu230AsnfsTer30,FRAMESHIFT_VARIANT
3,1_197093156_197093156_G_A,c.9190C>T,p.Arg3064Ter,STOP_GAINED
2,1_197124158_197124159_CT_C,c.3341del,p.Lys1114SerfsTer3,FRAMESHIFT_VARIANT
2,1_197090328_197090328_G_A,c.9697C>T,p.Arg3233Ter,STOP_GAINED
2,1_197090945_197090945_G_A,c.9541C>T,p.Arg3181Ter,STOP_GAINED
2,1_197124923_197124930_TATCCACA_T,c.3108_3114del,p.Val1037GlyfsTer13,FRAMESHIFT_VARIANT
1,1_197142463_197142463_G_A,c.1789C>T,p.Arg597Ter,STOP_GAINED
1,1_197086966_197086966_G_A,c.10168C>T,p.Arg3390Ter,STOP_GAINED
1,1_197101467_197101469_TTC_T,c.7782_7783del,p.Lys2595SerfsTer6,FRAMESHIFT_VARIANT

Disease Name,Disease ID,Annotation Count
"Microcephaly 5, primary, autosomal recessive",OMIM:608716,22

Variant effect,Annotation Count
FRAMESHIFT_VARIANT,10
STOP_GAINED,16
INFRAME_DELETION,1
SPLICE_DONOR_VARIANT,1


In [10]:
from gpsea.model.genome import GRCh38
from gpsea.preprocessing import configure_default_protein_metadata_service, VVMultiCoordinateService
from gpsea.view import ProteinVisualizable, ProteinViewable


pms = configure_default_protein_metadata_service()
protein_meta = pms.annotate(mane_protein_id)
txc_service = VVMultiCoordinateService(genome_build=GRCh38)
tx_coordinates = txc_service.fetch(mane_tx_id)
pvis = ProteinVisualizable(tx_coordinates=tx_coordinates, protein_meta=protein_meta, cohort=cohort)

viewer = ProteinViewable()
html_prot = viewer.process(cohort, pvis)
display(HTML(html_prot))

ValueError: Could not find an entry for NP_060606.3 in Uniprot response

In [11]:
from gpsea.view import CohortVariantViewer

viewer = CohortVariantViewer(tx_id=mane_tx_id)
report = viewer.process(cohort)
display(HTML(report))

Variant key,Variant (cDNA),Variant (protein),Effects,Count
1_197117875_197117875_G_A,c.3979C>T,p.Arg1327Ter,stop gained,5
1_197143563_197143564_TC_T,c.688del,p.Glu230AsnfsTer30,frameshift,4
1_197093156_197093156_G_A,c.9190C>T,p.Arg3064Ter,stop gained,3
1_197124923_197124930_TATCCACA_T,c.3108_3114del,p.Val1037GlyfsTer13,frameshift,2
1_197124158_197124159_CT_C,c.3341del,p.Lys1114SerfsTer3,frameshift,2
1_197090328_197090328_G_A,c.9697C>T,p.Arg3233Ter,stop gained,2
1_197090945_197090945_G_A,c.9541C>T,p.Arg3181Ter,stop gained,2
1_197104639_197104639_G_A,c.4612C>T,p.Arg1538Ter,stop gained,1
1_197142463_197142463_G_A,c.1789C>T,p.Arg597Ter,stop gained,1
1_197139831_197139834_TTTG_T,c.1959_1961del,p.Asn653del,inframe deletion,1


In [12]:
from gpsea.model import VariantEffect
from gpsea.analysis.predicate.genotype import VariantPredicates, ModeOfInheritancePredicate

is_missense = VariantPredicates.variant_effect(VariantEffect.FRAMESHIFT_VARIANT, tx_id=mane_tx_id)
gt_predicate = ModeOfInheritancePredicate.autosomal_recessive(is_missense)

  gt_predicate = ModeOfInheritancePredicate.autosomal_recessive(is_missense)


In [13]:
from gpsea.model import FeatureType
from gpsea.analysis.predicate.phenotype import prepare_predicates_for_terms_of_interest

pheno_predicates = prepare_predicates_for_terms_of_interest(
    cohort=cohort,
    hpo=hpo,
)

In [14]:
from gpsea.analysis.pcats.stats import FisherExactTest
from gpsea.analysis.mtc_filter import HpoMtcFilter

mtc_filter = HpoMtcFilter.default_filter(hpo=hpo, term_frequency_threshold=0.2)
mtc_correction = 'fdr_bh'
statistic = FisherExactTest()

In [15]:
from gpsea.analysis.pcats import HpoTermAnalysis

analysis = HpoTermAnalysis(
    count_statistic=statistic,
    mtc_filter=mtc_filter,
    mtc_correction=mtc_correction,
    mtc_alpha=0.05,
)

In [16]:
result = analysis.compare_genotype_vs_phenotypes(
    cohort=cohort,
    gt_predicate=gt_predicate,
    pheno_predicates=pheno_predicates,
)

In [17]:
from gpsea.view import MtcStatsViewer

viewer = MtcStatsViewer()
report = viewer.process(result)
display(HTML(report))

Code,Reason,Count
HMF01,Skipping term with maximum frequency that was less than threshold 0.2,1
HMF02,Skipping term because no genotype has more than one observed HPO count,1
HMF04,Skipping term because all genotypes have same HPO observed proportions,19
HMF06,Skipping term with less than 6 observations (not powered for 2x3),5
HMF08,Skipping general term,16


In [18]:
from gpsea.view import summarize_hpo_analysis

report = summarize_hpo_analysis(hpo=hpo, result=result)
report

What is the genotype group,HOM_REF,HOM_REF,HET,HET,BIALLELIC_ALT,BIALLELIC_ALT,Unnamed: 7_level_0,Unnamed: 8_level_0
Unnamed: 0_level_1,Count,Percent,Count,Percent,Count,Percent,Corrected p values,p values
Hyperactivity [HP:0000752],5/15,33%,1/1,100%,1/4,25%,1.0,0.509314
Short stature [HP:0004322],10/15,67%,1/1,100%,5/6,83%,1.0,0.725584
Decreased body weight [HP:0004325],9/15,60%,1/1,100%,4/6,67%,1.0,1.0
Aggressive behavior [HP:0000718],2/15,13%,0/1,0%,1/5,20%,1.0,1.0
