# SLC32A1

Heterozygous variants in SLC32A1 are associated with
two autosomal dominant conditions, [developmental and epileptic encephalopathy-114 (DEE114)](https://omim.org/entry/620774)
and [generalized epilepsy with febrile seizures plus-12 (GEFSP12)](https://omim.org/entry/620755).

In [1]:
import gpsea
import hpotk

# Load the minimal HPO at a pinned release from the configured ontology store.
store = hpotk.configure_ontology_store()
hpo = store.load_minimal_hpo(release="v2024-08-13")

# Print the versions of the ontology and of GPSEA used by this notebook.
print(f"Loaded HPO v{hpo.version}.")
print(f"Using gpsea version {gpsea.__version__}.")

Loaded HPO v2024-08-13.
Using gpsea version 0.7.1.


### SLC32A1
We use the [Matched Annotation from NCBI and EMBL-EBI (MANE)](https://www.ncbi.nlm.nih.gov/refseq/MANE/) transcript and the corresponding protein identifier for SLC32A1.

In [2]:
# Gene analysed in this notebook, with its MANE transcript and
# the corresponding protein identifier.
gene_symbol = "SLC32A1"
mane_tx_id = "NM_080552.3"
# NP_542119.1: vesicular inhibitory amino acid transporter
mane_protein_id = "NP_542119.1"

In [3]:
from gpsea.preprocessing import configure_caching_cohort_creator, load_phenopackets
from ppktstore.registry import configure_phenopacket_registry

# Collect all phenopackets of the gene's cohort from the "0.1.21" phenopacket store.
phenopacket_registry = configure_phenopacket_registry()
with phenopacket_registry.open_phenopacket_store("0.1.21") as ppkt_store:
    phenopackets = tuple(ppkt_store.iter_cohort_phenopackets(gene_symbol))
print(f"Loaded {len(phenopackets)} phenopackets")

# Turn the phenopackets into a GPSEA cohort and summarize the validation results.
cohort_creator = configure_caching_cohort_creator(hpo)
cohort, validation = load_phenopackets(
    phenopackets=phenopackets,
    cohort_creator=cohort_creator,
)
validation.summarize()

Loaded 38 phenopackets
Individuals Processed: 100%|██████████| 38/38 [00:00<00:00, 679.44individuals/s]
Validated under permissive policy


In [4]:
from gpsea.view import CohortViewer

# Build the cohort report with respect to the MANE transcript.
cv = CohortViewer(hpo)
cohort_report = cv.process(cohort, transcript_id=mane_tx_id)

# Leave the report as the cell's last expression so that the notebook displays it.
cohort_report

HPO Term,ID,Seen in n individuals
Febrile seizure (within the age range of 3 months to 6 years),HP:0002373,23
Generalized-onset seizure,HP:0002197,11
Focal-onset seizure,HP:0007359,7
Hypotonia,HP:0001252,4
Developmental regression,HP:0002376,4
Motor delay,HP:0001270,4
Delayed speech and language development,HP:0000750,4
Intellectual disability,HP:0001249,4
Bilateral tonic-clonic seizure with generalized onset,HP:0025190,3
Focal impaired awareness seizure,HP:0002384,3

Seen in n individuals,Variant key,Variant Name,Protein Effect,Variant Class
9,20_38728050_38728050_T_C,c.989T>C,p.Met330Thr,MISSENSE_VARIANT
8,20_38728464_38728464_T_C,c.1403T>C,p.Leu468Pro,MISSENSE_VARIANT
4,20_38724851_38724851_G_T,c.127G>T,p.Gly43Cys,MISSENSE_VARIANT
4,20_38728394_38728394_C_T,c.1333C>T,p.Leu445Phe,MISSENSE_VARIANT
3,20_38728452_38728452_C_G,c.1391C>G,p.Thr464Arg,MISSENSE_VARIANT
2,20_38728454_38728454_G_A,c.1393G>A,p.Gly465Ser,MISSENSE_VARIANT
2,20_38727849_38727849_T_C,c.788T>C,p.Val263Ala,MISSENSE_VARIANT
2,20_38728443_38728443_G_A,c.1382G>A,p.Gly461Asp,MISSENSE_VARIANT
1,20_38728026_38728026_T_G,c.965T>G,p.Phe322Cys,MISSENSE_VARIANT
1,20_38727848_38727848_G_A,c.787G>A,p.Val263Met,MISSENSE_VARIANT

Name,ID,N diagnosed individuals
"Generalized epilepsy with febrile seizures plus, type 12",OMIM:620755,34
Developmental and epileptic encephalopathy 114,OMIM:620774,4

Variant effect,Annotation count,Percent
MISSENSE_VARIANT,38,100%


In [5]:
from gpsea.model.genome import GRCh38
from gpsea.model import ProteinMetadata
from gpsea.preprocessing import configure_default_protein_metadata_service, VVMultiCoordinateService
from gpsea.view import ProteinVisualizer
import matplotlib.pyplot as plt

# Fetch the coordinates of the MANE transcript on GRCh38.
txc_service = VVMultiCoordinateService(genome_build=GRCh38)
pms = configure_default_protein_metadata_service()
tx_coordinates = txc_service.fetch(mane_tx_id)
# NOTE(review): this result is overwritten by the assignment right below
# before it is ever read - confirm which of the two sources is intended.
protein_meta = pms.annotate(mane_protein_id)

# Protein metadata parsed from a local UniProt JSON file.
# The label was "SLC31A1", which is a different gene; this notebook
# (and the Q9H598 UniProt entry) is about SLC32A1.
# NOTE(review): protein_length presumably has to match the sequence length
# in Q9H598.json - confirm the value 550 (UniProt appears to list 525 aa).
# NOTE(review): the recorded output of this cell is
# `ValueError: Unrecognized protein feature type: "Topological domain"`;
# the cell needs to run cleanly before the cells below can be executed.
protein_meta = ProteinMetadata.from_uniprot_json(
    protein_id=mane_protein_id,
    label="SLC32A1",
    uniprot_json="Q9H598.json",
    protein_length=550,
)

# Draw the cohort's variants on the protein diagram; the figure handle
# is kept in `slc32a1_fig` because it is passed to the LaTeX summary later.
slc32a1_fig, ax = plt.subplots(figsize=(15, 8))
visualizer = ProteinVisualizer()
visualizer.draw_protein_diagram(
    tx_coordinates,
    protein_meta,
    cohort,
    ax=ax,
)
slc32a1_fig.tight_layout()

ValueError: Unrecognized protein feature type: "Topological domain"

# Strategy
No genotype-phenotype correlations were identified in the published literature. All pathogenic variants are missense. Therefore, we will test the most common variant (p.Met330Thr) against all other variants, and N-terminal variants against C-terminal variants.

In [None]:
from gpsea.analysis.pcats import configure_hpo_term_analysis
from gpsea.analysis.predicate.phenotype import prepare_predicates_for_terms_of_interest

# Analysis object and phenotype predicates shared by all comparisons below.
analysis = configure_hpo_term_analysis(hpo)
pheno_predicates = prepare_predicates_for_terms_of_interest(cohort=cohort, hpo=hpo)

In [None]:
from gpsea.analysis.predicate.genotype import VariantPredicates, monoallelic_predicate
from gpsea.model.genome import Region
from gpsea.view import MtcStatsViewer

# Variants matching Region(0, 360) on the MANE transcript form the "N term" group;
# all remaining variants form the "other" group.
n_term = VariantPredicates.region(region=Region(0, 360), tx_id=mane_tx_id)
nterm_predicate = monoallelic_predicate(
    a_predicate=n_term,
    b_predicate=~n_term,
    a_label="N term",
    b_label="other",
)

# Compare the two genotype groups across the phenotype predicates.
nterm_result = analysis.compare_genotype_vs_phenotypes(
    cohort=cohort,
    gt_predicate=nterm_predicate,
    pheno_predicates=pheno_predicates,
)

# The viewer output is the cell's last expression so that the notebook displays it.
viewer = MtcStatsViewer()
viewer.process(nterm_result)


In [None]:
from gpsea.view import summarize_hpo_analysis

# Summary of the "N term" vs. "other" comparison (displayed as the cell output).
summarize_hpo_analysis(
    hpo=hpo,
    result=nterm_result,
)

In [None]:
from gpsea.analysis.predicate.genotype import sex_predicate

# Run the same phenotype comparison with the individuals grouped by the sex predicate.
mf_result = analysis.compare_genotype_vs_phenotypes(
    cohort=cohort, gt_predicate=sex_predicate(), pheno_predicates=pheno_predicates
)

# Displayed as the cell output.
summarize_hpo_analysis(
    hpo=hpo,
    result=mf_result,
)

In [None]:
from gpsea.analysis.predicate.genotype import VariantPredicates, monoallelic_predicate

# Variant key of c.989T>C (p.Met330Thr), the variant seen in the most individuals of the cohort.
met330thr = VariantPredicates.variant_key("20_38728050_38728050_T_C")
met330thr_predicate = monoallelic_predicate(
    a_predicate=met330thr,
    b_predicate=~met330thr,
    a_label="p.Met330Thr",
    b_label="Other variant",
)

# Compare carriers of p.Met330Thr with carriers of any other variant.
met330thr_result = analysis.compare_genotype_vs_phenotypes(
    cohort=cohort, gt_predicate=met330thr_predicate, pheno_predicates=pheno_predicates
)

# Displayed as the cell output.
summarize_hpo_analysis(
    hpo=hpo,
    result=met330thr_result,
)

# Summary

In [None]:
from gpseacs.report import GpseaAnalysisReport, GPAnalysisResultSummary

# One result summary per comparison performed above, in this order:
# N-terminal region, p.Met330Thr, sex.
fet_results = tuple(
    GPAnalysisResultSummary.from_multi(result=multi_result)
    for multi_result in (nterm_result, met330thr_result, mf_result)
)

caption = "No significant correlation identified."

# Bundle the cohort, the result summaries and the gene identifiers into one report.
report = GpseaAnalysisReport(
    name=gene_symbol,
    cohort=cohort,
    fet_results=fet_results,
    gene_symbol=gene_symbol,
    mane_tx_id=mane_tx_id,
    mane_protein_id=mane_protein_id,
    caption=caption,
)

In [None]:
from gpseacs.report import GpseaNotebookSummarizer

# The summarizer is given the ontology and the GPSEA version used in this notebook.
summarizer = GpseaNotebookSummarizer(
    hpo=hpo,
    gpsea_version=gpsea.__version__,
)

# Displayed as the cell output.
summarizer.summarize_report(report=report)

In [None]:
# Pass the report and the protein diagram figure to the summarizer's LaTeX step.
summarizer.process_latex(
    report=report,
    protein_fig=slc32a1_fig,
)