In [1]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [11]:
from pprint import pprint

import requests
import pandas as pd

In [49]:
from extraction import (
    animal_disease_to_genes,
    animal_disease_to_alleles,
    animal_disease_to_models,
)

---

- disease view 
  - COVID-19 https://www.alliancegenome.org/disease/DOID:0080600
  - coronary artery disease https://www.alliancegenome.org/disease/DOID:3393
- allele view https://www.alliancegenome.org/allele/MGI:6431336
- gene view https://www.alliancegenome.org/gene/HGNC:13557

---
- ECO (Evidence & Conclusion Ontology): https://www.ebi.ac.uk/ols/ontologies/eco

Schema models:

Relationships

- `(Disease)`-`[ANIMAL_EVIDENCE_GENE]`->`(AnimalGene)`
  - association_type
  - evidence_id
  - publication_id
- `(Disease)`-`[ANIMAL_EVIDENCE_ALLELE]`->`(AnimalAllele)`
  - association_type
  - evidence_id
  - publication_id
- `(Disease)`-`[ANIMAL_EVIDENCE_MODEL]`->`(AnimalModel)`
  - association_type
  - evidence_id
  - publication_id

Nodes

- `(Disease)`, `(Literature)`
- `(AnimalGene)`, `(AnimalAllele)`, `(AnimalModel)`
  - {X}_id
  - {X}_name
  - {X}_species_id
  - {X}_species_name
- `(AnimalEvidence)`
  - id
  - name
  - symbol
- `(AnimalSpecies)`?
  - id
  - name
    - Homo sapiens        
    - Mus musculus           
    - Rattus norvegicus      
    - Caenorhabditis elegans 
    - Drosophila melanogaster
    - Danio rerio            
    - Saccharomyces cerevisiae

TODO:

- docs on `association_type`
- get model "type: genotype"
- https://www.mousephenotype.org/; disease - knockout_genes
- http://www.informatics.jax.org/downloads/reports/MGI_PhenotypicAllele.rpt; disease - gene; knockout rel prop; double check, cross-reference
- https://www.mousephenotype.org/data/phenotypes/MP:0000266
- http://ftp.ebi.ac.uk/pub/databases/impc/all-data-releases/release-13.0/results/

---

In [20]:
# Base URL of the Alliance Genome site; API routes below are appended to it.
API = "https://www.alliancegenome.org"

# Disease Ontology identifiers used as query examples in this notebook.
DOID, DOID1 = (
    "DOID:0080600",  # COVID-19
    "DOID:3393",  # coronary artery disease
)

In [12]:
# Fetch the disease summary record for DOID from the Alliance API.
route = "/api/disease/{id}"
# Explicit timeout (seconds): requests waits indefinitely by default, which
# would hang the kernel on a stalled connection during Restart & Run All.
r = requests.get(API + route.format(id=DOID), timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error payload.
r.raise_for_status()
disease_info = r.json()

pprint(disease_info)

{'children': None,
 'crossReferences': {},
 'definition': 'A Coronavirus infection that is characterized by fever, cough '
               'and shortness of breath and that has_material_basis_in '
               'SARS-CoV-2.',
 'definitionLinks': ['https://www.cdc.gov/coronavirus/2019-ncov/about/index.html',
                     'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=2697049',
                     'https://www.ncbi.nlm.nih.gov/pubmed/?term=32007143',
                     'https://www.ncbi.nlm.nih.gov/pubmed/?term=32007145',
                     'https://www.who.int/emergencies/diseases/novel-coronavirus-2019'],
 'id': 'DOID:0080600',
 'name': 'COVID-19',
 'parents': [{'id': 'DOID:0080599', 'name': 'Coronavirus infectious disease'}],
 'sources': [{'name': 'RGD',
              'url': 'https://rgd.mcw.edu/rgdweb/ontology/annot.html?species=All&x=1&acc_id=DOID:0080600#annot'},
             {'name': 'MGI',
              'url': 'http://www.informatics.jax.org/disease/DOI

---

# Genes

In [14]:
# Fetch the gene associations for DOID from the Alliance API.
route = "/api/disease/{id}/genes"
# Explicit timeout (seconds): requests waits indefinitely by default, which
# would hang the kernel on a stalled connection during Restart & Run All.
r = requests.get(API + route.format(id=DOID), timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error payload.
r.raise_for_status()
# The association records live under the "results" key of the payload.
# NOTE(review): the diagnostics cell reports exactly 20 records, which looks
# like a default page size -- confirm whether this endpoint paginates and
# whether a limit/page parameter is needed to retrieve every association.
assoc_genes = r.json()["results"]

pprint(assoc_genes)

[{'allele': None,
  'associationType': 'is_implicated_in',
  'disease': {'id': 'DOID:0080600',
              'name': 'COVID-19',
              'url': 'http://www.disease-ontology.org/?id=DOID:0080600'},
  'evidenceCodes': [{'displaySynonym': 'IAGP',
                     'id': 'ECO:0007191',
                     'name': 'inference by association of genotype from '
                             'phenotype used in manual assertion'}],
  'gene': {'crossReferences': {},
           'id': 'HGNC:7',
           'secondaryIds': [],
           'species': {'commonNames': "['human', 'hsa']",
                       'dataProviderFullName': 'Rat Genome Database',
                       'dataProviderShortName': 'RGD',
                       'name': 'Homo sapiens',
                       'shortName': 'Hsa',
                       'taxonId': 'NCBITaxon:9606'},
           'symbol': 'A2M',
           'synonyms': [],
           'type': 'gene',
           'url': None},
  'geneticEntityType': 'gene',
  'model'

In [32]:
# diagnostics: check that every gene association carries exactly one evidence code
evidence_lists = [record["evidenceCodes"] for record in assoc_genes]
# Number of associations whose evidence list does NOT have exactly one entry.
n_not_single = sum(1 for codes in evidence_lists if len(codes) != 1)
print(len(evidence_lists))
print(n_not_single)

20
0


In [41]:
# Build the disease-to-gene association table for DOID (COVID-19) using the
# project's `extraction.animal_disease_to_genes` helper.
df = animal_disease_to_genes(doid=DOID)

In [42]:
# Display the gene association table built in the previous cell.
df

Unnamed: 0,primary_disease_id,disease_id,gene_id,gene_name,gene_species_id,gene_species_name,association_type,evidence,publication_id
0,DOID:0080600,DOID:0080600,HGNC:7,A2M,NCBITaxon:9606,Homo sapiens,is_implicated_in,"[{'id': 'ECO:0007191', 'name': 'inference by a...",[PMID:32747830]
1,DOID:0080600,DOID:0080600,HGNC:79,ABO,NCBITaxon:9606,Homo sapiens,is_implicated_in,"[{'id': 'ECO:0007191', 'name': 'inference by a...",[PMID:32379894]
2,DOID:0080600,DOID:0080600,HGNC:2707,ACE,NCBITaxon:9606,Homo sapiens,is_implicated_in,"[{'id': 'ECO:0007191', 'name': 'inference by a...",[PMID:32286246]
3,DOID:0080600,DOID:0080600,HGNC:2707,ACE,NCBITaxon:9606,Homo sapiens,is_not_implicated_in,"[{'id': 'ECO:0007191', 'name': 'inference by a...",[PMID:32386188]
4,DOID:0080600,DOID:0080600,HGNC:13557,ACE2,NCBITaxon:9606,Homo sapiens,is_implicated_in,"[{'id': 'ECO:0000315', 'name': 'mutant phenoty...","[PMID:32220422, PMID:32380511]"
5,DOID:0080600,DOID:0080600,HGNC:336,AGTR1,NCBITaxon:9606,Homo sapiens,is_implicated_in,"[{'id': 'ECO:0000315', 'name': 'mutant phenoty...",[PMID:32228222]
6,DOID:0080600,DOID:0080600,HGNC:399,ALB,NCBITaxon:9606,Homo sapiens,is_marker_for,"[{'id': 'ECO:0000270', 'name': 'expression pat...","[PMID:32198776, PMID:32427582]"
7,DOID:0080600,DOID:0080600,HGNC:485,ANGPT2,NCBITaxon:9606,Homo sapiens,is_marker_for,"[{'id': 'ECO:0000270', 'name': 'expression pat...",[PMID:32458111]
8,DOID:0080600,DOID:0080600,HGNC:1027,BDH1,NCBITaxon:9606,Homo sapiens,is_marker_for,"[{'id': 'ECO:0000270', 'name': 'expression pat...",[PMID:32456948]
9,DOID:0080600,DOID:0080600,HGNC:1318,C3,NCBITaxon:9606,Homo sapiens,is_marker_for,"[{'id': 'ECO:0000270', 'name': 'expression pat...",[PMID:32434211]


In [45]:
# Count gene associations per species name.
df["gene_species_name"].value_counts()

Homo sapiens    20
Name: gene_species_name, dtype: int64

---

# Alleles

In [46]:
# Fetch the allele associations for DOID from the Alliance API.
route = "/api/disease/{id}/alleles"
# Explicit timeout (seconds): requests waits indefinitely by default, which
# would hang the kernel on a stalled connection during Restart & Run All.
r = requests.get(API + route.format(id=DOID), timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error payload.
r.raise_for_status()
# The association records live under the "results" key of the payload.
assoc_alleles = r.json()["results"]

pprint(assoc_alleles)

[{'allele': {'category': 'allele',
             'crossReferences': {'primary': {'displayName': 'MGI:6431336',
                                             'name': 'MGI:6431336',
                                             'url': 'http://www.informatics.jax.org/allele/MGI:6431336'},
                                 'references': {'displayName': 'MGI:6431336',
                                                'name': 'MGI:6431336',
                                                'url': 'http://www.informatics.jax.org/reference/allele/MGI:6431336?typeFilter=Literature'}},
             'hasDisease': False,
             'hasPhenotype': False,
             'id': 'MGI:6431336',
             'secondaryIds': [],
             'species': {'commonNames': "['mouse', 'mmu']",
                         'dataProviderFullName': 'Mouse Genome Informatics',
                         'dataProviderShortName': 'MGI',
                         'name': 'Mus musculus',
                         'shortName': 'Mmu',


In [48]:
# Build the disease-to-allele association table for DOID (COVID-19) using the
# project's `extraction.animal_disease_to_alleles` helper, then display it.
# NOTE: this rebinds `df`, replacing the gene table assigned in the Genes section.
df = animal_disease_to_alleles(doid=DOID)
df

Unnamed: 0,primary_disease_id,disease_id,allele_id,allele_name,allele_species_id,allele_species_name,association_type,evidence,publication_id
0,DOID:0080600,DOID:0080600,MGI:6431336,Ace2<em1(ACE2)Yowa>,NCBITaxon:10090,Mus musculus,is_implicated_in,"[{'id': 'ECO:0000033', 'name': 'author stateme...",[PMID:32485164]
1,DOID:0080600,DOID:0080600,MGI:6401292,Tg(Ace2-ACE2)1Cqin,NCBITaxon:10090,Mus musculus,is_implicated_in,"[{'id': 'ECO:0000033', 'name': 'author stateme...","[PMID:32380511, PMID:32444876, PMID:32498696]"
2,DOID:0080600,DOID:0080600,MGI:6415223,Tg(FOXJ1-ACE2)1Rba,NCBITaxon:10090,Mus musculus,is_implicated_in,"[{'id': 'ECO:0000033', 'name': 'author stateme...","[PMID:32498696, PMID:32516571]"


---

# Models

In [19]:
# Fetch the model associations for DOID from the Alliance API.
route = "/api/disease/{id}/models"
# Explicit timeout (seconds): requests waits indefinitely by default, which
# would hang the kernel on a stalled connection during Restart & Run All.
r = requests.get(API + route.format(id=DOID), timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error payload.
r.raise_for_status()
# The association records live under the "results" key of the payload.
models = r.json()["results"]

pprint(models)

[{'allele': None,
  'associationType': 'IS_MODEL_OF',
  'disease': {'id': 'DOID:0080600',
              'name': 'COVID-19',
              'url': 'http://www.disease-ontology.org/?id=DOID:0080600'},
  'evidenceCodes': [{'displaySynonym': 'TAS',
                     'id': 'ECO:0000033',
                     'name': 'author statement supported by traceable '
                             'reference'}],
  'gene': None,
  'geneticEntityType': 'gene',
  'model': {'crossReferences': {},
            'id': 'MGI:6431340',
            'modCrossRefCompleteUrl': 'http://www.informatics.jax.org/allele/genoview/MGI:6431340',
            'name': 'Ace2<sup>em1(ACE2)Yowa</sup>/Ace2<sup>em1(ACE2)Yowa</sup> '
                    '[background:] C57BL/6-Ace2<sup>em1(ACE2)Yowa</sup>',
            'nameText': 'Ace2<em1(ACE2)Yowa>/Ace2<em1(ACE2)Yowa> [background:] '
                        'C57BL/6-Ace2<em1(ACE2)Yowa>',
            'secondaryIds': [],
            'species': {'commonNames': "['mouse', 'mmu']",
 

In [50]:
# Build the disease-to-model association table for DOID (COVID-19) using the
# project's `extraction.animal_disease_to_models` helper, then display it.
# NOTE: this rebinds `df`, replacing the table assigned in the previous section.
df = animal_disease_to_models(doid=DOID)
df

Unnamed: 0,primary_disease_id,disease_id,model_id,model_name,model_species_id,model_species_name,association_type,evidence,publication_id
0,DOID:0080600,DOID:0080600,MGI:6431340,Ace2<em1(ACE2)Yowa>/Ace2<em1(ACE2)Yowa> [backg...,NCBITaxon:10090,Mus musculus,IS_MODEL_OF,"[{'id': 'ECO:0000033', 'name': 'author stateme...",[PMID:32485164]
1,DOID:0080600,DOID:0080600,MGI:6402641,Tg(Ace2-ACE2)1Cqin/? [background:] involves: ICR,NCBITaxon:10090,Mus musculus,IS_MODEL_OF,"[{'id': 'ECO:0000033', 'name': 'author stateme...","[PMID:32380511, PMID:32444876]"
2,DOID:0080600,DOID:0080600,MGI:6415340,Tg(FOXJ1-ACE2)1Rba/? [background:] involves: C...,NCBITaxon:10090,Mus musculus,IS_MODEL_OF,"[{'id': 'ECO:0000033', 'name': 'author stateme...",[PMID:32516571]
