In [1]:
import requests
 
# Base URL of the Ensembl REST service; HTTPS keeps requests and responses
# from travelling in clear text.
ensembl_server = 'https://rest.ensembl.org'

def do_request(server, service, *args, **kwargs):
    """Send a GET request to a REST service and return the decoded JSON body.

    The URL is ``server/service`` followed by one ``/segment`` for every
    positional argument that is not ``None``.

    Args:
        server: Base URL of the REST server (no trailing slash).
        service: Endpoint path appended to ``server``.
        *args: Extra path segments; ``None`` entries are skipped and any
            other value is converted with ``str()``.
        **kwargs: Sent as the query-string parameters of the request.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond in time.
    """
    # str() lets callers pass non-string identifiers (e.g. integers) as segments.
    url_params = ''.join('/' + str(a) for a in args if a is not None)
    req = requests.get('%s/%s%s' % (server, service, url_params),
                       params=kwargs,
                       headers={'Content-Type': 'application/json'},
                       # Without a timeout a stalled server blocks the kernel forever.
                       timeout=60)

    # raise_for_status() does nothing for successful responses, so no guard is needed.
    req.raise_for_status()
    return req.json()

In [2]:
# Query the info/species service and print each entry's position in the
# returned list next to its 'name' field.
answer = do_request(ensembl_server, 'info/species')

species_names = (entry['name'] for entry in answer['species'])
for position, species_name in enumerate(species_names):
    print(position, species_name)

0 sus_scrofa_rongchang
1 ochotona_princeps
2 struthio_camelus_australis
3 oreochromis_niloticus
4 ictalurus_punctatus
5 pelusios_castaneus
6 cottoperca_gobio
7 cercocebus_atys
8 tursiops_truncatus
9 heterocephalus_glaber_male
10 salmo_trutta
11 pseudonaja_textilis
12 mus_musculus_casteij
13 acanthochromis_polyacanthus
14 gorilla_gorilla
15 zosterops_lateralis_melanops
16 choloepus_hoffmanni
17 accipiter_nisus
18 mus_musculus_akrj
19 poecilia_latipinna
20 prolemur_simus
21 kryptolebias_marmoratus
22 ornithorhynchus_anatinus
23 echeneis_naucrates
24 pteropus_vampyrus
25 mus_musculus_nodshiltj
26 mustela_putorius_furo
27 latimeria_chalumnae
28 panthera_leo
29 betta_splendens
30 sus_scrofa_wuzhishan
31 cavia_aperea
32 cyprinodon_variegatus
33 delphinapterus_leucas
34 bos_grunniens
35 sphaeramia_orbicularis
36 cricetulus_griseus_chok1gshd
37 strigops_habroptila
38 pongo_abelii
39 spermophilus_dauricus
40 sus_scrofa_bamei
41 mus_musculus_balbcj
42 cyprinus_carpio_huanghe
43 terrapene_carolin

In [3]:
# Query the info/external_dbs service for homo_sapiens, passing 'HGNC%' as
# the `filter` query parameter.
ext_dbs = do_request(
    ensembl_server,
    'info/external_dbs',
    'homo_sapiens',
    filter='HGNC%',
)
print(ext_dbs)

[{'description': None, 'display_name': 'HGNC Symbol', 'name': 'HGNC', 'release': '1'}, {'description': 'transcript name from HGNC', 'display_name': 'Transcript name', 'name': 'HGNC_trans_name', 'release': '1'}]


In [4]:
# Look up the LCT symbol for homo_sapiens, show the full record, and keep
# its 'id' field for the cells that follow.
answer = do_request(
    ensembl_server, 'lookup/symbol',
    'homo_sapiens', 'LCT',
)
print(answer)
lct_id = answer['id']

{'logic_name': 'ensembl_havana_gene_homo_sapiens', 'source': 'ensembl_havana', 'id': 'ENSG00000115850', 'end': 135837184, 'species': 'homo_sapiens', 'assembly_name': 'GRCh38', 'biotype': 'protein_coding', 'display_name': 'LCT', 'db_type': 'core', 'start': 135787850, 'description': 'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', 'seq_region_name': '2', 'version': 10, 'object_type': 'Gene', 'strand': -1}


In [5]:
# Request the sequence/id record for the identifier stored in lct_id and
# print the whole response.
lct_seq = do_request(
    ensembl_server,
    'sequence/id',
    lct_id,
)
print(lct_seq)

{'version': 10, 'id': 'ENSG00000115850', 'seq': 'AACAGTTCCTAGAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGGGGGTCAGACTGGGAGTCTGATAGAAATTTCATTTCCACCGCTGGTCCTCTAACCAATGACTTGCTGCACAACCTGAGTGGTCTCCTGGGAGACCAGAGTTCTAACTTTGTAGCAGGGGACAAAGACATGTATGTTTGTCACCAGCCACTGCCCACTTTCCTGCCAGAATACTTCAGCAGTCTCCATGCCAGTCAGATCACCCATTATAAGGTATTTCTGTCATGGGCACAGCTCCTCCCAGCAGGAAGCACCCAGAATCCAGACGAGAAAACAGTGCAGTGCTACCGGCGACTCCTCAAGGCCCTCAAGACTGCACGGCTTCAGCCCATGGTCATCCTGCACCACCAGACCCTCCCTGCCAGCACCCTCCGGAGAACCGAAGCCTTTGCTGACCTCTTCGCCGACTATGCCACATTCGCCTTCCACTCCTTCGGGGACCTAGTTGGGATCTGGTTCACCTTCAGTGACTTGGAGGAAGTGATCAAGGAGCTTCCCCACCAGGAATCAAGAGCGTCACAACTCCAGACCCTCAGTGATGCCCACAGAAAAGCCTATGAGATTTACCACGAAAGCTATGCTTTTCAGGGTGAGTACACATTGACCTGATGGTGACCCCTCGGCAACCTTCATCACACACCTTCCCCATCCTCCTTAGAGCAGATTCGACATTTCTCCCAACTCACCTTCAGCAGTCCTCTTATGTCTGTGCATAGGGAGAAATTAATATTGTAAATTGATTTCCCACTGGCGATAGGAAGGGGTAGCTAACATGGCAAAACACTCAGCATTTCCTTTGAAAAATATCTTTGAGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCCGAGGTGGGCGGATCACTTGAAGTCAGGAGTTCGAGAC

In [6]:
# Fetch the xrefs/id records for lct_id; for each one print its
# 'db_display_name' on one line and the full record on the next.
lct_xrefs = do_request(ensembl_server, 'xrefs/id', lct_id)

for entry in lct_xrefs:
    print(entry['db_display_name'], entry, sep='\n')

LRG display in Ensembl gene
{'primary_id': 'LRG_338', 'synonyms': [], 'description': 'Locus Reference Genomic record for LCT', 'dbname': 'ENS_LRG_gene', 'display_id': 'LRG_338', 'version': '0', 'db_display_name': 'LRG display in Ensembl gene', 'info_type': 'DIRECT', 'info_text': ''}
Expression Atlas
{'dbname': 'ArrayExpress', 'description': None, 'primary_id': 'ENSG00000115850', 'synonyms': [], 'version': '0', 'db_display_name': 'Expression Atlas', 'display_id': 'ENSG00000115850', 'info_type': 'DIRECT', 'info_text': ''}
NCBI gene (formerly Entrezgene)
{'db_display_name': 'NCBI gene (formerly Entrezgene)', 'version': '0', 'display_id': 'LCT', 'dbname': 'EntrezGene', 'description': 'lactase', 'synonyms': [], 'primary_id': '3938', 'info_text': '', 'info_type': 'DEPENDENT'}
HGNC Symbol
{'info_type': 'DIRECT', 'info_text': 'Generated via ensembl_manual', 'description': 'lactase', 'dbname': 'HGNC', 'synonyms': [], 'primary_id': 'HGNC:6530', 'db_display_name': 'HGNC Symbol', 'version': '0', '

In [7]:
# Same xrefs/id service as before, now with external_db='GO' and
# all_levels='1' sent as query parameters.
go_query = {'external_db': 'GO', 'all_levels': '1'}
refs = do_request(ensembl_server, 'xrefs/id', lct_id, **go_query)
print(lct_id, refs)

ENSG00000115850 [{'info_type': 'DIRECT', 'info_text': 'UniProt', 'primary_id': 'GO:0000016', 'synonyms': [], 'linkage_types': ['IEA'], 'description': 'lactase activity', 'dbname': 'GO', 'display_id': 'GO:0000016', 'version': '0', 'db_display_name': 'GO'}, {'info_type': 'DIRECT', 'info_text': 'GO_Central', 'linkage_types': ['IBA'], 'dbname': 'GO', 'description': 'lactase activity', 'primary_id': 'GO:0000016', 'synonyms': [], 'version': '0', 'db_display_name': 'GO', 'display_id': 'GO:0000016'}, {'info_type': 'DIRECT', 'info_text': 'PINC', 'synonyms': [], 'primary_id': 'GO:0000016', 'dbname': 'GO', 'description': 'lactase activity', 'linkage_types': ['TAS'], 'display_id': 'GO:0000016', 'db_display_name': 'GO', 'version': '0'}, {'info_text': 'UniProt', 'info_type': 'DIRECT', 'display_id': 'GO:0003824', 'db_display_name': 'GO', 'version': '0', 'synonyms': [], 'primary_id': 'GO:0003824', 'description': 'catalytic activity', 'dbname': 'GO', 'linkage_types': ['IEA']}, {'info_text': '', 'info_t

In [8]:
# Request the homology/id records for lct_id, passing type='orthologues' and
# sequence='none' as query parameters.
hom_response = do_request(ensembl_server, 'homology/id', lct_id, type='orthologues', sequence='none')
homologies = hom_response['data'][0]['homologies']

# Reset before scanning so a re-run can never pick up a stale identifier
# left in the kernel by an earlier execution.
horse_id = None
for homology in homologies:
    print(homology['target']['species'])
    if homology['target']['species'] != 'equus_caballus':
        continue
    # Horse entry: show the full record and its taxonomy level, and remember
    # the target identifier (the last match wins if there are several).
    print(homology)
    print(homology['taxonomy_level'])
    horse_id = homology['target']['id']

# Fail here with a clear message instead of a NameError in a later cell.
if horse_id is None:
    raise LookupError('no equus_caballus orthologue found for %s' % lct_id)

caenorhabditis_elegans
caenorhabditis_elegans
drosophila_melanogaster
pan_troglodytes
pan_paniscus
gorilla_gorilla
pongo_abelii
nomascus_leucogenys
cercocebus_atys
rhinopithecus_bieti
macaca_fascicularis
mandrillus_leucophaeus
rhinopithecus_roxellana
chlorocebus_sabaeus
macaca_nemestrina
macaca_mulatta
colobus_angolensis_palliatus
theropithecus_gelada
papio_anubis
piliocolobus_tephrosceles
saimiri_boliviensis_boliviensis
cebus_capucinus
callithrix_jacchus
aotus_nancymaae
carlito_syrichta
prolemur_simus
otolemur_garnettii
microcebus_murinus
propithecus_coquereli
fukomys_damarensis
tupaia_belangeri
mesocricetus_auratus
ictidomys_tridecemlineatus
oryctolagus_cuniculus
rattus_norvegicus
heterocephalus_glaber_female
oryctolagus_cuniculus
jaculus_jaculus
heterocephalus_glaber_male
octodon_degus
chinchilla_lanigera
microtus_ochrogaster
microtus_ochrogaster
cavia_porcellus
cricetulus_griseus_crigri
nannospalax_galili
microtus_ochrogaster
ochotona_princeps
mus_musculus
meriones_unguiculatus
per

In [9]:
# Request the lookup/id record for the identifier stored in horse_id and
# print the whole response.
horse_req = do_request(
    ensembl_server,
    'lookup/id',
    horse_id,
)
print(horse_req)

{'description': 'lactase [Source:VGNC Symbol;Acc:VGNC:19613]', 'seq_region_name': '18', 'version': 3, 'object_type': 'Gene', 'strand': -1, 'source': 'ensembl', 'logic_name': 'ensembl', 'end': 19724999, 'id': 'ENSECAG00000018594', 'species': 'equus_caballus', 'biotype': 'protein_coding', 'assembly_name': 'EquCab3.0', 'display_name': 'LCT', 'db_type': 'core', 'start': 19678126}
