In [1]:
import requests
 
# Base URL of the Ensembl REST service used by every request in this notebook.
# HTTPS is used so queries and responses do not travel in clear text.
ensembl_server = 'https://rest.ensembl.org'

def do_request(server, service, *args, **kwargs):
    """Send a GET request to a REST service and return the decoded JSON body.

    The URL is assembled as ``<server>/<service>[/<arg>...]``.

    Args:
        server: Base URL of the REST server (no trailing slash expected).
        service: Endpoint path appended after the server, e.g. ``'lookup/id'``.
        *args: Extra path segments, appended in order. ``None`` values are
            skipped; any other value is converted with ``str()``.
        **kwargs: Sent as query-string parameters of the request.

    Returns:
        The result of calling ``.json()`` on the response.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response
        (``requests.HTTPError``), or a ``requests`` timeout/connection error
        if the server cannot be reached or stops answering.
    """
    # str() allows non-string segments (e.g. integer ids); None is dropped so
    # optional segments can be passed through unconditionally by callers.
    url_params = ''.join('/' + str(a) for a in args if a is not None)
    req = requests.get('%s/%s%s' % (server, service, url_params),
                       params=kwargs,
                       headers={'Content-Type': 'application/json'},
                       # (connect, read) seconds: without a timeout a stalled
                       # connection would block the kernel forever.
                       timeout=(10, 120))

    # raise_for_status() is a no-op for successful responses, so no separate
    # ``ok`` check is needed.
    req.raise_for_status()
    return req.json()

In [2]:
# Fetch the species catalogue from the server and print each entry's name.
answer = do_request(
    ensembl_server,
    'info/species',
)
for sp in answer['species']:
    species_name = sp['name']
    print(species_name)

saccharomyces_cerevisiae
mus_pahari
taeniopygia_guttata
otolemur_garnettii
erinaceus_europaeus
anolis_carolinensis
gadus_morhua
capra_hircus
chlorocebus_sabaeus
colobus_angolensis_palliatus
tursiops_truncatus
mus_musculus
bos_taurus
monodelphis_domestica
caenorhabditis_elegans
pteropus_vampyrus
microcebus_murinus
mandrillus_leucophaeus
ovis_aries
carlito_syrichta
mus_musculus_akrj
fukomys_damarensis
pelodiscus_sinensis
astyanax_mexicanus
mus_musculus_aj
peromyscus_maniculatus_bairdii
ficedula_albicollis
rhinopithecus_roxellana
cricetulus_griseus_chok1gshd
cavia_aperea
dipodomys_ordii
lepisosteus_oculatus
anas_platyrhynchos
canis_familiaris
callithrix_jacchus
pongo_abelii
mustela_putorius_furo
mus_musculus_c57bl6nj
xenopus_tropicalis
nannospalax_galili
mus_musculus_pwkphj
aotus_nancymaae
mus_musculus_nzohlltj
mus_musculus_129s1svimj
mus_musculus_cbaj
cavia_porcellus
ochotona_princeps
pan_troglodytes
petromyzon_marinus
pan_paniscus
mus_musculus_c3hhej
procavia_capensis
oryzias_latipes
rh

In [3]:
# List the external databases registered for human, restricted by the
# 'HGNC%' filter (the '%' appears to act as a wildcard: the names returned
# in the recorded output all start with HGNC).
ext_dbs = do_request(
    ensembl_server,
    'info/external_dbs',
    'homo_sapiens',
    filter='HGNC%',
)
print(ext_dbs)

[{'name': 'HGNC', 'release': '1', 'description': None, 'display_name': 'HGNC Symbol'}, {'name': 'HGNC_trans_name', 'release': '1', 'description': 'transcript name from HGNC', 'display_name': 'HGNC transcript name'}]


In [4]:
# Look up the human gene by its symbol (LCT), show the full record, and keep
# its 'id' field: later cells use lct_id as the key for their queries.
answer = do_request(
    ensembl_server,
    'lookup/symbol',
    'homo_sapiens',
    'LCT',
)
print(answer)
lct_id = answer['id']

{'source': 'ensembl_havana', 'object_type': 'Gene', 'logic_name': 'ensembl_havana_gene', 'version': 9, 'species': 'homo_sapiens', 'description': 'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', 'display_name': 'LCT', 'assembly_name': 'GRCh38', 'biotype': 'protein_coding', 'end': 135837180, 'seq_region_name': '2', 'db_type': 'core', 'strand': -1, 'id': 'ENSG00000115850', 'start': 135787840}


In [5]:
# Retrieve the sequence record stored under the LCT gene id and display it.
lct_seq = do_request(
    ensembl_server,
    'sequence/id',
    lct_id,
)
print(lct_seq)

{'desc': 'chromosome:GRCh38:2:135787840:135837180:-1', 'query': 'ENSG00000115850', 'id': 'ENSG00000115850', 'seq': 'GTTCCTAGAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGGGGGTCAGACTGGGAGTCTGATAGAAATTTCATTTCCACCGCTGGTCCTCTAACCAATGACTTGCTGCACAACCTGAGTGGTCTCCTGGGAGACCAGAGTTCTAACTTTGTAGCAGGGGACAAAGACATGTATGTTTGTCACCAGCCACTGCCCACTTTCCTGCCAGAATACTTCAGCAGTCTCCATGCCAGTCAGATCACCCATTATAAGGTATTTCTGTCATGGGCACAGCTCCTCCCAGCAGGAAGCACCCAGAATCCAGACGAGAAAACAGTGCAGTGCTACCGGCGACTCCTCAAGGCCCTCAAGACTGCACGGCTTCAGCCCATGGTCATCCTGCACCACCAGACCCTCCCTGCCAGCACCCTCCGGAGAACCGAAGCCTTTGCTGACCTCTTCGCCGACTATGCCACATTCGCCTTCCACTCCTTCGGGGACCTAGTTGGGATCTGGTTCACCTTCAGTGACTTGGAGGAAGTGATCAAGGAGCTTCCCCACCAGGAATCAAGAGCGTCACAACTCCAGACCCTCAGTGATGCCCACAGAAAAGCCTATGAGATTTACCACGAAAGCTATGCTTTTCAGGGTGAGTACACATTGACCTGATGGTGACCCCTCGGCAACCTTCATCACACACCTTCCCCATCCTCCTTAGAGCAGATTCGACATTTCTCCCAACTCACCTTCAGCAGTCCTCTTATGTCTGTGCATAGGGAGAAATTAATATTGTAAATTGATTTCCCACTGGCGATAGGAAGGGGTAGCTAACATGGCAAAACACTCAGCATTTCCTTTGAAAAATATCTTTGAGGCTCACGC

In [6]:
# Fetch every cross-reference attached to the LCT gene id; for each one print
# the display name of its source database followed by the full record.
lct_xrefs = do_request(
    ensembl_server,
    'xrefs/id',
    lct_id,
)
for xref in lct_xrefs:
    print(xref['db_display_name'], xref, sep='\n')

LRG display in Ensembl gene
{'display_id': 'LRG_338', 'primary_id': 'LRG_338', 'version': '0', 'description': 'Locus Reference Genomic record for LCT', 'dbname': 'ENS_LRG_gene', 'synonyms': [], 'info_text': '', 'info_type': 'DIRECT', 'db_display_name': 'LRG display in Ensembl gene'}
Expression Atlas
{'display_id': 'ENSG00000115850', 'primary_id': 'ENSG00000115850', 'version': '0', 'description': '', 'dbname': 'ArrayExpress', 'synonyms': [], 'info_text': '', 'info_type': 'DIRECT', 'db_display_name': 'Expression Atlas'}
MIM morbid
{'display_id': 'LACTASE DEFICIENCY, CONGENITAL [#223000]', 'primary_id': '223000', 'version': '0', 'description': 'LACTASE DEFICIENCY, CONGENITAL;;ALACTASIA, CONGENITAL;;DISACCHARIDE INTOLERANCE II', 'dbname': 'MIM_MORBID', 'synonyms': [], 'info_text': '', 'info_type': 'DEPENDENT', 'db_display_name': 'MIM morbid'}
MIM gene
{'display_id': 'LACTASE; LCT [*603202]', 'primary_id': '603202', 'version': '0', 'description': 'LACTASE; LCT;;LAC;;LACTASE-PHLORIZIN HYDROL

In [7]:
# Same cross-reference endpoint, this time restricted to the GO database and
# with all_levels='1' passed as a query parameter.
refs = do_request(
    ensembl_server,
    'xrefs/id',
    lct_id,
    external_db='GO',
    all_levels='1',
)
print(lct_id, refs)

ENSG00000115850 [{'display_id': 'GO:0000016', 'primary_id': 'GO:0000016', 'version': '0', 'linkage_types': ['IEA'], 'description': 'lactase activity', 'dbname': 'GO', 'synonyms': [], 'info_text': 'UniProt', 'info_type': 'DIRECT', 'db_display_name': 'GO'}, {'display_id': 'GO:0000016', 'primary_id': 'GO:0000016', 'version': '0', 'linkage_types': ['IBA'], 'description': 'lactase activity', 'dbname': 'GO', 'synonyms': [], 'info_text': 'GO_Central', 'info_type': 'DIRECT', 'db_display_name': 'GO'}, {'display_id': 'GO:0000016', 'primary_id': 'GO:0000016', 'version': '0', 'linkage_types': ['TAS'], 'description': 'lactase activity', 'dbname': 'GO', 'synonyms': [], 'info_text': 'PINC', 'info_type': 'DIRECT', 'db_display_name': 'GO'}, {'display_id': 'GO:0000016', 'primary_id': 'GO:0000016', 'version': '0', 'linkage_types': ['TAS'], 'description': 'lactase activity', 'dbname': 'GO', 'synonyms': [], 'info_text': 'Reactome', 'info_type': 'DIRECT', 'db_display_name': 'GO'}, {'display_id': 'GO:0003824

In [8]:
# Request the orthologues of the LCT gene (sequence='none' is passed as a
# query parameter) and scan them for the horse entry.
hom_response = do_request(ensembl_server, 'homology/id', lct_id, type='orthologues', sequence='none')
homologies = hom_response['data'][0]['homologies']

# Bind horse_id up front: otherwise a response without a horse entry would
# either leave the name undefined (NameError in the next cell) or silently
# reuse a value left in the kernel by an earlier run.
horse_id = None
for homology in homologies:
    # Every target species is printed; only the horse record is shown in full.
    print(homology['target']['species'])
    if homology['target']['species'] != 'equus_caballus':
        continue
    print(homology)
    print(homology['taxonomy_level'])
    horse_id = homology['target']['id']

if horse_id is None:
    raise LookupError('no equus_caballus orthologue found for %s' % lct_id)

pan_paniscus
pan_troglodytes
gorilla_gorilla
pongo_abelii
nomascus_leucogenys
colobus_angolensis_palliatus
rhinopithecus_roxellana
macaca_fascicularis
papio_anubis
cercocebus_atys
rhinopithecus_bieti
chlorocebus_sabaeus
mandrillus_leucophaeus
macaca_mulatta
macaca_nemestrina
aotus_nancymaae
saimiri_boliviensis_boliviensis
cebus_capucinus
callithrix_jacchus
tupaia_belangeri
loxodonta_africana
procavia_capensis
otolemur_garnettii
propithecus_coquereli
canis_familiaris
ovis_aries
mus_spretus
equus_caballus
{'source': {'perc_pos': 92.1121, 'protein_id': 'ENSP00000264162', 'taxon_id': 9606, 'cigar_line': '351M2D1503MD73M', 'species': 'homo_sapiens', 'perc_id': 85.781, 'align_seq': 'ATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGGGGGTCAGACTGGGAGTCTGATAGAAATTTCATTTCCACCGCTGGTCCTCTAACCAATGACTTGCTGCACAACCTGAGTGGTCTCCTGGGAGACCAGAGTTCTAACTTTGTAGCAGGGGACAAAGACATGTATGTTTGTCACCAGCCACTGCCCACTTTCCTGCCAGAATACTTCAGCAGTCTCCATGCCAGTCAGATCACCCATTATAAGGTATTTCTGTCATGGGCACAGCTCCTCCCAGCAGGAAGCACCCAGAATCC

In [9]:
# Look up the record stored under the horse orthologue's id and display it.
horse_req = do_request(
    ensembl_server,
    'lookup/id',
    horse_id,
)
print(horse_req)

{'source': 'ensembl', 'object_type': 'Gene', 'logic_name': 'ensembl', 'version': 1, 'species': 'equus_caballus', 'description': 'lactase [Source:VGNC Symbol;Acc:VGNC:19613]', 'display_name': 'LCT', 'assembly_name': 'EquCab2', 'biotype': 'protein_coding', 'end': 19657160, 'seq_region_name': '18', 'db_type': 'core', 'strand': -1, 'id': 'ENSECAG00000018594', 'start': 19610968}


In [10]:
# TODO: maybe explore the synteny of MCM6 and LCT with horse (equus_caballus) and gorilla