In [None]:
import warnings

import pandas as pd
import scanpy as sc
import rpy2.robjects as robjects
from rpy2.rinterface_lib.embedded import RRuntimeError

# Attach the R package `homologene` in the embedded R session.
# A failing R call (e.g. `library()` on a package that is not installed) is raised
# by rpy2 as RRuntimeError, not ImportError, so RRuntimeError must be caught for the
# warning below to ever be emitted. ImportError is kept for backward compatibility.
try:
    robjects.r('library(homologene)')
except (ImportError, RRuntimeError):
    warnings.warn("homologene is not installed")

## Obtain all species supported by the homologene package and their IDs

In [None]:
# Fetch the `taxData` object exported by the homologene R package
species_ids = robjects.r('homologene::taxData')

# Rebuild it as a pandas DataFrame, one column per name reported by the R object
species_ids_df = pd.DataFrame(
    dict((col, species_ids.rx2(col)) for col in species_ids.names)
)

# Lookup table: species name (`name_txt`) -> taxonomy ID (`tax_id`)
species_ids_dict = (
    species_ids_df
    .set_index('name_txt')
    .loc[:, 'tax_id']
    .to_dict()
)

In [None]:
# Display the species table (columns `tax_id` and `name_txt`) built from homologene::taxData
species_ids_df

Unnamed: 0,tax_id,name_txt
0,10090,Mus musculus
1,10116,Rattus norvegicus
2,28985,Kluyveromyces lactis
3,318829,Magnaporthe oryzae
4,33169,Eremothecium gossypii
5,3702,Arabidopsis thaliana
6,4530,Oryza sativa
7,4896,Schizosaccharomyces pombe
8,4932,Saccharomyces cerevisiae
9,5141,Neurospora crassa


In [None]:
# Taxonomy ID for C. elegans, looked up by its `name_txt` entry
species_ids_dict["Caenorhabditis elegans"]

6239

In [None]:
# Taxonomy ID for human, looked up by its `name_txt` entry
species_ids_dict["Homo sapiens"]

9606

### This tutorial uses the [C. elegans](https://storage.googleapis.com/worm_public/ad_worm_aging.h5ad) dataset as an example

In [None]:
# Read the example AnnData file; the relative path means it must sit in the working directory
data_train = sc.read_h5ad('ad_worm_aging.h5ad')
# Gene names are taken from the `gene_names` column of the variable annotations
gene_names = data_train.var.loc[:, "gene_names"].tolist()

## Call the homologene package to map the C. elegans genes to their human homologs

In [None]:
# Source and target species for the homolog lookup. The names must match the
# `name_txt` entries used as keys of `species_ids_dict`; resolving the taxonomy IDs
# from that table replaces the previously hard-coded 6239 / 9606.
IN_SPECIES = "Caenorhabditis elegans"
OUT_SPECIES = "Homo sapiens"
in_tax = int(species_ids_dict[IN_SPECIES])
out_tax = int(species_ids_dict[OUT_SPECIES])

# Expose the gene names to R as the global variable `genelist`
r_gene_names = robjects.ListVector({'gene_names': robjects.StrVector(gene_names)})
robjects.globalenv['genelist'] = r_gene_names.rx2('gene_names')

# Run homologene() in R on `genelist`, mapping from the input to the output taxon
homologenes_mapping = robjects.r(
    f'homologene(genelist, inTax = {in_tax}, outTax = {out_tax})'
)

In [None]:
# Convert the R result into a pandas DataFrame, one column per name reported by the R object
homologenes_df = pd.DataFrame(
    dict((col, homologenes_mapping.rx2(col)) for col in homologenes_mapping.names)
)

In [None]:
# Display the homolog mapping table built from the homologene() result
homologenes_df

Unnamed: 0,6239,9606,6239_ID,9606_ID
0,homt-1,NTMT1,171590,28989
1,sesn-1,SESN3,171593,143686
2,Y48G1C.12,ANAPC10,3565580,10393
3,pgs-1,PGS1,171594,9489
4,csk-1,CSK,266817,1445
...,...,...,...,...
3080,mlc-2,MYLPF,181775,29895
3081,mlc-1,MYLPF,181776,29895
3082,npr-24,SSTR2,187897,6752
3083,H18N23.2,PPP1R3B,181788,79660
