### Create anndata object for Haber, 2017 LGR5+ stem cells

- **Developed by:** Anna Maguza
- **Affilation:** Faculty of Medicine, Würzburg University
- **Date of creation:** 19th August 2024
- **Last modified date:** 20th August 2024

+ Import packages

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad

+ Load datasets

In [2]:
AtlasFullLength_TPM = pd.read_csv('/../../../gut_project/raw_fastq_files/Haber_2017/GSE92332/GSE92332_AtlasFullLength_TPM.txt', sep='\t', index_col=0)
AtlasFullLength_TPM = AtlasFullLength_TPM.T
AtlasFullLength_TPM.head()

Unnamed: 0,0610005C13Rik,0610007N19Rik,0610007P14Rik,0610008F07Rik,0610009B14Rik,0610009B22Rik,0610009D07Rik,0610009L18Rik,0610009O20Rik,0610010B08Rik,...,Zxda,Zxdb,Zxdc,Zyg11a,Zyg11b,Zyx,Zzef1,Zzz3,a,l7Rn6
S634_Lgr5GFP_M7_GFPLow_Enterocyte,103.9,21.93,150.61,8.6,0.0,59.52,33.52,0.0,1.08,0.64,...,0.0,1.67,1.04,0.0,3.82,11.72,55.49,0.0,0.0,5.99
S745_Lgr5GFP_M7_GFPLow_Stem,0.0,70.47,0.0,0.0,0.0,10.22,51.87,0.0,54.82,45.11,...,0.22,0.0,7.72,0.0,12.01,3.77,4.03,0.0,0.0,53.27
S856_Lgr5GFP_M7_GFPLow_Stem,8.99,20.84,0.0,0.0,0.0,76.65,219.94,0.0,57.28,7.79,...,0.0,0.0,0.0,0.0,0.26,0.0,14.46,0.56,0.0,57.09
S967_Lgr5GFP_M7_GFPLow_Goblet,0.0,67.15,212.32,0.0,0.0,64.97,45.24,0.0,135.89,0.0,...,0.0,0.0,0.0,0.0,5.01,0.0,33.99,0.0,0.0,37.72
S1078_Lgr5GFP_M7_GFPLow_Goblet,50.94,36.25,104.89,0.0,0.0,63.68,93.14,0.0,1.05,0.0,...,0.39,0.0,0.0,0.0,0.47,0.0,16.56,15.39,0.0,0.0


In [3]:
AtlasFullLength_TPM['barcode'] = AtlasFullLength_TPM.index.copy()

In [4]:
AtlasFullLength_TPM[['barcode', 'Gene_marker', 'Donor_ID', 'GFP_intensity', 'Cell_Type']] = AtlasFullLength_TPM['barcode'].str.split('_', expand=True)

In [5]:
AtlasFullLength_TPM.head()

Unnamed: 0,0610005C13Rik,0610007N19Rik,0610007P14Rik,0610008F07Rik,0610009B14Rik,0610009B22Rik,0610009D07Rik,0610009L18Rik,0610009O20Rik,0610010B08Rik,...,Zyx,Zzef1,Zzz3,a,l7Rn6,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type
S634_Lgr5GFP_M7_GFPLow_Enterocyte,103.9,21.93,150.61,8.6,0.0,59.52,33.52,0.0,1.08,0.64,...,11.72,55.49,0.0,0.0,5.99,S634,Lgr5GFP,M7,GFPLow,Enterocyte
S745_Lgr5GFP_M7_GFPLow_Stem,0.0,70.47,0.0,0.0,0.0,10.22,51.87,0.0,54.82,45.11,...,3.77,4.03,0.0,0.0,53.27,S745,Lgr5GFP,M7,GFPLow,Stem
S856_Lgr5GFP_M7_GFPLow_Stem,8.99,20.84,0.0,0.0,0.0,76.65,219.94,0.0,57.28,7.79,...,0.0,14.46,0.56,0.0,57.09,S856,Lgr5GFP,M7,GFPLow,Stem
S967_Lgr5GFP_M7_GFPLow_Goblet,0.0,67.15,212.32,0.0,0.0,64.97,45.24,0.0,135.89,0.0,...,0.0,33.99,0.0,0.0,37.72,S967,Lgr5GFP,M7,GFPLow,Goblet
S1078_Lgr5GFP_M7_GFPLow_Goblet,50.94,36.25,104.89,0.0,0.0,63.68,93.14,0.0,1.05,0.0,...,0.0,16.56,15.39,0.0,0.0,S1078,Lgr5GFP,M7,GFPLow,Goblet


In [6]:
X = AtlasFullLength_TPM.iloc[:, 1:20108] 
var = pd.DataFrame(index=AtlasFullLength_TPM.columns[1:20108])
obs = AtlasFullLength_TPM.iloc[:, 20108:20114]

adata = ad.AnnData(X=X.values, var=var, obs=obs)

In [7]:
adata.write('/../../../gut_project/raw_fastq_files/Haber_2017/Haber_2017_Smartseq_LGR5_FACS_data.h5ad')

+ Add human orthologue genes

In [8]:
adata_copy = adata.copy()

In [95]:
human_mouse_orthologues = pd.read_csv('/../../../ensembl_data/mouse_to_human_orthologues_ensembl.txt', sep='\t', index_col=0)

In [91]:
duplicates = human_mouse_orthologues['Gene name'].duplicated()
num_duplicates = duplicates.sum()
num_duplicates

85408

In [92]:
human_mouse_orthologues_cleaned = human_mouse_orthologues.loc[
    human_mouse_orthologues.groupby('Gene name')['%id. target Human gene identical to query gene'].idxmax()
]
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.reset_index(drop=True)


In [93]:
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.drop_duplicates(subset='Gene name')

In [13]:
intersect = np.intersect1d(human_mouse_orthologues_cleaned['Gene name'], adata.var.index)
len(intersect)

14814

In [14]:
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.loc[human_mouse_orthologues_cleaned['Gene name'].isin(intersect)]
adata = adata[:, intersect]

  if not is_categorical_dtype(df_full[k]):


In [15]:
adata.var['original_mouse_gene_name'] = adata.var.index.copy()

  adata.var['original_mouse_gene_name'] = adata.var.index.copy()


In [16]:
adata.var = adata.var.merge(human_mouse_orthologues_cleaned, left_index=True, right_on='Gene name')

AnnData expects .var.index to contain strings, but got values like:
    [0, 14, 18, 24, 30]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)


In [18]:
adata.var = adata.var.set_index('Human gene name')

In [20]:
adata.var.index = adata.var.index.astype(str)

In [21]:
adata.write('/../../../gut_project/Processed_data/Gut_data/Haber_2017_Smartseq/Haber_2017_Smartseq_LGR5_FACS_data_human_orthologues_genes.h5ad')

+ Create anndata object from re-mapped data

In [72]:
countmatrix = pd.read_csv('/../../../gut_project/raw_fastq_files/Haber_2017/counts_all.txt', 
                          sep='\t', 
                          comment='#', 
                          skiprows=1)

In [73]:
countmatrix.columns = countmatrix.columns.str.split('/').str[-1].str.replace('_Aligned.sortedByCoord.out.bam', '')

In [74]:
countmatrix = countmatrix.set_index('Geneid')

+ Go from SRR to cell name as in FACS data

In [75]:
adata = sc.read_h5ad('/../../../gut_project/Processed_data/Gut_data/Haber_2017_Smartseq/Haber_2017_Smartseq_LGR5_FACS_data_human_orthologues_genes.h5ad')
adata = adata[adata.obs['Cell_Type'] == 'Stem']
adata = adata[adata.obs['Gene_marker'] == 'Lgr5GFP']
adata.obs['full_cell_description'] = adata.obs.index.copy()

  utils.warn_names_duplicates("var")
  if not is_categorical_dtype(df_full[k]):
  adata.obs['full_cell_description'] = adata.obs.index.copy()
  utils.warn_names_duplicates("var")


In [76]:
dataframe = pd.read_csv('/../../../gut_project/raw_fastq_files/Haber_2017/sample_database.csv')
dataframe = dataframe[dataframe['Title'].str.contains('Atlas WT single cell ')]
dataframe['Sample'] = dataframe['Title'].str.split('Atlas WT single cell ').str[1]
dataframe['Sample'] = 'S' + dataframe['Sample']

In [77]:
dataframe = dataframe[dataframe['Sample'].isin(adata.obs['barcode'])]

In [78]:
countmatrix = countmatrix.T
countmatrix = countmatrix.iloc[5:]
countmatrix.head(10)

Geneid,ENSMUSG00000102693.2,ENSMUSG00000064842.3,ENSMUSG00000051951.6,ENSMUSG00000102851.2,ENSMUSG00000103377.2,ENSMUSG00000104017.2,ENSMUSG00000103025.2,ENSMUSG00000089699.2,ENSMUSG00000103201.2,ENSMUSG00000103147.2,...,ENSMUSG00000096550.2,ENSMUSG00000094172.2,ENSMUSG00000094887.2,ENSMUSG00000091585.3,ENSMUSG00000095763.2,ENSMUSG00000095523.2,ENSMUSG00000095475.2,ENSMUSG00000094855.2,ENSMUSG00000095019.2,ENSMUSG00000095041.8
SRX2418522,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,105
SRX2418533,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,904
SRX2418544,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,354
SRX2418555,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43
SRX2418577,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,138
SRX2418588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,502
SRX2418599,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,136
SRX2418622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,18
SRX2418633,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,529
SRX2418666,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,78


In [79]:
sra_accession_df = dataframe[['Sample', 'SRA Accession']]
adata.obs = adata.obs.merge(sra_accession_df, left_on='barcode', right_on='Sample', how='left')

In [80]:
adata.obs

Unnamed: 0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,Sample,SRA Accession
0,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,S745,SRX2419245
1,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,S856,SRX2419356
2,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,S222,SRX2418722
3,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,S333,SRX2418833
4,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,S589,SRX2419089
...,...,...,...,...,...,...,...,...
487,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,S423,SRX2418923
488,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,S424,SRX2418924
489,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,S425,SRX2418925
490,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,S427,SRX2418927


In [81]:
countmatrix_rows = set(countmatrix.index)
sra_accessions = set(adata.obs['SRA Accession'])
missing_in_countmatrix_rows = sra_accessions - countmatrix_rows
print(missing_in_countmatrix_rows)

{'SRX2419104', 'SRX2418850', 'SRX2419603'}


In [82]:
countmatrix['row_index'] = countmatrix.index
merged_df = pd.merge(adata.obs, countmatrix, left_on='SRA Accession', right_on='row_index', how='inner')
merged_df = merged_df.drop(columns=['row_index'])
merged_df.set_index('SRA Accession', inplace=True)

In [83]:
merged_df

Unnamed: 0_level_0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,Sample,ENSMUSG00000102693.2,ENSMUSG00000064842.3,ENSMUSG00000051951.6,...,ENSMUSG00000096550.2,ENSMUSG00000094172.2,ENSMUSG00000094887.2,ENSMUSG00000091585.3,ENSMUSG00000095763.2,ENSMUSG00000095523.2,ENSMUSG00000095475.2,ENSMUSG00000094855.2,ENSMUSG00000095019.2,ENSMUSG00000095041.8
SRA Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SRX2419245,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,S745,0,0,0,...,0,0,0,0,0,0,0,0,0,583
SRX2419356,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,S856,0,0,0,...,0,0,0,0,0,0,0,0,0,274
SRX2418722,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,S222,0,0,0,...,0,0,0,0,0,0,0,0,0,11
SRX2418833,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,S333,0,0,0,...,0,0,0,0,0,0,0,0,0,153
SRX2419089,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,S589,0,0,0,...,0,0,0,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SRX2418923,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,S423,0,0,0,...,0,0,0,0,0,0,0,0,0,378
SRX2418924,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,S424,0,0,0,...,0,0,0,0,0,0,0,0,0,694
SRX2418925,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,S425,0,0,0,...,0,0,0,0,0,0,0,0,0,391
SRX2418927,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,S427,0,0,0,...,0,0,0,0,0,0,0,0,0,398


In [84]:
obs = merged_df.iloc[:, :7]
X = merged_df.iloc[:, 7:]
var = pd.DataFrame(index=X.columns)
new_adata = ad.AnnData(X=X.values, obs=obs, var=var)

In [85]:
new_adata.obs

Unnamed: 0_level_0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,Sample
SRA Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SRX2419245,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,S745
SRX2419356,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,S856
SRX2418722,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,S222
SRX2418833,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,S333
SRX2419089,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,S589
...,...,...,...,...,...,...,...
SRX2418923,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,S423
SRX2418924,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,S424
SRX2418925,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,S425
SRX2418927,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,S427


In [88]:
new_adata.var = new_adata.var.astype(str)
new_adata.var.index = new_adata.var.index.astype(str)
new_adata.obs = new_adata.obs.astype(str)
new_adata.obs.index = new_adata.obs.index.astype(str)
new_adata.X = new_adata.X.astype(float)

In [89]:
new_adata.write_h5ad('/../../../gut_project/Processed_data/Gut_data/Haber_2017_Smartseq/Haber_2017_Smartseq_stem_cells_remapped.h5ad')

+ Add human paralogues

In [96]:
human_mouse_orthologues = pd.read_csv('/../../../ensembl_data/mouse_to_human_orthologues_ensembl.txt', sep='\t', index_col=0)

In [100]:
duplicates = human_mouse_orthologues['Gene stable ID version'].duplicated()
num_duplicates = duplicates.sum()
num_duplicates

85346

In [101]:
human_mouse_orthologues_cleaned = human_mouse_orthologues.loc[
    human_mouse_orthologues.groupby('Gene stable ID version')['%id. target Human gene identical to query gene'].idxmax()
]
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.reset_index(drop=True)


In [102]:
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.drop_duplicates(subset='Gene stable ID version')

In [103]:
intersect = np.intersect1d(human_mouse_orthologues_cleaned['Gene stable ID version'], new_adata.var.index)
len(intersect)

20473

In [104]:
human_mouse_orthologues_cleaned = human_mouse_orthologues_cleaned.loc[human_mouse_orthologues_cleaned['Gene stable ID version'].isin(intersect)]
new_adata = new_adata[:, intersect]

  if not is_categorical_dtype(df_full[k]):


In [105]:
new_adata.var['original_mouse_gene_id'] = new_adata.var.index.copy()

  new_adata.var['original_mouse_gene_id'] = new_adata.var.index.copy()


In [106]:
new_adata.var = new_adata.var.merge(human_mouse_orthologues_cleaned, left_index=True, right_on='Gene stable ID version')

AnnData expects .var.index to contain strings, but got values like:
    [0, 1, 5, 14, 18]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)


In [107]:
new_adata.var = new_adata.var.set_index('Human gene name')

In [108]:
new_adata.var.index = new_adata.var.index.astype(str)

In [109]:
new_adata.var

Unnamed: 0_level_0,original_mouse_gene_id,Gene stable ID version,Transcript stable ID,Transcript stable ID version,Human gene stable ID,Human homology type,%id. target Human gene identical to query gene,%id. query gene identical to target Human gene,Human Gene-order conservation score,"Human orthology confidence [0 low, 1 high]",Human protein or transcript stable ID,Query protein or transcript ID,Protein stable ID,Protein stable ID version,Gene name
Human gene name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
GNAI3,ENSMUSG00000000001.5,ENSMUSG00000000001.5,ENSMUST00000000001,ENSMUST00000000001.5,ENSG00000065135,ortholog_one2one,98.3051,98.3051,100.0,1,ENSP00000358867,ENSMUSP00000000001,ENSMUSP00000000001,ENSMUSP00000000001.5,Gnai3
CDC45,ENSMUSG00000000028.16,ENSMUSG00000000028.16,ENSMUST00000000028,ENSMUST00000000028.14,ENSG00000093009,ortholog_one2one,91.3428,91.3428,100.0,1,ENSP00000263201,ENSMUSP00000000028,ENSMUSP00000000028,ENSMUSP00000000028.8,Cdc45
SCML2,ENSMUSG00000000037.18,ENSMUSG00000000037.18,ENSMUST00000238603,ENSMUST00000238603.2,ENSG00000102098,ortholog_one2one,36.5325,50.5714,100.0,0,ENSP00000251900,ENSMUSP00000076593,ENSMUSP00000158772,ENSMUSP00000158772.2,Scml2
APOH,ENSMUSG00000000049.12,ENSMUSG00000000049.12,ENSMUST00000000049,ENSMUST00000000049.6,ENSG00000091583,ortholog_one2one,76.5217,76.5217,100.0,1,ENSP00000205948,ENSMUSP00000000049,ENSMUSP00000000049,ENSMUSP00000000049.6,Apoh
NARF,ENSMUSG00000000056.8,ENSMUSG00000000056.8,ENSMUST00000103015,ENSMUST00000103015.4,ENSG00000141562,ortholog_one2one,84.4156,85.5263,100.0,1,ENSP00000309899,ENSMUSP00000099304,ENSMUSP00000099304,ENSMUSP00000099304.4,Narf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
RNU6-977P,ENSMUSG00002076963.1,ENSMUSG00002076963.1,ENSMUST00020183153,ENSMUST00020183153.1,ENSG00000252220,ortholog_one2many,60.3774,62.1359,,0,ENST00000516411,ENSMUST00020183153,,,Gm54684
SNORD100,ENSMUSG00002076971.1,ENSMUSG00002076971.1,ENSMUST00020182284,ENSMUST00020182284.1,ENSG00000221500,ortholog_one2one,83.7838,81.5789,,1,ENST00000408573,ENSMUST00020182284,,,Snord100
5S_rRNA,ENSMUSG00002076988.1,ENSMUSG00002076988.1,ENSMUST00020182589,ENSMUST00020182589.1,ENSG00000277411,ortholog_one2many,42.3077,51.8868,,0,ENST00000614916,ENSMUST00020182589,,,Gm56371
,ENSMUSG00002076990.1,ENSMUSG00002076990.1,ENSMUST00020183326,ENSMUST00020183326.1,ENSG00000281311,ortholog_many2many,79.2593,79.2593,,1,ENST00000628590,ENSMUST00020183326,,,Gm22711


In [110]:
new_adata.write('/../../../gut_project/Processed_data/Gut_data/Haber_2017_Smartseq/Haber_2017_Smartseq_stem_cells_remapped_human_orthologues_genes.h5ad')