### Create anndata object for Haber, 2017 LGR5+ stem cells

- **Developed by:** Anna Maguza
- **Affilation:** Faculty of Medicine, Würzburg University
- **Date of creation:** 19th August 2024
- **Last modified date:** 3rd September 2024

+ Import packages

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad

+ Load datasets

In [2]:
AtlasFullLength_TPM = pd.read_csv('data/raw_fastq_files/Haber_2017/GSE92332/GSE92332_AtlasFullLength_TPM.txt', sep='\t', index_col=0)
AtlasFullLength_TPM = AtlasFullLength_TPM.T
AtlasFullLength_TPM.head()

Unnamed: 0,0610005C13Rik,0610007N19Rik,0610007P14Rik,0610008F07Rik,0610009B14Rik,0610009B22Rik,0610009D07Rik,0610009L18Rik,0610009O20Rik,0610010B08Rik,...,Zxda,Zxdb,Zxdc,Zyg11a,Zyg11b,Zyx,Zzef1,Zzz3,a,l7Rn6
S634_Lgr5GFP_M7_GFPLow_Enterocyte,103.9,21.93,150.61,8.6,0.0,59.52,33.52,0.0,1.08,0.64,...,0.0,1.67,1.04,0.0,3.82,11.72,55.49,0.0,0.0,5.99
S745_Lgr5GFP_M7_GFPLow_Stem,0.0,70.47,0.0,0.0,0.0,10.22,51.87,0.0,54.82,45.11,...,0.22,0.0,7.72,0.0,12.01,3.77,4.03,0.0,0.0,53.27
S856_Lgr5GFP_M7_GFPLow_Stem,8.99,20.84,0.0,0.0,0.0,76.65,219.94,0.0,57.28,7.79,...,0.0,0.0,0.0,0.0,0.26,0.0,14.46,0.56,0.0,57.09
S967_Lgr5GFP_M7_GFPLow_Goblet,0.0,67.15,212.32,0.0,0.0,64.97,45.24,0.0,135.89,0.0,...,0.0,0.0,0.0,0.0,5.01,0.0,33.99,0.0,0.0,37.72
S1078_Lgr5GFP_M7_GFPLow_Goblet,50.94,36.25,104.89,0.0,0.0,63.68,93.14,0.0,1.05,0.0,...,0.39,0.0,0.0,0.0,0.47,0.0,16.56,15.39,0.0,0.0


In [3]:
AtlasFullLength_TPM['barcode'] = AtlasFullLength_TPM.index.copy()

In [4]:
AtlasFullLength_TPM[['barcode', 'Gene_marker', 'Donor_ID', 'GFP_intensity', 'Cell_Type']] = AtlasFullLength_TPM['barcode'].str.split('_', expand=True)

In [5]:
AtlasFullLength_TPM.head()

Unnamed: 0,0610005C13Rik,0610007N19Rik,0610007P14Rik,0610008F07Rik,0610009B14Rik,0610009B22Rik,0610009D07Rik,0610009L18Rik,0610009O20Rik,0610010B08Rik,...,Zyx,Zzef1,Zzz3,a,l7Rn6,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type
S634_Lgr5GFP_M7_GFPLow_Enterocyte,103.9,21.93,150.61,8.6,0.0,59.52,33.52,0.0,1.08,0.64,...,11.72,55.49,0.0,0.0,5.99,S634,Lgr5GFP,M7,GFPLow,Enterocyte
S745_Lgr5GFP_M7_GFPLow_Stem,0.0,70.47,0.0,0.0,0.0,10.22,51.87,0.0,54.82,45.11,...,3.77,4.03,0.0,0.0,53.27,S745,Lgr5GFP,M7,GFPLow,Stem
S856_Lgr5GFP_M7_GFPLow_Stem,8.99,20.84,0.0,0.0,0.0,76.65,219.94,0.0,57.28,7.79,...,0.0,14.46,0.56,0.0,57.09,S856,Lgr5GFP,M7,GFPLow,Stem
S967_Lgr5GFP_M7_GFPLow_Goblet,0.0,67.15,212.32,0.0,0.0,64.97,45.24,0.0,135.89,0.0,...,0.0,33.99,0.0,0.0,37.72,S967,Lgr5GFP,M7,GFPLow,Goblet
S1078_Lgr5GFP_M7_GFPLow_Goblet,50.94,36.25,104.89,0.0,0.0,63.68,93.14,0.0,1.05,0.0,...,0.0,16.56,15.39,0.0,0.0,S1078,Lgr5GFP,M7,GFPLow,Goblet


In [6]:
X = AtlasFullLength_TPM.iloc[:, 1:20108] 
var = pd.DataFrame(index=AtlasFullLength_TPM.columns[1:20108])
obs = AtlasFullLength_TPM.iloc[:, 20108:20114]

adata = ad.AnnData(X=X.values, var=var, obs=obs)

In [7]:
adata.var

0610007N19Rik
0610007P14Rik
0610008F07Rik
0610009B14Rik
0610009B22Rik
...
Zyx
Zzef1
Zzz3
a
l7Rn6


In [8]:
adata.write('data/raw_fastq_files/Haber_2017/Haber_2017_Smartseq_LGR5_FACS_data_TPM_normalized.h5ad')

+ Create anndata object from re-mapped data

In [9]:
countmatrix = pd.read_csv('data/raw_fastq_files/Haber_2017/counts_all.txt', 
                          sep='\t', 
                          comment='#', 
                          skiprows=1)

In [10]:
countmatrix.columns = countmatrix.columns.str.split('/').str[-1].str.replace('_Aligned.sortedByCoord.out.bam', '')

In [11]:
countmatrix = countmatrix.set_index('Geneid')

+ Go from SRR to cell name as in FACS data

In [12]:
adata = sc.read_h5ad('data/raw_fastq_files/Haber_2017/Haber_2017_Smartseq_LGR5_FACS_data_TPM_normalized.h5ad')
adata = adata[adata.obs['Gene_marker'] == 'Lgr5GFP']
adata.obs['full_cell_description'] = adata.obs.index.copy()

  if not is_categorical_dtype(df_full[k]):
  adata.obs['full_cell_description'] = adata.obs.index.copy()


In [13]:
adata.obs['cell_type'] = adata.obs['GFP_intensity'].copy()
adata.obs['cell_type'] = adata.obs['cell_type'].cat.add_categories(['LGR5+ stem cell', 'LGR5- stem cell', 'LGR5- CD24- EpCAM+ stem cell'])
adata.obs.loc[(adata.obs['GFP_intensity'] == 'GFPHigh'), 'cell_type'] = 'LGR5+ stem cell'
adata.obs.loc[(adata.obs['GFP_intensity'] == 'GFPLow'), 'cell_type'] = 'LGR5- stem cell'
adata.obs.loc[(adata.obs['GFP_intensity'] == 'GFP-.CD24-.EpCAM+'), 'cell_type'] = 'LGR5- CD24- EpCAM+ stem cell'

In [14]:
dataframe = pd.read_csv('/data/raw_fastq_files/Haber_2017/sample_database.csv')
dataframe = dataframe[dataframe['Title'].str.contains('Atlas WT single cell ')]
dataframe['Sample'] = dataframe['Title'].str.split('Atlas WT single cell ').str[1]
dataframe['Sample'] = 'S' + dataframe['Sample']

In [15]:
dataframe = dataframe[dataframe['Sample'].isin(adata.obs['barcode'])]

In [16]:
countmatrix = countmatrix.T
countmatrix = countmatrix.iloc[5:]
countmatrix.head(10)

Geneid,ENSMUSG00000102693.2,ENSMUSG00000064842.3,ENSMUSG00000051951.6,ENSMUSG00000102851.2,ENSMUSG00000103377.2,ENSMUSG00000104017.2,ENSMUSG00000103025.2,ENSMUSG00000089699.2,ENSMUSG00000103201.2,ENSMUSG00000103147.2,...,ENSMUSG00000096550.2,ENSMUSG00000094172.2,ENSMUSG00000094887.2,ENSMUSG00000091585.3,ENSMUSG00000095763.2,ENSMUSG00000095523.2,ENSMUSG00000095475.2,ENSMUSG00000094855.2,ENSMUSG00000095019.2,ENSMUSG00000095041.8
SRX2418522,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,105
SRX2418533,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,904
SRX2418544,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,354
SRX2418555,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43
SRX2418577,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,138
SRX2418588,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,502
SRX2418599,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,136
SRX2418622,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,18
SRX2418633,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,529
SRX2418666,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,78


In [17]:
sra_accession_df = dataframe[['Sample', 'SRA Accession']]
adata.obs = adata.obs.merge(sra_accession_df, left_on='barcode', right_on='Sample', how='left')

In [18]:
adata.obs

Unnamed: 0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,cell_type,Sample,SRA Accession
0,S634,Lgr5GFP,M7,GFPLow,Enterocyte,S634_Lgr5GFP_M7_GFPLow_Enterocyte,LGR5- stem cell,S634,SRX2419134
1,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S745,SRX2419245
2,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S856,SRX2419356
3,S967,Lgr5GFP,M7,GFPLow,Goblet,S967_Lgr5GFP_M7_GFPLow_Goblet,LGR5- stem cell,S967,SRX2419467
4,S1078,Lgr5GFP,M7,GFPLow,Goblet,S1078_Lgr5GFP_M7_GFPLow_Goblet,LGR5- stem cell,S1078,SRX2419579
...,...,...,...,...,...,...,...,...,...
1050,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S425,SRX2418925
1051,S426,Lgr5GFP,M5,GFPHigh,Goblet,S426_Lgr5GFP_M5_GFPHigh_Goblet,LGR5+ stem cell,S426,SRX2418926
1052,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S427,SRX2418927
1053,S428,Lgr5GFP,M5,GFPHigh,Enterocyte-Progenitor-Early,S428_Lgr5GFP_M5_GFPHigh_Enterocyte-Progenitor-...,LGR5+ stem cell,S428,SRX2418928


In [19]:
countmatrix_rows = set(countmatrix.index)
sra_accessions = set(adata.obs['SRA Accession'])
missing_in_countmatrix_rows = sra_accessions - countmatrix_rows
print(missing_in_countmatrix_rows)

{'SRX2419648', 'SRX2419398', 'SRX2419945', 'SRX2419185', 'SRX2419802', 'SRX2418787', 'SRX2419294', 'SRX2419370', 'SRX2419493', 'SRX2419505', 'SRX2418828', 'SRX2418769', 'SRX2419624', 'SRX2419348', 'SRX2419643', 'SRX2419395', 'SRX2418792', 'SRX2419847', 'SRX2419838', 'SRX2418791', 'SRX2419848', 'SRX2418803', 'SRX2419804', 'SRX2419116', 'SRX2419168', 'SRX2419635', 'SRX2419285', 'SRX2419775', 'SRX2418771', 'SRX2419626', 'SRX2419760', 'SRX2419452', 'SRX2418822', 'SRX2419149', 'SRX2419110', 'SRX2419172', 'SRX2419187', 'SRX2419339', 'SRX2419619', 'SRX2419765', 'SRX2418904', 'SRX2419244', 'SRX2419343', 'SRX2419828', 'SRX2419912', 'SRX2419800', 'SRX2419191', 'SRX2419157', 'SRX2418790', 'SRX2419812', 'SRX2419681', 'SRX2419796', 'SRX2419650', 'SRX2418944', 'SRX2419508', 'SRX2419757', 'SRX2419797', 'SRX2419640', 'SRX2419430', 'SRX2419821', 'SRX2419418', 'SRX2419465', 'SRX2419336', 'SRX2419108', 'SRX2419671', 'SRX2419494', 'SRX2419679', 'SRX2419738', 'SRX2419627', 'SRX2418857', 'SRX2419502', 'SRX2

In [20]:
countmatrix['row_index'] = countmatrix.index
merged_df = pd.merge(adata.obs, countmatrix, left_on='SRA Accession', right_on='row_index', how='inner')
merged_df = merged_df.drop(columns=['row_index'])
merged_df.set_index('SRA Accession', inplace=True)

In [21]:
merged_df

Unnamed: 0_level_0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,cell_type,Sample,ENSMUSG00000102693.2,ENSMUSG00000064842.3,...,ENSMUSG00000096550.2,ENSMUSG00000094172.2,ENSMUSG00000094887.2,ENSMUSG00000091585.3,ENSMUSG00000095763.2,ENSMUSG00000095523.2,ENSMUSG00000095475.2,ENSMUSG00000094855.2,ENSMUSG00000095019.2,ENSMUSG00000095041.8
SRA Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SRX2419245,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S745,0,0,...,0,0,0,0,0,0,0,0,0,583
SRX2419356,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S856,0,0,...,0,0,0,0,0,0,0,0,0,274
SRX2418722,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,LGR5+ stem cell,S222,0,0,...,0,0,0,0,0,0,0,0,0,11
SRX2418833,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S333,0,0,...,0,0,0,0,0,0,0,0,0,153
SRX2419089,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S589,0,0,...,0,0,0,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SRX2418923,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S423,0,0,...,0,0,0,0,0,0,0,0,0,378
SRX2418924,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S424,0,0,...,0,0,0,0,0,0,0,0,0,694
SRX2418925,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S425,0,0,...,0,0,0,0,0,0,0,0,0,391
SRX2418927,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S427,0,0,...,0,0,0,0,0,0,0,0,0,398


In [41]:
obs = merged_df.iloc[:, :8]
X = merged_df.iloc[:, 8:]
var = pd.DataFrame(index=X.columns)
new_adata = ad.AnnData(X=X.values, obs=obs, var=var)

In [42]:
new_adata.obs

Unnamed: 0_level_0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,cell_type,Sample
SRA Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SRX2419245,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S745
SRX2419356,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S856
SRX2418722,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,LGR5+ stem cell,S222
SRX2418833,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S333
SRX2419089,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S589
...,...,...,...,...,...,...,...,...
SRX2418923,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S423
SRX2418924,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S424
SRX2418925,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S425
SRX2418927,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S427


In [43]:
new_adata.var

ENSMUSG00000102693.2
ENSMUSG00000064842.3
ENSMUSG00000051951.6
ENSMUSG00000102851.2
ENSMUSG00000103377.2
...
ENSMUSG00000095523.2
ENSMUSG00000095475.2
ENSMUSG00000094855.2
ENSMUSG00000095019.2
ENSMUSG00000095041.8


+ Add gene names

In [44]:
annot = sc.queries.biomart_annotations(
    "mmusculus",
    ["ensembl_gene_id", "start_position", "end_position", "chromosome_name", "external_gene_name",],
).set_index("ensembl_gene_id")

In [45]:
annot

Unnamed: 0_level_0,start_position,end_position,chromosome_name,external_gene_name
ensembl_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENSMUSG00000064336,1,68,MT,mt-Tf
ENSMUSG00000064337,70,1024,MT,mt-Rnr1
ENSMUSG00000064338,1025,1093,MT,mt-Tv
ENSMUSG00000064339,1094,2675,MT,mt-Rnr2
ENSMUSG00000064340,2676,2750,MT,mt-Tl1
...,...,...,...,...
ENSMUSG00000027514,173048405,173060716,2,Zbp1
ENSMUSG00000038400,173066251,173118326,2,Pmepa1
ENSMUSG00000087600,173118467,173120221,2,Pmepa1os
ENSMUSG00000025314,90260098,90410991,2,Ptprj


In [46]:
new_adata.var['ensemble_gene_id_version'] = new_adata.var.index.copy()

In [47]:
new_adata.var['ensemble_gene_id'] = new_adata.var['ensemble_gene_id_version'].str.split('.').str[0]

In [48]:
new_adata.var.index = new_adata.var['ensemble_gene_id']

In [49]:
new_adata.var = new_adata.var.merge(annot, left_index=True, right_index=True, how='left')

In [50]:
new_adata.var

Unnamed: 0_level_0,ensemble_gene_id_version,ensemble_gene_id,start_position,end_position,chromosome_name,external_gene_name
ensemble_gene_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSMUSG00000102693,ENSMUSG00000102693.2,ENSMUSG00000102693,3143476,3144545,1,4933401J01Rik
ENSMUSG00000064842,ENSMUSG00000064842.3,ENSMUSG00000064842,3172239,3172348,1,Gm26206
ENSMUSG00000051951,ENSMUSG00000051951.6,ENSMUSG00000051951,3276124,3741721,1,Xkr4
ENSMUSG00000102851,ENSMUSG00000102851.2,ENSMUSG00000102851,3322980,3323459,1,Gm18956
ENSMUSG00000103377,ENSMUSG00000103377.2,ENSMUSG00000103377,3435954,3438772,1,Gm37180
...,...,...,...,...,...,...
ENSMUSG00000095523,ENSMUSG00000095523.2,ENSMUSG00000095523,837364,840451,JH584299.1,
ENSMUSG00000095475,ENSMUSG00000095475.2,ENSMUSG00000095475,910289,913083,JH584299.1,
ENSMUSG00000094855,ENSMUSG00000094855.2,ENSMUSG00000094855,921942,924675,JH584299.1,
ENSMUSG00000095019,ENSMUSG00000095019.2,ENSMUSG00000095019,81607,82689,JH584303.1,


In [51]:
new_adata.obs

Unnamed: 0_level_0,barcode,Gene_marker,Donor_ID,GFP_intensity,Cell_Type,full_cell_description,cell_type,Sample
SRA Accession,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SRX2419245,S745,Lgr5GFP,M7,GFPLow,Stem,S745_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S745
SRX2419356,S856,Lgr5GFP,M7,GFPLow,Stem,S856_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S856
SRX2418722,S222,Lgr5GFP,M7,GFPHigh,Stem,S222_Lgr5GFP_M7_GFPHigh_Stem,LGR5+ stem cell,S222
SRX2418833,S333,Lgr5GFP,M7,GFPLow,Stem,S333_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S333
SRX2419089,S589,Lgr5GFP,M7,GFPLow,Stem,S589_Lgr5GFP_M7_GFPLow_Stem,LGR5- stem cell,S589
...,...,...,...,...,...,...,...,...
SRX2418923,S423,Lgr5GFP,M5,GFPHigh,Stem,S423_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S423
SRX2418924,S424,Lgr5GFP,M5,GFPHigh,Stem,S424_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S424
SRX2418925,S425,Lgr5GFP,M5,GFPHigh,Stem,S425_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S425
SRX2418927,S427,Lgr5GFP,M5,GFPHigh,Stem,S427_Lgr5GFP_M5_GFPHigh_Stem,LGR5+ stem cell,S427


In [52]:
new_adata.var = new_adata.var.astype(str)
new_adata.var.index = new_adata.var.index.astype(str)
new_adata.obs = new_adata.obs.astype(str)
new_adata.obs.index = new_adata.obs.index.astype(str)
new_adata.X = new_adata.X.astype(float)

+ Berform basic filtering

In [53]:
sc.pp.filter_genes(new_adata, min_counts = 1, inplace = True)
sc.pp.filter_genes(new_adata, min_cells = 3, inplace = True)

sc.pp.filter_cells(new_adata, min_genes = 50, inplace = True)
sc.pp.filter_cells(new_adata, min_counts = 3, inplace = True)

  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):
  if not is_categorical_dtype(df_full[k]):


* Save new object

In [55]:
new_adata.write_h5ad('data/Haber_2017_Smartseq/Haber_2017_Smartseq_stem_cells_remapped.h5ad')