### Create anndata object for Grün, 2016 stem cells

- **Developed by:** Anna Maguza
- **Affiliation:** Faculty of Medicine, Würzburg University
- **Date of creation:** 10th September 2024
- **Last modified date:** 10th September 2024

In [9]:
import pandas as pd
import anndata as ad
import glob
import os

In [10]:
# Directory searched for the per-sample "*.csv" count tables.
path = "data/Grun_2016"

In [11]:
# Collect every CSV in the input directory. glob.glob() returns matches in a
# filesystem-dependent order, so sort them to make the order of samples (and
# therefore of cells in the final object) reproducible across runs/machines.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [12]:
# Read every count table and remember which sample it came from.
# `file_names[k]` is the sample name for the table stored in `data_list[k]`.
data_list = []
file_names = []

for csv_path in csv_files:
    # Sample name = file name without the ".gene.coutt.csv" ending.
    sample_name = os.path.basename(csv_path).replace(".gene.coutt.csv", "")
    file_names.append(sample_name)

    # Despite the .csv extension the tables are read as tab-separated,
    # with the "GENEID" column used as the row index.
    counts = pd.read_csv(csv_path, sep="\t", index_col="GENEID")

    # Relabel the columns as "<sample>_<1-based position>" so that they
    # stay unique once all tables are placed side by side.
    n_columns = counts.shape[1]
    counts.columns = [f"{sample_name}_{pos}" for pos in range(1, n_columns + 1)]

    data_list.append(counts)

In [13]:
# Place all per-sample tables side by side (genes as rows, cells as columns).
# pd.concat aligns on the gene index with an outer join, so a gene that is
# absent from one table would be NaN for that table's cells. A gene that is
# not listed for a sample has no counts there, so fill those gaps with 0
# rather than leaving NaN in the expression matrix.
combined_data = pd.concat(data_list, axis=1).fillna(0)

In [14]:
# AnnData wants observations in rows, so flip the genes-by-cells table
# before handing it over as the data matrix.
adata = ad.AnnData(combined_data.transpose())

In [15]:
# Label the variables with the row index of the combined table
# (the "GENEID" values the individual tables were indexed by).
adata.var_names = combined_data.index

In [16]:
# Annotate every cell with the sample it came from. Each sample name is
# repeated once per column of *its own* table; using the first table's column
# count for every sample would mislabel cells (or fail on a length mismatch)
# whenever the input files hold different numbers of cells.
adata.obs["file_name"] = [
    sample_name
    for sample_name, sample_table in zip(file_names, data_list)
    for _ in range(sample_table.shape[1])
]

In [24]:
# The GSM identifier is taken as the part of the file name before the
# first underscore.
adata.obs["GSM_number"] = adata.obs["file_name"].map(
    lambda name: name.split("_", maxsplit=1)[0]
)

In [25]:
# Human-readable description for each GSM identifier, keyed by GSM number.
# NOTE(review): GSM1987572 and GSM1987580 are not listed here — confirm that
# no input file carries those identifiers.
gsm_descriptions = {
    # 5-day traced Lgr5+ cells
    "GSM1987570": "Extracted RNA 5-day traced Lgr5+ cells replicate 1",
    "GSM1987571": "Extracted RNA 5-day traced Lgr5+ cells replicate 2",
    "GSM1987573": "Extracted RNA 5-day traced Lgr5+ cells replicate 3",
    "GSM1987574": "Extracted RNA 5-day traced Lgr5+ cells replicate 4",
    "GSM1987575": "Extracted RNA 5-day traced Lgr5+ cells replicate 5",
    # 3-week samples
    "GSM1987576": "Extracted RNA 3-week traced Lgr5+ cells, CD24+ fraction",
    "GSM1987577": "Extracted RNA 3-week CD24+ control cells, non-traced",
    # 8-week samples
    "GSM1987578": "Extracted RNA 8-week traced Lgr5+ cells, CD24+ fraction replicate 1",
    "GSM1987579": "Extracted RNA 8-week traced Lgr5+ cells",
    "GSM1987581": "Extracted RNA 8-week traced Lgr5+ cells, CD24+ fraction replicate 2",
    "GSM1987582": "Extracted RNA 8-week traced Lgr5+ cells, CD24+ fraction replicate 3",
    "GSM1987583": "Extracted RNA 8-week CD24+ control cells, non-traced replicate 1",
    "GSM1987584": "Extracted RNA 8-week CD24+ control cells, non-traced replicate 2",
}

In [26]:
# Look up the description for each cell's GSM number. Mapping with a dict
# leaves NaN for any GSM number that has no entry in `gsm_descriptions`.
adata.obs["gsm_description"] = adata.obs["GSM_number"].map(gsm_descriptions)

In [28]:
# Save the annotated object to disk in h5ad format.
output_h5ad = "data/Grun_2016/Grün_2016_all_cells.h5ad"
adata.write_h5ad(output_h5ad)