### Load the libraries

In [1]:
### Load the libraries
# Standard library
import os, sys
import warnings
# Install a blanket 'ignore' filter: every warning raised after this line is hidden,
# including those coming from the libraries imported below.
# NOTE(review): consider narrowing this to specific warning categories so that
# data-related warnings remain visible.
warnings.filterwarnings('ignore')
# Third-party analysis stack
import pandas as pd 
import numpy as np 
import anndata as ad 
import matplotlib.pyplot as plt 
import scanpy as sc

<i><b> Print the container version </b></i>

In [2]:
# Container image used for this analysis: cokorac/cs-core-image-amd64:dev (date of use: 23/01/25)

<i><b> Set the home directory </b></i>

In [3]:
# Project root used to build every input/output path in this notebook.
# `setdefault` keeps the historical location as the default but respects a
# HOME_Nikola value that is already present in the environment, so the notebook
# can be pointed at another checkout without editing the code.
home_path = os.environ.setdefault(
    'HOME_Nikola',
    '/group/kalebic/Nikola/1_single_cell_final/Nikola_final/final/scdgomics',
)
home_path

'/group/kalebic/Nikola/1_single_cell_final/Nikola_final/final/scdgomics'

<i><b> Set the plotting parameters </b></i>

In [4]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Configure scanpy's figure parameters, using a resolution of 80 dpi.
sc.set_figure_params(dpi = 80)

### Assemble the data

In [5]:
# Load the expression data (tab-separated; the first column becomes the row index).
# As displayed below, the table is genes (rows) x cell barcodes (columns).
# The location is derived from `home_path` so the project root is defined in a
# single place, consistent with the cell that saves the AnnData object.
expression_path = os.path.join(home_path, 'datasets', 'GSE104323_10X_expression_data_V2.tab')
expression_data = pd.read_csv(expression_path, delimiter='\t', index_col=0)
expression_data.head()

Unnamed: 0_level_0,10X79_1_TCTACCATGCCTAA-,10X79_2_GTACTAGTGAACAT-,10X79_2_AATCAGTACCTACA-,10X79_1_CGGGTTCTTGAGGT-,10X79_1_GTGGAAGGCGTACA-,10X79_1_GTCCGCAAGCCATT-,10X79_2_TAAAGCAATACGCT-,10X79_1_AGTGATCAGCAACT-,10X79_1_CTCAATCCCAAGAT-,10X79_1_CCTTGTCGGATGTT-,...,10X79_2_CCAAATCCTCCTAG-,10X79_2_ATGTAGTTATCGGT-,10X80_2_GCTAATCTTATCTG-,10X80_1_CATTTAGTACGCGA-,10X80_2_GAATCTCAGCGACC-,10X80_1_CACGGTCTACGAGT-,10X80_2_ATTGCAGCCACGTC-,10X80_1_GCGGTTCGGCATCG-,10X83_4_TTCAGCATACTCTT-,10X80_1_TTACCAGGAGTAGA-
cellid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0610007P14Rik,0,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
0610009B22Rik,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0610009L18Rik,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0610009O20Rik,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
0610010F05Rik,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Load the metadata file (tab-separated; the first column, holding the cell
# barcodes, becomes the row index).
# The location is derived from `home_path` so the project root is defined in a
# single place, consistent with the cell that saves the AnnData object.
metadata_path = os.path.join(home_path, 'datasets', 'GSE104323_metadata_barcodes_24185cells.txt')
metadata = pd.read_csv(metadata_path, index_col=0, delimiter='\t')
# Discard every row that has at least one missing field.
metadata = metadata.dropna(how='any')
metadata.head()

Unnamed: 0_level_0,source name,organism,characteristics: strain,characteristics: age,characteristics: sex of pooled animals,characteristics: cell cluster,molecule,SRR run accession,raw file (original file name),UMI_CellularBarcode
Sample name (24185 single cells),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10X79_1_AAACTAGCTAGCCC-,dentate gyrus,Mus musculus,hGFAP-GFP,P120,2males+1female,Neuroblast,total RNA,SRR6089817,10X79_1_AAACTAGCTAGCCC.fq.gz,CGGCGATCCC_AAACTAGCTAGCCC
10X79_1_AAACTAGGATGTAT-,dentate gyrus,Mus musculus,hGFAP-GFP,P120,2males+1female,OPC,total RNA,SRR6089947,10X79_1_AAACTAGGATGTAT.fq.gz,AGTGGTAATG_AAACTAGGATGTAT
10X79_1_AAACTCACGGCGTT-,dentate gyrus,Mus musculus,hGFAP-GFP,P120,2males+1female,GC-adult,total RNA,SRR6089529,10X79_1_AAACTCACGGCGTT.fq.gz,GGGTGCGCTC_AAACTCACGGCGTT
10X79_1_AAACTGTCGGCTCA-,dentate gyrus,Mus musculus,hGFAP-GFP,P120,2males+1female,MOL,total RNA,SRR6089595,10X79_1_AAACTGTCGGCTCA.fq.gz,CCTTTCAACG_AAACTGTCGGCTCA
10X79_1_AAACTGTGATAAGT-,dentate gyrus,Mus musculus,hGFAP-GFP,P120,2males+1female,OPC,total RNA,SRR6090058,10X79_1_AAACTGTGATAAGT.fq.gz,CCTTTCAGGT_AAACTGTGATAAGT


In [7]:
# Assemble the AnnData object from the expression table and the metadata.
# NOTE: run this cell once after loading the data; it transposes
# `expression_data` in place, so a second run would flip it back.

# Name of the metadata column/index that holds the cell barcodes
sample_col = 'Sample name (24185 single cells)'

# Reset the index so the barcodes are also available as a regular column
metadata = metadata.reset_index()

# Orient the count matrix as cells (rows) x genes (columns)
expression_data = expression_data.T

# The two files do NOT list the cells in the same order (the expression table
# starts with 10X79_1_TCTACCATGCCTAA-, the metadata with 10X79_1_AAACTAGCTAGCCC-).
# The metadata therefore has to be matched to the expression rows by barcode
# value; copying the barcodes over by position would give every cell the name
# and annotations (e.g. cell cluster) of a different cell.
if metadata[sample_col].duplicated().any():
    raise ValueError(f"Duplicate cell barcodes found in metadata column {sample_col!r}")

unmatched = expression_data.index.difference(metadata[sample_col])
if len(unmatched) > 0:
    raise ValueError(
        f"{len(unmatched)} cell(s) of the expression matrix have no metadata row "
        f"(first few: {list(unmatched[:5])})"
    )

# Re-order the metadata to follow the expression matrix. The barcode column is
# kept (drop=False) and the index is left unnamed so it does not clash with it.
metadata = (
    metadata
    .set_index(sample_col, drop=False)
    .reindex(expression_data.index)
    .rename_axis(None)
)

# Build the AnnData object
adata = ad.AnnData(expression_data)

# Add var_names and obs_names: both come straight from the expression matrix,
# so the names always describe the row/column they are attached to.
adata.obs_names = expression_data.index
adata.var_names = expression_data.columns

# Sanity check: metadata rows and AnnData observations are in the same order
assert (metadata.index == adata.obs_names).all()

In [8]:
# Add the metadata: attach the per-cell annotation table as the observation
# annotations of `adata`.
# NOTE(review): assigning a DataFrame to `.obs` is expected to take over that
# frame's index as the observation names, so `metadata` must be indexed by cell
# barcode and sorted in the same row order as `adata` at this point - confirm.
adata.obs = metadata

### Save the anndata

In [9]:
# Write the assembled AnnData object to <home_path>/data_versions/Adata_raw.h5ad.
output_path = os.path.join(home_path, 'data_versions/Adata_raw.h5ad')
# Create the output folder first so the write does not fail when it is missing.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
adata.write_h5ad(output_path)