### Notebook to format the BDRhapsody data from Seven Bridges into an `anndata` object with raw counts in `adata.X`

- **Developed by:** Carlos Talavera-López Ph.D
- **Institute of Computational Biology - Computational Health Centre - Helmholtz Munich**
- v230314

### Import required modules

In [1]:
import anndata
import pandas as pd
import scanpy as sc
import datatable as dt

### Set up working environment

In [2]:
# Root folder of the Seven Bridges output; every input path read below is built by appending to it.
# NOTE(review): absolute path on a mounted volume — needs adjusting on any other machine.
BDdata = '/Volumes/A7V/nobackup/raw_data/single_cell/COPD_IAV/sevenbridges_raw/grch38-iav/'

### Read in _BD Rhapsody_ files for Versuch_2

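- UMIs are called _Raw_Molecules_ in the BD Rhapsody output. Note that the cells below load the `RSEC_ReadsPerCell` tables, which (going by the file name) hold reads per cell rather than molecule/UMI counts — confirm that this is the intended input.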
- Genes/Features are called _Bioproduct_
- Sample Tag Version: hs | Sample Tag Names (as used in the file names read below): 1-CSE_06_CTRL ; 2-CSE_06_IAV ; 11-GnR_07_CTRL ; 12-GnR_07_IAV

In [3]:
# Load the per-cell count table for SampleTag01 (CSE_06, control) and wrap it in an AnnData object.
# Parsing starts at line 8 of the CSV (presumably skipping the pipeline's header block — confirm).
# NOTE(review): this is the RSEC_ReadsPerCell table; if UMI counts are wanted in adata.X,
# confirm whether a molecules-per-cell export should be read instead.
BDr_1 = dt.fread(BDdata + 'Versuch_2/V2-test_SampleTag01_hs_CSE_06_CRTL/V2-test_SampleTag01_hs_CSE_06_CRTL_RSEC_ReadsPerCell.csv', header = True, skip_to_line = 8).to_pandas()
# Use the first column (the cell identifiers) as row labels and cast the counts to int64
# BEFORE transposing. Transposing while the identifier column is still mixed in with the
# counts forces the whole table to object dtype, which is slow and memory-hungry.
# BDr_1 ends up genes x cells, with the cell identifiers as column labels.
BDr_1 = BDr_1.set_index(BDr_1.columns[0]).astype('int64').T
# AnnData expects observations (cells) in rows, hence the transpose back to cells x genes.
adata_1 = sc.AnnData(BDr_1.T, dtype = 'int64')
adata_1



AnnData object with n_obs × n_vars = 5650 × 31125

In [4]:
# Sample-level annotations for adata_1; each value is broadcast to every cell and stored as a string.
# NOTE(review): adata_1 was read from the 'SampleTag01_hs_CSE_06_CRTL' file, yet the donor
# recorded here is 'GNR-07' — confirm which donor (and donor metadata) belongs to this sample.
sample_1_meta = {
    'sex': 'male',
    'age': '40',
    'condition': 'CTRL',
    'ethnicity': 'caucasian',
    'PaCO2': '99.4',
    'donor': 'GNR-07',
    'infection': 'CTRL',
    'disease': 'healthy',
    'SMK': '1',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
# Insertion order of the dict fixes the column order in adata_1.obs.
for column, value in sample_1_meta.items():
    adata_1.obs[column] = value
adata_1.obs.head()

Unnamed: 0_level_0,sex,age,condition,ethnicity,PaCO2,donor,infection,disease,SMK,illumina_stimunr,bd_rhapsody
Cell_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10354960,male,40,CTRL,caucasian,99.4,GNR-07,CTRL,healthy,1,21_0341,2
3989787,male,40,CTRL,caucasian,99.4,GNR-07,CTRL,healthy,1,21_0341,2
5344905,male,40,CTRL,caucasian,99.4,GNR-07,CTRL,healthy,1,21_0341,2
9913023,male,40,CTRL,caucasian,99.4,GNR-07,CTRL,healthy,1,21_0341,2
759605,male,40,CTRL,caucasian,99.4,GNR-07,CTRL,healthy,1,21_0341,2


In [5]:
# Load the per-cell count table for SampleTag02 (CSE_06, IAV) and wrap it in an AnnData object.
# Parsing starts at line 8 of the CSV (presumably skipping the pipeline's header block — confirm).
# NOTE(review): this is the RSEC_ReadsPerCell table; if UMI counts are wanted in adata.X,
# confirm whether a molecules-per-cell export should be read instead.
BDr_2 = dt.fread(BDdata + 'Versuch_2/V2-test_SampleTag02_hs_CSE_06_IAV/V2-test_SampleTag02_hs_CSE_06_IAV_RSEC_ReadsPerCell.csv', header = True, skip_to_line = 8).to_pandas()
# Use the first column (the cell identifiers) as row labels and cast the counts to int64
# BEFORE transposing. Transposing while the identifier column is still mixed in with the
# counts forces the whole table to object dtype, which is slow and memory-hungry.
# BDr_2 ends up genes x cells, with the cell identifiers as column labels.
BDr_2 = BDr_2.set_index(BDr_2.columns[0]).astype('int64').T
# AnnData expects observations (cells) in rows, hence the transpose back to cells x genes.
adata_2 = sc.AnnData(BDr_2.T, dtype = 'int64')
adata_2



AnnData object with n_obs × n_vars = 5409 × 31125

In [6]:
# Sample-level annotations for adata_2; each value is broadcast to every cell and stored as a string.
# NOTE(review): adata_2 was read from the 'SampleTag02_hs_CSE_06_IAV' file, yet the donor
# recorded here is 'GNR-07' — confirm which donor (and donor metadata) belongs to this sample.
sample_2_meta = {
    'sex': 'male',
    'age': '40',
    'condition': 'IAV',
    'ethnicity': 'caucasian',
    'PaCO2': '99.4',
    'donor': 'GNR-07',
    'infection': 'IAV',
    'disease': 'healthy',
    'SMK': '2',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
# Insertion order of the dict fixes the column order in adata_2.obs.
for column, value in sample_2_meta.items():
    adata_2.obs[column] = value
adata_2.obs.head()

Unnamed: 0_level_0,sex,age,condition,ethnicity,PaCO2,donor,infection,disease,SMK,illumina_stimunr,bd_rhapsody
Cell_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10330399,male,40,IAV,caucasian,99.4,GNR-07,IAV,healthy,2,21_0341,2
1068684,male,40,IAV,caucasian,99.4,GNR-07,IAV,healthy,2,21_0341,2
4443267,male,40,IAV,caucasian,99.4,GNR-07,IAV,healthy,2,21_0341,2
1645496,male,40,IAV,caucasian,99.4,GNR-07,IAV,healthy,2,21_0341,2
6794952,male,40,IAV,caucasian,99.4,GNR-07,IAV,healthy,2,21_0341,2


In [7]:
# Load the per-cell count table for SampleTag11 (GnR_07, control) and wrap it in an AnnData object.
# Parsing starts at line 8 of the CSV (presumably skipping the pipeline's header block — confirm).
# NOTE(review): this is the RSEC_ReadsPerCell table; if UMI counts are wanted in adata.X,
# confirm whether a molecules-per-cell export should be read instead.
BDr_3 = dt.fread(BDdata + 'Versuch_2/V2-test_SampleTag11_hs_GnR_07_CTRL/V2-test_SampleTag11_hs_GnR_07_CTRL_RSEC_ReadsPerCell.csv', header = True, skip_to_line = 8).to_pandas()
# Use the first column (the cell identifiers) as row labels and cast the counts to int64
# BEFORE transposing. Transposing while the identifier column is still mixed in with the
# counts forces the whole table to object dtype, which is slow and memory-hungry.
# BDr_3 ends up genes x cells, with the cell identifiers as column labels.
BDr_3 = BDr_3.set_index(BDr_3.columns[0]).astype('int64').T
# AnnData expects observations (cells) in rows, hence the transpose back to cells x genes.
adata_3 = sc.AnnData(BDr_3.T, dtype = 'int64')
adata_3



AnnData object with n_obs × n_vars = 5875 × 31125

In [8]:
# Sample-level annotations for adata_3; each value is broadcast to every cell and stored as a string.
# NOTE(review): adata_3 was read from the 'SampleTag11_hs_GnR_07_CTRL' file, yet the donor
# recorded here is 'CSE-06' — confirm which donor (and donor metadata) belongs to this sample.
sample_3_meta = {
    'sex': 'male',
    'age': '69',
    'condition': 'CTRL',
    'ethnicity': 'caucasian',
    'PaCO2': '61.2',
    'donor': 'CSE-06',
    'infection': 'CTRL',
    'disease': 'COPD',
    'SMK': '11',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
# Insertion order of the dict fixes the column order in adata_3.obs.
for column, value in sample_3_meta.items():
    adata_3.obs[column] = value
adata_3.obs.head()

Unnamed: 0_level_0,sex,age,condition,ethnicity,PaCO2,donor,infection,disease,SMK,illumina_stimunr,bd_rhapsody
Cell_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
7529938,male,69,CTRL,caucasian,61.2,CSE-06,CTRL,COPD,11,21_0341,2
8727987,male,69,CTRL,caucasian,61.2,CSE-06,CTRL,COPD,11,21_0341,2
8272170,male,69,CTRL,caucasian,61.2,CSE-06,CTRL,COPD,11,21_0341,2
12993450,male,69,CTRL,caucasian,61.2,CSE-06,CTRL,COPD,11,21_0341,2
6073429,male,69,CTRL,caucasian,61.2,CSE-06,CTRL,COPD,11,21_0341,2


In [9]:
# Load the per-cell count table for SampleTag12 (GnR_07, IAV) and wrap it in an AnnData object.
# Parsing starts at line 8 of the CSV (presumably skipping the pipeline's header block — confirm).
# NOTE(review): this is the RSEC_ReadsPerCell table; if UMI counts are wanted in adata.X,
# confirm whether a molecules-per-cell export should be read instead.
BDr_4 = dt.fread(BDdata + 'Versuch_2/V2-test_SampleTag12_hs_GnR_07_IAV/V2-test_SampleTag12_hs_GnR_07_IAV_RSEC_ReadsPerCell.csv', header = True, skip_to_line = 8).to_pandas()
# Use the first column (the cell identifiers) as row labels and cast the counts to int64
# BEFORE transposing. Transposing while the identifier column is still mixed in with the
# counts forces the whole table to object dtype, which is slow and memory-hungry.
# BDr_4 ends up genes x cells, with the cell identifiers as column labels.
BDr_4 = BDr_4.set_index(BDr_4.columns[0]).astype('int64').T
# AnnData expects observations (cells) in rows, hence the transpose back to cells x genes.
adata_4 = sc.AnnData(BDr_4.T, dtype = 'int64')
adata_4



AnnData object with n_obs × n_vars = 6702 × 31125

In [10]:
# Sample-level annotations for adata_4; each value is broadcast to every cell and stored as a string.
# NOTE(review): adata_4 was read from the 'SampleTag12_hs_GnR_07_IAV' file, yet the donor
# recorded here is 'CSE-06' — confirm which donor (and donor metadata) belongs to this sample.
# NOTE(review): PaCO2 is '73.3' here but '61.2' on adata_3, which carries the same donor label — confirm.
sample_4_meta = {
    'sex': 'male',
    'age': '69',
    'condition': 'IAV',
    'ethnicity': 'caucasian',
    'PaCO2': '73.3',
    'donor': 'CSE-06',
    'infection': 'IAV',
    'disease': 'COPD',
    'SMK': '12',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
# Insertion order of the dict fixes the column order in adata_4.obs.
for column, value in sample_4_meta.items():
    adata_4.obs[column] = value
adata_4.obs.head()

Unnamed: 0_level_0,sex,age,condition,ethnicity,PaCO2,donor,infection,disease,SMK,illumina_stimunr,bd_rhapsody
Cell_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3254460,male,69,IAV,caucasian,73.3,CSE-06,IAV,COPD,12,21_0341,2
8141269,male,69,IAV,caucasian,73.3,CSE-06,IAV,COPD,12,21_0341,2
11663239,male,69,IAV,caucasian,73.3,CSE-06,IAV,COPD,12,21_0341,2
11087655,male,69,IAV,caucasian,73.3,CSE-06,IAV,COPD,12,21_0341,2
475436,male,69,IAV,caucasian,73.3,CSE-06,IAV,COPD,12,21_0341,2


### Save individual objects

In [11]:
# Write each sample to its own .h5ad file inside the Versuch_2 folder.
# The destination is derived from BDdata instead of repeating the absolute root, so the
# input and output locations cannot drift apart; the resulting paths are unchanged.
versuch_2_dir = BDdata + 'Versuch_2/'
samples_to_save = {
    'CSE_06_CRTL': adata_1,  # 'CRTL' (sic) — kept as-is, matches the spelling of the input folder name
    'CSE_06_IAV': adata_2,
    'GnR_07_CTRL': adata_3,
    'GnR_07_IAV': adata_4,
}
for sample_label, sample_adata in samples_to_save.items():
    sample_adata.write(versuch_2_dir + 'Versuch_2_grch38-iav_' + sample_label + '_GEX.h5ad')

In [12]:
# Display the matrix stored in adata_1.X (the object was built with dtype 'int64').
adata_1.X

array([[ 0,  0,  0, ...,  0, 12,  0],
       [ 0,  0,  0, ...,  0, 32,  0],
       [ 0,  0,  0, ...,  0, 10,  0],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])