### Notebook to format the BD Rhapsody data from Seven Bridges into an `anndata` object

- **Developed by:** Carlos Talavera-López Ph.D
- **Institute of Computational Biology - Computational Health Centre - Helmholtz Munich**
- v230313

### Import required modules

In [None]:
import anndata
import pandas as pd
import scanpy as sc
import datatable as dt

### Set up working environment

In [None]:
# Root directory of the raw Seven Bridges output. Per-sample paths are appended to it with `+`,
# so the trailing slash is required.
BDdata = '/Volumes/XF-11/working_datasets/marburg/sevenbridges_raw/grch38-iav/'

### Read in _BD Rhapsody_ files for Versuch_2

- UMIs are called _Raw_Molecules_
- Genes/Features are called _Bioproduct_
- Sample Tag Version: hs | Sample Tag Names: 7-CSE_CTRL ; 8-GnR_CTRL ; 9-CSE_IAV ; 10-GnR_IAV

In [None]:
# Sample Tag 01 (CSE_06, control): parse the long-format expression table.
# Parsing starts at line 8 of the file, which is taken as the column header row.
BDr_1 = dt.fread(
    BDdata + 'Versuch_2/V2-test_SampleTag01_hs_CSE_06_CRTL/V2-test_SampleTag01_hs_CSE_06_CRTL_Expression_Data.st',
    header = True,
    skip_to_line = 8,
).to_pandas()

# Keep the three columns needed for the count matrix. UMIs are taken from `Raw_Molecules`
# (the column this notebook designates as UMIs and that the other Versuch_2 samples use),
# not from `RSEC_Reads`.
BDr_1_df = pd.DataFrame().assign(
    Cells = BDr_1['Cell_Index'],
    UMIs = BDr_1['Raw_Molecules'].astype('float'),
    Features = BDr_1['Bioproduct'],
)

# Reshape to a cells x features table; (cell, feature) pairs absent from the file become 0.
# NOTE(review): `pivot_table` aggregates duplicate (cell, feature) rows with its default
# aggregation — presumably each pair occurs once in the file; confirm.
BDr_1_table = pd.pivot_table(BDr_1_df, values = 'UMIs', index = ['Cells'], columns = ['Features'], fill_value = 0)

# Wrap the table in an AnnData object, requesting int64 storage for the matrix.
adata_1 = sc.AnnData(BDr_1_table, dtype = 'int64')
adata_1

In [None]:
# Sample-level annotations, written as constant string columns on every cell of adata_1.
# NOTE(review): adata_1 is read from a path naming donor CSE_06, but `donor` is set to
# 'GNR-07' here — confirm that the donor/age/disease labels belong to this sample.
sample_1_obs = {
    'sex': 'male',
    'age': '40',
    'condition': 'CTRL',
    'ethnicity': 'caucasian',
    'PaCO2': '99.4',
    'donor': 'GNR-07',
    'infection': 'CTRL',
    'disease': 'healthy',
    'SMK': '1',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
for obs_column, obs_value in sample_1_obs.items():
    adata_1.obs[obs_column] = obs_value

# Preview the first rows of the annotated cell table.
adata_1.obs.head()

In [None]:
# Sample Tag 02 (CSE_06, IAV): parse the long-format expression table.
# Parsing starts at line 8 of the file, which is taken as the column header row.
BDr_2 = dt.fread(
    BDdata + 'Versuch_2/V2-test_SampleTag02_hs_CSE_06_IAV/V2-test_SampleTag02_hs_CSE_06_IAV_Expression_Data.st',
    header = True,
    skip_to_line = 8,
).to_pandas()

# Keep the three columns needed for the count matrix. UMIs are taken from `Raw_Molecules`
# (the column this notebook designates as UMIs and that the other Versuch_2 samples use),
# not from `RSEC_Reads`.
BDr_2_df = pd.DataFrame().assign(
    Cells = BDr_2['Cell_Index'],
    UMIs = BDr_2['Raw_Molecules'].astype('float'),
    Features = BDr_2['Bioproduct'],
)

# Reshape to a cells x features table; (cell, feature) pairs absent from the file become 0.
BDr_2_table = pd.pivot_table(BDr_2_df, values = 'UMIs', index = ['Cells'], columns = ['Features'], fill_value = 0)

# Wrap the table in an AnnData object, requesting int64 storage for the matrix.
adata_2 = sc.AnnData(BDr_2_table, dtype = 'int64')
adata_2

In [None]:
# Sample-level annotations, written as constant string columns on every cell of adata_2.
# NOTE(review): adata_2 is read from a path naming donor CSE_06, but `donor` is set to
# 'GNR-07' here — confirm that the donor/age/disease labels belong to this sample.
sample_2_obs = {
    'sex': 'male',
    'age': '40',
    'condition': 'IAV',
    'ethnicity': 'caucasian',
    'PaCO2': '99.4',
    'donor': 'GNR-07',
    'infection': 'IAV',
    'disease': 'healthy',
    'SMK': '2',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
for obs_column, obs_value in sample_2_obs.items():
    adata_2.obs[obs_column] = obs_value

# Preview the first rows of the annotated cell table.
adata_2.obs.head()

In [None]:
# Sample Tag 11 (GnR_07, control): parse the long-format expression table.
# Parsing starts at line 8 of the file, which is taken as the column header row.
BDr_3 = dt.fread(
    BDdata + 'Versuch_2_SampleTag11_hs_GnR_07_CTRL/Versuch_2_SampleTag11_hs_GnR_07_CTRL_Expression_Data.st',
    skip_to_line = 8,
    header = True,
).to_pandas()

# Cell index, UMI count (`Raw_Molecules`, cast to float) and feature name per row.
BDr_3_df = pd.DataFrame({
    'Cells': BDr_3['Cell_Index'],
    'UMIs': BDr_3['Raw_Molecules'].astype('float'),
    'Features': BDr_3['Bioproduct'],
})

# Reshape to a cells x features table; (cell, feature) pairs absent from the file become 0.
BDr_3_table = BDr_3_df.pivot_table(
    values = 'UMIs',
    index = ['Cells'],
    columns = ['Features'],
    fill_value = 0,
)

# Wrap the table in an AnnData object (no explicit dtype requested for this sample).
adata_3 = sc.AnnData(BDr_3_table)
adata_3

In [None]:
# Sample-level annotations, written as constant string columns on every cell of adata_3.
# NOTE(review): adata_3 is read from a path naming donor GnR_07, but `donor` is set to
# 'CSE-06' here — confirm that the donor/age/disease labels belong to this sample.
sample_3_obs = {
    'sex': 'male',
    'age': '69',
    'condition': 'CTRL',
    'ethnicity': 'caucasian',
    'PaCO2': '61.2',
    'donor': 'CSE-06',
    'infection': 'CTRL',
    'disease': 'COPD',
    'SMK': '11',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
for obs_column, obs_value in sample_3_obs.items():
    adata_3.obs[obs_column] = obs_value

# Preview the first rows of the annotated cell table.
adata_3.obs.head()

In [None]:
# Sample Tag 12 (GnR_07, IAV): parse the long-format expression table.
# Parsing starts at line 8 of the file, which is taken as the column header row.
BDr_4 = dt.fread(
    BDdata + 'Versuch_2_SampleTag12_hs_GnR_07_IAV/Versuch_2_SampleTag12_hs_GnR_07_IAV_Expression_Data.st',
    skip_to_line = 8,
    header = True,
).to_pandas()

# Cell index, UMI count (`Raw_Molecules`, cast to float) and feature name per row.
BDr_4_df = pd.DataFrame({
    'Cells': BDr_4['Cell_Index'],
    'UMIs': BDr_4['Raw_Molecules'].astype('float'),
    'Features': BDr_4['Bioproduct'],
})

# Reshape to a cells x features table; (cell, feature) pairs absent from the file become 0.
BDr_4_table = BDr_4_df.pivot_table(
    values = 'UMIs',
    index = ['Cells'],
    columns = ['Features'],
    fill_value = 0,
)

# Wrap the table in an AnnData object (no explicit dtype requested for this sample).
adata_4 = sc.AnnData(BDr_4_table)
adata_4

In [None]:
# Sample-level annotations, written as constant string columns on every cell of adata_4.
# NOTE(review): adata_4 is read from a path naming donor GnR_07, but `donor` is set to
# 'CSE-06' here — confirm that the donor/age/disease labels belong to this sample.
sample_4_obs = {
    'sex': 'male',
    'age': '69',
    'condition': 'IAV',
    'ethnicity': 'caucasian',
    'PaCO2': '73.3',
    'donor': 'CSE-06',
    'infection': 'IAV',
    'disease': 'COPD',
    'SMK': '12',
    'illumina_stimunr': '21_0341',
    'bd_rhapsody': '2',
}
for obs_column, obs_value in sample_4_obs.items():
    adata_4.obs[obs_column] = obs_value

# Preview the first rows of the annotated cell table.
adata_4.obs.head()

### Save individual objects

In [None]:
# Write each sample's AnnData object to its own .h5ad file, in sample order.
h5ad_outputs = (
    (adata_1, '/Volumes/Bf110/ct5/raw_data/lung/influenza/icb/Versuch_2_ST01_CSE_CRTL_GEX.h5ad'),
    (adata_2, '/Volumes/Bf110/ct5/raw_data/lung/influenza/icb/Versuch_2_ST02_CSE_IAV_GEX.h5ad'),
    (adata_3, '/Volumes/Bf110/ct5/raw_data/lung/influenza/icb/Versuch_2_ST11_GnR_CTRL_GEX.h5ad'),
    (adata_4, '/Volumes/Bf110/ct5/raw_data/lung/influenza/icb/Versuch_2_ST12_GnR_IAV_GEX.h5ad'),
)
for sample_adata, h5ad_path in h5ad_outputs:
    sample_adata.write(h5ad_path)

In [None]:
# Show the expression matrix of the last sample as the cell output (inspection only; nothing is modified).
adata_4.X