### Notebook for subsetting the merged Pkp2 and Ttn datasets of ACM murine cardiac leucocytes (6 months post MCMV infection) to the non-infected controls

#### Environment: Scanpy

- **Developed by**: Alexandra Cirnu
- **Modified by**: Alexandra Cirnu
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- **Date of creation**: 240220
- **Date of modification**: 240220

### Import required modules

In [1]:
import anndata
import numpy as np
import pandas as pd
import scanpy as sc

## Read in the merged dataset

In [2]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is a whole number.

    The check is done on the individual matrix entries rather than on the
    per-column sums: fractional entries can add up to a whole number
    (e.g. 0.5 + 0.5), so a sum-based check can report non-integer data as
    integer-valued.

    Parameters
    ----------
    adata
        Any object exposing a dense (NumPy) or SciPy sparse matrix as ``.X``.

    Returns
    -------
    bool
        True if all entries of ``.X`` are integer-valued (an empty matrix
        counts as True), False otherwise, including when NaN is present.
    """
    # Local import: scipy is not imported in the notebook's import cell.
    from scipy import sparse

    X = adata.X
    if sparse.issparse(X):
        # Only explicitly stored entries need checking; implicit zeros are integers.
        # csr/csc/coo keep those entries in a flat ``.data`` array; other formats are
        # converted to COO first.
        values = X.data if X.format in ("csr", "csc", "coo") else X.tocoo().data
    else:
        values = np.asarray(X)

    # Integer dtypes are whole numbers by construction, so skip the element-wise pass.
    if np.issubdtype(values.dtype, np.integer):
        return True

    # A value is whole exactly when its remainder modulo 1 is zero (NaN compares False).
    return bool(np.all(np.mod(values, 1) == 0))

In [3]:
# Location of the merged .h5ad file that this notebook starts from.
merged_h5ad_path = '/Users/alex/data/ACM_cardiac_leuco/processed_merged/Merge_demux_QCed_ac240220.raw.h5ad'

# Load the file into memory and show the AnnData summary as the cell output.
adata = sc.read_h5ad(merged_h5ad_path)
adata

AnnData object with n_obs × n_vars = 44594 × 32285
    obs: 'sample', 'condition', 'genotype', 'infection', 'library', 'n_genes', 'doublet_scores', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'n_cells_by_counts-A1', 'mean_counts-A1', 'pct_dropout_by_counts-A1', 'total_counts-A1', 'n_cells_by_counts-A2', 'mean_counts-A2', 'pct_dropout_by_counts-A2', 'total_counts-A2', 'n_cells_by_counts-A3', 'mean_counts-A3', 'pct_dropout_by_counts-A3', 'total_counts-A3', 'n_cells_by_counts-A4', 'mean_counts-A4', 'pct_dropout_by_counts-A4', 'total_counts-A4', 'n_cells_by_counts-B1', 'mean_counts-B1', 'pct_dropout_by_counts-B1', 'total_counts-B1', 'n_cells_by_counts-B2', 'mean_counts-B2', 'pct_dropout_by_counts-B2', 'total_counts-B2'
    layers: 'counts', 'sqrt_norm'

In [4]:
# Display the observation-level annotation table (adata.obs) of the loaded object.
adata.obs

Unnamed: 0,sample,condition,genotype,infection,library,n_genes,doublet_scores,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,percent_mt,n_counts,percent_chrY,XIST-counts,S_score,G2M_score
AAACCCAAGAAGCGAA-1-A1,Pkp2_HetKO_noninf_1,Pkp2_HetKO_noninf,Pkp2_HetKO,noninf,A1,3786,0.158879,3786,19322,468,2.422110,3813,19.733982,0.024221,19322,0.056930,0,-0.583666,-0.626720
AAACCCAAGATACAGT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,2769,0.037037,2769,8562,163,1.903761,2026,23.662696,0.019038,8562,0.000000,0,-0.339369,-0.496060
AAACCCAAGTCTAACC-1-A1,Pkp2_HetKO_MCMV_2,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,5514,0.139442,5514,30195,569,1.884418,2666,8.829276,0.018844,30195,0.009935,0,2.894925,0.533800
AAACCCACAACCACGC-1-A1,Pkp2_HetKO_MCMV_3,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,3097,0.066038,3097,10116,164,1.621194,970,9.588770,0.016212,10116,0.019771,0,-0.006805,-0.399040
AAACCCAGTGCTCTCT-1-A1,Pkp2_HetKO_MCMV_3,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,2558,0.027823,2558,6153,115,1.869007,730,11.864131,0.018690,6153,0.065009,0,-0.340229,-0.079097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTTGCAAGCTCTA-1-B2,Ttn_Ctr_noninf_1,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,1849,0.190000,1849,3733,65,1.741227,189,5.062952,0.017412,3733,0.000000,0,-0.179842,-0.495021
TTTGTTGGTACAGGTG-1-B2,Ttn_Ctr_MCMV_2,Ttn_Ctr_MCMV,Ttn_Ctr,MCMV,B2,4744,0.146919,4744,24293,905,3.725353,3179,13.086074,0.037254,24293,0.000000,13,-0.435458,-0.744328
TTTGTTGGTAGTAAGT-1-B2,Ttn_HetKO_MCMV_2,Ttn_HetKO_MCMV,Ttn_HetKO,MCMV,B2,2416,0.040512,2416,6564,69,1.051188,983,14.975625,0.010512,6564,0.030469,0,0.363058,-0.589437
TTTGTTGTCCCAGGAC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,2559,0.106383,2559,6388,148,2.316844,463,7.247965,0.023168,6388,0.140889,0,0.080642,-0.877899


### Subset the adata object to keep only the `Pkp2_Ctr_noninf` and `Ttn_Ctr_noninf` conditions

In [5]:
# Condition labels to keep in the subset.
control_conditions = ["Pkp2_Ctr_noninf", "Ttn_Ctr_noninf"]
is_control = adata.obs['condition'].isin(control_conditions)

# Slicing an AnnData object returns a view of the parent; .copy() turns it into an
# independent object, so later steps (checks, writing to disk) do not operate on a view.
adata = adata[is_control, :].copy()

# Fail early if one of the requested conditions is absent from the data
# (e.g. because of a typo in a label) instead of silently saving an incomplete subset.
missing_conditions = set(control_conditions) - set(adata.obs['condition'].unique())
assert not missing_conditions, f"Conditions not found in adata.obs['condition']: {sorted(missing_conditions)}"

adata.obs

Unnamed: 0,sample,condition,genotype,infection,library,n_genes,doublet_scores,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,percent_mt,n_counts,percent_chrY,XIST-counts,S_score,G2M_score
AAACCCAAGATACAGT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,2769,0.037037,2769,8562,163,1.903761,2026,23.662696,0.019038,8562,0.000000,0,-0.339369,-0.496060
AAACGCTGTTGTGTTG-1-A1,Pkp2_Ctr_noninf_1,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,365,0.056180,365,750,260,34.666667,34,4.533333,0.346667,750,0.000000,0,-0.019995,-0.083225
AAACGCTTCTCGCTCA-1-A1,Pkp2_Ctr_noninf_1,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,1599,0.083333,1599,3855,167,4.332036,70,1.815824,0.043320,3855,0.025940,0,-0.169221,-0.390143
AAAGGTACAGAACATA-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,3854,0.088608,3854,17536,479,2.731524,1791,10.213276,0.027315,17536,0.051323,0,-0.317631,-0.578954
AAAGTCCAGGGACACT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,3788,0.226994,3788,14174,330,2.328207,2049,14.456046,0.023282,14174,0.014110,0,-0.370103,-0.370085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGGTTCACGCAGTC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,4219,0.068120,4219,18844,683,3.624496,2868,15.219699,0.036245,18844,0.000000,0,-0.901389,-1.291861
TTTGGTTTCCTGTACC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,3321,0.226994,3321,11805,263,2.227870,1469,12.443880,0.022279,11805,0.025413,1,-0.277671,-0.184796
TTTGTTGCAAGCTCTA-1-B2,Ttn_Ctr_noninf_1,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,1849,0.190000,1849,3733,65,1.741227,189,5.062952,0.017412,3733,0.000000,0,-0.179842,-0.495021
TTTGTTGTCCCAGGAC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,2559,0.106383,2559,6388,148,2.316844,463,7.247965,0.023168,6388,0.140889,0,0.080642,-0.877899


### Check that raw data is stored in the subset adata object

In [6]:
# Run the X_is_raw helper on the current adata object; the boolean result is shown as the cell output.
X_is_raw(adata)

True

## Save the subset object

In [7]:
# Destination file for the current adata object.
subset_h5ad_path = "/Users/alex/data/ACM_cardiac_leuco/processed_merged/Merge+Subset_Ctrls_demux_QCed_ac240221.raw.h5ad"

# Write the object to disk at the path above.
adata.write(subset_h5ad_path)