### Notebook for subsetting the merged Pkp2 and Ttn datasets of ACM murine cardiac leucocytes (6 months post MCMV infection) to the non-infected controls

#### Environment: Scanpy

- **Developed by**: Alexandra Cirnu
- **Modified by**: Alexandra Cirnu
- **Würzburg Institute for Systems Immunology & Julius-Maximilian-Universität Würzburg**
- **Date of creation**: 240220
- **Date of modification**: 240409

### Import required modules

In [1]:
import anndata
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
from muon import atac as ac
from muon import prot as pt
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import rcParams
from scipy.sparse import csr_matrix

In [2]:
%matplotlib inline
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 160, color_map = 'RdPu', dpi_save = 180, vector_friendly = True, format = 'svg')

-----
anndata     0.10.6
scanpy      1.9.8
-----
PIL                 10.2.0
appnope             0.1.4
asttokens           NA
colorama            0.4.6
comm                0.2.1
cycler              0.12.1
cython_runtime      NA
dateutil            2.8.2
debugpy             1.8.1
decorator           5.1.1
exceptiongroup      1.2.0
executing           2.0.1
h5py                3.10.0
igraph              0.11.4
ipykernel           6.29.2
ipywidgets          8.1.2
jedi                0.19.1
joblib              1.3.2
kiwisolver          1.4.5
leidenalg           0.10.2
llvmlite            0.42.0
matplotlib          3.8.3
matplotlib_inline   0.1.6
mpl_toolkits        NA
mudata              0.2.3
muon                0.1.5
natsort             8.4.0
numba               0.59.0
numpy               1.26.4
packaging           23.2
pandas              2.2.0
parso               0.8.3
patsy               0.5.6
pickleshare         0.7.5
platformdirs        4.2.0
prompt_toolkit      3.0.42
psutil        

## Read in all datasets

In [3]:
def X_is_raw(adata):
    """Return True when every value stored in ``adata.X`` is a whole number.

    Raw count matrices contain only integer-valued entries, whereas normalised
    or log-transformed matrices do not. Every stored value is checked
    individually instead of comparing per-gene column sums, because fractional
    entries can add up to a whole number (e.g. 0.5 + 0.5) and would then be
    mistaken for raw counts.

    Parameters
    ----------
    adata
        Object exposing a dense array or a scipy sparse matrix as ``.X``.

    Returns
    -------
    bool
        True if all values in ``adata.X`` are integer-valued, False otherwise
        (including when NaN is present).
    """
    from scipy.sparse import issparse

    X = adata.X
    # For sparse input only the explicitly stored entries need checking;
    # the implicit zeros are whole numbers by definition.
    values = X.data if issparse(X) else np.asarray(X)
    return bool(np.all(np.mod(values, 1) == 0))

In [4]:
# Location of the .h5mu file that holds the multimodal object read by this notebook
merged_h5mu_path = '/Users/alex/data/ACM_cardiac_leuco/processed_merged/Merge_demux_QCed_muon_ac240408.raw.h5mu'

# Read the MuData object from disk and display its summary
mdata = mu.read_h5mu(merged_h5mu_path)
mdata



In [5]:
# Take the 'rna' modality out of the MuData container for the steps below
adata = mdata.mod['rna']

In [6]:
# Display the per-cell annotation table (.obs) of the RNA modality
adata.obs

Unnamed: 0,sample,condition,genotype,infection,library,model,n_genes_by_counts,total_counts,doublet_scores,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,percent_mt,n_counts,percent_chrY,XIST-counts,S_score,G2M_score
AAACCCAAGAAGCGAA-1-A1,Pkp2_HetKO_noninf_1,Pkp2_HetKO_noninf,Pkp2_HetKO,noninf,A1,Pkp2,3786,19322.0,0.158879,468.0,2.422109,3813.0,19.733982,0.024221,19322.0,0.056930,0.0,-0.583666,-0.626720
AAACCCAAGATACAGT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,2769,8562.0,0.037037,163.0,1.903761,2026.0,23.662695,0.019038,8562.0,0.000000,0.0,-0.339369,-0.496061
AAACCCAAGTCTAACC-1-A1,Pkp2_HetKO_MCMV_2,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,Pkp2,5514,30195.0,0.139442,569.0,1.884418,2666.0,8.829276,0.018844,30195.0,0.009935,0.0,2.894924,0.533799
AAACCCACAACCACGC-1-A1,Pkp2_HetKO_MCMV_3,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,Pkp2,3097,10116.0,0.066038,164.0,1.621194,970.0,9.588771,0.016212,10116.0,0.019771,0.0,-0.006805,-0.399040
AAACCCAGTGCTCTCT-1-A1,Pkp2_HetKO_MCMV_3,Pkp2_HetKO_MCMV,Pkp2_HetKO,MCMV,A1,Pkp2,2558,6153.0,0.027823,115.0,1.869007,730.0,11.864132,0.018690,6153.0,0.065009,0.0,-0.340230,-0.079096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTTGCAAGCTCTA-1-B2,Ttn_Ctr_noninf_1,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,1849,3733.0,0.190000,65.0,1.741227,189.0,5.062952,0.017412,3733.0,0.000000,0.0,-0.179842,-0.495021
TTTGTTGGTACAGGTG-1-B2,Ttn_Ctr_MCMV_2,Ttn_Ctr_MCMV,Ttn_Ctr,MCMV,B2,Ttn,4744,24293.0,0.146919,905.0,3.725353,3179.0,13.086075,0.037254,24293.0,0.000000,13.0,-0.435458,-0.744327
TTTGTTGGTAGTAAGT-1-B2,Ttn_HetKO_MCMV_2,Ttn_HetKO_MCMV,Ttn_HetKO,MCMV,B2,Ttn,2416,6564.0,0.040512,69.0,1.051188,983.0,14.975625,0.010512,6564.0,0.030469,0.0,0.363058,-0.589437
TTTGTTGTCCCAGGAC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,2559,6388.0,0.106383,148.0,2.316844,463.0,7.247965,0.023168,6388.0,0.140889,0.0,0.080642,-0.877899


### Subset the adata object to keep only the non-infected controls (`Pkp2_Ctr_noninf` and `Ttn_Ctr_noninf`)

In [7]:
# Conditions to keep: the non-infected controls of both models
control_conditions = ["Pkp2_Ctr_noninf", "Ttn_Ctr_noninf"]
is_control = adata.obs['condition'].isin(control_conditions)

# Slicing an AnnData object returns a view of the parent. `.copy()` turns the
# subset into an independent object, so that storing it in `mdata` and writing
# it to disk later does not operate on a view (the likely cause of the pandas
# SettingWithCopyWarning emitted when the file is saved).
adata = adata[is_control, :].copy()
adata.obs

Unnamed: 0,sample,condition,genotype,infection,library,model,n_genes_by_counts,total_counts,doublet_scores,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,percent_mt,n_counts,percent_chrY,XIST-counts,S_score,G2M_score
AAACCCAAGATACAGT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,2769,8562.0,0.037037,163.0,1.903761,2026.0,23.662695,0.019038,8562.0,0.000000,0.0,-0.339369,-0.496061
AAACGCTGTTGTGTTG-1-A1,Pkp2_Ctr_noninf_1,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,365,750.0,0.056180,260.0,34.666668,34.0,4.533333,0.346667,750.0,0.000000,0.0,-0.019995,-0.083225
AAACGCTTCTCGCTCA-1-A1,Pkp2_Ctr_noninf_1,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,1599,3855.0,0.083333,167.0,4.332036,70.0,1.815824,0.043320,3855.0,0.025940,0.0,-0.169221,-0.390143
AAAGGTACAGAACATA-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,3854,17536.0,0.088608,479.0,2.731524,1791.0,10.213275,0.027315,17536.0,0.051323,0.0,-0.317631,-0.578955
AAAGTCCAGGGACACT-1-A1,Pkp2_Ctr_noninf_2,Pkp2_Ctr_noninf,Pkp2_Ctr,noninf,A1,Pkp2,3788,14174.0,0.226994,330.0,2.328207,2049.0,14.456045,0.023282,14174.0,0.014110,0.0,-0.370103,-0.370084
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGGTTCACGCAGTC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,4219,18844.0,0.068120,683.0,3.624496,2868.0,15.219699,0.036245,18844.0,0.000000,0.0,-0.901389,-1.291862
TTTGGTTTCCTGTACC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,3321,11805.0,0.226994,263.0,2.227870,1469.0,12.443880,0.022279,11805.0,0.025413,1.0,-0.277671,-0.184796
TTTGTTGCAAGCTCTA-1-B2,Ttn_Ctr_noninf_1,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,1849,3733.0,0.190000,65.0,1.741227,189.0,5.062952,0.017412,3733.0,0.000000,0.0,-0.179842,-0.495021
TTTGTTGTCCCAGGAC-1-B2,Ttn_Ctr_noninf_2,Ttn_Ctr_noninf,Ttn_Ctr,noninf,B2,Ttn,2559,6388.0,0.106383,148.0,2.316844,463.0,7.247965,0.023168,6388.0,0.140889,0.0,0.080642,-0.877899


### Check that raw counts are stored in the subsetted adata object

In [8]:
# Run the raw-count check defined above on the subsetted RNA object
X_is_raw(adata)

True

#### Update the mdata object


In [9]:
# Replace the 'rna' modality in the MuData container with the subsetted object
mdata.mod['rna'] = adata
# Display the container's summary
mdata

In [10]:
# Restrict the protein modality to the cells that remain in the RNA modality.
# NOTE: this raises a KeyError if an RNA barcode is absent from 'prot'.
rna_cells = mdata.mod['rna'].obs.index
# `.copy()` materialises the subset; without it the modality stored in `mdata`
# (and later written to disk) would be a view of the full protein object.
mdata.mod['prot'] = mdata.mod['prot'][rna_cells].copy()

# Re-synchronise the container-level annotations with the changed modalities
mdata.update()
mdata

## Save the subsetted object

In [11]:
# Destination .h5mu file for the control-only MuData object
subset_h5mu_path = "/Users/alex/data/ACM_cardiac_leuco/processed_merged/Merge_demux_subset_Ctrls_QCed_muon_ac240409.raw.h5mu"

# Write the updated MuData container to disk
mdata.write(subset_h5mu_path)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[key] = c
