### Notebook for the cell proportion analysis of Healthy_vs_COPD CTRL_vs_IAV data

- **Developed by**: Carlos Talavera-López Ph.D
- **Würzburg Institute for Systems Immunology - Faculty of Medicine - Julius Maximilian Universität Würzburg**
- **Created on**: 231204
- **Last modified**: 231204

### Load required packages

In [1]:
import logging
import anndata
import anndata2ri
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import rpy2.rinterface_lib.callbacks

### Set up working environment

In [2]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 180, color_map = 'magma_r', dpi_save = 300, vector_friendly = True, format = 'svg')

-----
anndata     0.10.3
scanpy      1.9.6
-----
PIL                 10.1.0
anndata2ri          1.3.1
appnope             0.1.3
asttokens           NA
cffi                1.16.0
comm                0.2.0
cycler              0.12.1
cython_runtime      NA
dateutil            2.8.2
debugpy             1.8.0
decorator           5.1.1
exceptiongroup      1.2.0
executing           2.0.1
get_annotations     NA
h5py                3.10.0
igraph              0.10.8
importlib_resources NA
ipykernel           6.27.1
ipywidgets          8.1.1
jedi                0.19.1
jinja2              3.1.2
joblib              1.3.2
kiwisolver          1.4.5
leidenalg           0.10.1
llvmlite            0.41.1
markupsafe          2.1.3
matplotlib          3.8.2
mpl_toolkits        NA
mpmath              1.3.0
natsort             8.4.0
numba               0.58.1
numpy               1.24.4
packaging           23.2
pandas              2.1.3
parso               0.8.3
pexpect             4.9.0
platformdirs        

In [3]:
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

In [4]:
anndata2ri.activate()

  anndata2ri.activate()


In [5]:
%load_ext rpy2.ipython

### Set up `milo` for the underlying analysis

In [6]:
%%R
library(miloR)
library(igraph)

Lade nötiges Paket: edgeR
Lade nötiges Paket: limma

Attache Paket: ‘igraph’

Das folgende Objekt ist maskiert ‘package:miloR’:

    graph

Die folgenden Objekte sind maskiert von ‘package:stats’:

    decompose, spectrum

Das folgende Objekt ist maskiert ‘package:base’:

    union



### Load working object

In [7]:
adata = sc.read_h5ad('../../../data/Marburg_cell_states_locked_ctl231212.raw.h5ad')
adata

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'viral_counts', 'infected_status'
    var: 'mt', 'ribo'
    uns: 'cell_states_colors', 'disease_colors', 'group_colors', 'infected_status_colors', 'infection_colors'
    obsm: 'X_scANVI', 'X_scVI', 'X_umap'

### Test for differential abundance with `milo`

In [8]:
sc.pp.neighbors(adata, n_neighbors = 50, random_state = 1712, use_rep = 'X_scANVI')

computing neighbors


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:22)


In [9]:
pca_matrix = adata.obsm['X_scANVI']
pca_df = pd.DataFrame(pca_matrix)
pca_df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,-0.023857,0.698145,1.647817,-0.009554,1.303181,0.162096,0.434327,-1.678667,0.252368,-1.498314,...,-0.879557,-0.175128,-0.498197,0.115287,1.640779,0.082409,1.497953,0.116839,0.114829,0.257356
1,0.260839,0.612675,2.014230,0.121441,-0.816559,0.284699,0.482280,0.179233,0.281227,-0.712338,...,-1.012894,-0.082516,-0.305419,-0.442082,2.326496,0.025421,1.941844,1.020679,0.317055,0.248593
2,0.527197,1.051738,0.284978,-0.108563,-0.954801,0.301718,0.474045,0.999606,0.318987,-0.334104,...,0.307652,0.436324,-0.097590,-0.563716,1.280361,0.011321,2.084684,-0.893512,0.369019,0.249331
3,0.376179,0.445397,0.685485,0.032689,-1.605751,0.343839,0.174283,0.361205,0.282133,-0.426715,...,-0.427790,-0.210345,-0.169110,-0.024666,1.943300,0.041610,2.520197,-0.189592,0.274579,0.219191
4,0.252085,0.361741,1.059010,0.104622,-0.651614,0.339975,0.253447,1.021600,0.463458,-0.883869,...,-0.839840,-0.188566,-0.206212,0.224203,1.275067,0.040008,2.008775,-0.192023,0.349644,0.118984
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97568,0.252063,0.054477,0.380298,-0.036232,-1.005471,0.327484,0.164557,0.540161,0.157245,0.843312,...,-0.040203,-0.022366,-0.163603,-0.000305,-0.471740,-0.039819,1.182968,-1.321827,0.061175,0.160841
97569,-0.459390,0.138232,-1.504954,-0.127792,1.083336,0.181003,0.126632,-1.130855,-0.271288,0.921351,...,-1.665752,-0.129056,-0.266652,1.417193,-0.067425,-0.077619,-0.359845,1.260643,-0.342096,0.137981
97570,-0.349372,0.421996,-1.476419,-0.015994,1.812386,0.392780,-0.003219,-0.594024,0.176381,-0.276295,...,-1.634133,0.048385,-0.289255,1.237229,0.502203,-0.043584,-0.928989,-0.589505,0.009101,0.194208
97571,0.050182,0.346088,-0.437399,-0.172110,1.886432,0.426958,0.462852,0.580223,0.637776,0.388131,...,-0.705198,0.353627,-0.463801,0.430544,-1.324370,0.346113,0.755285,-1.434863,0.365466,-0.381117


### Differential abundance (DA) analysis with `milo`

In [10]:
adata_no_knn = adata.copy()
adata_no_knn.obsp = None
adata_no_knn.uns.pop("neighbors")
adata_no_knn

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'viral_counts', 'infected_status'
    var: 'mt', 'ribo'
    uns: 'cell_states_colors', 'disease_colors', 'group_colors', 'infected_status_colors', 'infection_colors'
    obsm: 'X_scANVI', 'X_scVI', 'X_umap'

- This can be converted to a SingleCellExperiment using R magic again

In [11]:
%%R -i adata_no_knn
adata_no_knn

class: SingleCellExperiment 
dim: 27208 97573 
metadata(5): cell_states_colors disease_colors group_colors
  infected_status_colors infection_colors
assays(1): X
rownames(27208): A1BG A1BG-AS1 ... ZYX ZZEF1
rowData names(2): mt ribo
colnames(97573): 493638-ST07_CSE_CTRL-V1-uninfected
  816750-ST07_CSE_CTRL-V1-uninfected ... 1638545-CSE03_IAV-V6-infected
  163246-CSE03_IAV-V6-infected
colData names(40): sex age ... viral_counts infected_status
reducedDimNames(3): X_scANVI X_scVI UMAP
mainExpName: NULL
altExpNames(0):


- Make a Milo class object for DA analysis

In [12]:
%%R 
milo <- Milo(adata_no_knn)
milo

class: Milo 
dim: 27208 97573 
metadata(5): cell_states_colors disease_colors group_colors
  infected_status_colors infection_colors
assays(1): X
rownames(27208): A1BG A1BG-AS1 ... ZYX ZZEF1
rowData names(2): mt ribo
colnames(97573): 493638-ST07_CSE_CTRL-V1-uninfected
  816750-ST07_CSE_CTRL-V1-uninfected ... 1638545-CSE03_IAV-V6-infected
  163246-CSE03_IAV-V6-infected
colData names(40): sex age ... viral_counts infected_status
reducedDimNames(3): X_scANVI X_scVI UMAP
mainExpName: NULL
altExpNames(0):
nhoods dimensions(2): 1 1
nhoodCounts dimensions(2): 1 1
nhoodDistances dimension(1): 0
graph names(0):
nhoodIndex names(1): 0
nhoodExpression dimension(2): 1 1
nhoodReducedDim names(0):
nhoodGraph names(0):
nhoodAdjacency dimension(2): 1 1


- Add KNN graph

In [13]:
knn_adjacency = adata.obsp["connectivities"]

In [14]:
%%R -i knn_adjacency

milo_graph <- buildFromAdjacency(knn_adjacency, k = 50, is.binary = TRUE)
graph(milo) <- miloR::graph(milo_graph)

- Add PCA matrix from X_scANVI

In [15]:
%%R -i pca_matrix

reducedDims(milo)$PCA <- as.matrix(pca_matrix)

### Run `milo` analysis 

In [16]:
design_df = adata.obs[["batch","donor", "group"]]
design_df.drop_duplicates(inplace = True)
design_df.index = design_df['batch']
design_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  design_df.drop_duplicates(inplace = True)


Unnamed: 0_level_0,batch,donor,group
batch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ST07_CSE_CTRL,ST07_CSE_CTRL,GNR-06,healthy_ctrl
CSE_06_CRTL,CSE_06_CRTL,GNR-07,healthy_ctrl
GER14_CRTL,GER14_CRTL,GER-14,healthy_ctrl
GnR08_CTRL,GnR08_CTRL,GNR-08,healthy_ctrl
GER12_CTRL,GER12_CTRL,GER-12,healthy_ctrl
GER02_CTRL,GER02_CTRL,GER-02,healthy_ctrl
ST09_CSE_IAV,ST09_CSE_IAV,CSE-07,copd_ctrl
GnR_07_CTRL,GnR_07_CTRL,CSE-06,copd_ctrl
CHE01_CTRL,CHE01_CTRL,CHE-01,copd_ctrl
CHE02_CTRL,CHE02_CTRL,CHE-02,copd_ctrl


In [17]:
%%R -i design_df -o DA_results

## Define neighbourhoods
milo <- makeNhoods(milo, prop = 0.1, k = 20, d = 30, refined = TRUE)

## Count cells in neighbourhoods
milo <- countCells(milo, meta.data = data.frame(colData(milo)), sample = "batch")

## Calculate distances between cells in neighbourhoods for spatial FDR correction
milo <- calcNhoodDistance(milo, d = 30)

## Test for differential abundance
DA_results <- testNhoods(milo, design = ~ group, design.df = design_df)

### Explore neighbourhoods using a volcano plot

- In the above dataframe, each row represents a neighbourhood (NOT a cell) and the log-Fold Change and adjusted p-value for differential abundance between stages are reported. We can start exploring the test results with a volcano plot.

In [None]:
DA_results

In [None]:
plt.plot(DA_results.logFC, -np.log10(DA_results.SpatialFDR), '.')
plt.xlabel("log-Fold Change")
plt.ylabel("- log10(Spatial FDR)")

### Visualizing results from Milo analysis

In [None]:
%%R
milo <- buildNhoodGraph(milo)

In [None]:
%%R -w 1000 -h 800
plotNhoodGraphDA(milo, DA_results, alpha = 0.05)