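This notebook demonstrates the computation of spatial cross-correlations in spatial samples: between gene expression and cell type proportions, and between the cell type proportions themselves.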

In [6]:
import anndata as ad
import scanpy as sc
import pandas as pd

import spatial_correlation as sp_corr

### Load AnnData objects of lung spatial slides annotated with IRIS domains, and a CSV file with cell type proportions (IRIS deconvolution)

In [4]:
# Read the four annotated lung slices (one .h5ad file per slice).
lung_A, lung_B, lung_C, lung_D = (
    ad.read_h5ad(h5ad_path)
    for h5ad_path in (
        '../data/annotated_slice1_NEW.h5ad',
        '../data/annotated_slice2_NEW.h5ad',
        '../data/annotated_slice3_NEW.h5ad',
        '../data/annotated_slice4_NEW.h5ad',
    )
)

# Apply the same preprocessing to every slice: normalize_total with
# target_sum=1e4 followed by log1p. The return values are not used, so both
# calls are relied on to update the AnnData objects in place.
for lung_slice in (lung_A, lung_B, lung_C, lung_D):
    sc.pp.normalize_total(lung_slice, target_sum=1e4)
    sc.pp.log1p(lung_slice)

In [9]:
# Load the IRIS deconvolution table. This cell relies on two of its columns:
# "Slice" (sample name) and "spotName" (spot barcode).
IRIS_df = pd.read_csv('../cellchat/IRIS proportions/IRIS_proportions_NEW.csv')

# Sample names in order of first appearance in the CSV.
sample_names = IRIS_df["Slice"].unique()
print("Sample names: ", sample_names)

# Four per-slice tables are built below; fail early with a readable message
# if the CSV does not contain enough distinct slices.
if len(sample_names) < 4:
    raise ValueError(
        f"Expected at least 4 distinct values in the 'Slice' column, "
        f"found {len(sample_names)}: {list(sample_names)}"
    )

# Split the table by slice and index each part by spot barcode (needed for
# concatenation downstream). Filtering and re-indexing in one expression
# yields a fresh DataFrame instead of mutating a filtered subset in place.
# NOTE(review): IRIS_prop1..4 are later paired with lung_A..D by position, so
# the CSV row order presumably matches slice1..slice4 -- confirm.
IRIS_prop1, IRIS_prop2, IRIS_prop3, IRIS_prop4 = (
    IRIS_df[IRIS_df["Slice"] == slice_name].set_index("spotName")
    for slice_name in sample_names[:4]
)


Sample names:  ['LNEN071-IARC-A' 'LNEN084-IARC-B' 'LNEN107-IARC-C' 'LNEN206-IARC-D']


#### Compute spatial correlations for cell type proportions and expression of signalling genes

In [None]:
# Cell types whose proportions enter the spatial correlation analysis.
cell_types = [
    "Lower Airway Progenitor",
    "Neuroendocrine CALCA+",
    "Neuroendocrine NEUROD1+",
    "Club",
    "Macrophage",
    "Macrophage_proliferating",
    "Myofibro",
    "Fibro",
    "T_conv",
    "T_CD8",
    "T/NK_proliferating",
]

# Signalling genes whose expression is correlated with the cell types above.
genes = [
    "LPCAT1",
    "SCGB3A2",
    "GPRC5A",
    "MIF",
    "CD74",
    "CD44",
    "CXCR4",
    "COL1A1",
    "COL1A2",
    "ICAM1",
    "ITGB2",
    "ITGAX",
]

In [25]:
# Build the AnnData objects consumed by the correlation routines below.
# Each expression slice is paired with its IRIS proportion table
# (A-1, B-2, C-3, D-4) and restricted to the selected genes and cell types.
adata_mix_A, adata_mix_B, adata_mix_C, adata_mix_D = (
    sp_corr.make_adata_mixed_bv(lung_slice, iris_props, genes, cell_types)
    for lung_slice, iris_props in (
        (lung_A, IRIS_prop1),
        (lung_B, IRIS_prop2),
        (lung_C, IRIS_prop3),
        (lung_D, IRIS_prop4),
    )
)

In [None]:
# Correlation between gene expression and cell type proportions, one call per
# slice. Every call yields a pair that is unpacked into a correlation matrix
# (corr_bv_*) and the matching p-values (p_bv_*).
(
    (corr_bv_A, p_bv_A),
    (corr_bv_B, p_bv_B),
    (corr_bv_C, p_bv_C),
    (corr_bv_D, p_bv_D),
) = [
    sp_corr.compute_spatial_corr_mix(adata_mix, cell_types, genes)
    for adata_mix in (adata_mix_A, adata_mix_B, adata_mix_C, adata_mix_D)
]


Step  0 / 12
Step  1 / 12
Step  2 / 12
Step  3 / 12
Step  4 / 12
Step  5 / 12
Step  6 / 12
Step  7 / 12
Step  8 / 12
Step  9 / 12
Step  10 / 12
Step  11 / 12


In [None]:
# Export the gene-vs-cell-type results of every slice as .csv and .xlsx.
corr_mat_list = [corr_bv_A, corr_bv_B, corr_bv_C, corr_bv_D]
p_val_list = [p_bv_A, p_bv_B, p_bv_C, p_bv_D]

# Output files are numbered 0-3, following the list order A, B, C, D.
for i, (corr_mat, p_mat) in enumerate(zip(corr_mat_list, p_val_list)):
    # Label the matrices: rows are genes, columns are cell types.
    df_corr = pd.DataFrame(corr_mat, index=genes, columns=cell_types)
    df_pval = pd.DataFrame(p_mat, index=genes, columns=cell_types)

    # Plain-text tables.
    df_corr.to_csv(f'../results/correlation tables/Corr_genes_types{i}_NEW.csv')
    df_pval.to_csv(f'../results/correlation tables/Pval_genes_types{i}_NEW.csv')

    # Excel workbooks, one sheet each.
    with pd.ExcelWriter(f'../results/correlation tables/Corr_genes_types{i}_NEW.xlsx', engine="openpyxl") as writer:
        df_corr.to_excel(writer, sheet_name='Correlation')
    with pd.ExcelWriter(f'../results/correlation tables/Pval_genes_types{i}_NEW.xlsx', engine="openpyxl") as writer:
        df_pval.to_excel(writer, sheet_name='Pval')



#### Compute spatial correlations between cell type proportions

In [19]:
# Pairwise correlation between cell type proportions, slice by slice.
# A banner is printed before each slice is processed.
ct_corr_results = []
for banner, adata_mix in (
    ("Slice 1\n", adata_mix_A),
    ("Slice 2\n", adata_mix_B),
    ("Slice 3\n", adata_mix_C),
    ("Slice 4\n", adata_mix_D),
):
    print(banner)
    ct_corr_results.append(sp_corr.compute_spatial_corr(adata_mix, cell_types))

# Unpack the (correlation, p-value) pair returned for each slice.
(
    (corr_bv1, p_bv1),
    (corr_bv2, p_bv2),
    (corr_bv3, p_bv3),
    (corr_bv4, p_bv4),
) = ct_corr_results

Step  0 / 11
Step  1 / 11
Step  2 / 11
Step  3 / 11
Step  4 / 11
Step  5 / 11
Step  6 / 11
Step  7 / 11
Step  8 / 11
Step  9 / 11
Step  10 / 11
Step  0 / 11
Step  1 / 11
Step  2 / 11
Step  3 / 11
Step  4 / 11
Step  5 / 11
Step  6 / 11
Step  7 / 11
Step  8 / 11
Step  9 / 11
Step  10 / 11
Step  0 / 11
Step  1 / 11
Step  2 / 11
Step  3 / 11
Step  4 / 11
Step  5 / 11
Step  6 / 11
Step  7 / 11
Step  8 / 11
Step  9 / 11
Step  10 / 11
Step  0 / 11
Step  1 / 11
Step  2 / 11
Step  3 / 11
Step  4 / 11
Step  5 / 11
Step  6 / 11
Step  7 / 11
Step  8 / 11
Step  9 / 11
Step  10 / 11


In [24]:
# Export the cell-type-vs-cell-type correlation coefficients and p-values of
# every slice as .csv and .xlsx.
corr_mat_list = [corr_bv1, corr_bv2, corr_bv3, corr_bv4]
p_val_list = [p_bv1, p_bv2, p_bv3, p_bv4]

# Output files are numbered 0-3, following the list order of slices 1-4.
for i, (corr_mat, p_mat) in enumerate(zip(corr_mat_list, p_val_list)):
    # Square matrices: cell types label both rows and columns.
    df_corr = pd.DataFrame(corr_mat, index=cell_types, columns=cell_types)
    df_pval = pd.DataFrame(p_mat, index=cell_types, columns=cell_types)

    # Plain-text tables.
    df_corr.to_csv(f'../results/correlation tables/Corr_{i}_NEW.csv')
    df_pval.to_csv(f'../results/correlation tables/Pval_{i}_NEW.csv')

    # Excel workbooks, one sheet each.
    with pd.ExcelWriter(f'../results/correlation tables/Corr_{i}_NEW.xlsx', engine="openpyxl") as writer:
        df_corr.to_excel(writer, sheet_name='Correlation')
    with pd.ExcelWriter(f'../results/correlation tables/Pval_{i}_NEW.xlsx', engine="openpyxl") as writer:
        df_pval.to_excel(writer, sheet_name='Pval')
