In [5]:
#perform neighbor enrichment analysis with squidpy
import anndata
import cellcharter as cc
import pandas as pd
import squidpy as sq
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib as mpl
import time, os, sys
import glob
import warnings
import numpy as np
import seaborn as sns
warnings.simplefilter(action='ignore', category=FutureWarning)

In [6]:
#import simualted data
path_to_csv = './../../../../../MI_heart_paper/data/cell_table_final.csv'
obs = pd.read_csv(path_to_csv)

# mode if only links between different phenotypes should be used
only_inter = False

# paths 
path_to_csv = './../../../../../MI_heart_paper/data/cell_table_final.csv'
csv_files = glob.glob(os.path.join(path_to_csv, '*.csv'))
output_only_inter = './../../../../github/Comparison/20250218_results_MI/cellcharter_only_inter_knn5_try.csv'
output_not_only_inter = './../../../../github/Comparison/20250218_results_MI/cellcharter_not_only_inter_knn5_try.csv'

In [7]:
#filter out cell types
ignore_cell_types = ['exclude']
obs = obs[~obs['final_cell_type'].isin(ignore_cell_types)]

# add mock marker files to it
obs['D'] = np.random.randint(1, 101, size=len(obs))
obs['E'] = np.random.randint(1, 101, size=len(obs))
obs.index = obs.index.astype(str) 

# Load dataframe into anndata object
X = obs[['D', 'E']]
X = X.values
adata = anndata.AnnData(X)
adata.obs = obs
adata

AnnData object with n_obs × n_vars = 563180 × 2
    obs: 'fov', 'label', 'cell_size', 'X_centroid', 'Y_centroid', 'Eccentricity', 'Solidity', 'Extent', 'Orientation', 'cell_meta_cluster', 'timepoint', 'region', 'region_name', 'refined_cell_type', 'final_cell_type', 'endocardial_annotation', 'exclude_annotation', 'artefact', 'distance_from_lumen', 'lumen_bin', 'size_filter', 'D', 'E'

In [8]:
# get spatial coordinates to be in obsm
adata.obsm['spatial'] = obs[['X_centroid', 'Y_centroid']].values
# make "refined_cell_types" a categorical variable
adata.obs['final_cell_type'] = pd.Categorical(adata.obs['final_cell_type'])

In [9]:

# Assuming 'FOV' is the column in obs indicating different fields of view
fov_list = adata.obs['fov'].unique()

# Dictionary to store FOV-specific AnnData objects
fov_adata_dict = {}

# Loop over each FOV
for fov in fov_list:
    # Create a subset of adata for the specific FOV
    fov_adata = adata[adata.obs['fov'] == fov].copy()

    # Perform spatial neighbors analysis on the subset
    sq.gr.spatial_neighbors(fov_adata, coord_type = 'generic', delaunay=False, n_neighs=5)
    cc.gr.nhood_enrichment(fov_adata, cluster_key="final_cell_type", n_perms=300, only_inter=only_inter)
    # Store the result in the dictionary
    fov_adata_dict[fov] = fov_adata


In [11]:
import pandas as pd

# Initialize a list to hold each flattened DataFrame
flattened_dfs = []

# Loop through each fov in the dictionary
for fov, fov_adata in fov_adata_dict.items():
    # Extract the enrichment matrix
    enrichment_matrix = fov_adata.uns['final_cell_type_nhood_enrichment']['enrichment']
    cell_types = fov_adata.obs['final_cell_type'].cat.categories

    # Check if the dimensions of the zscore matrix match the number of categories
    assert len(cell_types) == enrichment_matrix.shape[0] == enrichment_matrix.shape[1], "Dimension mismatch!"
    zscore_df = pd.DataFrame(enrichment_matrix, index=cell_types, columns=cell_types)

    # Flatten the DataFrame
    flattened_df = zscore_df.stack().reset_index()
    flattened_df.columns = ['rowname', 'colname', 'enrichment']
    flattened_df['combined'] = flattened_df['rowname'] + '_' + flattened_df['colname']
    flattened_df['fov'] = fov
    flattened_df = flattened_df[['fov', 'combined', 'enrichment']]
    flattened_dfs.append(flattened_df)

# Concatenate all flattened DataFrames into one big DataFrame
big_dataframe = pd.concat(flattened_dfs, ignore_index=True)
final_dataframe = big_dataframe.pivot(index='fov', columns='combined', values='enrichment')
final_dataframe.reset_index('fov',inplace=True)

# Display the final DataFrame
print(final_dataframe)

combined         fov  Cardiomyocytes Ankrd1+_Cardiomyocytes  \
0             24h_83                              -0.334441   
1             24h_86                              -0.412330   
2             48h_76                              -0.301947   
3             48h_79                              -0.298629   
4              4h_96                              -0.295800   
5              4h_97                              -0.307912   
6         Control_12                              -0.143411   
7         Control_13                              -0.121895   
8         Control_14                              -0.158210   

combined  Cardiomyocytes Ankrd1+_Cardiomyocytes Ankrd1+  \
0                                              0.451668   
1                                              0.480463   
2                                              0.340891   
3                                              0.376948   
4                                              0.369317   
5              

In [12]:
# save the dataframe
if only_inter:
    final_dataframe.to_csv(output_only_inter, index=False)  # Set index=False to avoid saving the index as a column
else:
    final_dataframe.to_csv(output_not_only_inter, index=False)