In [1]:
#perform neighbor enrichment analysis with squidpy
import anndata
import pandas as pd
import squidpy as sq
import matplotlib.pyplot as plt
import matplotlib as mpl
import time, os, sys
import glob
import warnings
import numpy as np
import seaborn as sns
warnings.simplefilter(action='ignore', category=FutureWarning)

In [11]:
#import simualted data, either symmetric or asymmetric
path_to_csv = './../../../../data/20250217_sym00_nbh2_1000dim_grid200_300iter_50swaps/'
csv_files = glob.glob(os.path.join(path_to_csv, '*.csv'))
output_zscore = './../../../../../SCNA_thesis/github/Comparison/20250218_results_sym/squidpy_zscore_delaunay_4ct_self00_try.csv'
output_count = './../../../../../SCNA_thesis/github/Comparison/20250218_results_sym/squidpy_count_delaunay_4ct_self00_try.csv'

In [5]:
data_frames = []

# Loop through the CSV files and process each one
for file in csv_files:
    df = pd.read_csv(file)
    sample_id = os.path.splitext(os.path.basename(file))[0]
    df['sample_id'] = sample_id
    data_frames.append(df)

# Concatenate all DataFrames into one big DataFrame
obs = pd.concat(data_frames, ignore_index=True)

In [6]:
# add marker files to it, as SpatialLDA needs them
obs['D'] = np.random.randint(1, 101, size=len(obs))
obs['E'] = np.random.randint(1, 101, size=len(obs))

# Load dataframe into anndata object
obs = obs.astype({'ct':'string'})

# data matrix 
X = obs[['D', 'E']]
X = X.values
adata = anndata.AnnData(X)
adata.obs = obs

adata.obs

Unnamed: 0,x,y,ct,sample_id,D,E
0,0.000000,0.000000,1.0,self00_0.6_ab0_0.15_13,88,41
1,24.534492,0.000000,0.0,self00_0.6_ab0_0.15_13,94,100
2,52.289758,0.000000,0.0,self00_0.6_ab0_0.15_13,30,38
3,79.509940,0.000000,2.0,self00_0.6_ab0_0.15_13,69,37
4,102.494435,0.000000,2.0,self00_0.6_ab0_0.15_13,91,55
...,...,...,...,...,...,...
4283227,1000.000000,507.264433,2.0,self00_0.6_ab0_0.05_7,81,26
4283228,187.994456,853.032888,1.0,self00_0.6_ab0_0.05_7,58,67
4283229,155.090592,758.147251,0.0,self00_0.6_ab0_0.05_7,11,97
4283230,977.121401,36.181175,1.0,self00_0.6_ab0_0.05_7,65,95


In [7]:
# get spatial coordinates to be in obsm
adata.obsm['spatial'] = obs[['x', 'y']].values
# make ct a categorical variable
adata.obs['ct'] = pd.Categorical(adata.obs['ct'])

In [8]:
# Run NEP per 'fov'
fov_list = adata.obs['sample_id'].unique()

# Dictionary to store FOV-specific AnnData objects
fov_adata_dict = {}

# Loop over each FOV
for fov in fov_list:
    warnings.simplefilter(action='ignore', category=FutureWarning)
    fov_adata = adata[adata.obs['sample_id'] == fov].copy()

    # Perform spatial neighbors analysis on the subset
    sq.gr.spatial_neighbors(fov_adata, coord_type = 'generic', delaunay=True)
    sq.gr.nhood_enrichment(fov_adata, cluster_key="ct", n_perms=300, seed=fov_list.tolist().index(fov), show_progress_bar=False)
    # Store the result in the dictionary
    fov_adata_dict[fov] = fov_adata



In [12]:
# extract and save both, the z-score and the interaction count NEP matrix

def flatten_nep_matrices(fov_adata_dict, NEP_mode='count', output_path=None):
    """Flattens matrices (z-score or count) from a dictionary of FOV AnnData objects 
    and returns a pivoted DataFrame, then saves the result as a CSV file at the specified path.

    Parameters:
    - fov_adata_dict: dict of AnnData objects, each containing enrichment data.
    - NEP_mode: str, either 'zscore' or 'count', specifying which matrix to extract.
    - output_path: str, the file path where to save the resulting CSV file. If None, no file will be saved.

    Returns:
    - A pivoted DataFrame with enrichment values.
    """
    
    flattened_dfs = []

    for fov, fov_adata in fov_adata_dict.items():
        # Extract the chosen matrix
        if NEP_mode not in ['zscore', 'count']:
            raise ValueError("NEP_mode must be either 'zscore' or 'count'")
        
        matrix = fov_adata.uns['ct_nhood_enrichment'][NEP_mode]
        cell_types = fov_adata.obs['ct'].cat.categories
        matrix_df = pd.DataFrame(matrix, index=cell_types, columns=cell_types)
        
        # Flatten and format
        flattened_df = (
            matrix_df.stack()
            .reset_index()
            .rename(columns={'level_0': 'rowname', 'level_1': 'colname', 0: NEP_mode})
        )
        
        # Create combined column
        flattened_df['combined'] = flattened_df['rowname'] + '_' + flattened_df['colname']
        flattened_df['sample_id'] = fov
        flattened_dfs.append(flattened_df[['sample_id', 'combined', NEP_mode]])

    # Concatenate all DataFrames
    big_dataframe = pd.concat(flattened_dfs, ignore_index=True)
    final_dataframe = big_dataframe.pivot(index='sample_id', columns='combined', values=NEP_mode).reset_index()

    # Save the final dataframe to CSV if output_path is provided
    if output_path:
        final_dataframe.to_csv(output_path, index=False)

    return final_dataframe

# Run for counts and z-scores
final_dataframe = flatten_nep_matrices(fov_adata_dict, 
                                          NEP_mode='count', 
                                          output_path=output_count
                                          )
final_dataframe = flatten_nep_matrices(fov_adata_dict, 
                                          NEP_mode='zscore', 
                                          output_path=output_zscore
                                          )
print(final_dataframe)


combined               sample_id    0.0_0.0   0.0_1.0   0.0_2.0   0.0_3.0  \
0                 ran_ab0_0.05_1  -0.762103 -0.903543  0.634424 -0.548561   
1                ran_ab0_0.05_10  -1.231854  2.873572 -1.728955  0.596785   
2               ran_ab0_0.05_100   0.559639  0.524914 -0.169991 -1.922448   
3                ran_ab0_0.05_11  -0.835612 -0.452585  0.613584  0.571918   
4                ran_ab0_0.05_12  -0.901125  0.117545 -0.153883  0.509432   
...                          ...        ...       ...       ...       ...   
2395      self00_0.6_ab0_0.55_95   9.413890 -7.039113 -3.949839 -3.440473   
2396      self00_0.6_ab0_0.55_96   8.301409 -4.037858 -5.573347 -3.807794   
2397      self00_0.6_ab0_0.55_97   8.911215 -5.491911 -4.723506 -4.936117   
2398      self00_0.6_ab0_0.55_98   8.255351 -5.571623 -5.122068 -4.167416   
2399      self00_0.6_ab0_0.55_99  11.401746 -5.753787 -5.276475 -8.928538   

combined   1.0_0.0   1.0_1.0   1.0_2.0   1.0_3.0   2.0_0.0   2.0_1.0  \
0  