## **COMMOT Analysis of CCI**

*   The COMMOT documentation and package can be found here: https://commot.readthedocs.io/en/latest
*   The paper can be found here: https://doi.org/10.1038/s41592-022-01728-4

### 1. **Import Package**

In [None]:
import os
import gc
import ot
import pickle
import anndata
import scanpy as sc
import pandas as pd
import numpy as np
from scipy import sparse
from scipy.stats import spearmanr, pearsonr
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt
import commot as ct

### 2. **Run the Analysis**

*   The code below shows an example analysis of spot-level data, where the input table has been prepared with the following columns: `["x", "y", "celltypeA",..., "celltypeX", "cell_type(dominant cell type)", "GeneA",..., "GeneX"]`.
*   When running the analysis, we use the default settings following their tutorial.
*   `ct.tl.cluster_communication_spatial_permutation`: used to obtain cell-type-specific interaction results.
*   `ct.tl.spatial_communication`: used to obtain the direction pattern of the interaction results.

In [None]:
# Root directory that holds one sub-directory of inputs/outputs per slice.
DATA_ROOT = '/rsrch5/home/biostatistics/lku/ILIBD/data'

# Columns of exprsn_df.csv that are dropped before building the expression
# matrix (coordinates, spot id, per-cell-type columns, dominant cell type).
# Every remaining column is treated as a gene.
NON_GENE_COLUMNS = [
  "x", "y", "cell",
  "Excitatory_neurons", "Inhibitory_neuron", "Astrocyte", "Oligodendrocyte",
  "Oligodendrocyte_precursor_cell", "Microglia", "Pericytes",
  "Endothelial_cells", "cell_type",
]

# Prefix of the adata.uns entries that hold the cluster-level results read
# back after ct.tl.cluster_communication_spatial_permutation below.
CLUSTER_KEY_PREFIX = 'commot_cluster_spatial_permutation-cell_type-cellchat-'


def _split_result_keys(uns_keys):
  """Split cluster-result keys into pathway names and LR-pair names.

  Only keys starting with ``CLUSTER_KEY_PREFIX`` are considered, so unrelated
  ``adata.uns`` entries (e.g. ``'log1p'``) can never be mistaken for results.
  The aggregated ``total-total`` entry is skipped. A suffix containing ``'-'``
  is treated as a ligand-receptor pair, anything else as a pathway.

  Returns:
    ``(pathway_names, lr_pair_names)`` as two lists of key suffixes.
  """
  pathway_names = []
  lr_pair_names = []
  for key in uns_keys:
    if not key.startswith(CLUSTER_KEY_PREFIX):
      continue
    suffix = key[len(CLUSTER_KEY_PREFIX):]
    if suffix == 'total-total':
      continue
    if '-' in suffix:
      lr_pair_names.append(suffix)
    else:
      pathway_names.append(suffix)
  return pathway_names, lr_pair_names


def _cluster_results_to_long(adata, suffixes, label_column):
  """Stack the score and p-value matrices of several results into one table.

  For every suffix, ``adata.uns[CLUSTER_KEY_PREFIX + suffix]`` is read and its
  ``'communication_matrix'`` and ``'communication_pvalue'`` DataFrames are
  converted to long format and merged on their row/column labels.

  Args:
    adata: AnnData object whose ``uns`` holds the cluster-level results.
    suffixes: pathway or LR-pair names (key suffixes) to extract.
    label_column: name of the column that stores the suffix
      (``'pathway'`` or ``'interaction_name'``).

  Returns:
    DataFrame with columns ``ligand, receptor, score, pvalue, <label_column>``.
    It is empty (header only) when ``suffixes`` is empty.
  """
  # NOTE(review): 'ligand'/'receptor' hold the row/column labels of the
  # matrices (presumably the sending/receiving cell types); the column names
  # are kept unchanged so downstream readers of the CSV files still work.
  columns = ['ligand', 'receptor', 'score', 'pvalue', label_column]
  frames = []
  for suffix in suffixes:
    entry = adata.uns[CLUSTER_KEY_PREFIX + suffix]

    score_df = entry['communication_matrix'].stack().reset_index()
    score_df.columns = ['ligand', 'receptor', 'score']

    pvalue_df = entry['communication_pvalue'].stack().reset_index()
    pvalue_df.columns = ['ligand', 'receptor', 'pvalue']

    merged = pd.merge(score_df, pvalue_df, on=['ligand', 'receptor'])
    merged[label_column] = suffix
    frames.append(merged[columns])

  # pd.concat raises on an empty list; write an empty table instead so the
  # remaining outputs of the slice are still produced.
  if not frames:
    return pd.DataFrame(columns=columns)
  return pd.concat(frames, ignore_index=True)


for num in ["slice1"]:
  slice_dir = f'{DATA_ROOT}/{num}'

  # ---- Load the spot-level table and build the AnnData object ----
  path_exprsn = f'{slice_dir}/exprsn_df.csv'
  data = pd.read_csv(path_exprsn, index_col=0)
  data = data.dropna(subset=['cell_type'])
  data = data.set_index('cell', drop=False)
  data.index.name = None
  gene_expression = data.drop(columns=NON_GENE_COLUMNS)
  # Explicit copy: avoids mutating a slice of `data` (SettingWithCopyWarning).
  meta_data = data[["x", "y", "cell_type", 'cell']].copy().set_index('cell')

  adata = sc.AnnData(X=gene_expression, obs=meta_data)
  adata.obsm['spatial'] = meta_data[['x', 'y']].to_numpy()
  adata.var_names_make_unique()
  # Keep the un-normalized values in .raw before normalizing in place.
  adata.raw = adata
  sc.pp.normalize_total(adata, inplace=True)
  sc.pp.log1p(adata)

  # ---- Ligand-receptor database, filtered against this slice ----
  df_cellchat = ct.pp.ligand_receptor_database(species='human', signaling_type='Secreted Signaling', database='CellChat')
  df_cellchat_filtered = ct.pp.filter_lr_database(df_cellchat, adata, min_cell_pct=0.05)

  # ---- Spatial communication, run on a copy and saved on its own ----
  adata_dis500 = adata.copy()
  ct.tl.spatial_communication(adata_dis500, database_name='cellchat', df_ligrec=df_cellchat_filtered, dis_thr=500, heteromeric=True, pathway_sum=True)
  adata_dis500.write(f'{slice_dir}/commot_dir_adata.h5ad')
  # Already on disk and not used again; drop it before the permutation test.
  del adata_dis500

  # ---- Cluster-level communication with spatial permutation ----
  ct.tl.cluster_communication_spatial_permutation(adata,
                                                  database_name='cellchat',
                                                  df_ligrec=df_cellchat_filtered,
                                                  dis_thr=500, heteromeric=True,
                                                  clustering="cell_type")
  adata.write(f'{slice_dir}/commot_cluster_adata.h5ad')

  # ---- Export the cluster-level results as long-format CSV tables ----
  pathway_vector, lr_pair_vector = _split_result_keys(adata.uns.keys())
  LRpair_df = _cluster_results_to_long(adata, lr_pair_vector, 'interaction_name')
  pathway_df = _cluster_results_to_long(adata, pathway_vector, 'pathway')

  path_write_path = f'{slice_dir}/commot_pathway_result.csv'
  path_write_LR = f'{slice_dir}/commot_interaction_result.csv'
  pathway_df.to_csv(path_write_path, index=False)
  LRpair_df.to_csv(path_write_LR, index=False)
