## **Run CellPhoneDB**

*   The CellPhoneDB documentation and package can be found here: https://cellphonedb.readthedocs.io/en/latest
*   The paper can be found here:
https://doi.org/10.1038/s41596-024-01137-1
*  **The statistical analysis method is the same from CellPhoneDB_v3 through CellPhoneDB_v5. When we say "CellPhoneDB_v3" we mean the statistical method; the database we use is the CellPhoneDB_v5 database.**

### 1. **Import Package**

In [None]:
import pandas as pd
import glob
import os
import glob
import sys
import os
# Raise pandas' display limit so up to 100 columns are shown when a DataFrame is printed.
pd.set_option('display.max_columns', 100)
import anndata as ad
import scanpy as sc
import numpy as np
import random
from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

### 2. **Run the Analysis**


*   Below is an example analysis of spot-level data, where I have prepared the data with the following columns: `["x", "y", "celltypeA",..., "celltypeX", "cell_type(dominant cell type)", "GeneA",..., "GeneX"]`.



In [None]:
# Run the CellPhoneDB statistical analysis once per tissue slice.
#
# For every slice this cell:
#   1. loads the spot-level table `exprsn_df.csv`,
#   2. keeps only the gene columns, normalizes each spot to 10,000 total
#      counts, log1p-transforms, and writes the result to `nor_counts.h5ad`,
#   3. writes a two-column barcode -> cell_type table to `meta_data.tsv`,
#   4. calls `cpdb_statistical_analysis_method.call` on those two files,
#      saving the results into the slice directory.

# CellPhoneDB database archive; identical for every slice, so defined once.
cpdb_file_path = '/rsrch5/home/biostatistics/lku/cellphoneDB_v5/cellphonedb.zip'

# Every column of the input table that is NOT a gene: coordinates, the spot
# identifier, the per-cell-type columns, and the dominant cell-type label.
non_gene_columns = [
    "x", "y", "cell",
    "Excitatory_neurons", "Inhibitory_neuron", "Astrocyte",
    "Oligodendrocyte", "Oligodendrocyte_precursor_cell",
    "Microglia", "Pericytes", "Endothelial_cells",
    "cell_type",
]

for num in ['slice1', 'slice2', 'slice3', 'slice4']:
    # Build every per-slice path once so the files written below are
    # guaranteed to be the same files handed to CellPhoneDB.
    slice_dir = f'/rsrch5/home/biostatistics/lku/ILIBD/data/{num}'
    path_exprsn = f'{slice_dir}/exprsn_df.csv'
    meta_file_path = f'{slice_dir}/meta_data.tsv'
    counts_file_path = f'{slice_dir}/nor_counts.h5ad'
    out_path = f'{slice_dir}/'

    data = pd.read_csv(path_exprsn, index_col=0)
    # Spots without a cell-type label cannot be assigned to a group.
    data = data.dropna(subset=['cell_type'])
    # Index rows by the spot identifier (kept as a column too) so the
    # AnnData observation names match the barcodes in the meta file.
    data = data.set_index('cell', drop=False)
    data.index.name = None
    # NOTE(review): x and y are shuffled independently here, but both columns
    # are dropped before the expression matrix is built, so this does not
    # affect the CellPhoneDB input written below - confirm it is still needed.
    data["x"] = np.random.permutation(data["x"].values)
    data["y"] = np.random.permutation(data["y"].values)

    # Expression matrix: spots x genes.
    gene_expression = data.drop(columns=non_gene_columns)
    adata = sc.AnnData(X=gene_expression)
    sc.pp.normalize_total(adata, target_sum=10000)
    sc.pp.log1p(adata)
    adata.write(counts_file_path)

    # Meta table: index "Cell" (barcode) and a single "cell_type" column.
    meta_data = (
        data[["cell", "cell_type"]]
        .rename(columns={"cell": "Cell"})
        .set_index("Cell")
    )
    meta_data.to_csv(meta_file_path, sep="\t")

    cpdb_results = cpdb_statistical_analysis_method.call(
        cpdb_file_path=cpdb_file_path,        # mandatory: CellPhoneDB database zip file.
        meta_file_path=meta_file_path,        # mandatory: tsv file mapping barcodes to cell labels.
        counts_file_path=counts_file_path,    # mandatory: normalized count matrix - a path to the counts file, or an in-memory AnnData object.
        counts_data='hgnc_symbol',            # defines the gene annotation used in the counts matrix.
        score_interactions=True,              # optional: whether to score interactions or not.
        iterations=1000,                      # number of shufflings performed in the analysis.
        threshold=0.1,                        # min fraction of cells expressing a gene for it to be used in the analysis.
        threads=5,                            # number of threads to use in the analysis.
        debug_seed=42,                        # debug random seed. NOTE(review): upstream docs say "To disable >=0" - confirm which values turn seeding off.
        result_precision=3,                   # rounding for the mean values in significant_means.
        pvalue=0.05,                          # p-value threshold used for significance.
        separator='|',                        # string separating cell names in the result dataframes, e.g. "cellA|CellB".
        debug=False,                          # if True, saves all intermediate tables used during the analysis in pkl format.
        output_path=out_path,                 # directory where results are saved.
        output_suffix=None,                   # replaces the timestamp in the output file names with a user-defined string (default: None).
    )