In [1]:
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import anndata
# import scanpy as sc

In [2]:
# Silence every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")

# All artefacts of this notebook are written below this directory;
# create it (and any missing parents) up front.
OUTPUT_DIR = "output/SC04.CellPhoneDB"
Path(OUTPUT_DIR).mkdir(
    parents=True,
    exist_ok=True,
)

## Input data file paths

In [None]:
# CellPhoneDB database archive.
CellPhoneDB_V4 = "data/cellphonedb.zip"
# Barcode -> cell-type table.
meta_file_path = "output/SC04.CellPhoneDB/meta_file.csv"
# AnnData file holding the count matrix.
counts_file_path = "output/SC04.CellPhoneDB/adata.h5ad"
# Degree of parallelism.
n_jobs = 8

In [3]:
# Load the scored AnnData object produced by the SC02 step.
# `anndata.read` is a deprecated alias; `read_h5ad` is the explicit .h5ad reader.
adata = anndata.read_h5ad("output/SC02.score/adata.h5ad")

In [8]:
# Keep only the cells whose CR score group is "Low" ...
Low = adata[adata.obs["CR_Score_Group"] == "Low"]
# ... and export the transposed expression table (the former index becomes a
# regular column via reset_index) in Feather format.
(
    Low.copy()
    .T
    .to_df()
    .reset_index()
    .to_feather(f"{OUTPUT_DIR}/Low.arrow")
)

In [7]:
# Two-column table (cell name, cell type) for the cells currently bound to `Low`,
# written as CSV without the index column.
meta_file = pd.DataFrame(
    {
        "Cell": Low.obs_names,
        "CellType": Low.obs["CellType"],
    }
)
meta_file.to_csv("output/SC04.CellPhoneDB/Low.csv", index=False)

In [9]:
# Export the cells whose CR score group is "High": the transposed expression
# table as Feather plus the cell name -> cell type table as CSV.
# The subset is bound to its own name so that `Low` is never rebound to
# High-score cells (which would make later `Low.*` exports write the wrong group).
High = adata[adata.obs.CR_Score_Group=="High"]
High.copy().T.to_df().reset_index().to_feather(f"{OUTPUT_DIR}/High.arrow")
meta_file = pd.DataFrame({'Cell':High.obs_names,'CellType':High.obs.CellType})
meta_file.to_csv("output/SC04.CellPhoneDB/High.csv",index=False)

In [None]:


# Select the Low-score cells explicitly so this cell does not depend on
# whatever `Low` happens to be bound to when it is executed.
Low = adata[adata.obs.CR_Score_Group=="Low"]
# Write the subset as an lzf-compressed .h5ad file and its
# cell name -> cell type table as CSV.
Low.write_h5ad("output/SC04.CellPhoneDB/Low.h5ad",compression='lzf')
meta_file = pd.DataFrame({'Cell':Low.obs.index,'CellType':Low.obs.CellType})
meta_file.to_csv("output/SC04.CellPhoneDB/Low.csv",index=False)
# Drop the temporaries from the notebook namespace.
del Low,meta_file

In [None]:
# Run the CellPhoneDB wrapper on the cells whose CR score group is "Low".
# NOTE: `cpdb` is defined in a later cell of this notebook; that cell has to be
# executed before this one.
cpdb(
    adata[adata.obs["CR_Score_Group"] == "Low"],
    database="data/cellphonedb.zip",
    cell_type="CellType",
    cpdb_suffix="Low",
    output_dir=OUTPUT_DIR,
)

In [None]:
# Run the CellPhoneDB wrapper on the cells whose CR score group is "High".
# NOTE: `cpdb` is defined in a later cell of this notebook; that cell has to be
# executed before this one.
cpdb(
    adata[adata.obs["CR_Score_Group"] == "High"],
    database="data/cellphonedb.zip",
    cell_type="CellType",
    cpdb_suffix="High",
    output_dir=OUTPUT_DIR,
)

In [None]:
def cpdb(adata,database,cell_type="CellType",cpdb_suffix="_",microenvs_file_path=None,n_jobs=8,output_dir="."):
    """Run the CellPhoneDB statistical analysis on ``adata``.

    The counts (``adata.write_h5ad``) and a two-column cell/label table are
    written into a private temporary directory, passed to
    ``cpdb_statistical_analysis_method.call`` and removed again afterwards,
    also when the analysis raises.

    Parameters
    ----------
    adata
        Object providing ``write_h5ad(path)`` and an ``obs`` table
        (an AnnData object or view).
    database
        Path of the CellPhoneDB database zip file.
    cell_type
        Name of the ``adata.obs`` column holding the label of every cell.
    cpdb_suffix
        Passed as ``output_suffix``; replaces the timestamp in the output file names.
    microenvs_file_path
        Optional file defining the cells per microenvironment.
    n_jobs
        Number of threads handed to CellPhoneDB.
    output_dir
        Passed as ``output_path``; where CellPhoneDB saves its results.

    Returns
    -------
    None
        The analysis results are only written to ``output_dir``.
    """
    import tempfile
    from pathlib import Path
    from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

    # A uniquely named scratch directory avoids clashes between parallel runs and
    # is cleaned up by the context manager even on failure. It lives in the
    # current working directory, where the temporary files were written before.
    with tempfile.TemporaryDirectory(prefix="cpdb_", dir=".") as scratch:
        counts_path = str(Path(scratch) / "counts.h5ad")
        meta_path = str(Path(scratch) / "meta.csv")

        adata.write_h5ad(counts_path)
        # Honour the `cell_type` argument instead of a hard-coded obs column.
        meta_file = pd.DataFrame({'Cell':adata.obs.index,'CellType':adata.obs[cell_type]})
        meta_file.to_csv(meta_path,index=False)

        # The return value is intentionally not unpacked: nothing here uses it, and
        # ignoring it keeps the wrapper independent of the exact return shape.
        # NOTE(review): newer CellPhoneDB releases reportedly return a dict instead
        # of a 4-tuple — confirm against the installed version.
        cpdb_statistical_analysis_method.call(
            cpdb_file_path = database,                 # mandatory: CellPhoneDB database zip file.
            meta_file_path = meta_path,                # mandatory: file mapping barcodes to cell labels.
            counts_file_path = counts_path,            # mandatory: normalized count matrix.
            counts_data = 'hgnc_symbol',               # defines the gene annotation in the counts matrix.
            microenvs_file_path = microenvs_file_path, # optional (default: None): defines cells per microenvironment.
            iterations = 1000,                         # number of shufflings performed in the analysis.
            threshold = 0.1,                           # min fraction of cells expressing a gene for it to be used.
            threads = n_jobs,                          # number of threads to use in the analysis.
            debug_seed = 42,                           # debug random seed (presumably a negative value disables it — TODO confirm).
            result_precision = 3,                      # rounding for the mean values in significant_means.
            pvalue = 0.05,                             # p-value threshold used for significance.
            subsampling = False,                       # enable subsampling of the data (geometric sketching).
            subsampling_log = False,                   # (mandatory) log1p for non log-transformed inputs when subsampling.
            subsampling_num_pc = 100,                  # number of components used for geometric sketching (default: 100).
            subsampling_num_cells = 1000,              # number of cells to subsample (default: 1/3 of the dataset).
            separator = '|',                           # separator between cell names in the result tables, "cellA|cellB".
            debug = False,                             # save all intermediate tables of the analysis in pkl format.
            output_path = output_dir,                  # path to save results.
            output_suffix = cpdb_suffix                # replaces the timestamp in the output file names (default: None).
            )

In [None]:
# Materialise the `.raw` slot as its own AnnData object and store it,
# lzf-compressed, at `counts_file_path`.
(
    adata.raw
    .to_adata()
    .write_h5ad(counts_file_path, compression="lzf")
)

## 可视化

In [None]:
import matplotlib.pyplot as plt
import ktplotspy as kpy

In [None]:
# Reload the AnnData file stored at `counts_file_path`; note that this rebinds `adata`.
# `anndata.read` is a deprecated alias; `read_h5ad` is the explicit .h5ad reader.
adata = anndata.read_h5ad(counts_file_path)

In [None]:
# Show the categories of the categorical `CellType` column.
adata.obs["CellType"].cat.categories

In [None]:
# Read the CellPhoneDB output files of one CR-score group.
i="Low"
means = pd.read_csv(f'{OUTPUT_DIR}/statistical_analysis_means_{i}.txt',sep='\t')
pvals = pd.read_csv(f'{OUTPUT_DIR}/statistical_analysis_pvalues_{i}.txt',sep='\t')
decon = pd.read_csv(f'{OUTPUT_DIR}/statistical_analysis_deconvoluted_{i}.txt',sep='\t')
p = kpy.plot_cpdb(
    # Subset by the same group label `i` as the tables loaded above, so the
    # cells passed to the plot always belong to the group the results describe.
    adata=adata[adata.obs.CR_Score_Group==i],
    cell_type1="Hepatocytes",
    cell_type2=".", # this means all cell-types
    means=means,
    pvals=pvals
    # ,max_size=8,
    # max_highlight_size=2
    ,keep_significant_only=True
    ,highlight_size=1
    ,celltype_key="CellType"
    # ,genes=["CXCL12", "CXCR4"]
    # ,gene_family="chemokines"
    ,figsize = (10,10),
    title = ""
    )
# Save the dot plot under a file name carrying the group label.
p.save(f"{OUTPUT_DIR}/cpdb_dotplot_{i}.pdf",limitsize=False)

In [None]:
# Chord diagram of the interactions between "B Cells" and every other cell
# type ("." matches all), drawn from the currently loaded result tables.
kpy.plot_cpdb_chord(
    adata=adata,
    cell_type1="B Cells",
    cell_type2=".",
    means=means,
    pvals=pvals,
    deconvoluted=decon,
    scale_lw=100,
    celltype_key="CellType",
    # genes=["ADGRB1"],
    # edge_cmap=plt.cm.coolwarm,  # colour of the chords
    figsize=(6, 6),
);
# Write the current matplotlib figure to PDF with a tight bounding box.
plt.savefig(f"{OUTPUT_DIR}/cpdb_chord.pdf", bbox_inches="tight");