In [None]:
# Import dependencies
%matplotlib inline
import os
import glob
import pandas as pd
import anndata
import numpy as np
import scanpy as sc

# Initialize random seeds. Both Python's `random` module and NumPy's global
# generator are seeded, because the numerical libraries imported above draw
# from NumPy rather than from `random`.
import random
random.seed(111)
np.random.seed(111)

# Print date and time of this run:
import datetime
e = datetime.datetime.now()
print(f"Current date and time = {e}")

# set a working directory
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks"
os.chdir( wdir )

# folder structures (relative to the working directory set above)
RESULTS_FOLDERNAME = "foetal/results/CellPhoneDB/"
FIGURES_FOLDERNAME = "foetal/figures/CellPhoneDB/"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of `if not os.path.exists(...): os.makedirs(...)`.
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)
os.makedirs(FIGURES_FOLDERNAME, exist_ok=True)

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write `fig` to `folder`/`fname` as an SVG (vector) image.

    `fig` is any object exposing `savefig(path, format=...)`; `folder`
    defaults to the notebook's figures directory.
    """
    destination = os.path.join(folder, fname)
    fig.savefig(destination, format='svg')

# DOWNLOADING DATABASE

In [None]:
from IPython.display import HTML, display
from cellphonedb.utils import db_releases_utils

# Render the HTML stored under 'db_releases_html_table' in the helper's result
# (presumably the table of published CellPhoneDB database versions).
display(HTML(db_releases_utils.get_remote_database_versions_html()['db_releases_html_table']))

In [None]:
from cellphonedb.utils import db_utils

# -- Version of the database to download
cpdb_version = 'v4.1.0'

# -- Directory the database files are downloaded into (one sub-folder per version)
cpdb_target_dir = os.path.join(RESULTS_FOLDERNAME, cpdb_version)
db_utils.download_database(cpdb_target_dir, cpdb_version)

# PREPARING CPDB INPUT FILES

In [None]:
# Load the AnnData object that every later cell in this notebook works on,
# and display its summary.
adata = sc.read_h5ad("foetal/results/Spatial/reference_signatures/sc.h5ad")
adata

In [None]:
# CellPhoneDB reads its counts from this file (run with counts_data='ensembl'),
# so the expression matrix and gene identifiers must be in their final form
# *before* the file is written. The notebook otherwise only applies these two
# assignments to the in-memory object in later cells, after the write, so they
# would never reach the file on disk.
adata.X = adata.layers['log1p_norm'].copy()                    # use the 'log1p_norm' layer as the main matrix
adata.var_names = adata.var['ensembl_gene_id'].to_numpy()      # gene index = values of 'ensembl_gene_id'
adata.write(os.path.join(RESULTS_FOLDERNAME, '20w_normalised_counts.h5ad'))

In [None]:
# Number of cells per value of the 'age' annotation.
adata.obs['age'].value_counts()

In [None]:
# Build the CellPhoneDB metadata table: one row per cell barcode (the index,
# named 'barcode_sample') with its label from 'C_scANVI' as 'cell_type'.
# rename()/rename_axis() return new objects, so adata.obs is left untouched;
# assigning `.index.name` on a frame built directly from an obs column can
# rename adata.obs's own index, because pandas may share the Index object.
metadata = (
    adata.obs[['C_scANVI']]
    .rename(columns={'C_scANVI': 'cell_type'})
    .rename_axis('barcode_sample')
)
metadata.to_csv(os.path.join(RESULTS_FOLDERNAME,'metadata.tsv'), sep='\t', index=True)

In [None]:
# Reload the saved table from disk and display it. No index column is requested,
# so 'barcode_sample' comes back as a regular column rather than the index.
metadata = pd.read_csv(os.path.join(RESULTS_FOLDERNAME,'metadata.tsv'), sep='\t')
metadata

In [None]:
# Sanity check: the barcodes in the metadata file must be exactly the cells in adata.
# sorted() returns the sorted list, whereas list.sort() sorts in place and returns
# None, so comparing two .sort() results is always True whatever the contents.
sorted(adata.obs.index) == sorted(metadata['barcode_sample'])

In [None]:
# Make a copy of the 'log1p_norm' layer the main expression matrix of the
# in-memory object, then print its top-left 5x5 corner.
# NOTE(review): the counts .h5ad passed to CellPhoneDB is written in an earlier
# cell, so this assignment does not change that file unless it is saved again.
adata.X = adata.layers['log1p_norm'].copy()
print(adata.X[0:5,0:5])

In [None]:
# Number of cells per 'C_scANVI' label (the labels exported as 'cell_type').
adata.obs['C_scANVI'].value_counts()

In [None]:
# Distinct 'C_scANVI' labels as a plain list, in order of first appearance.
list(adata.obs['C_scANVI'].unique())

In [None]:
# Cell types grouped by microenvironment. A cell type may be listed under more
# than one microenvironment; row order follows the order written here.
_microenv_members = {
    'tendon': [
        'ABI3BP GAS2 Fibroblasts 1',
        'ABI3BP GAS2 Fibroblasts 2',
        'Chondrocytes',
        'FGF14 THBS4 Fibroblasts',
        'COL6A6 FNDC1 Fibroblasts',
        'vasEndothelial Cells',
    ],
    'loosect': [
        'COL3A1 PI16 Fibroblasts',
        'Immune Cells',
        'vasEndothelial Cells',
        'lymEndothelial Cells',
        'Nervous System Cells',
        'Smooth Myocytes',
        'COL6A6 FNDC1 Fibroblasts',
    ],
    'muscle': [
        'NEGR1 SCN7A Fibroblasts',
        'Skeletal Myocytes',
        'Satellite Cells',
        'COL3A1 PI16 Fibroblasts',
    ],
}

# Flatten the mapping into the two-column (cell_type, microenvironment) table.
microenv = pd.DataFrame(
    [
        (cell_type, env)
        for env, members in _microenv_members.items()
        for cell_type in members
    ],
    columns=['cell_type', 'microenvironment'],
)
microenv

In [None]:
# Save the microenvironment table as a tab-separated file without an index
# column, then reload it from disk and display it.
microenv.to_csv(os.path.join(RESULTS_FOLDERNAME,'microenvironments.tsv'), sep='\t', index=False)
microenv = pd.read_csv(os.path.join(RESULTS_FOLDERNAME,'microenvironments.tsv'), sep='\t')
microenv

# RUNNING CPDB

In [None]:
# Input and output locations for the CellPhoneDB run, all under RESULTS_FOLDERNAME.
# cpdb_version is re-declared here with the same value as in the download cell.
cpdb_version = 'v4.1.0'
cpdb_file_path = os.path.join(RESULTS_FOLDERNAME, cpdb_version, 'cellphonedb.zip')   # database archive inside the per-version folder
meta_file_path = os.path.join(RESULTS_FOLDERNAME, 'metadata.tsv')                     # barcode -> cell_type table written above
microenvs_file_path = os.path.join(RESULTS_FOLDERNAME, 'microenvironments.tsv')       # cell_type -> microenvironment table written above
counts_file_path = os.path.join(RESULTS_FOLDERNAME, '20w_normalised_counts.h5ad')     # AnnData file written above
out_path = os.path.join(RESULTS_FOLDERNAME, 'results/method2/')                       # directory for the analysis outputs

**TODO:** Prepare the microenvironment file based on the spatial data results. Check whether it is worth running the analysis on the 20w samples only or on the whole dataset.

In [None]:
# Use the values of the 'ensembl_gene_id' column as the gene (var) index and display the table.
# NOTE(review): this only changes the in-memory object; the counts file handed to
# CellPhoneDB keeps whatever var index it had when it was written.
adata.var.index = adata.var['ensembl_gene_id']
adata.var

In [None]:
%%time

from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

# Run the CellPhoneDB statistical analysis on the files prepared above. The call
# returns four tables, unpacked here as deconvoluted, means, pvalues and
# significant_means; they are inspected and plotted in the following cells.
deconvoluted, means, pvalues, significant_means = cpdb_statistical_analysis_method.call(
    cpdb_file_path = cpdb_file_path,                 # mandatory: CellPhoneDB database zip file.
    meta_file_path = meta_file_path,                 # mandatory: tsv file mapping barcodes to cell labels.
    counts_file_path = counts_file_path,             # mandatory: normalized count matrix.
    counts_data = 'ensembl',                         # defines the gene annotation used in the counts matrix.
    microenvs_file_path = microenvs_file_path,       # optional (default: None): defines cells per microenvironment.
    iterations = 1000,                               # denotes the number of shufflings performed in the analysis.
    threshold = 0.1,                                # defines the min % of cells expressing a gene for this to be employed in the analysis.
    threads = 8,                                     # number of threads to use in the analysis.
    debug_seed = 42,                                 # debug random seed, fixed here. (Original note read "To disable >=0" - TODO confirm the disabling convention in the CellPhoneDB docs.)
    result_precision = 3,                            # Sets the rounding for the mean values in significant_means.
    pvalue = 0.05,                                   # P-value threshold to employ for significance.
    subsampling = False,                             # To enable subsampling the data (geometric sketching).
    subsampling_log = False,                         # (mandatory) enable subsampling log1p for non log-transformed data inputs.
    subsampling_num_pc = 100,                        # Number of components to subsample via geometric sketching (default: 100).
    subsampling_num_cells = 10000,                   # Number of cells to subsample (integer) (default: 1/3 of the dataset).
    separator = '|',                                 # Sets the string to employ to separate cells in the results dataframes "cellA|CellB".
    debug = False,                                   # Saves all intermediate tables employed during the analysis in pkl format.
    output_path = out_path,                          # Path to save results.
    output_suffix = None,                            # Replaces the timestamp in the output file names by a user-defined string (default: None).
    )

In [None]:
# First three rows of the p-value table returned by the analysis.
pvalues.head(3)

In [None]:
# First three rows of the deconvoluted table returned by the analysis.
deconvoluted.head(3)

In [None]:
# First three rows of the means table returned by the analysis.
means.head(3)

In [None]:
import ktplotspy as kpy
import matplotlib.pyplot as plt

# Heatmap of the number of significant interactions between cell-type pairs.
# celltype_key has to name the adata.obs column holding the labels CellPhoneDB
# was run with. The metadata file was built from 'C_scANVI', so that column is
# used here; no 'cell_labels' column is created anywhere in this notebook.
# NOTE(review): `ax` is whatever plot_cpdb_heatmap returns (not necessarily a
# matplotlib Axes); the figure is saved through pyplot's current figure.
ax = kpy.plot_cpdb_heatmap(
        adata = adata,
        pvals = pvalues,
        celltype_key = "C_scANVI",
        figsize = (10,10),
        title = "Number of significant interactions",
        symmetrical = False
    )
plt.savefig(os.path.join(FIGURES_FOLDERNAME, 'cellphonedb_celltype_interactions.svg'))
plt.show()

In [None]:
from cellphonedb.utils import search_utils

# Query the analysis results for interactions between the two fibroblast
# populations below; the gene / interaction-name filters are left disabled.
search_results = search_utils.search_analysis_results(
    query_cell_types_1 = ['COL6A6 FNDC1 Fibroblasts'],  # List of cells 1, will be paired to cells 2 (list or 'All').
    query_cell_types_2 = ['FGF14 THBS4 Fibroblasts'],     # List of cells 2, will be paired to cells 1 (list or 'All').
    #query_genes = ['TGFBR1'],                                       # filter interactions based on the genes participating (list).
    #query_interactions = ['CSF1_CSF1R'],                            # filter interactions based on their name (list).
    significant_means = significant_means,                          # significant_means table generated by CellPhoneDB.
    deconvoluted = deconvoluted,                                    # deconvoluted table generated by CellPhoneDB.
    separator = '|',                                                # separator (default: |) employed to split cells (cellA|cellB).
    long_format = True                                              # requests the long-format output, removing non-significant interactions (original comment said "wide table" - TODO confirm against the CellPhoneDB docs)
)

search_results