In [9]:
pip install cellphonedb

Note: you may need to restart the kernel to use updated packages.


# **Download** Database from Source

# Install **CellphoneDB**

In [10]:
import pandas as pd
import glob
import os

In [11]:
import cellphonedb


# **Display database versions**

In [12]:
from IPython.display import HTML, display
from cellphonedb.utils import db_releases_utils

# Query the remote database versions and render the returned HTML table inline.
remote_releases = db_releases_utils.get_remote_database_versions_html()
display(HTML(remote_releases['db_releases_html_table']))


Version,Release date
v4.1.0,2023-03-09
,
,
,


# **Define the version and the path to download database**

In [15]:
# -- Version of the database
cpdb_version = 'v5.0.0'

# -- Version-specific directory (one sub-folder per database version)
cpdb_target_dir = os.path.join(
    '/home/icb/shahana.dilruba/cellphonedb_t01/',
    cpdb_version,
)

# Download **database**

In [16]:
from cellphonedb.utils import db_utils

# Download the database files for `cpdb_version` into `cpdb_target_dir`
# (both are defined in the configuration cell above).
db_utils.download_database(cpdb_target_dir, cpdb_version)

Downloaded cellphonedb.zip into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0
Downloaded complex_input.csv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0
Downloaded gene_input.csv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0
Downloaded interaction_input.csv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0
Downloaded protein_input.csv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0
Downloaded uniprot_synonyms.tsv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0/sources
Downloaded transcription_factor_input.csv into /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0/sources



# **Build database from files**

In [18]:
# -- Path where the input files to generate the database are located
cpdb_input_dir = '/home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0'
# List the directory contents to confirm the downloaded input files are present.
os.listdir(cpdb_input_dir)

['gene_input.csv',
 'transcription_factor_input.csv',
 'sources',
 'interaction_input.csv',
 'protein_input.csv',
 'complex_input.csv',
 'cellphonedb.zip']

# **Generate database**

CREATE DATABASE

In [19]:
from cellphonedb.utils import db_utils
# -- Creates new database
# The run recorded below wrote a timestamped `cellphonedb_<date>_<time>.zip` into `cpdb_input_dir`.
db_utils.create_db(cpdb_input_dir)




Created /home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0/cellphonedb_02_20_2024_103116.zip successfully


In [22]:
# Show the kernel's current working directory.
os.getcwd()

'/home/icb/shahana.dilruba'

In [5]:
# Promote the newly built, timestamped database archive to the canonical
# `cellphonedb.zip` name that the analysis reads.
#
# `create_db` stamps the archive name with the build date and time, so a
# hard-coded file name only works for one specific run. Instead, pick the most
# recently modified `cellphonedb_*.zip` in the build directory. The pattern
# requires an underscore, so it never matches `cellphonedb.zip` itself.
import glob
import os

cpdb_build_dir = '/home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0'
cpdb_canonical_zip = os.path.join(cpdb_build_dir, 'cellphonedb.zip')

built_archives = glob.glob(os.path.join(cpdb_build_dir, 'cellphonedb_*.zip'))
if built_archives:
    newest_archive = max(built_archives, key=os.path.getmtime)
    # Like `mv`, os.replace overwrites an existing destination file.
    os.replace(newest_archive, cpdb_canonical_zip)
elif not os.path.exists(cpdb_canonical_zip):
    # Neither a fresh build nor a previously promoted archive exists.
    raise FileNotFoundError(
        'No cellphonedb_*.zip archive found in ' + cpdb_build_dir
        + '; run the database build cell first.'
    )


**METHOD1**

# Input **files**

In [10]:
# CellphoneDB database archive (the renamed build output).
cpdb_file_path = '/home/icb/shahana.dilruba/cellphonedb_t01/v5.0.0/cellphonedb.zip'
# Tab-separated metadata table (read below with sep='\t').
meta_file_path = '/home/icb/shahana.dilruba/metadata.tsv'
# Counts stored as an h5ad file (loaded below with anndata).
counts_file_path = '/home/icb/shahana.dilruba/pertpy_dir/2301_annot_patient_integrated.h5ad'
# Directory passed to the analysis as its output location.
out_path = '/home/icb/shahana.dilruba/results/method1'

In [8]:
import pandas as pd


# Inspect input **files**

In [11]:
# Load the tab-separated metadata table and preview its first three rows.
metadata = pd.read_csv(meta_file_path, sep='\t')
metadata.head(n=3)

  metadata = pd.read_csv(meta_file_path, sep = '\t')


Unnamed: 0.1,Unnamed: 0,identifier,patient,name,barcode_round,sample,viral_counts,treatment,treatment_virus,infected,...,celltype_coarse,cell_type,infection_label,_scvi_batch,_scvi_labels,leiden_coarse,leiden_fine,leiden_subset,celltype_fine,celltype_approx
0,muc21058_AAACCCAAGTTTGAGA,muc21058,patient_1,ALI_1_Barcode_1,ALI_1,3,0.0,TNFa_IFNg,TNFa_IFNg + sars-cov-2,True,...,"13,1/club",club,infected,0,0,3,131,131,sub5/club,"13,1/club"
1,muc21058_AAACCCACACGCGCAT,muc21058,patient_1,ALI_1_Barcode_1,ALI_1,3,0.0,IFNa (b_d),IFNa (b_d),False,...,"14,0/club",club,uninfected,0,0,4,140,140,sub3/club,"14,0/club"
2,muc21058_AAACCCACAGACACAG,muc21058,patient_1,ALI_1_Barcode_1,ALI_1,3,0.0,IFNg,IFNg,False,...,10/suprabasal,suprabasal,uninfected,0,0,6,10,10,10/suprabasal,10/suprabasal


# **The counts file is a scanpy h5ad object. Its cells must match the metadata file: both files must contain the same number of cells, with the same barcodes.**


In [12]:
import anndata

In [13]:
# Load the counts from the h5ad file into an AnnData object.
adata = anndata.read_h5ad(counts_file_path)
# Display the matrix dimensions — presumably (cells, genes); confirm against the metadata row count.
adata.shape

(254630, 18115)

# **Check barcodes in metadata and counts are the same.**


In [16]:
# Compare the barcodes of the counts object with those of the metadata table.
# `list.sort()` sorts in place and returns None, so comparing the results of two
# `.sort()` calls is always `None == None` (True) and checks nothing. `sorted()`
# returns the sorted lists, so this evaluates to True only when both sources
# hold exactly the same barcodes. Casting to str keeps the sort well defined
# even if the metadata column contains missing values.
sorted(adata.obs.index.astype(str)) == sorted(metadata['Unnamed: 0'].astype(str))

True

# No microenvironment data included in this analysis


# Run basic **analysis with METHOD 1**

In [17]:


from cellphonedb.src.core.methods import cpdb_analysis_method

# Run the CellphoneDB basic analysis (method 1) on the inputs defined above.
# The call returns a dict of result tables and also writes them to `out_path`.
cpdb_results = cpdb_analysis_method.call(
    cpdb_file_path = cpdb_file_path,           # mandatory: CellphoneDB database zip file.
    meta_file_path = meta_file_path,           # mandatory: tsv file defining barcodes to cell label.
    counts_file_path = counts_file_path,       # mandatory: normalized count matrix.
    counts_data = 'hgnc_symbol',               # defines the gene annotation in counts matrix.
    microenvs_file_path = None,                # optional: no microenvironment file is used in this analysis.
    score_interactions = True,                 # optional: whether to score interactions or not.
    output_path = out_path,                    # Path to save results.
    separator = '|',                           # Sets the string to employ to separate cells in the results dataframes "cellA|CellB".
    threads = 5,                               # number of threads to use in the analysis.
    threshold = 0.1,                           # defines the min % of cells expressing a gene for this to be employed in the analysis.
    result_precision = 3,                      # Sets the rounding for the mean values in the result tables.
    debug = False,                             # Saves all intermediate tables employed during the analysis in pkl format.
    output_suffix = None                       # Replaces the timestamp in the output files by a user defined string (default: None)
)



[ ][CORE][20/02/24-12:53:44][INFO] [Non Statistical Method] Threshold:0.1 Precision:3
Reading user files...


  return pd.read_csv(file_stream, sep=separator, index_col=0 if index_column_first else None, dtype=dtype,


The following user files were loaded successfully:
/home/icb/shahana.dilruba/pertpy_dir/2301_annot_patient_integrated.h5ad
/home/icb/shahana.dilruba/metadata.tsv
[ ][CORE][20/02/24-12:54:44][INFO] Running Basic Analysis
[ ][CORE][20/02/24-12:54:44][INFO] Building results
[ ][CORE][20/02/24-12:54:45][INFO] Scoring interactions: Filtering genes per cell type..


100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:07<00:00,  1.86it/s]

[ ][CORE][20/02/24-12:54:53][INFO] Scoring interactions: Calculating mean expression of each gene per group/cell type..



100%|███████████████████████████████████████████████████████████████████████████████████| 13/13 [00:01<00:00,  8.50it/s]
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  matrix[index_name].replace(to_replace=id2name, inplace=True)


[ ][CORE][20/02/24-12:54:55][INFO] Scoring interactions: Calculating scores for all interactions and cell types..


100%|█████████████████████████████████████████████████████████████████████████████████| 169/169 [02:04<00:00,  1.35it/s]


Saved means_result to /home/icb/shahana.dilruba/results/method1/simple_analysis_means_result_02_20_2024_125701.txt
Saved deconvoluted to /home/icb/shahana.dilruba/results/method1/simple_analysis_deconvoluted_02_20_2024_125701.txt
Saved deconvoluted_percents to /home/icb/shahana.dilruba/results/method1/simple_analysis_deconvoluted_percents_02_20_2024_125701.txt
Saved interaction_scores to /home/icb/shahana.dilruba/results/method1/simple_analysis_interaction_scores_02_20_2024_125701.txt


In [18]:
# List the names of the result tables returned by the analysis.
print(cpdb_results.keys())

dict_keys(['means_result', 'deconvoluted', 'deconvoluted_percents', 'interaction_scores'])


In [19]:
# Preview the first two rows of the `means_result` table.
cpdb_results['means_result'].head(n=2)

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,...,viral|basal prolif.,viral|ciliated,viral|club,viral|deuterosomal,viral|goblet,viral|hillock,viral|ionocytes,viral|suprabasal,viral|tuft,viral|viral
0,CPI-SC0A2DB962D,CDH1_integrin_a2b1_complex,simple:P12830,complex:integrin_a2b1_complex,CDH1,,False,False,False,curated,...,0.512,0.449,0.407,0.392,0.339,0.537,0.358,0.492,0.347,0.456
1,CPI-SC09D882C18,CDH1_integrin_aEb7_complex,simple:P12830,complex:integrin_aEb7_complex,CDH1,,False,False,False,curated,...,0.201,0.201,0.201,0.2,0.2,0.201,0.0,0.202,0.202,0.201


In [20]:
# Preview the first two rows of the `interaction_scores` table.
cpdb_results['interaction_scores'].head(n=2)

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,...,viral|basal prolif.,viral|ciliated,viral|club,viral|deuterosomal,viral|goblet,viral|hillock,viral|ionocytes,viral|suprabasal,viral|tuft,viral|viral
0,CPI-SC0A2DB962D,CDH1_integrin_a2b1_complex,simple:P12830,complex:integrin_a2b1_complex,CDH1,,False,False,False,curated,...,17.478,7.652,9.337,8.278,4.134,16.692,5.929,13.559,5.514,11.772
1,CPI-SC09D882C18,CDH1_integrin_aEb7_complex,simple:P12830,complex:integrin_aEb7_complex,CDH1,,False,False,False,curated,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
# Preview the first two rows of the `deconvoluted` table.
cpdb_results['deconvoluted'].head(n=2)

Unnamed: 0_level_0,gene_name,uniprot,is_complex,protein_name,complex_name,id_cp_interaction,gene,?,PNEC,basal,basal prolif.,ciliated,club,deuterosomal,goblet,hillock,ionocytes,suprabasal,tuft,viral
multidata_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1355,UBASH3B,Q8TF42,True,UBS3B_HUMAN,Dehydroepiandrosterone_bySTS,CPI-CS09B8977D7,UBASH3B,0.011,0.0,0.008,0.02,0.01,0.01,0.006,0.009,0.012,0.01,0.01,0.003,0.007
1355,UBASH3B,Q8TF42,True,UBS3B_HUMAN,Dehydroepiandrosterone_bySTS,CPI-CS05760BB78,UBASH3B,0.011,0.0,0.008,0.02,0.01,0.01,0.006,0.009,0.012,0.01,0.01,0.003,0.007


In [22]:
# Preview the first two rows of the `deconvoluted_percents` table.
cpdb_results['deconvoluted_percents'].head(n=2)

Unnamed: 0_level_0,gene_name,uniprot,is_complex,protein_name,complex_name,id_cp_interaction,gene,?,PNEC,basal,basal prolif.,ciliated,club,deuterosomal,goblet,hillock,ionocytes,suprabasal,tuft,viral
multidata_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1355,UBASH3B,Q8TF42,True,UBS3B_HUMAN,Dehydroepiandrosterone_bySTS,CPI-CS09B8977D7,UBASH3B,0.025,0.0,0.018,0.05,0.025,0.023,0.017,0.018,0.015,0.014,0.021,0.007,0.012
1355,UBASH3B,Q8TF42,True,UBS3B_HUMAN,Dehydroepiandrosterone_bySTS,CPI-CS05760BB78,UBASH3B,0.025,0.0,0.018,0.05,0.025,0.023,0.017,0.018,0.015,0.014,0.021,0.007,0.012
