In [98]:
import pandas as pd
import numpy as np

import itertools
# CellChatDB, CellPhoneDB, Ramilowski2015, ConnectomeDB2020


In [99]:
# Literature-curated ligand-receptor pairs (the file is tab-separated despite
# its .csv suffix).
lr_pairs_lit = pd.read_csv("../data/LR_pairs/all_pairs.csv", sep="\t")

# Swap ":" for "_" in receptor names -- presumably so multi-part receptors use
# the same separator as the resource tables compared below (TODO confirm).
lr_pairs_lit["Receptor"] = lr_pairs_lit["Receptor"].str.replace(":", "_")

# Key every pair as "LIGAND -> RECEPTOR", the string format the later cells
# intersect on.
lr_pairs_lit["interaction"] = [
    f"{ligand} -> {receptor}"
    for ligand, receptor in zip(lr_pairs_lit["Ligand"], lr_pairs_lit["Receptor"])
]

In [115]:
# Distinct interactions contributed by each literature reference, followed by
# the interactions that both references report.
scthi_interactions = set(lr_pairs_lit.loc[lr_pairs_lit["reference"] == "scTHI", "interaction"])
yuand_interactions = set(lr_pairs_lit.loc[lr_pairs_lit["reference"] == "YuanD", "interaction"])
for reference_interactions in (scthi_interactions, yuand_interactions):
    print(len(reference_interactions))

common = scthi_interactions & yuand_interactions
print(common)
print(len(common))

329
78
{'BGN -> TLR2', 'NAMPT -> INSR', 'C3 -> CD46', 'RIMS2 -> ABCA1', 'NLGN2 -> NRXN2', 'IGF1 -> IGF1R', 'LTBP3 -> ITGB5', 'DLL1 -> NOTCH1', 'NLGN1 -> NRXN1', 'APP -> FPR2', 'NLGN3 -> NRXN2', 'COL1A1 -> CD44', 'APP -> CD74', 'HMGB1 -> THBD', 'IL18 -> IL1RAPL1', 'COL1A2 -> CD44', 'LRP1B -> PLAUR', 'NCAM1 -> GFRA1', 'TNC -> CNTN1'}
19


# All Interactions

In [100]:
# Load the LIANA result table of every ligand-receptor resource (the files are
# tab-separated despite their .csv suffix).
raw_tables = {
    "cellchat": pd.read_csv("../output/liana/CellChatDB.csv", sep="\t"),
    "cellphonedb": pd.read_csv("../output/liana/CellPhoneDB.csv", sep="\t"),
    "ramilowski": pd.read_csv("../output/liana/Ramilowski2015.csv", sep="\t"),
    "connectome": pd.read_csv("../output/liana/ConnectomeDB2020.csv", sep="\t"),
}

# Append `ligand` / `receptor` columns parsed from the "LIGAND -> RECEPTOR"
# strings in `interaction`. One comprehension replaces four copy-pasted lines.
liana_tables = {
    resource_name: pd.concat(
        [
            table,
            table["interaction"]
            .str.split(" -> ", expand=True)
            .rename(columns={0: "ligand", 1: "receptor"}),
        ],
        axis=1,
    )
    for resource_name, table in raw_tables.items()
}
cellchat = liana_tables["cellchat"]
cellphonedb = liana_tables["cellphonedb"]
ramilowski = liana_tables["ramilowski"]
connectome = liana_tables["connectome"]

# Interaction set per resource, built once and reused by every comparison
# below. `resources` and `lr_dict` are also read by the literature-comparison
# cell.
resources = ["cellchat", "cellphonedb", "connectome", "ramilowski"]
lr_dict = {
    resource_name: set(liana_tables[resource_name]["interaction"])
    for resource_name in resources
}

print("Common interactions across all databases")
common_interactions = (
    lr_dict["cellchat"] & lr_dict["cellphonedb"] & lr_dict["ramilowski"] & lr_dict["connectome"]
)
print(len(common_interactions))
print(common_interactions)

# Flatten the shared interactions into one name per row (ligand, then receptor,
# duplicates kept). Iterating in sorted order keeps the file identical between
# runs; the iteration order of a set of strings is not stable across kernels.
# NOTE(review): the Top-10 cell later in this notebook writes to this same
# path, so whichever of the two cells runs last determines the file content.
common_names = pd.Series(
    list(
        itertools.chain.from_iterable(
            interaction.split(" -> ") for interaction in sorted(common_interactions)
        )
    ),
    dtype="object",  # explicit dtype keeps an empty intersection well-defined
)
common_names.to_csv("../output/liana/all_common_interactions.csv", sep=",", index=False, header=False)

print("\nCommon interactions across all databases per cell type pair")
# Only pairs present in the CellChatDB table are visited; a pair missing there
# cannot be shared by all four resources anyway.
for cell_type_pair in cellchat["source_target"].unique():
    interactions_per_resource = [
        set(table.loc[table["source_target"] == cell_type_pair, "interaction"])
        for table in (cellchat, cellphonedb, ramilowski, connectome)
    ]
    print(cell_type_pair, set.intersection(*interactions_per_resource))

print("\nPairwise resources comparison")
combis = list(itertools.combinations(resources, 2))

for combi in combis:
    common = lr_dict[combi[0]] & lr_dict[combi[1]]
    print(combi)
    print(len(common))
    print(common)

Common interactions across all databases
16
{'EFNA5 -> EPHA5', 'APP -> CD74', 'FGF1 -> FGFR1', 'EFNA5 -> EPHA7', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44', 'EFNA5 -> EPHB2', 'JAG1 -> NOTCH2', 'NRG2 -> ERBB4', 'NRG3 -> ERBB4', 'PTN -> ALK', 'JAG1 -> NOTCH1', 'SEMA3E -> PLXND1', 'NCAM1 -> FGFR1', 'EFNA5 -> EPHA4', 'NRG1 -> ERBB4'}

Common interactions across all databases per cell type pair
Neuronal_Neuronal {'EFNA5 -> EPHA5', 'EFNA5 -> EPHA7', 'NRG2 -> ERBB4', 'EFNA5 -> EPHB2', 'NRG3 -> ERBB4', 'EFNA5 -> EPHA4', 'NRG1 -> ERBB4'}
Neuronal_Non_neuronal {'APP -> CD74', 'NRG2 -> ERBB4', 'EFNA5 -> EPHB2', 'NRG3 -> ERBB4', 'NRG1 -> ERBB4'}
Neuronal_Malignant {'EFNA5 -> EPHA5', 'SEMA4D -> PLXNB2', 'NRG2 -> ERBB4', 'EFNA5 -> EPHB2', 'NRG3 -> ERBB4', 'NCAM1 -> FGFR1', 'NRG1 -> ERBB4'}
Neuronal_Mixed {'NRG3 -> ERBB4', 'NRG2 -> ERBB4', 'EFNA5 -> EPHB2', 'NRG1 -> ERBB4'}
Mixed_Neuronal {'EFNA5 -> EPHA5', 'EFNA5 -> EPHA7', 'EFNA5 -> EPHB2', 'NRG3 -> ERBB4', 'EFNA5 -> EPHA4', 'NRG1 -> ERBB4'}
Non_neuronal_M

In [107]:
# Overlap between the resource results and the literature-curated pairs.
# Reads `resources`, `lr_dict` and `common_interactions` as left in the kernel
# by the most recently executed resource-comparison cell.
literature_interactions = set(lr_pairs_lit["interaction"])

print("\nCompare with literature LRs (all references)")
print("all resources vs. literature", literature_interactions & common_interactions )
for resource in resources:
    common = lr_dict[resource].intersection(literature_interactions)
    print(resource, len(common))

print("\nCompare with literature LRs (per references)")

# Same comparison, restricted to the pairs of one literature reference at a time
references = lr_pairs_lit["reference"].unique()
for reference in references:
    print("\n", reference)
    lr_pairs_lit_sub = lr_pairs_lit.loc[lr_pairs_lit["reference"] == reference]
    reference_interactions = set(lr_pairs_lit_sub["interaction"])
    print("all resources vs. literature", reference_interactions & common_interactions )
    print("Individual sources")
    for resource in resources:
        common = lr_dict[resource].intersection(reference_interactions)
        print(resource, len(common))


Compare with literature LRs (all references)
all resources vs. literature {'APP -> CD74', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44'}
cellchat 9
cellphonedb 9
connectome 27
ramilowski 35

Compare with literature LRs (per references)

 scTHI
all resources vs. literature {'APP -> CD74', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44'}
Individual sources
cellchat 9
cellphonedb 9
connectome 26
ramilowski 34

 YuanD
all resources vs. literature {'APP -> CD74'}
Individual sources
cellchat 2
cellphonedb 1
connectome 3
ramilowski 5


# Top-10 Comparisons

In [108]:
# Load the top-10 LIANA result table of every ligand-receptor resource (the
# files are tab-separated despite their .csv suffix).
# NOTE(review): this cell rebinds `cellchat`, `cellphonedb`, `ramilowski`,
# `connectome`, `common_interactions`, `resources` and `lr_dict`, which the
# all-interactions cell above also binds. Cells that read them see whichever
# of the two cells ran last.
raw_tables = {
    "cellchat": pd.read_csv("../output/liana/top10_CellChatDB.csv", sep="\t"),
    "cellphonedb": pd.read_csv("../output/liana/top10_CellPhoneDB.csv", sep="\t"),
    "ramilowski": pd.read_csv("../output/liana/top10_Ramilowski2015.csv", sep="\t"),
    "connectome": pd.read_csv("../output/liana/top10_ConnectomeDB2020.csv", sep="\t"),
}

# Append `ligand` / `receptor` columns parsed from the "LIGAND -> RECEPTOR"
# strings in `interaction`. One comprehension replaces four copy-pasted lines.
liana_tables = {
    resource_name: pd.concat(
        [
            table,
            table["interaction"]
            .str.split(" -> ", expand=True)
            .rename(columns={0: "ligand", 1: "receptor"}),
        ],
        axis=1,
    )
    for resource_name, table in raw_tables.items()
}
cellchat = liana_tables["cellchat"]
cellphonedb = liana_tables["cellphonedb"]
ramilowski = liana_tables["ramilowski"]
connectome = liana_tables["connectome"]

# Interaction set per resource, built once and reused by every comparison
# below. `resources` and `lr_dict` are also read by the literature-comparison
# cell.
resources = ["cellchat", "cellphonedb", "connectome", "ramilowski"]
lr_dict = {
    resource_name: set(liana_tables[resource_name]["interaction"])
    for resource_name in resources
}

print("Common interactions across all databases")
common_interactions = (
    lr_dict["cellchat"] & lr_dict["cellphonedb"] & lr_dict["ramilowski"] & lr_dict["connectome"]
)
print(len(common_interactions))
print(common_interactions)

# Flatten the shared interactions into one name per row (ligand, then receptor,
# duplicates kept). Iterating in sorted order keeps the file identical between
# runs; the iteration order of a set of strings is not stable across kernels.
# NOTE(review): this path is also written by the all-interactions cell earlier
# in this notebook, so this write replaces that cell's result. Confirm whether
# the top-10 result should go to a file of its own.
common_names = pd.Series(
    list(
        itertools.chain.from_iterable(
            interaction.split(" -> ") for interaction in sorted(common_interactions)
        )
    ),
    dtype="object",  # explicit dtype keeps an empty intersection well-defined
)
common_names.to_csv("../output/liana/all_common_interactions.csv", sep=",", index=False, header=False)

print("\nCommon interactions across all databases per cell type pair")
# Only pairs present in the CellChatDB table are visited; a pair missing there
# cannot be shared by all four resources anyway.
for cell_type_pair in cellchat["source_target"].unique():
    interactions_per_resource = [
        set(table.loc[table["source_target"] == cell_type_pair, "interaction"])
        for table in (cellchat, cellphonedb, ramilowski, connectome)
    ]
    print(cell_type_pair, set.intersection(*interactions_per_resource))

print("\nPairwise resources comparison")
combis = list(itertools.combinations(resources, 2))

for combi in combis:
    common = lr_dict[combi[0]] & lr_dict[combi[1]]
    print(combi)
    print(len(common))
    print(common)

Common interactions across all databases
7
{'APP -> CD74', 'FGF1 -> FGFR1', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44', 'NRG2 -> ERBB4', 'NRG3 -> ERBB4', 'SEMA3E -> PLXND1'}

Common interactions across all databases per cell type pair
Malignant_Malignant set()
Malignant_Neuronal set()
Malignant_Non_neuronal {'APP -> CD74', 'SEMA3E -> PLXND1'}
Neuronal_Malignant set()
Neuronal_Neuronal {'NRG3 -> ERBB4'}
Neuronal_Non_neuronal {'NRG3 -> ERBB4', 'NRG2 -> ERBB4', 'APP -> CD74'}
Non_neuronal_Malignant {'FGF1 -> FGFR1', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44'}
Non_neuronal_Neuronal set()
Non_neuronal_Non_neuronal {'APP -> CD74'}

Pairwise resources comparison
('cellchat', 'cellphonedb')
16
{'APP -> CD74', 'SEMA3A -> NRP1_PLXNA4', 'FGF1 -> FGFR1', 'EFNA5 -> EPHA7', 'COL6A2 -> ITGA2_ITGB1', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44', 'NRG2 -> ERBB4', 'COL6A1 -> ITGA2_ITGB1', 'NRG3 -> ERBB4', 'PTN -> ALK', 'SEMA3E -> PLXND1', 'PTPRC -> CD22', 'NCAM1 -> FGFR1', 'EFNA5 -> EPHA4', 'NRG1 -> ERBB4'}
('cellchat', 'connect

In [109]:
# Literature overlap for the resource sets currently held in `lr_dict`.
# `resources`, `lr_dict` and `common_interactions` come from the most recently
# executed resource-comparison cell.
all_literature = set(lr_pairs_lit["interaction"])

print("\nCompare with literature LRs (all references)")
print("all resources vs. literature", all_literature & common_interactions )
for resource in resources:
    common = lr_dict[resource] & all_literature
    print(resource, len(common))

print("\nCompare with literature LRs (per references)")

# Repeat the comparison separately for each literature reference
references = lr_pairs_lit["reference"].unique()
for reference in references:
    print("\n", reference)
    from_this_reference = lr_pairs_lit["reference"] == reference
    lr_pairs_lit_sub = lr_pairs_lit[from_this_reference]
    sub_interactions = set(lr_pairs_lit_sub["interaction"])
    print("all resources vs. literature", sub_interactions & common_interactions )
    print("Individual sources")
    for resource in resources:
        common = lr_dict[resource] & sub_interactions
        print(resource, len(common))
        



Compare with literature LRs (all references)
all resources vs. literature {'APP -> CD74', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44'}
cellchat 9
cellphonedb 9
connectome 27
ramilowski 35

Compare with literature LRs (per references)

 scTHI
all resources vs. literature {'APP -> CD74', 'SEMA4D -> PLXNB2', 'SPP1 -> CD44'}
Individual sources
cellchat 9
cellphonedb 9
connectome 26
ramilowski 34

 YuanD
all resources vs. literature {'APP -> CD74'}
Individual sources
cellchat 2
cellphonedb 1
connectome 3
ramilowski 5
