# Looking at the L/R interactinos enriched in particular pairs of cell types and DE in ROIs

CellphoneDB

This code (from [here](https://github.com/ventolab/UHCA/tree/master/cellphoneDBv3)) uses DEGs computed for each cluster to identify relevant L/R interactions between the cells in a microenviroment

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

In [2]:
# Define cutoff variables
filter_int_user_curated = True # Use only user_curated interactions?
per_cutoff = 0.1 # min % of cells in the cluster required  with expression > 0 for the gene
pval_cutoff = 0.05 # max adjusted p-value requeired to consider a gene as DEG

# as of 18.03.2021, not using the logFC cutoff at all! --> 0 here
logFC_cutoff = 0 # min logFC to consider a gene as DEG

## Load cellphone database


In [3]:
# Gene names
genes_cpDB = pd.read_csv('./hsa_uniprot.txt', 
                         sep = '\t')

In [4]:
genes_cpDB

Unnamed: 0,uniprot,Entry,gene_name
0,P01611,KVD12_HUMAN,IGKV1D-12
1,P01615,KVD28_HUMAN,IGKV2D-28
2,Q15334,L2GL1_HUMAN,LLGL1
3,Q6ZP29,LAAT1_HUMAN,PQLC2
4,Q9GZZ8,LACRT_HUMAN,LACRT
...,...,...,...
20311,Q9H900,ZWILC_HUMAN,ZWILCH
20312,P98169,ZXDB_HUMAN,ZXDB
20313,Q2QGD7,ZXDC_HUMAN,ZXDC
20314,Q15942,ZYX_HUMAN,ZYX


In [5]:
# Complexes members
com_cpDB = pd.read_csv('./database/complex_generated.csv')
com_cpDB['complex_name'] = ['complex:' + complex_name for complex_name in com_cpDB['complex_name']]

In [6]:
com_cpDB

Unnamed: 0,complex_name,uniprot_1,uniprot_2,uniprot_3,uniprot_4,transmembrane,peripheral,secreted,secreted_desc,secreted_highlight,receptor,receptor_desc,integrin,other,other_desc,pdb_id,pdb_structure,stoichiometry,comments_complex
0,complex:contactin complex II,Q12860,Q92823,,,True,False,False,,False,False,,False,False,,,FALSE,,NRCAM bind in cis and in trans to contactin-1
1,complex:FCyR1A,P12314,P30273,,,True,False,False,,False,True,Human IgG receptor,False,False,,,FALSE,,Also called CD64. Interacts with FCERG1; forms...
2,complex:IL6 receptor,P08887,P40189,,,True,False,False,,False,True,Cytokine receptor IL6 family,False,False,,1p9m,binding,IL6;IL6;IL6R;IL6R;IL6ST;IL6ST,Signal activation necessitate an association w...
3,complex:AT8B4CC50B complex,Q8TF62,Q3MIR4,,,True,False,False,,False,False,,False,False,,,FALSE,,Interacts with beta subunits TMEM30A and TMEM30B
4,complex:KCNV1KCNB2 complex,Q6PIU1,Q92953,,,True,False,False,,False,False,,False,False,,,FALSE,,Has to be associated with another potassium ch...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255,complex:NODAL receptor,P36896,Q13705,P13385,,True,False,False,,False,True,,False,False,,,FALSE,,
256,complex:RET receptor 1,P07949,P56159,,,True,False,False,,False,True,,False,False,,,FALSE,,
257,complex:RET receptor 2,P07949,O00451,,,True,False,False,,False,True,,False,False,,,FALSE,,
258,complex:RET receptor 3,P07949,O60609,,,True,False,False,,False,True,,False,False,,,FALSE,,


In [7]:
# Generate complexes2gene symbol dictionary
Com2Gene = {}


for complex_name in np.unique(com_cpDB['complex_name']):
    #print(complex_name)
    
    # getting rid of 'complex:' in the beginning
    #complex_name = complex_name[8:]
    #print(complex_name)
    
    curr_complex_proteins = list(com_cpDB[(com_cpDB['complex_name'] == complex_name)].loc[:, ['uniprot_1', 'uniprot_2', 'uniprot_3', 'uniprot_4']].values.tolist())
    # flatten list 
    curr_complex_proteins = [item for sublist in curr_complex_proteins for item in sublist]
    #print(curr_complex_proteins)
    # remove nans
    curr_complex_proteins = [x for x in curr_complex_proteins if str(x) != 'nan']
    #print('after removing nans:', curr_complex_proteins)
    
    # getting corresponding gene names from the gene table
    Com2Gene[complex_name] = list(genes_cpDB[genes_cpDB['uniprot'].isin(curr_complex_proteins)]['gene_name'])
    

In [8]:
# Load interactions from cellphoneDB/out/means.txt output file                   
int_cpDB = pd.read_csv('./out/means.txt',
                      sep='\t')

# disregarding pairwise average expression values
int_cpDB = int_cpDB.loc[:, list(int_cpDB.columns)[:11]]
int_cpDB

Unnamed: 0,id_cp_interaction,interacting_pair,partner_a,partner_b,gene_a,gene_b,secreted,receptor_a,receptor_b,annotation_strategy,is_integrin
0,CPI-SS0A7B487D4,KLRG2_WNT11,simple:A4D1S0,simple:O96014,KLRG2,WNT11,True,True,False,InnateDB-All,False
1,CPI-SS0FEC87269,KLRG2_TNFSF9,simple:A4D1S0,simple:P41273,KLRG2,TNFSF9,True,True,False,InnateDB-All,False
2,CPI-SS028784FC6,HLA-DPA1_TNFSF9,simple:HLADPA1,simple:P41273,HLA-DPA1,TNFSF9,True,True,False,InnateDB-All,False
3,CPI-SS00A8596B5,PVR_TNFSF9,simple:P15151,simple:P41273,PVR,TNFSF9,True,True,False,InnateDB-All,False
4,CPI-SS044DF8749,KLRG2_WNT5B,simple:A4D1S0,simple:Q9H1J7,KLRG2,WNT5B,True,True,False,InnateDB-All,False
...,...,...,...,...,...,...,...,...,...,...,...
1204,CPI-SS0281839F1,IL37_IL1RAPL1,simple:Q9NZH6,simple:Q9NZN1,IL37,IL1RAPL1,True,False,True,InnateDB,False
1205,CPI-SS0FF51A757,TNFSF18_TNFRSF18,simple:Q9UNG2,simple:Q9Y5U5,TNFSF18,TNFRSF18,True,False,True,user_curated,False
1206,CPI-CC0D5F9D20F,a2Bb3 complex_fibrinogen,complex:a2Bb3 complex,complex:fibrinogen,,,False,False,False,user_curated,True
1207,CPI-CC08A4A83F1,aMb2 complex_fibrinogen,complex:aMb2 complex,complex:fibrinogen,,,False,False,False,user_curated,True


In [9]:
# MANDATORY: remove "curated" because we have cleaned and renamed them (this is a long story, just do it)
# these interactions have either been renamed or excluded so best not to use them
int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] != 'curated']

In [10]:
# OPTIONAL: Use only user_curated interactions?
if filter_int_user_curated:
    int_cpDB = int_cpDB[int_cpDB['annotation_strategy'] == 'user_curated']

In [11]:
# Generate Int2Gene dictionary
Int2Gene = {}

for i in int_cpDB.index:
    #print('row number:', i)
    curr_df_row = int_cpDB.loc[i,:]
    #print('row:', curr_df_row)
    
    # if partnerA is complex (aka np.isnan(table['gene_a']) == True), then retrieve members from dictionary
    # complex name will be in table['partner_a']
    if str(curr_df_row['gene_a']) == 'nan':
        partner_A = Com2Gene[curr_df_row['partner_a']]
    else:
        # if it's not a complex, then get the partner A from table['gene_a']
        partner_A = [curr_df_row['gene_a']]
        
    # if partnerB is complex, then retrieve members from dictionary
    # complex name will be in table['partner_b']
    if str(curr_df_row['gene_b']) == 'nan':
        partner_B = Com2Gene[curr_df_row['partner_b']]
    else:
        # if it's not a complex, then get the partner B from table['gene_b']
        partner_B = [curr_df_row['gene_b']]
        
    interaction_id = curr_df_row['interacting_pair']

    Int2Gene[interaction_id] = {'partner_a': partner_A,
                  'partner_b': partner_B}

## Load cluster's gene percentage expression

Prepared in S2 notebook

In [13]:
# Load percentage expression info
# Matrix of genes (rows) per celltypes (columns) containing the proportion [0-1] of cells 
# in a celltype expressing the gene
path_Exp = './PercentExpressed_for_cellphone.csv'

# Load matrix
Per_df = pd.read_csv(path_Exp, index_col=0) 

# Dictionary of celltypes2expressed genes
genes_expr_per_cell_type = {} 

for ct in Per_df.columns:
    #print(ct)
    curr_table = pd.DataFrame(Per_df.loc[:, ct])
    
    # only leave in genes expressed in this cell type according to a threshold declared in the beginnning of this notebook
    mask = (curr_table[ct] > per_cutoff)
    genes_expr_per_cell_type[ct] = list(curr_table[ct][mask].index)

In [14]:
Per_df

Unnamed: 0_level_0,B cell,CD8 T cell,DC,Distal_tubules_and_collecting_duct,Endothelium,Loop_of_Henle,MNP,Mast cell,Mesangial_cells,NK cell,NKT cell,Nephron_epithelium,Neutrophil,Podocytes,Private,Proximal_tubular_cells,Th cell
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
RP11-34P13.3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000304,0.0,0.003861,0.000000,0.000000,0.000000
FAM138A,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
OR4F5,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
RP11-34P13.7,0.000000,0.000000,0.000000,0.004505,0.003623,0.001335,0.000000,0.000000,0.000000,0.002608,0.001256,0.002126,0.0,0.000000,0.000990,0.000000,0.002299
RP11-34P13.8,0.000000,0.000000,0.000000,0.002252,0.001035,0.000000,0.000000,0.000000,0.000000,0.001304,0.001256,0.000304,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AC233755.2,0.000000,0.000000,0.000000,0.002252,0.000000,0.000668,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
AC233755.1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
AC240274.1,0.013514,0.006452,0.043478,0.065315,0.013975,0.016021,0.058242,0.076923,0.050847,0.011734,0.007538,0.007594,0.0,0.027027,0.006931,0.020704,0.009195
AC213203.1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


## Load DE expression info

In [15]:
path_DE = './joint_DEGs_list_all_cell_types_for_cellphone.csv'
DE_df = pd.read_csv(path_DE, #row.names = 0
                )

DE_df


Unnamed: 0.1,Unnamed: 0,Gene,logFC,P.Value,adj.P.Val,AveExpr_cluster,AveExpr_rest,percentExpr_cluster,percentExpr_rest,cluster
0,0,HTRA1,-147.903284,3.151900e-21,5.069516e-17,160.806267,308.709550,1,1,B cell
1,1,DDN,-29.632876,2.596122e-19,1.989300e-15,24.005380,53.638256,1,1,B cell
2,2,AIF1,-34.851114,3.710458e-19,1.989300e-15,34.119944,68.971058,1,1,B cell
3,3,NEDD9,-18.092348,5.381394e-19,2.163858e-15,21.228594,39.320942,1,1,B cell
4,4,ALDOB,-103.131731,2.539504e-18,8.169075e-15,32.868548,136.000279,1,1,B cell
...,...,...,...,...,...,...,...,...,...,...
273423,16079,SLC26A11,0.000132,9.998341e-01,9.999668e-01,9.855141,9.855009,1,1,Th cell
273424,16080,RNPEP,0.000157,9.999368e-01,9.999668e-01,38.436916,38.436759,1,1,Th cell
273425,16081,TTC37,0.000041,9.999576e-01,9.999668e-01,15.905058,15.905017,1,1,Th cell
273426,16082,CDSN,0.000035,9.999589e-01,9.999668e-01,10.358523,10.358489,1,1,Th cell


In [16]:
# filter the DE table according to cutoffs declared in the beginning of the notebook
# separately for upreg and downreg genes

DE_df = DE_df[(abs(DE_df['logFC']) > logFC_cutoff)
             & (DE_df['adj.P.Val'] < pval_cutoff)
             & (DE_df['percentExpr_cluster'] > per_cutoff)]

DE_df_upreg = DE_df[DE_df['logFC'] > 0]
DE_df_downreg = DE_df[DE_df['logFC'] < 0]

In [17]:
# without logFC filtering at all
print(DE_df.shape)
print(DE_df_upreg.shape)
print(DE_df_downreg.shape)

(61319, 10)
(34136, 10)
(27183, 10)


In [18]:
# Build dictionary clusters2DE_genes
# separately for upreg and downreg genes

clusters_upreg = list(np.unique(DE_df_upreg['cluster']))
clusters_downreg = list(np.unique(DE_df_downreg['cluster']))

is_DE_upreg = {}
is_DE_downreg = {}

for cluster in clusters_upreg:
    is_DE_upreg[cluster] = list(DE_df_upreg[DE_df_upreg['cluster'] == cluster]['Gene'])
    
for cluster in clusters_downreg:
    is_DE_downreg[cluster] = list(DE_df_downreg[DE_df_downreg['cluster'] == cluster]['Gene'])

## Define cell pairs to test

In [19]:
len(list(genes_expr_per_cell_type.keys()))

17

In [20]:
list(genes_expr_per_cell_type.keys())

['B cell',
 'CD8 T cell',
 'DC',
 'Distal_tubules_and_collecting_duct',
 'Endothelium',
 'Loop_of_Henle',
 'MNP',
 'Mast cell',
 'Mesangial_cells',
 'NK cell',
 'NKT cell',
 'Nephron_epithelium',
 'Neutrophil',
 'Podocytes',
 'Private',
 'Proximal_tubular_cells',
 'Th cell']

In [21]:
# Get all pairwise cluster combinations: A--B, B--A
# Consider the reverse interaction too: A--B but B--A as well
pairwise_cluster_combinations = list(itertools.permutations(list(genes_expr_per_cell_type.keys()), 2))
len(pairwise_cluster_combinations)


272

In [22]:
pairwise_cluster_combinations[:5]

[('B cell', 'CD8 T cell'),
 ('B cell', 'DC'),
 ('B cell', 'Distal_tubules_and_collecting_duct'),
 ('B cell', 'Endothelium'),
 ('B cell', 'Loop_of_Henle')]

In [23]:
# add self interactions
self_inter_combinations = [(ct, ct) for ct in list(genes_expr_per_cell_type.keys())]
pairwise_cluster_combinations = pairwise_cluster_combinations + self_inter_combinations
len(pairwise_cluster_combinations)


289

In [24]:
len(is_DE_upreg.keys())

17

In [25]:
len(is_DE_downreg.keys())

17

In [26]:
# We only want to test pairs including at least one celltype in the DE folder

pairwise_cluster_combinations_upreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_upreg.keys()) or elem[1] in list(is_DE_upreg.keys())]
pairwise_cluster_combinations_downreg = [elem for elem in pairwise_cluster_combinations if elem[0] in list(is_DE_downreg.keys()) or elem[1] in list(is_DE_downreg.keys())]

In [27]:
len(pairwise_cluster_combinations_upreg)

289

In [28]:
len(pairwise_cluster_combinations_downreg)

289

In [29]:
# Make cluster pair labels: celltypeA--celltypeB             
cluster_combinations_labels_upreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_upreg]
cluster_combinations_labels_downreg = [comb[0] + '---' + comb[1] for comb in pairwise_cluster_combinations_downreg]

In [30]:
len(cluster_combinations_labels_upreg)

289

In [31]:
len(cluster_combinations_labels_downreg)

289

# Retrieve CellphoneDB L/R interactions

A relevant interaction shoudl have

1. All their participants expressed in the corresponding celltypes
2. At least one participant is a DEG

In [32]:
len(Int2Gene.keys())

809

In [33]:
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_upreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_upreg)) )
                                         )

df_Exrp_LR_in_celltype_pairs_downreg = pd.DataFrame(index = list(Int2Gene.keys()),
                                          columns = cluster_combinations_labels_downreg,
                                          data = np.zeros( (len(list(Int2Gene.keys())), 
                                                            len(cluster_combinations_labels_downreg)) )
                                         )

In [34]:
df_Exrp_LR_in_celltype_pairs_upreg

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Neutrophil---Neutrophil,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM3_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NRTN_RET receptor 2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TNFSF18_TNFRSF18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
a2Bb3 complex_fibrinogen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aMb2 complex_fibrinogen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Neutrophil---Neutrophil,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM3_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NRTN_RET receptor 2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TNFSF18_TNFRSF18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
a2Bb3 complex_fibrinogen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aMb2 complex_fibrinogen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] = 1


CPU times: user 32 s, sys: 0 ns, total: 32 s
Wall time: 32 s


In [37]:
%%time

# LR_pairs_celltype_pairs_df will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# So, fill 1 if all genes are expressed in all clusters

for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # are all partner_A genes expressed in celltype_A and are all partner_B genes expressed in celltype_B?
        are_all_expressed = all(elem in genes_expr_per_cell_type[ct_A] for elem in partner_A_genes) & all(elem in genes_expr_per_cell_type[ct_B] for elem in partner_B_genes)
        
        if are_all_expressed:
            df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] = 1


CPU times: user 32 s, sys: 0 ns, total: 32 s
Wall time: 32 s


In [38]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([229136,   4665]))

In [39]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([229136,   4665]))

In [40]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 37., 38., 40., 41., 42.,
       43., 52., 60., 69.])

In [41]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.sum(axis=0))

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 37., 38., 40., 41., 42.,
       43., 52., 60., 69.])

In [42]:
df_Exrp_LR_in_celltype_pairs_upreg.shape

(809, 289)

In [43]:
df_Exrp_LR_in_celltype_pairs_downreg.shape

(809, 289)

In [44]:
# keep celltype pairs with at least one expressed interaction

df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=0)]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(809, 289)
(809, 289)


In [45]:
# keep interactions with at least one celltype_pair
df_Exrp_LR_in_celltype_pairs_upreg = df_Exrp_LR_in_celltype_pairs_upreg.loc[(df_Exrp_LR_in_celltype_pairs_upreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_upreg.shape)

df_Exrp_LR_in_celltype_pairs_downreg = df_Exrp_LR_in_celltype_pairs_downreg.loc[(df_Exrp_LR_in_celltype_pairs_downreg != 0).any(axis=1), :]
print(df_Exrp_LR_in_celltype_pairs_downreg.shape)

(303, 289)
(303, 289)


In [46]:
df_Exrp_LR_in_celltype_pairs_downreg.columns

Index(['B cell---CD8 T cell', 'B cell---DC',
       'B cell---Distal_tubules_and_collecting_duct', 'B cell---Endothelium',
       'B cell---Loop_of_Henle', 'B cell---MNP', 'B cell---Mast cell',
       'B cell---Mesangial_cells', 'B cell---NK cell', 'B cell---NKT cell',
       ...
       'Mast cell---Mast cell', 'Mesangial_cells---Mesangial_cells',
       'NK cell---NK cell', 'NKT cell---NKT cell',
       'Nephron_epithelium---Nephron_epithelium', 'Neutrophil---Neutrophil',
       'Podocytes---Podocytes', 'Private---Private',
       'Proximal_tubular_cells---Proximal_tubular_cells', 'Th cell---Th cell'],
      dtype='object', length=289)

In [47]:
df_Exrp_LR_in_celltype_pairs_downreg.loc[:,'B cell---CD8 T cell']

PVR_CD96              0.0
PVR_CD226             0.0
PVR_TIGIT             0.0
CADM1_CADM1           0.0
CRTAM_CADM1           0.0
                     ... 
CD47_SIRPG            1.0
CD47_SIRB1 complex    0.0
LAIR1_LILRB4          0.0
CLEC2B_KLRF1          0.0
ESAM_ESAM             0.0
Name: B cell---CD8 T cell, Length: 303, dtype: float64

In [48]:
df_Exrp_LR_in_celltype_pairs_downreg

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Neutrophil---Neutrophil,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD47_SIRPG,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
CD47_SIRB1 complex,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [49]:
list(df_Exrp_LR_in_celltype_pairs_downreg.index) == list(df_Exrp_LR_in_celltype_pairs_upreg.index)

True

In [50]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg.values, return_counts=True)

(array([0., 1.]), array([82902,  4665]))

In [51]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg.values, return_counts=True)

(array([0., 1.]), array([82902,  4665]))

In [52]:
np.sum(df_Exrp_LR_in_celltype_pairs_upreg.values)

4665.0

In [53]:
np.sum(df_Exrp_LR_in_celltype_pairs_downreg.values)

4665.0

In [54]:
# Initialize DE matrix from LR_pairs_celltype_pairs_df and set all values to 0
# DE will be a binary matrix with 1 indicating that all the genes in the interaction are expressed in the corresponding celltype
# and one is a DE in the celltypes of interests
# Make scaffold matrix: L/R interactions (rows) x celltype pairs (columns)
df_Exrp_LR_in_celltype_pairs_upreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_upreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_upreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_upreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_upreg.columns))))
                                            )
df_Exrp_LR_in_celltype_pairs_downreg_DE = pd.DataFrame(index = list(df_Exrp_LR_in_celltype_pairs_downreg.index),
                                          columns = list(df_Exrp_LR_in_celltype_pairs_downreg.columns),
                                            data = np.zeros( (len(list(df_Exrp_LR_in_celltype_pairs_downreg.index)), 
                                                            len(list(df_Exrp_LR_in_celltype_pairs_downreg.columns))))
                                            )

In [55]:
# to find special cell types that do not have upreg DE genes

all_ct = list(genes_expr_per_cell_type.keys())

ct_with_upreg_DE = list(is_DE_upreg.keys())

ct_with_downreg_DE = list(is_DE_downreg.keys())


ct_with_no_upreg_DE_genes = set(all_ct) - set(ct_with_upreg_DE)
ct_with_no_downreg_DE_genes = set(all_ct) - set(ct_with_downreg_DE)


In [56]:
ct_with_no_upreg_DE_genes

set()

In [57]:
ct_with_no_downreg_DE_genes

set()

In [58]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_upreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        
        # so cell type T8_activated_2 will throw an error here because it doesn't have downreg DE genes, so needs a special if
        if ct_A in ct_with_no_upreg_DE_genes:
            # if ct_A doesn't have upreg DE, we only care about if partner_B_genes are upreg in this case
            are_any_DE = all(elem in is_DE_upreg[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_upreg_DE_genes:
            # if ct_B doesn't have upreg DE, we only care about if partner_A_genes are upreg in this case
            are_any_DE = all(elem in is_DE_upreg[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have upreg DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_upreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_upreg[ct_B] for elem in partner_B_genes)
      
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_upreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 4.7 s, sys: 0 ns, total: 4.7 s
Wall time: 4.7 s


In [59]:
%%time
# So, fill 1 if at least one gene in the interaction is DE
for interaction in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index):
    #print('interaction', interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction)+1, 
    #      'out of', len(list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index)))
    for ct_pair in list(df_Exrp_LR_in_celltype_pairs_downreg_DE.columns):
        #print(ct_pair)
        
        ct_A = ct_pair.split('---')[0]
        ct_B = ct_pair.split('---')[1]
        
        partner_A_genes = Int2Gene[interaction]['partner_a']
        partner_B_genes = Int2Gene[interaction]['partner_b']
        
        # so cell type T8_activated_2 will throw an error here because it doesn't have downreg DE genes, so needs a special if
        if ct_A in ct_with_no_downreg_DE_genes:
            # if ct_A doesn't have downreg DE, we only care about if partner_B_genes are downreg in this case
            are_any_DE = all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)
        elif ct_B in ct_with_no_downreg_DE_genes:
            # if ct_B doesn't have downreg DE, we only care about if partner_A_genes are downreg in this case
            are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes)
        else:
            # if both partners have downreg DE genes, proceed as normal
            # are partner_A genes DE in celltype_A OR are partner_B genes DE in celltype_B?
            are_any_DE = all(elem in is_DE_downreg[ct_A] for elem in partner_A_genes) | all(elem in is_DE_downreg[ct_B] for elem in partner_B_genes)
   
        if are_any_DE & (df_Exrp_LR_in_celltype_pairs_downreg.loc[interaction, ct_pair] == 1):
            df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction, ct_pair] = 1

CPU times: user 4.09 s, sys: 7.74 ms, total: 4.1 s
Wall time: 4.1 s


In [60]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Neutrophil---Neutrophil,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD47_SIRPG,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD47_SIRB1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [61]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Neutrophil---Neutrophil,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
PVR_CD96,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_CD226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
PVR_TIGIT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CADM1_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CRTAM_CADM1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CD47_SIRPG,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
CD47_SIRB1 complex,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LAIR1_LILRB4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CLEC2B_KLRF1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
# UPREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_upreg_DE = df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_upreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_upreg_DE.shape, '\n')

shape after filtering interactions
(303, 251) 

shape after filtering cell type pairs
(86, 251) 



In [63]:
# DOWNREG interactions

# keep interactions expressed in at least one celltype pair
df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[:, (df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=0)]
print('shape after filtering interactions')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

# keep celltypepairs with at least one interaction

df_Exrp_LR_in_celltype_pairs_downreg_DE = df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[(df_Exrp_LR_in_celltype_pairs_downreg_DE != 0).any(axis=1),:]
print('shape after filtering cell type pairs')
print(df_Exrp_LR_in_celltype_pairs_downreg_DE.shape, '\n')

shape after filtering interactions
(303, 281) 

shape after filtering cell type pairs
(60, 281) 



In [64]:
np.unique(df_Exrp_LR_in_celltype_pairs_upreg_DE.values, return_counts=True)

(array([0., 1.]), array([20064,  1522]))

In [65]:
np.unique(df_Exrp_LR_in_celltype_pairs_downreg_DE.values, return_counts=True)

(array([0., 1.]), array([15268,  1592]))

In [66]:
df_Exrp_LR_in_celltype_pairs_upreg_DE

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,B cell---Neutrophil,...,Loop_of_Henle---Loop_of_Henle,MNP---MNP,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
FN1_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
COL5A2_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
COL1A2_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
COL6A2_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
COL18A1_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SELP_CD24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD99_PILRA,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CD52_SIGLEC10,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LTBR_LTB,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [67]:
df_Exrp_LR_in_celltype_pairs_downreg_DE

Unnamed: 0,B cell---CD8 T cell,B cell---DC,B cell---Distal_tubules_and_collecting_duct,B cell---Endothelium,B cell---Loop_of_Henle,B cell---MNP,B cell---Mast cell,B cell---Mesangial_cells,B cell---NK cell,B cell---NKT cell,...,MNP---MNP,Mast cell---Mast cell,Mesangial_cells---Mesangial_cells,NK cell---NK cell,NKT cell---NKT cell,Nephron_epithelium---Nephron_epithelium,Podocytes---Podocytes,Private---Private,Proximal_tubular_cells---Proximal_tubular_cells,Th cell---Th cell
COL4A5_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A3_a10b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A5_a11b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A3_a11b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A5_a1b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A3_a1b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A5_a2b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
COL4A3_a2b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FN1_a3b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
THBS1_a3b1 complex,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


## Save results

In [68]:
save_path = './'

In [69]:
# binary result matrices
df_Exrp_LR_in_celltype_pairs_upreg_DE.to_csv(save_path + 'cellphone_interactions_upreg_in_abnormal_vs_healthy_no_logFC_cutoff.csv')
df_Exrp_LR_in_celltype_pairs_downreg_DE.to_csv(save_path + 'cellphone_interactions_downreg_in_abnormal_vs_healthy_no_logFC_cutoff.csv')

## Save results in a more readable format

Gene by gene breakdown with added DEG stats

### Upreg interactions

In [71]:
faulty_index_count = 0

vec2_append_upreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_upreg_DE.index:
    
    #print(interaction, list(df_Exrp_LR_in_celltype_pairs_upreg_DE.index).index(interaction) + 1, 
    #      'out of', len(df_Exrp_LR_in_celltype_pairs_upreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_upreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        # row by row
        vec2_append_upreg[str(curr_count)] = {}
        
        vec2_append_upreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_upreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_upreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        vec2_append_upreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_upreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_upreg[DE_df_upreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        
        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            vec2_append_upreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_upreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_upreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_upreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

In [72]:
# outlining the final table format
df_output_upreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_upreg.keys())
                            )
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1517,,,,,,,,,,,,,
1518,,,,,,,,,,,,,
1519,,,,,,,,,,,,,
1520,,,,,,,,,,,,,


In [73]:
list(df_output_upreg.columns) == list(vec2_append_upreg['0'].keys())

True

In [74]:
len(vec2_append_upreg.keys())

1522

In [75]:
vec2_append_upreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [76]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1517,,,,,,,,,,,,,
1518,,,,,,,,,,,,,
1519,,,,,,,,,,,,,
1520,,,,,,,,,,,,,


In [77]:
%%time

for i in list(vec2_append_upreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_upreg[i].keys())
    for col in curr_keys:
        df_output_upreg.loc[i,col] = vec2_append_upreg[i][col]

CPU times: user 1.03 s, sys: 3.95 ms, total: 1.03 s
Wall time: 1.03 s


In [78]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,FN1_a10b1 complex,[FN1],"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,[5.7094563491760715],[0.0181158810046262],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
1,FN1_a10b1 complex,[FN1],"[ITGA10, ITGB1]",Endothelium,Mesangial_cells,True,[5.7094563491760715],[0.0181158810046262],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
2,FN1_a10b1 complex,[FN1],"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,[5.7094563491760715],[0.0181158810046262],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
3,COL5A2_a10b1 complex,[COL5A2],"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,[3.80515143068838],[0.00014071735352043501],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
4,COL1A2_a10b1 complex,[COL1A2],"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,[11.7352958382638],[0.000128758661947416],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1517,LAIR1_LILRB4,[LAIR1],[LILRB4],NKT cell,MNP,True,[3.7702558889103504],[4.07767586446817e-05],[1],True,[4.85937078071363],[6.360364419824e-07],[1]
1518,LAIR1_LILRB4,[LAIR1],[LILRB4],Private,DC,True,[3.7702558889103504],[4.07767586446817e-05],[1],True,[4.85937078071363],[6.360364419824e-07],[1]
1519,LAIR1_LILRB4,[LAIR1],[LILRB4],Private,MNP,True,[3.7702558889103504],[4.07767586446817e-05],[1],True,[4.85937078071363],[6.360364419824e-07],[1]
1520,LAIR1_LILRB4,[LAIR1],[LILRB4],DC,DC,True,[3.7702558889103504],[4.07767586446817e-05],[1],True,[4.85937078071363],[6.360364419824e-07],[1]


In [79]:
# getting rid of the square parentheses [] in all the values

cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_upreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_upreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_upreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [80]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,FN1_a10b1 complex,FN1,"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,5.70946,0.0181159,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
1,FN1_a10b1 complex,FN1,"[ITGA10, ITGB1]",Endothelium,Mesangial_cells,True,5.70946,0.0181159,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
2,FN1_a10b1 complex,FN1,"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,5.70946,0.0181159,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
3,COL5A2_a10b1 complex,COL5A2,"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,3.80515,0.000140717,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
4,COL1A2_a10b1 complex,COL1A2,"[ITGA10, ITGB1]",Mesangial_cells,Mesangial_cells,True,11.7353,0.000128759,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1517,LAIR1_LILRB4,LAIR1,LILRB4,NKT cell,MNP,True,3.77026,4.07768e-05,1,True,4.85937,6.36036e-07,1
1518,LAIR1_LILRB4,LAIR1,LILRB4,Private,DC,True,3.77026,4.07768e-05,1,True,4.85937,6.36036e-07,1
1519,LAIR1_LILRB4,LAIR1,LILRB4,Private,MNP,True,3.77026,4.07768e-05,1,True,4.85937,6.36036e-07,1
1520,LAIR1_LILRB4,LAIR1,LILRB4,DC,DC,True,3.77026,4.07768e-05,1,True,4.85937,6.36036e-07,1


In [81]:
df_output_upreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

In [82]:
#df_output_upreg.to_csv(save_path + 'cellphone_interactions_table_with_gene_stats_upreg_in_abnormal_vs_healthy_no_logFC_cutoff_all.csv')


### Downreg interactions

In [83]:
faulty_index_count = 0

vec2_append_downreg = {}

# row count
curr_count = 0

for interaction in df_Exrp_LR_in_celltype_pairs_downreg_DE.index:
    
    #print(interaction, list(df_Exrp_LR_in_celltype_pairs_downreg_DE.index).index(interaction) + 1, 
    #      'out of', len(df_Exrp_LR_in_celltype_pairs_downreg_DE.index))
    # current row
    curr_table = pd.DataFrame(df_Exrp_LR_in_celltype_pairs_downreg_DE.loc[interaction])
    curr_table = curr_table[curr_table[interaction] > 0]
    
    for celltype_pair in list(curr_table.index):
        
        #print('row', curr_count)
        
        #print('celltype pair', celltype_pair)
        
        # row by row
        vec2_append_downreg[str(curr_count)] = {}
        
        vec2_append_downreg[str(curr_count)]['interaction'] = interaction
        
        #print(celltype_pair)
        
        # getting genes, these are lists of length 1 for simple interactions and > 1 for complexes
        curr_partner_A_genes = Int2Gene[interaction]['partner_a']
        curr_partner_B_genes = Int2Gene[interaction]['partner_b']
        
        #print('curr partner A genes', curr_partner_A_genes, 'len:', len(curr_partner_A_genes))
        #print('curr partner B genes', curr_partner_B_genes, 'len:', len(curr_partner_B_genes))
        
        vec2_append_downreg[str(curr_count)]['partner_A_genes'] = curr_partner_A_genes
        vec2_append_downreg[str(curr_count)]['partner_B_genes'] = curr_partner_B_genes
        
        curr_celltype_A = celltype_pair.split('---')[0]
        curr_celltype_B = celltype_pair.split('---')[1]
        
        #print('curr partner A cell type', curr_celltype_A)
        #print('curr partner B cell type', curr_celltype_B)
        
        vec2_append_downreg[str(curr_count)]['celltype_A'] = curr_celltype_A
        vec2_append_downreg[str(curr_count)]['celltype_B'] = curr_celltype_B
        
        
        # are all partner_A genes DE in celltype_A and are all partner_B genes DE in celltype_B?
        # these DE table subsets have been filtered already according to cutoffs declared in the beginning of the notebook
        curr_celltype_A_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_A]
        curr_celltype_A_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_A_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_A])
        
        curr_celltype_B_DE_table_subset = DE_df_downreg[DE_df_downreg['cluster'] == curr_celltype_B]
        curr_celltype_B_DE_table_subset.set_index('Gene', inplace=True)
        
        # Per_df table for all genes, even not DE - to include % of cells expressing even non-DE partners
        curr_celltype_B_per_df_full = pd.DataFrame(Per_df.loc[:,curr_celltype_B])

        # if partner A is DE, add stats
        if all(elem in list(curr_celltype_A_DE_table_subset.index) for elem in curr_partner_A_genes):
            #print('curr partner A is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_DE_table_subset.loc[curr_partner_A_genes,'percentExpr_cluster'])
            
        else: # if not DE, add 'NA'
            #print('curr partner A is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_A_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_A'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_A'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_A'] = list(curr_celltype_A_per_df_full.loc[curr_partner_A_genes,curr_celltype_A])
            
            
        # if partner B is DE, add stats
        if all(elem in list(curr_celltype_B_DE_table_subset.index) for elem in curr_partner_B_genes):
            #print('curr partner B is DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = True
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'logFC'])
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'adj.P.Val'])
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_DE_table_subset.loc[curr_partner_B_genes,'percentExpr_cluster'])
        else: # if not DE, add 'NA'
            #print('curr partner B is NOT DE')
            vec2_append_downreg[str(curr_count)]['is_partner_B_DE'] = False
            vec2_append_downreg[str(curr_count)]['logFC_gene_B'] = 'NA'
            vec2_append_downreg[str(curr_count)]['adj_pval_gene_B'] = 'NA'
            # even if the partner is not DE, we still want to know the % of cells expressing it
            vec2_append_downreg[str(curr_count)]['percent_expr_gene_B'] = list(curr_celltype_B_per_df_full.loc[curr_partner_B_genes,curr_celltype_B])
            
        curr_count += 1
    
    

In [84]:
# outlining the final table format
df_output_downreg = pd.DataFrame(columns = ['interaction',
                                     'partner_A_genes',
                                     'partner_B_genes',
                                     'celltype_A',
                                     'celltype_B',
                                     'is_partner_A_DE',
                                     'logFC_gene_A',
                                     'adj_pval_gene_A',
                                     'percent_expr_gene_A',
                                     'is_partner_B_DE',
                                     'logFC_gene_B',
                                     'adj_pval_gene_B',
                                     'percent_expr_gene_B'],
                              index = list(vec2_append_downreg.keys())
                            )
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,,,,,,,,,,,,,
1588,,,,,,,,,,,,,
1589,,,,,,,,,,,,,
1590,,,,,,,,,,,,,


In [85]:
list(df_output_downreg.columns) == list(vec2_append_downreg['0'].keys())

True

In [86]:
len(vec2_append_downreg.keys())

1592

In [87]:
vec2_append_downreg['0'].keys()

dict_keys(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A', 'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A', 'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B', 'adj_pval_gene_B', 'percent_expr_gene_B'])

In [88]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,,,,,,,,,,,,,
3,,,,,,,,,,,,,
4,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,,,,,,,,,,,,,
1588,,,,,,,,,,,,,
1589,,,,,,,,,,,,,
1590,,,,,,,,,,,,,


In [89]:
%%time

for i in list(vec2_append_downreg.keys()):
    #print(i)
    curr_keys = list(vec2_append_downreg[i].keys())
    for col in curr_keys:
        df_output_downreg.loc[i,col] = vec2_append_downreg[i][col]

CPU times: user 1.09 s, sys: 0 ns, total: 1.09 s
Wall time: 1.09 s


In [90]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,COL4A5_a10b1 complex,[COL4A5],"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,[-4.64555523715494],[0.0063016608218016],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
1,COL4A5_a10b1 complex,[COL4A5],"[ITGA10, ITGB1]",Podocytes,Mesangial_cells,True,[-4.64555523715494],[0.0063016608218016],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
2,COL4A3_a10b1 complex,[COL4A3],"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,[-11.1362155380015],[0.0132613800893692],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
3,COL4A3_a10b1 complex,[COL4A3],"[ITGA10, ITGB1]",Loop_of_Henle,Mesangial_cells,True,[-11.1362155380015],[0.0132613800893692],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
4,COL4A3_a10b1 complex,[COL4A3],"[ITGA10, ITGB1]",Podocytes,Mesangial_cells,True,[-11.1362155380015],[0.0132613800893692],[1],False,,,"[0.1694915254237288, 0.7288135593220338]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,CD47_SIRB1 complex,[CD47],"[SIRPB1, TYROBP]",Podocytes,MNP,True,[-5.21611639328334],[0.00179869684824925],[1],False,,,"[0.1934065934065934, 0.9813186813186812]"
1588,CD47_SIRB1 complex,[CD47],"[SIRPB1, TYROBP]",Private,MNP,True,[-5.21611639328334],[0.00179869684824925],[1],False,,,"[0.1934065934065934, 0.9813186813186812]"
1589,CD47_SIRB1 complex,[CD47],"[SIRPB1, TYROBP]",Proximal_tubular_cells,MNP,True,[-5.21611639328334],[0.00179869684824925],[1],False,,,"[0.1934065934065934, 0.9813186813186812]"
1590,CD47_SIRB1 complex,[CD47],"[SIRPB1, TYROBP]",Th cell,MNP,True,[-5.21611639328334],[0.00179869684824925],[1],False,,,"[0.1934065934065934, 0.9813186813186812]"


In [91]:
cols2correct = ['partner_A_genes', 'partner_B_genes', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B']

for row in list(df_output_downreg.index):
    #print('row', row)
    for col in cols2correct:
        #print('column', col)
        curr_value = df_output_downreg.loc[row, col] # with []
        #print(curr_value)
        if (curr_value != 'NA') & (len(curr_value) == 1): # ignoring NAs and lists of length > 1 - complex genes that is
        #if not isinstance(curr_value, list) & (curr_value != 'NA'): # ignoring NAs and lists of length > 1 - complex genes that is
            df_output_downreg.loc[row, col] = curr_value[0] # this just get the element - string if a gene, numerical value if it's a stat
            

In [92]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
0,COL4A5_a10b1 complex,COL4A5,"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,-4.64556,0.00630166,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
1,COL4A5_a10b1 complex,COL4A5,"[ITGA10, ITGB1]",Podocytes,Mesangial_cells,True,-4.64556,0.00630166,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
2,COL4A3_a10b1 complex,COL4A3,"[ITGA10, ITGB1]",Distal_tubules_and_collecting_duct,Mesangial_cells,True,-11.1362,0.0132614,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
3,COL4A3_a10b1 complex,COL4A3,"[ITGA10, ITGB1]",Loop_of_Henle,Mesangial_cells,True,-11.1362,0.0132614,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
4,COL4A3_a10b1 complex,COL4A3,"[ITGA10, ITGB1]",Podocytes,Mesangial_cells,True,-11.1362,0.0132614,1,False,,,"[0.1694915254237288, 0.7288135593220338]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,CD47_SIRB1 complex,CD47,"[SIRPB1, TYROBP]",Podocytes,MNP,True,-5.21612,0.0017987,1,False,,,"[0.1934065934065934, 0.9813186813186812]"
1588,CD47_SIRB1 complex,CD47,"[SIRPB1, TYROBP]",Private,MNP,True,-5.21612,0.0017987,1,False,,,"[0.1934065934065934, 0.9813186813186812]"
1589,CD47_SIRB1 complex,CD47,"[SIRPB1, TYROBP]",Proximal_tubular_cells,MNP,True,-5.21612,0.0017987,1,False,,,"[0.1934065934065934, 0.9813186813186812]"
1590,CD47_SIRB1 complex,CD47,"[SIRPB1, TYROBP]",Th cell,MNP,True,-5.21612,0.0017987,1,False,,,"[0.1934065934065934, 0.9813186813186812]"


In [93]:
df_output_downreg.columns

Index(['interaction', 'partner_A_genes', 'partner_B_genes', 'celltype_A',
       'celltype_B', 'is_partner_A_DE', 'logFC_gene_A', 'adj_pval_gene_A',
       'percent_expr_gene_A', 'is_partner_B_DE', 'logFC_gene_B',
       'adj_pval_gene_B', 'percent_expr_gene_B'],
      dtype='object')

In [94]:
#df_output_downreg.to_csv(save_path + 'cellphone_interactions_table_with_gene_stats_downreg_in_abnormal_vs_healthy_no_logFC_cutoff_all.csv')


# Adding spatial context

### 1. Identifying colocated cell types as inferred from cell type deconvolution of NanoString ROIs 

In [98]:
nanostring_data_path = '../KidneyDataset/'

In [99]:
# get the original cell type labels
Cell_Types_for_Spatial_Decon = pd.read_csv(nanostring_data_path + 'Cell_Types_for_Spatial_Decon.txt',
                                          sep='\t')

In [100]:
np.unique(Cell_Types_for_Spatial_Decon['cell_type_general'])

array(['B cell', 'CD8 T cell', 'DC', 'Distal_tubules_and_collecting_duct',
       'Endothelium', 'Loop_of_Henle', 'MNP', 'Mast cell',
       'Mesangial_cells', 'NK cell', 'NKT cell', 'Nephron_epithelium',
       'Neutrophil', 'Podocytes', 'Private', 'Proximal_tubular_cells',
       'Th cell'], dtype=object)

In [101]:
# cell type deconvolution of the ROIs
stats = pd.read_csv('../table_for_visualisation_ROIs.csv', index_col=0)
cols2keep = [elem for elem in stats.columns if ('ct_deconv_' in elem)]
stats = stats.loc[:,cols2keep]
stats

Unnamed: 0,ct_deconv_b cell,ct_deconv_cd8 t cell,ct_deconv_dc,ct_deconv_distal_tubules_and_collecting_duct,ct_deconv_endothelium,ct_deconv_loop_of_henle,ct_deconv_mnp,ct_deconv_mast cell,ct_deconv_mesangial_cells,ct_deconv_nk cell,ct_deconv_nkt cell,ct_deconv_nephron_epithelium,ct_deconv_neutrophil,ct_deconv_podocytes,ct_deconv_private,ct_deconv_proximal_tubular_cells,ct_deconv_th cell
disease3_scan | 001 | PanCK,0.165367,0.0,0.144766,5.912619,1.065527,6.559797,0.000000,0.010616,1.949580,0.792261,0.0,0.984002,0.000000,0.0,0.000000,1.813119,0.0
disease3_scan | 001 | neg,0.429078,0.0,0.116335,0.158539,1.864872,3.693691,0.359315,0.033528,2.786312,1.040060,0.0,0.900639,0.000000,0.0,0.000000,4.074634,0.0
disease3_scan | 002 | PanCK,0.160136,0.0,0.146487,6.320373,1.209093,8.880168,0.000000,0.025804,2.070078,0.691347,0.0,0.646674,0.000000,0.0,0.000000,1.328950,0.0
disease3_scan | 002 | neg,0.442468,0.0,0.155963,0.296930,2.417755,6.044222,0.283120,0.025277,2.978519,0.913327,0.0,0.785815,0.000000,0.0,0.000000,2.404160,0.0
disease3_scan | 003 | PanCK,0.310115,0.0,0.248369,3.837155,1.534945,12.669177,0.126365,0.027154,3.242177,1.004190,0.0,1.267699,0.000000,0.0,0.000000,3.365717,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
disease1B_scan | 020 | Geometric Segment,0.122601,0.0,0.021265,0.028349,0.546041,0.326325,0.067259,0.000000,1.668836,0.000000,0.0,0.633129,0.000000,0.0,0.215985,0.000000,0.0
disease1B_scan | 021 | Geometric Segment,0.138538,0.0,0.000000,0.000000,0.896659,0.386762,0.197888,0.000000,3.161280,0.000000,0.0,1.294000,0.091056,0.0,0.061645,0.000000,0.0
disease1B_scan | 022 | Geometric Segment,0.050873,0.0,0.000000,0.136636,1.348782,0.312505,0.538904,0.000000,3.095845,0.000000,0.0,0.000000,0.000000,0.0,0.006782,0.000000,0.0
disease1B_scan | 023 | Geometric Segment,0.177848,0.0,0.000000,0.000000,0.892310,2.175969,0.033884,0.001869,3.091686,0.000000,0.0,0.981306,0.000000,0.0,0.224569,0.000000,0.0


In [102]:
stats.columns

Index(['ct_deconv_b cell', 'ct_deconv_cd8 t cell', 'ct_deconv_dc',
       'ct_deconv_distal_tubules_and_collecting_duct', 'ct_deconv_endothelium',
       'ct_deconv_loop_of_henle', 'ct_deconv_mnp', 'ct_deconv_mast cell',
       'ct_deconv_mesangial_cells', 'ct_deconv_nk cell', 'ct_deconv_nkt cell',
       'ct_deconv_nephron_epithelium', 'ct_deconv_neutrophil',
       'ct_deconv_podocytes', 'ct_deconv_private',
       'ct_deconv_proximal_tubular_cells', 'ct_deconv_th cell'],
      dtype='object')

In [103]:
# get back to how the cell types were named in Cell_Types_for_Spatial_Decon
np.unique(Cell_Types_for_Spatial_Decon['cell_type_general'])

array(['B cell', 'CD8 T cell', 'DC', 'Distal_tubules_and_collecting_duct',
       'Endothelium', 'Loop_of_Henle', 'MNP', 'Mast cell',
       'Mesangial_cells', 'NK cell', 'NKT cell', 'Nephron_epithelium',
       'Neutrophil', 'Podocytes', 'Private', 'Proximal_tubular_cells',
       'Th cell'], dtype=object)

In [104]:
len(np.unique(Cell_Types_for_Spatial_Decon['cell_type_general']))

17

In [105]:
# get back to how the cell types were named in Cell_Types_for_Spatial_Decon
stats.columns = ['ct_deconv_B cell', 
                 'ct_deconv_CD8 T cell', 
                 'ct_deconv_DC',
                 'ct_deconv_Distal_tubules_and_collecting_duct', 
                 'ct_deconv_Endothelium',
                 'ct_deconv_Loop_of_Henle', 
                 'ct_deconv_MNP', 
                 'ct_deconv_Mast cell',
                 'ct_deconv_Mesangial_cells', 
                 'ct_deconv_NK cell', 
                 'ct_deconv_NKT cell',
                 'ct_deconv_Nephron_epithelium', 
                 'ct_deconv_Neutrophil',
                 'ct_deconv_Podocytes', 
                 'ct_deconv_Private',
                 'ct_deconv_Proximal_tubular_cells', 
                 'ct_deconv_Th cell']

In [106]:
# cellphone results were produced only for glomeruli (abnormal and helathy), so excluding tubules here
idx2keep = [elem for elem in stats.index if 'Geometric Segment' in elem]
len(idx2keep)

149

In [107]:
stats = stats.loc[idx2keep,:]

In [108]:
# (n_ROI (glomeruli), n_cell_types)
stats.shape

(149, 17)

In [109]:
cell_types = [elem[10:] for elem in stats.columns]
cell_types

['B cell',
 'CD8 T cell',
 'DC',
 'Distal_tubules_and_collecting_duct',
 'Endothelium',
 'Loop_of_Henle',
 'MNP',
 'Mast cell',
 'Mesangial_cells',
 'NK cell',
 'NKT cell',
 'Nephron_epithelium',
 'Neutrophil',
 'Podocytes',
 'Private',
 'Proximal_tubular_cells',
 'Th cell']

In [133]:
# make a dict of cell types present in each ROI
cell_types_in_ROI = {}

for ROI in stats.index:
    #print(ROI)
    curr_df = pd.DataFrame(stats.loc[ROI,:])
    # cutoff for minimum presence in ROI
    curr_df = curr_df[curr_df[ROI] > 0]
    
    # get rid of 'ct_deconv_'
    curr_df.index = [elem[10:] for elem in curr_df.index]
    
    cell_types_in_ROI[ROI] = list(curr_df.index)

In [134]:
cell_types_in_ROI['disease3_scan | 007 | Geometric Segment']

['B cell',
 'DC',
 'Distal_tubules_and_collecting_duct',
 'Endothelium',
 'Loop_of_Henle',
 'MNP',
 'Mast cell',
 'Mesangial_cells',
 'NK cell',
 'Nephron_epithelium',
 'Podocytes']

In [135]:
# all possible cell type pairs, ignoring self pairs
cell_type_pairs = list(itertools.combinations(cell_types, 2))

In [136]:
cell_type_pairs[:10]

[('B cell', 'CD8 T cell'),
 ('B cell', 'DC'),
 ('B cell', 'Distal_tubules_and_collecting_duct'),
 ('B cell', 'Endothelium'),
 ('B cell', 'Loop_of_Henle'),
 ('B cell', 'MNP'),
 ('B cell', 'Mast cell'),
 ('B cell', 'Mesangial_cells'),
 ('B cell', 'NK cell'),
 ('B cell', 'NKT cell')]

In [137]:
len(cell_type_pairs)

136

In [138]:
pairs_coloc = []

for pair in cell_type_pairs:
    #print(pair)
    
    ct1 = pair[0]
    ct2 = pair[1]
    
    for ROI in stats.index:
        #print(ROI)
        if ct1 in cell_types_in_ROI[ROI] and ct2 in cell_types_in_ROI[ROI]:
            pairs_coloc.append(pair)
    
pairs_coloc = list(set(pairs_coloc))   
    

In [139]:
len(pairs_coloc)

104

In [140]:
# all colocated cell types
pairs_coloc

[('B cell', 'Mesangial_cells'),
 ('B cell', 'MNP'),
 ('Mesangial_cells', 'Nephron_epithelium'),
 ('Loop_of_Henle', 'Mast cell'),
 ('Distal_tubules_and_collecting_duct', 'NK cell'),
 ('MNP', 'Podocytes'),
 ('Loop_of_Henle', 'Nephron_epithelium'),
 ('DC', 'Podocytes'),
 ('Distal_tubules_and_collecting_duct', 'Nephron_epithelium'),
 ('MNP', 'Private'),
 ('Endothelium', 'Mesangial_cells'),
 ('Endothelium', 'MNP'),
 ('DC', 'Private'),
 ('B cell', 'CD8 T cell'),
 ('B cell', 'Loop_of_Henle'),
 ('B cell', 'DC'),
 ('B cell', 'Nephron_epithelium'),
 ('CD8 T cell', 'Endothelium'),
 ('Mast cell', 'Private'),
 ('Loop_of_Henle', 'Neutrophil'),
 ('NK cell', 'Neutrophil'),
 ('DC', 'MNP'),
 ('Distal_tubules_and_collecting_duct', 'Mesangial_cells'),
 ('Nephron_epithelium', 'Neutrophil'),
 ('Endothelium', 'Loop_of_Henle'),
 ('B cell', 'Neutrophil'),
 ('Endothelium', 'Nephron_epithelium'),
 ('MNP', 'NK cell'),
 ('DC', 'NK cell'),
 ('MNP', 'Proximal_tubular_cells'),
 ('Distal_tubules_and_collecting_duct', 

In [167]:
# write the colocated cell types into a .csv file
pd.DataFrame(pairs_coloc).to_csv('./coloc_cell_types.csv')

In [141]:
# cell type pairs that are not colocated in any ROIs
cell_type_pairs_not_coloc = list(set(cell_type_pairs) - set(pairs_coloc))

In [142]:
cell_type_pairs_not_coloc

[('NKT cell', 'Proximal_tubular_cells'),
 ('Endothelium', 'Th cell'),
 ('Neutrophil', 'Th cell'),
 ('NKT cell', 'Podocytes'),
 ('NKT cell', 'Nephron_epithelium'),
 ('Mesangial_cells', 'Th cell'),
 ('NKT cell', 'Private'),
 ('Endothelium', 'NKT cell'),
 ('NKT cell', 'Th cell'),
 ('MNP', 'Th cell'),
 ('Mesangial_cells', 'NKT cell'),
 ('DC', 'Th cell'),
 ('Loop_of_Henle', 'Th cell'),
 ('NK cell', 'Th cell'),
 ('Distal_tubules_and_collecting_duct', 'Th cell'),
 ('NKT cell', 'Neutrophil'),
 ('MNP', 'NKT cell'),
 ('Nephron_epithelium', 'Th cell'),
 ('Loop_of_Henle', 'NKT cell'),
 ('NK cell', 'NKT cell'),
 ('Mast cell', 'Th cell'),
 ('CD8 T cell', 'Private'),
 ('Private', 'Th cell'),
 ('DC', 'NKT cell'),
 ('Distal_tubules_and_collecting_duct', 'NKT cell'),
 ('B cell', 'Th cell'),
 ('CD8 T cell', 'Th cell'),
 ('Podocytes', 'Th cell'),
 ('Proximal_tubular_cells', 'Th cell'),
 ('Mast cell', 'NKT cell'),
 ('B cell', 'NKT cell'),
 ('CD8 T cell', 'NKT cell')]

In [143]:
np.unique(stats[stats['ct_deconv_MNP'] > 0]['ct_deconv_NKT cell'])

array([0.])

### 2. Discrading interactions between cell types that are not colocated in any ROIs

Upregulated interactions

In [145]:
# discard cellphone results for cell_type_pairs_not_coloc

idx_to_discard_upreg = []

for idx in df_output_upreg.index:
    curr_ct_A = df_output_upreg.loc[idx, 'celltype_A']
    curr_ct_B = df_output_upreg.loc[idx, 'celltype_B']
    
    for pair in cell_type_pairs_not_coloc:
        ct1 = pair[0]
        ct2 = pair[1]
        
        if (curr_ct_A == ct1 and curr_ct_B == ct2) or (curr_ct_A == ct2 and curr_ct_B == ct1):
            idx_to_discard_upreg.append(idx)
            
    

In [146]:
idx_to_keep_upreg = set(df_output_upreg.index) - set(idx_to_discard_upreg)

In [147]:
len(idx_to_keep_upreg)

1181

In [148]:
df_output_upreg = df_output_upreg.loc[idx_to_keep_upreg,:]

In [149]:
df_output_upreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
912,TNFSF14_TNFRSF14,TNFSF14,TNFRSF14,Private,Podocytes,True,1.68803,0.027942,1,False,,,0.204633
27,COL18A1_a11b1 complex,COL18A1,"[ITGB1, ITGA11]",Mesangial_cells,Mesangial_cells,True,2.61451,0.0454014,1,False,,,"[0.7288135593220338, 0.1016949152542373]"
997,SEMA4A_PLXND1,SEMA4A,PLXND1,Mast cell,Endothelium,True,2.18048,0.00667136,1,False,,,0.265528
1376,SELP_CD24,SELP,CD24,Endothelium,Proximal_tubular_cells,False,,,0.135611,True,2.6454,0.0343053,1
258,FCER2_aXb2 complex,FCER2,"[ITGB2, ITGAX]",B cell,MNP,False,,,0.243243,True,"[10.1285183950506, 5.41908546718856]","[7.82221790238649e-13, 9.085959486961573e-06]","[1, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1445,LTBR_LTB,LTBR,LTB,Endothelium,DC,False,,,0.118012,True,1.43944,0.0402619,1
105,FN1_a3b1 complex,FN1,"[ITGA3, ITGB1]",Mesangial_cells,Podocytes,True,5.70946,0.0181159,1,False,,,"[0.2432432432432433, 0.5714285714285714]"
1410,LTBR_LTB,LTBR,LTB,B cell,CD8 T cell,False,,,0.108108,True,1.43944,0.0402619,1
1331,SELL_SELPLG,SELL,SELPLG,Neutrophil,CD8 T cell,False,,,0.210145,True,1.65237,0.0310098,1


In [150]:
# this is the final output
df_output_upreg.to_csv('./cellphone_interactions_table_with_gene_stats_upreg_in_abnormal_vs_healthy_no_logFC_cutoff_all.csv')

In [151]:
# adding self pairs
pairs_coloc_all = pairs_coloc + [(elem, elem) for elem in cell_types]
len(pairs_coloc_all)

121

In [152]:
# subset tables for each cell type pair, including self interactions

df_output_upreg_pair = {}

for pair in pairs_coloc_all:
    ct1 = pair[0]
    ct2 = pair[1]
    df_output_upreg_pair[pair] = df_output_upreg[((df_output_upreg['celltype_A'] == ct1) & (df_output_upreg['celltype_B'] == ct2))
                                                | ((df_output_upreg['celltype_B'] == ct1) & (df_output_upreg['celltype_A'] == ct2))]


In [153]:
df_output_upreg_pair[('B cell','B cell')]

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
1163,CD55_ADGRE5,CD55,ADGRE5,B cell,B cell,False,,,0.554054,True,4.21691,0.00312234,1
603,HLA-F_LILRB2,HLA-F,LILRB2,B cell,B cell,False,,,0.297297,True,9.65228,2.21634e-10,1
516,ICAM3_aLb2 complex,ICAM3,"[ITGB2, ITGAL]",B cell,B cell,False,,,0.364865,True,"[10.1285183950506, 4.76710229557314]","[7.82221790238649e-13, 1.28565836060683e-08]","[1, 1]"
735,TNF_TNFRSF1B,TNF,TNFRSF1B,B cell,B cell,False,,,0.135135,True,6.01061,1.97712e-06,1
419,ICAM2_aLb2 complex,ICAM2,"[ITGB2, ITGAL]",B cell,B cell,False,,,0.216216,True,"[10.1285183950506, 4.76710229557314]","[7.82221790238649e-13, 1.28565836060683e-08]","[1, 1]"
638,HLA-G_LILRB2,HLA-G,LILRB2,B cell,B cell,False,,,0.283784,True,9.65228,2.21634e-10,1
1194,PTPRC_CD22,PTPRC,CD22,B cell,B cell,True,4.41069,4.76773e-05,1.0,False,,,0.324324
1501,LTBR_LTB,LTBR,LTB,B cell,B cell,False,,,0.108108,True,1.43944,0.0402619,1


Downregulated interactions

In [154]:
# discard cellphone results for cell_type_pairs_not_coloc

idx_to_discard_downreg = []

for idx in df_output_downreg.index:
    curr_ct_A = df_output_downreg.loc[idx, 'celltype_A']
    curr_ct_B = df_output_downreg.loc[idx, 'celltype_B']
    
    for pair in cell_type_pairs_not_coloc:
        ct1 = pair[0]
        ct2 = pair[1]
        
        if (curr_ct_A == ct1 and curr_ct_B == ct2) or (curr_ct_A == ct2 and curr_ct_B == ct1):
            idx_to_discard_downreg.append(idx)
            
    

In [155]:
idx_to_keep_downreg = set(df_output_downreg.index) - set(idx_to_discard_downreg)

In [157]:
len(idx_to_keep_downreg)

1284

In [159]:
df_output_downreg = df_output_downreg.loc[idx_to_keep_downreg,:]

In [160]:
df_output_downreg

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
912,NOTCH2_DLL4,NOTCH2,DLL4,Podocytes,Endothelium,True,-5.18517,0.00356748,1,False,,,0.200311
27,COL4A3_a1b1 complex,COL4A3,"[ITGA1, ITGB1]",Loop_of_Henle,Mesangial_cells,True,-11.1362,0.0132614,1,False,,,"[0.3898305084745763, 0.7288135593220338]"
997,SIRPA_CD47,SIRPA,CD47,Podocytes,MNP,True,-5.34036,0.0190914,1,True,-5.21612,0.0017987,1
1376,HLA-E_KLRC1,HLA-E,KLRC1,NK cell,Private,True,-56.6277,0.00045808,1,False,,,0.155446
337,CD74_APP,CD74,APP,Distal_tubules_and_collecting_duct,DC,True,-52.1071,0.014349,1,False,,,0.956522
...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,CD94:NKG2C_HLA-E,"[KLRD1, KLRC2]",HLA-E,CD8 T cell,Distal_tubules_and_collecting_duct,False,,,"[0.3290322580645161, 0.10967741935483873]",True,-56.6277,0.00045808,1
105,FGF1_FGFR1,FGF1,FGFR1,Podocytes,Endothelium,True,-90.8794,1.34843e-09,1,False,,,0.316253
1410,HLA-E_KLRC2,HLA-E,KLRC2,DC,CD8 T cell,True,-56.6277,0.00045808,1,False,,,0.109677
1331,BTC_ERBB4,BTC,ERBB4,Podocytes,Mesangial_cells,False,,,0.169884,True,-1.85113,0.0280744,1


In [161]:
# this is the final output
df_output_downreg.to_csv('./cellphone_interactions_table_with_gene_stats_downreg_in_abnormal_vs_healthy_no_logFC_cutoff_all.csv')

In [162]:
# adding self pairs
pairs_coloc_all = pairs_coloc + [(elem, elem) for elem in cell_types]
len(pairs_coloc_all)

121

In [163]:
# subset tables for each cell type pair, including self interactions

df_output_downreg_pair = {}

for pair in pairs_coloc_all:
    ct1 = pair[0]
    ct2 = pair[1]
    df_output_downreg_pair[pair] = df_output_downreg[((df_output_downreg['celltype_A'] == ct1) & (df_output_downreg['celltype_B'] == ct2))
                                                | ((df_output_downreg['celltype_B'] == ct1) & (df_output_downreg['celltype_A'] == ct2))]


In [164]:
df_output_downreg_pair[('B cell','B cell')]

Unnamed: 0,interaction,partner_A_genes,partner_B_genes,celltype_A,celltype_B,is_partner_A_DE,logFC_gene_A,adj_pval_gene_A,percent_expr_gene_A,is_partner_B_DE,logFC_gene_B,adj_pval_gene_B,percent_expr_gene_B
856,FcRn complex_ALB,"[B2M, FCGRT]",ALB,B cell,B cell,False,,,"[1.0, 0.472972972972973]",True,-3.1517,0.00163958,1.0
533,CD74_APP,CD74,APP,B cell,B cell,True,-52.1071,0.014349,1,False,,,0.324324
