## Visualisation of broad cell type interactions

_15 February 2022_

## Importing

### Modules

In [1]:
import numpy as np
import scipy as sp
import scanpy as sc
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import re
import os
from numpy import asarray as ar
from collections import Counter

#sklearn <- machine learning
#statsmodels

sc.settings.verbosity = 1
sc.logging.print_version_and_date()
%load_ext autoreload
%autoreload 2

Running Scanpy 1.7.2, on 2022-02-16 13:31.


In [2]:
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all" # to show output from all the lines in a cells

In [3]:
# Show every column when displaying DataFrames. The full option key is spelled out
# instead of relying on pandas' abbreviated option-name matching.
pd.set_option('display.max_columns', None)

In [4]:
pd.options.display.max_rows = 100

In [5]:
from datetime import date
today = str(date.today())

In [6]:
%cd /nfs/team205/vk8/scripts/cpdb

/nfs/team205/vk8/scripts/cpdb


In [7]:
sc.settings.set_figure_params(dpi = 300, color_map = 'RdPu', dpi_save = 200, vector_friendly = True, format = 'pdf')

## Load expression data, DE results and cpdb results

In [8]:
adata = sc.read("/nfs/team205/vk8/processed_data/muscle/data_v3/SKM_cells2nuclei_combined_no_int_2022-01-27_v2.h5ad")

In [9]:
# Scale each cell to a total of 1e4 and log1p-transform the matrix in place.
# NOTE(review): assumes adata.X holds raw counts when loaded — TODO confirm.
# This cell is not idempotent: re-running it applies both transforms a second time.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Keep the transformed object reachable through adata.raw as well.
adata.raw = adata 

In [10]:
ctypes = adata.obs['annotation_level0'].unique().tolist()
n_ctypes = len(ctypes)
n_genes = adata.var.shape[0]

In [11]:
n_ctypes
n_genes

39

33694

In [12]:
de_lmm = pd.read_csv("/nfs/team205/vk8/scripts/de_lmm_natsuhiko/results_all/de_celltype_Age_interaction_scells2snuclei_broad.txt", \
                        sep = "\t"
)

In [13]:
len(set(adata.var['GeneID']).difference(set(de_lmm['ENSEMBL'])))

18224

In [14]:
len(set(de_lmm['ENSEMBL']).difference(set(adata.var['GeneID'])))

0

In [15]:
# Long-format scaffold with one row per (cell type, gene): the gene IDs are tiled
# once per cell type and every cell type label is repeated once per gene, so the
# rows of one cell type form a contiguous block in adata.var order.
gene_ids_tiled = np.tile(adata.var['GeneID'], reps = n_ctypes)
celltype_repeated = np.repeat(ctypes, repeats = n_genes)
df_ctype_broad_exprs = pd.DataFrame({"ENSEMBL": gene_ids_tiled, "celltype": celltype_repeated})

In [16]:
# Fill in the per-gene mean of adata.X for every cell type, separately for the
# 'young' and 'old' age bins. The column vector of means is written into the
# block of rows belonging to that cell type (gene order follows adata.var).
for ctype in ctypes:
    target_rows = df_ctype_broad_exprs['celltype'] == ctype
    cells_of_type = adata.obs['annotation_level0'] == ctype
    for mean_column, age_bin in (('mean_young', 'young'), ('mean_aged', 'old')):
        df_ctype_broad_exprs.loc[target_rows, mean_column] = \
            adata[cells_of_type & (adata.obs['Age_bin'] == age_bin), :].X.mean(0).T

In [207]:
2**0.15

1.109569472067845

In [209]:
# Thresholds used to call a gene differentially expressed with age.
LTSR_MIN = 0.9       # minimum ltsr value required
LOG2FC_MIN = 0.15    # minimum |beta_old - beta_young|; 2**0.15 is about 1.11
                     # NOTE(review): the fold-change reading assumes betas are on a log2 scale — confirm.

de_results = pd.read_csv(
    "/nfs/team205/vk8/scripts/de_lmm_natsuhiko/results_all/de_celltype_Age_interaction_scells2snuclei_broad.txt",
    sep = "\t",
)
de_results['log2fc'] = de_results['beta_old'] - de_results['beta_young']
de_results['sign'] = (de_results['ltsr'] > LTSR_MIN) & (de_results['flag5'] == True)

# Vectorised labelling (replaces a row-wise DataFrame.apply over the whole table):
# "up"/"dw" only for significant genes beyond the effect-size threshold, "none" otherwise.
# Comparisons with NaN are False, so rows with missing values fall through to "none".
de_results['REGULATION'] = np.select(
    [
        de_results['sign'] & (de_results['log2fc'] > LOG2FC_MIN),
        de_results['sign'] & (de_results['log2fc'] < -LOG2FC_MIN),
    ],
    ["up", "dw"],
    default = "none",
)

In [210]:
de_results = de_results.merge(df_ctype_broad_exprs, how = 'outer', on = ['celltype', 'ENSEMBL'])

In [211]:
de_results[(de_results['celltype'] == 'MuSC') & (de_results['gene'] == 'CCL2')]

Unnamed: 0,celltype,gene,ENSEMBL,ltsr,beta_old,beta_young,flag5,log2fc,sign,REGULATION,mean_young,mean_aged
12677,MuSC,CCL2,ENSG00000108691,1.0,0.217809,-0.179205,True,0.397014,True,up,0.47246,1.047496


In [213]:
de_results['celltype'].unique()

array(['MuSC', 'CapEC', 'ArtEC', 'VenEC', 'Mural', 'Lymphatic',
       'Pericyte', 'SMC', 'FB', 'NerveFB', 'Tenocyte', 'nmSchwann_cell',
       'mSchwann_cell', 'B-plasma', 'B-cell', 'T-cell', 'RBC', 'NK-cell',
       'Eosinophil', 'Neutrophil', 'Macrophage', 'cDC2', 'Monocyte',
       'cDC1', 'Mast_cell', 'pDC', 'MF_typeI(fg)', 'MF_type-HOOK2+',
       'MF_typeII(fg)', 'MF_type-MYH8+', 'Mesothelium', 'MF_typeI',
       'MF_typeII', 'MF_type-SORBS2', 'MF_typeI(cytoplasmic)', 'MTJ',
       'MF_typeII(cytoplasmic)', 'NMJ', 'Adipocyte'], dtype=object)

In [214]:
# Broad group for each annotated cell type; cell types not listed here
# (e.g. 'MuSC', 'Adipocyte', 'Mesothelium') keep their own name as the group.
celltype_group_members = {
    'Myofiber': ['MF_typeI(fg)', 'MF_type-HOOK2+', 'MF_typeII(fg)', 'MF_type-MYH8+', 'MF_typeI',
                 'MF_typeII', 'MF_type-SORBS2', 'MF_typeI(cytoplasmic)', 'MTJ',
                 'MF_typeII(cytoplasmic)', 'NMJ'],
    'Immune': ['B-plasma', 'B-cell', 'T-cell', 'NK-cell', 'Eosinophil', 'Neutrophil', 'Macrophage',
               'cDC2', 'Monocyte', 'cDC1', 'Mast_cell', 'pDC', 'RBC'],
    'Vessel': ['CapEC', 'ArtEC', 'VenEC', 'Lymphatic'],
    'SMC': ['Mural', 'Pericyte', 'SMC'],
    'FB': ['FB', 'NerveFB', 'Tenocyte'],
    'Schwann': ['nmSchwann_cell', 'mSchwann_cell'],
}
# Invert to a flat celltype -> group lookup.
group_by_celltype = {
    member: group
    for group, members in celltype_group_members.items()
    for member in members
}
de_results['celltype_group'] = de_results['celltype'].apply(
    lambda ctype: group_by_celltype.get(ctype, ctype)
)

In [215]:
de_results['celltype_group'].value_counts()

Immune         438022
Myofiber       370634
Vessel         134776
FB             101082
SMC            101082
Schwann         67388
Adipocyte       33694
MuSC            33694
Mesothelium     33694
Name: celltype_group, dtype: int64

In [216]:
de_results.shape
df_ctype_broad_exprs.shape

(1314066, 13)

(1314066, 4)

In [169]:
results_folder = '/nfs/team205/vk8/scripts/cpdb/SKM_cells2nuclei_broad/out/'
#cpdb_genes = pd.read_csv(f"{results_folder}deconvoluted.txt", sep = "\t")
#cpdb_sign_means = pd.read_csv(f"{results_folder}significant_means.txt", sep = "\t")
cpdb_means = pd.read_csv(f"{results_folder}means.txt", sep = "\t")

# `cpdb_rel_means` depends on relevant_interactions.txt. The read of that file was
# commented out while `cpdb_ints` was still used below, so the cell raised a NameError
# on a fresh kernel. Load the file when it is present and skip the subset otherwise.
cpdb_ints_path = f"{results_folder}relevant_interactions.txt"
if os.path.exists(cpdb_ints_path):
    cpdb_ints = pd.read_csv(cpdb_ints_path, sep = "\t")
    cpdb_rel_means = cpdb_means.loc[cpdb_means['interacting_pair'].isin(cpdb_ints['interacting_pair'].values), cpdb_ints.columns]
else:
    cpdb_rel_means = None

In [170]:
# Columns that describe the interaction itself; every remaining column of
# cpdb_means is melted into one (celltype_pair, mean) row.
interaction_columns = [
    'id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b',
    'gene_a', 'gene_b', 'secreted', 'receptor_a', 'receptor_b',
    'annotation_strategy', 'is_integrin',
]
cpdb_results = cpdb_means.melt(
    id_vars = interaction_columns,
    var_name = 'celltype_pair',
    value_name = 'mean',
)

In [171]:
# Cell type of each partner: the two halves of 'a|b' with the trailing age tag removed.
# Only a '-old' / '-young' suffix is stripped, so a cell type name that happens to
# contain one of these substrings elsewhere is left intact.
age_suffix_ptrn = re.compile(r'-(?:old|young)$')
cpdb_results['celltype_a'] = [age_suffix_ptrn.sub('', pair.split('|')[0]) for pair in cpdb_results['celltype_pair']]
cpdb_results['celltype_b'] = [age_suffix_ptrn.sub('', pair.split('|')[1]) for pair in cpdb_results['celltype_pair']]

In [172]:
def _age_from_label(label):
    """Return the age tag ('young' or 'old') at the end of a '<celltype>-<age>' label.

    The tag is taken from the text after the LAST hyphen, because cell type names
    themselves contain hyphens (e.g. 'B-plasma-old', 'MF_type-HOOK2+-old').
    Labels that do not end in a recognised tag give 'unknown'.
    """
    age = label.rsplit('-', 1)[-1]
    return age if age in ('young', 'old') else 'unknown'

# Age of each partner, parsed from its own half of the 'a|b' pair label.
cpdb_results['age_a'] = [_age_from_label(pair.split('|')[0]) for pair in cpdb_results['celltype_pair']]
cpdb_results['age_b'] = [_age_from_label(pair.split('|')[1]) for pair in cpdb_results['celltype_pair']]

In [173]:
# Re-orient every interaction as ligand -> receptor. Partner a is treated as the
# receptor whenever receptor_a is truthy; otherwise partner b is (receptor_b is not consulted).
a_is_receptor = [bool(flag) for flag in cpdb_results.receptor_a]

for kind in ('gene', 'celltype'):
    side_a = cpdb_results[f'{kind}_a']
    side_b = cpdb_results[f'{kind}_b']
    cpdb_results[f'{kind}_receptor'] = [a if a_rec else b for a, b, a_rec in zip(side_a, side_b, a_is_receptor)]
    cpdb_results[f'{kind}_ligand'] = [b if a_rec else a for a, b, a_rec in zip(side_a, side_b, a_is_receptor)]

# The a/b columns are no longer needed once the roles are resolved.
cpdb_results = cpdb_results.drop(columns=['gene_a', 'gene_b', 'celltype_a', 'celltype_b', 'receptor_a', 'receptor_b'])

# Role-suffixed identifiers so that the same gene or cell type gets distinct
# node ids on the ligand and receptor side.
for kind in ('gene', 'celltype'):
    cpdb_results[f'{kind}_receptor_id'] = [f'{name}-rec' for name in cpdb_results[f'{kind}_receptor']]
    cpdb_results[f'{kind}_ligand_id'] = [f'{name}-lig' for name in cpdb_results[f'{kind}_ligand']]

In [174]:
cpdb_results["celltype_receptor"].value_counts()

MF_typeI                  194916
B-plasma                  194916
T-cell                    194916
MTJ                       194916
MuSC                      194916
MF_typeII(cytoplasmic)    194916
cDC1                      194916
NMJ                       194916
Pericyte                  194916
MF_type-HOOK2+            194916
Macrophage                194916
Mesothelium               194916
Lymphatic                 194916
cDC2                      194916
nmSchwann_cell            194916
Tenocyte                  194916
Mast_cell                 194916
Adipocyte                 194916
MF_type-SORBS2            194916
NK-cell                   194916
MF_typeII                 194916
FB                        194916
B-cell                    194916
mSchwann_cell             194916
MF_typeI(cytoplasmic)     194916
VenEC                     194916
Neutrophil                194916
ArtEC                     194916
Mural                     194916
MF_type-MYH8+             194916
CapEC     

### Select the required interactions (old–old pairs for CCL2, CCL3, CCL4 and CXCL8)

In [175]:
# Keep interactions between two 'old' populations whose ligand is one of the
# chemokines of interest and whose mean is above zero.
ligands_of_interest = ['CCL2', 'CCL3', 'CCL4', 'CXCL8']
both_old = (cpdb_results['age_a'] == 'old') & (cpdb_results['age_b'] == 'old')
ligand_selected = cpdb_results['gene_ligand'].isin(ligands_of_interest)
has_signal = cpdb_results['mean'] > 0
df = cpdb_results[both_old & ligand_selected & has_signal]

In [176]:
df.shape

(11055, 19)

In [177]:
de_results

Unnamed: 0,celltype,gene,ENSEMBL,ltsr,beta_old,beta_young,flag5,log2fc,sign,REGULATION,mean_young,mean_aged,celltype_group
0,MuSC,FO538757.2,ENSG00000279457,0.000052,0.022952,-0.018559,True,0.041512,False,none,0.149266,0.131244,MuSC
1,MuSC,AP006222.2,ENSG00000228463,0.000005,-0.002092,0.002806,True,-0.004899,False,none,0.070520,0.074098,MuSC
2,MuSC,RP5-857K21.4,ENSG00000230021,0.000161,0.020245,-0.020712,False,0.040957,False,none,0.000168,0.000253,MuSC
3,MuSC,RP11-206L10.9,ENSG00000237491,0.000020,0.008438,-0.008829,False,0.017268,False,none,0.015160,0.019669,MuSC
4,MuSC,LINC00115,ENSG00000225880,0.000005,-0.000963,0.001038,False,-0.002001,False,none,0.015503,0.009462,MuSC
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1314061,Adipocyte,,ENSG00000278633,,,,,,,,0.000000,0.000000,Adipocyte
1314062,Adipocyte,,ENSG00000277856,,,,,,,,0.000000,0.000000,Adipocyte
1314063,Adipocyte,,ENSG00000275063,,,,,,,,0.000000,0.000000,Adipocyte
1314064,Adipocyte,,ENSG00000277475,,,,,,,,0.000000,0.000000,Adipocyte


In [217]:
# Attach the DE statistics and mean expression of the LIGAND gene in the ligand
# cell type. Left join: every interaction row is kept, and rows without a matching
# (celltype, gene) entry in de_results get NaN in the added columns.
df_merged = df.merge(de_results[['celltype', 'gene', 'log2fc', 'REGULATION', 'mean_young', 'mean_aged', \
                                 'celltype_group', 'flag5']], how = 'left', \
                                 left_on = ['celltype_ligand', 'gene_ligand'], right_on = ['celltype', 'gene'])

In [218]:
# Attach the same statistics for the RECEPTOR gene in the receptor cell type.
# The column names clash with those added by the ligand merge, so pandas appends
# the suffixes: existing (ligand) columns become '*-lig', the new ones '*-rec'.
# Left join: receptor rows without a match in de_results end up with NaN '-rec' values.
df_merged2 = df_merged.merge(de_results[['celltype', 'gene', 'log2fc', 'REGULATION', 'mean_young', 'mean_aged', 'celltype_group', \
                                        'flag5']], how = 'left', \
                       left_on = ['celltype_receptor', 'gene_receptor'],right_on = ['celltype', 'gene'], suffixes = ("-lig", "-rec"))

In [219]:
# Combined ligand/receptor expression in the aged samples: the average of the two
# means, or 0 when either partner is missing (NaN) or not expressed (0).
# Vectorised equivalent of the previous row-wise DataFrame.apply.
lig_mean_aged = df_merged2['mean_aged-lig']
rec_mean_aged = df_merged2['mean_aged-rec']
partner_absent = (
    lig_mean_aged.isna() | (lig_mean_aged == 0) |
    rec_mean_aged.isna() | (rec_mean_aged == 0)
)
df_merged2['mean_rec-lig'] = ((lig_mean_aged + rec_mean_aged) / 2).where(~partner_absent, 0)

In [220]:
# NOTE(review): this cell was a syntax error. Three `sc.read(...)` assignments
# (`immune`, `fb`, `endo_smc`, built on an undefined `data_folder`) had been pasted
# inside the `to_csv(...)` call and the original output path was lost. None of those
# objects is used in this notebook, so they are dropped here.
# The destination below is a placeholder next to the cpdb output — adjust as needed.
df_merged2.to_csv(f"{results_folder}cpdb_de_merged_{today}.csv", index = False)

In [221]:
# Renumber rows 0..n-1 (reassigned rather than mutated in place).
df_merged2 = df_merged2.reset_index(drop = True)

# Interactions with a mean above 0.5 whose ligand is up-regulated with age.
# The original referenced `merged2`, which is not defined in this notebook.
int_filt = df_merged2.loc[(df_merged2['mean']>0.5) & (df_merged2['REGULATION-lig']=='up')]

In [222]:
# Columns needed to draw the interaction graphs; exact duplicate rows are removed.
plot_columns = [
    'gene_ligand', 'gene_receptor',
    'celltype_ligand', 'celltype_group-lig', 'celltype_receptor', 'celltype_group-rec',
    'gene_receptor_id', 'gene_ligand_id', 'celltype_ligand_id', 'celltype_receptor_id',
    'mean', 'REGULATION-lig', 'REGULATION-rec',
    'mean_young-lig', 'mean_aged-lig', 'mean_young-rec', 'mean_aged-rec',
    'flag5-lig', 'flag5-rec',
]
df_plt = df_merged2[plot_columns].drop_duplicates().reset_index(drop=True)


In [223]:
df_plt.shape

(11055, 19)

In [224]:
sum(df_plt['celltype_receptor'].isin(['MF_typeI(fg)', 'MF_typeII(fg)', 'MF_typeI(cytoplasmic)', \
                                                  'MF_typeII(cytoplasmic)']))

896

In [225]:
sum(df_plt['celltype_receptor'].isin(['MF_typeI(fg)', 'MF_typeII(fg)', 'MF_typeI(cytoplasmic)', \
                                                  'MF_typeII(cytoplasmic)']))

896

In [226]:
# Drop every interaction in which either partner is one of the '(fg)' or
# '(cytoplasmic)' myofiber populations.
excluded_myofibers = ['MF_typeI(fg)', 'MF_typeII(fg)', 'MF_typeI(cytoplasmic)',
                      'MF_typeII(cytoplasmic)']
ligand_kept = ~df_plt['celltype_ligand'].isin(excluded_myofibers)
receptor_kept = ~df_plt['celltype_receptor'].isin(excluded_myofibers)
df_plt_filt = df_plt[ligand_kept & receptor_kept]

In [227]:
df_plt_filt[df_plt_filt['mean_aged-rec'].isnull()]

Unnamed: 0,gene_ligand,gene_receptor,celltype_ligand,celltype_group-lig,celltype_receptor,celltype_group-rec,gene_receptor_id,gene_ligand_id,celltype_ligand_id,celltype_receptor_id,mean,REGULATION-lig,REGULATION-rec,mean_young-lig,mean_aged-lig,mean_young-rec,mean_aged-rec,flag5-lig,flag5-rec
29,CCL4,GPRC5D,Adipocyte,Adipocyte,B-plasma,,GPRC5D-rec,CCL4-lig,Adipocyte-lig,B-plasma-rec,0.022,none,,0.0,0.016422,,,False,
54,CCL4,GPRC5D,Adipocyte,Adipocyte,FB,,GPRC5D-rec,CCL4-lig,Adipocyte-lig,FB-rec,0.008,none,,0.0,0.016422,,,False,
57,CCL3,CCR3,Adipocyte,Adipocyte,FB,,CCR3-rec,CCL3-lig,Adipocyte-lig,FB-rec,0.011,none,,0.0,0.021076,,,False,
65,CCL4,GPRC5D,Adipocyte,Adipocyte,Lymphatic,,GPRC5D-rec,CCL4-lig,Adipocyte-lig,Lymphatic-rec,0.010,none,,0.0,0.016422,,,False,
84,CCL4,CCR8,Adipocyte,Adipocyte,MF_type-SORBS2,,CCR8-rec,CCL4-lig,Adipocyte-lig,MF_type-SORBS2-rec,0.009,none,,0.0,0.016422,,,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10918,CCL3,CCR3,pDC,Immune,Macrophage,,CCR3-rec,CCL3-lig,pDC-lig,Macrophage-rec,0.045,none,,0.0,0.090141,,,True,
10969,CCL4,GPRC5D,pDC,Immune,NMJ,,GPRC5D-rec,CCL4-lig,pDC-lig,NMJ-rec,0.101,none,,0.0,0.193478,,,True,
10971,CCL3,CCR3,pDC,Immune,NMJ,,CCR3-rec,CCL3-lig,pDC-lig,NMJ-rec,0.050,none,,0.0,0.090141,,,True,
10999,CCL4,CCR8,pDC,Immune,T-cell,,CCR8-rec,CCL4-lig,pDC-lig,T-cell-rec,0.098,none,,0.0,0.193478,,,True,


In [228]:
# Keep interactions where flag5 is True for both the ligand and the receptor;
# rows with a missing flag (no DE result for that partner) are dropped as well.
ligand_flagged = df_plt_filt['flag5-lig'].eq(True)
receptor_flagged = df_plt_filt['flag5-rec'].eq(True)
df_plt_filt2 = df_plt_filt.loc[ligand_flagged & receptor_flagged]

In [229]:
de_results[(de_results['celltype']=='NK-cell') &(de_results['gene']=='CCL4')]

Unnamed: 0,celltype,gene,ENSEMBL,ltsr,beta_old,beta_young,flag5,log2fc,sign,REGULATION,mean_young,mean_aged,celltype_group
275695,NK-cell,CCL4,ENSG00000275302,1.0,0.227593,0.019925,True,0.207668,True,up,2.577406,2.809175,Immune


In [230]:
df_plt_filt2.loc[(df_plt_filt2['celltype_ligand']=='NK-cell')&(df_plt_filt2['gene_ligand']=='CCL4')]

Unnamed: 0,gene_ligand,gene_receptor,celltype_ligand,celltype_group-lig,celltype_receptor,celltype_group-rec,gene_receptor_id,gene_ligand_id,celltype_ligand_id,celltype_receptor_id,mean,REGULATION-lig,REGULATION-rec,mean_young-lig,mean_aged-lig,mean_young-rec,mean_aged-rec,flag5-lig,flag5-rec
2409,CCL4,ACKR2,NK-cell,Immune,Lymphatic,Vessel,ACKR2-rec,CCL4-lig,NK-cell-lig,Lymphatic-rec,1.43,up,none,2.577406,2.809175,0.169359,0.051229,True,True
6979,CCL4,SLC7A1,NK-cell,Immune,Adipocyte,Adipocyte,SLC7A1-rec,CCL4-lig,NK-cell-lig,Adipocyte-rec,1.492,up,none,2.577406,2.809175,0.242729,0.175701,True,True
6996,CCL4,CNR2,NK-cell,Immune,B-cell,Immune,CNR2-rec,CCL4-lig,NK-cell-lig,B-cell-rec,1.459,up,none,2.577406,2.809175,0.135127,0.10801,True,True
7008,CCL4,SLC7A1,NK-cell,Immune,B-plasma,Immune,SLC7A1-rec,CCL4-lig,NK-cell-lig,B-plasma-rec,1.426,up,none,2.577406,2.809175,0.208464,0.042247,True,True
7029,CCL4,SLC7A1,NK-cell,Immune,Eosinophil,Immune,SLC7A1-rec,CCL4-lig,NK-cell-lig,Eosinophil-rec,1.431,up,none,2.577406,2.809175,0.0,0.051893,True,True
7035,CCL4,SLC7A1,NK-cell,Immune,FB,FB,SLC7A1-rec,CCL4-lig,NK-cell-lig,FB-rec,1.427,up,none,2.577406,2.809175,0.087477,0.045341,True,True
7049,CCL4,SLC7A1,NK-cell,Immune,Lymphatic,Vessel,SLC7A1-rec,CCL4-lig,NK-cell-lig,Lymphatic-rec,1.499,up,none,2.577406,2.809175,0.278546,0.188376,True,True
7178,CCL4,CCR5,NK-cell,Immune,Monocyte,Immune,CCR5-rec,CCL4-lig,NK-cell-lig,Monocyte-rec,1.423,up,none,2.577406,2.809175,0.071319,0.037634,True,True
7192,CCL4,SLC7A1,NK-cell,Immune,MuSC,MuSC,SLC7A1-rec,CCL4-lig,NK-cell-lig,MuSC-rec,1.42,up,none,2.577406,2.809175,0.075719,0.031276,True,True
7227,CCL4,SLC7A1,NK-cell,Immune,NMJ,Myofiber,SLC7A1-rec,CCL4-lig,NK-cell-lig,NMJ-rec,1.442,up,none,2.577406,2.809175,0.102025,0.075219,True,True


In [192]:
df_plt_filt2

Unnamed: 0,gene_ligand,gene_receptor,celltype_ligand,celltype_group-lig,celltype_receptor,celltype_group-rec,gene_receptor_id,gene_ligand_id,celltype_ligand_id,celltype_receptor_id,mean,REGULATION-lig,REGULATION-rec,mean_young-lig,mean_aged-lig,mean_young-rec,mean_aged-rec,flag5-lig,flag5-rec
31,CCL2,CCR2,Adipocyte,Adipocyte,B-plasma,Immune,CCR2-rec,CCL2-lig,Adipocyte-lig,B-plasma-rec,0.143,none,dw,0.047444,0.115579,0.232737,0.170574,True,True
34,CCL2,CCR10,Adipocyte,Adipocyte,B-plasma,Immune,CCR10-rec,CCL2-lig,Adipocyte-lig,B-plasma-rec,0.154,none,none,0.047444,0.115579,0.111929,0.191570,True,True
41,CCL2,ACKR1,Adipocyte,Adipocyte,CapEC,Vessel,ACKR1-rec,CCL2-lig,Adipocyte-lig,CapEC-rec,0.152,none,none,0.047444,0.115579,0.125103,0.188752,True,True
67,CCL2,ACKR1,Adipocyte,Adipocyte,Lymphatic,Vessel,ACKR1-rec,CCL2-lig,Adipocyte-lig,Lymphatic-rec,0.560,none,up,0.047444,0.115579,0.248413,1.003917,True,True
182,CCL2,CCR2,Adipocyte,Adipocyte,Monocyte,Immune,CCR2-rec,CCL2-lig,Adipocyte-lig,Monocyte-rec,0.084,none,dw,0.047444,0.115579,0.108540,0.052353,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11035,CCL4,SLC7A1,pDC,Immune,cDC2,Immune,SLC7A1-rec,CCL4-lig,pDC-lig,cDC2-rec,0.121,none,none,0.000000,0.193478,0.043692,0.047618,True,True
11037,CCL3,IDE,pDC,Immune,cDC2,Immune,IDE-rec,CCL3-lig,pDC-lig,cDC2-rec,0.064,none,none,0.000000,0.090141,0.072747,0.037869,True,True
11039,CCL3,CCR1,pDC,Immune,cDC2,Immune,CCR1-rec,CCL3-lig,pDC-lig,cDC2-rec,0.138,none,dw,0.000000,0.090141,0.432870,0.186715,True,True
11050,CCL4,SLC7A1,pDC,Immune,pDC,Immune,SLC7A1-rec,CCL4-lig,pDC-lig,pDC-rec,0.243,none,none,0.000000,0.193478,0.000000,0.292557,True,True


In [231]:
def graph_elements(st_df, save = ""):
    """Build the node and edge tables of a ligand-receptor graph and write them as CSV.

    Nodes are the distinct receptor genes (layer 2), ligand genes (layer 3),
    ligand cell types (layer 4) and receptor cell types (layer 1) found in `st_df`.
    Edges connect ligand gene -> receptor gene (type 'reclig', regulation 'none',
    mean 1), ligand cell type -> ligand gene and receptor cell type -> receptor gene;
    the latter two carry that partner's REGULATION label and aged mean expression.

    Parameters
    ----------
    st_df : pandas.DataFrame
        Interaction table with the columns gene_receptor_id, gene_receptor,
        gene_ligand_id, gene_ligand, celltype_ligand_id, celltype_ligand,
        celltype_group-lig, celltype_receptor_id, celltype_receptor,
        celltype_group-rec, REGULATION-lig, REGULATION-rec, mean_aged-lig
        and mean_aged-rec.
    save : str
        Output directory for nodes.csv and edges.csv. It is created when missing.
        An empty string means the current working directory (previously this
        resolved to the filesystem root, '/nodes.csv').

    Returns
    -------
    None
        Both tables are printed and written to disk.
    """
    import os  # local import keeps the function usable on its own

    def _distinct(columns, renames):
        # Unique rows of the selected columns, renamed to the graph schema.
        return st_df[columns].drop_duplicates().rename(columns=renames)

    nodes = pd.concat([
        _distinct(['gene_receptor_id', 'gene_receptor'],
                  {'gene_receptor_id': 'node_id', 'gene_receptor': 'label'})
            .assign(layer=2, type='gene', type2='gene'),
        _distinct(['gene_ligand_id', 'gene_ligand'],
                  {'gene_ligand_id': 'node_id', 'gene_ligand': 'label'})
            .assign(layer=3, type='gene', type2='gene'),
        # Cell type nodes carry their broad group; they get no 'type' value,
        # so that column is NaN for them after the concat.
        _distinct(['celltype_ligand_id', 'celltype_ligand', 'celltype_group-lig'],
                  {'celltype_ligand_id': 'node_id', 'celltype_ligand': 'label', 'celltype_group-lig': 'group'})
            .assign(layer=4, type2='celltype'),
        _distinct(['celltype_receptor_id', 'celltype_receptor', 'celltype_group-rec'],
                  {'celltype_receptor_id': 'node_id', 'celltype_receptor': 'label', 'celltype_group-rec': 'group'})
            .assign(layer=1, type2='celltype'),
    ], axis=0).drop_duplicates().reset_index(drop=True)

    nodes['label_unwrapped'] = nodes['label']
    print(nodes)

    edges = pd.concat([
        _distinct(['gene_ligand_id', 'gene_receptor_id'],
                  {'gene_ligand_id': 'from', 'gene_receptor_id': 'to'})
            .assign(type='reclig', regulation = "none", mean = 1),
        _distinct(['celltype_ligand_id', 'gene_ligand_id', 'REGULATION-lig', 'mean_aged-lig'],
                  {'celltype_ligand_id': 'from', 'gene_ligand_id': 'to',
                   'REGULATION-lig': 'regulation', 'mean_aged-lig': 'mean'}),
        _distinct(['celltype_receptor_id', 'gene_receptor_id', 'REGULATION-rec', 'mean_aged-rec'],
                  {'celltype_receptor_id': 'from', 'gene_receptor_id': 'to',
                   'REGULATION-rec': 'regulation', 'mean_aged-rec': 'mean'}),
    ], axis=0).fillna('')  # cells without a value are written as empty strings
    print(edges)

    out_dir = save if save else os.curdir
    os.makedirs(out_dir, exist_ok=True)
    nodes.to_csv(os.path.join(out_dir, "nodes.csv"), index=False)
    edges.to_csv(os.path.join(out_dir, "edges.csv"), index=False)

In [232]:
graph_elements(df_plt_filt2.loc[df_plt_filt2['gene_ligand'] == 'CCL2'], \
               save = "/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/ccl2_v2")

               node_id           label  layer  type     type2      group  \
0             CCR2-rec            CCR2      2  gene      gene        NaN   
1            CCR10-rec           CCR10      2  gene      gene        NaN   
2            ACKR1-rec           ACKR1      2  gene      gene        NaN   
3            ACKR2-rec           ACKR2      2  gene      gene        NaN   
4             CCL2-lig            CCL2      3  gene      gene        NaN   
5        Adipocyte-lig       Adipocyte      4   NaN  celltype  Adipocyte   
6            ArtEC-lig           ArtEC      4   NaN  celltype     Vessel   
7         B-plasma-lig        B-plasma      4   NaN  celltype     Immune   
8            CapEC-lig           CapEC      4   NaN  celltype     Vessel   
9               FB-lig              FB      4   NaN  celltype         FB   
10       Lymphatic-lig       Lymphatic      4   NaN  celltype     Vessel   
11   MF_type-MYH8+-lig   MF_type-MYH8+      4   NaN  celltype   Myofiber   
12      Macr

In [233]:
graph_elements(df_plt_filt2.loc[df_plt_filt2['gene_ligand'] == 'CCL3'], \
               save = "/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/ccl3_v2")

               node_id           label  layer  type     type2      group  \
0              IDE-rec             IDE      2  gene      gene        NaN   
1             CCR1-rec            CCR1      2  gene      gene        NaN   
2             CCR5-rec            CCR5      2  gene      gene        NaN   
3            ACKR2-rec           ACKR2      2  gene      gene        NaN   
4             CCL3-lig            CCL3      3  gene      gene        NaN   
5         B-plasma-lig        B-plasma      4   NaN  celltype     Immune   
6       Eosinophil-lig      Eosinophil      4   NaN  celltype     Immune   
7       Macrophage-lig      Macrophage      4   NaN  celltype     Immune   
8         Monocyte-lig        Monocyte      4   NaN  celltype     Immune   
9            Mural-lig           Mural      4   NaN  celltype        SMC   
10         NK-cell-lig         NK-cell      4   NaN  celltype     Immune   
11          T-cell-lig          T-cell      4   NaN  celltype     Immune   
12          

In [234]:
graph_elements(df_plt_filt2.loc[df_plt_filt2['gene_ligand'] == 'CCL4'], \
               save = "/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/ccl4_v2")

           node_id       label  layer  type     type2      group  \
0       SLC7A1-rec      SLC7A1      2  gene      gene        NaN   
1         CNR2-rec        CNR2      2  gene      gene        NaN   
2         CCR5-rec        CCR5      2  gene      gene        NaN   
3        ACKR2-rec       ACKR2      2  gene      gene        NaN   
4         CCL4-lig        CCL4      3  gene      gene        NaN   
5       B-cell-lig      B-cell      4   NaN  celltype     Immune   
6     B-plasma-lig    B-plasma      4   NaN  celltype     Immune   
7   Eosinophil-lig  Eosinophil      4   NaN  celltype     Immune   
8   Macrophage-lig  Macrophage      4   NaN  celltype     Immune   
9    Mast_cell-lig   Mast_cell      4   NaN  celltype     Immune   
10    Monocyte-lig    Monocyte      4   NaN  celltype     Immune   
11     NK-cell-lig     NK-cell      4   NaN  celltype     Immune   
12      T-cell-lig      T-cell      4   NaN  celltype     Immune   
13        cDC1-lig        cDC1      4   NaN  cel

In [236]:
graph_elements(df_plt_filt2.loc[df_plt_filt2['gene_ligand'] == 'CXCL8'], \
               save = "/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/cxcl8_v2")

            node_id        label  layer  type     type2        group  \
0         ACKR1-rec        ACKR1      2  gene      gene          NaN   
1         CXCR1-rec        CXCR1      2  gene      gene          NaN   
2         CXCR2-rec        CXCR2      2  gene      gene          NaN   
3         CXCL8-lig        CXCL8      3  gene      gene          NaN   
4      B-plasma-lig     B-plasma      4   NaN  celltype       Immune   
5         CapEC-lig        CapEC      4   NaN  celltype       Vessel   
6    Eosinophil-lig   Eosinophil      4   NaN  celltype       Immune   
7    Macrophage-lig   Macrophage      4   NaN  celltype       Immune   
8   Mesothelium-lig  Mesothelium      4   NaN  celltype  Mesothelium   
9      Monocyte-lig     Monocyte      4   NaN  celltype       Immune   
10        Mural-lig        Mural      4   NaN  celltype          SMC   
11   Neutrophil-lig   Neutrophil      4   NaN  celltype       Immune   
12        VenEC-lig        VenEC      4   NaN  celltype       Ve

In [76]:
# Edge list built directly from the filtered CellPhoneDB table `df` (no DE annotation):
#   1. ligand gene -> receptor gene, tagged type='reclig' and carrying the interaction mean
#   2. ligand cell type -> ligand gene
#   3. receptor cell type -> receptor gene
# Because 'mean' is part of the de-duplication key in (1), the same ligand-receptor
# pair is repeated once per distinct mean value (see the repeated CCL2-lig -> ACKR2-rec
# rows in the output). Values absent for a given edge kind are written as ''.
edges = pd.concat([
    df[['gene_ligand_id', 'gene_receptor_id', 'mean']].drop_duplicates().rename(columns={'gene_ligand_id': 'from', 'gene_receptor_id': 'to'}).assign(type='reclig'),
    df[['celltype_ligand_id', 'gene_ligand_id']].drop_duplicates().rename(columns={'celltype_ligand_id': 'from', 'gene_ligand_id': 'to'}),
    df[['celltype_receptor_id', 'gene_receptor_id']].drop_duplicates().rename(columns={'celltype_receptor_id': 'from', 'gene_receptor_id': 'to'}),    
], axis=0).fillna('')

edges

Unnamed: 0,from,to,mean,type
0,CCL2-lig,ACKR2-rec,1.31,reclig
1,CCL2-lig,ACKR2-rec,1.311,reclig
2,CCL2-lig,ACKR2-rec,1.936,reclig
3,CCL2-lig,ACKR1-rec,0.915,reclig
4,CCL3-lig,ACKR2-rec,4.092,reclig
...,...,...,...,...
1188,SMC-rec,ACKR2-rec,,
1267,Tenocyte-rec,ACKR2-rec,,
1347,VenEC-rec,ACKR2-rec,,
1474,cDC2-rec,ACKR2-rec,,


In [74]:
# FIXME(review): in the visible notebook `nodes` is only ever assigned inside
# graph_elements(), never at notebook scope, so this cell presumably relies on a
# since-deleted cell and would raise NameError after Restart & Run All — confirm.
nodes.to_csv("/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/nodes.csv", index=False)

In [75]:
edges.to_csv("/nfs/team205/vk8/processed_data/muscle/data_v3/cpdb_graphs/edges.csv", index=False)