# Cell communication analysis: Recirculating immune cells

In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import anndata as ad
import hdf5plugin
import liana as li

import warnings
warnings.filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# Add repo path to sys path (allows to access scripts and metadata from repo).
# The location is pinned to a fixed checkout instead of being derived from the
# current working directory (the derived value was overwritten straight away).
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis'
sys.path.insert(1, repo_path)
# Build the scripts directory from repo_path so both entries always stay in sync
sys.path.insert(2, f'{repo_path}/scripts')

# Autoreload custom scripts
%load_ext autoreload
%autoreload 2

# Define paths
# NOTE: plots_path and data_path end with '/', and later cells append
# '/subdir/...' to them, so the resulting paths contain a double slash.
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'
model_path = os.path.join(repo_path, 'models')
# Shared data directory (no trailing slash, unlike data_path)
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print('Dir for plots: {}'.format(plots_path))
print('Dir for data: {}'.format(data_path))

# Formatting
from matplotlib import font_manager
# Register the Arial font file with matplotlib, then apply the project style sheet
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize

In [None]:
# Define columns
col_cell_type_broad = 'taa_l3'
col_cell_type_fine = 'taa_l4'
# col_cell_type_broad_levels = get_ct_levels(col_cell_type_broad, taa_l1 = ['T', 'NK'])
# col_cell_type_fine_levels = get_ct_levels(col_cell_type_fine, taa_l1 = ['T', 'NK'])
col_age_group = 'age_group'
# Look up the '<column>_levels' object (imported from anno_levels) by name.
# A namespace lookup avoids eval(), which would execute arbitrary expressions.
col_age_group_levels = globals()[f'{col_age_group}_levels']

In [None]:
# Load adata
object_version = 'v5_2025-04-03'
# NOTE(review): the path ends in '.zarr' but is opened with read_h5ad — confirm the
# on-disk format; a real zarr store would need ad.read_zarr instead.
adata = ad.read_h5ad(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr')

# Add new annotations to adata
# NOTE(review): the annotation file name carries 'v4_2025-02-04' while the object is
# 'v5_2025-04-03' — confirm the cell barcodes (index) match between the two.
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
# Left join on the obs index: cells without an annotation row get NaN in the new columns
adata.obs = adata.obs.join(ct_anno, how = 'left')
# Keep only annotated-as-included, QC-passing cells (NaN never equals 'include', so
# unannotated cells are dropped here). The result is a view of the original object.
adata = adata[(adata.obs['anno_status'] == 'include') & (adata.obs['qc_status'] == 'PASS')]

# Update metadata
# Pick the most recent 'Thymus_ageing_metadata*' file and merge it into adata.obs
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

adata

In [None]:
# Cell types of interest: fine-level (taa_l4) labels of recirculating T cells, three
# hand-picked lymphocyte populations, then every fine-level label that belongs to
# the listed stromal / B / myeloid lineages.
_obs = adata.obs
_recirc_types = _obs.loc[_obs['taa_l2'] == 'T_recirc', 'taa_l4'].unique().tolist()
_partner_lineages = ['TEC', 'Fb', 'EC', 'B', 'Mural', 'Mac', 'Mono', 'DC']
_partner_types = _obs.loc[_obs['taa_l1'].isin(_partner_lineages), 'taa_l4'].unique().tolist()

ctoi = _recirc_types + ['ILC', 'T_MAIT', 'NK_tr']
ctoi += _partner_types

np.array(ctoi)

In [None]:
# Subset adata to the cell types of interest
adata_sub = adata[adata.obs['taa_l4'].isin(ctoi),:].copy()

# Analysis label: fine annotation (taa_l4) for T and B lineages, broad annotation
# (taa_l3) for everything else. Vectorised selection replaces a row-wise apply over
# every cell; both inputs are cast to object so mixed category sets combine cleanly.
_use_fine = adata_sub.obs['taa_l1'].isin(['T', 'B']).to_numpy()
adata_sub.obs['cell_type'] = np.where(
    _use_fine,
    adata_sub.obs['taa_l4'].astype(object).to_numpy(),
    adata_sub.obs['taa_l3'].astype(object).to_numpy(),
)

# Remove B_dev (very few cells); copy so later in-place steps act on a real object, not a view
adata_sub = adata_sub[~adata_sub.obs['cell_type'].isin(['B_dev', 'B_dev_thy']),:].copy()

adata_sub.obs['cell_type'].value_counts()

In [None]:
# Log-normalise data
# Scale each cell to a total of 10,000 counts, then apply log(1 + x).
# NOTE(review): presumably adata_sub.X holds raw counts at this point — confirm.
sc.pp.normalize_total(adata_sub, target_sum=1e4)
sc.pp.log1p(adata_sub)

In [None]:
# Load CellPhoneDB database (v5.0.0)
ia_db = pd.read_csv(f'{data_path}/references/ccc_databases/cellphoneDB_v5.0.0_interaction_input.csv')
# Split 'interactors' at the LAST hyphen into ligand (left) and receptor (right).
# NOTE(review): a right-hand partner whose own name contains '-' would be split
# in the wrong place — verify against the interaction file.
ia_db[['ligand', 'receptor']] = ia_db['interactors'].str.rsplit('-', n=1, expand=True)
# Receptor entries joined with '+' are expanded to one row per component;
# the ligand column is left as a single string.
ia_db['receptor'] = ia_db['receptor'].str.split('+')
ia_db = ia_db.explode('receptor')
ia_db

In [None]:
# List of chemokines, cytokines and other signalling molecules relevant to inflammation
# The list is only used for membership tests (isin) below, so the repeated entries
# in the curated tail (e.g. 'LTBR', 'CXCL13', 'TNF', 'CD40') are harmless.
moi = ['CXCR4', 'CXCR2', 'CXCR1', 'CXCR6', 'CXCL8', 'CXCL6', 'CXCL1',
       'CXCL5', 'CXCL3', 'CXCL2', 'CXCL9', 'CXCL10', 'CXCL11', 'CXCL13',
       'CXCL14', 'CXCL12', 'CXCR5', 'CXCL16', 'CXCL17', 'CXCR3',
       'CCR4', 'CCR8', 'CCR9', 'CCR3', 'CCR1', 'CCR2', 'CCR5AS', 'CCR5',
       'CCRL2', 'CCR6', 'CCR7', 'CCR10',
       'CCL20', 'CCL28', 'CCL26', 'CCL24', 'CCL27', 'CCL19', 'CCL21',
       'CCL22', 'CCL17', 'CCL2', 'CCL7', 'CCL11', 'CCL8', 'CCL13', 'CCL1',
       'CCL5', 'CCL16', 'CCL14', 'CCL15', 'CCL23', 'CCL18', 'CCL3',
       'CCL4', 'CCL3L1', 'CCL4L2', 'CCL25',
       'IL22RA1', 'IL23R', 'IL12RB2', 'ILF2', 'IL6R-AS1', 'IL6R', 'ILDR2',
       'IL10', 'IL19', 'IL20', 'IL24', 'IL1R2', 'IL1R1', 'IL1R1-AS1',
       'IL1RL2', 'IL1RL1', 'IL18R1', 'IL18RAP', 'IL1A', 'IL1B', 'IL37',
       'IL36G', 'IL36A', 'IL36B', 'IL36RN', 'IL1F10', 'IL1RN', 'ILKAP',
       'IL5RA', 'IL17RE', 'IL17RC', 'IL17RB', 'IL17RD', 'ILDR1', 'IL20RB',
       'IL20RB-AS1', 'IL12A-AS1', 'IL12A', 'IL1RAP', 'IL2', 'IL21',
       'IL21-AS1', 'IL15', 'IL7R', 'IL31RA', 'IL6ST', 'IL3', 'IL5',
       'IL13', 'IL4', 'IL9', 'IL17B', 'IL12B', 'ILRUN', 'IL17A', 'IL17F',
       'IL20RA', 'IL22RA2', 'IL6', 'IL7', 'IL33', 'IL11RA', 'IL15RA',
       'IL2RA', 'ILK', 'IL18BP', 'IL18', 'IL10RA', 'IL23A', 'IL26',
       'IL22', 'IL31', 'IL17D', 'IL25', 'IL16', 'IL32', 'IL4R', 'IL21R',
       'IL21R-AS1', 'IL27', 'IL34', 'IL17C', 'ILF3-DT', 'ILF3', 'IL27RA',
       'ILVBL', 'IL12RB1', 'IL4I1', 'IL11', 'IL10RB-DT', 'IL10RB',
       'IL17RA', 'IL2RB', 'IL17REL', 'IL3RA', 'IL1RAPL1', 'IL2RG',
       'IL1RAPL2', 'IL13RA2', 'IL13RA1', 'IL9R',
       'IFNLR1', 'IFNGR1', 'IFNB1', 'IFNW1', 'IFNA21', 'IFNA4', 'IFNA7',
       'IFNA10', 'IFNA16', 'IFNA14', 'IFNA5', 'IFNA6', 'IFNA13', 'IFNA2',
       'IFNA8', 'IFNA1', 'IFNE', 'IFNK', 'IFNG-AS1', 'IFNG', 'IFNL3',
       'IFNL2', 'IFNL1', 'IFNAR2', 'IFNAR1', 'IFNGR2',
       'TNFRSF18', 'TNFRSF4', 'TNFRSF14-AS1', 'TNFRSF14', 'TNFRSF25',
       'TNFRSF9', 'TNFRSF8', 'TNFRSF1B', 'TNFAIP8L2', 'TNFSF18', 'TNFSF4',
       'TNFAIP6', 'TNFSF10', 'TNFAIP8', 'TNF', 'TNFRSF21', 'TNFAIP3',
       'TNFRSF10B', 'TNFRSF10C', 'TNFRSF10D', 'TNFRSF10A-AS1',
       'TNFRSF10A', 'TNFRSF11B', 'TNFSF15', 'TNFSF8', 'TNFRSF1A',
       'TNFRSF19', 'TNFSF11', 'TNFSF13B', 'TNFAIP2', 'TNFAIP8L3',
       'TNFRSF12A', 'TNFRSF17', 'TNFSF12', 'TNFSF13', 'TNFRSF13B',
       'TNFAIP1', 'TNFRSF11A', 'TNFAIP8L1', 'TNFSF9', 'TNFSF14',
       'TNFRSF6B', 'TNFRSF13C',
       'LTA', 'LTB', 'LTBR',
       # Curated gene sets, grouped by the label at the end of each row
       'LTBR', 'TNFRSF1A', 'TNFRSF1B', 'CXCL13', 'CCL19', 'CCL21', # LTo
        'TNF', 'TNFSF14', 'LTA', 'LTB', # LTi
        'ICAM1', 'ICAM2', 'ICAM3', 'ICAM4', 'ICAM5', 'CD40', 'ICOSLG', # T_B_interaction
        'PLXNB1', 'PLXNB2', 'BASP1', 'P2RY8', 'BATF', # GC_formation
        'CD40LG', 'ICOS', 'CD40', 'S1PR2', # BCR_activation
        'IL21', 'IL4', 'IL6', 'IL10', # interleukin_signalling
        'IFNG', # interferon
       ]

# def get_expressed_genes(adata: ad.AnnData, groupby : str, group : str, min_frac : float = 0.1) -> pd.Series:
    
#     n_obs_group = (adata.obs[groupby] == group).sum()
#     ecf = np.array(adata[adata.obs[groupby] == group].X.astype(bool).sum(axis=0) / n_obs_group).flatten()
    
#     return list(adata.var_names[ecf > min_frac])

# Keep database rows where either partner is one of the molecules of interest
_ligand_hit = ia_db['ligand'].isin(moi)
_receptor_hit = ia_db['receptor'].isin(moi)
ia_oi_db = ia_db[_ligand_hit | _receptor_hit]

# Unique genes taking part in the retained interactions (order is arbitrary)
_genes = ia_oi_db['ligand'].unique().tolist()
_genes += ia_oi_db['receptor'].unique().tolist()
all_genes = list(set(_genes))

# Save interactions of interest
ia_oi_db[['ligand', 'receptor']].to_csv(f'{data_path}/curated/inflammation_related_interactions.csv', index = False)

ia_oi_db.shape, len(all_genes)

In [None]:
# Drop donors that contribute fewer than `min_cells` cells to the subset
min_cells = 100
col_sample = 'donor'

ncells_by_sample = adata_sub.obs[col_sample].value_counts().to_frame(name = 'n_cells')
_low_count = ncells_by_sample[ncells_by_sample['n_cells'] < min_cells]

print('Removing {} donors with fewer than {} cells'.format(_low_count.shape[0], min_cells))

_is_low_count_donor = adata_sub.obs[col_sample].isin(_low_count.index)
adata_sub = adata_sub[~_is_low_count_donor]

In [None]:
# Level to cell type assignment: map each annotation level ('taa_l4' / 'taa_l3')
# to the list of analysis cell types taken from that level.
ct_anno_levels = adata_sub.obs[['cell_type', 'taa_l1']].drop_duplicates().copy()
# T and B lineages are analysed at the fine level, all others at the broad level
ct_anno_levels['level'] = np.where(ct_anno_levels['taa_l1'].isin(['T', 'B']), 'taa_l4', 'taa_l3')
# De-duplicate so each cell type is listed once per level (broad-level types used to
# appear once per underlying fine label); first-appearance order is preserved.
ct_anno_levels = ct_anno_levels.drop_duplicates(subset = ['cell_type', 'level'])
ct_anno_levels = ct_anno_levels.groupby('level')['cell_type'].agg(list).to_dict()

ct_levels = get_ct_levels('taa_l4', taa_l1 = ['T', 'B']) + get_ct_levels('taa_l3', taa_l1 = ['TEC', 'Fb', 'EC', 'Mural', 'Mac', 'Mono', 'DC', 'NK'])

import pprint
pprint.pprint(ct_anno_levels, compact = True)

In [None]:
# Per-donor frequencies at the fine level, restricted to the cell types analysed at that level
taa_l4_freq = pd.read_csv(f'{data_path}/analyses/freqAnalysis/thyAgeing_all_{col_cell_type_fine}_byDonor_freq.csv', index_col = 0)
_fine_mask = taa_l4_freq[col_cell_type_fine].isin(ct_anno_levels['taa_l4'])
taa_l4_freq = taa_l4_freq[_fine_mask].rename(columns = {col_cell_type_fine: 'cell_type'})

# Same for the broad level
taa_l3_freq = pd.read_csv(f'{data_path}/analyses/freqAnalysis/thyAgeing_all_{col_cell_type_broad}_byDonor_freq.csv', index_col = 0)
_broad_mask = taa_l3_freq[col_cell_type_broad].isin(ct_anno_levels['taa_l3'])
taa_l3_freq = taa_l3_freq[_broad_mask].rename(columns = {col_cell_type_broad: 'cell_type'})

# Stack both tables under the shared 'cell_type' column
ct_freq = pd.concat([taa_l4_freq, taa_l3_freq], axis = 0)
ct_freq.head()

## Analysis by sample

In [None]:
# Unique (ligand, receptor) tuples to test. itertuples builds plain tuples directly,
# without a Python-level call per row, and yields an empty list for an empty
# table (a row-wise apply returns a DataFrame there, which has no tolist()).
interactions_oi = list(ia_oi_db[['ligand', 'receptor']].drop_duplicates().itertuples(index = False, name = None))
interactions_oi

In [None]:
# Run LIANA's CellPhoneDB method separately for every donor (sample_key) between all
# 'cell_type' groups; the results are read back from adata_sub.uns["liana_res"] below.
# NOTE(review): both resource_name and interactions are passed — presumably the
# explicit interaction list takes precedence; confirm in the LIANA documentation.
li.mt.cellphonedb.by_sample(
    adata_sub,
    groupby='cell_type',
    resource_name='cellphonedb', 
    sample_key=col_sample, 
    expr_prop = 0,  # no minimum expression-proportion filter at this stage
    use_raw=False,
    interactions=interactions_oi,
    n_perms=500, 
    return_all_lrs=True, 
    verbose=True, 
    n_jobs = 4,
    )

In [None]:
# Per-donor results with the donor's age group attached
_donor_age = adata_sub.obs[[col_sample, col_age_group]].drop_duplicates()
sample_res = adata_sub.uns["liana_res"].copy().merge(_donor_age, on = col_sample, how = 'inner')

# Readable identifiers for the ligand-receptor pair and the cell-type pair
sample_res['interaction'] = sample_res['ligand'] + ' -> ' + sample_res['receptor']
sample_res['cells'] = sample_res['source'] + ' -> ' + sample_res['target']

# Account for cell type frequency: attach the per-donor proportion of the source
# and of the target cell type (NaN where the donor/cell type has no frequency row)
_freq = ct_freq[['donor', 'cell_type', 'mean_prop']]
for _role in ('source', 'target'):
    sample_res = sample_res.merge(_freq, left_on = ['donor', _role], right_on = ['donor', 'cell_type'], how = 'left')
    sample_res = sample_res.rename(columns = {'mean_prop': f'{_role}_prop'}).drop('cell_type', axis = 1)
# sample_res['lr_means_norm'] = sample_res['lr_means'] * sample_res['source_prop'] * sample_res['target_prop']

# sample_res.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample.csv', index = False)

CXCL12: pro-inflammatory, induces activation and migration of immune cells (https://www.nature.com/articles/s41423-023-00974-6)

In [None]:
# Display the per-donor interaction table
sample_res

### Differential communication analysis 

In [None]:
# Reload the saved per-donor results and derive a combined score: product of the
# source/target cell-type proportions and the ligand/receptor expression proportions
# (NaN whenever any of the four factors is missing)
sample_res = pd.read_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample.csv')
sample_res = sample_res.assign(
    tot_prob = lambda d: d['source_prop'] * d['target_prop'] * d['ligand_props'] * d['receptor_props']
)
sample_res.head()

In [None]:
# Check that only expected donors/samples are missing tot_prob (the ones which are not TOT sort).
# Compare as sets: element-wise array comparison depends on ordering (unique() keeps
# first-appearance order, setdiff1d sorts) and misbehaves when the lengths differ.
_donors_missing_prob = set(sample_res.loc[sample_res['tot_prob'].isna(), 'donor'].unique())
_donors_without_freq = set(sample_res['donor'].unique()) - set(ct_freq['donor'].unique())
_donors_missing_prob == _donors_without_freq

In [None]:
# Mean of each quantity per (interaction, cell pair, age group), using all samples
# (not just TOT sort), then reshaped to one row per (interaction, cell pair) with
# one '<quantity>_<age group>' column per combination
_value_cols = ['ligand_props', 'receptor_props', 'source_prop', 'target_prop', 'tot_prob']
_id_cols = ['interaction', 'cells']

mean_props = sample_res.groupby(_id_cols + ['age_group']).agg({c: 'mean' for c in _value_cols}).reset_index()
mean_props = mean_props.pivot(index = _id_cols, columns = 'age_group', values = _value_cols).reset_index()

# Flatten the two-level column index; the id columns come out with a trailing '_'
_flat_names = []
for _col in mean_props.columns.values:
    _flat_names.append('_'.join(_col).strip())
mean_props.columns = _flat_names
mean_props = mean_props.rename(columns = {'interaction_': 'interaction', 'cells_': 'cells'})
mean_props.head()

In [None]:
# Filter interactions
# An (interaction, cell pair) is kept when, in at least one of the compared age
# groups, the mean ligand AND mean receptor expression proportions are both >= min_expr
target_age_group = ['paed', 'infant']
min_expr = 0.1

# Alternative rule (disabled): require the threshold in all compared age groups
#ia_to_keep = mean_props.loc[mean_props[['ligand_props_' + ag for ag in target_age_group] + ['receptor_props_' + ag for ag in target_age_group]].ge(min_expr).all(axis=1)][['interaction', 'cells']]

# One table of passing pairs per age group, stacked and de-duplicated
ia_to_keep = pd.concat(
    [
        mean_props.loc[
            mean_props[[f'ligand_props_{g}', f'receptor_props_{g}']].ge(min_expr).all(axis=1),
            ['interaction', 'cells'],
        ]
        for g in target_age_group
    ],
    axis = 0,
).drop_duplicates()

ia_to_keep.shape

In [None]:
from concurrent.futures import ThreadPoolExecutor
from scipy.stats import ranksums
from statsmodels.stats.multitest import multipletests

# Number of donors with frequency data per age group (used to pad missing donors with zeros)
donor_n = ct_freq[['donor', col_age_group]].drop_duplicates().groupby(col_age_group).agg({'donor': 'count'}).to_dict()['donor']


def _pad_with_zeros(values, n_expected):
    """Return `values` extended with zeros up to `n_expected` entries (unchanged if already that long)."""
    n_missing = n_expected - values.shape[0]
    if n_missing > 0:
        return np.concatenate((values, np.repeat(0, repeats=n_missing)))
    return values


def process_cells(ct):
    """Test every retained interaction of one cell pair for a difference between two age groups.

    Reads the notebook-level `ia_to_keep`, `sample_res`, `donor_n`, `target_age_group`
    and `col_age_group`.

    Parameters
    ----------
    ct : str
        Cell-pair identifier as stored in the 'cells' column ('source -> target').

    Returns
    -------
    pandas.DataFrame
        One row per interaction with columns 'interaction', 'cells', 'statistic',
        'pvalue' (Wilcoxon rank-sum of target_age_group[0] vs target_age_group[1]
        on 'tot_prob') and 'padj' (Benjamini-Hochberg correction across the
        interactions of this cell pair only).
    """
    ias = ia_to_keep[ia_to_keep['cells'] == ct]['interaction'].unique()
    # Select this cell pair's rows once instead of re-scanning sample_res per interaction
    ct_data = sample_res.loc[sample_res['cells'] == ct, ['interaction', 'donor', col_age_group, 'tot_prob']].dropna(subset=['tot_prob'])
    first_group, second_group = target_age_group[0], target_age_group[1]

    test_ranksum = []
    for ia in ias:
        test_data = ct_data[ct_data['interaction'] == ia]
        # Donors with no row for this interaction count as zero signal
        group1 = _pad_with_zeros(test_data.loc[test_data[col_age_group] == first_group, 'tot_prob'].to_numpy(), donor_n[first_group])
        group2 = _pad_with_zeros(test_data.loc[test_data[col_age_group] == second_group, 'tot_prob'].to_numpy(), donor_n[second_group])
        test_res = ranksums(group1, group2)
        test_ranksum.append([ia, ct, test_res[0], test_res[1]])
    test_ranksum = pd.DataFrame(test_ranksum, columns=['interaction', 'cells', 'statistic', 'pvalue'])
    test_ranksum['padj'] = multipletests(test_ranksum['pvalue'], method='fdr_bh')[1]
    return test_ranksum

# Run the per-cell-pair tests on a pool of 4 worker threads; map() keeps input order
_cell_pairs = ia_to_keep['cells'].unique()
with ThreadPoolExecutor(max_workers=4) as _pool:
    results = list(_pool.map(process_cells, _cell_pairs))

# Stack the per-cell-pair tables into one result table
ranksums_res = pd.concat(results, axis=0)

#ranksums_res.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_ranksums.csv')

ranksums_res.sort_values('statistic', ascending=False)

In [None]:
# Reload the saved test results and unpack the two 'a -> b' identifiers into separate columns
ranksums_res = pd.read_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_ranksums.csv', index_col = 0)
for _parts, _packed in ((['source', 'target'], 'cells'), (['ligand', 'receptor'], 'interaction')):
    ranksums_res[_parts] = ranksums_res[_packed].str.split(' -> ', expand=True)
ranksums_res.sort_values('statistic', ascending=False)

In [None]:
# Keep tests with adjusted p-value below 0.05
_is_significant = ranksums_res['padj'] < .05
ranksums_res_filtered = ranksums_res.loc[_is_significant]
ranksums_res_filtered.sort_values('statistic', ascending=False)

In [None]:
# Reload the saved test results, split the packed identifiers and keep significant tests
ranksums_res = pd.read_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_ranksums.csv', index_col = 0)

_cell_parts = ranksums_res['cells'].str.split(' -> ', expand=True)
_gene_parts = ranksums_res['interaction'].str.split(' -> ', expand=True)
ranksums_res[['source', 'target']] = _cell_parts
ranksums_res[['ligand', 'receptor']] = _gene_parts

_significant = ranksums_res['padj'] < .05
ranksums_res_filtered = ranksums_res.loc[_significant]
ranksums_res_filtered.sort_values('statistic', ascending=False)

In [None]:
# Number of significant cell pairs per interaction whose target name contains 'B_plasma'
ranksums_res_filtered.loc[ranksums_res_filtered['target'].str.contains('B_plasma')].groupby('interaction').size()

In [None]:
# Number of significant cell pairs per interaction whose target name contains 'Mono'
ranksums_res_filtered.loc[ranksums_res_filtered['target'].str.contains('Mono')].groupby('interaction').size()

In [None]:
# Replace the in-memory filtered table with the saved, annotated version
# (the '..._ranksums_filtered.csv' file is written by a later cell of this notebook).
ranksums_res_filtered = pd.read_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_ranksums_filtered.csv', index_col = 0)
ranksums_res_filtered.head()

#### Add DEG and population change info

In [None]:
# Import DEGs
import pickle 

def _load_deg_table(pkl_path, cell_types):
    """Load a pickled {cell type: DEG table} mapping, keep `cell_types`, and stack it indexed by gene name."""
    with open(pkl_path, 'rb') as handle:
        per_cell_type = pickle.load(handle)
    kept = {ct: tab for ct, tab in per_cell_type.items() if ct in cell_types}
    return pd.concat(kept).reset_index(names = ['cell_type','gene_name']).set_index('gene_name')


# Adult-vs-infant DEG tables at the fine and the broad annotation level
taa_l4_degs = _load_deg_table(f'{general_data_path}/analyses/dea/thyAgeing_dea_taa_l4_adult_vs_infant_ageEffect.pkl', ct_anno_levels['taa_l4'])
taa_l3_degs = _load_deg_table(f'{general_data_path}/analyses/dea/thyAgeing_dea_taa_l3_adult_vs_infant_ageEffect.pkl', ct_anno_levels['taa_l3'])

# Combine both levels and keep only genes that appear as ligand or receptor
# in a significant differential interaction
all_degs = pd.concat([taa_l4_degs, taa_l3_degs])
_genes_in_hits = ranksums_res_filtered['ligand'].unique().tolist() + ranksums_res_filtered['receptor'].unique().tolist()
all_degs = all_degs.loc[all_degs.index.isin(_genes_in_hits)]

all_degs.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_diffCCC_recirc_degs.csv', index = True)
all_degs.head()

**Overview DEGs of recirc cells**

**B cells**:
- CCL4 upregulation on B plasmablasts and B plasma cells: role in recruiting Treg cells ([Bystry, 2001](https://www.nature.com/articles/ni735)), monocytes and NK cells ([Menten, 2002](http://sciencedirect.com/science/article/pii/S135961010200045X?via%3Dihub))

**Fibroblasts**:
- CXCL8 upregulation
- IL1RL1 upregulation
- LIF upregulation
- CCL8 upregulation across fibroblasts: broad leukocyte recruitment, amplifies chronic inflammation, supports myofibroblast transition
- CCL3 upregulation across fibroblasts: T cell & monocyte recruitment, fibrosis, ECM remodeling
- IL1RL1 upregulation across fibroblasts: activates fibroblasts via ST2L → pro-inflammatory, pro-fibrotic effects; promotes ECM production and myofibroblast transition; attracts and activates Th2/ILC2 cells, amplifying type 2 inflammation
- CXCL8 upregulation across fibroblasts: contributes to ECM deposition in fibrosis, role in wound healing, recruitment of immune cells

**T cells**:
- CXCL13 upregulation in recirculating T regs and resident Tregs

**Myeloid**:
- CCL20 upregulation in macrophages and monocytes: induced in response to infection/injury; facilitates rapid immune cell recruitment in gut, lung, and skin; recruits CCR6+ cells (some monocytes/macrophages, Th17 cells, DCs)


In [None]:
from scipy.cluster.hierarchy import linkage, leaves_list

# Matrix of log2 fold changes: rows = cell types, columns = genes (mean if repeated)
df = all_degs.reset_index(names = 'gene_name').pivot_table(index = 'cell_type', columns = 'gene_name', values = 'log2FoldChange')

# Perform hierarchical clustering on the columns (missing values treated as 0)
linkage_matrix = linkage(df.T.fillna(0), method='ward')
column_order = leaves_list(linkage_matrix)

# Perform hierarchical clustering on the rows (missing values treated as 0)
row_linkage_matrix = linkage(df.fillna(0), method='ward')
row_order = leaves_list(row_linkage_matrix)

# Reorder rows and columns of the dataframe by their dendrogram leaf order
df = df.iloc[row_order, column_order]

# Plot the reordered heatmap
p = sns.heatmap(df, cmap='PuOr_r', center=0, vmin=-5, vmax=5, cbar_kws={'label': 'log2FC'}, xticklabels=True, yticklabels=True)
# Columns (x axis) are genes and rows (y axis) are cell types
p.set_xlabel('Gene')
p.set_ylabel('Cell type')
p.figure.set_size_inches(calc_figsize(width=280, height=120))
p.figure.tight_layout(rect=[0, 0, 1, 0.95], pad = 0)
plt.savefig(f'{plots_path}/cellComm/recirc/thyAgeing_diffCCC_recirc_degs_heatmap_clustered.pdf', bbox_inches='tight')

In [None]:
# Median abundance logFC per cell type, restricted to the adult-vs-infant comparison
logfc_freq = pd.read_csv(f'{general_data_path}/analyses/freqAnalysis/thyAgeing_all_scvi_v5_2025-04-03_milo_ageGroups_medianLogFC.csv', index_col = 0)
_is_adult_vs_infant = logfc_freq['comparison'] == 'adult_vs_infant'
logfc_freq = logfc_freq.loc[_is_adult_vs_infant,:]

# For every annotation level keep the rows of the cell types analysed at that level
all_freq = pd.concat([
    logfc_freq.loc[(logfc_freq['anno'].isin(_annos)) & (logfc_freq['anno_level'] == _level),:]
    for _level, _annos in ct_anno_levels.items()
])

all_freq.head()

In [None]:
# Significant differential interactions (adjusted p < 0.05)
ranksums_res_filtered = ranksums_res.loc[(ranksums_res['padj'] < .05)]

# Attach the abundance logFC of the source and of the target cell type.
# NOTE(review): assumes 'anno' is unique in all_freq; duplicates would multiply rows.
_freq_lfc = all_freq[['anno', 'logFC']]
for _side in ('source', 'target'):
    ranksums_res_filtered = (
        ranksums_res_filtered
        .merge(_freq_lfc, left_on = _side, right_on = 'anno', how = 'left')
        .drop('anno', axis = 1)
        .rename(columns = {'logFC': f'{_side}_logFC'})
    )

# Attach the DEG statistics of the ligand in the source and of the receptor in the target
_deg_stats = all_degs[['cell_type', 'log2FoldChange', 'padj']].reset_index(names = 'gene_name')
for _gene_col, _cell_col in (('ligand', 'source'), ('receptor', 'target')):
    _deg_renamed = _deg_stats.rename(columns = {'log2FoldChange': f'{_gene_col}_logFC', 'padj': f'{_gene_col}_padj'})
    ranksums_res_filtered = (
        ranksums_res_filtered
        .merge(_deg_renamed, left_on = [_gene_col, _cell_col], right_on = ['gene_name', 'cell_type'], how = 'left')
        .drop(['gene_name', 'cell_type'], axis = 1)
    )

# Flags, computed column-wise (also valid for an empty table); comparisons against
# missing values evaluate to False, exactly as in a row-by-row check.
# pop_driven: |abundance logFC| >= 1.3 for the source or the target cell type
ranksums_res_filtered['pop_driven'] = (ranksums_res_filtered['source_logFC'].abs() >= 1.3) | (ranksums_res_filtered['target_logFC'].abs() >= 1.3)
# deg_driven: ligand or receptor differentially expressed (adjusted p < 0.05)
ranksums_res_filtered['deg_driven'] = (ranksums_res_filtered['ligand_padj'].abs() < .05) | (ranksums_res_filtered['receptor_padj'].abs() < .05)
ranksums_res_filtered.head()

In [None]:
# Save the annotated significant interactions (the index is written as the first column)
ranksums_res_filtered.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_ranksums_filtered.csv')

In [None]:
# Cross-tabulate how many interactions carry each combination of the two flags
ranksums_res_filtered[['pop_driven', 'deg_driven']].value_counts()

### Heatmaps of number and magnitude of interactions

In [None]:
# Heatmap of number of differential interactions (no signif downregulated interactions)
from scipy.cluster.hierarchy import linkage, leaves_list
from plotting.utils import calc_figsize,thyAgeing_colors

# Count significant interactions with a positive test statistic per (source, target) pair
df = ranksums_res_filtered.loc[ranksums_res_filtered['statistic'] > 0].copy()
df = df.groupby(['source', 'target']).size().to_frame(name='n_interactions').reset_index()
df = df.pivot(index='source', columns='target', values='n_interactions').fillna(0)

# Cluster rows and columns

row_linkage = linkage(df, method='ward')
col_linkage = linkage(df.T, method='ward')

row_order = leaves_list(row_linkage)
col_order = leaves_list(col_linkage)

df = df.iloc[row_order, col_order]

p = sns.heatmap(df, cmap=sns.blend_palette([thyAgeing_colors['blue'], thyAgeing_colors['purple'], thyAgeing_colors['magenta'], thyAgeing_colors['orange'], thyAgeing_colors['yellow']], as_cmap=True,),
                annot=None, fmt='.2g', xticklabels='auto', yticklabels='auto',
                vmax = 30)
p.set_xlabel('Target cell type')
p.set_ylabel('Source cell type')
# Resize first: tight_layout fits the axes to the figure size at the time it is
# called, so it has to run on the final size.
p.figure.set_size_inches(calc_figsize(width = 120, height = 120))
p.figure.tight_layout()
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignallingUp_heatmap.pdf')

In [None]:
# Heatmap of number of differential interactions (no signif downregulated interactions)
from scipy.cluster.hierarchy import linkage, leaves_list
from plotting.utils import calc_figsize,thyAgeing_colors

# Count significant interactions with a positive test statistic per (source, target) pair
df = ranksums_res_filtered.loc[ranksums_res_filtered['statistic'] > 0].copy()
df = df.groupby(['source', 'target']).size().to_frame(name='n_interactions').reset_index()
df = df.pivot(index='source', columns='target', values='n_interactions').fillna(0)

# Put both axes in the canonical cell type order. A cell type present on only one
# axis is added to the other with zero counts, so the matrix stays square and
# selecting by label cannot fail on a missing row or column.
df_order = [c for c in ct_levels if c in df.index or c in df.columns]
df = df.reindex(index=df_order, columns=df_order, fill_value=0)

p = sns.heatmap(df, cmap=sns.blend_palette([thyAgeing_colors['blue'], thyAgeing_colors['purple'], thyAgeing_colors['magenta'], thyAgeing_colors['orange'], thyAgeing_colors['yellow']], as_cmap=True,),
                annot=None, fmt='.2g', xticklabels='auto', yticklabels='auto',
                vmax = 30)
p.set_xlabel('Target cell type')
p.set_ylabel('Source cell type')
# Resize first: tight_layout fits the axes to the figure size at the time it is called
p.figure.set_size_inches(calc_figsize(width = 120, height = 120))
p.figure.tight_layout()
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignallingUp_celltypeSort_heatmap.pdf')

In [None]:
# How many cell pairs show each ligand-receptor pair among the significant
# interactions with a positive statistic, most frequent first
_is_up = ranksums_res_filtered['statistic'] > 0
df = ranksums_res_filtered.loc[_is_up].copy()
df = df.groupby(['ligand', 'receptor', 'interaction']).size().reset_index(name = 'n_interactions')
df = df.sort_values('n_interactions', ascending=False)

# Bar chart on a logarithmic count axis
_fig, _ax = plt.subplots(figsize=calc_figsize(width = 280, height = 50))
sns.barplot(data=df, x='interaction', y='n_interactions', palette='viridis', ax=_ax)
_ax.set_yscale('log')
plt.xticks(rotation=90)
_ax.set_xlabel('Interaction')
_ax.set_ylabel('Log10(Number of interactions)')
_fig.tight_layout()
_fig.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignalling_nIabyLR_barplot.pdf')

Heatmap of magnitude of a given interaction:

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import transforms

# Create a multipage PDF
# Create a multipage PDF: one page per interaction that is significant in at least
# one cell pair, showing the test statistic for ALL tested cell pairs of that interaction
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_interactions_heatmap.pdf') as pdf:
    for ia in ranksums_res_filtered['interaction'].unique().tolist():
        # Rows come from the unfiltered table so non-significant pairs are drawn too
        df = ranksums_res.loc[(ranksums_res['interaction'] == ia)].copy()
        df[['sender', 'receiver']] = df['cells'].str.split(' -> ', expand=True)

        # Sender x receiver matrices of the statistic and of the adjusted p-value
        df_stat = df.reset_index(names='gene_name').pivot_table(index='sender', columns='receiver', values='statistic')
        df_anno = df.reset_index(names='gene_name').pivot_table(index='sender', columns='receiver', values='padj')
        # Star marks adjusted p < 0.05; missing values compare False and stay blank
        df_anno = df_anno.applymap(lambda x: '*' if x < 0.05 else '')
        
        # Figure size grows with the number of receivers (width) and senders (height)
        plt.figure(figsize=calc_figsize(width=df_stat.shape[1]*5 + 20, height=df_stat.shape[0]*5 + 20))
        p = sns.heatmap(df_stat, cmap='PuOr_r', center=0, vmin=-5, vmax=5, cbar_kws={'label': 'statistic'}, xticklabels=True, yticklabels=True,
                        annot=df_anno, fmt='')

        # Hook for nudging the annotation text; with a (0, 0) translation it currently moves nothing
        for t in p.texts:
            trans = t.get_transform()
            offs = transforms.ScaledTranslation(0, 0, transforms.IdentityTransform())
            t.set_transform(offs + trans)

        p.set_title(f'{ia}')
        p.set_xlabel('Target cells')
        p.set_xticklabels(p.get_xticklabels(), rotation=90)
        p.set_ylabel('Source cells')
        p.set_yticklabels(p.get_yticklabels(), rotation=0)
        p.figure.tight_layout()

        # Save the current figure to the PDF
        pdf.savefig()
        plt.close()

Cells to inspect:
- B plasma
- Fb: Fb-adipo and interm
- GC
- T CD4 h

### Clustering interactions

In [None]:
# Subset DF to significant interactions
# Matrix of test statistics: rows = cell pairs, columns = interactions. Combinations
# absent from the significant set are filled with 0.
# NOTE(review): pivot requires each (cells, interaction) pair to be unique — confirm
# that the merges that built ranksums_res_filtered did not duplicate rows.
df = ranksums_res_filtered.pivot(index='cells', columns='interaction', values='statistic').fillna(0)
df.shape

Plot heatmap of interactions:

In [None]:
# Heatmap of interactions (rows = source -> target, columns = interaction)
from scipy.cluster.hierarchy import linkage, leaves_list

# Cluster rows and columns (Ward linkage) and reorder the matrix by leaf order
row_linkage = linkage(df, method='ward')
col_linkage = linkage(df.T, method='ward')
row_order = leaves_list(row_linkage)
col_order = leaves_list(col_linkage)

df = df.iloc[row_order, col_order]

p = sns.heatmap(df, cmap='Spectral_r', annot=None, fmt='.2g', xticklabels=False, yticklabels=False,
                vmax = 6)
p.set_xlabel('Interaction')
p.set_ylabel('Source -> Target')
# Resize first: tight_layout fits the axes to the figure size at the time it is called
p.figure.set_size_inches(calc_figsize(width = 280, height = 500))
p.figure.tight_layout()
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignalling_allIa_heatmap.pdf')

#df.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_diffCCC_recirc_filteredSignalling_allIa_matrix.csv')

Cluster interactions by cells or interaction type using UMAP and k-means clustering:

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from typing import Optional


def perform_clustering_and_plot(umap_result, k_range, override_k : Optional[int] = None, figsize=calc_figsize(width=180, height=60)):
    """Cluster a 2-D embedding with k-means and plot the diagnostics.

    K-means is fitted for every candidate k; inertia and silhouette score are
    recorded and drawn next to a scatter plot of the final clustering.

    Parameters
    ----------
    umap_result : numpy.ndarray
        2-dimensional array with at least two columns; only the first two are used.
    k_range : sequence of int
        ``(start, stop)`` or ``(start, stop, step)``; `stop` is inclusive and
        `step` defaults to 1. Candidates outside ``2 <= k <= n_samples - 1``
        are skipped because the silhouette score is undefined for them.
    override_k : int, optional
        If given, use this number of clusters for the final clustering instead
        of the k with the highest silhouette score.
    figsize : tuple
        Figure size passed to ``plt.subplots``.

    Returns
    -------
    clusters : numpy.ndarray
        Cluster label per row of `umap_result`.
    plt : module
        ``matplotlib.pyplot``, so the caller can save the current figure.

    Raises
    ------
    ValueError
        If `umap_result` is not 2-dimensional with at least two columns, or if
        `k_range` contains no usable candidate.
    """
    import matplotlib.pyplot as plt

    # Ensure the UMAP result is 2-dimensional
    if umap_result.ndim != 2 or umap_result.shape[1] < 2:
        raise ValueError("UMAP result must be a 2-dimensional array with at least 2 columns.")

    # Extract the first two dimensions of UMAP result
    umap_2d = umap_result[:, :2]
    n_samples = umap_2d.shape[0]

    # Candidate k values: honour an optional step and drop values for which the
    # silhouette score cannot be computed
    k_step = k_range[2] if len(k_range) > 2 else 1
    k_values = [k for k in range(k_range[0], k_range[1] + 1, k_step) if 2 <= k <= n_samples - 1]
    if not k_values:
        raise ValueError(f"k_range {tuple(k_range)} contains no k between 2 and n_samples - 1 ({n_samples - 1}).")

    # Fit k-means for every candidate and record both quality measures
    inertia = []
    silhouette_scores = []
    for k in k_values:
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(umap_2d)
        inertia.append(kmeans.inertia_)
        silhouette_scores.append(silhouette_score(umap_2d, kmeans.labels_))

    # Choose the optimal k (e.g., based on the silhouette score)
    if override_k is not None:
        optimal_k = override_k
    else:
        # Find the k with the maximum silhouette score
        optimal_k = k_values[np.argmax(silhouette_scores)]
        # Alternatively, you can use the elbow method to choose k
        # optimal_k = k_values[np.argmin(np.diff(inertia)) + 1]

    # Plot the elbow curve and silhouette scores
    fig, axes = plt.subplots(1, 3, figsize=figsize, gridspec_kw={'width_ratios': [1, 1, 1.5]})

    # Elbow curve
    sns.lineplot(x=k_values, y=inertia, marker='o', markersize=5, ax=axes[0])
    axes[0].axvline(optimal_k, color='black', linestyle='--', label=f'Optimal k={optimal_k}')
    axes[0].legend()
    axes[0].set_title('Elbow Method')
    axes[0].set_xlabel('Number of Clusters (k)')
    axes[0].set_ylabel('Inertia')

    # Silhouette scores
    sns.lineplot(x=k_values, y=silhouette_scores, marker='o', markersize=5, ax=axes[1])
    axes[1].axvline(optimal_k, color='black', linestyle='--', label=f'Optimal k={optimal_k}')
    axes[1].legend()
    axes[1].set_title('Silhouette Scores')
    axes[1].set_xlabel('Number of Clusters (k)')
    axes[1].set_ylabel('Silhouette Score')

    # Perform k-means clustering with the optimal k
    kmeans = KMeans(n_clusters=optimal_k, random_state=42)
    clusters = kmeans.fit_predict(umap_2d)

    # UMAP clustering plot
    scatter = axes[2].scatter(umap_2d[:, 0], umap_2d[:, 1], c=clusters, cmap='tab20', s=5, alpha=0.8)
    axes[2].set_title(f'UMAP Clustering with k={optimal_k}')
    axes[2].set_xlabel('UMAP Dimension 1')
    axes[2].set_ylabel('UMAP Dimension 2')
    fig.colorbar(scatter, ax=axes[2], label='Cluster')

    plt.tight_layout()

    return clusters, plt

In [None]:
# Interaction type clusters
import umap
import pprint

# Embed interactions (columns of df, hence the transpose) in two dimensions
_by_interaction = df.transpose()
umap_model = umap.UMAP(n_components=2, random_state=42)
umap_result = umap_model.fit_transform(_by_interaction)

# Cluster the embedding and save the diagnostic figure
clusters, plot = perform_clustering_and_plot(umap_result, k_range = (2,20))
plot.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignalling_interactionTypeClusters.pdf')

# Coordinates, cluster label and interaction name, one row per interaction
gene_clusters = pd.DataFrame(umap_result, columns=['UMAP1', 'UMAP2'], index = _by_interaction.index)
gene_clusters = gene_clusters.assign(cluster = clusters, interaction = lambda d: d.index)
gene_clusters.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_interactionTypeClusters.csv', index = False)

# Cluster label -> list of interactions
gene_clusters_dict = gene_clusters.groupby('cluster')['interaction'].agg(list).to_dict()

pprint.pprint(gene_clusters_dict, compact = True)

In [None]:
# Cell type clusters
import umap
import pprint

# Embed the rows of `df` (source/target cell pairs) in two dimensions with UMAP
umap_model = umap.UMAP(n_components=2, random_state=42)
umap_result = umap_model.fit_transform(df)

# Cluster the embedding; the helper returns the labels and the pyplot module
clusters, plot = perform_clustering_and_plot(umap_result, k_range=(5, 100, 5))
plot.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignalling_cellTypeClusters.pdf')

# One row per entry of df.index: UMAP coordinates, cluster label and row name
cell_clusters = pd.DataFrame(umap_result, columns=['UMAP1', 'UMAP2'], index=df.index)
cell_clusters['cluster'] = clusters
cell_clusters['cells'] = cell_clusters.index
cell_clusters.to_csv(
    f'{data_path}/analyses/cellComm/recirc/thyAgeing_all_scvi_{object_version}_recirc_cellphoneDbBySample_cellTypeClusters.csv',
    index=False,
)

# Map every cluster label to the list of rows assigned to it
cell_clusters_dict = cell_clusters.groupby('cluster')['cells'].apply(list).to_dict()
pprint.pprint(cell_clusters_dict, compact=True)

In [None]:
# Print both cluster -> members dictionaries one after the other.
# (A stray trailing comma used to turn the first call into a throwaway tuple.)
pprint.pprint(gene_clusters_dict, compact=True)
pprint.pprint(cell_clusters_dict, compact=True)

Alternative: cluster and subset interaction types per cell type cluster

In [None]:
# Heatmaps of the interaction matrix `df`, one PDF page per cluster
from scipy.cluster.hierarchy import linkage, leaves_list
from matplotlib.backends.backend_pdf import PdfPages


def _save_clustered_heatmap(mat, pdf):
    """Draw `mat` as a Ward-ordered heatmap and append the page to `pdf`.

    Parameters
    ----------
    mat : pandas.DataFrame
        Matrix to plot (rows on the y axis, columns on the x axis).
    pdf : PdfPages
        Open multi-page PDF that receives the figure.

    Returns
    -------
    bool
        False (nothing plotted) when `mat` has fewer than two rows or columns,
        because hierarchical clustering needs at least two observations;
        True otherwise.
    """
    if mat.shape[0] < 2 or mat.shape[1] < 2:
        return False

    # Order rows and columns by Ward hierarchical clustering
    row_order = leaves_list(linkage(mat, method='ward'))
    col_order = leaves_list(linkage(mat.T, method='ward'))
    mat = mat.iloc[row_order, col_order]

    p = sns.heatmap(mat, cmap='Spectral_r', annot=None, fmt='.2g',
                    vmax=6, yticklabels=True)
    p.set_xlabel('Interaction')
    p.set_ylabel('Source -> Target')
    p.set_xticklabels(p.get_xticklabels(), rotation=90)
    p.figure.tight_layout()
    # Scale the page with the matrix so tick labels stay readable
    p.figure.set_size_inches(calc_figsize(height=mat.shape[0]*5+20, width=mat.shape[1]*5+20))

    pdf.savefig()
    plt.close()
    return True


# Heatmap of interaction clusters by interaction type
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_iaClusters_byIaType_heatmap.pdf') as pdf:
    for c in gene_clusters['cluster'].unique().tolist():
        cluster_interactions = gene_clusters.loc[gene_clusters['cluster'] == c, 'interaction'].tolist()
        df_red = df.loc[:, df.columns.isin(cluster_interactions)]
        # Drop rows whose summed value over this interaction cluster is not positive
        df_red = df_red.loc[df_red.sum(axis = 1) > 0]
        _save_clustered_heatmap(df_red, pdf)

# Heatmap of interaction clusters by cell type
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_iaClusters_byCellType_heatmap.pdf') as pdf:
    for c in cell_clusters['cluster'].unique().tolist():
        cluster_cells = cell_clusters.loc[cell_clusters['cluster'] == c, 'cells'].tolist()
        df_red = df.loc[df.index.isin(cluster_cells)]
        # Drop interactions whose summed value over this cell cluster is not positive
        df_red = df_red.loc[:, df_red.sum(axis = 0) > 0]
        _save_clustered_heatmap(df_red, pdf)

In [None]:
# with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_iaClusters_byCellTypeAndIaType_heatmap.pdf') as pdf:
#     for c in cell_clusters['cluster'].unique().tolist():
#         for g in gene_clusters['cluster'].unique().tolist():
#             df_red = df.loc[df.index.isin(cell_clusters.loc[cell_clusters['cluster'] == c, 'cells'].tolist()),
#                             df.columns.isin(gene_clusters.loc[gene_clusters['cluster'] == g, 'interaction'].tolist())]
#             df_red = df_red.loc[df_red.sum(axis = 1) > 0,df_red.sum(axis = 0) > 0]
            
#             if df_red.shape[0] <= 2:
#                 continue
#             if df_red.shape[1] <= 2:
#                 continue
#             else:
#                 # Cluster rows and columns
#                 row_linkage = linkage(df_red, method='ward')
#                 col_linkage = linkage(df_red.T, method='ward')
#                 row_order = leaves_list(row_linkage)
#                 col_order = leaves_list(col_linkage)

#                 df_red = df_red.iloc[row_order, col_order]

#                 p = sns.heatmap(df_red, cmap='Spectral_r', annot=None, fmt='.2g',
#                                 vmax=6, yticklabels=True)
#                 p.set_xlabel('Interaction')
#                 p.set_ylabel('Source -> Target')
#                 p.set_xticklabels(p.get_xticklabels(), rotation=90)
#                 p.figure.tight_layout()
#                 p.figure.set_size_inches(calc_figsize(height=df_red.shape[0]*5+20, width=df_red.shape[1]*5+20))
                
#                 pdf.savefig()
#                 plt.close()

In [None]:
import pickle

# For every cell-type cluster: re-cluster the interactions using only the rows
# of that cluster, then draw one heatmap per (cell cluster, interaction cluster).
# Loop-local names (ia_*) are used so the notebook-level `gene_clusters`,
# `gene_clusters_dict`, `umap_result` and `clusters` are not overwritten.
ct_ia_clusters = {}
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_iaClusters_byCellTypeAndIaType_indivClustering_heatmap.pdf') as pdf:
    for c in cell_clusters['cluster'].unique().tolist():
        cluster_cells = cell_clusters.loc[cell_clusters['cluster'] == c, 'cells'].tolist()
        df_sub = df.loc[df.index.isin(cluster_cells)]

        # Perform UMAP & cluster interactions
        ia_umap = umap.UMAP(n_components=2, random_state=42).fit_transform(df_sub.transpose())
        ia_labels, _ = perform_clustering_and_plot(ia_umap, k_range = (2,20))
        # First page per cell cluster: the clustering diagnostics figure
        pdf.savefig()
        plt.close()

        ia_clusters = pd.DataFrame(ia_umap, columns=['UMAP1', 'UMAP2'], index = df_sub.columns)
        ia_clusters['cluster'] = ia_labels
        ia_clusters['interaction'] = ia_clusters.index
        ct_ia_clusters[c] = ia_clusters.groupby('cluster')['interaction'].apply(list).to_dict()

        for g in ia_clusters['cluster'].unique().tolist():
            cluster_interactions = ia_clusters.loc[ia_clusters['cluster'] == g, 'interaction'].tolist()
            df_red = df_sub.loc[:, df_sub.columns.isin(cluster_interactions)]
            df_red = df_red.loc[df_red.sum(axis = 1) > 0,df_red.sum(axis = 0) > 0]

            # Skip blocks with at most two rows or two columns
            if df_red.shape[0] <= 2 or df_red.shape[1] <= 2:
                continue

            # Cluster rows and columns
            row_order = leaves_list(linkage(df_red, method='ward'))
            col_order = leaves_list(linkage(df_red.T, method='ward'))
            df_red = df_red.iloc[row_order, col_order]

            p = sns.heatmap(df_red, cmap='Spectral_r', annot=None, fmt='.2g',
                            vmax=6, yticklabels=True)
            p.set_xlabel('Interaction')
            p.set_ylabel('Source -> Target')
            p.set_title(f'Cell type cluster {c} - Interaction cluster {g}')
            p.set_xticklabels(p.get_xticklabels(), rotation=90)
            p.figure.tight_layout()
            p.figure.set_size_inches(calc_figsize(height=df_red.shape[0]*5+20, width=df_red.shape[1]*5+20))

            pdf.savefig()
            plt.close()

# Persist {cell cluster: {interaction cluster: [interactions]}}
with open(f'{data_path}/analyses/cellComm/recirc/thyAgeing_diffCCC_recirc_ctIaClusters.pkl', 'wb') as f:
    pickle.dump(ct_ia_clusters, f)

## Immune cell homing

Aspects of interest:
- recruitment
- tethering
- survival
- differentiation

In [None]:
# Show the interactions whose ligand name contains the query string
string = 'IL4'
ligand_hits = ranksums_res_filtered['ligand'].str.contains(string)
ranksums_res_filtered.loc[ligand_hits, 'interaction'].unique()

In [None]:
# Show the interactions whose receptor name contains the query string
string = 'DPP4'
receptor_hits = ranksums_res_filtered['receptor'].str.contains(string)
ranksums_res_filtered.loc[receptor_hits, 'interaction'].unique()

In [None]:
# Load curated interactions
immune_cell_homing_df = pd.read_excel(f'{data_path}/analyses/cellComm/recirc/thyAgeing_recircInteractions_curated.xlsx', sheet_name = 'immune_cell_homing')

# Check that all interactions are in the ranksums results.
# The set of known interactions is built once instead of once per element.
known_interactions = set(ranksums_res_filtered['interaction'].unique().tolist())
missing_interactions = [i for i in immune_cell_homing_df['interaction'] if i not in known_interactions]

# Displays True when at least one curated interaction is missing
any(missing_interactions)

In [None]:
# Restrict the rank-sum results to the curated homing interactions
curated_interactions = immune_cell_homing_df['interaction'].unique().tolist()
is_curated = ranksums_res_filtered['interaction'].isin(curated_interactions)
homing_ia = ranksums_res_filtered.loc[is_curated].copy()

# Attach the adult ligand/receptor proportions for each (cells, interaction) pair
adult_props = mean_props[['cells', 'interaction', 'ligand_props_adult', 'receptor_props_adult']]
homing_ia = homing_ia.merge(adult_props, on=['cells', 'interaction'], how='left')

# Filter interactions by min ligand and receptor props (20%)
min_props = 0.2
ligand_ok = homing_ia['ligand_props_adult'] >= min_props
receptor_ok = homing_ia['receptor_props_adult'] >= min_props
homing_ia = homing_ia.loc[ligand_ok & receptor_ok].copy()

homing_ia.to_csv(
    f'{data_path}/analyses/cellComm/recirc/thyAgeing_diffCCC_recirc_immuneCellHoming_interactions_selection.csv',
    index=False,
)

homing_ia.shape

In [None]:
from plotting.utils import plot_interactions
from matplotlib.backends.backend_pdf import PdfPages

# Target populations plotted for each compartment of the curated sheet
immune_cell_groups = {'B' : [c for c in ct_levels if c.startswith('B_')],
                      'T/NK' : [c for c in ct_levels if c.startswith('T_') or c in ['NK', 'ILC']],
                      'Myeloid' : ['Mac', 'Mono', 'DC1', 'DC2', 'aDC', 'pDC']}

# Create a dictionary with splits of the dataframe based on the 'compartment' column
immune_cell_homing_dict = {compartment: comp_df for compartment, comp_df in immune_cell_homing_df.groupby('compartment')}

# One page per (compartment, homing program). The loop variables use their own
# names so the notebook-level `df` is no longer overwritten by this cell.
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_immuneCellHoming_interactions_scatter_selection.pdf') as pdf:
    for compartment, comp_df in immune_cell_homing_dict.items():

        # Create dict of homing programs
        homing_programs_dict = comp_df.groupby('type')['interaction'].apply(list).to_dict()

        for program, program_interactions in homing_programs_dict.items():
            # Filter by interaction and target population
            df_for_vis = homing_ia.loc[(homing_ia['interaction'].isin(program_interactions)) &
                                       (homing_ia['target'].isin(immune_cell_groups[compartment]))].copy()

            # Nothing to plot for this program
            if df_for_vis.shape[0] == 0:
                continue

            plot_interactions(df_for_vis, cell_type_levels = ct_levels, plot_title = f'{compartment} Cell {program}',
                              figsize =calc_figsize(width=70, height=85),
                              )
            pdf.savefig()
            plt.close()

In [None]:
# List the distinct source labels that contain 'Fb'
fb_source_mask = ranksums_res['source'].str.contains('Fb')
ranksums_res.loc[fb_source_mask, 'source'].unique()

In [None]:
# Homing programs: interaction lists keyed by the 'type' column
programs_dict = immune_cell_homing_df.groupby('type')['interaction'].apply(list).to_dict()

# Source cell types grouped into broader labels.
# (The 'T' list previously repeated three entries; each label is listed once.)
celltype_dict = {
    'B': ['B_GC-like', 'B_age-associated', 'B_naive', 'B_plasmablast',
          'B_mem', 'B_plasma'],
    'T': ['T_CD4_act', 'T_CD4_fh', 'T_CD4_h', 'T_CD4_naive_recirc',
          'T_CD8_age-assoc', 'T_CD8_naive_recirc', 'T_Treg_recirc',
          'T_CD8_em', 'T_CD8_rm', 'T_Treg_tr'],
    'NK/ILC/MAIT': ['NK', 'ILC', 'T_MAIT'],
    'EC': ['EC-art', 'EC-cap', 'EC-cap_lipid', 'EC-lymphatic', 'EC-ven'],
    'TEC': ['TEC-EMT', 'TEC-mim', 'cTEC', 'mTECI', 'mTECII', 'mTECIII',
            'mcTEC'],
    'Fb': ['Fb-adipo', 'Fb-interlo', 'Fb-interm', 'Fb-med', 'Fb-perilo'],
    'DC': ['DC1', 'DC2', 'pDC', 'aDC'],
    'Mac/Mono': ['Mac', 'Mono'],
}

# Reverse lookup: source label -> group (first group wins if a label repeats)
source_to_group = {}
for group, members in celltype_dict.items():
    for member in members:
        source_to_group.setdefault(member, group)

# Keep curated homing interactions whose source belongs to one of the groups
df = ranksums_res.loc[(ranksums_res['interaction'].isin(immune_cell_homing_df['interaction'].unique().tolist())) &
                      (ranksums_res['source'].isin(list(source_to_group)))].copy()
# Map the 'source' column to its corresponding key in celltype_dict
df['cell_type'] = df['source'].map(lambda x: source_to_group.get(x, x))
# Average the statistic per (source group, interaction)
df = df.groupby(['cell_type', 'interaction'])['statistic'].mean().reset_index()
df = df.merge(immune_cell_homing_df[['interaction', 'type']].drop_duplicates(), on = 'interaction', how = 'left')
df['type'] = df['type'].map(lambda x: 'Survival and Differentiation' if x in ['Differentiation', 'Survival'] else x)
# An interaction listed under several types yields several rows after the merge; keep one
df = df.drop_duplicates(subset=['cell_type', 'interaction'], keep='first')
df.sort_values('type', ascending=True, inplace=True)
df

In [None]:
# Heatmap of the mean statistic per source cell-type group and interaction
df_red = df.pivot(index='cell_type', columns='interaction', values='statistic').fillna(0)
# Order the columns as in `df` (sorted by 'type'). `df` holds one row per
# (cell_type, interaction), so the labels are de-duplicated first; selecting
# with repeated labels would repeat every interaction column in the heatmap.
df_red = df_red.loc[:, df['interaction'].drop_duplicates()]
p = sns.heatmap(df_red, cmap='PuOr_r', annot=None, fmt='.2g', vmin = -4, vmax = 4, yticklabels=True)
p.set_xlabel('Interaction')
# Rows are the pivot index 'cell_type', not source -> target pairs
p.set_ylabel('Source cell type')
p.set_xticklabels(p.get_xticklabels(), rotation=90)
p.set_yticklabels(p.get_yticklabels(), rotation=0)
p.figure.tight_layout()
p.figure.set_size_inches(calc_figsize(height=40, width=150))
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_immuneCellHoming_interactions_heatmap_bifTalk.pdf')

In [None]:
# Display the distinct values of the 'target' column of the filtered results
ranksums_res_filtered['target'].unique()

In [None]:
# Display the curated interaction table
immune_cell_homing_df

In [None]:
# Target populations kept for the recruitment plot.
# A comma was missing after 'T_CD8_rm', so Python concatenated it with
# 'B_age-associated' into one bogus label and both populations were dropped.
target_cells = ['T_CD4_fh', 'T_CD4_act', 'T_CD4_h', 'T_CD8_em', 'T_MAIT', 'T_Treg_recirc',
                'T_CD4_naive_recirc', 'T_CD8_age-assoc', 'T_CD8_naive_recirc', 'T_CD8_rm',
                'B_age-associated', 'B_mem', 'B_naive', 'B_plasma', 'B_GC-like',
                'pDC', 'aDC', 'DC1', 'DC2',
                'ILC', 'NK',
                'Mono', 'Mac']
# (ranksums_res_filtered['interaction'].str.contains(r'^(CC|CXC|IL)', regex=True))

# Curated interactions annotated as 'Recruitment'
recruitment_interactions = immune_cell_homing_df.loc[immune_cell_homing_df['type'] == 'Recruitment', 'interaction'].unique().tolist()
df = homing_ia.loc[(homing_ia['interaction'].isin(recruitment_interactions)) &
                   (homing_ia['target'].isin(target_cells))]

df.to_csv(f'{data_path}/analyses/cellComm/recirc/thyAgeing_diffCCC_recirc_immuneCellHoming_recruitmentInteractions_selection.csv', index=False)

df.shape

In [None]:
from plotting.utils import plot_interactions,thyAgeing_greys,thyAgeing_colors

# Diverging blue -> grey -> yellow colour map built from the project palette
recruitment_cmap = sns.blend_palette(
    [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
    as_cmap=True,
)

# Plot the interactions held in `df` and save the figure
plot_interactions(
    df,
    cell_type_levels=ct_levels,
    cmap=recruitment_cmap,
    figsize=calc_figsize(width=90, height=100),
)
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_immuneCellHoming_recruitmentInteractions_scatter_selection.pdf')

In [None]:
# Display the distinct interactions in whatever `df` currently holds
df['interaction'].unique()

## B plasma recruitment

Recruitment: 
- CXCL12 -> CXCR4
- CCL28 -> CCR10

Survival:
- TNFSF13 -> TNFRSF13B (TACI)
- TNFSF13B -> TNFRSF13C (BAFFR)
- TNFSF13B -> TNFRSF17 (BCMA)
- IL15 -> IL15RA

Differentiation:
- IL10 -> IL10RA

In [None]:
# Keep the rows in which B_plasma is either the target or the source
plasma_is_target = ranksums_res_filtered['target'] == 'B_plasma'
plasma_is_source = ranksums_res_filtered['source'] == 'B_plasma'
plasma_df = ranksums_res_filtered.loc[plasma_is_target | plasma_is_source].copy()

Interactions with plasma cells as receivers:

In [None]:
# Top 20 interactions by number of rows with B_plasma as target
(
    plasma_df.loc[plasma_df['target'] == 'B_plasma']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Top 20 interactions by number of rows with B_plasma as source
(
    plasma_df.loc[plasma_df['source'] == 'B_plasma']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Clustered heatmaps of B_plasma interactions: one page with B_plasma as
# source and one with B_plasma as target
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_plasmaCell_interactions_heatmap.pdf') as pdf:
    for c in ['source', 'target']:
        # The partner role provides the heatmap rows
        id_c = 'source' if c == 'target' else 'target'
        plasma_side = plasma_df.loc[plasma_df[c] == 'B_plasma']
        df_red = plasma_side.pivot(index=id_c, columns='interaction', values='statistic').fillna(0)

        # Keep rows / columns whose summed statistic is positive
        keep_rows = df_red.sum(axis=1) > 0
        keep_cols = df_red.sum(axis=0) > 0
        df_red = df_red.loc[keep_rows, keep_cols]

        # Order rows and columns by Ward hierarchical clustering
        row_order = leaves_list(linkage(df_red, method='ward'))
        col_order = leaves_list(linkage(df_red.T, method='ward'))
        df_red = df_red.iloc[row_order, col_order]

        p = sns.heatmap(
            df_red, cmap='Spectral_r', annot=None, fmt='.2g',
            vmax=6, yticklabels=True, xticklabels=True,
        )
        p.set_xlabel('Interaction')
        p.set_ylabel(id_c.capitalize())
        p.set_title(f'{c.capitalize()}: B_plasma')
        p.set_yticklabels(p.get_yticklabels(), rotation=0)
        p.set_xticklabels(p.get_xticklabels(), rotation=90)
        p.figure.tight_layout()
        # Page size grows with the matrix dimensions
        n_rows, n_cols = df_red.shape
        p.figure.set_size_inches(calc_figsize(height=n_rows*5+20, width=n_cols*5+20))

        pdf.savefig()
        plt.close()

In [None]:
from matplotlib import cm
from matplotlib.colors import to_hex

# Circos plot of the interactions in `ia_df`: one sector per entry of
# `sectors`, one link per row, coloured by interaction.
# `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` takes
# the same (name, lut) arguments and works on older and newer versions.

# Generate a colormap dictionary for unique interactions
unique_interactions = ia_df['interaction'].unique()
cmap = plt.get_cmap('tab20c', len(unique_interactions))  # resample tab20c to one colour per interaction
color_map = {interaction: to_hex(cmap(i)) for i, interaction in enumerate(unique_interactions)}

# NOTE(review): `spaces` is computed but never passed to Circos(...) below —
# confirm whether `Circos(sectors, space=spaces)` was intended.
spaces = calc_group_spaces([len(g) for g in groups_upd.values() if len(g) > 1], space_bw_group=30, space_in_group=5)
circos = Circos(sectors)

# Plot sector track
#ColorCycler.set_cmap("Set3")
for sector in circos.sectors:
    track = sector.add_track(r_lim=(75, 95))
    track.axis(fc = '#e8e8e8')
    track.text(sector.name.replace('_', ' '), adjust_rotation=True, fontsize=6, orientation='vertical', wrap=True)

# # Plot group sectors range
ColorCycler.set_cmap('Paired')
for k,v in groups_upd.items():
    group_deg_lim = circos.get_group_sectors_deg_lim(v)
    circos.rect(r_lim=(97, 100), deg_lim=group_deg_lim, fc=group_cmap[k], ec="black", lw=0.5)
    group_center_deg = sum(group_deg_lim) / 2
    circos.text(f"{k}", r=103, deg=group_center_deg, adjust_rotation=True, fontsize = 7)


# `norm` and the 'inferno_r' cmap are only used by the commented-out
# statistic-based colouring below
norm = Normalize(vmin=0, vmax=6)
cmap = plt.get_cmap('inferno_r')
# Plot link lines in various styles
for i, row in ia_df.iterrows():
    source = row['source']
    target = row['target']
    interaction = row['interaction']
    statistic = row['statistic']

    # Get the color for the interaction (log2FoldChange)
    #color = cmap(norm(statistic))
    #ls = 'dashed' if row['interaction_log2FoldChange'] < 0 else 'solid'

    # Get the color for the interaction (type)
    color = color_map[interaction]

    # Plot the link line; endpoints are drawn at random positions in (0.1, 1.9)
    # and the line width scales with the statistic.
    # NOTE: `random` is not seeded here, so the layout differs between runs.
    circos.link_line((source, random.uniform(0.1,1.9)), (target, random.uniform(0.1,1.9)), direction=1, color=color, lw=statistic/2)

fig = circos.plotfig(figsize = calc_figsize(width = 120, height = 120))

#Plot legend
line_handles = [Line2D([], [], color=v, label=f'{k}') for k,v in color_map.items() if k in ia_df['interaction'].unique()]

line_legend = circos.ax.legend(
    handles=line_handles,
    bbox_to_anchor=(0.8, 1.1),
    fontsize=6,
    title="Interaction",
    title_fontsize=7,
    handlelength=2,
)
circos.ax.add_artist(line_legend)

# circos.colorbar(vmin=0, vmax=6, 
#                 cmap=cmap, 
#                 label='Statistic', 
#                 orientation='vertical', 
#                 colorbar_kws = dict(ticks=[0, 2, 4, 6]),
#                 )

# fig = circos.plotfig(figsize = calc_figsize(width = 120, height = 120))

_ = circos.ax.text(0, 110, "Plasma cell interactions", size=7, va="top", ha="center")

plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_plasmaRecruitment_interactions_circos.pdf')

In [None]:
# One scatter page per homing program curated for B_plasma
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_plasmaCellHoming_interactions_scatter.pdf') as pdf:
    # Create dict of homing programs
    plasma_rows = immune_cell_homing_df.loc[immune_cell_homing_df['cell_type'] == 'B_plasma']
    homing_programs_dict = plasma_rows.groupby('type')['interaction'].apply(list).to_dict()

    # Colour map shared by all pages
    plasma_cmap = sns.blend_palette(
        [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
        as_cmap=True,
    )

    for p, l in homing_programs_dict.items():
        # Filter by interaction and target population
        in_program = ranksums_res_filtered['interaction'].isin(l)
        targets_plasma = ranksums_res_filtered['target'].isin(['B_plasma'])
        df_for_vis = ranksums_res_filtered.loc[in_program & targets_plasma].copy()

        # Only programs with at least one row get a page
        if df_for_vis.shape[0] != 0:
            plot_interactions(df_for_vis, cell_type_levels=ct_levels, plot_title=f'B Plasma Cell {p}',
                              cmap=plasma_cmap)
            pdf.savefig()
            plt.close()

## Fibroblasts

### Fb-adipo

In [None]:
# Keep the rows in which Fb-adipo is either the target or the source
adipo_is_target = ranksums_res_filtered['target'] == 'Fb-adipo'
adipo_is_source = ranksums_res_filtered['source'] == 'Fb-adipo'
fb_df = ranksums_res_filtered.loc[adipo_is_target | adipo_is_source].copy()

In [None]:
# Top 20 interactions by number of rows with Fb-adipo as target
(
    fb_df.loc[fb_df['target'] == 'Fb-adipo']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Top 20 interactions by number of rows with Fb-adipo as source
(
    fb_df.loc[fb_df['source'] == 'Fb-adipo']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Clustered heatmaps of Fb-adipo interactions: one page with the cell type as
# source and one with it as target
ct = 'Fb-adipo'
df = fb_df.copy()
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_fbAdipo_interactions_heatmap.pdf') as pdf:
    for c in ['source', 'target']:
        # The partner role provides the heatmap rows
        id_c = 'source' if c == 'target' else 'target'
        ct_side = df.loc[df[c] == ct]
        df_red = ct_side.pivot(index=id_c, columns='interaction', values='statistic').fillna(0)

        # Keep rows / columns whose summed statistic is positive
        keep_rows = df_red.sum(axis=1) > 0
        keep_cols = df_red.sum(axis=0) > 0
        df_red = df_red.loc[keep_rows, keep_cols]

        # Order rows and columns by Ward hierarchical clustering
        row_order = leaves_list(linkage(df_red, method='ward'))
        col_order = leaves_list(linkage(df_red.T, method='ward'))
        df_red = df_red.iloc[row_order, col_order]

        p = sns.heatmap(
            df_red, cmap='Spectral_r', annot=None, fmt='.2g',
            vmax=6, yticklabels=True, xticklabels=True,
        )
        p.set_xlabel('Interaction')
        p.set_ylabel(id_c.capitalize())
        p.set_title(f'{c.capitalize()}: {ct}')
        p.set_yticklabels(p.get_yticklabels(), rotation=0)
        p.set_xticklabels(p.get_xticklabels(), rotation=90)
        p.figure.tight_layout()
        # Page size grows with the matrix dimensions
        n_rows, n_cols = df_red.shape
        p.figure.set_size_inches(calc_figsize(height=n_rows*5+20, width=n_cols*5+20))

        pdf.savefig()
        plt.close()

### Fb-interm

In [None]:
# Keep the rows in which Fb-interm is either the target or the source
interm_is_target = ranksums_res_filtered['target'] == 'Fb-interm'
interm_is_source = ranksums_res_filtered['source'] == 'Fb-interm'
fb_df = ranksums_res_filtered.loc[interm_is_target | interm_is_source].copy()

# Top 20 interactions by number of rows with Fb-interm as target
(
    fb_df.loc[fb_df['target'] == 'Fb-interm']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Top 20 interactions by number of rows with Fb-interm as source
(
    fb_df.loc[fb_df['source'] == 'Fb-interm']
    .groupby('interaction')
    .size()
    .sort_values(ascending=False)
    .head(20)
)

In [None]:
# Clustered heatmaps of Fb-interm interactions: one page with the cell type as
# source and one with it as target
ct = 'Fb-interm'
df = fb_df.copy()
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_fbInterm_interactions_heatmap.pdf') as pdf:
    for c in ['source', 'target']:
        # The partner role provides the heatmap rows
        id_c = 'source' if c == 'target' else 'target'
        ct_side = df.loc[df[c] == ct]
        df_red = ct_side.pivot(index=id_c, columns='interaction', values='statistic').fillna(0)

        # Keep rows / columns whose summed statistic is positive
        keep_rows = df_red.sum(axis=1) > 0
        keep_cols = df_red.sum(axis=0) > 0
        df_red = df_red.loc[keep_rows, keep_cols]

        # Order rows and columns by Ward hierarchical clustering
        row_order = leaves_list(linkage(df_red, method='ward'))
        col_order = leaves_list(linkage(df_red.T, method='ward'))
        df_red = df_red.iloc[row_order, col_order]

        p = sns.heatmap(
            df_red, cmap='Spectral_r', annot=None, fmt='.2g',
            vmax=6, yticklabels=True, xticklabels=True,
        )
        p.set_xlabel('Interaction')
        p.set_ylabel(id_c.capitalize())
        p.set_title(f'{c.capitalize()}: {ct}')
        p.set_yticklabels(p.get_yticklabels(), rotation=0)
        p.set_xticklabels(p.get_xticklabels(), rotation=90)
        p.figure.tight_layout()
        # Page size grows with the matrix dimensions
        n_rows, n_cols = df_red.shape
        p.figure.set_size_inches(calc_figsize(height=n_rows*5+20, width=n_cols*5+20))

        pdf.savefig()
        plt.close()

In [None]:
# Rows whose interaction starts with 'IL33' and whose target is Fb-interm
starts_with_il33 = ranksums_res_filtered['interaction'].str.startswith('IL33')
targets_fb_interm = ranksums_res_filtered['target'] == 'Fb-interm'
ranksums_res_filtered.loc[starts_with_il33 & targets_fb_interm]

In [None]:
from plotting.utils import plot_interactions

# Select the IL33* interactions that target Fb-interm
il33_rows = ranksums_res_filtered.loc[
    (ranksums_res_filtered['interaction'].str.startswith('IL33'))
    & (ranksums_res_filtered['target'] == 'Fb-interm')
]
# Blue -> grey -> yellow colour map from the project palette
il33_cmap = sns.blend_palette(
    [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
    as_cmap=True,
)

plot_interactions(
    il33_rows,
    cell_type_levels=ct_levels,
    plot_title='IL33 -> IL1RL1',
    figsize=calc_figsize(width=80, height=80),
    cmap=il33_cmap,
)
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_IL33_IL1RL1_Fb-interm_interactions_scatter.pdf')

In [None]:
# thyAgeing_colors is imported here as well so the cell does not depend on an
# earlier cell having imported it
from plotting.utils import plot_interactions,thyAgeing_greys,thyAgeing_colors
from matplotlib.backends.backend_pdf import PdfPages

# Page title -> prefix that the interaction string must start with
ia_dict = {'CXC Chemokines' : 'CXC',
           'CCL Chemokines' : 'CC',
           'IL6' : 'IL6',
           'IL33' :'IL33',
           'CD40' : 'CD40',
           # NOTE(review): was 'TNFSRSF12', which is not a gene symbol and can
           # never match. Interaction strings start with the ligand, so the
           # ligand symbol TNFSF12 is assumed to be intended — confirm.
           'TNF' : 'TNFSF12',
           'Lymphotoxin' : 'LT',}

# Colour map shared by all pages
fb_cmap = sns.blend_palette(
    [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
    as_cmap=True,
)

# NOTE(review): `fb_df` holds whatever fibroblast subset was assigned last
# (Fb-interm if the cells run top to bottom) — confirm this is intended for a
# PDF named "fibroblast_interactions".
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_fibroblast_interactions_scatter.pdf') as pdf:
    for k, v in ia_dict.items():
        # Own name for the subset so the notebook-level `df` is not overwritten
        ia_subset = fb_df.loc[fb_df['interaction'].str.startswith(v)]
        if ia_subset.shape[0] > 0:
            plot_interactions(
                ia_subset,
                cell_type_levels=ct_levels,
                plot_title=f'{k}',
                figsize=calc_figsize(width=80, height=80),
                cmap=fb_cmap,
            )
            pdf.savefig()
            plt.close()

### Joint for Fb-adipo and Fb-interm

In [None]:
# Load curated interactions
fb_ia = pd.read_excel(f'{data_path}/analyses/cellComm/recirc/thyAgeing_recircInteractions_curated.xlsx', sheet_name = 'targeted')
fb_ia = fb_ia.loc[fb_ia['compartment'] == 'Fb']

# Check that all interactions are in the ranksums results.
# The set of known interactions is built once instead of once per element.
known_interactions = set(ranksums_res_filtered['interaction'].unique().tolist())
missing_fb_interactions = [i for i in fb_ia['interaction'] if i not in known_interactions]

# Displays True when at least one curated interaction is missing
any(missing_fb_interactions)

In [None]:
# Scatter pages for the curated Fb interactions, with Fb-adipo / Fb-interm
# first as source and then as target
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_fbAdipoInterm_interactions_scatter.pdf') as pdf:
    # Colour map shared by all pages
    fb_joint_cmap = sns.blend_palette(
        [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
        as_cmap=True,
    )

    for role in ['source', 'target']:
        # Interaction lists keyed by 'type_detailled' for this role
        role_rows = fb_ia.loc[fb_ia['type'] == role]
        programs_dict = role_rows.groupby('type_detailled')['interaction'].apply(list).to_dict()

        for p, l in programs_dict.items():
            # Filter by interaction and by the fibroblast population in this role
            in_program = ranksums_res_filtered['interaction'].isin(l)
            fb_in_role = ranksums_res_filtered[role].isin(['Fb-adipo', 'Fb-interm'])
            df_for_vis = ranksums_res_filtered.loc[in_program & fb_in_role].copy()

            # Only programs with at least one row get a page
            if df_for_vis.shape[0] != 0:
                plot_interactions(df_for_vis, cell_type_levels=ct_levels, plot_title=f'{p}',
                                  cmap=fb_joint_cmap)
                pdf.savefig()
                plt.close()
        

In [None]:
# Density of 'log_tot_prop' per age group for one interaction / cell pair.
# NOTE(review): 'IL1B -> IL1B' names the same symbol as ligand and receptor —
# confirm this interaction label exists in `test`.
is_interaction = test['interaction'] == 'IL1B -> IL1B'
is_cell_pair = test['cells'] == 'Mono -> Mac'
df = test.loc[is_interaction & is_cell_pair].copy()

plt.figure(figsize=(10, 6))
ax = sns.kdeplot(data=df, x='log_tot_prop', hue='age_group', fill=True,
                 common_norm=False, palette='tab10', alpha=0.5)
ax.set_title('Density Plot of tot_prop by Age Group')
ax.set_xlabel('tot_prop')
ax.set_ylabel('Density')
ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Quick reference: log10 of 0.1 and 0.01
np.log10([0.1, 0.01])

## GC

In [None]:
# Marker genes grouped by category
tls_markers = {
    'LTo': ['LTBR', 'TNFRSF1A', 'TNFRSF1B', 'CXCL13', 'CCL19', 'CCL21'],
    'LTi': ['TNF', 'TNFSF14', 'LTA', 'LTB'],
    'T_B_interaction': ['ICAM1', 'ICAM2', 'ICAM3', 'ICAM4', 'ICAM5', 'CD40', 'ICOSLG'],
    'GC_formation': ['PLXNB1', 'PLXNB2', 'BASP1', 'P2RY8', 'BATF'],
    'BCR_activation': ['CD40LG', 'ICOS', 'CD40', 'S1PR2'],
    'interleukin_signalling': ['IL21', 'IL4', 'IL6', 'IL10'],
    'interferon': ['IFNG'],
}

# Flatten all categories into one gene list (repeats are kept)
tls_genes = [gene for genes in tls_markers.values() for gene in genes]

# Rows whose ligand or receptor is one of the marker genes
ligand_is_marker = ranksums_res_filtered['ligand'].isin(tls_genes)
receptor_is_marker = ranksums_res_filtered['receptor'].isin(tls_genes)
tls_df = ranksums_res_filtered.loc[ligand_is_marker | receptor_is_marker].copy()

tls_df

In [None]:
# Clustered heatmap per marker category: rows are the 'cells' values, columns
# the interactions whose ligand or receptor belongs to the category
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_gcFormation_interactions_heatmap.pdf') as pdf:
    for k,v in tls_markers.items():
        df_red = tls_df.loc[tls_df['ligand'].isin(v) | tls_df['receptor'].isin(v)].pivot(index='cells', columns='interaction', values='statistic').fillna(0)
        df_red = df_red.loc[df_red.sum(axis = 1) > 0,df_red.sum(axis = 0) > 0]

        # Hierarchical clustering needs at least two rows and two columns;
        # small categories (e.g. a single gene) would otherwise raise in linkage()
        if df_red.shape[0] < 2 or df_red.shape[1] < 2:
            continue

        # Cluster rows and columns
        row_order = leaves_list(linkage(df_red, method='ward'))
        col_order = leaves_list(linkage(df_red.T, method='ward'))

        df_red = df_red.iloc[row_order, col_order]

        p = sns.heatmap(df_red, cmap='Spectral_r', annot=None, fmt='.2g',
                        vmax=6, yticklabels=True, xticklabels=True)
        p.set_xlabel('Interaction')
        # Rows come from the 'cells' column. The label used to be taken from
        # `id_c`, a variable left over from an earlier cell.
        p.set_ylabel('Source -> Target')
        p.set_title(f'GC formation: {k}')
        p.set_yticklabels(p.get_yticklabels(), rotation=0)
        p.set_xticklabels(p.get_xticklabels(), rotation=90)
        p.figure.tight_layout()
        p.figure.set_size_inches(calc_figsize(height=df_red.shape[0]*5+20, width=df_red.shape[1]*5+20))

        pdf.savefig()
        plt.close()

In [None]:
# The colour dictionaries are imported here as well so the cell does not depend
# on an earlier cell having imported them
from plotting.utils import plot_interactions, thyAgeing_colors, thyAgeing_greys
from matplotlib.backends.backend_pdf import PdfPages

# Colour map shared by all pages
gc_cmap = sns.blend_palette(
    [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
    as_cmap=True,
)

# One scatter page per marker category
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_gcFormation_interactions_scatter.pdf') as pdf:
    for k in tls_markers.keys():
        test_df = ranksums_res_filtered.loc[(ranksums_res_filtered['ligand'].isin(tls_markers[k])) |
                                            (ranksums_res_filtered['receptor'].isin(tls_markers[k]))].copy()

        # Skip categories without any matching row, as the other scatter loops do
        if test_df.shape[0] == 0:
            continue

        p = plot_interactions(test_df, cell_type_levels = ct_levels, plot_title = k,
                              cmap = gc_cmap)
        pdf.savefig()
        # Close each figure after saving so open figures do not pile up
        plt.close()

In [None]:
# NOTE(review): disabled cell, kept for reference. The `ct_grouping_dict_l3` line aggregates 'taa_l4'
# exactly like the `ct_grouping_dict_l4` line; if re-enabled it presumably should aggregate 'taa_l3' — confirm.
# ct_anno_levels = pd.read_excel(f'{data_path}/curated/thyAgeing_full_curatedAnno_v9_2025-03-03_levels.xlsx')
# ct_grouping_dict_l4 = ct_anno_levels.groupby('taa_l1').agg({'taa_l4': lambda x: list(x)}).to_dict()['taa_l4']
# ct_grouping_dict_l3 = ct_anno_levels.groupby('taa_l1').agg({'taa_l4': lambda x: list(x)}).to_dict()['taa_l4']
# ct_grouping_dict = {k:ct_grouping_dict_l4[k]+ct_grouping_dict_l3[k] for k in list(ct_grouping_dict_l4.keys())}
# ct_grouping_dict = {k:[v for v in ct_levels if v in ct_grouping_dict[k]] for k in ct_grouping_dict.keys()}
# ct_grouping_dict = {k:v for k,v in ct_grouping_dict.items() if len(v) > 0}

# import pprint
# pprint.pprint(ct_grouping_dict, compact = True)

In [None]:
# Plot a single interaction and title it with that interaction's name; `k` is only a
# leftover loop variable from an earlier cell and does not describe this plot.
interaction_oi = 'CCL2 -> CCR2'
plot_interactions(ranksums_res_filtered.loc[ranksums_res_filtered['interaction'] == interaction_oi],
                  cell_type_levels=ct_levels, plot_title=interaction_oi)

### Roles of CCL19 in Germinal Centers

1. **Chemotaxis and Cell Migration**:
   - **Recruitment of T Cells and Dendritic Cells**: CCL19 is a potent chemoattractant for T cells and dendritic cells (DCs). It binds to the CCR7 receptor, which is expressed on these cells, guiding their migration into the T cell zones of secondary lymphoid organs, including the areas surrounding germinal centers.
   - **Positioning of Cells within Germinal Centers**: While CCL19 primarily acts in the T cell zones, its gradient can influence the positioning of cells within and around germinal centers, ensuring proper cell-cell interactions necessary for effective immune responses.

2. **Organization of Germinal Centers**:
   - **Structural Support**: CCL19, along with CCL21, helps organize the structure of secondary lymphoid organs by guiding the migration and positioning of T cells and DCs. This organization is crucial for the formation and maintenance of germinal centers, where B cells undergo proliferation, somatic hypermutation, and selection.
   - **Facilitation of T-B Cell Interactions**: The proper positioning of T cells and DCs around germinal centers, facilitated by CCL19, is essential for effective T-B cell interactions. These interactions are critical for the activation and differentiation of B cells within germinal centers.

3. **Immune Response Regulation**:
   - **Activation of T Cells**: CCL19 plays a role in the activation and priming of T cells by recruiting them to areas where they can interact with antigen-presenting cells (APCs), such as DCs. This activation is necessary for the subsequent help provided by T cells to B cells in germinal centers.
   - **Support of Dendritic Cell Function**: By recruiting DCs to the T cell zones, CCL19 ensures that these APCs can effectively present antigens to T cells, facilitating the initiation of adaptive immune responses.

4. **Maintenance of Lymphoid Architecture**:
   - **Homeostasis of Lymphoid Tissues**: CCL19, in conjunction with CCL21, is involved in maintaining the architecture and homeostasis of lymphoid tissues. This includes the proper formation of germinal centers and the overall organization of secondary lymphoid organs.

CCL19 plays a crucial role in the organization and function of germinal centers by guiding the migration and positioning of T cells and dendritic cells within secondary lymphoid organs. It facilitates the structural organization of germinal centers, supports effective T-B cell interactions, and regulates immune responses by ensuring the proper activation and function of T cells and dendritic cells. Understanding the role of CCL19 in germinal centers provides insights into the mechanisms underlying adaptive immune responses and the formation of effective immune memory.

### Roles of CCRL2 in Monocytes and Macrophages

1. **Chemokine Scavenging and Regulation**:
   - **Modulation of Chemokine Availability**: CCRL2 can bind to specific chemokines, such as CCL19, CCL5, and chemerin, without initiating typical chemokine receptor signaling. By binding these chemokines, CCRL2 can sequester them, reducing their availability to other signaling receptors and thereby modulating the local chemokine gradient and immune response.
   - **Regulation of Inflammation**: By scavenging chemokines, CCRL2 can help regulate the intensity and duration of inflammatory responses. This is particularly important in preventing excessive or chronic inflammation, which can lead to tissue damage.

2. **Immune Cell Recruitment and Migration**:
   - **Influence on Monocyte and Macrophage Trafficking**: Although CCRL2 itself does not directly mediate chemotaxis, its role in modulating chemokine levels can indirectly influence the recruitment and migration of monocytes and macrophages to sites of inflammation or injury.
   - **Regulation of Chemokine Receptor Expression**: CCRL2 expression can affect the expression and function of other chemokine receptors on monocytes and macrophages, thereby influencing their migratory behavior and positioning within tissues.

3. **Role in Inflammatory Diseases**:
   - **Contribution to Disease Pathogenesis**: Dysregulation of CCRL2 expression or function can contribute to the pathogenesis of various inflammatory diseases. For example, altered CCRL2 activity has been implicated in conditions such as rheumatoid arthritis, atherosclerosis, and asthma, where it can affect the recruitment and activation of monocytes and macrophages.
   - **Potential Therapeutic Target**: Given its role in modulating chemokine activity and inflammation, CCRL2 is being investigated as a potential therapeutic target for controlling inflammatory diseases. Modulating CCRL2 activity could help fine-tune immune responses and reduce pathological inflammation.

4. **Immune Surveillance and Homeostasis**:
   - **Maintenance of Tissue Homeostasis**: CCRL2 helps maintain tissue homeostasis by regulating the local chemokine environment, ensuring that monocytes and macrophages are appropriately recruited and activated in response to physiological needs.
   - **Role in Immune Surveillance**: By modulating chemokine levels, CCRL2 can influence the surveillance activities of monocytes and macrophages, ensuring they are effectively patrolling tissues and responding to potential threats.

CCRL2 plays a crucial role in monocytes and macrophages by acting as a chemokine scavenger receptor, modulating the availability and activity of chemokines. This regulation helps control the recruitment and migration of these immune cells, influences inflammatory responses, and maintains tissue homeostasis. Dysregulation of CCRL2 can contribute to the pathogenesis of various inflammatory diseases, making it a potential therapeutic target for controlling inflammation and immune responses. Understanding the role of CCRL2 in monocytes and macrophages provides insights into the mechanisms of immune regulation and the potential for therapeutic intervention in inflammatory conditions.

### Role of LTA/LTB in Germinal Centers & Lymphoid Architecture:
1. Formation of Secondary Lymphoid Organs (SLOs):
LTA and LTB signaling via LTβR is essential during embryonic development for the formation of lymph nodes, Peyer’s patches, and spleen organization.

LTβR activation drives expression of chemokines like CXCL13, CCL19, and CCL21, which attract and organize T cells, B cells, and dendritic cells.

2. Germinal Center Maintenance and Function:
Within mature lymphoid organs, GCs form in follicles. Their structure and maintenance depend on follicular dendritic cells (FDCs) — and FDC survival/function is driven by LTA/LTB signaling.

✳️ Key Effects:

- Promotes survival and maintenance of FDC networks — critical for antigen display to B cells in GCs.
- Supports CXCL13 production, which maintains B cell positioning in follicles.
- Influences the light zone vs. dark zone architecture of GCs.

3. B Cell Contribution:
Activated B cells in GCs express LTA and LTB, delivering signals to LTβR on stromal cells and FDCs, creating a feedback loop that maintains the follicular structure.

4. Impact on Adaptive Immunity:
Without LTA/LTB signaling, mice show:

- Disrupted lymphoid organ architecture.
- Poorly formed or absent GCs.
- Impaired class switching, affinity maturation, and memory B cell development.

TNFRSF14, also known as herpesvirus entry mediator (HVEM), is a member of the tumor necrosis factor receptor superfamily. It plays a significant role in the regulation of immune responses, including those occurring within germinal centers (GCs). Here are the key roles of TNFRSF14 in germinal centers:

### Roles of TNFRSF14 in Germinal Centers

1. **Regulation of T Cell and B Cell Interactions**:
   - **Costimulatory and Coinhibitory Signals**: TNFRSF14 can deliver both costimulatory and coinhibitory signals depending on its interaction with different ligands. It interacts with ligands such as LIGHT (TNFSF14), BTLA (B and T lymphocyte attenuator), and CD160. These interactions can modulate the activation and differentiation of T cells and B cells within germinal centers.
   - **Influence on T Follicular Helper (Tfh) Cells**: TNFRSF14 is expressed on Tfh cells, which are crucial for providing help to B cells in germinal centers. The interaction of TNFRSF14 with its ligands can influence the function and survival of Tfh cells, thereby affecting the quality and magnitude of the B cell response.

2. **Modulation of Germinal Center Formation and Maintenance**:
   - **Impact on Germinal Center B Cells**: TNFRSF14 is expressed on germinal center B cells, and its signaling can influence their proliferation, survival, and differentiation. Proper regulation of TNFRSF14 signaling is essential for the formation and maintenance of germinal centers, where B cells undergo somatic hypermutation and affinity maturation.
   - **Regulation of Apoptosis**: TNFRSF14 signaling can influence the apoptotic pathways in germinal center B cells. This regulation is important for the selection process within germinal centers, ensuring that only B cells with high-affinity receptors for the antigen survive and differentiate into memory B cells or plasma cells.

3. **Immune Regulation and Homeostasis**:
   - **Balancing Immune Responses**: TNFRSF14 plays a role in balancing immune responses by modulating the activation and inhibition of immune cells. In germinal centers, this balance is crucial for preventing excessive or inappropriate immune responses that could lead to autoimmunity or chronic inflammation.
   - **Interaction with Regulatory T Cells (Tregs)**: TNFRSF14 can interact with BTLA on regulatory T cells, influencing their function and the overall immune regulatory environment within germinal centers. This interaction helps maintain immune homeostasis and prevents autoimmunity.

4. **Role in Disease and Therapeutic Potential**:
   - **Implications in Autoimmune Diseases**: Dysregulation of TNFRSF14 signaling has been implicated in autoimmune diseases, where improper germinal center responses can lead to the production of autoantibodies. Understanding the role of TNFRSF14 in germinal centers can provide insights into the mechanisms underlying these diseases.
   - **Therapeutic Targeting**: Given its role in modulating immune responses, TNFRSF14 is a potential therapeutic target for modulating germinal center reactions in autoimmune diseases, chronic infections, and cancer. Targeting TNFRSF14 signaling pathways could help fine-tune immune responses and improve disease outcomes.

TNFRSF14 (HVEM) plays a crucial role in germinal centers by regulating T cell and B cell interactions, modulating germinal center formation and maintenance, and balancing immune responses. Its signaling influences the function and survival of Tfh cells and germinal center B cells, impacting the quality of the antibody response. Dysregulation of TNFRSF14 can contribute to autoimmune diseases, making it a potential therapeutic target for modulating immune responses in various conditions. Understanding the role of TNFRSF14 in germinal centers provides valuable insights into the mechanisms of immune regulation and the potential for therapeutic intervention.

## Innate immune cells

In [None]:
# Interactions whose sending ('source') population is one of the innate / innate-like cell types.
innate_sources = ['Mac', 'Mono', 'DC1', 'DC2', 'aDC', 'pDC', 'ILC', 'NK', 'T_MAIT']
sent_by_innate = ranksums_res_filtered['source'].isin(innate_sources)
innate_df = ranksums_res_filtered.loc[sent_by_innate]
innate_df

In [None]:
# Number of rows per interaction, showing the 50 most frequent first.
rows_per_interaction = innate_df.groupby('interaction').size()
rows_per_interaction.sort_values(ascending=False).head(50)

In [None]:
from plotting.utils import plot_interactions,thyAgeing_greys,thyAgeing_colors
from matplotlib.backends.backend_pdf import PdfPages

# Blue -> grey -> yellow colormap shared by every page (built once, not per family)
family_cmap = sns.blend_palette([thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']], as_cmap=True)

# One page per gene-family prefix, restricted to interactions sent by innate cells
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_innate_interactions_by_family.pdf') as pdf:
    for pattern in ['CXC', 'CC', 'IL', 'IFN', 'TNF']:
        # Only accept the prefix at the start of a gene symbol (not preceded by a letter
        # or digit). A plain substring search would, for example, let 'IL' match 'LILRB1'.
        family_regex = rf'(?<![A-Za-z0-9]){pattern}'
        # na=False: rows without an interaction label never match
        is_family = innate_df['interaction'].str.contains(family_regex, regex=True, na=False)
        df_plot = innate_df.loc[is_family]
        if df_plot.empty:
            continue
        plot_interactions(
            df_plot,
            cell_type_levels=ct_levels,
            cmap=family_cmap,
            figsize=calc_figsize(width=90, height=100),
            plot_title=f'Innate interactions: {pattern}'
        )
        pdf.savefig()
        plt.close()

In [None]:
# Scatter plot of all innate-sent interactions whose label contains the pattern.
pattern = 'IL18'
label_matches = innate_df['interaction'].str.contains(pattern, regex=True)
df_plot = innate_df.loc[label_matches]
plot_interactions(
    df_plot,
    cell_type_levels=ct_levels,
    cmap=sns.blend_palette(
        [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
        as_cmap=True,
    ),
    figsize=calc_figsize(width=90, height=100),
    plot_title=f'Innate interactions: {pattern}',
)
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_innate_interactions_{pattern}_scatter.pdf')

In [None]:
# Scatter plot of innate-sent interactions for a fixed set of ligands.
# NOTE(review): the title only mentions `pattern` ('TNF') although IFNG and CD93
# ligands are plotted as well — confirm this is intended.
pattern = 'TNF'
ligands_shown = ['TNF', 'IFNG', 'CD93']
df_plot = innate_df.loc[innate_df['ligand'].isin(ligands_shown)]
plot_interactions(
    df_plot,
    cell_type_levels=ct_levels,
    cmap=sns.blend_palette(
        [thyAgeing_colors['blue'], thyAgeing_greys['grey5'], thyAgeing_colors['yellow']],
        as_cmap=True,
    ),
    figsize=calc_figsize(width=90, height=100),
    plot_title=f'Innate interactions: {pattern}',
)
plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_innate_interactions_TNF_IFN_scatter.pdf')

## Plotting

### Interaction plot (scatter and line plot)

In [None]:
# # Point df
# points_df = []
# for i in ['target', 'source']:
#     df_sub = test_df[[i, f'{i}_logFC']].copy()
#     df_sub.columns = ['name', 'logFC']
#     df_sub['type'] = i
#     #df_sub['merge_type'] = 'source-ligand' if i == 'source' else 'receptor-target'
#     points_df.append(df_sub)

# lr_levels = []
# for i in ['ligand', 'receptor']:
#     df_sub = test_df[[i]].copy()
#     df_sub.columns = ['name']
#     df_sub['logFC'] = 1
#     df_sub['type'] = i
#     #df_sub['merge_type'] = 'ligand-receptor'
#     lr_levels.extend(df_sub['name'].unique().tolist())
#     points_df.append(df_sub)
    
# points_df = pd.concat(points_df, axis = 0)
# points_df.fillna({'logFC' : 1}, inplace = True)
# points_df = points_df.drop_duplicates()

# # Reorder points_df
# ts_levels = [v for v in ct_levels if v in points_df['name'].unique().tolist()]
# name_levels = ts_levels + lr_levels
# points_df['name'] = pd.Categorical(points_df['name'], categories=name_levels, ordered=True)
# points_df = points_df.sort_values(by='name', key=lambda col: col.map({name: i for i, name in enumerate(name_levels)}))

# max_n = points_df.groupby('type').size().max()

# points_df['y'] = points_df.groupby('type').cumcount() * (max_n / points_df.groupby('type').size()[points_df['type']].values) + (max_n - points_df.groupby('type').size()[points_df['type']].values) / points_df.groupby('type').size()[points_df['type']].values * 0.5
# points_df['x'] = points_df['type'].map({'source': 0.5, 'ligand': 1.5, 'receptor': 2, 'target': 3})

# edge_df = []
# # Ct to gene
# for i in [('ligand', 'source'), ('receptor', 'target')]:
#     df_sub = test_df[[i[0], i[1], f'{i[0]}_logFC']].copy()
#     df_sub.columns = ['from', 'to', 'logFC']
#     df_sub['rel'] = 'cell-gene'
#     df_sub['from_type'] = i[0]
#     df_sub['to_type'] = i[1]
#     edge_df.append(df_sub)
    
# # Gene to gene
# for i in [('ligand', 'receptor')]:
#     df_sub = test_df[[i[0], i[1]]].copy()
#     df_sub.columns = ['from', 'to']
#     df_sub['logFC'] = 1
#     df_sub['rel'] = 'gene-gene'
#     df_sub['from_type'] = i[0]
#     df_sub['to_type'] = i[1]
#     edge_df.append(df_sub)
# edge_df = pd.concat(edge_df, axis = 0)
# edge_df= edge_df.drop_duplicates()
  
# edge_df = edge_df.merge(points_df[['name', 'x', 'y', 'type']], left_on = ['from', 'from_type'], right_on = ['name', 'type'], how = 'left').drop(columns = ['name', 'type']).rename(columns = {'x' : 'x_from', 'y' : 'y_from'}).merge(points_df[['name', 'x', 'y', 'type']], left_on = ['to', 'to_type'], right_on = ['name', 'type'], how = 'left').drop(columns = ['name', 'type']).rename(columns = {'x' : 'x_to', 'y' : 'y_to'})
# edge_df = edge_df.drop_duplicates()

# import seaborn as sns

# import matplotlib.pyplot as plt

# # Create the scatter plot
# plt.figure(figsize=calc_figsize(width = 120, height = 120))

# # Create a color map for the logFC values
# norm = plt.Normalize(vmin=-5, vmax=5)
# sm = plt.cm.ScalarMappable(cmap='managua_r', norm=norm)
# sm.set_array([])
    
# scatter = sns.scatterplot(
#     data=points_df.loc[points_df['type'].isin(['source', 'target'])],
#     x='x',
#     y='y',
#     hue='logFC',
#     palette = 'managua_r', hue_norm=norm,
#     size='logFC',
#     sizes=(20, 100),
#     legend=False,
# )

# sns.scatterplot(
#     data=points_df.loc[points_df['type'].isin(['ligand', 'receptor'])],
#     x='x',
#     y='y',
#     marker='s',
#     color='black', s = 50,
#     legend=False,
#     ax = scatter,
# )

# # Plot lines connecting points based on edge_df
# for _, row in edge_df.iterrows():
#     linestyle = 'solid' if row['rel'] == 'cell-gene' else '--'
#     linecolor = sm.to_rgba(row['logFC']) if row['rel'] == 'cell-gene' else 'black'
#     linewidth = 0.5 if row['rel'] == 'cell-gene' else 1
#     scatter.plot(
#         [row['x_from'], row['x_to']],
#         [row['y_from'], row['y_to']],
#         linestyle=linestyle,
#         color=linecolor,
#         alpha=0.8,
#         linewidth=linewidth,
#         zorder=0,  # Move lines to the background
#     )
    
# # Add labels to each point
# for _, row in points_df.iterrows():
#     orig_kws = {'x' : row['x'], 'y':row['y'], 's':row['name'], 'fontsize':5}
#     if row['type'] == 'source':
#         kws = {'ha': 'right', 'va': 'center', 'x' : row['x'] - 0.1}
#     elif row['type'] == 'target':
#         kws = {'ha': 'left', 'va': 'center', 'x' : row['x'] + 0.1}
#     elif row['type'] == 'ligand' or row['type'] == 'receptor':
#         kws = {'ha': 'center', 'va': 'bottom', 'y' : row['y'] + 0.8, 'fontweight': 'bold'}
#     orig_kws.update(kws)
#     scatter.text(**orig_kws)

# # Add labels and title
# plt.xlabel('Type')
# plt.ylabel('Position')
# plt.title('LTo TLS interactions', fontsize=6)

# # Add colorbar with reduced width
# cbar = plt.colorbar(sm, label='logFC', ax=scatter, aspect=30)

# # Adjust the x-axis ticks
# plt.xticks(ticks=[0, 1, 2, 3], labels=['Source', 'Ligand', 'Receptor', 'Target'])
# plt.xlim(-0.3, 3.8)
# plt.ylim(-1, max_n + 1)

# # Despine
# scatter.set_axis_off()

# # Show the plot
# plt.tight_layout()
# plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_gcFormation_LTo_interactions_scatter.pdf', dpi=300)

### Circos plot

In [None]:
# Select the rows of a single ligand -> receptor interaction and preview them.
ia = 'CCL21 -> CCR7'
is_selected_interaction = ranksums_res_filtered['interaction'] == ia
ia_df = ranksums_res_filtered.loc[is_selected_interaction].copy()
ia_df.head()

In [None]:
from matplotlib import cm
from matplotlib.colors import to_hex

# Map every unique interaction in `ia_df` to a hex colour sampled from 'tab20c'
unique_interactions = ia_df['interaction'].unique()
# plt.get_cmap replaces matplotlib.cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9; the second argument resamples the colormap to that many entries.
cmap = plt.get_cmap('tab20c', len(unique_interactions))
color_map = {interaction: to_hex(cmap(i)) for i, interaction in enumerate(unique_interactions)}

In [None]:
# Every cell type that occurs as sender or receiver in the filtered results
senders = ranksums_res_filtered['source'].unique().tolist()
receivers = ranksums_res_filtered['target'].unique().tolist()
cts = list(set(senders + receivers))

# Manual assignment of cell types to broad groups; dict order defines the sector order
groups = {
    'T': ['T_CD4_naive_recirc', 'T_CD4_act', 'T_CD4_h', 'T_CD4_fh', 'T_CD8_naive_recirc',
          'T_CD8_em', 'T_CD8_rm', 'T_CD8_age-assoc', 'T_Treg_tr', 'T_Treg_recirc'],
    'Innate': ['ILC', 'NK', 'T_MAIT'],
    'B': ['B_GC-like', 'B_mem', 'B_plasma', 'B_naive', 'B_age-associated', 'B_plasmablast'],
    'DC': ['DC1', 'DC2', 'pDC', 'aDC'],
    'Fb': ['Fb-adipo', 'Fb-interlo', 'Fb-med', 'Fb-perilo', 'Fb-interm'],
    'Vasc': ['EC-art', 'EC-cap', 'EC-cap_lipid', 'EC-lymphatic', 'EC-ven', 'Pericyte', 'SMC'],
    'Myeloid': ['Mac', 'Mono'],
    'TEC': ['cTEC', 'mTECI', 'mTECII', 'mTECIII', 'mcTEC', 'TEC-EMT', 'TEC-mim'],
}

# One sector of size 2 per grouped cell type that is present in the results
sectors = {ct: 2 for members in groups.values() for ct in members if ct in cts}

# Groups restricted to the cell types present; groups left empty are dropped
groups_upd = {}
for group_name, members in groups.items():
    present_members = [ct for ct in members if ct in cts]
    if len(present_members) > 0:
        groups_upd[group_name] = present_members

# One palette entry per group, paired in order
group_cmap = dict(zip(groups.keys(), sns.color_palette('colorblind', as_cmap=True)))

# Displayed as a check: cell types in the results that were not assigned to any group
[ct for ct in cts if ct not in sectors]

In [None]:
from pycirclize import Circos
from pycirclize.utils import calc_group_spaces, ColorCycler
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D
import random

# --- Settings shared by every page (computed once instead of per interaction) ---
# Link colour scale for the 'statistic' column
norm = Normalize(vmin=0, vmax=6)
# plt.get_cmap replaces matplotlib.cm.get_cmap (deprecated in Matplotlib 3.7, removed in 3.9)
cmap = plt.get_cmap('inferno_r')
ColorCycler.set_cmap('Paired')
# Seeded generator so the randomly jittered link anchors, and therefore the PDF, are reproducible
link_rng = random.Random(0)
# NOTE(review): `spaces` is computed but never passed to `Circos(...)`, so it currently has
# no effect on the plot — confirm whether `Circos(sectors, space=spaces)` was intended.
spaces = calc_group_spaces([len(g) for g in groups_upd.values() if len(g) > 1], space_bw_group=30, space_in_group=5)

# One circos page per interaction: sectors are cell types, links go from source to target
with PdfPages(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_generalChemo_interactions_circos.pdf') as pdf:
    for ia in ranksums_res_filtered['interaction'].unique().tolist():
        ia_df = ranksums_res_filtered.loc[ranksums_res_filtered['interaction'] == ia].copy()

        circos = Circos(sectors)

        # Plot sector track
        for sector in circos.sectors:
            track = sector.add_track(r_lim=(75, 95))
            track.axis(fc='#e8e8e8')
            track.text(sector.name.replace('_', ' '), adjust_rotation=True, fontsize=6, orientation='vertical', wrap=True)

        # Plot group sectors range (outer band coloured and labelled per group)
        for group_name, group_members in groups_upd.items():
            group_deg_lim = circos.get_group_sectors_deg_lim(group_members)
            circos.rect(r_lim=(97, 100), deg_lim=group_deg_lim, fc=group_cmap[group_name], ec="black", lw=0.5)
            group_center_deg = sum(group_deg_lim) / 2
            circos.text(f"{group_name}", r=103, deg=group_center_deg, adjust_rotation=True, fontsize=7)

        # Plot one link per row; colour and width both encode the statistic.
        # Anchors are jittered within the sector (size 2) so links do not overlap exactly.
        for source, target, statistic in zip(ia_df['source'], ia_df['target'], ia_df['statistic']):
            circos.link_line(
                (source, link_rng.uniform(0.1, 1.9)),
                (target, link_rng.uniform(0.1, 1.9)),
                direction=1,
                color=cmap(norm(statistic)),
                lw=statistic / 2,
            )

        # Plot legend
        # line_handles = [Line2D([], [], color=v, label=f'{k}') for k,v in color_map.items() if k in ia_df['interaction'].unique()]

        # line_legend = circos.ax.legend(
        #     handles=line_handles,
        #     bbox_to_anchor=(0.93, 1.02),
        #     fontsize=6,
        #     title="Interaction",
        #     title_fontsize=7,
        #     handlelength=2,
        # )
        # circos.ax.add_artist(line_legend)

        circos.colorbar(vmin=0, vmax=6,
                        cmap=cmap,
                        label='Statistic',
                        orientation='vertical',
                        colorbar_kws=dict(ticks=[0, 2, 4, 6]),
                        )

        fig = circos.plotfig(figsize=calc_figsize(width=120, height=120))

        # Page title: the interaction shown
        circos.ax.text(0, 110, f"{ia}", size=7, va="top", ha="center")

        # Save exactly this figure to the PDF, then release it
        pdf.savefig(fig)
        plt.close(fig)

#plt.savefig(f'{plots_path}/cellComm/recirc/by_sample/thyAgeing_diffCCC_recirc_filteredSignalling_{ia}_circos.pdf')

### Graph-based plot

In [None]:
# Interaction of interest, written as '<ligand> -> <receptor>'; split it into its two genes.
ia_oi = 'CCL21 -> CCR7'
ligand_oi, receptor_oi = (gene.strip() for gene in ia_oi.split('->'))
ligand_oi

In [None]:
# Rows of the interaction of interest. Re-derived from `ia_oi` because the circos loop
# rebinds `ia_df` on every iteration, leaving it at the last interaction it plotted.
ia_df = ranksums_res_filtered.loc[ranksums_res_filtered['interaction'] == ia_oi].copy()

# Prepare cell vertex attributes: one row per (cell type, role), annotated with the
# cell type's logFC from `all_freq`
cell_vertex_attr = (
    ia_df[['source', 'target']]
    .melt(value_name='cell_type')
    .drop_duplicates()
    .merge(all_freq[['anno', 'logFC']], left_on='cell_type', right_on='anno', how='left')
    .rename(columns={'variable': 'level', 'cell_type': 'vertex_name', 'logFC': 'log2FoldChange'})
)
# NOTE(review): a cell type that is both source and target yields two rows with the same
# vertex_name here — confirm the graph construction accepts duplicated vertex names.
cell_vertex_attr = cell_vertex_attr[['vertex_name', 'log2FoldChange', 'level']].drop_duplicates()

# Prepare gene vertex attributes
gene_vertex_attr = pd.DataFrame({'vertex_name': [ligand_oi, receptor_oi],
                                 'level': ['ligand', 'receptor']})

# Prepare cell-gene edge attributes: source-ligand and target-receptor pairs, each
# annotated with the gene's log2FoldChange in that cell type from `all_degs`
deg_lfc = all_degs[['log2FoldChange', 'cell_type']].reset_index()
cell_gene_edge_attr = pd.concat([
    ia_df[[gene_col, cell_col]]
    .melt(id_vars=cell_col, value_name='gene_name')
    .drop_duplicates()
    .rename(columns={cell_col: 'cell_type'})
    .merge(deg_lfc, on=['gene_name', 'cell_type'], how='left')
    for gene_col, cell_col in [('ligand', 'source'), ('receptor', 'target')]
]).rename(columns={'variable': 'level'}).drop_duplicates()
cell_gene_edge_attr.rename(columns={'cell_type': 'source_vertex', 'gene_name': 'target_vertex'}, inplace=True)
cell_gene_edge_attr = cell_gene_edge_attr[['source_vertex', 'target_vertex', 'log2FoldChange']].drop_duplicates()
cell_gene_edge_attr['relationship_type'] = 'cell-gene'

# Prepare gene-gene edge attributes (the ligand-receptor pair of the interaction of interest)
gene_gene_edge_attr = pd.DataFrame({'source_vertex': ligand_oi, 'target_vertex': receptor_oi, 'relationship_type': 'gene-gene'}, index=[0])

vertex_attr = pd.concat([cell_vertex_attr, gene_vertex_attr], axis=0)
edge_attr = pd.concat([cell_gene_edge_attr, gene_gene_edge_attr], axis=0)

# Vertices without a fold change (the gene vertices, and cell types missing from
# `all_freq`) get the placeholder 10; edges without one (the gene-gene edge, and
# pairs missing from `all_degs`) get 0.
vertex_attr.fillna({'log2FoldChange': 10}, inplace=True)
edge_attr.fillna({'log2FoldChange': 0}, inplace=True)

In [None]:
import igraph as ig
from igraph import Graph

# Undirected graph built from the edge and vertex tables; use_vids=False means the
# first two edge columns refer to vertices by name rather than by integer id.
g = ig.Graph.DataFrame(edge_attr, vertices=vertex_attr, directed=False, use_vids=False)

In [None]:
# Edge colour scale for log2FoldChange
norm = Normalize(vmin=-5, vmax=5)
# plt.get_cmap replaces matplotlib.cm.get_cmap (deprecated in Matplotlib 3.7, removed in 3.9)
cmap = plt.get_cmap('Spectral_r')

visual_style = {}
# `g.vs[...]` is a plain list, so `list * 100` would repeat it 100 times instead of
# scaling the values; scale element-wise. abs() because a size cannot be negative.
# NOTE(review): the factor 100 is taken from the original expression and may need tuning.
visual_style["vertex_size"] = [abs(lfc) * 100 for lfc in g.vs["log2FoldChange"]]
#visual_style["vertex_color"] = [color_dict[gender] for gender in g.vs["gender"]]
visual_style["vertex_label"] = g.vs["name"]
# Exactly one colour per edge: non-numeric values fall back to gray instead of being
# dropped, which would shift the colours of all following edges.
visual_style['edge_color'] = [
    cmap(norm(lfc)) if isinstance(lfc, (int, float)) else 'gray'
    for lfc in g.es['log2FoldChange']
]
ig.plot(g, **visual_style)