# Thymus ageing atlas | T/NK cell compartment: senescence signatures

In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import hdf5plugin

# Add repo path to sys path (allows to access scripts and metadata from repo)
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/T_NK_compartment'
sys.path.insert(1, repo_path) 
sys.path.insert(2, '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts')

# Autoreload custom scripts
%load_ext autoreload
%autoreload 2

# Input/output locations derived from the repo root
# (plots_path and data_path intentionally keep their trailing slash)
plots_path = repo_path + '/plots/'
data_path = repo_path + '/data/'
model_path = os.path.join(repo_path, 'models')
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print(f'Dir for plots: {plots_path}')
print(f'Dir for data: {data_path}')

# Formatting
from matplotlib import font_manager
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

import warnings
warnings.filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette, t_nk_groupings
from plotting.utils import plot_grouped_boxplot, calc_figsize,thyAgeing_colors, get_tint_palette

## Load data

In [None]:
# Read the integrated atlas object
# NOTE(review): the path ends in `.zarr` but is opened with `read_h5ad` — confirm the on-disk format.
object_version = 'v5_2025-04-03'
adata = ad.read_h5ad(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr')

# Swap in the curated annotations: drop every obs column the CSV re-supplies, then join on the cell index
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
stale_cols = [col for col in ct_anno.columns if col in adata.obs.columns]
adata.obs = adata.obs.drop(columns = stale_cols).join(ct_anno)

# Keep annotated cells only: cells whose taa_l5 label is missing or matches one of the patterns are removed
is_excluded = adata.obs['taa_l5'].str.contains('locnt|-sp|explore', na = True)
adata = adata[~is_excluded]

# Refresh obs with the most recent metadata sheet
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)


adata

In [None]:
# Log-normalise data
# Both calls act on `adata` directly (their return values are not used):
# first scale every cell to a total of 1e4, then apply log1p.
# NOTE(review): assumes adata.X holds raw counts at this point — confirm.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Define columns
# obs columns holding the broad / fine cell type annotation and their ordered T/NK levels
col_cell_type_broad = 'taa_l3'
col_cell_type_fine = 'taa_l4'
col_cell_type_broad_levels = get_ct_levels(col_cell_type_broad, taa_l1 = ['T', 'NK'])
col_cell_type_fine_levels = get_ct_levels(col_cell_type_fine, taa_l1 = ['T', 'NK'])
col_age_group = 'age_group'
# Fetch the matching `<column>_levels` object (e.g. `age_group_levels` imported from anno_levels)
# by name; a plain namespace lookup avoids running a constructed string through eval().
col_age_group_levels = globals()[f'{col_age_group}_levels']

In [None]:
# Load DEGs (pickled dict of per-cell-type result tables, keyed by cell type)
import pickle

deg_path = f'{general_data_path}/analyses/dea/thyAgeing_dea_taa_l4_adult_vs_infant_ageEffect.pkl'

# NOTE: unpickling can execute arbitrary code — only load files produced by this project.
with open(deg_path, 'rb') as f:
    deg_by_ct = pickle.load(f)

# Keep the T/NK populations only
deg_by_ct = {ct: res for ct, res in deg_by_ct.items() if ct in col_cell_type_fine_levels}

# Stack into one long table. `gene_name` stays a regular column (not the index):
# the downstream cells filter on deg_df['gene_name'] and merge on that column.
deg_df = pd.concat(deg_by_ct).reset_index(names = ['cell_type', 'gene_name'])
deg_df['cell_type'] = pd.Categorical(deg_df['cell_type'], categories=col_cell_type_fine_levels, ordered=True)
# Direction of change; anything that is not strictly positive (including NaN) is labelled 'down'
deg_df['up_down'] = ['up' if lfc > 0 else 'down' for lfc in deg_df['log2FoldChange']]

deg_df.head()

## Senescence markers

In [None]:
gene_program = 'senescence'

# Read the SenMayo marker sheet and turn it into a {class: [genes]} lookup
senmayo_table = pd.read_excel(f'{data_path}/curated/senMayoMarkers.xlsx',
                              names = ['gene_name', 'class', 'state', 'ref'])
senmayo_dict = {marker_class: list(genes) for marker_class, genes in senmayo_table.groupby('class')['gene_name']}
# Flat list of all marker genes across classes
senmayo_genes = [gene for class_genes in senmayo_dict.values() for gene in class_genes]

import pprint
pprint.pprint(senmayo_dict,compact = True)

# Marker genes that are absent from the object
[gene for gene in senmayo_genes if gene not in adata.var_names]

In [None]:
# Add senescence score
# The score was computed once with the two commented lines below and cached as CSV; it is only read back here.
# sc.tl.score_genes(adata, gene_list=senmayo_genes, score_name='senmayo_score', use_raw=False)
# adata.obs[['senmayo_score']].to_csv(f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_SenMayo_score.csv')

senmayo_score = pd.read_csv(f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_SenMayo_score.csv', index_col=0)
# Drop any previous copy of the score column(s) first so the cell can be re-run without
# `join` failing on overlapping column names (same approach as for the curated annotations).
existing_score_cols = [c for c in senmayo_score.columns if c in adata.obs.columns]
adata.obs = adata.obs.drop(columns = existing_score_cols).join(senmayo_score)

In [None]:
# Mean SenMayo score per age group x cell type x donor
# (rows with a missing mean are kept; no dropna is applied)
df = (adata.obs
      .groupby([col_age_group, col_cell_type_fine, 'donor'])['senmayo_score']
      .mean()
      .rename('mean_sen')
      .reset_index())
df.head()

In [None]:
# Grouped boxplot of the per-donor mean SenMayo score by cell population, split by age group
senmayo_stats_prefix = f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_'
senmayo_boxplot_pdf = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_boxplot.pdf'

plot_grouped_boxplot(
    data = df,
    x = col_cell_type_fine,
    y = 'mean_sen',
    hue = col_age_group,
    order = col_cell_type_fine_levels,
    hue_order = col_age_group_levels,
    x_label = 'Cell population',
    y_label = 'Mean SenMayo score',
    legend_title = 'Age group',
    add_stats = True,
    format_percent = False,
    save_stats = senmayo_stats_prefix,
)
plt.savefig(senmayo_boxplot_pdf, dpi = 300, bbox_inches = 'tight')

In [None]:
# How many SenMayo genes are differentially expressed per cell type?
# DEG definition used here: padj < 0.05 and |log2FoldChange| >= 1.3
sen_genes_in_adata = [g for genes in senmayo_dict.values() for g in genes if g in adata.var_names]
is_sen_deg = (
    deg_df['gene_name'].isin(sen_genes_in_adata)
    & (deg_df['padj'] < 0.05)
    & (deg_df['log2FoldChange'].abs() >= 1.3)
)
# Long gene -> program table built from the marker dictionary
sen_gene_to_program = (
    pd.DataFrame.from_dict(senmayo_dict, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
sen_degs = deg_df[is_sen_deg].merge(sen_gene_to_program)
sen_degs.groupby('cell_type').size()

In [None]:
import matplotlib.colors as mcolors
import textwrap
df = sen_degs.copy()
# Insert a space after ')' and '/' so long program names get break points for title wrapping
df['program'] = df['program'].str.replace(')', ') ').str.replace('/', '/ ')

# Diverging colour scale centred on zero, spanning the observed log2FC range
lfc = df['log2FoldChange']
vmin, vmax, vcenter = lfc.min(), lfc.max(), 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)

# One facet per program; facet width proportional to its number of genes (scaled by 1.1)
xranges = df.groupby("program")["gene_name"].nunique() * 1.1
facet_options = {'sharey': True, 'sharex': False, 'gridspec_kws': dict(width_ratios=xranges)}
g = sns.relplot(
    data=df, x='gene_name', y='cell_type',
    hue='log2FoldChange', size='log2FoldChange',
    palette=cmap, hue_norm=normalize,
    height=7, aspect=0.5,
    col='program', col_order=xranges.index,
    legend=None, facet_kws=facet_options,
)

# Function to wrap text
def wrap_text(text, width=15):
    """Return `text` re-flowed into newline-separated lines of at most `width` characters."""
    return textwrap.fill(text, width)

# Use the bare program name as facet title, then re-set each title in wrapped form
g.set_titles("{col_name}", fontweight='bold')
for facet_ax in g.axes.flat:
    facet_ax.set_title(wrap_text(facet_ax.get_title()), fontweight='bold')

# Axis cosmetics
g.set_xticklabels(rotation=90)
g.set_xlabels('Gene')
g.set_ylabels('Cell type')

g.tight_layout()

# Colorbar for log2FC, built from the same cmap/norm as the points (relplot legend is disabled)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
g.figure.colorbar(sm, ax=g.axes, orientation='vertical', label='log2FoldChange', pad=0.01)

sen_heatmap_pdf = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tCells_{gene_program}_deg_heatmap.pdf'
plt.savefig(sen_heatmap_pdf, dpi = 300, bbox_inches = 'tight')

In [None]:
# Per gene and direction: in how many (and which) cell types it is a senescence DEG; 50 most widespread first
sen_gene_summary = sen_degs.groupby(['gene_name', 'up_down']).agg(
    n_cell_types = ('cell_type', 'nunique'),
    cell_types = ('cell_type', list),
)
sen_gene_summary.reset_index().sort_values('n_cell_types', ascending = False).head(50)

In [None]:
# One dot plot per cell type, restricted to the senescence genes that are DEGs in that cell type
for ct in sen_degs['cell_type'].unique():

    is_ct = adata.obs[col_cell_type_fine] == ct
    goi = sen_degs.loc[sen_degs['cell_type'] == ct, 'gene_name'].unique().tolist()

    # Keep only genes of interest per program and skip programs left empty
    filtered_senmayo_dict = {}
    for program, program_genes in senmayo_dict.items():
        kept = [gene for gene in program_genes if gene in goi]
        if kept:
            filtered_senmayo_dict[program] = kept

    # Age groups present for this cell type, in the canonical order
    ages_present = adata.obs.loc[is_ct, col_age_group].unique().tolist()
    age_order = [c for c in col_age_group_levels if c in ages_present]

    ct_dotplot = sc.pl.DotPlot(adata[is_ct],
                               filtered_senmayo_dict,
                               groupby=col_age_group,
                               categories_order=age_order,
                               cmap='magma',
                               mean_only_expressed=True)
    dotplot = ct_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_{ct}_senMarkers_dotplot.pdf')

## Cell cycle activity

In [None]:
gene_program = 'cellCycle'
cell_cycle_genes_df = pd.read_excel('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/T_NK_compartment/data/curated/regev_cell_cycle_genes.xlsx', skiprows = 6, usecols=[0,1])

# For each column of the sheet: strip whitespace (stored back into the table),
# then keep the non-missing genes that exist in the object
cell_cycle_genes = {}
for col in cell_cycle_genes_df.columns:
    stripped = cell_cycle_genes_df[col].str.strip()
    cell_cycle_genes_df[col] = stripped
    cell_cycle_genes[col] = [g for g in stripped.dropna().tolist() if g in adata.var_names]

import pprint
pprint.pprint(cell_cycle_genes, compact=True)

In [None]:
# Score cell cycle
# The keys 'G1/S' and 'G2/M' are the column headers of the gene sheet read into `cell_cycle_genes`.
# Later cells read 'S_score', 'G2M_score' and 'phase' from adata.obs, which are presumably written by this call.
sc.tl.score_genes_cell_cycle(adata, s_genes=cell_cycle_genes['G1/S'], g2m_genes=cell_cycle_genes['G2/M'])

In [None]:
# Inspect the cell metadata after cell cycle scoring
adata.obs

In [None]:
# Cell cycle scores of the early developmental populations.
# S-score is averaged within each sample first and then across samples of a donor,
# so every sample contributes equally to its donor's mean.
is_dev_early = adata.obs[col_cell_type_fine].isin(t_nk_groupings['dev_early'])
score_cols = ['S_score', 'G2M_score', 'donor', col_age_group, col_cell_type_fine, 'phase', 'study', 'sample']
cell_cycle_score_df = adata.obs[is_dev_early][score_cols].copy()

s_score_per_sample = (cell_cycle_score_df
                      .groupby([col_age_group, col_cell_type_fine, 'donor', 'study', 'sample'])['S_score']
                      .mean()
                      .reset_index())
df = (s_score_per_sample
      .groupby([col_age_group, col_cell_type_fine, 'study', 'donor'])['S_score']
      .mean()
      .reset_index())

df

In [None]:
from plotting.utils import plot_grouped_boxplot, calc_figsize,thyAgeing_colors, get_tint_palette

# Grouped boxplot of the per-donor mean S-score in early developmental populations
sscore_stats_prefix = f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_sscore_'
sscore_boxplot_pdf = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_sscore_boxplot.pdf'

plot_grouped_boxplot(
    data = df,
    x = col_cell_type_fine,
    y = 'S_score',
    hue = col_age_group,
    order = t_nk_groupings['dev_early'],
    hue_order = col_age_group_levels,
    x_label = 'Cell population',
    y_label = 'Mean S-score',
    legend_title = 'Age group',
    add_stats = True,
    format_percent = False,
    figsize = calc_figsize(width = 70, height = 50),
    save_stats = sscore_stats_prefix,
    palette = get_tint_palette(thyAgeing_colors['magenta']),
)
plt.savefig(sscore_boxplot_pdf, dpi = 300, bbox_inches = 'tight')

In [None]:
# Frequency of each cell cycle phase per cell type (via freq_by_donor); only the S-phase rows are kept
is_dev_early = adata.obs[col_cell_type_fine].isin(t_nk_groupings['dev_early'])
cell_cycle_score_df = adata.obs[is_dev_early][['S_score', 'G2M_score', 'donor', col_age_group, col_cell_type_fine, 'phase', 'study', 'sample']].copy()
phase_freq = freq_by_donor(cell_cycle_score_df,
                           sample_col = 'sample',
                           donor_col = 'donor',
                           summary_col = 'phase',
                           cluster_col = [col_cell_type_fine],
                           add_meta = [col_age_group])
df = phase_freq[phase_freq['phase'] == 'S']
df

In [None]:
from plotting.utils import plot_grouped_boxplot, calc_figsize,thyAgeing_colors, get_tint_palette

# Grouped boxplot of the S-phase proportion for four selected populations
s_phase_cts = ['T_DN(early)', 'T_DN(P)', 'T_DN(Q)', 'T_DP(P)']
sprop_stats_prefix = f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_sprop_'
sprop_boxplot_pdf = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_sprop_boxplot.pdf'

plot_grouped_boxplot(
    data = df.loc[df[col_cell_type_fine].isin(s_phase_cts)],
    x = col_cell_type_fine,
    y = 'mean_prop',
    hue = col_age_group,
    order = s_phase_cts,
    hue_order = col_age_group_levels,
    x_label = 'Cell population',
    y_label = 'Mean proportion (S-phase)',
    legend_title = 'Age group',
    add_stats = True,
    format_percent = True,
    figsize = calc_figsize(width = 50, height = 50),
    save_stats = sprop_stats_prefix,
    # NOTE(review): a lower limit of 0.05 hides proportions below 5 % — was -0.05 intended?
    ylim = (0.05, 1.05),
    palette = get_tint_palette(thyAgeing_colors['magenta']),
)
plt.savefig(sprop_boxplot_pdf, dpi = 300, bbox_inches = 'tight')

In [None]:
# How many cell cycle genes are differentially expressed per cell type and direction?
# DEG definition: padj < 0.05 and |log2FoldChange| >= 1.3, early developmental populations only
is_dev_early_deg = (
    (deg_df['log2FoldChange'].abs() >= 1.3)
    & (deg_df['padj'] < 0.05)
    & deg_df['cell_type'].isin(t_nk_groupings['dev_early'])
)
# Long gene -> phase table built from the cell cycle gene dictionary
cc_gene_to_program = (
    pd.DataFrame.from_dict(cell_cycle_genes, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
cell_cycle_degs = deg_df.loc[is_dev_early_deg, :].merge(cc_gene_to_program, how = 'inner').drop(columns = 'variable')
cell_cycle_degs.to_csv(f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_dea_{gene_program}.csv', index = False)

cell_cycle_degs.dropna().groupby(['cell_type','up_down'], observed=True).size()

In [None]:
# Which genes are differentially expressed in DN(P) and are cell cycle genes?
cell_cycle_degs

## Growth factor signalling

In [None]:
gene_program = 'growthFactors'
# Growth factor signalling genes, grouped by family
gf_signalling = {
    'IGFBPs': ['IGFBP1', 'IGFBP2', 'IGFBP3', 'IGFBP4', 'IGFBP5', 'IGFBP6', 'IGFBP7',
               'IGF1R', 'IGF2R', 'IGF1', 'IGF2BP2', 'IGF2BP3', 'IGFLR1'],
    'TGF': ['TGFBRAP1', 'TGFBR2', 'TGFBR1'],
    'FGF': ['FGFR1OP', 'FGFR1', 'FGFBP3', 'FGFR1OP2', 'FGF14-AS2'],
}

# Genes from the lists that are absent from the object
[g for family_genes in gf_signalling.values() for g in family_genes if g not in adata.var_names]

In [None]:
import itertools

# Dot plot of growth factor signalling genes across developing T cell populations, split by age group.
# Row order: every population paired with every age group level, joined as '<cell type>_<age group>'.
gf_dev_cts = ['T_DN(early)', 'T_DN(P)', 'T_DN(Q)', 'T_DP(P)', 'T_DP(Q)', 'T_αβT(entry)']
cat_levels = ['_'.join(pair) for pair in itertools.product(gf_dev_cts, col_age_group_levels)]
gf_dotplot = sc.pl.DotPlot(adata[adata.obs[col_cell_type_fine].isin(gf_dev_cts)],
                           gf_signalling,
                           groupby=[col_cell_type_fine, col_age_group],
                           categories_order=cat_levels,
                           cmap='magma',
                           mean_only_expressed=True)
gf_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/growthFactors/thyAgeing_devT_growthFactorSignalling_dotplot.pdf')

In [None]:
# How many growth factor signalling genes are differentially expressed per cell type?
# DEG definition used here: padj < 0.05 and |log2FoldChange| >= 1, developing T cell populations only
gf_genes_in_adata = [g for genes in gf_signalling.values() for g in genes if g in adata.var_names]
is_gf_deg = (
    deg_df['gene_name'].isin(gf_genes_in_adata)
    & (deg_df['padj'] < 0.05)
    & (deg_df['log2FoldChange'].abs() >= 1)
    & deg_df['cell_type'].isin(['T_DN(early)','T_DN(P)', 'T_DN(Q)', 'T_DP(P)', 'T_DP(Q)', 'T_αβT(entry)'])
)
# Long gene -> family table built from the growth factor dictionary
gf_gene_to_program = (
    pd.DataFrame.from_dict(gf_signalling, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
gf_degs = deg_df[is_gf_deg].merge(gf_gene_to_program)
gf_degs.groupby('cell_type', observed=True).size()

In [None]:
# Save the growth factor DEG table (no index column) under the current gene_program folder
gf_degs.to_csv(f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_dea_{gene_program}.csv', index = False)

In [None]:
import matplotlib.colors as mcolors
import textwrap
df = gf_degs.copy()
# Insert a space after ')' and '/' so long program names get break points for title wrapping
df['program'] = df['program'].str.replace(')', ') ').str.replace('/', '/ ')

# Diverging colour scale centred on zero, spanning the observed log2FC range
lfc = df['log2FoldChange']
vmin, vmax, vcenter = lfc.min(), lfc.max(), 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)

# One facet per program; facet width proportional to its number of genes (scaled by 1.1)
xranges = df.groupby("program")["gene_name"].nunique() * 1.1
facet_options = {'sharey': True, 'sharex': False, 'gridspec_kws': dict(width_ratios=xranges)}
g = sns.relplot(
    data=df, x='gene_name', y='cell_type',
    hue='log2FoldChange', size='log2FoldChange',
    palette=cmap, hue_norm=normalize,
    height=3, aspect=0.5,
    col='program', col_order=xranges.index,
    legend=None, facet_kws=facet_options,
)

# Function to wrap text
def wrap_text(text, width=15):
    """Return `text` re-flowed into newline-separated lines of at most `width` characters."""
    return textwrap.fill(text, width)

# Use the bare program name as facet title, then re-set each title in wrapped form
g.set_titles("{col_name}", fontweight='bold')
for facet_ax in g.axes.flat:
    facet_ax.set_title(wrap_text(facet_ax.get_title()), fontweight='bold')

# Axis cosmetics
g.set_xticklabels(rotation=90)
g.set_xlabels('Gene')
g.set_ylabels('Cell type')

g.tight_layout()

# Colorbar for log2FC, built from the same cmap/norm as the points (relplot legend is disabled)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
g.figure.colorbar(sm, ax=g.axes, orientation='vertical', label='log2FoldChange', pad=0.01)

gf_heatmap_pdf = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tCells_{gene_program}_deg_heatmap.pdf'
plt.savefig(gf_heatmap_pdf, dpi = 300, bbox_inches = 'tight')

## Fate transition genes

**DN to DP markers**

Notch signalling (T cell lineage commitment and survival): 
- receptor: NOTCH1-4
- ligands (jagged and delta-like): Jag1, Jag2, Dll1, Dll3, Dll4 
- ADAM10 enables release of intracellular notch
- Intracellular Notch binds RBP-Jκ and recruits a transcriptional co-activator of the Mastermind-like family (MAML1-3)
- ERAD induction (pre-requisite for beta-selection)

Early DN survival: IL7 signalling axis related to Bcl-2 family signalling
- IL7 production by stroma (other cytokines??)
- SCF/CD117 (KIT) expression on DN1/DN2
- IL7R on DN2/DN3
- Pro-apoptotic Bcl2: Bim, Bax, Bak, Bik, Puma, Noxa (PMAIP1), Bad
- Anti-apoptotic Bcl2: Bcl-2, Bcl-xL, Bcl2-A1, Bcl-w, Mcl-1

DN3 survival/progression to DN4: successful TRB recombination (recombination enzyme expression), PTCRA
- Pro-survival factor in beta-selection: BCL2-A1, Akt, CXCR4
- Death factors at beta-selection: FADDdd signalling 

Pre-TCR and Notch-signalling: target Fbxl1 and Fbxl12 to downregulate Cdkn1b 

DN-DP-transition: zinc finger protein Zfp335 controlled thymocyte survival and DN to DP transition by inducing Bcl-6/Rorc expression or cGAS/STING suppression in a pre-TCR independent manner

DP survival prior to positive selection: Bcl-xL up, potentially also Mcl-1

Death by neglect: Bim, downregulation of Bcl-2 sensitises DP cells to Bim-mediated apoptosis (might in part also depend on steroid hormones)

Positive selection: MAPK, ERK1/2, JNK1/2, p38, ERK5, GRB2, Sos

Negative selection death: Nur77, Nor-1 (nuclear steroid receptors), Bim

In [None]:
# Marker genes for thymocyte fate transitions, grouped by program.
# Keys are used as plot labels (hence the embedded '\n'); several genes
# (e.g. BCL2L11, MCL1, RAG1) are listed under more than one program.
transition_genes = {'DN_Survival\n(general)' : ['IL7R', 'KIT', 'NOTCH1',
                                                #'DLL1', 'DLL4', 'DLL3', 'JAG2', 'JAG1', 'EDEM1', 'ADAM10', 'RBPJ', 'MAML3', 'MAML1', 'MAML2'
                                                ],
                    'DN_Survival\n(pro-apoptotic)': ['BCL2L11', # BIM
                                                  'BAX', 'BAK1', 'BIK', 'BBC3', # PUMA 
                                                  'PMAIP1', 'BAD',],
                    'DN_Survival\n(anti-apoptotic)': ['BCL2', 'BCL2L1', 'BCL2A1', 'BCL2L2', 'MCL1'],
                    'VDJ_recombination': ['RAG1', 'RAG2', 'HMGB1', 'HMGB2', 'DNTT'],
                    'β-selection\n(positive)': ['BCL2A1', 'AKT1', 'CXCR4', 'PTCRA'],
                    'β-selection\n(negative)' : ['FADD', 'CASP8'],
                    'DP_Survival': ['BCL2L1', 'MCL1'],
                    'Death by\nneglect': ['BCL2L11', 'BCL2'],
                    'Positive selection': ['MAPK3', 'MAPK1', # ERK1/2
                                                    'MAPK8', 'MAPK9',  # JNK1/2
                                                    'MAPK14', 'GRB2', 'SOS1', 'SOS2'],
                    'Negative selection': ['NR4A1', 'NR4A3', 'BCL2L11'],
                    'TCR activation': ['JUN', 'NFATC4', 'NFATC2IP', 'NFATC3', 'NFAT5', 'NFATC1', 'NFATC2'],
                    'TCR recombination': ['RAG1', 'RAG2','XRCC5', 'XRCC6','PRKDC', 'DCLRE1C', 'LIG4', 'XRCC4','DNTT']}

# Unique genes across all programs (set-based, so the order is not fixed) ...
transition_genes_all =  list(set([gene for sublist in transition_genes.values() for gene in sublist]))
# ... restricted to those present in the object
genes_to_test = [g for g in transition_genes_all if g in adata.var_names]

In [None]:
# Silence anndata's ImplicitModificationWarning for the plotting below
from warnings import filterwarnings
filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# Which transition programs (keys of `transition_genes`) to show for each cell type
ct_dict = {'T_DN(early)': ['DN_Survival\n(general)', 'DN_Survival\n(pro-apoptotic)', 'DN_Survival\n(anti-apoptotic)'], 
           'T_DN(P)': ['DN_Survival\n(general)', 'DN_Survival\n(pro-apoptotic)', 'DN_Survival\n(anti-apoptotic)'], 
           'T_DN(Q)': ['DN_Survival\n(general)', 'DN_Survival\n(pro-apoptotic)', 'DN_Survival\n(anti-apoptotic)', 'VDJ_recombination', 'β-selection\n(positive)', 'β-selection\n(negative)'], 
           'T_DN(late)': ['DN_Survival\n(general)', 'DN_Survival\n(pro-apoptotic)', 'DN_Survival\n(anti-apoptotic)', 'β-selection\n(positive)', 'β-selection\n(negative)', 'DP_Survival'], 
           'T_DP(P)': ['β-selection\n(positive)', 'β-selection\n(negative)','DP_Survival'], 
           'T_DP(Q)': ['Death by\nneglect', 'Positive selection', 'Negative selection', 'VDJ_recombination'], 
           'T_αβT(entry)' : ['Negative selection']}

# One dot plot per early developmental population, showing only the programs assigned to it in ct_dict
# NOTE(review): every entry of t_nk_groupings['dev_early'] must be a key of ct_dict — confirm.
for cell_type in t_nk_groupings['dev_early']:
    marker_dict = {program: genes for program, genes in transition_genes.items() if program in ct_dict[cell_type]}
    # Cells of the current population
    adata_sub = adata[adata.obs[col_cell_type_fine].isin([cell_type])]
    # Age groups present in the subset, in canonical order
    ages_present = adata_sub.obs[col_age_group].unique()
    age_order = [c for c in col_age_group_levels if c in ages_present]
    transition_dotplot = sc.pl.DotPlot(adata_sub,
                                       var_names=marker_dict,
                                       groupby=col_age_group,
                                       categories_order=age_order,
                                       mean_only_expressed=True,
                                       vmin = 0, vmax = 3,
                                       cmap = 'magma')
    dp = transition_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/earlyFate/earlyFate_transitionGenes_{cell_type}_dotplot.pdf')

In [None]:
# Save transition-related DEGs to file
# DEG definition used here: padj < 0.05 and |log2FoldChange| >= 1, early developmental populations only
is_early_deg = (
    (deg_df['log2FoldChange'].abs() >= 1)
    & (deg_df['padj'] < 0.05)
    & deg_df['cell_type'].isin(t_nk_groupings['dev_early'])
)
# Long gene -> program table built from the transition gene dictionary
transition_gene_to_program = (
    pd.DataFrame.from_dict(transition_genes, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
early_deg = deg_df.loc[is_early_deg, :].merge(transition_gene_to_program, how = 'inner')
early_deg.to_csv(f'{data_path}/analyses/phenoAnalysis/genePrograms/earlyFate/thyAgeing_dea_earlyFate_transitionGenes.csv', index = False)

early_deg

In [None]:
from utils import aggClusters,df_from_aggClusters

# Aggregate per cell type x donor: the grouping key is the two labels concatenated
adata.obs['agg_group'] = adata.obs[col_cell_type_fine] + adata.obs['donor']

# Genes to aggregate. Several genes sit in more than one program (e.g. BCL2L11, RAG1, MCL1),
# so the flattened list is de-duplicated (order preserved) and limited to genes present in the
# object; otherwise the slice below would carry repeated var names or fail on a missing gene.
agg_genes = [g for g in dict.fromkeys(gene for genes in transition_genes.values() for gene in genes)
             if g in adata.var_names]
agg_cts = ['T_DN(P)', 'T_DN(Q)', 'T_DP(P)', 'T_DP(Q)', 'T_αβT(entry)']

agg_adata = aggClusters(adata[adata.obs[col_cell_type_fine].isin(agg_cts), agg_genes], raw = 'counts', lognorm = 'X', cluster_key='agg_group', preserve_meta = [col_age_group, 'sex', col_cell_type_fine])
agg_adata_df = df_from_aggClusters(agg_adata, preserve_meta = [col_age_group, 'sex', col_cell_type_fine])

# Attach the program ('group') each gene belongs to; a gene listed in several programs yields one row per program
gene_to_group = (pd.DataFrame.from_dict(transition_genes, orient='index')
                 .reset_index(names='group')
                 .melt(id_vars='group')
                 .dropna()
                 .drop(columns='variable')
                 .rename(columns={'value': 'gene_name'}))
agg_adata_df = agg_adata_df.merge(gene_to_group, on = 'gene_name')

agg_adata_df

In [None]:
# Pick the cell type to plot and keep only the programs assigned to it in ct_dict
ctoi = ['T_DN(P)', 'T_DN(Q)', 'T_DP(P)', 'T_DP(Q)', 'T_αβT(entry)']
ct = ctoi[1]
is_selected_ct = agg_adata_df[col_cell_type_fine] == ct
in_ct_programs = agg_adata_df['group'].isin(ct_dict[ct])
agg_adata_df_sub = agg_adata_df.loc[is_selected_ct & in_ct_programs]

In [None]:
%%R -i agg_adata_df_sub,ct -h 200 -w 700 -u mm
# Per-gene boxplots of `ecf` by age group for the selected cell type, one facet per program,
# with a Kruskal-Wallis test per gene (BH-adjusted, shown as significance symbols).
# NOTE(review): only `agg_adata_df_sub` and `ct` are passed in via -i, but the code also uses
# col_age_group, col_age_group_levels, col_cell_type_fine, col_cell_type_fine_levels, plots_path,
# prettify_labels() and theme_simple() — these must already exist in the R session; confirm.

# Number of distinct genes; drives the panel and figure width
n_genes = agg_adata_df_sub$gene_name %>% unique() %>% length()

agg_adata_df_sub %>%
# The 'adult(late)' age group is excluded from this plot
dplyr::filter(!!rlang::sym(col_age_group) != 'adult(late)') %>%
dplyr::mutate(!!col_age_group := factor(!!rlang::sym(col_age_group), levels = col_age_group_levels),
              !!col_cell_type_fine := factor(!!rlang::sym(col_cell_type_fine), levels = col_cell_type_fine_levels)) %>%
dplyr::rowwise() %>%
# Facet label = group name without its first three characters
# NOTE(review): this fits the 'DN_' prefix but also truncates names such as 'β-selection...' — confirm intent.
dplyr::mutate(facet_labels = prettify_labels(substr(group, 4, nchar(group)), width = 10)) %>%
ggplot(aes(x = gene_name, y = ecf, color = !!rlang::sym(col_age_group))) +
geom_boxplot(outlier.size = 0.5, position = position_dodge(preserve = 'single')) +
ggpubr::stat_kruskal_test(aes(group = !!rlang::sym(col_age_group)), label = "{p.signif}", p.adjust.method = 'BH',
                          significance = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("****", "***", "**", "*", ""))
                          ) +
ggh4x::force_panelsizes(cols = unit(n_genes*15, 'mm')) +
ggforce::facet_row(~facet_labels, scales = 'free_x', space = 'free' ) +
# NOTE(review): the x axis shows gene names although it is labelled 'Cell state'
labs(x = 'Cell state', y = 'ECF', color = 'Age group') +
grafify::scale_color_grafify(palette = "fishy") +
scale_y_continuous(labels = scales::percent_format(scale = 100), expand = expansion(mult = 0, add = c(0,0.15)),
                   limits = c(0,1.1), breaks = seq(0,1,0.25)) +
theme_simple(facet = T) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Figure width scales with the number of genes (15 mm each plus 20 mm)
ggsave(paste0('earlyFate_transitionGenes_', ct, '_boxplot.pdf'), path = file.path(plots_path, 'phenoAnalysis', 'genePrograms', 'earlyFate'),
       width = n_genes*15+20, height = 120, units = 'mm', dpi = 300)

In [None]:
# DEA results for PTCRA across all cell types (no significance filter)
deg_df.loc[deg_df['gene_name'] == 'PTCRA']

In [None]:
# Plot TCR recombination and signalling genes
import itertools

tcr_cts = ['T_DN(P)', 'T_DN(Q)', 'T_DP(P)', 'T_DP(Q)', 'T_αβT(entry)']
is_tcr_ct = adata.obs[col_cell_type_fine].isin(tcr_cts)
tcr_obs = adata.obs[is_tcr_ct]

# Row order: cell types as they occur in the data, paired with the age groups present (canonical order)
tcr_ages_present = tcr_obs[col_age_group].unique().tolist()
combinations = list(itertools.product(tcr_obs[col_cell_type_fine].unique(),
                                      [c for c in col_age_group_levels if c in tcr_ages_present]))
concatenated_combinations = ['_'.join(map(str, combo)) for combo in combinations]

# Only the programs whose name contains 'TCR'
marker_dict = {program: genes for program, genes in transition_genes.items() if 'TCR' in program}

tcr_dotplot = sc.pl.DotPlot(adata[is_tcr_ct],
                            marker_dict,
                            groupby=[col_cell_type_fine, col_age_group],
                            categories_order=concatenated_combinations,
                            cmap='magma',
                            mean_only_expressed=True)
dotplot = tcr_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/earlyFate/thyAgeing_tcrRecombSignalling_transitionGenes_dotplot.pdf')

In [None]:
# Look for gene symbols containing 'CD25' in the object
[g for g in adata.var_names if 'CD25' in g]

## Notch signalling

[Brandstadter,2019](https://doi.org/10.1098/rsob.190187):
The four mammalian heterodimeric Notch receptor paralogs (Notch1–4) interact with one of five Notch ligands in the Jagged (Jag1 and Jag2) and Delta-like (Dll1, Dll3 and Dll4) families. Notch ligands activate Notch signalling, except Dll3 which is thought to act as a natural antagonist of the pathway. A mechanical force induced by ligand–receptor interactions triggers sequential proteolytic cleavages in the Notch receptor. First, an ADAM-family metalloprotease (ADAM10) targets the receptor's membrane-proximal extracellular domain, rendering it susceptible to the γ-secretase complex, which induces intramembrane proteolysis and releases intracellular Notch (ICN) into the cytoplasm. After migration into the nucleus, ICN interacts with the DNA-binding transcription factor RBP-Jκ and recruits a transcriptional co-activator of the Mastermind-like family (MAML1-3). MAML in turn interacts with other transcriptional activators, including chromatin-modifying enzymes such as histone acetyltransferases and other components of the transcriptional activation machinery. [...] Genetic inactivation of Notch1 or its downstream transcriptional machinery results in a hypoplastic thymus permissive for intrathymic B lineage development [3,30]. Reciprocally, overexpression of constitutively active Notch results in the development of thymic-independent T cells and suppression of bone marrow B cell development. However, Notch also exerts negative regulation of myeloid fates in the thymus [31,32]. Thus, the original model of Notch controlling a T/B binary cell fate decision gave rise to a more complex pattern of Notch driving T lineage development while repressing multiple alternative cell fates.[...] Notch signals persist until the pre-T-cell receptor checkpoint, after which Notch signalling intensity decreases.

- T fate commitment: Notch1, ADAM10, RBPJ, MAML1-3 -> HES1, DTX, WASP
- Mature T cell differentiation: Tbx21, Il4, Gata3-1a, Il17a and Rorc

B cell dev markers from [Lee, 2021](https://www.nature.com/articles/s41467-021-27232-5) & [Huang, 2024](https://www.cell.com/cancer-cell/fulltext/S1535-6108(24)00088-6)

DN traj analysis -> increased B cell path due to decreased Notch signalling?
Hypothesis: progenitors with decreased proliferative capacity and increased B/myeloid potential

In [None]:
gene_program = 'notchSignalling'
# Gene sets for early T cell fate and Notch signalling, grouped by program.
# The keys are reused downstream as score names, plot labels and output file names.
notch_dict = {'ETP' : ['CD34', 'PCNA',],
              'B commit': ['VPREB1', 'CD24', 'EBF1', 'IGLL1', 'IFITM2', 'IFITM3', 'FLT3'],
              'Myeloid commit' : ['RUNX2', 'TYROBP', 'IRF8'],
              'Survival': ['IL7R', 'KIT'],
              'NOTCH (early)': ['NOTCH1', 'ADAM10', 'RBPJ', 'MAML1', 'MAML2', 'MAML3'],
              'NOTCH (late)': ['HES1', 'DTX1', 'WAS', 'NOTCH3', 'NRP1', 'HIVEP3', 'MYC', 'IL2RA', 'CDKN1B']}

# Genes from the lists that are absent from the object
[g for genes in notch_dict.values() for g in genes if g not in adata.var_names]

In [None]:
# One Notch-program dot plot per population (developing B cells plus early T cell stages), split by age group
notch_plot_dir = f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}'
os.makedirs(notch_plot_dir, exist_ok = True)
for ct in ['B_dev', 'T_DN(early)', 'T_DN(P)', 'T_DN(Q)', 'T_DN(late)', 'T_DP(P)']:
    is_ct = adata.obs[col_cell_type_fine] == ct
    # Age groups present for this population, in canonical order
    ages_present = adata.obs[is_ct][col_age_group].unique()
    age_order = [c for c in col_age_group_levels if c in ages_present]
    notch_dotplot = sc.pl.DotPlot(adata[is_ct],
                                  var_names=notch_dict,
                                  groupby=col_age_group,
                                  categories_order=age_order,
                                  mean_only_expressed = True,
                                  vmin = 0, vmax = 3,
                                  cmap = 'magma')
    notch_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/{gene_program}_{ct}_dotplot.pdf')

In [None]:
# Score every Notch-related program; each score is stored in adata.obs under the program name
for program_name, program_genes in notch_dict.items():
    sc.tl.score_genes(adata, gene_list=program_genes, score_name=f'{program_name}', use_raw=False)

In [None]:
# Scratch check: replacing spaces with underscores (the same call is applied to program names in file names)
'D score'.replace(' ', '_')

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# Multi-page PDF with one page per Notch panel: per-donor mean scores of the
# early developmental populations, grouped by age group
with PdfPages(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_boxplots_multipage.pdf') as pdf:
    early_pops = t_nk_groupings['dev_early']
    for k in notch_dict:
        # Average the score per donor within each age group x population
        donor_means = adata.obs.groupby([col_age_group, col_cell_type_fine, 'donor']).agg(mean_score = (k, 'mean')).reset_index()
        df = donor_means.loc[donor_means[col_cell_type_fine].isin(early_pops)]
        # Prefix handed to plot_grouped_boxplot via save_stats (spaces in the key replaced)
        stats_prefix = f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_{k.replace(" ", "_")}_'
        plot_grouped_boxplot(
            data = df,
            x = col_cell_type_fine,
            y = 'mean_score',
            hue = col_age_group,
            order = early_pops,
            hue_order = col_age_group_levels,
            x_label = 'Cell population',
            y_label = f'Mean {k} score',
            legend_title = 'Age group',
            add_stats = True,
            format_percent = False,
            figsize = calc_figsize(width = 70, height = 50),
            save_stats = stats_prefix,
            palette = get_tint_palette(thyAgeing_colors['magenta']),
        )
        # Append the current figure as a new page, then release it
        pdf.savefig(bbox_inches='tight')
        plt.close()

In [None]:
# Display the cell metadata table (now including the score columns added above)
adata.obs

In [None]:
# Significant DEGs (padj < 0.05) in the DN populations that belong to a Notch panel
dn_populations = ['T_DN(early)', 'T_DN(P)', 'T_DN(Q)', 'T_DN(late)']

# Long-format lookup of panel membership (columns: program, variable, gene_name)
notch_program_table = (
    pd.DataFrame.from_dict(notch_dict, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)

significant_dn = (deg_df['padj'] < 0.05) & (deg_df['cell_type'].isin(dn_populations))
deg_df.loc[significant_dn, :].merge(notch_program_table, how = 'inner')


- CEBPA up: [De Obaldia, 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC4038953/): Ectopic C/EBPα expression in CD4−CD8− double-negative (DN) thymocytes experiencing Notch signaling inhibits survival and subsequent T cell development [33]. Consistently, ETP with higher C/EBPα expression (as identified in a reporter mouse strain) are less efficient progenitor cells of T cells than are those with lower C/EBPα expression [34].

In [None]:
# First 50 strong (|log2FC| >= 1.3), significant (padj < 0.05) DEGs of T_DN(early),
# ordered from highest to lowest log2FoldChange
strong_effect = deg_df['log2FoldChange'].abs() >= 1.3
significant = deg_df['padj'] < 0.05
in_dn_early = deg_df['cell_type'].isin(['T_DN(early)'])
deg_df.loc[strong_effect & significant & in_dn_early, :].sort_values(by = 'log2FoldChange', ascending = False).head(50)

Canonical Notch Target Genes in T cell Development
- HES1 — Hairy/enhancer of split 1
- HEY1, HEY2 — Hes-related transcription factors
- DTX1 — Deltex E3 ubiquitin ligase 1 (modulates Notch signalling output)
- NRARP — Notch-regulated ankyrin repeat protein
- IL7R — Interleukin-7 receptor α chain (expression supported by Notch in early thymocytes)
- CD3D, CD3E, CD3G — Early TCR complex components induced downstream of Notch1 in thymopoiesis
- PTCRA — Pre–T cell receptor α chain gene (β-selection stage)

In [None]:
# Canonical Notch target genes (see the list in the markdown above), scored as one signature
notch_targets = ['HES1', 'HEY1', 'HEY2', 'DTX1', 'NRARP', 'IL7R', 'CD3D', 'CD3E', 'CD3G', 'PTCRA']
# NOTE(review): use_raw is left at its default here, whereas the per-panel scores
# above pass use_raw=False — confirm both are meant to use the same matrix
sc.tl.score_genes(adata, gene_list=notch_targets, score_name='notch_score')

In [None]:
# Per-donor mean of 'notch_score' in four early T populations, grouped by age group
notch_score_pops = ['T_DN(early)', 'T_DN(P)', 'T_DN(Q)', 'T_DP(P)']
donor_means = adata.obs.groupby([col_age_group, col_cell_type_fine, 'donor']).agg(mean_score = ('notch_score', 'mean')).reset_index()
df = donor_means.loc[donor_means[col_cell_type_fine].isin(notch_score_pops)]
plot_grouped_boxplot(
    data = df,
    x = col_cell_type_fine,
    y = 'mean_score',
    hue = col_age_group,
    order = notch_score_pops,
    hue_order = col_age_group_levels,
    x_label = 'Cell population',
    y_label = 'Mean Notch signalling score',
    legend_title = 'Age group',
    add_stats = True,
    format_percent = False,
    figsize = calc_figsize(width = 50, height = 50),
    save_stats = f'{data_path}/analyses/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_notchScore_',
    ylim = (-0.25, 1.25),
    palette = get_tint_palette(thyAgeing_colors['magenta']),
)
# Write the current figure to the programme's plot folder
plt.savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tSplit_{gene_program}_notchScore_boxplot.pdf', dpi = 300, bbox_inches = 'tight')

## Early commitment genes

**Dandelion paper** <br>
*(Suo, 2024)*

The top genes that were positively correlated with the CD8+ T cell lineage choice included CD8A and CD8B, which are markers for CD8+ T cells [6]. The top genes that were negatively correlated included CD40LG, which is a marker for CD4+ T helper cells [6], and ITM2A (induced during positive selection and causes CD8 downregulation [36]). Other markers of CD4+ T cells such as CD4 (ref. 6), together with highly validated transcription factors (TFs) that are known to be involved in CD8+ T or CD4+ T lineage decisions [34], including RUNX3 (ref. 37,38), ZBTB7B [39,40], TOX [41] and GATA3 (ref. 42,43), all displayed significant correlations in the expected directions. However, when using GEX pseudotime, the correlations were notably reduced and some (for example, TOX and RUNX3) were no longer statistically significant (Fig. 4c). For TOX, the direction of the correlation was wrongly inverted (Fig. 4c). In addition, the V(D)J pseudotime also revealed new associations for TFs such as ZNF496, MBNL2 and RORC for CD8+ T, and SATB1, STAT5A and STAT1 for CD4+ T (Extended Data Fig. 6d and Supplementary Table 6).

In [None]:
# Lineage-commitment marker panels; keys are panel labels, values gene symbols.
# A gene may belong to several panels (e.g. 'STAT5A', 'NR4A1').
gene_program = 'cd4Cd8Commit'
commit_genes_dict = {'CD8 committed' : ['CD8A', 'CD8B', 'ZNF496', 'MBNL2', 'RORC', 'RUNX3'],
                     'CD4 committed' : ['CD40LG', 'ITM2A', 'SATB1', 'STAT5A', 'STAT1', 'TOX', 'ZBTB7B', 'GATA3'],
                     'Treg committed': ['TNFRSF9', 'IKZF2', 'NR4A1', 'FOXP3', 'IL2RA','IL2', 'STAT5A', 'STAT5B'],
                     'Negative\nselection' : ['NR4A1', 'NR4A2', 'NR4A3', 'BCL2L11']}
# Flat gene list in first-seen order with duplicates removed, so that indexing
# adata with it does not select the same gene column twice
commit_genes = list(dict.fromkeys(g for genes in commit_genes_dict.values() for g in genes))

In [None]:
# One dot plot of the commitment panels per population, comparing age groups.
# Make sure the target folder exists before saving into it.
os.makedirs(f'{plots_path}/phenoAnalysis/genePrograms/cd4Cd8Commit', exist_ok = True)
for ct in ['T_αβT(entry)', 'T_CD8_naive', 'T_CD4_naive', 'T_Treg', 'T_Treg(agonist)']:
    # Compute the population mask and its age groups once per population
    # (previously both were rebuilt for every age-group level)
    ct_mask = adata.obs[col_cell_type_fine] == ct
    ct_age_groups = adata.obs.loc[ct_mask, col_age_group].unique()
    sc.pl.DotPlot(adata[ct_mask],
                var_names=commit_genes_dict,
                groupby=col_age_group,
                # Keep only age groups present in this population, in the order of col_age_group_levels
                categories_order= [c for c in col_age_group_levels if c in ct_age_groups],
                mean_only_expressed = True,
                cmap = 'magma').add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/cd4Cd8Commit/cd8cd4CommitGenes_{ct}_dotplot.pdf')

In [None]:
from utils import aggClusters,df_from_aggClusters

# Grouping key for aggregation: cell type label followed by donor.
# Cast to str first so the concatenation also works for categorical obs columns
# (same approach as the NR4A1-category aggregation further down).
adata.obs['agg_group'] = adata.obs[col_cell_type_fine].astype(str) + adata.obs['donor'].astype(str)

# Index genes only once each; a repeated gene would select a duplicated column
unique_commit_genes = list(dict.fromkeys(commit_genes))

agg_adata = aggClusters(adata[adata.obs[col_cell_type_fine].isin(['T_αβT(entry)']), unique_commit_genes], raw = 'counts', lognorm = 'X', cluster_key='agg_group', preserve_meta = [col_age_group, 'sex', col_cell_type_fine])
agg_adata_df = df_from_aggClusters(agg_adata, preserve_meta = [col_age_group, 'sex', col_cell_type_fine])

# Attach the panel label(s) of every gene (long-format lookup: group, gene_name)
commit_group_table = pd.DataFrame.from_dict(commit_genes_dict, orient='index').reset_index(names='group').melt(id_vars='group').dropna().drop(columns='variable').rename(columns={'value': 'gene_name'})
agg_adata_df = agg_adata_df.merge(commit_group_table, on = 'gene_name')

agg_adata_df.head()

In [None]:
# Subset to T_αβT(entry) cells. Take an explicit copy because new obs columns are
# written below; assigning into a view would trigger an implicit copy instead.
adata_sub = adata[adata.obs[col_cell_type_fine].isin(['T_αβT(entry)'])].copy()
# Per-cell NR4A1 value from X (X is converted with .toarray(), i.e. expected to be sparse)
adata_sub.obs['NR4A1_expr'] = adata_sub[:, 'NR4A1'].X.toarray().flatten()
# Bin into three categories: <= 0 -> 'no expr', (0, 3] -> 'low expr', > 3 -> 'high expr'
adata_sub.obs['NR4A1_expr_category'] = pd.cut(adata_sub.obs['NR4A1_expr'], bins=[-float('inf'), 0, 3, float('inf')], labels=['no expr', 'low expr', 'high expr'])
adata_sub

In [None]:
# Commitment panels in the T_αβT(entry) subset, grouped by NR4A1 category and age group
nr4a1_dotplot = sc.pl.DotPlot(
    adata_sub,
    var_names=commit_genes_dict,
    groupby=['NR4A1_expr_category', col_age_group],
    #categories_order= [c for c in col_age_group_levels if c in adata.obs[adata.obs[col_cell_type_fine] == 'T_αβT(entry)'][col_age_group].unique()],
    mean_only_expressed = True,
    cmap = 'viridis',
)
nr4a1_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/cd4Cd8Commit/cd8cd4CommitGenes_NR4A1groups_dotplot.pdf')

In [None]:
# Fraction of cells per NR4A1 expression category within each age group (wide table, missing combinations set to 0)
adata_sub.obs.groupby([col_age_group])['NR4A1_expr_category'].value_counts(normalize = True).unstack().fillna(0).reset_index()

In [None]:
from utils import aggClusters,df_from_aggClusters

# Grouping key for aggregation: NR4A1 category + donor + cell type (all cast to str)
adata_sub.obs['agg_group'] = adata_sub.obs['NR4A1_expr_category'].astype(str) + adata_sub.obs['donor'].astype(str) + adata_sub.obs[col_cell_type_fine].astype(str)

# Genes shared between panels (e.g. 'NR4A1', 'STAT5A') must be indexed only once,
# otherwise the same gene column is selected twice
unique_commit_genes = list(dict.fromkeys(g for genes in commit_genes_dict.values() for g in genes))

agg_adata = aggClusters(adata_sub[:, unique_commit_genes], raw = 'counts', lognorm = 'X', cluster_key='agg_group', preserve_meta = [col_age_group, 'sex', col_cell_type_fine, 'NR4A1_expr_category'])
agg_adata_df = df_from_aggClusters(agg_adata, preserve_meta = [col_age_group, 'sex', col_cell_type_fine, 'NR4A1_expr_category'])

# Attach the panel label(s) of every gene (long-format lookup: group, gene_name)
commit_group_table = pd.DataFrame.from_dict(commit_genes_dict, orient='index').reset_index(names='group').melt(id_vars='group').dropna().drop(columns='variable').rename(columns={'value': 'gene_name'})
agg_adata_df = agg_adata_df.merge(commit_group_table, on = 'gene_name')

agg_adata_df

In [None]:
# Save transition-related DEGs to file
# to_csv does not create missing folders, so make sure the target exists first
os.makedirs(f'{data_path}/analyses/phenoAnalysis/genePrograms/cd4Cd8Commit', exist_ok = True)
# Strong, significant DEGs of T_DP(P) / T_αβT(entry); the right join keeps every panel gene
# and dropna then removes rows with any missing value (e.g. genes without a matching DEG)
deg_df.loc[(abs(deg_df['log2FoldChange']) >= 1.3 ) & (deg_df['padj'] < 0.05) & (deg_df['cell_type'].isin(['T_DP(P)','T_αβT(entry)'])),:] \
        .merge(pd.DataFrame.from_dict(commit_genes_dict, orient='index').reset_index(names = 'program').melt(id_vars = 'program', value_name = 'gene_name').dropna(), how = 'right') \
        .dropna().to_csv(f'{data_path}/analyses/phenoAnalysis/genePrograms/cd4Cd8Commit/thyAgeing_dea_cd4Cd8CommitGenes.csv', index = False)

In [None]:
# Strong (|log2FC| >= 1.3), significant (padj < 0.05) DEGs of T_DP(P) / T_αβT(entry)
transition_hits = deg_df.loc[
    (deg_df['log2FoldChange'].abs() >= 1.3)
    & (deg_df['padj'] < 0.05)
    & (deg_df['cell_type'].isin(['T_DP(P)','T_αβT(entry)'])),
    :,
]
# Long-format lookup of panel membership (columns: program, variable, gene_name)
commit_program_table = (
    pd.DataFrame.from_dict(commit_genes_dict, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
# Right join keeps every panel gene; dropna then removes rows with any missing value
commit_deg = transition_hits.merge(commit_program_table, how = 'right').dropna()
commit_deg

In [None]:
import matplotlib.colors as mcolors
import textwrap

# Dot "heatmap" of commitment DEGs: genes on x, cell types on y, one facet per programme
df = commit_deg.copy()
# Insert break opportunities after ')' and '/'; regex=False because ')' alone is
# not a valid regular expression and the default of `regex` differs between pandas versions
df['program'] = df['program'].str.replace(')', ') ', regex=False).str.replace('/', '/ ', regex=False)

# Create a divergent colormap centered around 0
vmin, vmax, vcenter = df['log2FoldChange'].min(), df['log2FoldChange'].max(), 0
# TwoSlopeNorm needs vmin < vcenter < vmax; if all DEGs change in the same
# direction, mirror the populated side so the norm can still be built
if vmin >= vcenter:
    vmin = -vmax
if vmax <= vcenter:
    vmax = -vmin
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)

# Facet widths proportional to the number of distinct genes per programme
xranges = df.groupby("program")["gene_name"].nunique()
xranges *= 1.1
# NOTE(review): dot size follows the signed log2FoldChange, so strongly down-regulated
# genes presumably get the smallest dots — confirm this is intended
g = sns.relplot(data=df, x='gene_name', y='cell_type', hue='log2FoldChange', size='log2FoldChange',
                palette=cmap, hue_norm=normalize, height=5, aspect=0.5,
                col='program', col_order=xranges.index, legend=None,
                facet_kws={'sharey': True, 'sharex': False, 'gridspec_kws': dict(width_ratios=xranges)})

# Function to wrap text
def wrap_text(text, width=15):
    """Return *text* re-flowed to lines of at most *width* characters."""
    return '\n'.join(textwrap.wrap(text, width))

# Apply the text wrapping function to facet titles
g.set_titles("{col_name}", fontweight='bold')
for ax in g.axes.flat:
    title = ax.get_title()
    ax.set_title(wrap_text(title), fontweight='bold')

g.set_xticklabels(rotation=90)
g.set_xlabels('Gene')
g.set_ylabels('Cell type')

g.tight_layout()

# Add colorbar (legend=None above, so the colour scale is drawn manually)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
g.figure.colorbar(sm, ax=g.axes, orientation='vertical', label='log2FoldChange', pad=0.01)

plt.savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tCells_{gene_program}_deg_heatmap.pdf', dpi = 300, bbox_inches = 'tight')

## GC formation and reaction

Note: SLAMF7 is expressed by cytotoxic CD8 T cells (Loyal, 2020) and SLAMF6 is an exhaustion marker of CD8 T cells (Yigit, 2017)

In [None]:
import warnings
warnings.filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# Germinal-centre related genes of interest, grouped by process (B-cell panel)
b_gc_goi = {
    'DZ_entry': ['CXCR5'],
    'LZ_entry': ['CCR7'],
    'BCR_activation' : ['EBI3', 'S1PR2'],
    'T_B_interaction' : ['ICAM1', 'ICAM2', 'ICAM3', 'ICAM4', 'ICAM5', 'CD40', 'ICOSLG'],
    'GC_formation': ['PLXNB1', 'PLXNB2', 'BASP1'],
}
# Germinal-centre related genes of interest, grouped by process (T-cell panel)
t_gc_goi = {
    'LZ_entry': ['CCR7'],
    'SLAM_interaction': ['SLAMF1', 'LY9', 'CD244', 'SLAMF7', 'SLAMF6', 'CD84'],
    'LFA1_activation': ['ITGB2', 'ITGA4', 'SEMA4B'],
    'BCR_activation': ['CD40LG', 'ICOS'],
    'interleukin_signalling' : ['IL21', 'IL4', 'IL6', 'IL10'],
    'CD8' : ['CD8A', 'CD8B'],
}

# sc.pl.DotPlot(adata[adata.obs['taa_l3'].str.startswith('T_CD')],
#               var_names=t_gc_goi,
#               groupby='taa_l5',
#               mean_only_expressed=True,
#               cmap = 'viridis',).add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/gcReaction/gcFormationMarkers_tCells_dotplot.pdf', bbox_inches='tight')

# T-cell panel across taa_l5 labels, restricted to cells whose taa_l3 label starts with 'T_CD4'
cd4_mask = adata.obs['taa_l3'].str.startswith('T_CD4')
cd4_gc_dotplot = sc.pl.DotPlot(
    adata[cd4_mask],
    var_names=t_gc_goi,
    groupby='taa_l5',
    mean_only_expressed=True,
    cmap = 'magma',
)
cd4_gc_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/gcReaction/gcFormationMarkers_cd4Cells_dotplot.pdf', bbox_inches='tight')

In [None]:
# T-cell GC panel by age group, for cells whose taa_l5 label starts with 'T_CD4_fh'.
# The mask and its age groups are computed once instead of once per age-group level.
fh_mask = adata.obs['taa_l5'].str.startswith('T_CD4_fh')
fh_age_groups = adata.obs.loc[fh_mask, col_age_group].unique()
sc.pl.DotPlot(adata[fh_mask],
              var_names=t_gc_goi,
              groupby=col_age_group,
              # Keep only age groups present in the subset, in the order of col_age_group_levels
              categories_order=[c for c in col_age_group_levels if c in fh_age_groups],
              mean_only_expressed=True,
              cmap = 'viridis',).add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/gcReaction/gcFormationMarkers_T_Cd4_fh_dotplot.pdf', bbox_inches='tight')

In [None]:
# Strong, significant DEGs among the T-cell GC genes in populations whose
# label starts with 'T_CD4' or 'T_Treg'
gc_genes = [g for genes in t_gc_goi.values() for g in genes]
in_gc_panel = deg_df['gene_name'].isin(gc_genes)
in_cd4_or_treg = deg_df['cell_type'].str.startswith('T_CD4') | deg_df['cell_type'].str.startswith('T_Treg')
strong_and_significant = (deg_df['log2FoldChange'].abs() >= 1.3) & (deg_df['padj'] < 0.05)
deg_df.loc[in_gc_panel & in_cd4_or_treg & strong_and_significant, :]

## Inflammatory signalling

![image.pdf](attachment:image.pdf)

Signalling out:
- Th1: IFNG
- Th2: IL4, IL5, IL13
- Th9: IL9, IL10, IL21
- Th17: IL17A, IL22, IL23, IL10, IL21
- Tfh: IL21, IL4
- Treg: IL10, TGFB, IL35

In [None]:
# Cytokine genes per T helper subset (gene symbols for the "Signalling out" list above);
# a gene may appear under several subsets
inflammatory_dict = {'T_h1' : ['IFNG'],
                     'T_h2' : ['IL4', 'IL13', 'IL5'],
                     'T_h9' : ['IL9', 'IL10', 'IL21'],
                     'T_h17' : ['IL17A', 'IL22', 'IL23A', 'IL10', 'IL21'],
                     'Tfh': ['IL21', 'IL4'],
                     'T_reg' : ['IL10', 'TGFB1', 'TGFB2','TGFB3','EBI3', 'IL12A'],
}

# List the genes from inflammatory_dict that are missing from adata.var_names
[g for genes in inflammatory_dict.values() for g in genes if g not in adata.var_names]

In [None]:
import warnings
warnings.filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# Cytokine panels by fine cell type and age group, for cells whose taa_l3 label starts with 'T_CD'
t_cd_mask = adata.obs['taa_l3'].str.startswith('T_CD')
inflammatory_dotplot = sc.pl.DotPlot(
    adata[t_cd_mask],
    var_names=inflammatory_dict,
    groupby=[col_cell_type_fine, col_age_group],
    mean_only_expressed=True,
    cmap = 'viridis',
)
inflammatory_dotplot.add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/proinflammatory/inflammatoryCd4Markers_tCells_dotplot.pdf', bbox_inches='tight')

## Lymphotoxin signalling

Genes from https://geneglobe.qiagen.com/us/knowledge/pathways/lymphotoxin-beta-receptor-signaling

In [None]:
# Define the lymphotoxin gene panels: keys are programme labels (the '\n' splits
# the label over two lines), values are gene symbols.
# Some genes belong to more than one panel (e.g. 'TNFSF14', 'VCAM1').
gene_program = 'lymphotoxin'
lymphotoxin_dict = {'Receiver\nsignalling':['AKT1', 'AKT2', 'AKT3', 'APAF1', 'BIRC2', 'CASP3', 'CASP9', 'CHUK', 'CREBBP', 'CXCL1', 'CYC1',
                    'CYCS', 'DIABLO', 'ELP1', 'EP300', 'IKBKB', 'IKBKE', 'IKBKG', 'LTBR', 'MAP3K14',
                    'MAPK1', 'MAPK14', 'MAPK3', 'NFKB1', 'NFKB2', 'NFKBIA', 'NFKBID', 'PDPK1', 'PIK3C2A', 'PIK3C2B',
                    'PIK3C2G', 'PIK3C3', 'PIK3CA', 'PIK3CB', 'PIK3CD', 'PIK3CG', 'PIK3R1', 'PIK3R2', 'PIK3R3', 'PIK3R4',
                    'PIK3R5', 'PIK3R6', 'RELA', 'RELB', 'TNFSF14', 'TRAF1', 'TRAF2', 'TRAF3', 'TRAF4', 'TRAF5', 'TRAF6',
                    'VCAM1'],
                    'Sender\nsignalling' : ['LTA', 'LTB', 'TNFSF14'], 
                    'Progenitor\nhoming': ['CCL19', 'CCL21', 'ICAM1', 'VCAM1'],
                    'mTEC\nmaintenance' : ['FEZF2'],
                    'Lymphoid tissue\ninducer' : ['TNFRSF11A', 'TNFSF11']}

# Display panel genes that are absent from adata.var_names (an empty list means all are available)
[g for genes in lymphotoxin_dict.values() for g in genes if g not in adata.var_names]

In [None]:
# Scratch check: age groups that occur among cells whose taa_l4 label starts with 'T_DN(P)'
adata.obs[adata.obs['taa_l4'].str.startswith('T_DN(P)')][col_age_group].unique()

In [None]:
# One dot plot of the lymphotoxin panels per fine cell type, comparing age groups.
# Make sure the target folder exists before saving into it.
os.makedirs(f'{plots_path}/phenoAnalysis/genePrograms/lymphotoxin', exist_ok = True)
for ct in adata.obs[col_cell_type_fine].unique():
    # Compute the population mask and its age groups once per population
    # (previously both were rebuilt for every age-group level)
    ct_mask = adata.obs[col_cell_type_fine] == ct
    ct_age_groups = adata.obs.loc[ct_mask, col_age_group].unique()
    sc.pl.DotPlot(adata[ct_mask],
                var_names=lymphotoxin_dict,
                groupby=col_age_group,
                # Keep only age groups present in this population, in the order of col_age_group_levels
                categories_order=[c for c in col_age_group_levels if c in ct_age_groups],
                mean_only_expressed=True,
                cmap = 'viridis',).add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/lymphotoxin/lymphotoxinMarkers_{ct}_dotplot.pdf', bbox_inches='tight')

In [None]:
# Strong (|log2FC| >= 1.3), significant (padj < 0.05) DEGs among the lymphotoxin genes
lymphotoxin_genes = [g for genes in lymphotoxin_dict.values() for g in genes]
lymphotoxin_hits = deg_df.loc[
    (deg_df['gene_name'].isin(lymphotoxin_genes))
    & (deg_df['log2FoldChange'].abs() >= 1.3)
    & (deg_df['padj'] < 0.05),
    :,
]
# Long-format lookup of panel membership (columns: program, variable, gene_name)
lymphotoxin_program_table = (
    pd.DataFrame.from_dict(lymphotoxin_dict, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
# Right join keeps every panel gene; dropna then removes rows with any missing value
lymphotoxin_deg = lymphotoxin_hits.merge(lymphotoxin_program_table, how = 'right').dropna()

lymphotoxin_deg

In [None]:
import matplotlib.colors as mcolors
import textwrap

# Dot "heatmap" of lymphotoxin DEGs: genes on x, cell types on y, one facet per programme
df = lymphotoxin_deg.copy()
# Insert break opportunities after ')' and '/'; regex=False because ')' alone is
# not a valid regular expression and the default of `regex` differs between pandas versions
df['program'] = df['program'].str.replace(')', ') ', regex=False).str.replace('/', '/ ', regex=False)

# Create a divergent colormap centered around 0
vmin, vmax, vcenter = df['log2FoldChange'].min(), df['log2FoldChange'].max(), 0
# TwoSlopeNorm needs vmin < vcenter < vmax; if all DEGs change in the same
# direction, mirror the populated side so the norm can still be built
if vmin >= vcenter:
    vmin = -vmax
if vmax <= vcenter:
    vmax = -vmin
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)

# Facet widths proportional to the number of distinct genes per programme
xranges = df.groupby("program")["gene_name"].nunique()
xranges *= 1.1
# NOTE(review): dot size follows the signed log2FoldChange, so strongly down-regulated
# genes presumably get the smallest dots — confirm this is intended
g = sns.relplot(data=df, x='gene_name', y='cell_type', hue='log2FoldChange', size='log2FoldChange',
                palette=cmap, hue_norm=normalize, height=5, aspect=0.5,
                col='program', col_order=xranges.index, legend=None,
                facet_kws={'sharey': True, 'sharex': False, 'gridspec_kws': dict(width_ratios=xranges)})

# Function to wrap text
def wrap_text(text, width=15):
    """Return *text* re-flowed to lines of at most *width* characters."""
    return '\n'.join(textwrap.wrap(text, width))

# Apply the text wrapping function to facet titles
g.set_titles("{col_name}", fontweight='bold')
for ax in g.axes.flat:
    title = ax.get_title()
    ax.set_title(wrap_text(title), fontweight='bold')

g.set_xticklabels(rotation=90)
g.set_xlabels('Gene')
g.set_ylabels('Cell type')

g.tight_layout()

# Add colorbar (legend=None above, so the colour scale is drawn manually)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
g.figure.colorbar(sm, ax=g.axes, orientation='vertical', label='log2FoldChange', pad=0.01)

plt.savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tCells_{gene_program}_deg_heatmap.pdf', dpi = 300, bbox_inches = 'tight')

In [None]:
# Calculate activity of signalling: one obs score column per lymphotoxin programme
for k, v in lymphotoxin_dict.items():
    sc.tl.score_genes(adata, gene_list=v, score_name=k)

# Reshape to long format (one row per cell and score), then average each score
# per donor within age group x cell type
id_columns = [col_age_group, col_cell_type_fine, 'donor']
score_columns = list(lymphotoxin_dict.keys())
long_scores = adata.obs[id_columns + score_columns].melt(id_vars=id_columns, value_name='score', var_name='score_name')
df = long_scores.groupby(id_columns + ['score_name']).agg(mean_score = ('score', 'mean')).reset_index()

In [None]:
%%R -i df,ct -h 400 -w 400 -u mm

# Box plots of the per-donor mean scores (mean_score) per cell state, coloured by
# age group, with one facet per score_name; rows of age group 'adult(late)' are dropped.
# A Kruskal-Wallis test across age groups is annotated (BH-adjusted, shown as stars).
# NOTE(review): `ct` is passed in via -i but is not referenced in this cell.
# NOTE(review): col_age_group, col_age_group_levels, col_cell_type_fine,
# col_cell_type_fine_levels, plots_path and theme_simple are not defined here —
# presumably set up in an earlier R cell.
df %>%
dplyr::filter(!!rlang::sym(col_age_group) != 'adult(late)') %>%
dplyr::mutate(!!col_age_group := factor(!!rlang::sym(col_age_group), levels = col_age_group_levels),
              !!col_cell_type_fine := factor(!!rlang::sym(col_cell_type_fine), levels = col_cell_type_fine_levels)) %>%
ggplot(aes(x = !!rlang::sym(col_cell_type_fine), y = mean_score, color = !!rlang::sym(col_age_group))) +
geom_boxplot(outlier.size = 0.5, position = position_dodge(preserve = 'single')) +
ggpubr::stat_kruskal_test(aes(group = !!rlang::sym(col_age_group)), label = "{p.signif}", p.adjust.method = 'BH',
                          significance = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("****", "***", "**", "*", ""))
                          ) +
geom_hline(yintercept = 0, linetype = 'dashed') +
ggforce::facet_col(~score_name, scales = 'free_y', strip.position = 'right') +
labs(x = 'Cell state', y = 'Activity', color = 'Age group') +
grafify::scale_color_grafify(palette = "fishy") +
scale_y_continuous(expand = expansion(mult = 0, add = c(0,0.15))) +
theme_simple(facet = T) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Save the last plot into the lymphotoxin plot folder
ggsave(paste0('lymphotoxinActivity_byCtAndAge_boxplot.pdf'), path = file.path(plots_path, 'phenoAnalysis', 'genePrograms', 'lymphotoxin'),
       width = 400, height = 300, units = 'mm', dpi = 300)

## Regenerative signalling

**ILC2 produce AREG, IL-13, CSF2, and IL-5 to promote mTEC differentiation in mice:**
- Type 2 response genes (up): Il5, Il13, Il4, Arg1, Areg, Csf2, Tph1, Ccl1, and Cxcl2
- Interferon responsive genes (up):
- Cell proliferation genes (up): Ccnd1 and Ccnd2, Mki67:
- ILC2 differentiation and activation markers (up): Klrg1, Rora, Lmo4, Icos, Il1rl1, Batf, Cd69, and Id2
- Increase in expression of the class II major histocompatibility complex (MHCII) genes H2-Aa, H2-Eb1, H2-Ab1, and of the IL-1 signaling–related genes Il1b and Il1r2
- In MHC-II expressing mTEC, NK and Treg pop: AREG, IL-13, IL-4

ZFP36L1+ and AREG+ recirculating Tregs promote regeneration post-injury in mice and humans

In [None]:
# Regeneration-related marker panels (human gene symbols for the lists in the
# markdown above); keys are programme labels.
gene_program = 'regeneration'
regen_markers = {
    # NOTE(review): the markdown lists Il13 for the type 2 response while this panel
    # has 'IL3' ('IL13' sits under 'Other') — confirm 'IL3' is intended
    'Type 2 response': [
        'IL4', 'IL5', 'IL3', 'ARG1', 'AREG', 'CSF2', 'TPH1', 'CCL1', 'CXCL2',
    ],
    'IFN response' : [
        'ISG15', 'ISG20L2', 'ISG20', 'NFKBIZ', 'NFKB1', 'NFKBIL1',
        'NFKBIE', 'NFKB2', 'NFKBIA', 'NFKBID', 'NFKBIB',
    ],
    'Cell proliferation': ['CCND1', 'CCND2', 'MKI67'],
    'ILC2 differentiation': [
        'KLRG1', 'RORA', 'LMO4', 'ICOS', 'IL1RL1', 'BATF', 'CD69', 'ID2',
    ],
    'IL-1 signalling': ['IL1B', 'IL1R2'],
    'HLA class II': [
        'HLA-DRA', 'HLA-DRB1', 'HLA-DRB5', 'HLA-DPA1', 'HLA-DPB1', 'HLA-DQA1',
        'HLA-DQB1', 'HLA-DMA', 'HLA-DMB', 'HLA-DOA', 'HLA-DOB',
    ],
    'Other': ['IL33', 'IL13', 'ZFP36L1'],
}

In [None]:
# One dot plot of the regeneration panels per fine cell type, comparing age groups.
# Make sure the target folder exists before saving into it.
os.makedirs(f'{plots_path}/phenoAnalysis/genePrograms/regeneration', exist_ok = True)
for ct in adata.obs[col_cell_type_fine].unique():
    # Compute the population mask and its age groups once per population
    # (previously both were rebuilt for every age-group level)
    ct_mask = adata.obs[col_cell_type_fine] == ct
    ct_age_groups = adata.obs.loc[ct_mask, col_age_group].unique()
    sc.pl.DotPlot(adata[ct_mask],
                var_names=regen_markers,
                groupby=col_age_group,
                # Keep only age groups present in this population, in the order of col_age_group_levels
                categories_order=[c for c in col_age_group_levels if c in ct_age_groups],
                mean_only_expressed=True,
                cmap = 'magma',).add_totals().savefig(f'{plots_path}/phenoAnalysis/genePrograms/regeneration/regenerationMarkers_{ct}_dotplot.pdf', bbox_inches='tight')

- spec: ZFP36, AREG, TNFAIP3, FOS, JUND, SRGN, DUSP1, BTG1, IL7R
- ILC all: NFKBIA, HLA-A/B/C, CD69
- ILC2/3: FXYD5
- ILC1/3: TYROBP, CD74, CTSW
- ILC2: KLRG1

In [None]:
# Strong (|log2FC| >= 1.3), significant (padj < 0.05) DEGs among the regeneration genes
regen_genes = [g for genes in regen_markers.values() for g in genes]
regen_hits = deg_df.loc[
    (deg_df['gene_name'].isin(regen_genes))
    & (deg_df['log2FoldChange'].abs() >= 1.3)
    & (deg_df['padj'] < 0.05),
    :,
]
# Long-format lookup of panel membership (columns: program, variable, gene_name)
regen_program_table = (
    pd.DataFrame.from_dict(regen_markers, orient='index')
    .reset_index(names = 'program')
    .melt(id_vars = 'program', value_name = 'gene_name')
    .dropna()
)
# Right join keeps every panel gene; dropna then removes rows with any missing value
regen_deg = regen_hits.merge(regen_program_table, how = 'right').dropna()
regen_deg

In [None]:
import matplotlib.colors as mcolors
import textwrap

# Dot "heatmap" of regeneration DEGs: genes on x, cell types on y, one facet per programme
df = regen_deg.copy()
# Insert break opportunities after ')' and '/'; regex=False because ')' alone is
# not a valid regular expression and the default of `regex` differs between pandas versions
df['program'] = df['program'].str.replace(')', ') ', regex=False).str.replace('/', '/ ', regex=False)

# Create a divergent colormap centered around 0
vmin, vmax, vcenter = df['log2FoldChange'].min(), df['log2FoldChange'].max(), 0
# TwoSlopeNorm needs vmin < vcenter < vmax; if all DEGs change in the same
# direction, mirror the populated side so the norm can still be built
if vmin >= vcenter:
    vmin = -vmax
if vmax <= vcenter:
    vmax = -vmin
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)

# Facet widths proportional to the number of distinct genes per programme
xranges = df.groupby("program")["gene_name"].nunique()
xranges *= 1.1
# NOTE(review): dot size follows the signed log2FoldChange, so strongly down-regulated
# genes presumably get the smallest dots — confirm this is intended
g = sns.relplot(data=df, x='gene_name', y='cell_type', hue='log2FoldChange', size='log2FoldChange',
                palette=cmap, hue_norm=normalize, height=5, aspect=0.5,
                col='program', col_order=xranges.index, legend=None,
                facet_kws={'sharey': True, 'sharex': False, 'gridspec_kws': dict(width_ratios=xranges)})

# Function to wrap text
def wrap_text(text, width=15):
    """Return *text* re-flowed to lines of at most *width* characters."""
    return '\n'.join(textwrap.wrap(text, width))

# Apply the text wrapping function to facet titles
g.set_titles("{col_name}", fontweight='bold')
for ax in g.axes.flat:
    title = ax.get_title()
    ax.set_title(wrap_text(title), fontweight='bold')

g.set_xticklabels(rotation=90)
g.set_xlabels('Gene')
g.set_ylabels('Cell type')

g.tight_layout()

# Add colorbar (legend=None above, so the colour scale is drawn manually)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
g.figure.colorbar(sm, ax=g.axes, orientation='vertical', label='log2FoldChange', pad=0.01)

plt.savefig(f'{plots_path}/phenoAnalysis/genePrograms/{gene_program}/thyAgeing_tCells_{gene_program}_deg_heatmap.pdf', dpi = 300, bbox_inches = 'tight')

## Recirculating T cells

In [None]:
# UMAP of all cells coloured by PRDM1 expression
sc.pl.umap(adata, color = 'PRDM1')

In [None]:
# For every gene and direction (up_down): in how many populations whose label ends
# with 'recirc' it is a strong (|log2FC| >= 1.3), significant (padj < 0.05) DEG, and in which ones
strong_and_significant = (deg_df['log2FoldChange'].abs() >= 1.3) & (deg_df['padj'] < 0.05)
in_recirc = deg_df['cell_type'].str.endswith('recirc')
recirc_n_deg = (
    deg_df.loc[strong_and_significant & in_recirc, :]
    .groupby(['gene_name', 'up_down'])
    .agg(n_cell_types = ('cell_type', 'nunique'),
         cell_types = ('cell_type', list))
    .reset_index()
    .sort_values('n_cell_types', ascending = False)
)

recirc_n_deg.head(50)

In [None]:
# Number of gene/direction entries per count of affected recirculating populations
recirc_n_deg.value_counts('n_cell_types')

In [None]:
# All DEG rows (no effect-size or padj filter) in 'recirc' populations for genes
# that pass the thresholds in at least 3 of those populations
shared_recirc_genes = recirc_n_deg.loc[recirc_n_deg['n_cell_types'] >= 3, 'gene_name'].unique()
is_shared_gene = deg_df['gene_name'].isin(shared_recirc_genes)
is_recirc_population = deg_df['cell_type'].str.endswith('recirc')
recirc_deg = deg_df.loc[is_shared_gene & is_recirc_population, :]

In [None]:
%%R -i recirc_deg -h 200 -w 500 -u mm

# Dot plot of the shared recirculating-T DEGs: genes on x, cell populations on y,
# colour = log2FoldChange, size = -log10(padj)
gene_program = 'recirc'

# NOTE(review): the colour scale is limited to [-7.5, 7.5]; values outside that range
# presumably fall back to the scale's NA colour — confirm no DEG exceeds the limits.
# NOTE(review): col_cell_type_fine_levels, plots_path and theme_simple are not defined
# here — presumably set up in an earlier R cell.
recirc_deg %>%
dplyr::mutate(cell_type := factor(!!rlang::sym('cell_type'), levels = col_cell_type_fine_levels),
              #program = prettify_labels(program, 25)
              ) %>%
ggplot(aes(x = gene_name, y = cell_type, color = log2FoldChange, size = -log10(padj))) +
geom_point() +
#ggforce::facet_row(~program, scales = 'free_x', space = 'free') +
labs(x = 'Gene', y = 'Cell population', color = 'log2FC', size = '-log10(padj)') +
scale_color_gradientn(colours = colorRampPalette(rev(grafify::graf_palettes$OrBl_div))(100),
                      breaks=seq(-5,5,5), limits=c(-7.5,7.5), guide = guide_colorbar(barwidth = unit(5,'cm'))) +
theme_simple(facet = F) +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
      strip.text = element_text(size = 8, angle = 90), legend.position = 'top') 
# Save the last plot into the folder of this gene programme
ggsave(paste0('thyAgeing_recircT_', gene_program,'Degs_dotplot.pdf'), path = file.path(plots_path, 'phenoAnalysis', 'genePrograms', gene_program),
       height = 100, width = 500, units = 'mm', dpi = 300)