# Thymus ageing atlas: Medullary B cells

In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import anndata as ad
import hdf5plugin
import scFates as scf

import warnings
warnings.filterwarnings("ignore", category=ad.ImplicitModificationWarning)

# Add repo path to sys path (allows to access scripts and metadata from repo)
#repo_path,_ = os.path.split(os.path.split(os.getcwd())[0])
# NOTE: repo_path is an absolute, user-specific location; the commented line above derives it
# from the current working directory instead
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/B_compartment'
sys.path.insert(1, repo_path) 
sys.path.insert(2, '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts')

# Autoreload custom scripts
%load_ext autoreload
%autoreload 2

# Define paths
# NOTE: plots_path and data_path already end with '/'; later cells append '/...' again, so the
# resulting paths contain a double slash
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'
model_path = os.path.join(repo_path, 'models')
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print('Dir for plots: {}'.format(plots_path))
print('Dir for data: {}'.format(data_path))

# Formatting
from matplotlib import font_manager
# Scanpy figure defaults: 150 dpi on screen, 300 dpi when saving, PDF output
sc.settings.set_figure_params(dpi = 150, color_map = 'RdPu', dpi_save = 300, vector_friendly = True, format = 'pdf')
# Register the Arial font file with matplotlib, then apply the project style sheet
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
# (resolved through the two sys.path entries inserted above)
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels,age_group_levels
from plotting.utils import plot_grouped_boxplot,calc_figsize,thyAgeing_colors,plot_faceted_grouped_boxplot

In [None]:
# Load the B-compartment object
object_version = 'v5_2025-04-16'
adata = ad.read_h5ad(f'{data_path}/objects/rna/thyAgeing_bSplit_scvi_{object_version}.zarr')

# Swap any annotation columns already present in obs for the curated annotation table
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
stale_cols = [col for col in ct_anno.columns if col in adata.obs.columns]
adata.obs.drop(columns = stale_cols, inplace = True)
adata.obs = adata.obs.join(ct_anno, how = 'left')

# Keep only cells whose annotation status is 'include'
keep_cells = adata.obs['anno_status'] == 'include'
adata = adata[keep_cells]

# Refresh obs from the most recent metadata sheet
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

adata

In [None]:
# Define columns
col_cell_type_broad = 'taa_l4'
col_cell_type_fine = 'taa_l5'

# Keep only the B-lineage levels that occur in this object, in the order given by get_ct_levels.
# The observed labels are collected once into a set instead of being recomputed for every level.
observed_broad = set(adata.obs[col_cell_type_broad].unique())
observed_fine = set(adata.obs[col_cell_type_fine].unique())
col_cell_type_broad_levels = [c for c in get_ct_levels(col_cell_type_broad, taa_l1 = ['B']) if c in observed_broad]
col_cell_type_fine_levels = [c for c in get_ct_levels(col_cell_type_fine, taa_l1 = ['B']) if c in observed_fine]

col_age_group = 'age_group'
# Resolve the imported `<col_age_group>_levels` list by name without going through eval()
col_age_group_levels = globals()[f'{col_age_group}_levels']

In [None]:
# Subset to GC-like B cells
# na=False: cells without a broad label are excluded rather than producing a non-boolean mask
adata_sub = adata[adata.obs[col_cell_type_broad].str.contains('GC', na=False)].copy()

## GC formation

**ILCs negatively regulating GC formation:** <br>
*(Haddad, 2021)*

The GC-Tfh/GC-B cell interactions are tightly regulated by receptor/ligand interactions including CD40L/CD40 and ICOS/ICOSL as well as secretion of cytokines that can mediate GC-Tfh and GC-B cell help, such as IL-21, IL-4, IL-6, and IL-10. All cells entering the B cell follicles (the dark zone) respond to CXCL13 gradients via upregulation of their CXCR5 receptor and modulation of CCR7 expression, which is required for their entry into the germinal center [13,14]. Within the germinal center, activated Tfh cells produce Interleukin-21 (IL-21), and express co-activation markers such as CD40L and ICOS, to allow for appropriate interaction and activation of the GC-B cells. Currently, there are fundamental gaps in understanding the cellular and molecular mechanisms during immune dysregulation and irregularities in germinal centers.

These ILCs expressed similar amounts of common ILC markers when compared to ILC3 positive controls including IL-2Rγ, CD7, IL12RB1, and CCR6, but lacked or exhibited very low expression of markers for ILC1 (NKp44, CD56, Tbet), ILC2 (KLRG1, GATA3), and ILC3 (NKp44) or other leukocyte lineage markers [16,17,18] shown by flow cytometry (Fig. 1c). ILCFR lack the surface protein expression of IL7RA, a common surface marker for ILCs, which has recently been shown to have a redundant signaling role to IL15RA as ILCs persist in the small intestinal lamina propria (siLP) of adult and neonatal Il7ra KO mice. IL-15 sustains wild-type and Il7ra KO ILC survival in vitro and compensates for IL-7R deficiency [19].

**T cell interactions with B cells during germinal center formation, a three-step model** <br>
*(Biram, 2019)*

- Naive B cells are attracted to GCs via CXCR5 (binding to CXCL13 in GCs)
- Naive T cells express CCR7 and reside in the T-cell zone, where the ligands for this GPCR, CCL21 and CCL19, are expressed
- Upon antigen encounter and BCR activation, B cell motility is reduced. Several hours after antigen encounter, B cells regain their motility and migrate to the boundary between the B-cell-rich follicles and the T-cell zone. This movement is orchestrated by upregulation of CCR7, which binds CCL21 and CCL19, chemotactic signals that are expressed by stromal cells in the T-cell area. In addition to CCR7 expression in B cells, the Epstein-Barr virus-induced molecule 2 (EBI2) is upregulated upon B-cell activation.
- Entry of B cells into the GC is accompanied by robust downregulation of EBI2 and an increase in expression of sphingosine-1-phosphate receptor 2 (S1PR2). In humans, an additional G protein-coupled receptor, P2RY8 prevents GC confinement, and its downregulation is likely to be required for generation of an intact GC response.
- In the case of T and B cells, interactions of three major pairs of molecules constitute the adhesive machinery essential for long-lasting T-cell engagement with B cells during GC seeding: (a) Signaling lymphocytic activation molecule (SLAM: SLAM (CD150, Slamf1), Ly9 (CD229, Slamf3), 2B4 (CD244, Slamf4), CRACC (CD319), Ly108 (Slamf6, NTB-A in human), and CD84) homophilic interactions that are not subjected to regulation by MHCII or TCR signals,63 (b) TCR interactions with MHCII loaded with cognate peptides (pMHCII, CD40L, ICOS), and (c) LFA-1 activation on T cells (LFA-1 (αLβ2, CD11a/CD18) and very late antigen, VLA-4 (α4β1, CD49d/CD29)) and its binding to ICAMs expressed by B cells
- The first study that examined plexins in immune cells showed that T cells express Plexin B1 (PlxnB1), while B cells express the ligand, Sema4D, and that this interaction provides survival and proliferation signals to B cells.137 Plexin D1 (PlxnD1) was found to be essential for GC formation, and it was shown that PlxnD1-deficient B cells fail to migrate toward the GC chemokines or to produce IgG1 antibodies.138 In addition, Plexin B2 (PlxnB2) was found to be highly expressed in GC B cells in a T-cell-dependent manner together with other axon guidance genes, such as BASP1.139, 140 The PlxnB2 ligand, Sema4C is specifically expressed in Tfh cells and increased surface expression resulted in effective positioning of Tfh cells in the GC. PlxnB2 expression in GC B cells is not essential for GC formation, but was shown to promote effective surface engagements of T and B cells, PC generation, and antibody affinity maturation. 


In [None]:
# Scale every cell to a total of 1e4 and log1p-transform, modifying adata_sub in place
# NOTE(review): assumes adata_sub.X still holds raw counts at this point — confirm
sc.pp.normalize_total(adata_sub, target_sum=1e4)
sc.pp.log1p(adata_sub)

In [None]:
import warnings
warnings.filterwarnings('ignore', category=ad.ImplicitModificationWarning)

# B-cell genes of interest for GC formation, grouped by process (dict order = panel order)
# NOTE(review): the notes above describe EBI2 being upregulated on B-cell activation, whereas
# 'EBI3' is listed under 'BCR_activation' — confirm the intended gene
b_gc_goi = {
    'DZ_entry': ['CXCR5', 'AICDA'],
    'LZ_entry': ['CCR7'],
    'APC': ['AIRE', 'LTB', 'LTA'],
    'BCR_activation': ['EBI3', 'S1PR2'],
    'T_B_interaction': ['ICAM1', 'ICAM2', 'ICAM3', 'ICAM4', 'ICAM5', 'CD40', 'ICOSLG'],
    'GC_formation': ['PLXNB1', 'PLXNB2', 'BASP1', 'P2RY8', 'BATF'],
    'IGs': ['IGHA1', 'IGHA2', 'IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGKC', 'IGLC1', 'IGLC2', 'IGHE', 'IGHD'],
}
# T-cell counterpart (defined here but not plotted in this cell)
t_gc_goi = {
    'LZ_entry': ['CCR7'],
    'SLAM_interaction': ['SLAMF1', 'LY9', 'CD244', 'SLAMF7', 'SLAMF6', 'CD84'],
    'LFA1_activation': ['ITGB2', 'ITGA4', 'SEMA4B'],
    'BCR_activation': ['CD40LG', 'ICOS'],
    'interleukin_signalling': ['IL21', 'IL4', 'IL6', 'IL10'],
}

# Dot plot of the B-cell gene sets per fine cell type and age group, with group totals, saved as PNG
gc_formation_plot = sc.pl.DotPlot(
    adata_sub,
    var_names=b_gc_goi,
    groupby=['taa_l5' , 'age_group'],
    mean_only_expressed=True,
    cmap='magma',
)
gc_formation_plot = gc_formation_plot.add_totals()
gc_formation_plot.savefig(
    os.path.join(plots_path, 'phenoAnalysis', 'GC_like', 'thyAgeing_bGC_gcFormationMarkers_dotplot.png'),
    bbox_inches='tight',
    dpi=300,
)

In [None]:
# Marker panels for the GC-like populations (dict order = panel order in the plot)
b_markers = {'DZ' : ['AICDA', 'CXCR4', 'MYC', 'MKI67', 'TOP2A', 'PCNA', 'BACH2', 'TCF3', 'PAX5', 'IRF4', 'MEF2B', 'FOXO1'],
               'LZ' : ['CXCR5', 'CD83', 'CD86', 'MYBL1', 'SOCS3', 'CD40'],
               'T cell contact': ['CXCL10', 'CCL5', 'CCL3'],
               'BCR activation' : ['CCL22', 'CCL17', 'EBI3', 'CCL3', 'ICAM1'],
               'B_med' : ['HLA-DRA', 'HLA-DRB1','AIRE', 'IL15', 'LTA', 'LTB', 'PTPRC', 'CD5', 'SPN', 'CD80' ,'LY6G6C'],}

# Exclude populations whose fine label contains 'locnt'. The subset and the set of labels
# present in it are built once instead of once per level inside the comprehension.
adata_plot = adata_sub[~adata_sub.obs[col_cell_type_fine].str.contains('locnt')]
present_fine = set(adata_plot.obs[col_cell_type_fine])
plot_order = [c for c in col_cell_type_fine_levels if c in present_fine]

sc.pl.DotPlot(adata_plot,
                b_markers,
                categories_order=plot_order,
                groupby = col_cell_type_fine,
                figsize = calc_figsize(width = 150, height = 20),
                mean_only_expressed=True,
                cmap = 'magma').add_totals(size = 0.3).style(smallest_dot=0, largest_dot = 40, dot_edge_lw=0.05).savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_gcSplit_gcMarkers_dotplot.pdf')

In [None]:
# Rank genes per taa_l5 group with a Wilcoxon test (100 genes per group, using .X rather than .raw)
sc.tl.rank_genes_groups(
    adata_sub,
    groupby='taa_l5',
    method='wilcoxon',
    n_genes=100,
    use_raw=False,
    key_added='rank_genes_groups',
)
# One ranked-gene table per taa_l5 category, keyed by category name
fine_groups = adata_sub.obs['taa_l5'].cat.categories
gene_df = {
    grp: sc.get.rank_genes_groups_df(adata_sub, group=grp, key='rank_genes_groups')
    for grp in fine_groups
}

[Basso,2012](https://rupress.org/jem/article/209/13/2455/41016/BCL6-positively-regulates-AID-and-germinal-center): In B cells, miR-155 has been shown to modulate the expression of AICDA (also known as AID; Dorsett et al., 2008; Teng et al., 2008), as well as of other genes important for the GC reaction, including those involved in differentiation, such as SPI1 (Vigorito et al., 2007) and CEBPB (Costinean et al., 2009), in B cell migration, such as HGAL (Dagan et al., 2012), in TGFB1 and BMP signal transduction, such as SMAD5 (Rai et al., 2010), and in BCR and PI3K signaling, such as SHIP1 (Costinean et al., 2009; Pedersen et al., 2009). Accordingly, mice lacking miR-155 display a reduced number of GC B cells and compromised affinity maturation (Rodriguez et al., 2007; Thai et al., 2007), whereas mice engineered to constitutively express miR-155 in mature B cells show an increase in GC B cells and an enhanced antibody response (Thai et al., 2007).

In [None]:
# First 50 rows of the ranked-gene table for the 'B_GC-like_AIRE+' group
gene_df['B_GC-like_AIRE+'].head(50)

In [None]:
# First 50 rows of the ranked-gene table for the 'B_GC-like_prolif' group
gene_df['B_GC-like_prolif'].head(50)

In [None]:
# First 50 rows of the ranked-gene table for the 'B_GC-like' group
gene_df['B_GC-like'].head(50)

In [None]:
# UMAP of cells with taa_l1 == 'B', excluding the 'B_dev_thy' population, coloured by fine cell type
sc.pl.umap(adata[(adata.obs['taa_l1'] == 'B') & (adata.obs['taa_l5'] != 'B_dev_thy')], color = ['taa_l5'])

Plot LTo/LTi markers across all cell populations:

In [None]:
# Load adata (all-cell object; this replaces the B-split object held in `adata`)
object_version = 'v5_2025-04-03'
adata = ad.read_h5ad(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr')

# Add new annotations to adata
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
# Drop annotation columns already present in obs first, as the other load cells do;
# DataFrame.join raises on overlapping column names when no suffix is given
overlapping_cols = [c for c in ct_anno.columns if c in adata.obs.columns]
adata.obs.drop(columns = overlapping_cols, inplace = True)
adata.obs = adata.obs.join(ct_anno, how = 'left')
adata = adata[(adata.obs['anno_status'] == 'include')]

# Update metadata
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

# Scale every cell to a total of 1e4 and log1p-transform, in place
# NOTE(review): assumes .X holds raw counts when loaded — confirm
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

In [None]:
# Marker genes for lymphoid tissue organiser (LTo) and inducer (LTi) cells
tls_markers = {'LTo': ['LTBR', 'TNFRSF1A', 'TNFRSF1B', 'CXCL13', 'CCL19', 'CCL21'],
               'LTi': ['TNF', 'TNFSF14', 'LTA', 'LTB']
 }

# Report markers absent from the object that is actually plotted (`adata`, not `adata_sub`).
# Printed explicitly, because a bare expression in the middle of a cell is not displayed.
missing_tls_genes = [g for genes in tls_markers.values() for g in genes if g not in adata.var_names]
print(f'TLS markers missing from adata.var_names: {missing_tls_genes}')

# Dot plot across all broad cell types, with totals and swapped axes, saved as PNG
sc.pl.DotPlot(adata,
              var_names=tls_markers,
              groupby=['taa_l4'],
              mean_only_expressed=True,
              cmap = 'magma',).add_totals().swap_axes().savefig(os.path.join(plots_path, 'phenoAnalysis', 'GC_like', 'thyAgeing_allCells_gcFormationMarkers_dotplot.png'), bbox_inches='tight', dpi=300)

In [None]:
# Load the adult-vs-infant DE results and stack them into a single long table
# NOTE: pickle files execute code on load; only read pickles from trusted project locations
dea_path = f'{general_data_path}/analyses/dea/thyAgeing_dea_taa_l4_adult_vs_infant_ageEffect.pkl'
degs_by_cell_type = pd.read_pickle(dea_path)
degs = pd.concat(degs_by_cell_type).reset_index(names = ['cell_type', 'gene_name'])

# '*' marks adjusted p-values below 0.05; everything else (including missing values) gets ''
degs['is_signif'] = np.where(degs['padj'] < 0.05, '*', '')

degs.head()

In [None]:
# DE results restricted to the genes listed in tls_markers
degs.loc[(degs['gene_name'].isin([g for g in tls_markers.values() for g in g]))]

In [None]:
import matplotlib

# Restrict the DE table to the TLS marker genes once and reuse it for both pivots
tls_genes = [g for genes in tls_markers.values() for g in genes]
tls_degs = degs.loc[degs['gene_name'].isin(tls_genes)]

# Rows = cell types, columns = genes
df = tls_degs.pivot_table(index = 'cell_type', columns = 'gene_name', values = 'log2FoldChange')
df_annot = tls_degs.pivot_table(index = 'cell_type', columns = 'gene_name', values = 'is_signif', aggfunc = 'first', fill_value='')

# Align the annotation grid to the heatmap grid: the two pivots can differ in shape when
# a row or column has no usable log2FoldChange, and seaborn requires matching shapes.
# (Replaces the former applymap step, which is deprecated in recent pandas and left the values unchanged.)
df_annot = df_annot.reindex(index = df.index, columns = df.columns).fillna('')

plt.figure(figsize = calc_figsize(width = 80, height = 280))
p = sns.heatmap(df, cmap='PuOr_r', center=0, vmin=-10, vmax=10, cbar_kws={'label': 'log2FC'}, xticklabels=True, yticklabels=True,
                annot=df_annot.to_numpy(), fmt="", annot_kws={'fontsize': 10, 'va': 'top', 'ha': 'center', 'ma' : 'center'})

# Hook for nudging the significance stars; the offset is currently (0, 0), i.e. no shift
for t in p.texts:
    trans = t.get_transform()
    offs = matplotlib.transforms.ScaledTranslation(0, 0,
                    matplotlib.transforms.IdentityTransform())
    t.set_transform( offs + trans )

# Columns of the pivot are genes (x axis); rows are cell types (y axis)
p.set_xlabel('Gene')
p.set_xticklabels(p.get_xticklabels(), rotation = 90)
p.set_ylabel('Cell type')
p.set_yticklabels(p.get_yticklabels(), rotation = 0)
p.figure.tight_layout()
#p.figure.set_size_inches(calc_figsize(width = 80, height = 60))
plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_tlsFormation_degs_heatmap.pdf', bbox_inches = 'tight')

## BCR analysis

In [None]:
# Add BCR metadata
# NOTE(review): when the notebook is run top to bottom, `adata` is the all-cell object loaded
# above while this table is named after the B-split object; the left join keeps every cell in
# adata.obs. Re-running this cell would join the same columns a second time — confirm intended use.
bcr_meta = pd.read_csv(f'{data_path}/objects/rna/thyAgeing_bSplit_scvi_v4_2024-11-06_bcr_v3_2025-02-19.csv', index_col = 0)
adata.obs = adata.obs.join(bcr_meta, how = 'left')

# Preview rows whose clone id is not the 'No_contig' placeholder
bcr_meta.loc[bcr_meta['changeo_clone_id'] != 'No_contig'].head()

In [None]:
# List all columns provided by the BCR metadata table
bcr_meta.columns.to_numpy()

## Isotype status

In [None]:
# Per-cell table with sample/donor ids, cell-type labels, age group and all BCR columns
obs_cols = ['sample', 'donor', col_cell_type_fine, col_cell_type_broad, col_age_group] + bcr_meta.columns.tolist()
anno_df = adata.obs[obs_cols].copy()

# Samples in which every cell has isotype_status 'No_contig' are removed
isotype_freq = anno_df.groupby('sample')['isotype_status'].value_counts(normalize=True).reset_index(name='freq')
only_no_contig = (isotype_freq['freq'] == 1) & (isotype_freq['isotype_status'] == 'No_contig')
remove_samples = isotype_freq.loc[only_no_contig, 'sample'].unique().tolist()

anno_df = anno_df[~anno_df['sample'].isin(remove_samples)].copy()
anno_df.head()

In [None]:
# Isotype-status frequencies per broad cell population (freq_by_donor presumably averages
# samples within donors — see the helper), with the 'No_contig' category dropped before plotting
df = freq_by_donor(anno_df, sample_col = 'sample', donor_col = 'donor', summary_col='isotype_status', cluster_col=[col_cell_type_broad], add_meta = [col_age_group])
df = df.loc[df['isotype_status'] != 'No_contig'].copy()
# The stats prefix now carries the cell-type column, matching the plot file name, so the
# broad- and fine-level runs no longer write to the same location
plot_grouped_boxplot(data = df, x = col_cell_type_broad, y = 'mean_prop', hue = 'isotype_status', order = col_cell_type_broad_levels, hue_order = ['IgM','IgD','IgM/IgD', 'IgA', 'IgG', 'IgE', 'Multi'], 
                     x_label = 'Cell population', y_label = 'Mean proportion', legend_title = 'Isotype', add_stats = False, format_percent = True, figsize = calc_figsize(width = 80, height = 50),
                     save_stats= f'{data_path}/analyses/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_cell_type_broad}_isotypeStatus_meanProp',
                     palette = list(thyAgeing_colors.values()), remove_legend= False,
                     legend_kwargs = {'bbox_to_anchor':(1.05, 1), 'loc':'upper left', 'frameon':True,})
plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_cell_type_broad}_isotypeStatus_boxplot.pdf', bbox_inches = 'tight', dpi = 300)

In [None]:
# Isotype-status frequencies per fine cell population (freq_by_donor presumably averages
# samples within donors — see the helper), with the 'No_contig' category dropped before plotting
df = freq_by_donor(anno_df, sample_col = 'sample', donor_col = 'donor', summary_col='isotype_status', cluster_col=[col_cell_type_fine], add_meta = [col_age_group])
df = df.loc[df['isotype_status'] != 'No_contig'].copy()
# The stats prefix now carries the cell-type column, matching the plot file name, so the
# broad- and fine-level runs no longer write to the same location
plot_grouped_boxplot(data = df, x = col_cell_type_fine, y = 'mean_prop', hue = 'isotype_status', order = col_cell_type_fine_levels, hue_order = ['IgM','IgD','IgM/IgD', 'IgA', 'IgG', 'IgE', 'Multi'], 
                     x_label = 'Cell population', y_label = 'Mean proportion', legend_title = 'Isotype', add_stats = False, format_percent = True, figsize = calc_figsize(width = 100, height = 60),
                     save_stats= f'{data_path}/analyses/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_cell_type_fine}_isotypeStatus_meanProp',
                     palette = list(thyAgeing_colors.values()), remove_legend= False,
                     legend_kwargs = {'loc':'upper center', 'ncol': df['isotype_status'].nunique(), 'bbox_to_anchor':(0.5, 1.4), 'frameon':False,})
plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_cell_type_fine}_isotypeStatus_boxplot.pdf', bbox_inches = 'tight', dpi = 300)

In [None]:
# Preview the per-cell annotation table (the former `annno` was an undefined name)
anno_df.head()

### BCR mutational load

In [None]:
# Mean light-chain mutation metrics per donor and cell population:
# cells are averaged within each sample first, then samples within each donor
col_ct = col_cell_type_broad
mu_cols = ['mu_count_IGL', 'mu_freq_IGL', 'mu_count_IGK', 'mu_freq_IGK']
per_sample = anno_df.groupby(['donor', 'sample', col_ct], observed = True)[mu_cols].mean().reset_index()
df = per_sample.groupby(['donor', col_ct], observed = True)[mu_cols].mean().reset_index()

# Attach each donor's age group
donor_age = anno_df[['donor', col_age_group]].drop_duplicates()
df = df.merge(donor_age, on = 'donor', how = 'left')
df.head()

In [None]:
# Preview the long-format table: one row per donor, cell population, age group and metric
df.melt(id_vars = ['donor', col_ct, col_age_group], value_vars = ['mu_count_IGL', 'mu_freq_IGL', 'mu_count_IGK', 'mu_freq_IGK'],
        var_name = 'metric', value_name = 'value').head()


In [None]:
# Faceted boxplots of the four light-chain mutation metrics per cell population, split by age group
# The x-axis order has to come from the same annotation level as col_ct
col_ct_levels = col_cell_type_broad_levels if col_ct == col_cell_type_broad else col_cell_type_fine_levels
# NOTE(review): get_tint_palette is not imported anywhere in the visible part of this notebook —
# confirm it is imported (presumably from plotting.utils) before this cell runs
plot_faceted_grouped_boxplot(data = df.melt(id_vars = ['donor', col_ct, col_age_group], value_vars = ['mu_count_IGL', 'mu_freq_IGL', 'mu_count_IGK', 'mu_freq_IGK'], var_name = 'metric', value_name = 'value'), 
                             x = col_ct, y = 'value', hue = col_age_group, hue_order = col_age_group_levels, order = col_ct_levels,
                             palette = get_tint_palette(thyAgeing_colors['magenta']),
                             facet_kwargs = dict(col = 'metric', col_wrap = 2, col_order = ['mu_count_IGL', 'mu_count_IGK', 'mu_freq_IGL', 'mu_freq_IGK'], sharey = False, sharex = True),
                             add_stats = True, #save_stats = f'{data_path}/analyses/vdj/cdr3Analysis/thyAgeing_devSplit_TRB_kidera_prod_', 
                             format_percent = False, format_log = False, x_label = 'Cell population', y_label = 'Mean mutation count / freq.', legend_title='Age group', figsize = calc_figsize(width = 80, height = 80),
                             ylim = None, #legend_kwargs = dict(loc = "upper left", bbox_to_anchor=(1, 1))
                             )
plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_ct}_bcrMutationalLoad_boxplot.pdf', bbox_inches = 'tight', dpi = 300)

In [None]:
# Boxplot of the mean IGL mutation frequency per cell population, split by age group
# The x-axis order has to come from the same annotation level as col_ct
col_ct_levels = col_cell_type_broad_levels if col_ct == col_cell_type_broad else col_cell_type_fine_levels
# NOTE(review): get_tint_palette is not imported anywhere in the visible part of this notebook —
# confirm it is imported (presumably from plotting.utils) before this cell runs
plot_grouped_boxplot(data = df, x = col_ct, y = 'mu_freq_IGL', order = col_ct_levels, 
                     hue = col_age_group, hue_order = col_age_group_levels, palette = get_tint_palette(thyAgeing_colors['magenta']),
                     # hue is the age group, so the legend is titled accordingly
                     x_label = 'Cell population', y_label = 'Mean IGL mutation freq.', legend_title = 'Age group', add_stats = False, format_percent = True, 
                     figsize = calc_figsize(width = 100, height = 60), remove_legend=False,
                     save_stats= f'{data_path}/analyses/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_ct}_muIGL_meanProp',
                     #palette = list(thyAgeing_colors.values()), remove_legend= False,
)
plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_ct}_muIGL_meanProp_boxplot.pdf', bbox_inches = 'tight', dpi = 300)

### Somatic hypermutation

## Differential abundance analysis

In [None]:
# Reload the B-compartment object for the differential abundance analysis
object_version = 'v5_2025-04-16'
adata = ad.read_h5ad(f'{data_path}/objects/rna/thyAgeing_bSplit_scvi_{object_version}.zarr')

# Swap annotation columns already present in obs for the curated annotation table
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
overlapping_cols = [col for col in ct_anno.columns if col in adata.obs.columns]
if any(overlapping_cols):
    adata.obs.drop(columns = overlapping_cols, inplace = True)
adata.obs = adata.obs.join(ct_anno, how = 'left')

# Keep only cells whose annotation status is 'include'
keep_cells = adata.obs['anno_status'] == 'include'
adata = adata[keep_cells]

# Refresh obs from the most recent metadata sheet
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

adata

In [None]:
# Set up pertpy's Milo implementation for neighbourhood-level differential abundance testing
import pertpy
milo = pertpy.tl.Milo()

# Construct nhoods
# The neighbour graph is built on the scVI representation (obsm key 'X_scVI') with 50 neighbours
mdata = milo.load(adata)
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", n_neighbors=50)
milo.make_nhoods(mdata["rna"], prop=0.1)
# Count nhoods
# Counts are taken per donor (sample_col="donor"), not per sample
mdata = milo.count_nhoods(mdata, sample_col="donor")

In [None]:
# Create and reorder categories
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["age_group"]
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].astype("category")
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].cat.reorder_categories(['infant','paed','adult','aged'])

comparisons = [('aged', 'adult'), ('adult', 'paed'), ('paed', 'infant'), ('adult', 'infant'),]
milo_dict = {}
for c in comparisons:
    # Differential abundance testing
    contrast = f'da_age_group{c[0]}-da_age_group{c[1]}'
    milo.da_nhoods(mdata, design="~da_age_group+sex", model_contrasts=contrast)
    
    milo_dict[f'{c[0]}_vs_{c[1]}'] = mdata['milo'].var.copy()

In [None]:
# Annotate nhoods and save as dataframe
milo.annotate_nhoods(mdata, anno_col = col_cell_type_fine)
milo_df = pd.concat(milo_dict, axis=0).reset_index(names=['comparison','nhood_id'])
milo_df = milo_df.merge(mdata['milo'].var[[ 'nhood_annotation', 'nhood_annotation_frac']].reset_index(names = 'nhood_id'), on = 'nhood_id', how = 'left')
milo_df.head()
milo_df.to_csv(f'{data_path}/objects/rna/thyAgeing_bSplit_scvi_{object_version}_milo_ageGroups.csv')

In [None]:
# Read in the milo dataframe
milo_df = pd.read_csv(f'{data_path}/objects/rna/thyAgeing_bSplit_scvi_{object_version}_milo_ageGroups.csv', index_col = 0)
milo_df.head()

In [None]:
from plotting.utils import thyAgeing_colors,thyAgeing_greys

In [None]:
# Median logFC (rounded to 2 decimals) per comparison and neighbourhood annotation
df_median = milo_df.groupby(['comparison', 'nhood_annotation'])['logFC'].median().round(decimals = 2).reset_index()
df_median

In [None]:
# Milo beeswarm plot: split violins of neighbourhood logFC per fine cell type for two
# age-group contrasts, with significant neighbourhoods (SpatialFDR < 0.05) highlighted
groups_of_interest = ['adult_vs_paed', 'paed_vs_infant']
group_labels = ['-> adult', '-> paed']
df = milo_df.loc[milo_df['comparison'].isin(groups_of_interest)].copy()
df['nhood_annotation'] = pd.Categorical(df['nhood_annotation'], categories=col_cell_type_fine_levels, ordered=True)
# remove_unused_categories() returns a new Series, so the result has to be assigned back
df['nhood_annotation'] = df['nhood_annotation'].cat.remove_unused_categories()
df['is_sig'] = df['SpatialFDR'] < 0.05

# Calculate median logFC for each plotted nhood_annotation and comparison.
# Computed from the plotted subset (as in the GC-only cells) with observed=True, so no label
# is produced for an annotation/comparison pair that has no neighbourhoods on the axis.
df_median = df.groupby(['comparison', 'nhood_annotation'], observed=True)['logFC'].median().round(decimals=2).reset_index()
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

args = {'x': 'logFC', 'y': 'nhood_annotation', 'hue': 'comparison', 'hue_order': groups_of_interest}

# Figure height scales with the number of cell types actually shown
n_cell_types = len(df['nhood_annotation'].cat.categories)
plt.figure(figsize=calc_figsize(height = n_cell_types*5+20, width = 60))
# ax = plt.axes()
ax = sns.violinplot(**args, data = df,
                    bw_adjust=.8, cut=0, split=True, density_norm = 'width',
                    gap = 0.3,
                    palette = [thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor = 'black', linewidth = 0, inner=None,
)
# Get the legend from just the violin plot (kept for the optional legend block below)
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
# Non-significant neighbourhoods in grey, significant ones in magenta on top
sns.stripplot(**args, data = df.loc[~df['is_sig']],
              palette = [thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha = 0.5, ax=ax)
sns.stripplot(**args, data = df.loc[df['is_sig']],
              palette = [thyAgeing_colors['magenta'], thyAgeing_colors['magenta']],
              dodge=True, size=0.5, alpha = 0.5, ax=ax)

plt.xlabel('log2(FC)')
plt.ylabel('Cell type')

# Remove the old legend
ax.get_legend().remove()
# # Add just the handles/labels from the box plot back
# ax.legend(
#     handles,
#     ['-> Adult', '-> Paed'],
#     title='Comparison',
#     loc='upper center',
#     bbox_to_anchor=(0.5, 1.2),
#     ncol=2
# )

# Label the two violin halves next to the first cell type (above / below its baseline)
first_label = ax.get_yaxis().get_ticklabels()[0].get_text()
label_x = ax.get_xlim()[0] + 0.1
plt.text(x=label_x, y=first_label, s=group_labels[0], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'])
plt.text(x=label_x, y=first_label, s=group_labels[1], ha='left', va='top', fontsize=5, color=thyAgeing_greys['grey2'])

# Median logFC per cell type, printed to the right of the data:
# first comparison above the baseline, second below
median_x = df['logFC'].max() + 1
for _, row in df_median.iterrows():
    va = 'bottom' if row[args['hue']] == args['hue_order'][0] else 'top'
    plt.text(x=median_x, y=row['nhood_annotation'], s=row['logFC'], ha='left', va=va, fontsize=5, color=thyAgeing_colors['magenta'])

# Style axes
sns.despine(left=True, trim = True, offset = 2)
plt.axvline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
#ax.set_xticks([-10, -5, 0, 5, 10])
ax.yaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth = 0.5)
ax.tick_params(axis='y', length=0)

plt.tight_layout()

plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_bSplit_{col_cell_type_fine}_logFcCellTypes_adult_paed.pdf')

In [None]:
# Milo beeswarm plot (GC-like only), cell types on the x axis and logFC on the y axis
groups_of_interest = ['paed_vs_infant','adult_vs_paed']
group_labels = ['paed','adult']
gc_order = ['B_GC-like_prolif', 'B_GC-like', 'B_GC-like_AIRE+']
df = milo_df.loc[milo_df['comparison'].isin(groups_of_interest)].copy()
#df.loc[df['nhood_annotation'] == 'B_GC-like-locnt', 'nhood_annotation'] = 'B_GC-like'
# .copy() so the column assignments below act on an independent frame, not on a slice
df = df.loc[df['nhood_annotation'].isin(gc_order)].copy()
df['nhood_annotation'] = pd.Categorical(df['nhood_annotation'], categories=gc_order, ordered=True)
# remove_unused_categories() returns a new Series, so the result has to be assigned back
df['nhood_annotation'] = df['nhood_annotation'].cat.remove_unused_categories()
df['is_sig'] = df['SpatialFDR'] < 0.05

# Calculate median logFC for each nhood_annotation and comparison
# observed=True: no rows (and hence no 'nan' labels) for combinations without neighbourhoods
df_median = df.groupby(['comparison', 'nhood_annotation'], observed=True)['logFC'].median().round(decimals=2).reset_index()
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

args = {'x': 'nhood_annotation', 'y': 'logFC', 'hue': 'comparison', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=40))
ax = sns.violinplot(**args, data=df,
                    bw_adjust=.8, cut=0, split=True, density_norm='width',
                    gap=0.3,
                    palette=[thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor='black', linewidth=0, inner=None,
                    )
# Get the legend from just the violin plot
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
# Non-significant neighbourhoods in grey, significant ones (SpatialFDR < 0.05) in magenta on top
sns.stripplot(**args, data=df.loc[~df['is_sig']],
              palette=[thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=1.5, alpha=0.5, ax=ax)
sns.stripplot(**args, data=df.loc[df['is_sig']],
              palette=[thyAgeing_colors['magenta'], thyAgeing_colors['magenta']],
              dodge=True, size=1.5, alpha=0.5, ax=ax)

plt.ylabel('log2(FC)')
plt.xlabel('Cell type')

# Remove the old legend
ax.get_legend().remove()

# Add group labels at the bottom of the last cell type (left / right of its centre line)
ax.set_ylim(-5.5,2)
last_label = ax.get_xaxis().get_ticklabels()[-1].get_text()
label_y = ax.get_ylim()[0] + 0.2
plt.text(x=last_label, y=label_y, s=group_labels[0], ha='right', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)
plt.text(x=last_label, y=label_y, s=group_labels[1], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Median logFC above each cell type: first comparison left of the tick, second right of it
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
median_y = ax.get_ylim()[1] + 0.1
for _, row in df_median.iterrows():
    tick_x = ax.get_xticks()[ax_labels.index(row['nhood_annotation'])]
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(y=median_y, x=tick_x - 0.1, s=row['logFC'], ha = 'right', fontsize=5, color=thyAgeing_colors['magenta'], rotation=90)
    else:
        plt.text(y=median_y, x=tick_x + 0.1, s=row['logFC'], ha = 'left', fontsize=5, color=thyAgeing_colors['magenta'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
plt.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.set_yticks([-4, -2, 0, 2])
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation = 90)

plt.tight_layout()

plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_gcSplit_logFcCellTypes_adult_paed_flipped.pdf')

In [None]:
# Milo beeswarm plot (GC-like only)
# Split violins + strip plots of neighbourhood logFC per GC-like annotation for
# the two comparisons listed in `groups_of_interest`, saved as a PDF.
groups_of_interest = ['adult_vs_paed', 'paed_vs_infant']
group_labels = ['adult', 'paed']
gc_like_order = ['B_GC-like_AIRE+', 'B_GC-like_prolif', 'B_GC-like']
df = milo_df.loc[milo_df['comparison'].isin(groups_of_interest)].copy()
#df.loc[df['nhood_annotation'] == 'B_GC-like-locnt', 'nhood_annotation'] = 'B_GC-like'
# .copy() so the column assignments below act on an independent frame
df = df.loc[df['nhood_annotation'].isin(gc_like_order)].copy()
df['nhood_annotation'] = pd.Categorical(df['nhood_annotation'], categories=gc_like_order, ordered=True)
# remove_unused_categories() returns a new object, so the result must be assigned back
df['nhood_annotation'] = df['nhood_annotation'].cat.remove_unused_categories()
df['is_sig'] = df['SpatialFDR'] < 0.05

# Calculate median logFC for each nhood_annotation and comparison
# observed=True: skip category combinations without data (otherwise they yield
# NaN medians that would be drawn as 'nan' labels)
df_median = df.groupby(['comparison', 'nhood_annotation'], observed=True)['logFC'].median().round(decimals=2).reset_index()
df_median = df_median.loc[df_median['comparison'].isin(groups_of_interest)].copy()
# Format as signed strings for the text labels
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

args = {'x': 'logFC', 'y': 'nhood_annotation', 'hue': 'comparison', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height = len(df['nhood_annotation'].unique().tolist())*5+20, width = 60))
# ax = plt.axes()
ax =sns.violinplot(**args, data = df,
                   bw_adjust=.8, cut=0, split=True, density_norm = 'width',
                   gap = 0.3,
                   palette = [thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                   edgecolor = 'black', linewidth = 0, inner=None,
)
# Get the legend entries from just the violin plot
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
# Non-significant neighbourhoods in grey, significant ones (SpatialFDR < 0.05) in magenta
sns.stripplot(**args, data = df.loc[~df['is_sig']],
              palette = [thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha = 0.5, ax=ax)
sns.stripplot(**args, data = df.loc[df['is_sig']],
              palette = [thyAgeing_colors['magenta'], thyAgeing_colors['magenta']],
              dodge=True, size=0.5, alpha = 0.5, ax=ax)

plt.xlabel('log2(FC)')
plt.ylabel('Cell type')

# Remove the old legend
ax.get_legend().remove()
# # Add just the handles/labels from the violin plot back
# ax.legend(
#     handles,
#     ['-> Adult', '-> Paed'],
#     title='Comparison',
#     loc='upper center',
#     bbox_to_anchor=(0.5, 1.2),
#     ncol=2
# )

# Group labels at the first y category: first label above the tick, second below
plt.text(x=ax.get_xlim()[0] + 0.1, y=ax.get_yaxis().get_ticklabels()[0].get_text(), s=group_labels[0], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'])
plt.text(x=ax.get_xlim()[0] + 0.1, y=ax.get_yaxis().get_ticklabels()[0].get_text(), s=group_labels[1], ha='left', va='top', fontsize=5, color=thyAgeing_greys['grey2'])

# Median logFC labels to the right of the plot, one per annotation and comparison
ax_labels = [c.get_text() for c in ax.get_yticklabels()]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_annotation'])
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(x=ax.get_xlim()[1] + 0.1, y=ax.get_yticks()[tick_pos] - 0.1, s=row['logFC'], ha='left', va='bottom', fontsize=5, color=thyAgeing_colors['magenta'])
    else:
        plt.text(x=ax.get_xlim()[1] + 0.1, y=ax.get_yticks()[tick_pos] + 0.1, s=row['logFC'], ha='left', va='top', fontsize=5, color=thyAgeing_colors['magenta'])

# Style axes
sns.despine(left=True, trim = True, offset = 2)
plt.axvline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.set_xticks([-4,-2, 0, 2])
ax.yaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth = 0.5)
ax.tick_params(axis='y', length=0)

plt.tight_layout()

plt.savefig(f'{plots_path}/phenoAnalysis/GC_like/thyAgeing_gcSplit_logFcCellTypes_adult_paed.pdf')

In [None]:
# Debug: show the y tick positions of the axes from the last plot
ax.get_yticks()

In [None]:
# Debug: show the text of the first y tick label of the axes from the last plot
ax.get_yticklabels()[0].get_text()

### Nhood expr correlation

In [None]:
# Add neighbourhood-level expression to mdata and annotate neighbourhoods by
# col_cell_type_broad. NOTE(review): presumably this fills
# mdata['milo'].varm['expr'] and .var['nhood_annotation'], which the next cell
# reads — confirm against the milo API in use.
milo.add_nhood_expression(mdata)
milo.annotate_nhoods(mdata, anno_col = col_cell_type_broad)

In [None]:
# Keep only neighbourhoods annotated as 'B_GC-like'
is_gc_like_nhood = mdata['milo'].var['nhood_annotation'] == 'B_GC-like'
gc_milo = mdata['milo'][:, is_gc_like_nhood].copy()
gc_milo_expr = gc_milo.varm['expr'].copy()

# Drop columns of the expression matrix whose sum over the kept neighbourhoods
# is not positive, and subset the gene names with the same mask
expressed = np.array(gc_milo_expr.sum(axis=0) > 0).flatten()
gene_names = np.array(mdata['rna'].var_names.tolist())[expressed]
gc_milo_expr = gc_milo_expr[:, expressed]

# Per-neighbourhood logFC used as the correlation target downstream
gc_milo_logfc = gc_milo.var['logFC'].copy()

In [None]:
from scipy.stats import pearsonr,spearmanr

# Densify the neighbourhood expression matrix so columns can be passed to scipy
gc_milo_expr_dense = gc_milo_expr.toarray()


def _spearman_vs_logfc(expr_values):
    """Spearman correlation (and p-value) between nhood logFC and one gene's expression."""
    rho, pval = spearmanr(gc_milo_logfc, expr_values)
    return {'correlation': rho, 'p_value': pval}


# One test per gene (column j of the dense matrix corresponds to gene_names[j])
correlation_results = {
    gene: _spearman_vs_logfc(gc_milo_expr_dense[:, j])
    for j, gene in enumerate(gene_names)
}

# Genes as rows, sorted from most positive to most negative correlation
correlation_df = pd.DataFrame.from_dict(correlation_results, orient='index').sort_values(
    by='correlation', ascending=False
)

# Display top results
correlation_df.head()

In [None]:
# Add the absolute correlation so genes can be ranked by strength regardless of sign
correlation_df['abs_correlation'] = correlation_df['correlation'].abs()

# Display the 50 genes with the largest absolute correlation
correlation_df.sort_values(by='abs_correlation', ascending=False).head(50)

In [None]:
# Look up a few genes of interest by label.
# NOTE(review): list-based .loc raises KeyError if any of these genes is absent
# from correlation_df (e.g. removed by the expression filter upstream).
correlation_df.loc[['AIRE', 'AICDA', 'BCL6', 'MKI67', 'CXCR5']]

In [None]:

# Genes of interest, grouped by the label used as dict key
b_gc_goi = {
    'DZ_entry': ['CXCR5', 'AICDA'],
    'LZ_entry': ['CCR7'],
    'APC': ['AIRE', 'LTB', 'LTA'],
    'BCR_activation': ['EBI3', 'S1PR2'],
    'T_B_interaction': ['ICAM1', 'ICAM2', 'ICAM3', 'ICAM4', 'ICAM5', 'CD40', 'ICOSLG'],
    'GC_formation': ['PLXNB1', 'PLXNB2', 'BASP1', 'P2RY8', 'BATF'],
    'IGs': ['IGHA1', 'IGHA2', 'IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGKC', 'IGLC1', 'IGLC2', 'IGHE', 'IGHD'],
}

# Flatten all groups into one gene list and show their correlations, strongest first
b_gc_goi_flat = [gene for genes in b_gc_goi.values() for gene in genes]
correlation_df.loc[correlation_df.index.isin(b_gc_goi_flat)].sort_values(by='abs_correlation', ascending=False)


In [None]:
# Correlations for all genes listed in gc_markers, strongest first.
# NOTE(review): gc_markers is assigned in a later cell of this notebook, so
# that cell has to be executed before this one.
correlation_df.loc[correlation_df.index.isin([g for g in gc_markers.values() for g in g])].sort_values(by='abs_correlation', ascending=False)

In [None]:
# Inspect the logFC values of the 'adult_vs_paed' comparison
milo_df.loc[milo_df['comparison'] == 'adult_vs_paed', 'logFC']

In [None]:
# Neighbourhood logFC vectors for the two comparisons
# NOTE(review): assumes the row order in milo_df matches the column order of
# the 'nhoods' matrix — confirm
is_adult_cmp = milo_df['comparison'] == 'adult_vs_paed'
is_paed_cmp = milo_df['comparison'] == 'paed_vs_infant'
logfc_adult = milo_df.loc[is_adult_cmp, 'logFC'].to_numpy()
logfc_paed = milo_df.loc[is_paed_cmp, 'logFC'].to_numpy()
cells2nhoods = mdata['rna'].obsm['nhoods']

# Scale every row of the membership matrix by the inverse of its row sum, then
# combine with the logFC vectors to get one value per cell
row_totals = np.array(cells2nhoods.sum(axis=1)).flatten()
nhood_weights = cells2nhoods.multiply(1/row_totals[:, None])

adata.obs['cell_logfc_adult'] = nhood_weights * logfc_adult
adata.obs['cell_logfc_paed'] = nhood_weights * logfc_paed

In [None]:
# Subset: broad annotation contains 'GC' and fine annotation does not contain 'locnt'
broad_has_gc = adata.obs[col_cell_type_broad].str.contains('GC')
fine_has_locnt = adata.obs[col_cell_type_fine].str.contains('locnt')
adata_sub = adata[(broad_has_gc) & (~fine_has_locnt)].copy()
adata_sub

In [None]:
# Distribution of the per-cell logFC for the paed comparison in the GC subset
# (label corrected: it said 'Adult' although the paed column is plotted)
plt.hist(adata_sub.obs['cell_logfc_paed'], bins = 100, color = 'blue', alpha = 0.5, label = 'Paed')

In [None]:
def _nhood_status(logfc):
    """Label a per-cell logFC: 'enriched' (>= 1), 'depleted' (<= -1), otherwise the string 'None'."""
    if logfc >= 1:
        return 'enriched'
    if logfc <= -1:
        return 'depleted'
    return 'None'


# Same thresholds for both comparisons
for comparison in ('adult', 'paed'):
    adata_sub.obs[f'nhood_status_{comparison}'] = adata_sub.obs[f'cell_logfc_{comparison}'].apply(_nhood_status)

In [None]:
from utils import aggClusters, add_batch_pca
from pydeseq2.dds import DeseqDataSet
from pydeseq2.default_inference import DefaultInference
from pydeseq2.ds import DeseqStats

# Pseudobulk differential expression between neighbourhood-status groups.
# For each status column, cells are aggregated per donor x status, and a
# DESeq2 model `sex + <status>` is fitted; results are stored per status column.
nhood_dea_res = {}
# status column -> [tested level, reference level] used in the DESeq2 contrast
nhood_contrast = {'nhood_status_adult' : ['enriched', 'depleted'], 
                  'nhood_status_paed' : ['depleted', 'None']}
for nhood_status,contrast in nhood_contrast.items():
    
    factors = [nhood_status, 'sex']

    # Remove cells with a missing status value
    adata_dea = adata_sub[~adata_sub.obs[nhood_status].isna()].copy()

    # Pseudobulk key: one group per donor and status level
    adata_dea.obs['agg_key'] = [c + "__" + d for c,d in zip(adata_dea.obs['donor'].astype(str), adata_dea.obs[nhood_status].astype(str))]

    # Aggregate across groups and carry over metadata
    agg_adata = aggClusters(adata_dea, lognorm=None,cluster_key='agg_key',raw = 'X', preserve_meta=['donor', nhood_status, 'sex', 'age', 'age_months', 'study'])

    # Remove pseudobulks with too few cells (n < 10); copy so that the in-place
    # helper below works on a real object rather than on an AnnData view
    agg_adata = agg_adata[agg_adata.obs.n_cells >= 10,:].copy()

    # Add batch-associated PCs (batch column is 'sex' here)
    # NOTE(review): the output of this helper is not part of the design below — confirm intent
    add_batch_pca(agg_adata, batch_col='sex', interest_col=nhood_status, variance_explained=0.7, n_pcs = None)
    
    # Preparing metadata and counts; keep genes with at least 10 counts in total
    metadata = agg_adata.obs[factors]
    counts = pd.DataFrame(agg_adata.X, index=agg_adata.obs_names, columns=agg_adata.var_names)
    genes_to_keep = counts.columns[counts.sum(axis=0) >= 10]
    counts = counts[genes_to_keep]

    # Set up DeSeq dataset
    dds = DeseqDataSet(
        counts=counts,
        metadata=metadata,
        design = f'sex + {nhood_status}',
        min_replicates = 3,
        refit_cooks=True,
        n_cpus = 8,
    )

    # Estimate size factors and dispersion and fitting LFC
    dds.deseq2()

    # Run DE analysis for the neighbourhood-status contrast
    dea_res = DeseqStats(dds, contrast=[nhood_status] + contrast, inference=DefaultInference(n_cpus=8))
    dea_res.summary()
    nhood_dea_res[nhood_status] = dea_res.results_df 

In [None]:
# Show the 50 genes with the lowest log2FoldChange for the 'nhood_status_paed' contrast
nhood_dea_res['nhood_status_paed'].sort_values(by='log2FoldChange').head(50)

In [None]:
# DE results of the 'nhood_status_paed' contrast for the b_gc_goi genes, highest log2FC first
paed_dea = nhood_dea_res['nhood_status_paed']
paed_goi = [gene for genes in b_gc_goi.values() for gene in genes]
paed_dea.loc[paed_dea.index.isin(paed_goi)].sort_values(by='log2FoldChange', ascending=False)

In [None]:
# DE results of the 'nhood_status_adult' contrast for the b_gc_goi genes, highest log2FC first
adult_dea = nhood_dea_res['nhood_status_adult']
adult_goi = [gene for genes in b_gc_goi.values() for gene in genes]
adult_dea.loc[adult_dea.index.isin(adult_goi)].sort_values(by='log2FoldChange', ascending=False)

In [None]:
# Marker genes grouped by the label used as dict key
gc_markers = {
    'General': ['AICDA', 'AIRE', 'TNFRSF8', 'JUNB', 'MYC', 'TCF3', 'ID2', 'ID3', 'CD83', 'CD40', 'IRF8', 'TCF7'],
    'GC': ['PAX5', 'BCL6', 'IRF4'],
    'DZ': ['CXCR4'],
    'LZ': ['CXCR5'],
    #'Extrafollicular': ['TCF3'],
    'EBV': ['CCL22', 'CCL17', 'EBI3', 'CCL3', 'ICAM1'],
    'Immunoglobulin': ['IGHM', 'IGHD', 'IGHA1', 'IGHA2','IGHG1', 'IGHG3', 'IGHE'],
    'PC': ['PRDM1', 'XBP1', 'IRF4'],
    'ABC': ['TBX21', 'RGS1', 'IFI44L', 'ALOX5AP'],
}

# DE results of the 'nhood_status_adult' contrast for all marker genes, highest log2FC first
adult_marker_dea = nhood_dea_res['nhood_status_adult']
marker_genes = [gene for genes in gc_markers.values() for gene in genes]
adult_marker_dea.loc[adult_marker_dea.index.isin(marker_genes)].sort_values(by='log2FoldChange', ascending=False)

In [None]:
# Rank DE results by absolute effect size
dea_res_df['abs_log2FoldChange'] = dea_res_df['log2FoldChange'].abs()
# Move the index into a 'gene_name' column once; the guard keeps the cell
# re-runnable (reset_index raises if the column already exists)
if 'gene_name' not in dea_res_df.columns:
    dea_res_df = dea_res_df.reset_index(names = 'gene_name')

# Display the 50 genes with the largest absolute log2 fold change
dea_res_df.sort_values(by='abs_log2FoldChange', ascending=False).head(50)

In [None]:
from adjustText import adjust_text

# Flag significant genes: adjusted p-value < 0.05 and |log2FC| >= 1.3
dea_res_df['is_signif'] = (dea_res_df['padj'] < 0.05) & (dea_res_df['log2FoldChange'].abs() >= 1.3)

# y-coordinate of the volcano plot, computed once and reused for points and labels
neg_log10_pvalue = -np.log10(dea_res_df['pvalue'])

# Create the volcano plot
plt.figure(figsize=(10, 8))
sns.scatterplot(data=dea_res_df, x='log2FoldChange', y=neg_log10_pvalue, hue='is_signif', legend=False)

# Label only the significant genes; the boolean mask avoids materialising
# every row with .iloc inside a Python loop
signif_mask = dea_res_df['is_signif'].to_numpy()
texts = [
    plt.text(log2fc, y_val, gene, fontsize=8)
    for log2fc, y_val, gene in zip(
        dea_res_df.loc[signif_mask, 'log2FoldChange'],
        neg_log10_pvalue[signif_mask],
        dea_res_df.loc[signif_mask, 'gene_name'],
    )
]
adjust_text(texts, force_points=0.2, force_text=1)

# Reference lines: p = 0.05 (unadjusted) and zero fold change
plt.axhline(y=-np.log10(0.05), color='grey', linestyle='--')
plt.axvline(x=0, color='grey', linestyle='--')
plt.xlabel('Log2 Fold Change')
plt.ylabel('-Log10 p-value')