In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import mudata as mu
import hdf5plugin

# Add repo path to sys path (allows to access scripts and metadata from repo)
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis'
sys.path.insert(1, repo_path) 
sys.path.insert(2, '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts')

# Autoreload custom scripts
%load_ext autoreload
%autoreload 2

# Define paths
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'
model_path = os.path.join(repo_path, 'models')
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print('Dir for plots: {}'.format(plots_path))
print('Dir for data: {}'.format(data_path))

# Formatting
from matplotlib import font_manager
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize, thyAgeing_colors, thyAgeing_greys, get_tint_palette

In [None]:
# Load adata
object_version = 'v5_2025-04-03'
adata_path = f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr'
# The object has a `.zarr` extension. A zarr store saved as a directory cannot be opened by
# `read_h5ad`, so choose the reader from what is on disk; a single file is still read as h5ad.
adata = ad.read_zarr(adata_path) if os.path.isdir(adata_path) else ad.read_h5ad(adata_path)

# Add new annotations to adata
# Columns present in both tables are taken from the curated CSV: the obs versions are dropped
# before the join (joining on the index, first CSV column).
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
overlapping_cols = adata.obs.columns.intersection(ct_anno.columns)
adata.obs = adata.obs.drop(columns = overlapping_cols).join(ct_anno)

# Filter data (only include annotated cells)
# `.copy()` turns the subset into a standalone object instead of a view of the full atlas.
# NOTE(review): `update_obs` below presumably writes to `adata.obs`; doing that on a view
# would trigger an implicit copy with a warning - confirm against scripts/utils.
adata = adata[(adata.obs['anno_status'] == 'include'),:].copy()

# Load metadata (most recent 'Thymus_ageing_metadata*' file in the metadata directory)
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

latest_meta.head()

In [None]:
# Sample-level columns used for all donor / cell counts in this cell
sample_obs = adata.obs[['age_group', 'donor', 'sort']]

# `observed = True` keeps only the age_group / sort / donor combinations that actually occur.
# With categorical obs columns, the implicit default can emit a zero-count row for every
# unobserved combination, and that default differs between pandas versions.

# Number of distinct donors per age group and sort
n_donors = (
    sample_obs
    .groupby(['age_group', 'sort'], observed = True)
    .agg(n_donors = ('donor', 'nunique'))
    .reset_index()
)

# Number of cells (obs rows) per donor, within each age group and sort
n_cells = (
    sample_obs
    .groupby(['age_group', 'sort', 'donor'], observed = True)
    .agg(n_cells = ('donor', 'count'))
    .reset_index()
)

# Subsets restricted to the 'TOT' sort
tot_n_donors = n_donors.loc[n_donors['sort'] == 'TOT', ['age_group', 'n_donors']].copy()
tot_n_cells = n_cells.loc[n_cells['sort'] == 'TOT', ['age_group', 'n_cells','donor']].copy()

tot_n_donors, tot_n_cells

In [None]:
# Sort labels pooled together as "stromal sorts"
stromal_sorts = ['CD45N', 'CD3N', 'EPCAM', 'EPCAM+CD205+', 'CD45NCD3N', 'CD205+CD49F+']

# Cells from any of the stromal sorts
stromal_obs = adata.obs.loc[adata.obs['sort'].isin(stromal_sorts), ['age_group', 'donor']]

# Distinct donors per age group across all stromal sorts.
# Counted directly from the cells: summing the per-sort donor counts in `n_donors` would
# count a donor once for every stromal sort it appears in.
stromal_n_donors = (
    stromal_obs
    .groupby('age_group', observed = True)
    .agg(n_donors = ('donor', 'nunique'))
    .reset_index()
)

# Cells per donor pooled over the stromal sorts (equals the sum of the per-sort cell counts)
stromal_n_cells = (
    stromal_obs
    .groupby(['age_group', 'donor'], observed = True)
    .agg(n_cells = ('donor', 'count'))
    .reset_index()
)

stromal_n_donors, stromal_n_cells

In [None]:
# One figure column per cohort: per-donor cell counts (top row) and donor counts (bottom row)
panels = [
    ('Total sorts', tot_n_cells, tot_n_donors),
    ('Stromal sorts', stromal_n_cells, stromal_n_donors),
]
# One tint per age group, shared by every panel
age_palette = get_tint_palette(thyAgeing_colors['magenta'], n=len(age_group_levels))

fig, axes = plt.subplots(2, len(panels), figsize=calc_figsize(width = 120, height = 60))

for col, (title, cells_df, donors_df) in enumerate(panels):
    cells_ax, donors_ax = axes[0, col], axes[1, col]

    # Top row: distribution of cells per donor.
    # `hue` repeats the x variable with `legend=False`: seaborn >= 0.13 deprecates passing
    # `palette` without `hue`, and this is the documented equivalent.
    sns.boxplot(
        ax=cells_ax,
        data=cells_df,
        x='age_group',
        y='n_cells',
        hue='age_group',
        hue_order=age_group_levels,
        legend=False,
        palette=age_palette,
        order=age_group_levels,
        gap=0.1,
        showfliers=False,  # outliers are not drawn
        showcaps=False,
        medianprops={"color": "white"},
        whiskerprops={"color": 'black', 'linestyle': 'solid'},
        boxprops={'linewidth': 0},
    )
    cells_ax.set_title(title)
    cells_ax.set_ylabel('N(cells) per donor')
    # The top row shares its categories with the bar plot below, so hide its x tick labels and axis label
    cells_ax.set_xticklabels([])
    cells_ax.set_xlabel('')
    sns.despine(ax=cells_ax, trim=True, offset=2)

    # Bottom row: number of donors per age group
    sns.barplot(
        ax=donors_ax,
        data=donors_df,
        x='age_group',
        y='n_donors',
        hue='age_group',
        hue_order=age_group_levels,
        legend=False,
        palette=age_palette,
        order=age_group_levels,
    )
    donors_ax.set_xlabel('Age group')
    donors_ax.set_ylabel('N(donors)')
    sns.despine(ax=donors_ax, trim=True, offset=2)

plt.tight_layout()
plt.show()

In [None]:
# TCR annotation table; the first CSV column becomes the index used for the join below
tcr_anno = pd.read_csv(
    '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/T_NK_compartment/data/objects/rna/thyAgeing_tSplit_scvi_v9_2025-03-28_tcrab_v6.csv',
    index_col = 0,
)

# obs of cells annotated 'T' at taa_l1, restricted (inner join on index) to cells present in
# the TCR table, then without the rows whose locus_VDJ is 'No_contig'
tcr_df = (
    adata[adata.obs['taa_l1'] == 'T']
    .obs
    .join(tcr_anno, how = 'inner')
    .loc[lambda joined: joined['locus_VDJ'] != 'No_contig']
)
tcr_df.shape

In [None]:
# `observed = True` keeps only age_group / donor combinations that occur in `tcr_df`.
# With categorical columns, the implicit default can add a zero-count row for every
# unobserved combination, and that default differs between pandas versions.

# Number of distinct donors per age group among the rows of `tcr_df`
tcr_n_donors = (
    tcr_df
    .groupby('age_group', observed = True)
    .agg(n_donors = ('donor', 'nunique'))
    .reset_index()
)

# Number of `tcr_df` rows (cells) per donor within each age group
tcr_n_cells = (
    tcr_df
    .groupby(['age_group', 'donor'], observed = True)
    .agg(n_cells = ('donor', 'count'))
    .reset_index()
)

tcr_n_donors, tcr_n_cells

In [None]:
# BCR annotation table; the first CSV column becomes the index used for the join below
bcr_anno = pd.read_csv(
    '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/B_compartment/data/objects/rna/thyAgeing_bSplit_scvi_v4_2024-11-06_bcr_v3_2025-02-19.csv',
    index_col = 0,
)

# obs of cells annotated 'B' at taa_l1, restricted (inner join on index) to cells present in
# the BCR table, then without the rows whose locus_VDJ is 'No_contig'
bcr_df = (
    adata[adata.obs['taa_l1'] == 'B']
    .obs
    .join(bcr_anno, how = 'inner')
    .loc[lambda joined: joined['locus_VDJ'] != 'No_contig']
)
bcr_df.shape

In [None]:
# `observed = True` keeps only age_group / donor combinations that occur in `bcr_df`.
# With categorical columns, the implicit default can add a zero-count row for every
# unobserved combination, and that default differs between pandas versions.

# Number of distinct donors per age group among the rows of `bcr_df`
bcr_n_donors = (
    bcr_df
    .groupby('age_group', observed = True)
    .agg(n_donors = ('donor', 'nunique'))
    .reset_index()
)

# Number of `bcr_df` rows (cells) per donor within each age group
bcr_n_cells = (
    bcr_df
    .groupby(['age_group', 'donor'], observed = True)
    .agg(n_cells = ('donor', 'count'))
    .reset_index()
)

bcr_n_donors, bcr_n_cells

In [None]:
# One figure column per cohort: per-donor cell counts (top row) and donor counts (bottom row)
panels = [
    ('Total sorts', tot_n_cells, tot_n_donors),
    ('Stromal sorts', stromal_n_cells, stromal_n_donors),
    ('TCR', tcr_n_cells, tcr_n_donors),
    ('BCR', bcr_n_cells, bcr_n_donors),
]
# One tint per age group, shared by every panel
age_palette = get_tint_palette(thyAgeing_colors['magenta'], n=len(age_group_levels))

fig, axes = plt.subplots(2, len(panels), figsize=calc_figsize(width = 180, height = 60))

for col, (title, cells_df, donors_df) in enumerate(panels):
    cells_ax, donors_ax = axes[0, col], axes[1, col]

    # Top row: distribution of cells per donor.
    # `hue` repeats the x variable with `legend=False`: seaborn >= 0.13 deprecates passing
    # `palette` without `hue`, and this is the documented equivalent.
    sns.boxplot(
        ax=cells_ax,
        data=cells_df,
        x='age_group',
        y='n_cells',
        hue='age_group',
        hue_order=age_group_levels,
        legend=False,
        palette=age_palette,
        order=age_group_levels,
        gap=0.1,
        showfliers=False,
        showcaps=False,
        medianprops={"color": "white"},
        whiskerprops={"color": 'black', 'linestyle': 'solid'},
        boxprops={'linewidth': 0},
    )
    cells_ax.set_title(title)
    cells_ax.set_ylabel('N(cells) per donor')
    # The top row shares its categories with the bar plot below, so hide its x tick labels and axis label
    cells_ax.set_xticklabels([])
    cells_ax.set_xlabel('')
    sns.despine(ax=cells_ax, trim=True, offset=2)

    # Bottom row: number of donors per age group
    sns.barplot(
        ax=donors_ax,
        data=donors_df,
        x='age_group',
        y='n_donors',
        hue='age_group',
        hue_order=age_group_levels,
        legend=False,
        linewidth=0.5,
        palette=age_palette,
        order=age_group_levels,
    )
    donors_ax.set_xlabel('Age group')
    donors_ax.set_ylabel('N(donors)')
    sns.despine(ax=donors_ax, trim=True, offset=2)

plt.tight_layout()

# Create the output directory first so that savefig does not fail on a fresh checkout
fig_path = f'{plots_path}/metadata/thyAgeing_nDonors_nCells_meta.pdf'
os.makedirs(os.path.dirname(fig_path), exist_ok=True)
plt.savefig(fig_path)