# Thymus ageing atlas: Global differential abundance test

In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import mudata as mu
import hdf5plugin

import pertpy
milo = pertpy.tl.Milo()

# Add repo path to sys path (allows to access scripts and metadata from repo)
# Both the repository root and its scripts/ directory are inserted near the front of
# sys.path; presumably this is what makes the custom-script imports further down
# (utils, anno_levels, plotting.utils) resolvable — verify against the repo layout.
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis'
sys.path.insert(1, repo_path) 
sys.path.insert(2, '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts')

# Autoreload custom scripts
%load_ext autoreload
%autoreload 2

# Define paths
# plots_path / data_path end with a trailing slash, so paths built later as
# f'{data_path}/objects/...' contain a harmless double slash.
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'
model_path = os.path.join(repo_path, 'models')
# Hard-coded location of the shared data directory (objects, metadata, curated annotations)
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print('Dir for plots: {}'.format(plots_path))
print('Dir for data: {}'.format(data_path))

# Formatting
# Register the Arial font file with matplotlib and activate the project-wide style sheet
from matplotlib import font_manager
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize

In [None]:
# Load adata
object_version = 'v5_2025-04-03'
adata = ad.read_h5ad(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr')

# Add new annotations to adata
# Left join on the obs index so that cells without a curated annotation are kept (as NaN)
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
adata.obs = adata.obs.join(ct_anno, how = 'left')
# Keep only cells flagged for inclusion. Boolean indexing returns an AnnData *view*;
# the object is modified below (metadata update, new obs columns), so materialise it
# explicitly instead of relying on an implicit copy-on-write with its warning.
adata = adata[(adata.obs['anno_status'] == 'include')].copy()

# Update metadata
# get_latest_version / update_obs are project helpers: the newest metadata sheet is
# read and handed to update_obs together with adata.
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

adata

In [None]:
# Define columns
col_cell_type_broad = 'taa_l2_v2'
col_cell_type_fine = 'taa_l4'
col_cell_type_fine_levels = get_ct_levels(col_cell_type_fine)
col_age_group = 'age_group'
# Look up the imported `<col_age_group>_levels` list by name without eval():
# a plain namespace lookup cannot execute arbitrary code and fails with a clear KeyError.
col_age_group_levels = globals()[f'{col_age_group}_levels']

# Broad levels without the populations excluded from the broad-level plots.
# Building a filtered copy (instead of list.remove) leaves the list returned by
# get_ct_levels untouched and keeps this cell safe to re-run.
excluded_broad_levels = {'DC', 'TEC-EMT'}
col_cell_type_broad_levels = [ct for ct in get_ct_levels(col_cell_type_broad) if ct not in excluded_broad_levels]

In [None]:
# Create new cell type annotations
# Cells whose taa_l2 label is T_predev/T_dev get their taa_l3 label, every other cell
# keeps its taa_l2 label. Vectorised with isin/where: a row-wise DataFrame.apply(axis=1)
# builds one Series per cell and is very slow on an atlas-sized obs table.
# Casting to object mirrors the plain-object result of the former apply and keeps NaN as NaN.
t_dev_l3_levels = ['T_DN(early)', 'T_DN', 'T_DP', 'T_αβT(entry)']
is_t_dev = adata.obs['taa_l2'].isin(['T_predev', 'T_dev'])
adata.obs['taa_l3_l2'] = adata.obs['taa_l3'].astype(object).where(is_t_dev, adata.obs['taa_l2'].astype(object))

# Ordered levels: the taa_l3 T-development stages first, then the remaining labels
# found in the data at taa_l2 resolution
taa_l3_l2_levels = get_ct_levels('taa_l3', include_ct = t_dev_l3_levels)
taa_l3_l2_levels.extend(get_ct_levels('taa_l2', include_ct = np.setdiff1d(adata.obs['taa_l3_l2'].unique(), np.array(t_dev_l3_levels))))

## Differential abundance analysis (Milo)

In [None]:
# Restrict the analysis to cells whose 'sort' value is 'TOT'
# (presumably the total/unsorted fraction — confirm against the metadata sheet).
# Boolean indexing yields an AnnData view of the previous object.
adata = adata[adata.obs['sort'] == 'TOT']

In [None]:
# Construct nhoods
# milo.load wraps adata into a MuData object; the cells are accessed as mdata["rna"]
mdata = milo.load(adata)
# kNN graph on the scVI latent representation, 100 neighbours per cell
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", n_neighbors=100)
# prop=0.1 is passed to make_nhoods (presumably the fraction of cells sampled as
# neighbourhood index cells — see the pertpy Milo documentation)
milo.make_nhoods(mdata["rna"], prop=0.1)
# Count nhoods
# One sample per donor: cells are counted per neighbourhood and donor
mdata = milo.count_nhoods(mdata, sample_col="donor")

In [None]:
# Harmonise obs dtypes before writing the object to disk.
# Columns with an explicitly requested dtype are cast first ...
explicit_casts = {
    'path_cellranger_arc': 'str',
    'cite': 'str',
    'age_num': float,
    'age_cont': float,
    'age_months': int,
}
for col_name, target_dtype in explicit_casts.items():
    mdata['rna'].obs[col_name] = mdata['rna'].obs[col_name].astype(target_dtype)

# ... then every column that is still of generic object dtype is turned into strings
for col_name in mdata['rna'].obs.columns:
    if mdata['rna'].obs[col_name].dtype != 'object':
        continue
    mdata['rna'].obs[col_name] = mdata['rna'].obs[col_name].astype('str')

# Write the MuData object (zstd-compressed, level 5)
milo_object_file = f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_ageGroups.zarr'
mdata.write_h5mu(
    milo_object_file,
    compression=hdf5plugin.FILTERS["zstd"],
    compression_opts=hdf5plugin.Zstd(clevel=5).filter_options,
)

In [None]:
# # Load mdata
# object_version = 'v5_2025-04-03'
# mdata = mu.read_h5mu(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_ageGroups.zarr')

# mdata

In [None]:
# Combined annotation on the Milo object: taa_l3 for cells labelled T_predev/T_dev at
# taa_l2, taa_l2 for everything else. Vectorised with isin/where instead of a row-wise
# DataFrame.apply(axis=1), which is very slow on an atlas-sized obs table.
# Casting to object mirrors the plain-object result of the former apply and keeps NaN as NaN.
rna_is_t_dev = mdata['rna'].obs['taa_l2'].isin(['T_predev', 'T_dev'])
mdata['rna'].obs['taa_l3_l2'] = mdata['rna'].obs['taa_l3'].astype(object).where(rna_is_t_dev, mdata['rna'].obs['taa_l2'].astype(object))

mdata['rna'].obs['taa_l3_l2'].value_counts()

In [None]:
# Recompute the combined annotation and its ordered levels on the current adata.
# Cells labelled T_predev/T_dev at taa_l2 get their taa_l3 label, all others keep taa_l2.
# Vectorised with isin/where instead of a row-wise DataFrame.apply(axis=1), which is
# very slow on an atlas-sized obs table; object dtype mirrors the former apply result.
t_dev_l3_levels = ['T_DN(early)', 'T_DN', 'T_DP', 'T_αβT(entry)']
is_t_dev = adata.obs['taa_l2'].isin(['T_predev', 'T_dev'])
adata.obs['taa_l3_l2'] = adata.obs['taa_l3'].astype(object).where(is_t_dev, adata.obs['taa_l2'].astype(object))

# Ordered levels: the taa_l3 T-development stages first, then the remaining labels
# found in the data at taa_l2 resolution
taa_l3_l2_levels = get_ct_levels('taa_l3', include_ct = t_dev_l3_levels)
taa_l3_l2_levels.extend(get_ct_levels('taa_l2', include_ct = np.setdiff1d(adata.obs['taa_l3_l2'].unique(), np.array(t_dev_l3_levels))))

In [None]:
# Label every neighbourhood at three annotation resolutions.
# annotate_nhoods is called once per annotation; its generic output columns are renamed
# straight away so the next call does not collide with them.
for anno in ('taa_l3', 'taa_l4', 'taa_l3_l2'):
    milo.annotate_nhoods(mdata, anno_col=anno)
    generic_to_specific = {
        'nhood_annotation': f'nhood_{anno}',
        'nhood_annotation_frac': f'nhood_{anno}_frac',
    }
    mdata['milo'].var.rename(columns=generic_to_specific, inplace=True)

mdata['milo'].var.head()

In [None]:
# Create and reorder categories
# da_age_group is a categorical copy of age_group ordered infant -> paed -> adult -> aged.
# reorder_categories raises if the categories present differ from this list.
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["age_group"]
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].astype("category")
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].cat.reorder_categories(['infant','paed','adult','aged'])

# Each tuple is (tested group, reference group)
comparisons = [('aged', 'adult'), ('adult', 'paed'), ('paed', 'infant'), ('adult', 'infant'),]
milo_dict = {}
for c in comparisons:
    # Differential abundance testing
    # NOTE(review): the contrast names assume the design matrix exposes one column per
    # age group, including the first level 'infant' — confirm how pertpy builds the
    # matrix when model_contrasts is given with an intercept design.
    contrast = f'da_age_group{c[0]}-da_age_group{c[1]}'
    milo.da_nhoods(mdata, design="~da_age_group+sex", model_contrasts=contrast)
    
    # Snapshot of the neighbourhood table for this comparison, keyed '<tested>_vs_<reference>'
    milo_dict[f'{c[0]}_vs_{c[1]}'] = mdata['milo'].var.copy()

In [None]:
# Stack the per-comparison result tables; the dict key becomes the 'comparison' column
# and the former neighbourhood index becomes 'nhood_id'.
milo_csv_file = f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_ageGroups.csv'
stacked_results = pd.concat(milo_dict, axis=0)
milo_df = stacked_results.reset_index(names=['comparison','nhood_id'])
milo_df.to_csv(milo_csv_file)

# Read the table straight back so the in-memory version matches what later cells load from disk
milo_df = pd.read_csv(milo_csv_file, index_col = 0)
milo_df

In [None]:
# Load the saved Milo results and the curated annotation hierarchy
milo_df = pd.read_csv(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_ageGroups.csv', index_col = 0)
anno_levels = pd.read_excel(f'{general_data_path}/curated/thyAgeing_full_curatedAnno_v11_2025-08-29_levels.xlsx')

df = milo_df.copy()
df[f'nhood_{col_cell_type_fine}'] = pd.Categorical(df[f'nhood_{col_cell_type_fine}'], categories=col_cell_type_fine_levels, ordered=True)
# Map each neighbourhood's fine label (taa_l4) to its broad label (taa_l2_v2).
# The lookup is de-duplicated first: if the hierarchy table lists a taa_l4 label on
# several rows (e.g. once per finer sub-level), merging against it as-is would
# duplicate neighbourhood rows and inflate every downstream plot and median.
fine_to_broad = anno_levels[['taa_l4', 'taa_l2_v2']].drop_duplicates()
df = df.merge(fine_to_broad, left_on = 'nhood_taa_l4', right_on = 'taa_l4')
df['taa_l2_v2'] = pd.Categorical(df['taa_l2_v2'], categories=col_cell_type_broad_levels, ordered=True)
# df[f'nhood_{col_cell_type_broad}'] = pd.Categorical(df[f'nhood_{col_cell_type_broad}'], categories=col_cell_type_broad_levels, ordered=True)
# df['nhood_taa_l3_l2'] = pd.Categorical(df['nhood_taa_l3_l2'], categories=taa_l3_l2_levels, ordered=True)
# df['nhood_taa_l3_l2'] = df['nhood_taa_l3_l2'].cat.remove_unused_categories()
# Neighbourhoods with SpatialFDR below 5% are flagged as significant
df['is_sig'] = df['SpatialFDR'] < 0.05

df.head()

In [None]:
from plotting.utils import thyAgeing_greys,thyAgeing_colors

In [None]:
# Milo beeswarm (landscape)
# Split violins plus strip plots of neighbourhood logFC per broad cell type (taa_l2_v2)
# for the two comparisons against infant; the figure is saved as PDF at the end.

groups_of_interest = ['paed_vs_infant', 'adult_vs_infant']
group_labels = ['paed', 'adult']

# Calculate median logFC for each nhood_annotation and comparison
df_median = df.groupby(['comparison', 'taa_l2_v2'])['logFC'].median().round(decimals=2).reset_index()
df_median = df_median.loc[df_median['comparison'].isin(groups_of_interest)]
# Turn the medians into signed text labels ('+0.5', '-1.2') for annotation on the plot
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

# Shared keyword arguments for the violin and strip layers
args = {'x': 'taa_l2_v2', 'y': 'logFC', 'hue': 'comparison', 'hue_order': groups_of_interest}
    
plt.figure(figsize=calc_figsize(height=60, width=180))
ax = sns.violinplot(**args, data=df,
                    bw_adjust=.8, cut=0, split=True, density_norm='width',
                    gap=0.3,
                    palette=[thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor='black', linewidth=0, inner=None,
                    )
# Get the legend from just the violin plot (handles/labels are not used further in this cell)
handles, labels = ax.get_legend_handles_labels()

# Make the violins semi-transparent, then overlay the individual neighbourhoods:
# non-significant ones in grey, significant ones in magenta
for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, data=df.loc[df['is_sig'] == False],
              palette=[thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)
sns.stripplot(**args, data=df.loc[df['is_sig'] == True],
              palette=[thyAgeing_colors['magenta'], thyAgeing_colors['magenta']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)

plt.ylabel('log2(FC)')
plt.xlabel('Cell type')

# Remove the old legend
ax.get_legend().remove()

# In place of a legend: write the two group names next to the last category,
# one on each side (ha='right' / ha='left') of its tick position
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[0], ha='right', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[1], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write the median logFC labels above each category: first hue level shifted left
# of the tick, the other hue level shifted right
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['taa_l2_v2'])
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] - 0.1, s=row['logFC'], ha = 'right', fontsize=5, color=thyAgeing_colors['magenta'], rotation=90)
    else:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + 0.1, s=row['logFC'], ha = 'left', fontsize=5, color=thyAgeing_colors['magenta'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
# Dashed reference line at logFC = 0
plt.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
#ax.set_yticks([-4, -2, 0, 2])
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation = 90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l2_v2_logFcCellTypes_adult_infant_pub.pdf')

In [None]:
from matplotlib import colors as mcolors
# Dot heatmap of the median logFC per broad cell type and comparison.
# Expects df_median, groups_of_interest and group_labels from the beeswarm cell.
df_median['comparison'] = pd.Categorical(df_median['comparison'], categories=groups_of_interest[::-1], ordered=True)
df_median = df_median.loc[df_median['comparison'].isin(groups_of_interest)]
df_median['comparison'] = df_median['comparison'].cat.remove_unused_categories()
# The beeswarm cell stored logFC as signed text ('+0.5'); convert it back to float
df_median['logFC'] = df_median['logFC'].apply(lambda x: float(x))
df_median['abs_logFC'] = df_median['logFC'].abs()

plt.figure(figsize=calc_figsize(height=30, width=180))
# Symmetric colour scale centred on 0 so that the colour encodes the direction of change
max_abs = np.abs(df_median['logFC']).max()
vmin, vmax, vcenter = -max_abs, max_abs, 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.blend_palette([thyAgeing_colors['teal'], 'white', thyAgeing_colors['orange']], as_cmap=True)

# Only used further down to compute the marker areas of the manual size legend
sizenorm = plt.Normalize(vmin=0, vmax=df_median['abs_logFC'].max())

# Colour = signed median logFC, dot size = absolute median logFC
p = sns.scatterplot(
    data=df_median,
    y='comparison', x='taa_l2_v2', hue='logFC',
    palette=cmap, hue_norm=normalize, size='abs_logFC', sizes=(1, 70), legend=False
)
p.set_ylabel('Comparison')
p.set_xlabel('Cell population')
p.set_xlim(-0.5, len(df_median['taa_l2_v2'].unique())-0.5)
p.set_ylim(len(groups_of_interest)-0.5, -0.5)  # Reverse y-axis
p.set_xticklabels(df_median['taa_l2_v2'].cat.categories, rotation=90, va='top', ha='center')
p.set_yticklabels(group_labels[::-1], rotation=0, va='center', ha='right')

# Colourbar built from a standalone mappable because the scatterplot legend is disabled
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
cbar = p.figure.colorbar(sm, ax=p.axes, orientation='vertical', label='logFC', pad=0.01, aspect=5)
plt.tight_layout()

# Add legend for dot size
# Empty scatter calls create legend handles; the area uses the same (1, 70) range as `sizes` above
for logfc in [2,4,6]:
    size = sizenorm(logfc) * (70 - 1) + 1
    plt.scatter([], [], s=size, c='gray', alpha=0.5, label=f'{logfc}')
p.legend(title='|logFC|', loc='upper left', bbox_to_anchor=(1.08, 1.1), frameon=False)

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l2_v2_logFcCellTypes_adult_infant_heatmap_pub.pdf')


In [None]:
# Display the upper colour-scale limit (largest absolute median logFC) computed for the heatmap
vmax

In [None]:
# Split violins plus strip plot of neighbourhood logFC per fine cell type, comparing
# the two consecutive age-group steps (paed -> adult and infant -> paed).
args = {'data': df, 'x': 'logFC', 'y': col_cell_type_fine, 'hue': 'comparison', 'hue_order': ['adult_vs_paed', 'paed_vs_infant']}

# The figure height scales with the number of fine cell type levels
plt.figure(figsize=calc_figsize(height = len(col_cell_type_fine_levels)*5+12, width = 60))
# `height` and `aspect` are options of the figure-level sns.catplot only. The axes-level
# sns.violinplot rejects them (extra keyword arguments are treated as matplotlib patch
# properties), so they are not passed here; the figure size is set by plt.figure above.
ax = sns.violinplot(**args,
                    bw_adjust=.8, cut=0, split=True,
                    edgecolor='black', linewidth=0, palette='colorblind', inner=None,
)
# Keep the violin legend handles before the strip plot adds its own entries
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=1, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('log2(FC)')
plt.ylabel('Cell type')

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back, with readable labels in hue_order
ax.legend(
    handles,
    ['Paed -> Adult', 'Infant -> Paed'],
    title='Comparison',
    loc='upper center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=2
)

# Reference line at logFC = 0
plt.axvline(0, color = 'grey', linestyle = '--')

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_{col_cell_type_fine}_logFcCellTypes_adult_paed.pdf')

In [None]:
# Keep only the two comparisons against infant and prepare the plotting columns
infant_ref_comparisons = ['adult_vs_infant', 'paed_vs_infant']
keep_rows = milo_df['comparison'].isin(infant_ref_comparisons)
df = milo_df[keep_rows].copy()
# df[col_cell_type_fine] = pd.Categorical(df[col_cell_type_fine], categories=col_cell_type_fine_levels, ordered=True)
# df[col_cell_type_broad] = pd.Categorical(df[col_cell_type_broad], categories=col_cell_type_broad_levels, ordered=True)
# Ordered categorical so the plots follow the curated level order
df['nhood_taa_l3_l2'] = pd.Categorical(
    df['nhood_taa_l3_l2'],
    categories=taa_l3_l2_levels,
    ordered=True,
)
# Significant neighbourhoods: SpatialFDR below 5%
df['is_sig'] = df['SpatialFDR'].lt(0.05)

In [None]:
# Portrait layout: split violins plus strip plot of neighbourhood logFC per taa_l3_l2
# label for the two comparisons against infant.
args = {'data': df, 'x': 'logFC', 'y': 'nhood_taa_l3_l2', 'hue': 'comparison', 'hue_order': ['adult_vs_infant', 'paed_vs_infant']}
    
# The figure height scales with the number of annotation levels
plt.figure(figsize=calc_figsize(height = len(taa_l3_l2_levels)*5, width = 60))
# ax = plt.axes()
ax =sns.violinplot(**args,
                bw_adjust=.8, cut=0, split=True,
                edgecolor = 'black', linewidth = 0, palette = 'colorblind', inner=None,
)
# Get the legend handles from just the violin plot (before the strip plot adds its own)
handles, labels = ax.get_legend_handles_labels()
    
for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=1, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('log2(FC)')
plt.ylabel('Cell type')

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back, relabelled in hue_order
ax.legend(
    handles,
    ['Infant -> Adult', 'Infant -> Paed'],
    title='Comparison',
    loc='upper center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=2
)

# Reference line at logFC = 0
plt.axvline(0, color = 'grey', linestyle = '--')

plt.tight_layout()
    
plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant.pdf')

In [None]:
# Landscape layout of the same plot: cell types on the x-axis, logFC on the y-axis
args = {'data': df, 'y': 'logFC', 'x': 'nhood_taa_l3_l2', 'hue': 'comparison', 'hue_order': ['adult_vs_infant', 'paed_vs_infant']}
    
plt.figure(figsize=calc_figsize(height = 40, width = 150))
# ax = plt.axes()
ax =sns.violinplot(**args,
                bw_adjust=.8, cut=0, split=True,
                #height = 10, aspect = 0.8,  
                edgecolor = 'black', linewidth = 0, palette = 'colorblind', inner=None,
)
# Get the legend handles from just the violin plot (before the strip plot adds its own)
handles, labels = ax.get_legend_handles_labels()
    
for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=1, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('Cell type')
plt.ylabel('log2(FC)')
plt.xticks(rotation=45)

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back, relabelled in hue_order
# and anchored outside the right edge of the axes
ax.legend(
    handles,
    ['Infant -> Adult', 'Infant -> Paed'],
    title='Comparison',
    loc='lower right',
    bbox_to_anchor=(1.2, 0.5),
    ncol=1
)

# Reference line at logFC = 0
plt.axhline(0, color = 'grey', linestyle = '--')

plt.tight_layout()
    
plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_pub.pdf')

In [None]:
# Display the ordered taa_l3_l2 levels (used to pick the subsets plotted in the next cells)
np.array(taa_l3_l2_levels)

In [None]:
# First subset of populations (T-cell stages and TEC types); written to the '..._bifTalkEarly' PDF
plot_df = df.loc[df['nhood_taa_l3_l2'].isin(['T_DN(early)', 'T_DN', 'T_DP', 'T_αβT(entry)', 'T_mature', 'cTEC', 'mcTEC', 'mTEC'])].copy()
# Drop the categories that were filtered out so they do not appear as empty positions
plot_df['nhood_taa_l3_l2'] = plot_df['nhood_taa_l3_l2'].cat.remove_unused_categories()

args = {'data': plot_df, 'y': 'logFC', 'x': 'nhood_taa_l3_l2', 'hue': 'comparison', 'hue_order': ['paed_vs_infant', 'adult_vs_infant']}
    
plt.figure(figsize=calc_figsize(height=50, width=80))
ax = sns.violinplot(**args,
                    bw_adjust=.8, cut=0, split=True, density_norm = 'width',
                    edgecolor='black', linewidth=0, palette='colorblind', inner=None)

# Keep the violin legend handles before the strip plot adds its own entries
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=0.5, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('Cell type')
plt.ylabel('log2(FC)')
plt.xticks(rotation=45)

# Remove unused x labels
ax.set_xticklabels([label.get_text() for label in ax.get_xticklabels() if label.get_text() in plot_df['nhood_taa_l3_l2'].cat.categories])

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back, relabelled in hue_order
ax.legend(
    handles,
    ['Infant -> Paed', 'Infant -> Adult'],
    title='Comparison',
    loc='lower right',
    bbox_to_anchor=(1.3, 0.8),
    ncol=1
)

# Reference line at logFC = 0
plt.axhline(0, color='grey', linestyle='--')

plt.tight_layout()
    
plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_bifTalkEarly.pdf')

In [None]:
# Second subset of populations (the remaining lineages); written to the '..._bifTalkLate' PDF
plot_df = df.loc[df['nhood_taa_l3_l2'].isin(['T_recirc', 'T_innate', 'NK', 'Fb', 'EC-blood', 'EC-lymphatic', 'B_cell',
       'B_plasma', 'Neutrophil', 'Mono', 'Mac', 'DC1','DC2', 'aDC', 'pDC'])].copy()
# Drop the categories that were filtered out so they do not appear as empty positions
plot_df['nhood_taa_l3_l2'] = plot_df['nhood_taa_l3_l2'].cat.remove_unused_categories()

args = {'data': plot_df, 'y': 'logFC', 'x': 'nhood_taa_l3_l2', 'hue': 'comparison', 'hue_order': ['paed_vs_infant', 'adult_vs_infant']}
    
plt.figure(figsize=calc_figsize(height=50, width=100))
ax = sns.violinplot(**args,
                    bw_adjust=.8, cut=0, split=True, density_norm = 'width',
                    edgecolor='black', linewidth=0, palette='colorblind', inner=None)

# Keep the violin legend handles before the strip plot adds its own entries
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=0.5, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('Cell type')
plt.ylabel('log2(FC)')
plt.xticks(rotation=45)

# Remove unused x labels
ax.set_xticklabels([label.get_text() for label in ax.get_xticklabels() if label.get_text() in plot_df['nhood_taa_l3_l2'].cat.categories])

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back, relabelled in hue_order
ax.legend(
    handles,
    ['Infant -> Paed', 'Infant -> Adult'],
    title='Comparison',
    loc='lower right',
    bbox_to_anchor=(1.1, 0.8),
    ncol=1
)

# Reference line at logFC = 0
plt.axhline(0, color='grey', linestyle='--')

plt.tight_layout()
    
plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_bifTalkLate.pdf')

In [None]:
# Median neighbourhood logFC for every (comparison, taa_l3_l2 label) pair, as a flat table
median_by_group = milo_df.groupby(['comparison', 'nhood_taa_l3_l2'])['logFC'].median()
data = median_by_group.reset_index()
data.head()

In [None]:
from matplotlib import colors as mcolors

# Dot heatmap (portrait) of the median logFC per taa_l3_l2 label for the two
# comparisons against infant. Expects `data` from the previous cell.
comparison_map = {'adult_vs_infant': '-> Adult', 'paed_vs_infant': '-> Paed'}
data = data.loc[data['comparison'].isin(['adult_vs_infant', 'paed_vs_infant'])].copy()
data['comparison'] = pd.Categorical(data['comparison'], categories=['paed_vs_infant','adult_vs_infant'], ordered=True)
# Reversed level order so the first level ends up at the top of the y-axis
data['nhood_taa_l3_l2'] = pd.Categorical(data['nhood_taa_l3_l2'], categories=taa_l3_l2_levels[::-1], ordered=True)
data['nhood_taa_l3_l2'] = data['nhood_taa_l3_l2'].cat.remove_unused_categories()
data['abs_logFC'] = np.abs(data['logFC'])


plt.figure(figsize=calc_figsize(height = 80, width = 45))
# Symmetric colour scale centred on 0
max_abs = np.abs(data['logFC']).max()
vmin, vmax, vcenter = -max_abs, max_abs, 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.diverging_palette(220, 20, center="light", as_cmap=True, s=100)
    
# Colour = signed median logFC, dot size = absolute median logFC
p = sns.scatterplot(data = data, x = 'comparison', y = 'nhood_taa_l3_l2', hue = 'logFC', palette = cmap, hue_norm = normalize, size = 'abs_logFC', sizes = (1,70), legend = False)
p.set_xlabel('Comparison')
p.set_ylabel('Cell population')
p.set_ylim(-0.5, len(data['nhood_taa_l3_l2'].unique())-0.5)
p.set_xlim(-0.5, len(data['comparison'].unique())-0.5)
# Replace the raw comparison keys on the x-axis with the short arrow labels
p.set_xticklabels([comparison_map[label.get_text()] for label in p.get_xticklabels()])

#p.get_legend().remove()

# Add colorbar
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
cbar = p.figure.colorbar(sm, ax=p.axes, orientation='vertical', label='logFC', pad=0.05, aspect=40)
plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_heatmap.pdf')

In [None]:
# Median logFC per comparison at each annotation resolution, stacked into one long table.
# The dict key (annotation level) becomes the 'anno_level' column; the positional
# inner index produced by the concat is discarded.
median_per_level = {
    anno: (
        milo_df
        .groupby(['comparison', f'nhood_{anno}'])['logFC']
        .median()
        .reset_index()
        .rename(columns = {f'nhood_{anno}' : 'anno'})
    )
    for anno in ('taa_l3', 'taa_l4', 'taa_l3_l2')
}
stacked_medians = pd.concat(median_per_level, axis = 0).reset_index(names=['anno_level','drop'])
data = stacked_medians.drop(columns = 'drop')

data.to_csv(f'{data_path}/analyses/freqAnalysis/thyAgeing_all_scvi_{object_version}_milo_ageGroups_medianLogFC.csv')
data

## Gender effect

### Baseline gender effect

In [None]:
# Load mdata
# Re-read the Milo object written earlier in this notebook (the *_milo_ageGroups file).
# NOTE(review): that file is written before the neighbourhood annotations and the
# da_age_group column are added, so those are presumably absent after reloading — confirm.
object_version = 'v5_2025-04-03'
mdata = mu.read_h5mu(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_ageGroups.zarr')

In [None]:
# Differential abundance testing
# The design below needs a `da_age_group` column. In this notebook the Milo object is
# written to disk before that column is derived, so a freshly reloaded mdata does not
# carry it; rebuild it here (same category order as in the age-group analysis) when absent.
if 'da_age_group' not in mdata['rna'].obs.columns:
    mdata['rna'].obs['da_age_group'] = (
        mdata['rna'].obs['age_group']
        .astype('category')
        .cat.reorder_categories(['infant', 'paed', 'adult', 'aged'])
    )

# '-sexM' negates the male coefficient, i.e. the reported logFC is F relative to M
# while adjusting for age group
contrast = '-sexM'
milo.da_nhoods(mdata, design="~da_age_group+sex", model_contrasts=contrast)

# Snapshot of the neighbourhood-level results of the sex contrast
milo_df_sex = mdata['milo'].var.copy()

In [None]:
# Attach cell type annotations to the neighbourhood-level results, joined on nhood_id.
# NOTE(review): `nhood2type_anno` is not defined anywhere in the visible part of this
# notebook; running this cell as-is raises NameError unless it is created elsewhere.
# Presumably it is a table with one row per nhood_id and the cell type columns used
# below — confirm its origin.
milo_df_sex['nhood_id'] = milo_df_sex.index.astype(int)
milo_df_sex = milo_df_sex.merge(nhood2type_anno, on='nhood_id', how='left')

milo_df_sex.head()

In [None]:
# Plotting table for the baseline sex effect: a single comparison labelled 'F vs M'.
# Assumes the merge in the previous cell added the fine and broad cell type columns — TODO confirm.
df = milo_df_sex.copy()
df['comparison'] = 'F vs M'
df[col_cell_type_fine] = pd.Categorical(df[col_cell_type_fine], categories=col_cell_type_fine_levels, ordered=True)
df[col_cell_type_broad] = pd.Categorical(df[col_cell_type_broad], categories=col_cell_type_broad_levels, ordered=True)
# Significant neighbourhoods: SpatialFDR below 5%
df['is_sig'] = df['SpatialFDR'] < 0.05

In [None]:
# Violins plus strip plot of neighbourhood logFC (F vs M) per fine cell type
args = {'data': df, 'x': 'logFC', 'y': col_cell_type_fine, 'hue': 'comparison', 'hue_order': ['F vs M']}

# The figure height scales with the number of fine cell type levels
plt.figure(figsize=calc_figsize(height = len(col_cell_type_fine_levels)*5, width = 60))
# `height` and `aspect` are options of the figure-level sns.catplot only. The axes-level
# sns.violinplot rejects them (extra keyword arguments are treated as matplotlib patch
# properties), so they are not passed here; the figure size is set by plt.figure above.
ax = sns.violinplot(**args,
                    bw_adjust=.8, cut=0,
                    edgecolor='black', linewidth=0, palette='colorblind', inner=None,
)
# Keep the violin legend handles before the strip plot adds its own entries
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, dodge=True, size=1, palette='colorblind', edgecolor='gray', ax=ax)

plt.xlabel('log2(FC)')
plt.ylabel('Cell type')

# Remove the old legend
ax.get_legend().remove()
# Add just the handles from the violin plot back
ax.legend(
    handles,
    ['F vs M'],
    title='Comparison',
    loc='upper center',
    bbox_to_anchor=(0.5, 1.05),
    ncol=2
)

# Reference line at logFC = 0
plt.axvline(0, color = 'grey', linestyle = '--')

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_{col_cell_type_fine}_logFcCellTypes_gender.pdf')

### Sex-based difference by age group

In [None]:
# Sex effect within adults: TOT-sorted adult cells, donor A66 excluded
is_tot = adata.obs['sort'] == 'TOT'
is_adult = adata.obs['age_group'] == 'adult'
not_a66 = adata.obs['donor'] != 'A66'
adata_sub = adata[is_tot & is_adult & not_a66].copy()

# Build the neighbourhoods on the scVI latent space and count cells per donor
mdata = milo.load(adata_sub)
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", n_neighbors=50)
milo.make_nhoods(mdata["rna"], prop=0.1)
mdata = milo.count_nhoods(mdata, sample_col="donor")

# Test variable: sex as a categorical ordered M, F
sex_as_category = mdata["rna"].obs["sex"].astype("category")
mdata["rna"].obs["da_group"] = sex_as_category.cat.reorder_categories(['M','F'])

# Label neighbourhoods at each annotation resolution, renaming the generic output
# columns after every call so the next annotation does not collide with them
for anno in ('taa_l3', 'taa_l4', 'taa_l3_l2'):
    milo.annotate_nhoods(mdata, anno_col = anno)
    specific_names = {'nhood_annotation' : f'nhood_{anno}',
                      'nhood_annotation_frac' : f'nhood_{anno}_frac'}
    mdata['milo'].var.rename(columns = specific_names, inplace = True)

# Differential abundance test; each tuple is (tested level, reference level)
comparisons = [('F', 'M')]
milo_dict = {}
for test_level, ref_level in comparisons:
    contrast = f'da_group{test_level}-da_group{ref_level}'
    milo.da_nhoods(mdata, design="~da_group", model_contrasts=contrast)

    milo_dict[f'{test_level}_vs_{ref_level}'] = mdata['milo'].var.copy()

# # Create cell type annotation
# mdata['rna'].obs['taa_l3_l2'] = mdata['rna'].obs.apply(lambda x: x['taa_l3'] if x['taa_l2'] in ['T_predev', 'T_dev'] else x['taa_l2'], axis = 1)

# Stack the results into one table and save it
stacked_results = pd.concat(milo_dict, axis=0)
milo_df = stacked_results.reset_index(names=['comparison','nhood_id'])
milo_df.to_csv(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_adultGender.csv')

In [None]:
# Sex effect within infants: TOT-sorted infant cells, donor A66 excluded
is_tot = adata.obs['sort'] == 'TOT'
is_infant = adata.obs['age_group'] == 'infant'
not_a66 = adata.obs['donor'] != 'A66'
adata_sub = adata[is_tot & is_infant & not_a66].copy()

# Build the neighbourhoods on the scVI latent space and count cells per donor
mdata = milo.load(adata_sub)
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", n_neighbors=50)
milo.make_nhoods(mdata["rna"], prop=0.1)
mdata = milo.count_nhoods(mdata, sample_col="donor")

# Test variable: sex as a categorical ordered M, F
sex_as_category = mdata["rna"].obs["sex"].astype("category")
mdata["rna"].obs["da_group"] = sex_as_category.cat.reorder_categories(['M','F'])

# Label neighbourhoods at each annotation resolution, renaming the generic output
# columns after every call so the next annotation does not collide with them
for anno in ('taa_l3', 'taa_l4', 'taa_l3_l2'):
    milo.annotate_nhoods(mdata, anno_col = anno)
    specific_names = {'nhood_annotation' : f'nhood_{anno}',
                      'nhood_annotation_frac' : f'nhood_{anno}_frac'}
    mdata['milo'].var.rename(columns = specific_names, inplace = True)

# Differential abundance test; each tuple is (tested level, reference level)
comparisons = [('F', 'M')]
milo_dict = {}
for test_level, ref_level in comparisons:
    contrast = f'da_group{test_level}-da_group{ref_level}'
    milo.da_nhoods(mdata, design="~da_group", model_contrasts=contrast)

    milo_dict[f'{test_level}_vs_{ref_level}'] = mdata['milo'].var.copy()

# # Create cell type annotation
# mdata['rna'].obs['taa_l3_l2'] = mdata['rna'].obs.apply(lambda x: x['taa_l3'] if x['taa_l2'] in ['T_predev', 'T_dev'] else x['taa_l2'], axis = 1)

# Stack the results into one table and save it
stacked_results = pd.concat(milo_dict, axis=0)
milo_df = stacked_results.reset_index(names=['comparison','nhood_id'])
milo_df.to_csv(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_infantGender.csv')

In [None]:
# Load the per-age-group sex results, tag each table with its age group and stack them
adult_csv = f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_adultGender.csv'
infant_csv = f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_infantGender.csv'

milo_adult = pd.read_csv(adult_csv, index_col = 0).assign(age_group='adult')
milo_infant = pd.read_csv(infant_csv, index_col = 0).assign(age_group='infant')

# Fresh 0..n-1 index for the combined table
milo_df = pd.concat([milo_adult, milo_infant], axis=0, ignore_index=True)
milo_df

In [None]:
from plotting.utils import thyAgeing_colors,thyAgeing_greys

# Milo beeswarm (landscape)
# Split violins plus strip plots of the F-vs-M neighbourhood logFC per taa_l3_l2 label,
# one half per age group (infant / adult); the figure is saved as PDF at the end.
df = milo_df.copy()
# Significant neighbourhoods: SpatialFDR of at most 5%
df['is_sig'] = df['SpatialFDR'] <= 0.05
df['nhood_taa_l3_l2'] = pd.Categorical(df['nhood_taa_l3_l2'], categories=taa_l3_l2_levels, ordered=True)
df['nhood_taa_l3_l2'] = df['nhood_taa_l3_l2'].cat.remove_unused_categories()
groups_of_interest = ['infant', 'adult']
group_labels = ['infant', 'adult']

# Calculate median logFC for each nhood_annotation and comparison
df_median = df.groupby(['age_group', 'nhood_taa_l3_l2'])['logFC'].median().round(decimals=2).reset_index()
df_median = df_median.loc[df_median['age_group'].isin(groups_of_interest)]
# Turn the medians into signed text labels ('+0.5', '-1.2') for annotation on the plot
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

# Shared keyword arguments for the violin and strip layers
args = {'x': 'nhood_taa_l3_l2', 'y': 'logFC', 'hue': 'age_group', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=180))
ax = sns.violinplot(**args, data=df,
                    bw_adjust=.8, cut=0, split=True, density_norm='width',
                    gap=0.3,
                    palette=[thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor='black', linewidth=0, inner=None,
                    )
# Get the legend from just the violin plot (handles/labels are not used further in this cell)
handles, labels = ax.get_legend_handles_labels()

# Make the violins semi-transparent, then overlay the individual neighbourhoods:
# non-significant ones in grey, significant ones in purple
for violin in ax.collections:
    violin.set_alpha(0.5)
sns.stripplot(**args, data=df.loc[df['is_sig'] == False],
              palette=[thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)
sns.stripplot(**args, data=df.loc[df['is_sig'] == True],
              palette=[thyAgeing_colors['purple'], thyAgeing_colors['purple']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)

plt.ylabel('log2(FC)')
plt.xlabel('Cell type')

# Remove the old legend
ax.get_legend().remove()

# In place of a legend: write the two group names next to the last category,
# one on each side (ha='right' / ha='left') of its tick position
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[0], ha='right', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[1], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write the median logFC labels above each category: first hue level shifted left
# of the tick, the other hue level shifted right
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_taa_l3_l2'])
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] - 0.1, s=row['logFC'], ha = 'right', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)
    else:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + 0.1, s=row['logFC'], ha = 'left', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
# Dashed reference line at logFC = 0
plt.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
#ax.set_yticks([-4, -2, 0, 2])
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation = 90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_gender_pub.pdf')

In [None]:
from matplotlib import colors as mcolors

# Dot heatmap of the median F-vs-M logFC per taa_l3_l2 label and age group.
# Expects df_median and group_labels from the beeswarm cell.

# Extract numeric value from logFC (e.g., '+0.49' -> 0.49, '-1.29' -> -1.29, '+-0.0' -> 0.0)
# - regex=False: '+' is a regex metacharacter, so a pattern-based replace fails with
#   "nothing to repeat" on pandas versions where str.replace defaults to regex=True.
# - astype(str): keeps the cell re-runnable once the column has already been converted
#   to float (the .str accessor is only available on string data).
logfc_text = df_median['logFC'].astype(str)
logfc_text = logfc_text.str.replace('+-', '0', regex=False).str.replace('+', '', regex=False)
df_median['logFC'] = pd.to_numeric(logfc_text, errors='coerce')
df_median['abs_logFC'] = df_median['logFC'].abs()
df_median['age_group'] = pd.Categorical(df_median['age_group'], categories=['infant','adult'], ordered=True)

plt.figure(figsize=calc_figsize(height=30, width=180))
# Symmetric colour scale centred on 0 so that the colour encodes the direction of change
max_abs = np.abs(df_median['logFC']).max()
vmin, vmax, vcenter = -max_abs, max_abs, 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.blend_palette([thyAgeing_colors['teal'], 'white', thyAgeing_colors['orange']], as_cmap=True)

# Colour = signed median logFC, dot size = absolute median logFC
p = sns.scatterplot(
    data=df_median,
    y='age_group', x='nhood_taa_l3_l2', hue='logFC',
    palette=cmap, hue_norm=normalize, size='abs_logFC', sizes=(1, 70), legend=False
)
p.set_ylabel('Age Group')
p.set_xlabel('Cell Population')
p.set_xlim(-0.5, len(df_median['nhood_taa_l3_l2'].unique())-0.5)
p.set_ylim(-0.5, len(df_median['age_group'].unique())-0.5)
p.set_xticklabels(df_median['nhood_taa_l3_l2'].cat.categories, rotation=90, va='top', ha='center')
p.set_yticklabels(group_labels, rotation=0, va='center', ha='right')

# Add colorbar
# Built from a standalone mappable because the scatterplot legend is disabled
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
cbar = p.figure.colorbar(sm, ax=p.axes, orientation='vertical', label='logFC(M->F)', pad=0.01, aspect=5)
plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_adult_infant_gender_heatmap_pub.pdf')

In [None]:
from plotting.utils import thyAgeing_colors, thyAgeing_greys

# Landscape Milo beeswarm at taa_l4 resolution: one split violin per cell type
# (one half per age group), overlaid with the individual neighbourhoods, where
# neighbourhoods with SpatialFDR <= 0.05 are drawn in purple.
groups_of_interest = ['infant', 'adult']
group_labels = ['infant', 'adult']

df = milo_df.copy()
df['is_sig'] = df['SpatialFDR'] <= 0.05
df['nhood_taa_l4'] = pd.Categorical(
    df['nhood_taa_l4'], categories=col_cell_type_fine_levels, ordered=True
).remove_unused_categories()


def _signed_label(value):
    """Format a median as text, prefixing non-negative values with '+'."""
    return f"+{value}" if value >= 0 else str(value)


# Median logFC per age group and cell type, kept as signed text for the annotations
df_median = (
    df.groupby(['age_group', 'nhood_taa_l4'])['logFC']
    .median()
    .round(decimals=2)
    .reset_index()
)
df_median = df_median.loc[df_median['age_group'].isin(groups_of_interest)]
df_median['logFC'] = df_median['logFC'].apply(_signed_label)

args = {'x': 'nhood_taa_l4', 'y': 'logFC', 'hue': 'age_group', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=350))
violin_fill = [thyAgeing_greys['grey2']] * 2
ax = sns.violinplot(
    data=df, **args,
    bw_adjust=.8, cut=0, split=True, density_norm='width', gap=0.3,
    palette=violin_fill, edgecolor='black', linewidth=0, inner=None,
)
# Legend entries as produced by the violin layer alone
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)

# Non-significant neighbourhoods first (grey), significant ones on top (purple)
for sig_flag, dot_colour in ((False, thyAgeing_greys['grey4']), (True, thyAgeing_colors['purple'])):
    sns.stripplot(**args, data=df.loc[df['is_sig'] == sig_flag],
                  palette=[dot_colour, dot_colour],
                  dodge=True, size=0.5, alpha=0.5, ax=ax)

ax.set_ylabel('log2(FC)')
ax.set_xlabel('Cell type')

# The hue legend is replaced by the text labels added below
ax.get_legend().remove()

# Name the two violin halves next to the last category, just above the lower y limit
last_category = ax.get_xaxis().get_ticklabels()[-1].get_text()
for half_label, half_align in zip(group_labels, ('right', 'left')):
    ax.text(x=last_category, y=ax.get_ylim()[0] + 0.2, s=half_label, ha=half_align, va='bottom',
            fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write each median above its violin half: first hue level to the left of the tick, second to the right
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
first_level = args['hue_order'][0]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_taa_l4'])
    shift, align = (-0.1, 'right') if row[args['hue']] == first_level else (0.1, 'left')
    ax.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + shift, s=row['logFC'],
            ha=align, fontsize=5, color=thyAgeing_colors['purple'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
ax.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation=90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l4_logFcCellTypes_adult_infant_gender_pub.pdf')

In [None]:
from plotting.utils import thyAgeing_colors, thyAgeing_greys

# Milo beeswarm (landscape), restricted to taa_l4 populations whose label starts with 'Fb'.
# Split violins per population (one half per age group); neighbourhoods with
# SpatialFDR <= 0.05 are drawn in purple on top of the non-significant ones.
df = milo_df.copy()
df['is_sig'] = df['SpatialFDR'] <= 0.05
df['nhood_taa_l4'] = pd.Categorical(df['nhood_taa_l4'], categories=col_cell_type_fine_levels[::-1], ordered=True)
# Labels that are absent from col_cell_type_fine_levels become NaN in the cast above, and a
# .str mask with NaN entries cannot be used for boolean indexing. Going through astype(str)
# turns them into 'nan', so the mask is strictly boolean and such rows are simply dropped.
is_fb = df['nhood_taa_l4'].astype(str).str.startswith('Fb')
df = df.loc[is_fb].copy()
df['nhood_taa_l4'] = df['nhood_taa_l4'].cat.remove_unused_categories()
groups_of_interest = ['infant', 'adult']
group_labels = ['infant', 'adult']

# Calculate median logFC for each nhood_annotation and comparison (kept as signed text for the labels)
df_median = df.groupby(['age_group', 'nhood_taa_l4'])['logFC'].median().round(decimals=2).reset_index()
df_median = df_median.loc[df_median['age_group'].isin(groups_of_interest)]
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

args = {'x': 'nhood_taa_l4', 'y': 'logFC', 'hue': 'age_group', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=60))
ax = sns.violinplot(**args, data=df,
                    bw_adjust=.8, cut=0, split=True, density_norm='width',
                    gap=0.3,
                    palette=[thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor='black', linewidth=0, inner=None,
                    )
# Legend entries as produced by the violin layer alone
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
# Non-significant neighbourhoods first (grey), significant ones on top (purple)
sns.stripplot(**args, data=df.loc[~df['is_sig']],
              palette=[thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)
sns.stripplot(**args, data=df.loc[df['is_sig']],
              palette=[thyAgeing_colors['purple'], thyAgeing_colors['purple']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)

plt.ylabel('log2(FC)')
plt.xlabel('Cell type')

# Remove the hue legend; the violin halves are named by the text labels below
ax.get_legend().remove()

# Name the two violin halves next to the last category, just above the lower y limit
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[0], ha='right', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[1], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write each median above its violin half: first hue level left of the tick, second right of it
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_taa_l4'])
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] - 0.1, s=row['logFC'], ha = 'right', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)
    else:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + 0.1, s=row['logFC'], ha = 'left', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
plt.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation = 90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_fbSplit_taa_l4_logFcCellTypes_adult_infant_gender_pub.pdf')

### Differences in ageing between sexes (subset by sex)

In [None]:
# Number of distinct female donors per age group among TOT-sorted, non-aged samples (donor A66 excluded)
female_mask = (
    (adata.obs['sort'] == 'TOT')
    & (adata.obs['sex'] == 'F')
    & (adata.obs['age_group'] != 'aged')
    & (adata.obs['donor'] != 'A66')
)
female_donors = adata[female_mask].obs[['donor', 'age_group', 'sex']].drop_duplicates()
female_donors.groupby(['age_group', 'sex']).size()

In [None]:
# Mean age_months per age group over the distinct female donor rows (TOT-sorted, non-aged, donor A66 excluded)
female_mask = (
    (adata.obs['sort'] == 'TOT')
    & (adata.obs['sex'] == 'F')
    & (adata.obs['age_group'] != 'aged')
    & (adata.obs['donor'] != 'A66')
)
female_donor_ages = adata[female_mask].obs[['donor', 'age_group', 'sex', 'age_months']].drop_duplicates()
female_donor_ages.groupby(['age_group', 'sex'])['age_months'].mean()

In [None]:
# Number of distinct male donors per age group among TOT-sorted, non-aged samples (donor A66 excluded)
male_mask = (
    (adata.obs['sort'] == 'TOT')
    & (adata.obs['sex'] == 'M')
    & (adata.obs['age_group'] != 'aged')
    & (adata.obs['donor'] != 'A66')
)
male_donors = adata[male_mask].obs[['donor', 'age_group', 'sex']].drop_duplicates()
male_donors.groupby(['age_group', 'sex']).size()

In [None]:
# Mean age_months per age group over the distinct male donor rows (TOT-sorted, non-aged, donor A66 excluded)
male_mask = (
    (adata.obs['sort'] == 'TOT')
    & (adata.obs['sex'] == 'M')
    & (adata.obs['age_group'] != 'aged')
    & (adata.obs['donor'] != 'A66')
)
male_donor_ages = adata[male_mask].obs[['donor', 'age_group', 'sex', 'age_months']].drop_duplicates()
male_donor_ages.groupby(['age_group', 'sex'])['age_months'].mean()

In [None]:
# Milo differential abundance between age groups, run on the cells of a single sex.
# Results are written to files suffixed with the chosen sex (CSV of per-neighbourhood
# results and the full MuData object).
# NOTE(review): the loading cell further down reads both the '_milo_F' and '_milo_M' CSVs,
# so this cell presumably has to be executed once per sex (sex = 'F' and sex = 'M') — confirm.
sex = 'F'
# Subset adata: TOT-sorted cells of the chosen sex, excluding the 'aged' group and donor A66
adata_sub = adata[(adata.obs['sort'] == 'TOT') & (adata.obs['sex'] == sex) & (adata.obs['age_group'] != 'aged') & (adata.obs['donor'] != 'A66')].copy()

# Construct nhoods on the kNN graph computed from the scVI embedding
mdata = milo.load(adata_sub)
sc.pp.neighbors(mdata["rna"], use_rep="X_scVI", n_neighbors=100)
milo.make_nhoods(mdata["rna"], prop=0.1)
# Count nhoods (cells per neighbourhood and donor)
mdata = milo.count_nhoods(mdata, sample_col="donor")

# Create and reorder categories so the design factor levels run infant < paed < adult
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["age_group"]
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].astype("category")
mdata["rna"].obs["da_age_group"] = mdata["rna"].obs["da_age_group"].cat.reorder_categories(['infant','paed','adult'])

# Annotate nhoods at each annotation level; the generic output columns are renamed per level
# so that successive calls do not overwrite each other
for anno in ['taa_l3', 'taa_l4', 'taa_l3_l2']:
    milo.annotate_nhoods(mdata, anno_col = anno)
    mdata['milo'].var.rename(columns = {'nhood_annotation' : f'nhood_{anno}',
                                        'nhood_annotation_frac' : f'nhood_{anno}_frac'}, inplace = True)

# Differential abundance testing: one contrast per pair of age groups
comparisons = [('adult', 'paed'), ('paed', 'infant'), ('adult', 'infant'),]
milo_dict = {}
for c in comparisons:
    # Differential abundance testing (contrast = first group minus second group)
    contrast = f'da_age_group{c[0]}-da_age_group{c[1]}'
    milo.da_nhoods(mdata, design="~da_age_group", model_contrasts=contrast)

    # Copy the results now: the next da_nhoods call writes into the same table
    milo_dict[f'{c[0]}_vs_{c[1]}'] = mdata['milo'].var.copy()

# Create cell type annotation: taa_l3 label for T_predev / T_dev cells, taa_l2 label otherwise
# NOTE(review): 'taa_l3_l2' is already consumed by annotate_nhoods above, so the column
# presumably exists in adata.obs before this cell runs and is only recomputed here — confirm,
# or move this line above the annotation loop.
mdata['rna'].obs['taa_l3_l2'] = mdata['rna'].obs.apply(lambda x: x['taa_l3'] if x['taa_l2'] in ['T_predev', 'T_dev'] else x['taa_l2'], axis = 1)

# Construct milo df: stack the per-comparison tables, keeping the comparison key and nhood id as columns
milo_df = pd.concat(milo_dict, axis=0).reset_index(names=['comparison','nhood_id'])

# Save milo df
milo_df.to_csv(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_{sex}.csv')

# Save milo object
# Cast selected obs columns to plain dtypes before writing
# (presumably to avoid mixed-type serialisation errors — TODO confirm)
mdata['rna'].obs['path_cellranger_arc'] = mdata['rna'].obs['path_cellranger_arc'].astype('str')
mdata['rna'].obs['cite'] = mdata['rna'].obs['cite'].astype('str')
mdata['rna'].obs['sort_type'] = mdata['rna'].obs['sort_type'].astype('str')
mdata['rna'].obs['age_num'] = mdata['rna'].obs['age_num'].astype(float)
mdata['rna'].obs['age_cont'] = mdata['rna'].obs['age_cont'].astype(float)
# NOTE(review): astype(int) raises if age_months contains missing values — confirm it never does
mdata['rna'].obs['age_months'] = mdata['rna'].obs['age_months'].astype(int)

# Stored as a plain list instead of the original array-like for writing
mdata['milo'].uns['annotation_labels'] = mdata['milo'].uns['annotation_labels'].tolist()
# NOTE(review): the object is written with write_h5mu although the file name ends in '.zarr' —
# confirm the suffix is intentional
mdata.write_h5mu(f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_{sex}.zarr',
                compression=hdf5plugin.FILTERS["zstd"],
                compression_opts=hdf5plugin.Zstd(clevel=5).filter_options,
        )

In [None]:
# Read the per-sex Milo result tables, tag each with its sex and stack them (female rows first)
milo_df_male = pd.read_csv(
    f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_M.csv', index_col=0
).assign(sex='M')
milo_df_female = pd.read_csv(
    f'{data_path}/objects/rna/thyAgeing_all_scvi_{object_version}_milo_F.csv', index_col=0
).assign(sex='F')

milo_df_sex = pd.concat([milo_df_female, milo_df_male], axis=0)

milo_df_sex.head()

In [None]:
# Keep only the adult-vs-infant contrast from the combined per-sex Milo results
df = milo_df_sex.loc[milo_df_sex['comparison'].isin(['adult_vs_infant'])].copy()
# Order the populations by the taa_l3_l2 level list (labels outside the list become NaN)
df['nhood_taa_l3_l2'] = pd.Categorical(df['nhood_taa_l3_l2'], categories=taa_l3_l2_levels, ordered=True)
# Inclusive threshold, matching the `SpatialFDR <= 0.05` rule used by the plotting cells
df['is_sig'] = df['SpatialFDR'] <= 0.05

df.head()

In [None]:
from plotting.utils import thyAgeing_colors, thyAgeing_greys

# Landscape Milo beeswarm at taa_l3_l2 resolution: one split violin per population
# (male half / female half), overlaid with the individual neighbourhoods, where
# neighbourhoods with SpatialFDR <= 0.05 are drawn in purple.
groups_of_interest = ['M', 'F']
group_labels = ['male', 'female']

df['is_sig'] = df['SpatialFDR'] <= 0.05
df['nhood_taa_l3_l2'] = pd.Categorical(
    df['nhood_taa_l3_l2'], categories=taa_l3_l2_levels, ordered=True
).remove_unused_categories()


def _signed_label(value):
    """Format a median as text, prefixing non-negative values with '+'."""
    return f"+{value}" if value >= 0 else str(value)


# Median logFC per sex and population, kept as signed text for the annotations
df_median = (
    df.groupby(['sex', 'nhood_taa_l3_l2'])['logFC']
    .median()
    .round(decimals=2)
    .reset_index()
)
df_median = df_median.loc[df_median['sex'].isin(groups_of_interest)]
df_median['logFC'] = df_median['logFC'].apply(_signed_label)

args = {'x': 'nhood_taa_l3_l2', 'y': 'logFC', 'hue': 'sex', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=180))
violin_fill = [thyAgeing_greys['grey2']] * 2
ax = sns.violinplot(
    data=df, **args,
    bw_adjust=.8, cut=0, split=True, density_norm='width', gap=0.3,
    palette=violin_fill, edgecolor='black', linewidth=0, inner=None,
)
# Legend entries as produced by the violin layer alone
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)

# Non-significant neighbourhoods first (grey), significant ones on top (purple)
for sig_flag, dot_colour in ((False, thyAgeing_greys['grey4']), (True, thyAgeing_colors['purple'])):
    sns.stripplot(**args, data=df.loc[df['is_sig'] == sig_flag],
                  palette=[dot_colour, dot_colour],
                  dodge=True, size=0.5, alpha=0.5, ax=ax)

ax.set_ylabel('log2(FC)')
ax.set_xlabel('Cell type')

# The hue legend is replaced by the text labels added below
ax.get_legend().remove()

# Name the two violin halves next to the last category, just above the lower y limit
last_category = ax.get_xaxis().get_ticklabels()[-1].get_text()
for half_label, half_align in zip(group_labels, ('right', 'left')):
    ax.text(x=last_category, y=ax.get_ylim()[0] + 0.2, s=half_label, ha=half_align, va='bottom',
            fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write each median above its violin half: first hue level to the left of the tick, second to the right
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
first_level = args['hue_order'][0]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_taa_l3_l2'])
    shift, align = (-0.1, 'right') if row[args['hue']] == first_level else (0.1, 'left')
    ax.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + shift, s=row['logFC'],
            ha=align, fontsize=5, color=thyAgeing_colors['purple'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
ax.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation=90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_agingGender_pub.pdf')

In [None]:
# Display the current df_median table (in notebook order: the per-sex median logFC
# labels computed for the beeswarm annotations in the preceding plotting cell)
df_median

In [None]:
from matplotlib import colors as mcolors

# Dot heatmap of the median adult-vs-infant logFC per cell population (x) and sex (y):
# dot colour encodes the signed median, dot size its absolute value.

# df_median['logFC'] holds the signed display strings built for the beeswarm labels
# ('+0.49', '-1.29', ...). A median that rounds to -0.0 is formatted as '+-0.0', which
# a plain astype(float) cannot parse. Strip the leading '+' literally and parse with
# to_numeric instead; the astype(str) round-trip keeps the cell safe to re-run once
# the column is already numeric.
df_median['logFC'] = pd.to_numeric(df_median['logFC'].astype(str).str.lstrip('+'), errors='coerce')
df_median['abs_logFC'] = df_median['logFC'].abs()
df_median['sex'] = pd.Categorical(df_median['sex'], categories=['M','F'], ordered=True)

plt.figure(figsize=calc_figsize(height=30, width=180))

# Symmetric, zero-centred colour scale so both directions use the same intensity range
max_abs = df_median['abs_logFC'].max()
vmin, vmax, vcenter = -max_abs, max_abs, 0
normalize = mcolors.TwoSlopeNorm(vcenter=vcenter, vmin=vmin, vmax=vmax)
cmap = sns.blend_palette([thyAgeing_colors['teal'], 'white', thyAgeing_colors['orange']], as_cmap=True)

p = sns.scatterplot(
    data=df_median,
    y='sex', x='nhood_taa_l3_l2', hue='logFC',
    palette=cmap, hue_norm=normalize, size='abs_logFC', sizes=(1, 70), legend=False
)
p.set_ylabel('Sex')
p.set_xlabel('Cell Population')
# Pad the categorical axes by half a slot so the outermost dots are not clipped
p.set_xlim(-0.5, len(df_median['nhood_taa_l3_l2'].unique())-0.5)
p.set_ylim(-0.5, len(df_median['sex'].unique())-0.5)
p.set_xticklabels(df_median['nhood_taa_l3_l2'].cat.categories, rotation=90, va='top', ha='center')
p.set_yticklabels(group_labels, rotation=0, va='center', ha='right')

# Add colorbar (the scatterplot legend is disabled, so build the mappable by hand)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=normalize)
sm.set_array([])
cbar = p.figure.colorbar(sm, ax=p.axes, orientation='vertical', label='logFC(infant->adult)', pad=0.01, aspect=5)
plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l3_l2_logFcCellTypes_agingGender_heatmap_pub.pdf')

In [None]:
from plotting.utils import thyAgeing_colors, thyAgeing_greys

# Milo beeswarm (landscape), restricted to taa_l4 populations whose label contains 'Fb',
# with one violin half per sex; neighbourhoods with SpatialFDR <= 0.05 are drawn in purple.
#
# Work on a separate frame (df_fb) instead of reassigning `df`: `df` is the shared
# adult-vs-infant table covering all populations, and the horizontal per-sex plot further
# down reads it again, so overwriting it here would silently restrict that plot to 'Fb'
# neighbourhoods. The explicit copies also avoid assigning into a slice of `df`.
df_fb = df.copy()
df_fb['is_sig'] = df_fb['SpatialFDR'] <= 0.05
df_fb['nhood_taa_l4'] = pd.Categorical(df_fb['nhood_taa_l4'], categories=col_cell_type_fine_levels, ordered=True)
# Labels that are absent from col_cell_type_fine_levels become NaN in the cast above, and a
# .str mask with NaN entries cannot be used for boolean indexing. Going through astype(str)
# turns them into 'nan', so the mask is strictly boolean and such rows are simply dropped.
is_fb = df_fb['nhood_taa_l4'].astype(str).str.contains('Fb')
df_fb = df_fb.loc[is_fb].copy()
df_fb['nhood_taa_l4'] = df_fb['nhood_taa_l4'].cat.remove_unused_categories()
groups_of_interest = ['M', 'F']
group_labels = ['male', 'female']

# Calculate median logFC for each nhood_annotation and sex (kept as signed text for the labels)
df_median = df_fb.groupby(['sex', 'nhood_taa_l4'])['logFC'].median().round(decimals=2).reset_index()
df_median = df_median.loc[df_median['sex'].isin(groups_of_interest)]
df_median['logFC'] = df_median['logFC'].apply(lambda x: f"+{x}" if x >= 0 else str(x))

args = {'x': 'nhood_taa_l4', 'y': 'logFC', 'hue': 'sex', 'hue_order': groups_of_interest}

plt.figure(figsize=calc_figsize(height=60, width=60))
ax = sns.violinplot(**args, data=df_fb,
                    bw_adjust=.8, cut=0, split=True, density_norm='width',
                    gap=0.3,
                    palette=[thyAgeing_greys['grey2'], thyAgeing_greys['grey2']],
                    edgecolor='black', linewidth=0, inner=None,
                    )
# Legend entries as produced by the violin layer alone
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)
# Non-significant neighbourhoods first (grey), significant ones on top (purple)
sns.stripplot(**args, data=df_fb.loc[~df_fb['is_sig']],
              palette=[thyAgeing_greys['grey4'], thyAgeing_greys['grey4']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)
sns.stripplot(**args, data=df_fb.loc[df_fb['is_sig']],
              palette=[thyAgeing_colors['purple'], thyAgeing_colors['purple']],
              dodge=True, size=0.5, alpha=0.5, ax=ax)

plt.ylabel('log2(FC)')
plt.xlabel('Cell type')

# Remove the hue legend; the violin halves are named by the text labels below
ax.get_legend().remove()

# Name the two violin halves next to the last category, just above the lower y limit
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[0], ha='right', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)
plt.text(x=ax.get_xaxis().get_ticklabels()[-1].get_text(), y = ax.get_ylim()[0] + 0.2, s=group_labels[1], ha='left', va='bottom', fontsize=5, color=thyAgeing_greys['grey2'], rotation=90)

# Write each median above its violin half: first hue level left of the tick, second right of it
ax_labels = [c.get_text() for c in ax.get_xticklabels()]
for i, row in df_median.iterrows():
    tick_pos = ax_labels.index(row['nhood_taa_l4'])
    if row[args['hue']] == args['hue_order'][0]:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] - 0.1, s=row['logFC'], ha = 'right', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)
    else:
        plt.text(y=ax.get_ylim()[1] + 0.2, x=ax.get_xticks()[tick_pos] + 0.1, s=row['logFC'], ha = 'left', fontsize=5, color=thyAgeing_colors['purple'], rotation=90)

# Style axes
sns.despine(bottom=True, trim=True, offset=2)
plt.axhline(0, color=thyAgeing_greys['grey4'], linestyle=(0, (3, 3)), linewidth=0.5)
ax.xaxis.grid(True, linestyle='solid', color=thyAgeing_greys['grey2'], alpha=0.5, linewidth=0.5)
ax.tick_params(axis='x', length=0, rotation = 90)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_taa_l4_Fb_logFcCellTypes_agingGender_pub.pdf')

In [None]:
from plotting.utils import thyAgeing_colors

# Preview the project colour palette, with the colour names as x tick labels.
# sns.palplot always draws into a figure it creates itself, so opening a figure with
# plt.figure(figsize=...) beforehand only leaves an empty extra figure and the requested
# size is never applied. Resize the figure palplot created instead.
sns.palplot(list(thyAgeing_colors.values()))
plt.gcf().set_size_inches(10, 2)
plt.xticks(range(len(thyAgeing_colors)), list(thyAgeing_colors.keys()), rotation=90)
plt.title("thyAgeing_colors")
plt.tight_layout()
plt.show()

In [None]:
# Horizontal Milo beeswarm: logFC on x, taa_l3_l2 populations on y, one violin half per sex.
# Significant neighbourhoods are coloured by sex (female orange, male blue), the rest grey.
args = {'data': df, 'x': 'logFC', 'y': 'nhood_taa_l3_l2', 'hue': 'sex', 'hue_order': ['F', 'M']}
# Same encoding without the data, reused for the two strip layers
encoding = {key: value for key, value in args.items() if key != 'data'}

# Figure height grows with the number of populations
plt.figure(figsize=calc_figsize(height=len(taa_l3_l2_levels)*5+12, width=60))
ax = sns.violinplot(
    **args,
    bw_adjust=.8, cut=0, split=True, density_norm='width', gap=0.3,
    palette=['grey', 'grey'],
    edgecolor='black', linewidth=0, inner=None,
)
# Keep the violin legend handles; they are reused for the final legend
handles, labels = ax.get_legend_handles_labels()

for violin in ax.collections:
    violin.set_alpha(0.5)

# Non-significant neighbourhoods first (grey), significant ones on top (coloured by sex)
strip_layers = (
    (False, ["#7A7A7A", '#7A7A7A']),
    (True, [thyAgeing_colors['orange'], thyAgeing_colors['blue']]),
)
for sig_flag, layer_palette in strip_layers:
    sns.stripplot(data=df.loc[df['is_sig'] == sig_flag], **encoding,
                  palette=layer_palette,
                  dodge=True, size=0.5, alpha=0.5, ax=ax)

ax.set_xlabel('log2(FC)')
ax.set_ylabel('Cell type')

# Swap the automatic legend for one built from the violin handles only
ax.get_legend().remove()
ax.legend(
    handles,
    ['Female', 'Male'],
    title='Comparison',
    loc='upper center',
    bbox_to_anchor=(0.5, 1.06),
    ncol=2
)

sns.despine(left=True, trim=True)

# Dashed reference line at logFC = 0 and fixed x ticks
ax.axvline(0, color='#7A7A7A', linestyle=(0, (3, 3)), linewidth=0.8)
ax.set_xticks([-10, -5, 0, 5, 10])

ax.yaxis.grid(True, linestyle='solid', color='grey', alpha=0.5, linewidth=0.5)
ax.tick_params(axis='y', length=0)

plt.tight_layout()

plt.savefig(f'{plots_path}/freqAnalysis/thyAgeing_all_{col_cell_type_fine}_logFcCellTypes_agingSex.pdf')