In [None]:
import os
import sys
import session_info
from datetime import datetime
# Date stamp of the current run, formatted as YYYY-MM-DD
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import mudata as mu
# Supplies the zstd filter passed to write_h5mu further down
import hdf5plugin

import pertpy
# Single Milo instance, reused by the neighbourhood construction and DA testing cells below
milo = pertpy.tl.Milo()

# Add repo path to sys path (allows to access scripts and metadata from repo)
# The entries are inserted at positions 1 and 2, so sys.path[0] is left untouched;
# it is read a few lines below to derive the R library location.
#repo_path,_ = os.path.split(os.path.split(os.getcwd())[0])
repo_path = '/lustre/scratch126/cellgen/team205/lm25/thymus_projects/thymus_ageing_atlas/T_NK_compartment'
sys.path.insert(1, repo_path) 
sys.path.insert(2, '/lustre/scratch126/cellgen/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts')

# Add R libs path
#os.environ['LD_LIBRARY_PATH'] = '' # Uncomment on jhub
#os.environ['R_HOME'] = '/nfs/team205/lm25/condaEnvs/thymusAgeing/lib/R' # Uncomment on jhub
# R_LIBS_USER is built from the parent directory of sys.path[0].
# NOTE(review): presumably sys.path[0] is the Python lib directory of the active environment,
# so this resolves to <env>/lib/R/library -- confirm for the kernel in use.
os.environ['R_LIBS_USER'] = f'{os.path.split(sys.path[0])[0]}/R/library'

# Define paths
# plots_path and data_path end with '/', and later f-strings append another '/',
# so the resulting paths contain a double slash.
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'
model_path = os.path.join(repo_path, 'models')
general_data_path = '/lustre/scratch126/cellgen/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print('Dir for plots: {}'.format(plots_path))
print('Dir for data: {}'.format(data_path))

# IPython extensions: rpy2 (R from the notebook) and autoreload in mode 2
# (modules are re-imported before each cell runs)
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

# Formatting
# Register the Arial font file with matplotlib, then apply the project style sheet.
# NOTE(review): presumably the style sheet references Arial -- confirm.
from matplotlib import font_manager
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/lustre/scratch126/cellgen/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize

In [None]:
# Load the T/NK object for the pinned version
object_version = 'v9_2025-03-28'
adata = ad.read_h5ad(f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}.zarr')

# Replace any annotation columns already present in obs with the curated ones:
# overlapping columns are dropped in one go, then the curated table is joined on the obs index.
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
stale_anno_cols = [col for col in ct_anno.columns if col in adata.obs.columns]
adata.obs = adata.obs.drop(columns = stale_anno_cols).join(ct_anno)

# Keep only cells that have a curated taa_l5 label
has_curated_label = adata.obs['taa_l5'].notna()
adata = adata[has_curated_label].copy()

# Refresh donor/sample metadata from the most recent metadata spreadsheet
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

adata

In [None]:
# Define columns
col_cell_type_broad = 'taa_l3'
col_cell_type_fine = 'taa_l4'

# Cell types actually present in the data. Computed once per column: putting the
# `.unique().tolist()` call inside the comprehension filter would re-scan the whole
# obs column for every candidate level.
observed_broad_types = set(adata.obs[col_cell_type_broad].unique().tolist())
observed_fine_types = set(adata.obs[col_cell_type_fine].unique().tolist())

# Ordered T/NK levels from the annotation scheme, restricted to the observed cell types
col_cell_type_broad_levels = [c for c in get_ct_levels(col_cell_type_broad, taa_l1 = ['T', 'NK']) if c in observed_broad_types]
col_cell_type_fine_levels = [c for c in get_ct_levels(col_cell_type_fine, taa_l1 = ['T', 'NK']) if c in observed_fine_types]

col_age_group = 'age_group'
# Resolve the imported `<col_age_group>_levels` list by name. A namespace lookup does
# the same job as eval() without executing an arbitrary expression string.
col_age_group_levels = globals()[f'{col_age_group}_levels']

## Differential abundance analysis (Milo)

In [None]:
# Build the Milo container from adata; its 'rna' modality is used for the graph steps
mdata = milo.load(adata)
rna_modality = mdata["rna"]

# kNN graph on the scVI embedding, then neighbourhoods defined on that graph
sc.pp.neighbors(rna_modality, n_neighbors=100, use_rep="X_scVI")
milo.make_nhoods(rna_modality, prop=0.1)

# Count cells per neighbourhood, grouped by the 'donor' column
mdata = milo.count_nhoods(mdata, sample_col="donor")

In [None]:
# Cast selected obs columns to plain dtypes before writing.
# NOTE(review): presumably these columns do not serialise cleanly otherwise -- confirm.
obs_dtype_casts = {
    'path_cellranger_arc': 'str',
    'cite': 'str',
    'age_num': float,
    'age_cont': float,
    'age_months': int,
}
for obs_col, target_dtype in obs_dtype_casts.items():
    mdata['rna'].obs[obs_col] = mdata['rna'].obs[obs_col].astype(target_dtype)

# Checkpoint the Milo object (written with write_h5mu; note the file name ends in .zarr)
mdata.write_h5mu(
    f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_milo_ageGroups.zarr',
    compression=hdf5plugin.FILTERS["zstd"],
    compression_opts=hdf5plugin.Zstd(clevel=5).filter_options,
)

In [None]:
# Reload the Milo checkpoint written above. object_version is set again here so this
# cell does not depend on the loading cell having been run.
object_version = 'v9_2025-03-28'
milo_checkpoint_path = f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_milo_ageGroups.zarr'
mdata = mu.read_h5mu(milo_checkpoint_path)

mdata

In [None]:
# NOTE(review): set explicitly after reloading; presumably the key is expected by Milo
# and is not restored from disk -- confirm.
mdata['rna'].uns['nhood_neighbors_key'] = 'X_scVI'

# Age group as a categorical with an explicit infant -> aged category order
age_group_order = ['infant','paed','adult','aged']
mdata["rna"].obs["da_age_group"] = (
    mdata["rna"].obs["age_group"]
    .astype("category")
    .cat.reorder_categories(age_group_order)
)

# Pairwise contrasts as (group, reference); all use the same design, which also includes sex
comparisons = [('aged', 'adult'), ('adult', 'paed'), ('paed', 'infant'), ('adult', 'infant'),]
milo_dict = {}
for test_group, ref_group in comparisons:
    # Differential abundance testing
    contrast = f'da_age_group{test_group}-da_age_group{ref_group}'
    milo.da_nhoods(mdata, design="~da_age_group+sex", model_contrasts=contrast)

    # Snapshot the per-neighbourhood results table for this contrast.
    # Copied because the same mdata['milo'].var is read again after the next da_nhoods call.
    milo_dict[f'{test_group}_vs_{ref_group}'] = mdata['milo'].var.copy()

In [None]:
# Stack the per-comparison tables; the dict key and the original row index become
# the 'comparison' and 'nhood_id' columns.
milo_stacked = pd.concat(milo_dict, axis=0)
milo_df = milo_stacked.reset_index(names=['comparison','nhood_id'])

# Persist the combined results next to the Milo object
milo_df.to_csv(f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_milo_ageGroups.csv')
milo_df

In [None]:
from scipy.sparse import csr_matrix

# One-hot encode the fine cell type of every cell (cells x cell types) and make it sparse
cell2type = pd.get_dummies(mdata['rna'].obs[col_cell_type_fine])
celltype_names = cell2type.columns
cell2type = csr_matrix(cell2type)

# (cell types x cells) @ (cells x nhoods): number of cells of each type per neighbourhood
nhood2type = cell2type.T @ mdata['rna'].obsm['nhoods']
# Divide each column by its total so entries become within-neighbourhood fractions
nhood_totals = nhood2type.sum(axis=0)
nhood2type = nhood2type.multiply(1 / nhood_totals)

# Majority cell type per neighbourhood and the fraction of cells it accounts for
top_type_idx = np.asarray(nhood2type.argmax(axis=0)).ravel()
nhood2type_max_perc = np.asarray(nhood2type.max(axis=0).todense()).ravel()
nhood2type_anno = pd.DataFrame({
    col_cell_type_fine: [celltype_names[i] for i in top_type_idx],
    'mv_prop': nhood2type_max_perc,
})
# The positional index (neighbourhood order in obsm['nhoods']) becomes the nhood_id column
nhood2type_anno = nhood2type_anno.reset_index(names=['nhood_id'])
nhood2type_anno.to_csv(f'{data_path}/objects/rna/thyAgeing_tSplit_scvi_{object_version}_milo_ageGroups_nhood2type.csv')

nhood2type_anno.head()

In [None]:
# Align key dtypes: nhood_id comes from a reset index in both tables and is cast to int before joining
milo_df['nhood_id'] = milo_df['nhood_id'].astype(int)
nhood2type_anno['nhood_id'] = nhood2type_anno['nhood_id'].astype(int)

# Make the cell safe to re-run: drop annotation columns left by a previous execution,
# otherwise a second merge would create *_x / *_y duplicates and break the cells below.
nhood_anno_cols = [col for col in nhood2type_anno.columns if col != 'nhood_id']
milo_df = (
    milo_df
    .drop(columns=nhood_anno_cols, errors='ignore')
    # many_to_one: each neighbourhood appears once per comparison on the left
    # and must be unique on the right, so the row count cannot grow.
    .merge(nhood2type_anno, on='nhood_id', how='left', validate='many_to_one')
)

In [None]:
# Inspect the DA results after the neighbourhood annotation has been merged in
milo_df

In [None]:
# Restrict to the two comparisons shown in the figure
plotted_comparisons = ['adult_vs_paed', 'paed_vs_infant']
in_plotted_comparisons = milo_df['comparison'].isin(plotted_comparisons)
df = milo_df.loc[in_plotted_comparisons].copy()

# Ordered categorical so the plot follows the annotation-level order
df[col_cell_type_fine] = pd.Categorical(df[col_cell_type_fine], categories=col_cell_type_fine_levels, ordered=True)
# Flag neighbourhoods with SpatialFDR below 0.05
df['is_sig'] = df['SpatialFDR'].lt(0.05)

In [None]:
# Split violins (with overlaid points) of neighbourhood log fold changes per fine cell type
# for the adult-vs-paed and paed-vs-infant comparisons.
with sns.plotting_context('paper', font_scale = 1.4):

    args = {'data': df, 'x': 'logFC', 'y': col_cell_type_fine, 'hue': 'comparison', 'hue_order': ['adult_vs_paed', 'paed_vs_infant']}

    fig, ax = plt.subplots(figsize=(10, 10))
    sns.violinplot(**args,
                   bw_adjust=.8, cut=0, split=True,
                   edgecolor = 'black', linewidth = 0, palette = 'colorblind', inner=None,
                   ax=ax,
    )
    # Make the violin bodies translucent before the points are drawn on top
    for violin in ax.collections:
        violin.set_alpha(0.5)
    # legend=False: the violins already provide one legend entry per comparison;
    # a second set of entries from the points would be left unrelabelled below.
    sns.stripplot(**args, dodge=True, size=1, palette='colorblind', edgecolor='gray', legend=False, ax=ax)

    ax.set_xlabel('log2(FC)')
    ax.set_ylabel('Cell type')

    # Modify legend
    # Keep a handle to the legend and relabel its texts. Calling plt.legend() a second
    # time would build a new legend and throw away the title and location set here.
    legend = ax.legend(title = 'Comparison', loc = 'upper left')
    new_labels = ['Paed -> Adult', 'Infant -> Paed']  # same order as hue_order
    for text, label in zip(legend.get_texts(), new_labels):
        text.set_text(label)

    # Reference line at no change
    ax.axvline(0, color = 'grey', linestyle = '--')

    plt.tight_layout()

fig.savefig(f'{plots_path}/freqAnalysis/all_curatedAnno_v10/thyAgeing_tSplit_logFcCellTypes_adult_paed.png', dpi=300, bbox_inches='tight')