# Thymus ageing atlas: Drug target expression across cell types

In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import mudata as mu
import hdf5plugin

# Add repo path to sys path (allows to access scripts and metadata from repo)
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis'
scripts_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts'

# Register the repo root at position 1 and the scripts dir at position 2 of the module search
# path. Paths that are already registered are skipped: this cell is typically re-run several
# times per session and would otherwise keep appending duplicate entries to sys.path.
for position, extra_path in enumerate((repo_path, scripts_path), start=1):
    if extra_path not in sys.path:
        sys.path.insert(position, extra_path)

# Autoreload custom scripts: mode 2 of IPython's autoreload extension reloads all imported
# modules before each cell execution, so edits to the repo scripts are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2

# Input/output locations used throughout the notebook
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'
model_path = os.path.join(repo_path, 'models')
plots_path = f'{repo_path}/plots/'
data_path = f'{repo_path}/data/'

# Report where plots and data will be read from / written to
for message, directory in (('Dir for plots: {}', plots_path), ('Dir for data: {}', data_path)):
    print(message.format(directory))

# Plot styling: register the Arial font file with matplotlib, then activate the project
# style sheet
import matplotlib.font_manager as font_manager

arial_font_file = "/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf"
style_sheet = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle'

font_manager.fontManager.addfont(arial_font_file)
plt.style.use(style_sheet)

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize, thyAgeing_colors, thyAgeing_greys, get_tint_palette

In [None]:
# Load adata
object_version = 'v5_2025-04-03'
adata_file = f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr'
# A zarr store is a directory, which `read_h5ad` cannot open. Pick the reader from what is
# actually on disk; `read_h5ad` is kept for the case where the path is a single HDF5 file that
# merely carries a `.zarr` suffix.
adata = ad.read_zarr(adata_file) if os.path.isdir(adata_file) else ad.read_h5ad(adata_file)

# Add new annotations to adata
# NOTE(review): the curated annotation table is tagged v4 while the object is v5 - confirm the
# cell barcodes match. After the left join, cells without a row in the table have a missing
# `anno_status` and are therefore dropped by the filter below.
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col=0)
adata.obs = adata.obs.join(ct_anno, how='left')
# Materialise the subset: plain boolean indexing returns a view, and `.obs` is modified right
# below (and in later cells), which should not happen on a view of the unfiltered object.
adata = adata[adata.obs['anno_status'] == 'include'].copy()

# Update metadata
latest_meta_path = get_latest_version(dir=f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on='index', ignore_warning=True)

adata

In [None]:
# Define columns
col_cell_type_broad = 'taa_l3'
col_cell_type_fine = 'taa_l4'
col_cell_type_broad_levels = get_ct_levels(col_cell_type_broad)
col_cell_type_fine_levels = get_ct_levels(col_cell_type_fine)
col_age_group = 'age_group'
# Look up the imported `<column>_levels` object by name. A plain namespace lookup does the
# same job as `eval` without executing an arbitrary string as code.
col_age_group_levels = globals()[f'{col_age_group}_levels']

In [None]:
# Create new cell type annotations
# Cells whose taa_l2 label is T_predev / T_dev take their taa_l3 label; all other cells keep
# their taa_l2 label. Done column-wise instead of with a row-wise `apply`, which calls a Python
# function once per cell. Both columns are cast to object first so that categorical columns
# with different category sets can be combined.
t_dev_compartments = ['T_predev', 'T_dev']
is_t_dev = adata.obs['taa_l2'].isin(t_dev_compartments)
adata.obs['taa_l3_l2'] = np.where(
    is_t_dev,
    adata.obs['taa_l3'].astype(object),
    adata.obs['taa_l2'].astype(object),
)

# Level order for the combined annotation: the selected taa_l3 levels first, followed by the
# taa_l2 levels of every remaining label present in the data.
t_dev_levels = ['T_DN(early)', 'T_DN', 'T_DP', 'T_αβT(entry)']
taa_l3_l2_levels = get_ct_levels('taa_l3', include_ct=t_dev_levels)
# Missing labels are dropped before the set difference: `np.setdiff1d` sorts its input and
# cannot compare NaN with strings.
other_labels = np.setdiff1d(adata.obs['taa_l3_l2'].dropna().unique(), np.array(t_dev_levels))
taa_l3_l2_levels.extend(get_ct_levels('taa_l2', include_ct=other_labels))

In [None]:
# Load the drug table and build a {condition: [target genes]} mapping
from itertools import chain

drug_targets = pd.read_excel(f'{general_data_path}/references/drugs/commonDrugs_adults.xlsx', index_col=None)
# `Target` holds comma-separated gene symbols; strip every kind of whitespace (line breaks,
# spaces, tabs) before splitting so that symbols match `adata.var_names` exactly.
drug_targets['Target'] = (
    drug_targets['Target']
    .str.replace(r'\s+', '', regex=True)
    .str.split(',')
)

# Concatenate the target lists of all drugs per condition. Rows without a target are skipped
# (their `Target` is NaN, not a list) and empty tokens left by stray commas are dropped.
drug_targets_dict = (
    drug_targets.dropna(subset=['Target'])
    .groupby('Condition')['Target']
    .apply(lambda target_lists: [gene for gene in chain.from_iterable(target_lists) if gene])
    .to_dict()
)

# Report targets that are absent from the atlas; the cell evaluates to True if any is missing
missing_targets = sorted({
    gene
    for targets in drug_targets_dict.values()
    for gene in targets
    if gene not in adata.var_names
})
print(f'Drug targets not found in adata.var_names: {missing_targets}')
bool(missing_targets)

In [None]:
# Subset and normalize data
from scipy import sparse

# Unique drug-target genes present in the atlas, in first-seen order so that the column order
# of the subset is reproducible between runs (a `set` gives no such guarantee).
target_genes = list(dict.fromkeys(
    gene
    for targets in drug_targets_dict.values()
    for gene in targets
    if gene in adata.var_names
))

# Size factors must come from the full gene set. Running `normalize_total` on the subset would
# scale each cell by the sum of its drug-target counts only, so a cell's value would depend on
# how many of the targets it expresses rather than on its sequencing depth.
# Assumes adata.X holds raw counts - TODO confirm.
library_size = np.asarray(adata.X.sum(axis=1), dtype=np.float64).ravel()
size_factor = np.divide(1e4, library_size, out=np.zeros_like(library_size), where=library_size > 0)

adata_sub = adata[:, target_genes].copy()
if sparse.issparse(adata_sub.X):
    adata_sub.X = sparse.csr_matrix(adata_sub.X.multiply(size_factor[:, None]), dtype=np.float32)
else:
    adata_sub.X = (np.asarray(adata_sub.X, dtype=np.float64) * size_factor[:, None]).astype(np.float32)
sc.pp.log1p(adata_sub)

In [None]:
# Dot plot of drug-target expression per cell type, with genes grouped by condition

# `adata_sub` only contains targets that exist in the atlas, so the grouping passed to the plot
# is restricted to those genes; conditions left without any gene are dropped.
plot_targets = {
    condition: [gene for gene in targets if gene in adata_sub.var_names]
    for condition, targets in drug_targets_dict.items()
}
plot_targets = {condition: targets for condition, targets in plot_targets.items() if targets}

# Make sure the output directory exists before saving
dotplot_dir = os.path.join(plots_path, 'drugTargets')
os.makedirs(dotplot_dir, exist_ok=True)

expression_cmap = sns.blend_palette(
    [
        thyAgeing_colors['blue'],
        thyAgeing_colors['purple'],
        thyAgeing_colors['magenta'],
        thyAgeing_colors['orange'],
        thyAgeing_colors['yellow'],
    ],
    as_cmap=True,
)

dotplot = sc.pl.DotPlot(
    adata_sub,
    groupby='taa_l3_l2',
    categories_order=taa_l3_l2_levels,
    var_names=plot_targets,
    figsize=calc_figsize(width=150, height=80),
    mean_only_expressed=True,
    cmap=expression_cmap,
)
dotplot.style(smallest_dot=0, largest_dot=40, dot_edge_lw=0.05).add_totals(size=0.5).savefig(
    os.path.join(dotplot_dir, 'thyAgeing_drugTargets_taa_l3_l2_dotplot.pdf')
)