In [None]:
import os
import sys
import session_info
from datetime import datetime
today = datetime.today().strftime('%Y-%m-%d')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import anndata as ad
import mudata as mu
import hdf5plugin

# Make project code importable: the compartment repo is placed at sys.path
# position 1 and the shared General_analysis scripts directly after it.
# (Alternative, currently disabled: derive the repo root from the working dir.)
#repo_path,_ = os.path.split(os.path.split(os.getcwd())[0])
repo_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/T_NK_compartment'
sys.path[1:1] = [
    repo_path,
    '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts',
]

# Point R at its user library
#os.environ['LD_LIBRARY_PATH'] = '' # Uncomment on jhub
#os.environ['R_HOME'] = '/nfs/team205/lm25/condaEnvs/thymusAgeing/lib/R' # Uncomment on jhub
# The library location is built from the parent directory of the first
# sys.path entry. NOTE(review): presumably that entry lives inside the conda
# env's lib/ directory -- confirm on the target kernel.
os.environ['R_LIBS_USER'] = os.path.dirname(sys.path[0]) + '/R/library'

# Output / input locations used throughout the notebook
plots_path = repo_path + '/plots/'
data_path = repo_path + '/data/'
model_path = os.path.join(repo_path, 'models')
general_data_path = '/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/data'

print(f'Dir for plots: {plots_path}')
print(f'Dir for data: {data_path}')

# IPython extensions: rpy2's IPython extension, plus autoreload in mode 2
# (the extension must be loaded before the `%autoreload` magic is used).
%load_ext rpy2.ipython
%load_ext autoreload
%autoreload 2

# Formatting
# Register the Arial font file with matplotlib's font manager, then apply the
# project's matplotlib style sheet.
from matplotlib import font_manager
font_manager.fontManager.addfont("/nfs/team205/ny1/ThymusSpatialAtlas/software/Arial.ttf")
plt.style.use('/nfs/team205/lm25/thymus_projects/thymus_ageing_atlas/General_analysis/scripts/plotting/thyAgeing.mplstyle')

# Import custom scripts
from utils import get_latest_version,update_obs,freq_by_donor
from anno_levels import get_ct_levels, get_ct_palette, age_group_levels, age_group_palette
from plotting.utils import plot_grouped_boxplot, calc_figsize,thyAgeing_colors,thyAgeing_greys,get_tint_palette

In [None]:
# Load adata
# The object path carries a `.zarr` suffix. Zarr stores are directories and have
# to be opened with `read_zarr`, whereas `read_h5ad` only reads a single HDF5
# file. Dispatch on what is actually on disk so that either layout loads.
object_version = 'v5_2025-04-03'
adata_path = f'{general_data_path}/objects/rna/thyAgeing_all_scvi_{object_version}.zarr'
adata = ad.read_zarr(adata_path) if os.path.isdir(adata_path) else ad.read_h5ad(adata_path)

# Add new annotations to adata: obs columns that also exist in the curated
# annotation table are dropped first so the join replaces them instead of
# failing on overlapping column names.
ct_anno = pd.read_csv(f'{general_data_path}/objects/rna/thyAgeing_all_scvi_v4_2025-02-04_curatedAnno_v10.csv', index_col = 0)
adata.obs = adata.obs.drop(columns = ct_anno.columns, errors = 'ignore').join(ct_anno)

# Filter data (only include annotated cells). `na = True` makes cells without a
# `taa_l5` label match the pattern, so they are excluded as well.
# Copy the subset so the metadata update below operates on a real AnnData
# object rather than on a view of the unfiltered one.
adata = adata[~adata.obs['taa_l5'].str.contains('locnt|-sp|explore', na = True)].copy()

# Update metadata from the most recent metadata spreadsheet
latest_meta_path = get_latest_version(dir = f'{general_data_path}/metadata', file_prefix='Thymus_ageing_metadata')
latest_meta = pd.read_excel(latest_meta_path)
update_obs(adata, latest_meta, on = 'index', ignore_warning = True)

In [None]:
# Define columns
# Names of the obs columns holding the broad / fine cell type annotation and
# their ordered levels, restricted to the T and NK lineages.
col_cell_type_broad = 'taa_l3'
col_cell_type_fine = 'taa_l4'
col_cell_type_broad_levels = get_ct_levels(col_cell_type_broad, taa_l1 = ['T', 'NK'])
col_cell_type_fine_levels = get_ct_levels(col_cell_type_fine, taa_l1 = ['T', 'NK'])
col_age_group = 'age_group'
# Resolve the imported `<column>_levels` object by name. A namespace lookup
# does the same job as eval() on a constructed string without ever executing
# an arbitrary expression.
col_age_group_levels = globals()[f'{col_age_group}_levels']

In [None]:
import pickle

# Pickled GSEA results to read
gsea_path = f'{general_data_path}/analyses/gsea/thyAgeing_gsea_taa_l4_adult_vs_infant_ageEffect.pkl'

# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(gsea_path, 'rb') as fh:
    gsea = pickle.load(fh)

# Stack the unpickled tables into one frame. The outer index level becomes the
# `cell_type` column; the inner level (`x`) and the `level_1` column are dropped.
gsea = pd.concat(gsea)
gsea = gsea.reset_index(names = ['cell_type', 'x'])
gsea = gsea.drop(columns = ['x', 'level_1'])

In [None]:
# Distinct cell types present in the GSEA table
gsea.loc[:, 'cell_type'].unique()

In [None]:
geneset = 'c3_tft'

# Cell types to include (CD8 and Treg subsets are currently disabled)
cell_types_shown = [
    'T_CD4_h', 'T_CD4_naive_recirc', 'T_CD4_naive', 'B_naive',
    # 'T_CD8_em', 'T_CD8_naive_recirc', 'T_CD8_rm',
    # 'T_Treg_recirc', 'T_Treg_tr',
]

# Terms to keep, listed in the column order used for display
terms_shown = [
    'CREB_01', 'CREB_02', 'CREB_Q2', 'CREB_Q4', 'CREB_Q4_01', 'CREBP1CJUN_01', 'CREBP1_Q2',
    'HNF3_Q6',
    'BACH2_01',
    'CGTSACG_PAX3_B',
    'DNMT3A_TARGET_GENES',
    'PIAS4_TARGET_GENES',
    'YY1_01', 'GCCATNTTG_YY1_Q6',
    'ATF1_Q6', 'TGACGTCA_ATF3_Q6', 'TGAYRTCA_ATF3_Q6', 'ATF4_Q2', 'ATF6_01',
    'ARNT_01',
    'NFKAPPAB_01',
    'IK1_01',
    'MAPK3_TARGET_GENES',
]

# Rows of the selected gene set collection (never the immunesigdb collection)
# for the selected cell types
df = gsea.loc[
    (gsea['geneset'] != 'c7_immunesigdb')
    & gsea['cell_type'].isin(cell_types_shown)
    & (gsea['geneset'] == geneset)
]
# Cell type x term matrix of NES values, restricted to the chosen terms
df = df.pivot_table(index = 'cell_type', columns = 'Term', values = 'NES').astype(float)
df = df.loc[:, terms_shown].copy()
df

- HNF3: FOXA3
- IK1 : Ikaros
- FREAC3: FOXC1
- MAPK3: https://pubmed.ncbi.nlm.nih.gov/21333552/

B cells:
- ARNT: https://pmc.ncbi.nlm.nih.gov/articles/PMC4450626/ -> may induce GC formation
- DNMT3A: DNMT3A -> methylation necessary for follicular B cells and plasma cell differentiation
- YY1: https://pubmed.ncbi.nlm.nih.gov/27335461/ -> essential for all stages of B differentiation
- MAX: https://www.embopress.org/doi/full/10.15252/embr.201845770 -> BCT activation and differentiation fine-tuning



In [None]:
from scipy.cluster.hierarchy import linkage, leaves_list

# Heatmap of NES values. `df` is drawn in its existing row/column order (no
# clustering or reordering is applied here). The colour scale is centred on 0
# and clipped to [-3, 3].
p = sns.heatmap(df, cmap=sns.blend_palette([thyAgeing_colors['teal'], 'white',thyAgeing_colors['orange']], as_cmap=True),
                center=0, vmin=-3, vmax=3, cbar_kws={'label': 'NES'}, xticklabels=True, yticklabels=True,
                )
# `df` is indexed by cell type (rows -> y axis) with one column per gene set
# term (x axis), so the axes are labelled accordingly.
p.set_xlabel('Gene set')
p.set_ylabel('Cell type')
p.tick_params(axis='x', rotation=90)
p.tick_params(axis='y', rotation=0)
p.figure.set_size_inches(calc_figsize(width=90, height=40))
p.figure.tight_layout(rect=[0, 0, 1, 0.95], pad = 0)

# Make sure the target directory exists before writing the figure
heatmap_path = f'{plots_path}/phenoAnalysis/gsea/thyAgeing_recircSplit_gsea_taa_l4_adult_vs_infant_ageEffect_{geneset}_heatmap.pdf'
os.makedirs(os.path.dirname(heatmap_path), exist_ok = True)
plt.savefig(heatmap_path)

In [None]:
# Load the pickled differential expression results as `deg`.
# The path is derived from `general_data_path` (like the GSEA path) instead of
# repeating the absolute data directory; it resolves to the same file.
dea_path = f'{general_data_path}/analyses/dea/thyAgeing_dea_taa_l4_adult_vs_infant_ageEffect.pkl'

# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(dea_path, 'rb') as file:
    deg = pickle.load(file)

# Stack the unpickled tables into one frame; the two index levels become the
# `cell_type` and `gene_name` columns.
deg = pd.concat(deg).reset_index(names = ['cell_type', 'gene_name'])
deg.head()


In [None]:
# Rows for AIRE and AICDA across all cell types
deg[deg['gene_name'].isin(['AIRE', 'AICDA'])]

In [None]:
# Keep rows with adjusted p-value below 0.05
deg_filtered = deg[deg['padj'].lt(.05)].copy()

# Number of retained rows per cell type, largest first
(
    deg_filtered
    .groupby('cell_type')
    .size()
    .sort_values(ascending = False)
)

In [None]:
# Retained rows for T_CD4_naive, ordered by decreasing log2 fold change
(
    deg_filtered[deg_filtered['cell_type'].eq('T_CD4_naive')]
    .sort_values(by = 'log2FoldChange', ascending = False)
)

In [None]:
# The 20 genes with the most retained rows (i.e. present for the most cell types)
(
    deg_filtered
    .groupby('gene_name')
    .size()
    .sort_values(ascending = False)
    .head(20)
)