# Overview of thymus metadata

In [None]:
import os
import sys
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import anndata as ad

# Add repo path to sys path (allows to access scripts and metadata from repo)
repo_path,_ = os.path.split(os.path.split(os.getcwd())[0])
sys.path.insert(1, repo_path) 

# Add R libs path
#os.environ['LD_LIBRARY_PATH'] = '' # Uncomment on jhub
#os.environ['R_HOME'] = '/nfs/team205/lm25/condaEnvs/thymusAgeing/lib/R' # Uncomment on jhub
os.environ['R_LIBS_USER'] = '/nfs/team205/lm25/condaEnvs/thymusAgeing/lib/R/library'

%load_ext rpy2.ipython

In [None]:
%%capture
%%R

# Attach the R packages in this fixed order (attach order decides which package masks which)
invisible(lapply(c('tidyverse', 'patchwork', 'magrittr'), library, character.only = TRUE))

# Custom plotting helpers (presumably where theme_simple() used by the plots comes from)
source('/nfs/team205/lm25/customScripts/visualisation/customTheme.R')

# Limit how much R prints to the console
options(max.print = 150)

In [None]:
# Define paths
plots_path = f'{repo_path}/plots/meta'

# Load metadata
meta_path = '/lustre/scratch126/cellgen/team205/lm25/thymus_ageing_atlas/General_analysis/results/Thymus_ageing_metadata_v6_2024-14-03.xlsx'
meta = pd.read_excel(meta_path)

# Harmonise age to months, vectorised over the whole column instead of a row-wise apply:
#   'y' (years) -> age * 12
#   'w' (weeks) -> age / 4   (approximates a month as 4 weeks)
#   anything else (including missing units) -> value kept as is, i.e. treated as months
# NOTE(review): units other than 'y'/'w' pass through silently — confirm no other unit occurs.
meta['age_months'] = np.select(
    [meta['age_unit'] == 'y', meta['age_unit'] == 'w'],
    [meta['age_cont'] * 12, meta['age_cont'] / 4],
    default=meta['age_cont'],
)

meta.head()

## Overall data

In [None]:
# Number of unique donors and samples per health status, type and chemistry
(
    meta
    .groupby(['health_status', 'type', 'chemistry_simple'])
    .agg(
        n_donors=('donor', 'nunique'),
        n_samples=('sample', 'nunique'),
    )
)

In [None]:
%%R -i meta -h 230 -w 300 -u mm -r 300

# Library-level plot: one point per metadata row (presumably one row per library),
# stacked per age and drawn in one panel per health status
meta %>%
    #dplyr::mutate(age_months = floor(age_months/12)*12) %>% # Uncomment for rounding age to full years
    dplyr::select(age_months, sort, health_status, donor, chemistry_simple) %>%
    dplyr::group_by(age_months, health_status) %>%
    dplyr::arrange(chemistry_simple) %>%
    # Running index within each (age, health status) group is used as the stacking height
    dplyr::mutate(n_pos = dplyr::row_number()) %>%
    ggplot(aes(x = age_months, y = n_pos, color = chemistry_simple, shape = chemistry_simple)) +
        geom_point(size = 5) +
        # Dashed reference line at age 0
        geom_vline(xintercept = 0, linetype = 'dashed', color = 'grey40') +
        ggforce::facet_col(
            ~ health_status,
            strip.position = 'right',
            scales = 'free_y',
            space = 'free',
            labeller = as_labeller(c('healthy' = 'Healthy',
                                     'myasthenia_gravis' = "Myasthenia\ngravis",
                                     'down_syndrome' = "Down\nsyndrome",
                                     'patau_syndrome' = "Patau\nsyndrome"))
        ) +
        labs(x = "Age [y]", y = "N(libraries)", color = 'Protocol', shape = "Protocol") +
        ggsci::scale_color_locuszoom(breaks = c('5GEX', '3GEX', 'ATAC')) +
        scale_shape_manual(
            values = c(16, 17, 18),
            breaks = c('5GEX', '3GEX', 'ATAC'),
            guide = guide_legend(nrow = 1)
        ) +
        # Data are in months; ticks are placed every 2 years and labelled in years
        scale_x_continuous(
            breaks = seq(-2, 80, 2) * 12,
            labels = seq(-2, 80, 2),
            expand = expansion(mult = 0, add = 8)
        ) +
        scale_y_continuous(
            limits = c(0.5, NA),
            breaks = seq(0, 100, 2),
            expand = expansion(mult = 0, add = c(0.5, 1))
        ) +
        theme_simple(facet = TRUE, base_size = 10)
# Optional: append `+ theme(strip.text.y = element_text(angle = 0))` to the chain above
#theme(strip.text.y = element_text(angle = 0))

In [None]:
%%R -i meta -h 50 -w 300 -u mm -r 300

# Donor-level plot: one point per distinct (age, health status, donor) combination,
# stacked at the donor's age rounded down to full years

# Display names for the health_status levels, shared by the colour and shape scales
# so that both legends carry identical labels
status_labels <- c('healthy' = 'Healthy',
                   'myasthenia_gravis' = "Myasthenia gravis",
                   'down_syndrome' = "Down syndrome",
                   'patau_syndrome' = "Patau syndrome")

meta %>%
    dplyr::mutate(age_months = floor(age_months/12)*12) %>%
    dplyr::distinct(age_months, health_status, donor) %>%
    dplyr::group_by(age_months) %>%
    dplyr::arrange(health_status) %>%
    # Running index within each age group is used as the stacking height
    dplyr::mutate(n_pos = dplyr::row_number()) %>%
    ggplot(aes(x = age_months, y = n_pos, color = health_status, shape = health_status)) +
        geom_point(size = 5) +
        # Dashed reference line at age 0
        geom_vline(xintercept = 0, linetype = 'dashed', color = 'grey40') +
        # Colour and shape encode health_status here, so the legend is titled accordingly
        # (it was previously titled 'Protocol', which describes the library-level plot)
        labs(x = "Age [y]", y = "N(donors)", color = 'Health status', shape = 'Health status') +
        ggsci::scale_color_locuszoom(labels = status_labels) +
        scale_shape_manual(
            values = c(16, 17, 18, 20),
            labels = status_labels,
            guide = guide_legend(nrow = 1)
        ) +
        # Data are in months; ticks are placed every 2 years and labelled in years
        scale_x_continuous(
            breaks = seq(-2, 80, 2) * 12,
            labels = seq(-2, 80, 2),
            expand = expansion(mult = 0, add = 8)
        ) +
        scale_y_continuous(
            limits = c(0.5, NA),
            breaks = seq(0, 20, 2),
            expand = expansion(mult = 0, add = c(0, 0.5))
        ) +
        theme_simple(facet = TRUE, base_size = 10)
# Optional: append `+ theme(strip.text.y = element_text(angle = 0))` to the chain above
#theme(strip.text.y = element_text(angle = 0))

## Thymus ageing

In [None]:
# Thymus ageing subset: healthy donors with a positive age
# (presumably this excludes prenatal samples — confirm against how age_cont is encoded)
ta_meta = meta[meta['health_status'].eq('healthy') & meta['age_cont'].gt(0)]

# Number of unique donors and samples per age group, sort, type and chemistry
(
    ta_meta
    .groupby(['age_group2', 'sort', 'type', 'chemistry_simple'])
    .agg(
        n_donors=('donor', 'nunique'),
        n_samples=('sample', 'nunique'),
    )
)

In [None]:
%%R -i ta_meta -h 100 -w 200 -u mm -r 300

# Library-level plot for the thymus ageing subset: one point per metadata row, stacked per age
ta_meta %>%
    #dplyr::filter(chemistry_simple == '5GEX') %>%
    dplyr::select(age_months, sort, health_status, donor, chemistry_simple) %>%
    dplyr::group_by(age_months, health_status) %>%
    dplyr::arrange(chemistry_simple) %>%
    # Running index within each (age, health status) group is used as the stacking height
    dplyr::mutate(n_pos = dplyr::row_number()) %>%
    ggplot(aes(x = age_months, y = n_pos, color = chemistry_simple, shape = chemistry_simple)) +
        geom_point(size = 5) +
        labs(x = "Age [y]", y = "N(libraries)", color = 'Protocol', shape = "Protocol") +
        ggsci::scale_color_locuszoom(breaks = c('5GEX', '3GEX', 'ATAC')) +
        scale_shape_manual(
            values = c(16, 17, 18),
            breaks = c('5GEX', '3GEX', 'ATAC'),
            guide = guide_legend(nrow = 1)
        ) +
        # Data are in months; ticks are placed every 2 years and labelled in years
        scale_x_continuous(
            breaks = seq(-2, 80, 2) * 12,
            labels = seq(-2, 80, 2),
            expand = expansion(mult = 0, add = 6)
        ) +
        scale_y_continuous(
            limits = c(0.5, NA),
            breaks = seq(0, 20, 2),
            expand = expansion(mult = 0, add = c(0, 0.5))
        ) +
        theme_simple(facet = FALSE)
# Optional: append `+ theme(strip.text.y = element_text(angle = 0))` to the chain above
#theme(strip.text.y = element_text(angle = 0))