In [None]:
import os
import senepy as sp
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import random 

# Seed every global RNG this notebook has imported so that a fresh
# Restart & Run All gives the same numbers. Seeding only Python's `random`
# leaves NumPy's global generator unseeded.
# NOTE(review): whether senepy/scanpy draw random numbers during scoring is not
# visible in this notebook -- the extra seed is harmless if they do not.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Set working directory.
# Defaults to the original scratch location; set the CSF_WORKDIR environment
# variable to run the notebook from another checkout without editing this cell.
WORK_DIR = os.environ.get('CSF_WORKDIR', '/fs/scratch/PAS2598/Morales/CSF_workspace/csf')
os.chdir(WORK_DIR)

# Check current working directory
print(os.getcwd())

In [None]:
# Load the senepy hubs for species 'Human'; the trailing expression renders
# the hubs' metadata table as the cell output.
hubs = sp.load_hubs(species='Human')
hubs.metadata

In [None]:
# Select the hub metadata rows whose tissue is 'blood'.
blood_hub = hubs.metadata[hubs.metadata.tissue == 'blood']

# A bare expression is only rendered when it is the LAST line of a cell, so the
# selection has to be displayed explicitly to be visible here.
display(blood_hub)

# Merge the selected hubs under the new name 'blood'; later cells look the
# merged hub up as hubs.hubs['blood'].
hubs.merge_hubs(blood_hub, new_name='blood')

In [None]:
# Load the AnnData object from the h5ad file. The path is relative, so it is
# resolved against the working directory set in the first cell.
adata = sc.read_h5ad('adata_with_raw_age_filtered.h5ad')

# Sanity-check what was loaded: object summary, then cell and gene counts
print(adata)
print(f"Number of cells: {adata.n_obs}")
print(f"Number of genes: {adata.n_vars}")

# Preview the per-cell metadata table, adata.obs (equivalent to @meta.data in R)
print(adata.obs.head())

In [None]:
# Display the per-cell metadata table (adata.obs) as the cell output
adata.obs

In [None]:
# Restrict the dataset to CD8 T cells first.
cd8 = adata[adata.obs['cell_type'] == 'CD8 T cells'].copy()

# Cells taken from CSF of control donors -- the part of the filter that both
# age groups share.
csf_control = (cd8.obs['organ'] == 'CSF') & (cd8.obs['disease_group'] == 'control')

# Split the CSF control cells into the two age groups being compared.
old = cd8[csf_control & (cd8.obs['age_comparison'] == '≥60 years')].copy()
young = cd8[csf_control & (cd8.obs['age_comparison'] == '≤25 years')].copy()

# Stack old and young back into one object ("coy" = controls, old + young).
# NOTE(review): AnnData.concatenate is documented as deprecated in favour of
# anndata.concat -- confirm for the installed anndata version before upgrading.
adata_cd8_coy = old.concatenate(young)

In [None]:
# Build a senepy translator between the merged 'blood' hub and this dataset.
# NOTE(review): `translator` is not passed to sp.score_all_cells in the scoring
# cell below -- confirm against the senepy docs whether it needs to be.
translator = sp.translator(hub=hubs.hubs['blood'], data=adata_cd8_coy)

In [None]:
# Score every cell against the merged 'blood' hub and store the result
# directly as the 'sen_score' column of adata.obs.
# NOTE(review): the meaning of `identifiers` is defined by senepy; presumably
# these obs columns define the groups used during scoring -- confirm.
adata_cd8_coy.obs['sen_score'] = sp.score_all_cells(
    adata_cd8_coy,
    hubs.hubs['blood'],
    identifiers=['sex', 'age_comparison'],
)

In [None]:
# Preview the per-cell metadata, which now carries the 'sen_score' column
adata_cd8_coy.obs.head()

In [None]:
# Re-derive the young (≤25 years) subset from the scored object so that it
# carries 'sen_score'; it serves as the reference distribution for the
# threshold computed in the next cells. This rebinds the earlier `young`.
young = adata_cd8_coy[adata_cd8_coy.obs['age_comparison'] == '≤25 years'].copy()

In [None]:
# Mean senescence score of the young reference distribution
e = young.obs['sen_score'].mean()

In [None]:
# Standard deviation of the young reference distribution
# (pandas default, i.e. the sample standard deviation)
std = young.obs['sen_score'].std()

In [None]:
# Cut-off for calling a cell putatively senescent: three standard deviations
# above the mean score of the young reference cells.
thresh = e + std * 3
thresh

In [None]:
#function to add senescent label
def is_putative_sen(x, threshold=None):
    """Label one senescence score as putatively senescent (1) or not (0).

    Parameters
    ----------
    x : float
        Senescence score of a single cell.
    threshold : float, optional
        Cut-off; a score greater than or equal to it is labelled 1. When
        omitted, the notebook-level ``thresh`` is looked up at call time, so
        existing one-argument calls such as ``Series.map(is_putative_sen)``
        behave exactly as before.

    Returns
    -------
    int
        1 if ``x >= threshold``, otherwise 0. A NaN score compares False and
        is therefore labelled 0.
    """
    # Fall back to the global only when no explicit cut-off is given, so the
    # function can be reused (and tested) without relying on kernel state.
    cutoff = thresh if threshold is None else threshold
    return 1 if x >= cutoff else 0

In [None]:
# Apply the labelling function to every score and store the resulting 0/1
# flag as a new column in adata.obs
adata_cd8_coy.obs['putative_sen'] = adata_cd8_coy.obs['sen_score'].map(is_putative_sen)

In [None]:
# Preview the per-cell metadata, which now carries the 'putative_sen' flag
adata_cd8_coy.obs.head()

In [None]:
# Age groups in left-to-right plotting order, and the colour of each bar.
# Pinning the order matters: the text labels below are placed at offsets
# derived from this order, so the bars must be drawn in the same order.
age_order = ['≤25 years', '≥60 years']
age_palette = {'≤25 years': '#3498DB', '≥60 years': '#E74C3C'}

# Filter to only include the two groups of interest
comparison_data = adata_cd8_coy.obs[adata_cd8_coy.obs['age_comparison'] != 'exclude']

# Calculate senescence percentages grouped by BOTH cell type AND age group.
# observed=True keeps only combinations that actually occur; without it,
# categorical keys yield a row (with NaN mean) for every unused category.
senescence_by_group_cell = (
    comparison_data
    .groupby(['cell_type', 'age_comparison'], observed=True)['putative_sen']
    .agg(['mean', 'count', 'std'])
    .reset_index()
)
senescence_by_group_cell['percent_senescent'] = senescence_by_group_cell['mean'] * 100
senescence_by_group_cell['sem'] = (
    senescence_by_group_cell['std'] / np.sqrt(senescence_by_group_cell['count'])
) * 100

# Print summary statistics
print("cd8 senescence by cell type and age group:")
print(senescence_by_group_cell)

# Create the grouped bar plot
fig, ax = plt.subplots(figsize=(6, 6))

# The table holds ONE pre-aggregated value per bar, so seaborn has nothing to
# estimate an interval from; errorbar=None states that explicitly (the bars
# look the same as before).
# NOTE(review): if error bars are wanted, draw them from the 'sem' column with
# ax.errorbar rather than through seaborn.
sns.barplot(
    data=senescence_by_group_cell,
    y='percent_senescent',
    hue='age_comparison',
    hue_order=age_order,
    palette=age_palette,
    errorbar=None,
    ax=ax,
)

# x position of every bar, keyed by cell type and age group. Hue-dodged bars
# share seaborn's default total width of 0.8, centred on the integer position
# of the group; with two age groups this gives offsets of -0.2 and +0.2.
cell_types = senescence_by_group_cell['cell_type'].unique()
bar_width = 0.8 / len(age_order)
hue_offset = {
    age: (j - (len(age_order) - 1) / 2) * bar_width
    for j, age in enumerate(age_order)
}
bar_positions = {
    cell_type: {age: i + offset for age, offset in hue_offset.items()}
    for i, cell_type in enumerate(cell_types)
}

# Styling shared by both text annotations
label_style = dict(
    fontsize=9,
    fontweight='bold',
    color='black',
    bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=1),
)

# Add sample sizes and percentages
for _, row in senescence_by_group_cell.iterrows():
    count = row['count']
    percent = row['percent_senescent']
    x_pos = bar_positions[row['cell_type']][row['age_comparison']]

    # Vertical position of the sample-size label (in percentage points)
    y_offset = 1.0  # Fixed offset above the bar
    if percent > 20:
        # Extremely tall bars: place the label inside the bar
        y_pos = percent - 2
    elif percent > 5:
        # Taller bars: place the label a fixed offset above the bar top
        y_pos = percent + y_offset
    else:
        # Short bars: place the label just below the bar top
        y_pos = percent - 0.075

    # Add sample size
    ax.text(x_pos, y_pos, f'n={count}', ha='center', va='center', **label_style)

    # Add percentage (1 decimal place) slightly above the top of each bar
    ax.text(x_pos, percent + 0.01, f'{percent:.1f}%', ha='center', va='bottom', **label_style)

ax.set_title('Proportion of Senescent Cells: Young vs Old Patients (Controls)', fontsize=15, fontweight='bold')
ax.set_ylabel('Senescent Cells (%)', fontsize=13)
plt.yticks(fontsize=10, fontweight='bold')  # Bold y tick labels
ax.set_xlabel('CD8 T cells', fontsize=12, rotation=45, ha='right')
plt.xticks(rotation=45, ha='right')  # Rotate labels for better readability
ax.legend(title='Age Group')

# Thicker frame and ticks
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_linewidth(1.5)
ax.tick_params(axis='x', width=2, length=6)
ax.tick_params(axis='y', width=1.5, length=6)

# Increase y-limit to accommodate the percentage labels. Fall back to a
# fixed upper limit when no cell is flagged, because ylim(0, 0) is degenerate.
max_percent = senescence_by_group_cell['percent_senescent'].max()
ax.set_ylim(0, max_percent * 1.25 if max_percent > 0 else 1.0)

# Lay out only after every axis change so the final geometry is used
fig.tight_layout()

# Save as TIFF (publication quality)
fig.savefig('senepy_cd8_controls_oldvsyoung_7-28-25.tiff', dpi=300, bbox_inches='tight')

plt.show()