In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
#from muon import prot as pt
import matplotlib.pyplot as plt

from matplotlib import colors
%matplotlib inline

from typing import Optional, Literal

import muon as mu

import decoupler as dc
import seaborn as sns

from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

In [None]:
# load citeseq data
# Reads the fine-clustered MuData object (path is relative to the working directory).
mdata = mu.read("./citeseq_mdata_allsamples_filtered_fine_clustering.h5mu")

In [None]:
# Per-donor metadata, keyed by the values of mdata.obs['donor_id'].
# Every mapping has its own descriptive name; previously `sex_mapping` was reused for the
# gravida and maternal-age dictionaries, leaving the name bound to unrelated data.

# value stored in the 'gest_day' column (presumably gestational age in days - TODO confirm units)
gd_mapping = {
    'sc01': 62,
    'sc02': 64,
    'sc08': 51,
    'sc09': 55,
    'sc13': 50,
    'sc15': 48,
    'sc16': 48
}
mdata.obs['gest_day'] = mdata.obs['donor_id'].map(gd_mapping)

# value stored in the 'fetal_sex' column
sex_mapping = {
    'sc01': 'male',
    'sc02': 'female',
    'sc08': 'male',
    'sc09': 'male',
    'sc13': 'female',
    'sc15': 'female',
    'sc16': 'female'
}
mdata.obs['fetal_sex'] = mdata.obs['donor_id'].map(sex_mapping)

# value stored in the 'gravida' column ('primi' / 'multi')
gravida_mapping = {
    'sc01': 'primi',
    'sc02': 'multi',
    'sc08': 'primi',
    'sc09': 'primi',
    'sc13': 'primi',
    'sc15': 'primi',
    'sc16': 'primi'
}
mdata.obs['gravida'] = mdata.obs['donor_id'].map(gravida_mapping)

# value stored in the 'mat_age' column (presumably maternal age in years - TODO confirm units)
mat_age_mapping = {
    'sc01': 31,
    'sc02': 34,
    'sc08': 26,
    'sc09': 20,
    'sc13': 22,
    'sc15': 22,
    'sc16': 27
}
mdata.obs['mat_age'] = mdata.obs['donor_id'].map(mat_age_mapping)

### Prepare annotations for plotting

In [None]:
# Short display labels for the low-resolution cell types (old label -> new label).
lores_display_labels = {
    'FIB': 'Fib',
    'MURAL': 'Mur',
    'Tcell': 'T cell',
    'MAST': 'Mast',
    'EpiCell': 'Epi',
    'TROPHO': 'TB',
    'ENDO': 'Endo',
    'BCell': 'B cell',
    'MAC': 'Mac',
    'PLASMA': 'Plasma',
    'Lymphatic': 'LEC',
}
mdata.obs['celltype_lores'] = mdata.obs['celltype_lores'].replace(lores_display_labels)

In [None]:
# Short display labels for the high-resolution cell types (old label -> new label).
hires_display_labels = {
    'MAC_prol': 'pMac',
    'monoMAC': 'Mono',
    'Mono_CD16+': 'CD16+ Mono',
    'decFIB': 'decFib',
    'NK_CD39-': 'CD39- NK',
    'NK_CD39-CD103+': 'CD39-CD103+ NK',
    'NK_CD39+': 'CD39+ NK',
    'NK_prol': 'pNK',
}
mdata.obs['celltype_hires'] = mdata.obs['celltype_hires'].replace(hires_display_labels)

### Assign colors to celltype_lores

In [None]:
# Palette mapping each low-resolution cell type label to a hex color.
# NOTE(review): `color_dict` is assigned a second time further down, before the first UMAP
# is drawn, so this version is never used for plotting. The two assignments differ only in
# that the "Endo" and "TB" colors are swapped. Confirm which one is intended and drop the other.
color_dict = {
    "B cell": "#8A8DBE",  
    "DC": "#8E033D",  
    "Endo": "#FFDF22",
    'Epi':'#A32392',
    'Fib':'#335AF2',
    'Mast':'#85BDC1',
    'LEC':'#9FC13E',
    'Mac':'#EF9203',
    'Mur':'#19E6FF',
    'NK':'#06C945',
    'ILC':'#ec3c94',
    'Plasma':'#6a29ca',
    'T cell':'#784902',
    'TB':'#DB231A'
}

## Figure 2A - UMAP with low resolution clusters

In [None]:
# Palette for the low-resolution cell types: label -> hex color.
# This is the assignment in effect when the UMAPs below are drawn.
color_dict = {
    "B cell": "#8A8DBE",
    "DC": "#8E033D",
    "TB": "#FFDF22",
    "Epi": "#A32392",
    "Fib": "#335AF2",
    "Mast": "#85BDC1",
    "LEC": "#9FC13E",
    "Mac": "#EF9203",
    "Mur": "#19E6FF",
    "NK": "#06C945",
    "ILC": "#ec3c94",
    "Plasma": "#6a29ca",
    "T cell": "#784902",
    "Endo": "#DB231A",
}

In [None]:
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=450)
sc.pl.umap(mdata, color=["celltype_lores"], frameon=False, legend_loc='on data',
           title=None, size=8, palette=color_dict, alpha=0.8
           ,save='20250430_Fig1b-UMAP-celltype-lores.pdf'
          )

In [None]:
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=450)
sc.pl.umap(mdata, color=["tissue"], frameon=False, legend_loc='on data',
           title=None, size=8, palette={"basalis":"#800000",'parietalis':'#005f87'}, alpha=0.8
           ,save='20250430_Fig1b-UMAP-tissue.pdf'
          )

In [None]:
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=450)
sc.pl.umap(mdata, color=["tissue"], frameon=False, legend_loc='on data',
           title=None, size=8, palette={"basalis":"#800000",'parietalis':'#005f87'}, alpha=0.25
           ,save='20250430_Fig1b-UMAP-tissue_alpha.pdf'
          )

In [None]:
sc.pl.umap(mdata, color=["celltype_hires"], frameon=False, legend_loc='on data',
           title=None, size=8
           #, palette=color_dict
           , alpha=0.8
           ,save='20250430_Fig1b-UMAP-celltype-hires.pdf'
          )

## Extended Figure 1E

In [None]:
# Per-sample QC violins for the RNA modality (no jitter, no individual points).
sc.set_figure_params(
    figsize=(3.5, 3.5),
    fontsize=8,
    vector_friendly=False,
    transparent=False,
    dpi_save=450,
)
qc_metrics = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_ribo']
sc.pl.violin(
    mdata["rna"],
    qc_metrics,
    jitter=0,
    groupby='sample_id',
    rotation=45,
    size=0,
    save='20250430_ExtFig1-QC_CITE.pdf',
)

## Figure 2B - Top DEG (RNA) per low resolution cluster

In [None]:
# Rank genes for every low-resolution cluster (Wilcoxon test) on the RNA modality.
# NOTE(review): resetting uns['log1p']['base'] looks like a workaround for a missing 'base'
# entry in the stored log1p metadata - confirm it is still needed.
mdata["rna"].uns['log1p']["base"] = None
mdata["rna"].obs["celltype_lores"] = mdata.obs["celltype_lores"]
sc.tl.rank_genes_groups(mdata["rna"], 'celltype_lores', method='wilcoxon')

result = mdata["rna"].uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)

# One column per cluster, named "<cluster>_n", listing the ranked gene names; show the top 20.
top_gene_names = {f"{group}_n": result['names'][group] for group in groups}
pd.DataFrame(top_gene_names).head(20)

In [None]:
# generate matrix plot for lores clusters
sc.tl.dendrogram(mdata["rna"], "celltype_lores")

sc.set_figure_params(
    #figsize=(9, 2.5), 
    fontsize=8, vector_friendly=True, transparent=True)

sc.pl.rank_genes_groups_matrixplot(mdata["rna"], groupby="celltype_lores", n_genes=5, standard_scale='var',
                                  swap_axes=False
                                   ,save="Fig1c-matrixplot-all-top5-rna.pdf"
                                   ,cmap='Blues'
                                  #, figsize=(11,2)
                                  )

In [None]:
# generate matrix plot for lores clusters
sc.tl.dendrogram(mdata["rna"], "celltype_lores")

sc.set_figure_params(
    #figsize=(9, 2.5), 
    fontsize=8, vector_friendly=True, transparent=True)

sc.pl.rank_genes_groups_matrixplot(mdata["rna"], groupby="celltype_lores", n_genes=3, standard_scale='var',
                                  swap_axes=False
                                   ,save="Fig1c-matrixplot-all-top3-rna.pdf"
                                   ,cmap='Blues'
                                  #, figsize=(11,2)
                                  )

## Figure 2C - Top DEG (Prot) per low resolution immune cell cluster

In [None]:
mdata_subset = mdata[mdata.obs['celltype_lores'].isin(['Mac','B cell','Plasma','Mast','NK','ILC','T cell','DC']),:]

In [None]:
# generate DEG for lores clusters (prot)
mdata_subset["prot"].obs["celltype_lores"]=mdata_subset.obs["celltype_lores"]
mdata_subset["prot"].obs['celltype_lores'] = mdata_subset["prot"].obs['celltype_lores'].cat.remove_unused_categories()
sc.tl.rank_genes_groups(mdata_subset["prot"], 'celltype_lores', method='wilcoxon')
result = mdata_subset["prot"].uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(20)

In [None]:
# generate matrix plot for lores clusters
sc.tl.dendrogram(mdata_subset["prot"], "celltype_lores")

sc.set_figure_params(
    #figsize=(6, 2), 
    fontsize=15, vector_friendly=True, transparent=True)

sc.pl.rank_genes_groups_matrixplot(mdata_subset["prot"], groupby="celltype_lores", n_genes=5, standard_scale='var',
                                  swap_axes=False
                                   , save="Fig1d-matrixplot-all-top5-prot.pdf"
                                   ,cmap='Greens')

In [None]:
# generate matrix plot for lores clusters
sc.tl.dendrogram(mdata_subset["prot"], "celltype_lores")

sc.set_figure_params(
    #figsize=(6, 2), 
    fontsize=15, vector_friendly=True, transparent=True)

sc.pl.rank_genes_groups_matrixplot(mdata_subset["prot"], groupby="celltype_lores", n_genes=3, standard_scale='var',
                                  swap_axes=False
                                   , save="Fig1d-matrixplot-all-top3-prot.pdf"
                                   ,cmap='Greens')

# Figure 3 - Macrophages

In [None]:
mdata_raw = mu.read("./citeseq_mdata_allsamples_filtered.h5mu")
adata = mdata_raw['rna']

In [None]:
# transfer .obs column
adata.obs = mdata.obs

In [None]:
# store raw data
adata.layers["counts"] = adata.X

In [None]:
# remove trophoblast since they are decB specific
adata = adata[adata.obs['celltype_lores'] != 'TB'].copy()

In [None]:
# get filtered pseudobulk
pdata = dc.pp.pseudobulk(adata,
                          sample_col='rna:sample_id',
                          groups_col='celltype_lores',
                          layer='counts',
                          mode='sum'
                         )

In [None]:
dc.pl.filter_samples(
    adata=pdata,
    groupby=["tissue", "rna:sample_id", "rna:donor_id"],
    min_cells=10,
    min_counts=1000,
    figsize=(5, 8),
)

In [None]:
dc.pp.filter_samples(pdata, min_cells=10, min_counts=1000)

In [None]:
dc.pl.obsbar(adata=pdata, y="celltype_lores", hue="tissue", figsize=(6, 3))

In [None]:
# Store raw counts in layers
# (copied, so the in-place normalisation below cannot modify them)
pdata.layers["counts"] = pdata.X.copy()

# Normalize, scale and compute pca
# These steps overwrite pdata.X; only the PCA result is needed from them.
sc.pp.normalize_total(pdata, target_sum=1e4)
sc.pp.log1p(pdata)
sc.pp.scale(pdata, max_value=10)
sc.tl.pca(pdata)

# Return raw counts to X
# so that downstream steps reading pdata.X see the summed counts again
dc.pp.swap_layer(adata=pdata, key="counts", inplace=True)

In [None]:
# Variance explained per principal component.
sc.pl.pca_variance_ratio(pdata)

In [None]:
# PCA of the pseudobulk profiles, one panel per annotation (tissue, donor, cell type).
sc.pl.pca(
    pdata,
    color=["tissue","rna:donor_id","celltype_lores"],
    ncols=1,
    size=300,
    frameon=True,
)

In [None]:
# Thresholds used when counting differentially expressed genes per cell type.
PADJ_CUTOFF = 0.05   # adjusted p-value must be below this
LFC_CUTOFF = 0.5     # |log2 fold change| must exceed this

# per-cell-type DEG counts (one dict per cell type) and full DESeq2 result tables
results_list = []
results_dict = {}

# extract unique celltypes
unique_cell_types = pdata.obs['celltype_lores'].unique()

# Run one DESeq2 analysis (basalis vs. parietalis, adjusted for donor) per cell type.
for cell_type in unique_cell_types:
    cells_subset = pdata[pdata.obs['celltype_lores'].isin([cell_type])].copy()

    # The contrast below compares the two tissues, so a cell type whose remaining
    # pseudobulk profiles come from a single tissue cannot be tested; skip it instead
    # of letting the model fit fail and abort the whole loop.
    if cells_subset.obs['rna:tissue'].nunique() < 2:
        print(f"Skipping {cell_type}: pseudobulk profiles from only one tissue are available.")
        continue

    # get genes above threshold
    # NOTE(review): the filter is applied with min_count=1 / min_total_count=1 while the
    # diagnostic plot below is drawn for min_count=10 / min_total_count=100 - confirm which
    # thresholds are intended.
    dc.pp.filter_by_expr(cells_subset, group='tissue', min_count=1, min_total_count=1, min_prop=0)

    dc.pl.filter_by_expr(
        adata=cells_subset,
        group="tissue",
        min_count=10,
        min_total_count=100,
        min_prop=0,
    )

    # create Deseq object
    dds = DeseqDataSet(
        adata=cells_subset,
        design_factors=['rna:donor_id', 'rna:tissue'],
        refit_cooks=True
    )

    # compute lfc
    dds.deseq2()

    # get contrast
    stat_res = DeseqStats(
        dds,
        contrast=["rna:tissue", 'basalis', 'parietalis']
    )

    # compute wald test
    stat_res.summary()

    # get results
    results_df = stat_res.results_df
    results_dict[cell_type] = results_df

    # save the entire results DataFrame for each cell type with the cell type in the file name
    results_df.to_csv(f'./final_results_{cell_type}_pseudobulk_deg.csv')

    dc.pl.volcano(
        results_df,
        x='log2FoldChange',
        y='padj',
        top=50,
        figsize=(10, 10),
        save=f'./final_volcanoplot_{cell_type}_pseudobulk_deg.svg',
        return_fig=True,
    )

    # filter for significant genes (rows with a missing padj compare as False and are excluded)
    significant_genes = results_df[results_df['padj'] < PADJ_CUTOFF]

    # get upregulated and downregulated genes
    upregulated_genes = significant_genes[significant_genes['log2FoldChange'] > LFC_CUTOFF]
    downregulated_genes = significant_genes[significant_genes['log2FoldChange'] < -LFC_CUTOFF]

    # append the counts to the summary list
    results_list.append({
        'celltype': cell_type,
        'num_upregulated': upregulated_genes.shape[0],
        'num_downregulated': downregulated_genes.shape[0],
    })

# convert list to df
results_df_summary = pd.DataFrame(results_list)


## Figure 3A

In [None]:
# Diverging horizontal bar chart: up-regulated DEG to the right, down-regulated to the left,
# cell types sorted by their total number of DEG.
results_df_summary['total_deg'] = results_df_summary['num_upregulated'] + results_df_summary['num_downregulated']

results_df_sorted = results_df_summary.sort_values(by='total_deg', ascending=True)

font_size = 10

#create figure and axis
fig, ax = plt.subplots(figsize=(4, 3))

ax.grid(False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

#plot number of upregulated genes
bars_up = ax.barh(results_df_sorted['celltype'], results_df_sorted['num_upregulated'],
                  color='#87001A')

#plot number of downregulated genes as negative values
bars_down = ax.barh(results_df_sorted['celltype'], -results_df_sorted['num_downregulated'],
                    color='#005F87')

#draw vertical line at x=0
ax.axvline(0, color='grey', linewidth=0.8)

# fixed symmetric x-range; bars beyond +/-2400 genes would be cut off
ax.set_xlim(-2400, 2400)

#add axis label
ax.set_xlabel('Number of DEG (DB vs. DP)', fontsize=font_size)

# tick label formatting
ax.tick_params(axis='y', labelrotation=0, labelsize=font_size)
ax.tick_params(axis='x', labelsize=font_size)

#add number of upregulated genes as labels
for bar in bars_up:
    xval = bar.get_width()
    ax.text(xval, bar.get_y() + bar.get_height()/2, int(xval), ha='left', va='center',
            fontsize=font_size, color='black')

#add number of downregulated genes as labels
for bar in bars_down:
    xval = bar.get_width()
    ax.text(xval, bar.get_y() + bar.get_height()/2, int(abs(xval)), ha='right', va='center',
            fontsize=font_size, color='black')

# Finalise the layout BEFORE saving, so the PDF matches what is displayed
# (previously the file was written before tight_layout() was applied).
fig.tight_layout()

#save figure
fig.savefig('./final_Fig2a-n_deg_per_celltypelores_2025-10.pdf')

#plot
plt.show()

## Figure 3B

In [None]:
#isolate MACs
mdata_subset = mdata[mdata.obs['celltype_hires'].isin(["decBAM1","decBAM2","decPAM1","pMac",
                                                      "Mono","decPAM2","CD16+ Mono"]),:]
mdata_subset.obs['celltype_hires'] = mdata_subset.obs['celltype_hires'].cat.remove_unused_categories()

In [None]:
#change order of clusters
cluster_order = ["decPAM1","decBAM1","decBAM2",'pMac','decPAM2','Mono','CD16+ Mono']

mdata_subset.obs['celltype_hires'] = pd.Categorical(mdata_subset.obs['celltype_hires'],
                                                        categories=cluster_order,
                                                        ordered=True)

mdata_subset = mdata_subset[mdata_subset.obs['celltype_hires'].sort_values().index]

In [None]:
#calculate nearest neighbors for UMAP
sc.pp.neighbors(mdata_subset['rna'], use_rep="X_harmony")
sc.pp.neighbors(mdata_subset['prot'], use_rep="X_harmony")

#calculate wnn
mu.pp.neighbors(mdata_subset, key_added='wnn')

In [None]:
#calculate UMAP
mu.tl.umap(mdata_subset, neighbors_key='wnn', random_state=10)

In [None]:
#plot UMAP
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=1200)

sc.pl.umap(mdata_subset, color=["celltype_hires"], frameon=False,legend_loc='on data',size=36, alpha=0.8 
           ,save='Fig2a-MAC-UMAP-celltype-hires.pdf'
          )

## Figure 3C

In [None]:
# Macrophage subsets whose per-sample proportions are plotted.
mdata_subset = mdata[mdata.obs['celltype_hires'].isin(["decBAM1", "decBAM2", "decPAM1", "decPAM2"]), :]

adata_subset = mdata_subset['rna']

# sample and subset label of every selected cell
composition_data = adata_subset.obs[['sample_id', 'celltype_hires']]

# rows: samples, columns: macrophage subsets, values: fraction of the sample's selected cells
crosstab = pd.crosstab(
    composition_data['sample_id'],
    composition_data['celltype_hires'],
    normalize='index',
)

# sample order on the x-axis: all "dp" samples first, then all "db" samples
donor_ids = ['sc01', 'sc02', 'sc08', 'sc09', 'sc13', 'sc15', 'sc16']
custom_order = [f"{donor}{suffix}" for suffix in ('dp', 'db') for donor in donor_ids]

# reindex the crosstab to follow the custom order
sorted_crosstab = crosstab.reindex(custom_order)

# stacked bars, one per sample
fig, ax = plt.subplots(figsize=(4, 4))
sorted_crosstab.plot.bar(stacked=True, width=1, edgecolor='black', ax=ax, legend=False)

ax.invert_yaxis()
plt.xticks(rotation=45, ha='right')
ax.set_ylabel('Proportion')

fig.tight_layout()

fig.savefig("./Fig2d-bargraph-composition-MAC.pdf", bbox_inches='tight')

plt.show()

In [None]:
# NOTE(review): this cell repeats the preceding cell statement for statement and writes the
# same output file ("./Fig2d-bargraph-composition-MAC.pdf"); it looks like an accidental
# duplicate and can probably be removed.
# Select the four macrophage subsets whose per-sample proportions are plotted.
mdata_subset = mdata[mdata.obs['celltype_hires'].isin(["decBAM1","decBAM2","decPAM1",
                                                      "decPAM2"]),:]

adata_subset = mdata_subset['rna']

# sample and subset label of every selected cell
composition_data = adata_subset.obs[['sample_id', 'celltype_hires']]

# cross-tabulate sample_id against celltype_hires; normalize='index' makes each row (sample) sum to 1
crosstab = pd.crosstab(composition_data['sample_id'], composition_data['celltype_hires'], normalize='index')

# define a custom order for sample_id ("dp" samples first, then "db" samples)
custom_order = ['sc01dp', 'sc02dp', 'sc08dp','sc09dp',
               'sc13dp','sc15dp','sc16dp',
               'sc01db', 'sc02db', 'sc08db','sc09db',
               'sc13db','sc15db','sc16db'] 

# reindex the crosstab to follow the custom order
sorted_crosstab = crosstab.reindex(custom_order)

# plot the sorted stacked bar chart
fig, ax = plt.subplots(figsize=(4, 4))  # Adjust figure size
sorted_crosstab.plot(kind='bar', stacked=True, width=1, edgecolor='black', ax=ax, legend=False)

# flip the y-axis so the stacks are drawn from the top downwards
ax.invert_yaxis()
plt.xticks(rotation=45, ha='right')
plt.ylabel('Proportion')

plt.tight_layout()

plt.savefig("./Fig2d-bargraph-composition-MAC.pdf", bbox_inches='tight')

plt.show()

## Figure 3D

In [None]:
mdata_subset = mdata[mdata.obs['celltype_hires'].isin(["decBAM1","decBAM2","decPAM1",
                                                      "decPAM2"]),:]

mdata_subset.obs['celltype_hires'] = mdata_subset.obs['celltype_hires'].cat.remove_unused_categories()

In [None]:
markers = ['HLA-DR_TotalSeqC','CD45RO_TotalSeqC','CD45RA_TotalSeqC','CD38_TotalSeqC','CD39_TotalSeqC',
           'CD40_TotalSeqC',
           'CD11b_TotalSeqC','CD11c_TotalSeqC','CD44_TotalSeqC','CD31_TotalSeqC']

sc.set_figure_params(figsize=(2.5, 2.5), fontsize=8, vector_friendly=True, transparent=True)

mdata_subset['prot'].obs['celltype_hires'] = mdata_subset.obs['celltype_hires']

sc.pl.matrixplot(mdata_subset['prot'], markers, groupby='celltype_hires',
                 #, use_raw=True,
                 standard_scale='var',
                 #dendrogram=True,
                 categories_order=['decPAM1','decPAM2','decBAM1','decBAM2'],
                 cmap='Greens'
                 ,save="final_Fig3D-matrixplot-mac-adt-prot.pdf"
                 )

In [None]:
markers = ["C1QA","FOLR2","MS4A6A","MS4A4A","MRC1",'TREM2','CD209','CXCL10','CXCL9','ISG15','SLAMF7','IDO1','IL4I1',
           "SPP1",'CTSD',"HMOX1",'CD28',"APOE",
          'MARCO',"CXCL8","CXCL2",'CCL3','SEMA3C','AQP9','IL1B',"NLRP3"]

mdata_subset['rna'].obs['celltype_hires'] = mdata_subset.obs['celltype_hires']

sc.set_figure_params(figsize=(2.5, 2.5), fontsize=8, vector_friendly=True, transparent=True)

df=sc.pl.matrixplot(mdata_subset['rna'], markers, groupby='celltype_hires',
                 #, use_raw=True,
                 standard_scale='var',
                 #dendrogram=True,
                categories_order=['decPAM1','decPAM2','decBAM1','decBAM2'],
                 cmap='Blues'
                 ,save="final_Fig3D-matrixplot-mac-adt-rna.pdf"
                 )

## Figure S2A

In [None]:
# Additional genes shown for the four macrophage subsets.
markers = [
    'PLTP', 'MAF', 'SELENOP', 'SLC40A1', 'CXCL1', 'PTGS2', 'TNF', 'IL6',
    'S100A4', 'S100A6', 'S100A8', 'S100A9', 'CD300E', 'FCN1', 'CD36', 'G0S2',
    'IL1B', 'CCL4', 'CXCL2', 'AREG', 'EGFR', 'NFKB1', 'NFKBIA', 'EREG', 'FN1',
]

# columns follow the order of `markers`; rows follow `categories_order`
sc.pl.matrixplot(
    mdata_subset['rna'],
    var_names=markers,
    groupby='celltype_hires',
    categories_order=['decPAM1', 'decPAM2', 'decBAM1', 'decBAM2'],
    standard_scale='var',
    swap_axes=False,
    cmap='Blues',
    save='final_20250624_ExtFig2G_clean.pdf',
)

In [None]:
# Display the available high-resolution labels (inspection only; nothing is modified).
mdata.obs['celltype_hires'].cat.categories

In [None]:
mdata_subset = mdata[mdata.obs['celltype_hires'].isin(["decBAM1","decBAM2","decPAM1",
                                                      "decPAM2",'pMac','Mono','CD16+ Mono']),:]

mdata_subset.obs['celltype_hires'] = mdata_subset.obs['celltype_hires'].cat.remove_unused_categories()

In [None]:
markers = ['CD14_TotalSeqC','CD16_TotalSeqC']

sc.set_figure_params(figsize=(2.5, 2.5), fontsize=8, vector_friendly=True, transparent=True)

mdata_subset['prot'].obs['celltype_hires'] = mdata_subset.obs['celltype_hires']

sc.pl.matrixplot(mdata_subset['prot'], markers, groupby='celltype_hires',
                 #, use_raw=True,
                 standard_scale='var',
                 #dendrogram=True,
                 categories_order=['decPAM1','decPAM2','decBAM1','decBAM2','pMac','Mono','CD16+ Mono'],
                 cmap='Greens'
                 ,save="final_ExtFig2-matrixplot-mac-adt-prot.pdf"
                 )

In [None]:
# Genes shown for the macrophage / monocyte subsets.
# NOTE(review): 'FCN1' is listed twice (once in the first row, once in the second), so it is
# passed to the plot twice - confirm whether the repetition is intended.
markers = ["MKI67",'CDK1','TOP2A','S100A12','FCN1','VCAN','LST1','LILRB2','WARS',
          'PLTP', 'MAF', 'SELENOP', 'SLC40A1','CXCL1','PTGS2','TNF','IL6','S100A4','S100A6', 'S100A8', 'S100A9', 'CD300E', 'FCN1', 'CD36', 'G0S2',
           'IL1B', 'CCL4', 'CXCL2', 'AREG', 'EGFR', 'NFKB1', 'NFKBIA', 'EREG', 'FN1']

# the grouping column has to exist on the modality that is plotted
mdata_subset['rna'].obs['celltype_hires'] = mdata_subset.obs['celltype_hires']


# per-gene scaled expression, groups shown in the order given by categories_order
sc.pl.matrixplot(
    mdata_subset['rna'],
    var_names=markers,
    groupby='celltype_hires',
    cmap='Blues',
    standard_scale='var',
    categories_order=['decPAM1','decPAM2','decBAM1','decBAM2','pMac','Mono','CD16+ Mono'],
    swap_axes=False,
    save='final_ExtFig2_clean.pdf'
)

# Figure S3

In [None]:
mdata_nk = mdata[mdata.obs['celltype_lores'].isin(['NK'])]

In [None]:
mdata_nk.obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].cat.remove_unused_categories()

mdata_nk['prot'].obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].copy()
mdata_nk['rna'].obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].copy()

In [None]:
#calculate nearest neighbors for UMAP
sc.pp.neighbors(mdata_nk['rna'], use_rep="X_harmony")
sc.pp.neighbors(mdata_nk['prot'], use_rep="X_harmony")

#calculate wnn
mu.pp.neighbors(mdata_nk, key_added='wnn')

In [None]:
#calculate UMAP
mu.tl.umap(mdata_nk, neighbors_key='wnn', random_state=10)

In [None]:
#plot UMAP
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=450)

sc.pl.umap(mdata_nk, color=["celltype_hires"], frameon=False, legend_loc='on data', size=36, alpha=0.8
           ,save='20250505_Fig3-NK-UMAP-celltype-hires.pdf'
           ,palette = ['#DAC4F7','#FFCF56','#2E933C','#2D93AD','#F4989C']
          )

In [None]:
mdata_subset["rna"].uns['log1p']["base"] = None
mdata_subset["rna"].obs["celltype_hires"]=mdata_subset.obs["celltype_hires"]
sc.tl.rank_genes_groups(mdata_subset["rna"], 'celltype_hires', method='wilcoxon')
result = mdata_subset["rna"].uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(50)

In [None]:
sc.tl.dendrogram(mdata_subset["rna"], "celltype_hires")
sc.pl.rank_genes_groups_matrixplot(mdata_subset["rna"], groupby="celltype_hires", n_genes=5, standard_scale='var',
                                  swap_axes=False
                                   , save="Fig4b-matrixplot-NK-top5.pdf"
                                   ,cmap='Blues')

In [None]:
mdata_subset["prot"].obs["celltype_hires"]=mdata_subset.obs["celltype_hires"]
sc.tl.rank_genes_groups(mdata_subset["prot"], 'celltype_hires', method='wilcoxon')
result = mdata_subset["prot"].uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.set_option('display.max_columns', 100)
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(50)

In [None]:
sc.tl.dendrogram(mdata_subset["prot"], "celltype_hires")
sc.pl.rank_genes_groups_matrixplot(mdata_subset["prot"], groupby="celltype_hires", n_genes=5, standard_scale='var',
                                  swap_axes=False
                                   #, save="Fig2c-matrixplot-macros-top10.pdf"
                                   ,cmap='Greens')

In [None]:
markers = ['CD16_TotalSeqC','CD45RA_TotalSeqC','CD11b_TotalSeqC'
           ,'CD127_TotalSeqC','CD62L_TotalSeqC','CD56_TotalSeqC'
           ,'CD45RO_TotalSeqC','CD39_TotalSeqC',
          'CD38_TotalSeqC','CD69_TotalSeqC','CD11c_TotalSeqC',
          'CD27_TotalSeqC','CD44_TotalSeqC','CD103_TotalSeqC'
           ,'TIGIT_TotalSeqC','CD314_TotalSeqC']

sc.pl.matrixplot(mdata_subset['prot'], markers, groupby='celltype_hires',
                 #, use_raw=True,
                 standard_scale='var',
                 #dendrogram=True,
                 cmap='Greens'
                 ,save="Fig4c-matrixplot-nk-adt-prot.pdf"
                 )

In [None]:
markers = ['FCGR3A','PTPRC','ITGAM'
           ,'IL7R','SELL','NCAM1'
           ,'PTPRC','ENTPD1',
          'CD38','CD69','ITGAX',
          'CD27','CD44','ITGAE'
           ,'TIGIT','KLRK1']

sc.pl.matrixplot(mdata_subset['rna'], markers, groupby='celltype_hires',
                 #, use_raw=True,
                 standard_scale='var',
                 #dendrogram=True,
                 cmap='Blues'
                 ,save="Fig4c-matrixplot-nk-rna.pdf"
                 )

## Figure 5D

In [None]:
mdata_nk = mdata[mdata.obs['celltype_hires'].isin(['CD39+ NK',
       'CD39- NK', 'CD39-CD103+ NK'])]

In [None]:
mdata_nk.obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].cat.remove_unused_categories()

mdata_nk['prot'].obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].copy()
mdata_nk['rna'].obs['celltype_hires'] = mdata_nk.obs['celltype_hires'].copy()

In [None]:
markers = {'inhibitory':['KIR2DL1','KIR2DL3','KIR3DL1','KIR3DL2','KIR3DL3','LILRB1','LILRB2','KLRD1','KLRG1','KLRC1'],
           'activating':['KLRC2','KLRC3','KIR2DL4','KLRK1','NCR1','NCR2','NCR3','KLRF1','CD226']}

sc.pl.matrixplot(mdata_nk['rna'], markers, groupby='celltype_hires',
                 standard_scale='var',
                 dendrogram=False,
                 cmap='Blues',
                 swap_axes=False,
                 #use_raw=True,
                 save='final_Fig5E-matrixplot-NK-act-inh-receptors.pdf'
                 )

## Figure S3D

In [None]:
mdata_dc = mdata[mdata.obs['celltype_lores'].isin(['DC','Mac'])]

In [None]:
mdata_dc.obs['celltype_midres'] = mdata.obs['celltype_hires'].copy()

In [None]:
mdata_dc.obs['celltype_midres'] = mdata_dc.obs['celltype_midres'].replace({'DC2_prol':'DC2','DC1_prol':'DC1','decPAM2':'Mac',
                                                                          'decPAM1':'Mac','decBAM2':'Mac','decBAM1':'Mac',
                                                                          'monoMAC':'Mono','pMac':'Mac','CD16+ Mono':'Mono'})

mdata_dc.obs['celltype_midres'] = mdata_dc.obs['celltype_midres'].cat.remove_unused_categories()

mdata_dc['prot'].obs['celltype_midres'] = mdata_dc.obs['celltype_midres']
mdata_dc['rna'].obs['celltype_midres'] = mdata_dc.obs['celltype_midres']

In [None]:
#calculate nearest neighbors for UMAP
sc.pp.neighbors(mdata_dc['rna'], use_rep="X_harmony")
sc.pp.neighbors(mdata_dc['prot'], use_rep="X_harmony")

#calculate wnn
mu.pp.neighbors(mdata_dc, key_added='wnn')

In [None]:
#calculate UMAP
mu.tl.umap(mdata_dc, neighbors_key='wnn', random_state=10)

In [None]:
#plot UMAP
sc.set_figure_params(figsize=(3.5, 3.5), fontsize=8, vector_friendly=True, transparent=True, dpi_save=450)

sc.pl.umap(mdata_dc, color=["celltype_midres"], frameon=False,legend_loc='on data',size=36, alpha=0.8
           ,save='20250505_Fig3a-MYELO-UMAP-celltype-hires.pdf'
           ,palette = ['#23CE6B','#E94F37','#CC59D2','#55C1FF']
          )

## Figure S3F

In [None]:
sc.pl.matrixplot(mdata_dc['prot'], ['CD366_TotalSeqC', 'CD103_TotalSeqC','CD45RO_TotalSeqC',
           'CD45RA_TotalSeqC','CD11c_TotalSeqC', 'CD1c_TotalSeqC', 
       'HLA-DR_TotalSeqC', 'CD11b_TotalSeqC','CD40_TotalSeqC','CD14_TotalSeqC','CD62L_TotalSeqC'],
                 groupby='celltype_midres', cmap='Greens', standard_scale = 'var'
                 ,save='20250505_MYELO-celltypeMidres_prot.pdf',
                swap_axes=False)

In [None]:
markers = ['HAVCR2','ITGAE','PTPRC','PTPRC','ITGAX','CD1C','HLA-DRA',
           'ITGAM','CD40','CD14','SELL','XCR1','CLEC9A','IDO1','CLEC10A','FCN1','CD1E']

sc.pl.matrixplot(mdata_dc['rna'], markers,
                 groupby='celltype_midres', cmap='Blues', standard_scale = 'var'
                 ,save='20250505_MYELO-celltypeMidres_rna.pdf',
                swap_axes=False)

# Figure 6

In [None]:
mdata_subset = mdata[mdata.obs['celltype_lores'].isin(['Fib']),:]
mdata_subset.obs['celltype_hires'] = mdata_subset.obs['celltype_hires'].cat.remove_unused_categories()
mdata_subset['rna'].obs['celltype_hires'] = mdata_subset.obs['celltype_hires']
adata_subset = mdata_subset['rna']

In [None]:
adata_subset.obs['celltype_hires']

In [None]:
#change order of clusters
cluster_order = ["hpFib","decFib"]

adata_subset.obs['celltype_hires'] = pd.Categorical(adata_subset.obs['celltype_hires'],
                                                        categories=cluster_order,
                                                        ordered=True)

adata_subset = adata_subset[adata_subset.obs['celltype_hires'].sort_values().index]

In [None]:
# Hex colors for the two fibroblast subsets and the two tissues.
# NOTE(review): this dictionary is not referenced by any later cell visible in this
# notebook - confirm whether it is still needed.
color_dict_cells = {
    "hpFib": "#0DD3D3",  
    "decFib": "#D449D4",
    "basalis": "#800000",  
    "parietalis": "#005F87"
}

In [None]:
df = adata_subset.obs.copy()

# Fraction of fibroblasts labelled 'decFib' per donor and tissue.
# The mean of a boolean indicator equals (number of decFib cells) / (number of cells), so
# no per-group Python function is needed; observed=True keeps only the donor/tissue
# combinations that actually occur in the data.
fractions = (
    df['celltype_hires'].eq('decFib')
      .groupby([df['donor_id'], df['tissue']], observed=True)
      .mean()
      .reset_index(name='fraction_decFibs')
)

# Create the plot: one box per tissue with the individual donor/tissue values on top
fig, ax = plt.subplots(figsize=(4, 4))
sns.boxplot(data=fractions, x='tissue', y='fraction_decFibs', color='lightgray', width=0.5,
            fliersize=0, ax=ax)
sns.stripplot(data=fractions, x='tissue', y='fraction_decFibs',
              color='black', size=10, jitter=True, alpha=0.5, ax=ax)

ax.set_ylabel('Fraction of decFibs per Sample')
ax.set_xlabel('Tissue')
ax.set_title('Fraction of decFibs by Tissue')
fig.savefig('20250416_decFib_fraction.pdf')
plt.show()


## Create pseudobulk for Figures 6A and 6I

In [None]:
# Read the "filtered" MuData file again from disk.
mdata_raw = mu.read("./citeseq_mdata_allsamples_filtered.h5mu")

In [None]:
#load citeseq data
# NOTE: this rebinds `mdata` to a freshly read object, so edits made to the previous
# in-memory object earlier in the notebook (added donor metadata columns, renamed
# cell-type labels) are only present if they are also stored in this file.
mdata = mu.read("./citeseq_mdata_allsamples_filtered_fine_clustering.h5mu")

In [None]:
# RNA modality of the "filtered" object
adata_raw = mdata_raw["rna"]

In [None]:
# `mdata` was re-read from disk above, which discards the display-label renames applied to
# celltype_hires at the start of the notebook. The pseudobulk cells below select fibroblasts
# by the renamed labels ('decFib', 'hpFib'), so the renames are re-applied here; labels that
# are already in their new form are left untouched by replace().
mdata["rna"].obs['celltype_hires'] = mdata.obs['celltype_hires'].replace({
    'MAC_prol': 'pMac',
    'monoMAC': 'Mono',
    'Mono_CD16+': 'CD16+ Mono',
    'decFIB': 'decFib',
    'NK_CD39-': 'CD39- NK',
    'NK_CD39-CD103+': 'CD39-CD103+ NK',
    'NK_CD39+': 'CD39+ NK',
    'NK_prol': 'pNK',
})

In [None]:
#limit adata_raw.
# Keep only the cells of the "filtered" RNA modality whose barcodes also occur in the RNA
# modality of the fine-clustered object.
adata = mdata["rna"]
adata_raw_filtered = adata_raw[adata_raw.obs_names.isin(adata.obs_names)].copy()
adata_raw_filtered

In [None]:
# Remove the names of the two MuData containers; `adata` (bound above) and
# `adata_raw_filtered` (a copy) are still referenced by the following cells.
del mdata
del mdata_raw

In [None]:
# Replace .obs with the annotated rows, selected by barcode so that they line up with the
# cells of adata_raw_filtered.
adata_raw_filtered.obs = adata[adata_raw_filtered.obs_names].obs.copy()

In [None]:
# Expose the high-resolution labels under 'cell_type', the default cell_identity_key of
# aggregate_and_filter defined below.
adata_raw_filtered.obs['cell_type'] = adata_raw_filtered.obs['celltype_hires'].copy()

In [None]:
# A donor needs MORE than this many cells of a population to get a pseudobulk profile.
NUM_OF_CELL_PER_DONOR = 30


def aggregate_and_filter(
    adata,
    cell_identity,
    donor_key="sample_id",
    condition_key="tissue",
    cell_identity_key="cell_type",
    obs_to_keep=None,
    replicates_per_patient=1,
    random_state=None,
):
    """Sum the expression of one cell population into per-donor pseudobulk profiles.

    The cells whose ``cell_identity_key`` equals ``cell_identity`` are selected, donors
    with ``NUM_OF_CELL_PER_DONOR`` or fewer of these cells are dropped, and for every
    remaining donor the rows of ``.X`` are summed per gene. With
    ``replicates_per_patient > 1`` the donor's cells are first shuffled and split into
    that many roughly equal parts, each of which yields its own profile.

    Parameters
    ----------
    adata
        AnnData object; ``.X`` is summed as-is (may be sparse or dense).
    cell_identity
        Value of ``cell_identity_key`` selecting the population to aggregate.
    donor_key
        ``.obs`` column identifying the unit that is aggregated.
    condition_key
        Not used by this function; kept so existing calls keep working.
    cell_identity_key
        ``.obs`` column holding the population labels.
    obs_to_keep
        ``.obs`` columns copied to the result; the value of the first cell of each
        profile is taken. ``donor_key`` is always included.
    replicates_per_patient
        Number of profiles to create per donor.
    random_state
        Seed for the shuffle that assigns cells to replicates. ``None`` (default)
        draws a fresh seed; the assignment only matters when more than one replicate
        per donor is requested.

    Returns
    -------
    AnnData with one row per profile, named ``donor_<donor>_<replicate index>``,
    float64 gene sums in ``.X``, the genes of ``adata`` as ``.var_names`` and the
    kept annotation columns in ``.obs``.
    """
    obs_to_keep = [] if obs_to_keep is None else list(obs_to_keep)
    obs_columns = obs_to_keep if donor_key in obs_to_keep else [*obs_to_keep, donor_key]

    # subset adata to the given cell identity
    adata_cell_pop = adata[adata.obs[cell_identity_key] == cell_identity].copy()
    adata_cell_pop.obs[donor_key] = adata_cell_pop.obs[donor_key].astype("category")

    # donors without enough cells of this population are reported and skipped
    size_by_donor = adata_cell_pop.obs[donor_key].value_counts(sort=False)
    donors_to_drop = [
        donor
        for donor, n_cells in size_by_donor.items()
        if n_cells <= NUM_OF_CELL_PER_DONOR
    ]
    if len(donors_to_drop) > 0:
        print("Dropping the following samples:")
        print(donors_to_drop)

    rng = np.random.default_rng(random_state)
    donors = adata_cell_pop.obs[donor_key].cat.categories

    count_rows = []  # one vector of per-gene sums per profile
    obs_rows = []    # matching annotation records
    row_names = []   # matching profile names
    for donor_idx, donor in enumerate(donors):
        print(f"\tProcessing donor {donor_idx+1} out of {len(donors)}...", end="\r")
        if donor in donors_to_drop:
            continue
        adata_donor = adata_cell_pop[adata_cell_pop.obs[donor_key] == donor]

        # create replicates for each donor: shuffle the barcodes, then split them
        shuffled_cells = rng.permutation(np.asarray(adata_donor.obs_names, dtype=str))
        replicate_cells = np.array_split(shuffled_cells, replicates_per_patient)
        for rep_idx, cells in enumerate(replicate_cells):
            if len(cells) == 0:
                # more replicates requested than cells available
                continue
            adata_replicate = adata_donor[cells]

            # Sum directly on the matrix: works for sparse and dense X alike and avoids
            # materialising a dense cells x genes DataFrame. Accumulating in float64
            # keeps large count sums exact even if X is stored as float32.
            gene_sums = np.asarray(
                adata_replicate.X.sum(axis=0, dtype=np.float64)
            ).ravel()

            obs_row = {obs: adata_replicate.obs[obs].iloc[0] for obs in obs_to_keep}
            obs_row[donor_key] = donor

            count_rows.append(gene_sums)
            obs_rows.append(obs_row)
            row_names.append(f"donor_{donor}_{rep_idx}")
    print("\n")

    # create AnnData object from the collected profiles
    if count_rows:
        counts = np.vstack(count_rows)
    else:
        counts = np.zeros((0, adata_cell_pop.n_vars), dtype=np.float64)
    obs = pd.DataFrame(obs_rows, index=row_names, columns=obs_columns)
    var = pd.DataFrame(index=adata_cell_pop.var_names)
    return sc.AnnData(X=counts, obs=obs, var=var)

In [None]:
# .obs columns carried over to the pseudobulk profiles
obs_to_keep = ["cell_type", "donor_id", "sample_id","tissue"]

In [None]:
# Restrict to the two fibroblast populations; the selection matches the labels
# 'decFib' and 'hpFib' exactly as spelled here.
adata_raw_filtered_f = adata_raw_filtered[adata_raw_filtered.obs['cell_type'].isin(['decFib','hpFib'])]

In [None]:
# Build one pseudobulk AnnData per cell type, then chain them together in category order.
cell_types = adata_raw_filtered_f.obs["cell_type"].cat.categories

per_cell_type = []
for position, cell_type in enumerate(cell_types, start=1):
    print(f'Processing {cell_type} ({position} out of {len(cell_types)})...')
    adata_cell_type = aggregate_and_filter(adata_raw_filtered_f, cell_type, obs_to_keep=obs_to_keep)
    per_cell_type.append(adata_cell_type)

# start from the first cell type and append the others one at a time
adata_pb = per_cell_type[0]
for adata_cell_type in per_cell_type[1:]:
    adata_pb = adata_pb.concatenate(adata_cell_type)

In [None]:
# Export the counts matrix (if using the 'counts' layer)
counts_df = pd.DataFrame(adata_pb.X, index=adata_pb.obs_names, columns=adata_pb.var_names)
counts_df.to_csv("cite_fib_pseudobulk_counts.csv")

# Export the sample metadata (obs)
adata_pb.obs.to_csv("cite_fib_pseudobulk_sample_metadata.csv")

# Optionally, export the gene metadata (var)
adata_pb.var.to_csv("cite_fib_pseudobulk_gene_metadata.csv")

In [None]:
adata_pb.write('cite_bulk_fibs.h5ad')