# Scripts for reproducibility of RNA analysis 
# Figures 1 and S2 <br />

## Step1: Read "HSC_multiome_cleanup_lognorm_young.h5ad"
## Step2: Processing with scanpy pipeline <br />

## Figures S2A-S2D

## MAST analysis: Figure 1A

## For defining KIT subsets 

## Figure 1B

## Figure S2E

## Figure 1C

## Figure S2H: INGEST Analysis (for HSC subset annotation of Old HSCs) 

## Step 3: Read "HSC_multiome_cleanup_lognorm_083123.h5ad" file

## Figure S2J

## Figures S2I, 1D-1E

## Figure 1G-1I

## Figure S2M

## Figures S2K-S2L: MiloR Analysis

## Same as Step 3

## Figures S2F-S2G

## Step 4: Download GSE243197 dataset

# Figures 4 and S10

In [None]:
# Load the example gene list stored in "Data Table 5.csv" as a DataFrame.
Low_Old_vs_Young = pd.read_csv(
    "Data Table 5.csv"
)

In [None]:
# Query Enrichr (mouse) against the GO Biological Process 2023 and
# MSigDB Hallmark 2020 libraries; results are kept on the returned object.
# NOTE(review): `OldLow_dn` is not assigned in the visible cells -- it
# presumably derives from `Low_Old_vs_Young`; confirm before running.
enrichr_agingdn_hm_bp = gp.enrichr(
    gene_list=OldLow_dn,
    gene_sets=[
        'GO_Biological_Process_2023',
        'MSigDB_Hallmark_2020',
    ],
    organism='mouse',
    outdir=None,
)

In [None]:
# Load the example gene list stored in "Zbtb1_nonNotch_targets.csv".
Zbtb1_targets = pd.read_csv(
    "Zbtb1_nonNotch_targets.csv"
)

In [None]:
# Query Enrichr (mouse) with the Zbtb1 target table against the
# GO Biological Process 2023 library.
enrichr_bp = gp.enrichr(
    gene_list=Zbtb1_targets,
    gene_sets=['GO_Biological_Process_2023'],
    organism='mouse',
    outdir=None,
)

In [None]:
# Dot plot of the Enrichr results held in `enrichr_bp`, written to a PDF.
ax = dotplot(
    enrichr_bp.results,
    title='GO_BP Pathways',
    cmap='inferno_r',
    top_term=50,
    size=15,
    figsize=(3, 5),
)

# Cosmetic tweaks: drop the grid and label the x axis in bold.
ax.grid(False)
ax.set_xlabel('Combined Score', fontweight='bold')
ax.figure.savefig('Zbtb1_nonNotch_bp.pdf', bbox_inches='tight')

# Figures 6 and S11

## Step 5: Download raw matrices from https://osf.io/vdf42/ 

In [None]:
%%R
# Load the two RDS objects expected in the working directory
# (see Step 5 for where the raw matrices are downloaded from).
bm_sommarin <- readRDS("./scrna_yBM_oBM_adt_hto.rds")
hsc_sommarin <- readRDS("./scrna_adt_hto.rds")

In [None]:
%%R
# Install scCustomize from CRAN only when it is not already available, so
# re-running the notebook does not trigger a reinstall every time.
if (!requireNamespace("scCustomize", quietly = TRUE)) {
  install.packages("scCustomize", repos = "https://cloud.r-project.org/")
}

In [None]:
%%R
library(scCustomize)
library(dittoSeq)
library(ComplexHeatmap)
library(SeuratData)
library(ggplot2)

In [None]:
%%R -w 600 -h 600
# UMAP of `bm_sommarin`, grouped by the `clust_names` metadata column.
p <- DimPlot(bm_sommarin, reduction = "umap", group.by = "clust_names") +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  )
p

In [None]:
%%R -w 500 -h 500
# Highlight the three HSC clusters on the `bm_sommarin` embedding; all other
# cells are drawn in light gray.
p <- Cluster_Highlight_Plot(
  bm_sommarin,
  cluster_name = c("HSC-I", "HSC-II", "HSC-III"),
  highlight_color = c("red", "blue", "green"),
  background_color = "lightgray",
  pt.size = 0.5
) +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  )
p

In [None]:
%%R
# Named colour palettes reused by the plots below: one colour per HSC
# cluster label and one per age group.
HSC_subsets <- c(
  "HSC-I" = "red",
  "HSC-II" = "blue",
  "HSC-III" = "green"
)
age <- c(
  "old" = "darkgrey",
  "young" = "darkorange"
)

In [None]:
%%R
## Subset HSCs: keep cells whose `clust_names` is HSC-I, HSC-II or HSC-III.
## `%in%` never yields NA, so cells with a missing label are simply dropped
## instead of producing an NA column index.
bm_sommarin_HSC <- bm_sommarin[
  , bm_sommarin$clust_names %in% c("HSC-I", "HSC-II", "HSC-III")
]

# Fetch ADT-CD117 for the HSC subset and keep the cells above the 1.3 cutoff.
# NOTE(review): this cutoff is applied to whatever ADT values the loaded
# object carries at this point -- confirm it matches the intended scale.
cd117_expression <- FetchData(object = bm_sommarin_HSC, vars = c("ADT-CD117"))
cells_of_interest <- which(cd117_expression > 1.3)

# New object restricted to the CD117-positive HSCs.
HSC_clean <- bm_sommarin_HSC[, cells_of_interest]

In [None]:
%%R -w 500 -h 500
# UMAP of `HSC_clean`, grouped by the `age` metadata column with fixed
# colours for the young and old groups.
p <- DimPlot(
  HSC_clean,
  reduction = "umap",
  group.by = "age",
  pt.size = 0.05,
  repel = TRUE
) +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  ) +
  scale_color_manual(values = c("young" = "darkorange", "old" = "darkgrey"))
p


In [None]:
%%R
# Quick look: proportion of each HSC cluster within every age group
# (bars are stacked to 100% via position = "fill").
ggplot(HSC_clean@meta.data, aes(age)) +
  geom_bar(aes(fill = clust_names), position = "fill") +
  scale_fill_manual(values = HSC_subsets)

In [None]:
%%R
# Normalize ADT data with CLR (margin = 2): switch the default assay to
# ADT for the call, then restore RNA as the default assay.
DefaultAssay(bm_sommarin_HSC) <- "ADT"
bm_sommarin_HSC <- NormalizeData(
  bm_sommarin_HSC,
  normalization.method = "CLR",
  margin = 2
)
DefaultAssay(bm_sommarin_HSC) <- "RNA"

# Alternative form of the same normalization: name the assay explicitly
# instead of switching the default assay. It returns the same result.
bm_sommarin_HSC <- NormalizeData(
  bm_sommarin_HSC,
  assay = "ADT",
  normalization.method = "CLR",
  margin = 2
)

In [None]:
%%R
# List the row names (features) of the "ADT" assay in `bm_sommarin_HSC`,
# e.g. to check how the CD117 antibody feature is spelled.
rownames(bm_sommarin_HSC[["ADT"]])

In [None]:
%%R -w 500 -h 500
# Proportion of each HSC cluster within every age group, stacked to 100%
# and coloured with the `HSC_subsets` palette.
p <- ggplot(HSC_clean@meta.data, aes(age)) +
  geom_bar(aes(fill = clust_names), position = "fill") +
  scale_fill_manual(values = HSC_subsets) +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel without any gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.5)
  )

p

In [None]:
%%R -w 500 -h 500
# Violin plot of ADT-CD117 in `HSC_clean`, one violin per `age` group.
# - `features` is spelled out in full; the original `feature =` only worked
#   through partial argument matching.
# - group.by = "age" makes the fill levels match the names of the `age`
#   palette ("old"/"young"); without it the violins follow the active
#   identities, which are only switched to `age` in a later cell.
p <- VlnPlot(
  object = HSC_clean,
  features = c("ADT-CD117"),
  group.by = "age"
) +
  scale_fill_manual(values = age) +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  )
p

In [None]:
%%R
# Compare ADT-CD117 between old and young cells with a Mann-Whitney
# (Wilcoxon rank-sum) test.
# NOTE(review): the original read from objects named `old` and `young`,
# which are not created in the visible cells; this assumes they were the
# age subsets of `HSC_clean` -- confirm.
cd117_by_age <- FetchData(object = HSC_clean, vars = c("ADT-CD117", "age"))
# Both requested variables must come back, otherwise column 1 is not CD117.
stopifnot(ncol(cd117_by_age) == 2)

# Column 1 holds the CD117 values; positional access avoids depending on
# how the feature name is rendered as a column name.
old_cd117 <- cd117_by_age[[1]][cd117_by_age$age == "old"]
young_cd117 <- cd117_by_age[[1]][cd117_by_age$age == "young"]

# run Mann-Whitney
result <- wilcox.test(old_cd117, young_cd117)
result

In [44]:
%%R
# Print `Kitlo_genes`; the saved cell output shows it as one comma-separated
# string of gene symbols (presumably the mouse Kit-low signature -- confirm).
Kitlo_genes

[1] "Xist, Rap1b, Zeb2, Mki67, Diaph3, Top2a, Hmgb2, Pbx1, Rad51b, Plxdc2, Atad2, Cenpp, Plcb1, Hspa5, Pola1, Pdcd4, Pf4, Psd3, Knl1, Smc4, Mef2c, Smc2, Cep128, Prkca, Dlg2, Lmnb1, Dek, Lims1, Nucks1, Ezh2, Hist1h2ap, Rab27b, Asap1, Rrm1, Dtl, H2afz, Tsc22d2, Kif11, Vcl, Itga2b, Vwf, Kif15, Prim2, Sik2, Mms22l, Kif20b, Fli1, Hist1h1b, Etf1, Sik3, Ncam2, Smchd1, Gm47283, Atad5, Cit, Cenpe, Cntln, AU020206, Hist1h2ae, Purb, Nol4l, Cks2, Klf9, Rabgap1l, Zbtb11, Cbx5, Klf13, Klf6, Smarca5, Cenpf, Brip1, Dnmt1, Tmpo, Blm, Tgfb1, Elf1, Ncapg2, AI504432, Polq, Kpna1, Serpina3g, Nup153, Sfmbt1, Rad18, Rbbp8, Ncapd3, Calm1, Lockd, Topbp1, Ranbp2, Hjurp, Neil3, Spata5, Coro2a, Dgkd, Mgat5, Mapk1, Ctdspl, Hells, Kalrn, Pde5a, Ube2s, Pds5a, Gnas, Ckap5, Brca1, Adgrl1, C1galt1, Bmp2k, Tpx2, Srgap2, Suz12, Mid1, Incenp, Dut, Tra2b, Pcna, Xpo1, Tiparp, Slc9a9, Sntb1, Ctnnal1, Tmsb4x, Hdgf, Spidr, Kif2a, Ckap2l, Ccnl1, Yaf2, Usp24, Anln, Mapre1, Nasp, Igf2bp2, Plek, Hspa4, Fyn, Mmp16, Ube2k, Gucy1a1, 

In [46]:
%%R
library(biomaRt)

In [47]:
%%R
# Open Ensembl BioMart connections for the human and mouse gene datasets,
# pinning the host to the main Ensembl site.
human <- useMart(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "https://www.ensembl.org"
)

mouse <- useMart(
  biomart = "ensembl",
  dataset = "mmusculus_gene_ensembl",
  host = "https://www.ensembl.org"
)

In [46]:
%%R
# Same two BioMart connections, this time using biomaRt's default host.
mouse <- useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl")
human <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

In [47]:
%%R
# Re-open the mouse mart, then download the tab-separated MGI mouse/human
# homology report used by convert_mouse_to_human().
mouse <- useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl")
mouse_human_genes <- read.csv(
  "http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",
  sep = "\t"
)

In [None]:
%%R
#' Map mouse gene symbols to human symbols via the homology table.
#'
#' Reads the global `mouse_human_genes` data frame and uses its `Symbol`,
#' `Common.Organism.Name` and `DB.Class.Key` columns. For each input symbol,
#' the class key(s) of the matching "mouse, laboratory" rows are looked up,
#' and the symbols of all "human" rows sharing those keys are collected.
#'
#' Written in base R so it does not depend on dplyr being attached, and
#' matched with `%in%` so a symbol that maps to several class keys is
#' handled correctly (a plain `==` would recycle the keys).
#'
#' @param gene_list Vector (or list) of mouse gene symbols.
#' @return Human symbols in input order, one entry per matching human row
#'   (duplicates are kept); symbols without a match contribute nothing.
#'   `NULL` when no symbol maps at all.
convert_mouse_to_human <- function(gene_list) {
  organism <- mouse_human_genes$Common.Organism.Name
  symbol <- as.character(mouse_human_genes$Symbol)
  class_key <- mouse_human_genes$DB.Class.Key

  # Organism masks are loop-invariant, so compute them once.
  is_mouse <- organism == "mouse, laboratory"
  is_human <- organism == "human"

  per_gene <- lapply(gene_list, function(gene) {
    # which() drops NA comparisons, mirroring how filter() discards them.
    keys <- class_key[which(is_mouse & symbol == gene)]
    hits <- symbol[which(is_human & class_key %in% keys)]
    if (length(hits) > 0) hits else NULL
  })

  unlist(per_gene, use.names = FALSE)
}

In [None]:
%%R
# `gene_list` is not assigned in the visible cells, and the printed
# `Kitlo_genes` is a single comma-separated string, so build the symbol
# vector here: split on commas, trim whitespace, drop empty pieces.
# NOTE(review): assumes `Kitlo_genes` is the intended input -- confirm.
gene_list <- trimws(unlist(strsplit(Kitlo_genes, ",", fixed = TRUE)))
gene_list <- gene_list[nzchar(gene_list)]

human_genes <- convert_mouse_to_human(gene_list)

In [None]:
%%R
# Wrap the converted symbols as a named signature list ("kitlo") and record
# which of them are present among the row names of `HSC_clean`.
signatures <- list(kitlo = c(human_genes))
available_gene_names <- rownames(HSC_clean)
# Logical vector, one entry per signature gene: TRUE when the gene is found.
human_list <- human_genes %in% available_gene_names

In [None]:
%%R
library(UCell)
# Score every cell for the signatures. With name = NULL no suffix is added,
# so the score column is named after the signature itself ("kitlo"), which
# is the name the later plotting cells refer to.
HSC_clean <- AddModuleScore_UCell(
  HSC_clean,
  features = signatures,
  name = NULL
)

In [None]:
%%R -w 400 -h 400
# UMAP coloured by the signature score(s) named in `signatures`, using a
# reversed RdBu gradient. brewer.pal() is namespace-qualified because
# RColorBrewer is not attached by any library() call in the visible cells.
p <- FeaturePlot(
  HSC_clean,
  reduction = "umap",
  features = names(signatures),
  repel = TRUE,
  min.cutoff = -1,
  max.cutoff = 0.175
) +
  scale_colour_gradientn(
    colours = rev(RColorBrewer::brewer.pal(n = 10, name = "RdBu"))
  ) +
  theme(
    # Bold black text; the legend labels are slightly smaller (15 pt).
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 15, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  )
p

In [None]:
%%R
# Make `age` the active identity, then draw the "kitlo" score as violins
# grouped by age and filled with the `age` palette.
Idents(object = HSC_clean) <- "age"

p <- VlnPlot(HSC_clean, c("kitlo"), group.by = "age") +
  scale_fill_manual(values = age) +
  theme(
    # Bold black 20-pt text for axes and legend.
    axis.title = element_text(size = 20, color = "black", face = "bold"),
    axis.text.x = element_text(size = 20, color = "black", face = "bold"),
    axis.text.y = element_text(size = 20, color = "black", face = "bold"),
    legend.title = element_text(size = 20, color = "black", face = "bold"),
    legend.text = element_text(size = 20, color = "black", face = "bold"),
    # White panel, no gridlines, thicker black axis lines.
    panel.background = element_rect(fill = "white"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black", size = 1.0)
  )

p

In [None]:
%%R
# Compare the "kitlo" signature score between old and young cells with a
# Mann-Whitney (Wilcoxon rank-sum) test.
# NOTE(review): the original read from objects named `old` and `young`,
# which are not created in the visible cells; this assumes they were the
# age subsets of `HSC_clean` (the object the score was added to) -- confirm.
kitlo_by_age <- FetchData(object = HSC_clean, vars = c("kitlo", "age"))
# Both requested variables must come back, otherwise column 1 is not the score.
stopifnot(ncol(kitlo_by_age) == 2)

# Column 1 holds the signature score.
old_kitlo <- kitlo_by_age[[1]][kitlo_by_age$age == "old"]
young_kitlo <- kitlo_by_age[[1]][kitlo_by_age$age == "young"]

# run Mann-Whitney
result <- wilcox.test(old_kitlo, young_kitlo)
result
