# Install packages

In [None]:
# install.packages("devtools")
# install.packages("readr")
# install.packages("pheatmap")
# install.packages("tibble")
# install.packages("ggpubr")

# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")

# BiocManager::install("progeny")

# ## To install the new version until it is submitted to Bioconductor use:
# devtools::install_github("saezlab/progeny")

# install.packages('devtools')
# devtools::install_github('immunogenomics/presto')


# Set up environment

In [None]:
library(progeny)
library(dplyr)
library(Seurat)
library(ggplot2)
library(tidyr)
library(readr)
library(pheatmap)
library(tibble)


# Load data

In [None]:
# Load the Seurat object for GSE184357. The active line reads the file named
# "fresh&frozen"; no merging is performed in this cell (presumably the fresh and
# frozen samples were merged upstream -- TODO confirm).
# Alternative single-condition inputs, kept for reference:
# seu_frozen <- readRDS("../output/seu_GSE184357_frozen.rds")
# seu_fresh <- readRDS("../output/seu_GSE184357_fresh.rds")
seu.combined <- readRDS("../output/seu_GSE184357_fresh&frozen.rds")

# Choose which object the rest of the notebook works on.
# seu <- seu_frozen
# seu <- seu_fresh
seu <- seu.combined
# Use the "annotation" metadata column as the active cell identity, so that
# Idents(seu) returns these labels in the cells below.
Idents(seu) <- "annotation"

# Display the object.
seu


# Pathway activity per cell population

In [None]:
# Distinct values of the `clinical_status` metadata column (used below to split
# the data with the values 'primary' and 'recurrence').
unique(seu$clinical_status)

In [None]:
# Distinct values of the `location` metadata column (used below to split the
# data with the values 'pontine' and 'thalamic').
unique(seu$location)

In [None]:
# Distinct values of the `annotation` metadata column, which was set as the
# active identity when the data was loaded.
unique(seu$annotation)

In [None]:
# Split the object by the `location` metadata column. Each bare object name
# after an assignment displays that subset.
seu_pons <- subset(seu, subset= location == 'pontine')
seu_pons

seu_thalamic <- subset(seu, subset= location == 'thalamic')
seu_thalamic

# Split the object by the `clinical_status` metadata column.
seu_primary <- subset(seu, subset= clinical_status == 'primary')
seu_primary

seu_recurrence <- subset(seu, subset= clinical_status == 'recurrence')
seu_recurrence


## UMAP

In [None]:

# One labelled DimPlot per object, with the legend removed: the full dataset
# first, then each location subset, then each clinical-status subset. The
# print() calls emit a caption before each plot.
print("scRNA for all locations:")
DimPlot(seu, label = TRUE, pt.size = 0.5) + NoLegend()

print("scRNA for pontine location only:")
DimPlot(seu_pons, label = TRUE, pt.size = 0.5) + NoLegend()
print("scRNA for thalamic location only:")
DimPlot(seu_thalamic, label = TRUE, pt.size = 0.5) + NoLegend()

print("scRNA for primary only:")
DimPlot(seu_primary, label = TRUE, pt.size = 0.5) + NoLegend()
print("scRNA for recurrence only:")
DimPlot(seu_recurrence, label = TRUE, pt.size = 0.5) + NoLegend()

## Heatmap

In [None]:
pathway_analysis <- function(seu){
    # Compute PROGENy pathway activity scores for every cell of a Seurat object,
    # draw a heatmap of the mean scaled activity per cell type and pathway, and
    # return the per-cell scores in long format.
    #
    # Args:
    #   seu: Seurat object whose active identities (Idents) are the cell-type
    #        labels used for grouping.
    #
    # Returns:
    #   A data frame with one row per (cell, pathway) pair and the columns
    #   Cell, Pathway, Activity (scaled PROGENy score) and CellType.
    #
    # Side effects:
    #   Draws the pheatmap heatmap on the active graphics device. No
    #   dimensional-reduction plot is drawn here: a ggplot created inside a
    #   function is not displayed unless printed, so call DimPlot() at top
    #   level if a UMAP is wanted.

    # Map every cell barcode to its active identity (cell type).
    cells_clusters <- data.frame(Cell = names(Idents(seu)),
        CellType = as.character(Idents(seu)),
        stringsAsFactors = FALSE)

    # Compute the PROGENy scores and get them back as a new assay called
    # "progeny" on the Seurat object (return_assay = TRUE).
    seu_progeny <- progeny(seu, scale = FALSE, organism = "Human", top = 500,
        perm = 1, return_assay = TRUE)

    # Scale the pathway activity scores with the regular Seurat machinery.
    seu_progeny <- Seurat::ScaleData(seu_progeny, assay = "progeny")

    # Reshape the scaled scores (pathways x cells) into a long data frame with
    # one row per (cell, pathway) pair.
    progeny_scores_df <-
        as.data.frame(t(GetAssayData(seu_progeny, slot = "scale.data",
            assay = "progeny"))) %>%
        rownames_to_column("Cell") %>%
        gather(Pathway, Activity, -Cell)

    # Attach the cell type of every cell. The join key is spelled out so that
    # the join neither guesses it nor prints a message about it.
    progeny_scores_df <- inner_join(progeny_scores_df, cells_clusters,
        by = "Cell")

    # Mean and standard deviation of the activity per pathway and cell type.
    # Dropping the grouping keeps the following reshaping steps ungrouped.
    summarized_progeny_scores <- progeny_scores_df %>%
        group_by(Pathway, CellType) %>%
        summarise(avg = mean(Activity), std = sd(Activity), .groups = "drop")

    # Wide table for plotting: one row per cell type (as row names), one
    # column per pathway. as.data.frame() keeps pathway names unmodified.
    summarized_progeny_scores_df <- summarized_progeny_scores %>%
        dplyr::select(-std) %>%
        spread(Pathway, avg) %>%
        as.data.frame(stringsAsFactors = FALSE) %>%
        column_to_rownames("CellType")

    # Diverging palette centred on zero: the lower half of the colours spans
    # [min, 0] and the upper half spans (0, max].
    # NOTE(review): the breaks are only increasing when the averaged scores
    # span zero (min < 0 < max) -- confirm this holds for small subsets.
    palette_length <- 100
    heat_colors <- colorRampPalette(c("Darkblue", "white", "red"))(palette_length)

    score_min <- min(summarized_progeny_scores_df)
    score_max <- max(summarized_progeny_scores_df)
    progeny_breaks <- c(seq(score_min, 0,
                            length.out = ceiling(palette_length / 2) + 1),
                        seq(score_max / palette_length, score_max,
                            length.out = floor(palette_length / 2)))

    # The cell-type column has already been moved into the row names, so the
    # whole table is plotted; dropping a column here would remove a pathway.
    progeny_hmap <- pheatmap(t(summarized_progeny_scores_df), fontsize = 14,
                             fontsize_row = 10,
                             color = heat_colors, breaks = progeny_breaks,
                             main = "PROGENy (500)", angle_col = 45,
                             treeheight_col = 0, border_color = NA)
    return(progeny_scores_df)
}


In [None]:
# Run pathway_analysis() on the full dataset and on each subset. Every call
# draws a heatmap and returns the long per-cell score table. The subset tables
# get a `Condition` column so they can be stacked and compared later.
print("pathway for all scRNA:")
all_pathwayscore <- pathway_analysis(seu)

# Location subsets. Note that the 'pontine' subset is labelled 'pons' here; the
# comparison cells below use that label.
print("pathway for all pontine scRNA only:")
pons_pathwayscore <- pathway_analysis(seu_pons)
pons_pathwayscore$Condition <- 'pons'
print("pathway for all thalamic scRNA only:")
thalamic_pathwayscore <- pathway_analysis(seu_thalamic)
thalamic_pathwayscore$Condition <- 'thalamic'

# Clinical-status subsets.
print("pathway for all seu_primary scRNA only:")
primary_pathwayscore <- pathway_analysis(seu_primary)
primary_pathwayscore$Condition <- 'primary'
print("pathway for all recurrence scRNA only:")
recurrence_pathwayscore <- pathway_analysis(seu_recurrence)
recurrence_pathwayscore$Condition <- 'recurrence'


## Compare signalling pathway activity between conditions

### Compare the sample distribution for each pathway

In [None]:
violin_plot_per_pathway <- function(pathway_score, pathway, conditions){
    # Compare the activity of one pathway between two conditions with a
    # two-sample Kolmogorov-Smirnov test, and draw a violin/box plot when the
    # difference is significant (p <= 0.05, not corrected for multiple testing).
    #
    # Args:
    #   pathway_score: data frame with at least the columns Pathway, Activity
    #                  and Condition.
    #   pathway:       name of the pathway to test (a value of Pathway).
    #   conditions:    the two Condition values to compare.
    #
    # Returns:
    #   Invisibly, the printed plot when the test is significant, NULL
    #   otherwise. Relies on ggplot2 being attached by the calling script.
    pathway_score <- pathway_score[pathway_score$Pathway == pathway, ]
    t1 <- pathway_score$Activity[pathway_score$Condition == conditions[1]]
    t2 <- pathway_score$Activity[pathway_score$Condition == conditions[2]]

    # ks.test() stops with an error when a sample has no usable value, which
    # would abort a loop over pathways; warn and skip this comparison instead.
    if (sum(!is.na(t1)) < 1 || sum(!is.na(t2)) < 1) {
      warning("Skipping ", pathway, ": no activity scores for condition ",
              conditions[1], " and/or ", conditions[2], call. = FALSE)
      return(invisible(NULL))
    }
    res <- ks.test(t1, t2)

    # Use the full element name: `res$p` only works through partial matching.
    if (res$p.value <= 0.05){
      print(paste("p-val is: ", res$p.value, "within ", pathway, "signalling pathway"))
      violin_plot <- ggplot(pathway_score, aes(x = Condition, y = Activity, color = Condition)) +
        geom_violin() +
        labs(title = paste("Pathway Activation Scores for ", pathway),
             x = "Condition",
             y = "Activation Score") +
        geom_boxplot(width = 0.1) +
        theme(
          legend.text = element_text(size = 18),
          legend.title = element_text(size = 18),
          plot.title = element_text(size = 20),
          plot.subtitle = element_text(size = 20),
          axis.text = element_text(size = 25),
          axis.title = element_text(size = 25)
        )

      print(violin_plot)
    }
}


In [None]:
# Stack the per-condition score tables row-wise, one table per comparison. The
# `Condition` column added earlier tells the stacked rows apart.
location_pathwayscore <- rbind(pons_pathwayscore, thalamic_pathwayscore)
clinic_status_pathwayscore <- rbind(primary_pathwayscore, recurrence_pathwayscore)


In [None]:
# Test every pathway for a pons vs thalamic difference, pooling all cell types.
# violin_plot_per_pathway() only prints and plots pathways with p <= 0.05.
conditions <- c('pons', 'thalamic')
for (pathway in unique(location_pathwayscore$Pathway)) {
    violin_plot_per_pathway(pathway_score = location_pathwayscore, pathway = pathway, conditions = conditions)
}

In [None]:
# Test every pathway for a primary vs recurrence difference, pooling all cell
# types. violin_plot_per_pathway() only prints and plots pathways with p <= 0.05.
conditions <- c('primary', 'recurrence')
for (pathway in unique(clinic_status_pathwayscore$Pathway)) {
    violin_plot_per_pathway(pathway_score = clinic_status_pathwayscore, pathway = pathway, conditions = conditions)
}


### Compare the sample distribution for each cell type

In [None]:
violin_plot_per_celltype <- function(pathway_score, celltype, conditions){
    # Compare the activity scores of one cell type between two conditions with
    # a two-sample Kolmogorov-Smirnov test, and draw a violin/box plot when the
    # difference is significant (p <= 0.05, not corrected for multiple testing).
    # The scores of all pathways present in `pathway_score` are pooled.
    #
    # Args:
    #   pathway_score: data frame with at least the columns CellType, Activity
    #                  and Condition.
    #   celltype:      cell type to test (a value of CellType).
    #   conditions:    the two Condition values to compare.
    #
    # Returns:
    #   Invisibly, the printed plot when the test is significant, NULL
    #   otherwise. Relies on ggplot2 being attached by the calling script.
    pathway_score <- pathway_score[pathway_score$CellType == celltype, ]
    t1 <- pathway_score$Activity[pathway_score$Condition == conditions[1]]
    t2 <- pathway_score$Activity[pathway_score$Condition == conditions[2]]

    # ks.test() stops with an error when a sample has no usable value (for
    # example when the cell type is absent from one condition); warn and skip.
    if (sum(!is.na(t1)) < 1 || sum(!is.na(t2)) < 1) {
      warning("Skipping ", celltype, ": no activity scores for condition ",
              conditions[1], " and/or ", conditions[2], call. = FALSE)
      return(invisible(NULL))
    }
    res <- ks.test(t1, t2)

    # Use the full element name: `res$p` only works through partial matching.
    if (res$p.value <= 0.05){
      # The tested unit is a cell type, so the message says so.
      print(paste("p-val is: ", res$p.value, "within ", celltype, "cell type"))
      violin_plot <- ggplot(pathway_score, aes(x = Condition, y = Activity, color = Condition)) +
        geom_violin() +
        labs(title = paste("Pathway Activation Scores \nfor ", celltype),
             x = "Condition",
             y = "Activation Score") +
        geom_boxplot(width = 0.1) +
        theme(
          legend.text = element_text(size = 18),
          legend.title = element_text(size = 18),
          plot.title = element_text(size = 20),
          plot.subtitle = element_text(size = 20),
          axis.text = element_text(size = 25),
          axis.title = element_text(size = 25)
        )

      print(violin_plot)
    }
}


In [None]:
# Primary vs recurrence comparison for the listed cell types (currently only
# 'OPC-like'), pooling the scores of all pathways.
conditions <- c('primary', 'recurrence')
for (celltype in c('OPC-like')) {
        violin_plot_per_celltype(pathway_score = clinic_status_pathwayscore, 
                                celltype = celltype,
                                conditions = conditions)
    }

In [None]:
# Pons vs thalamic comparison for the listed cell types (currently only
# 'OPC-like'), pooling the scores of all pathways.
conditions <- c('pons', 'thalamic')
for (celltype in c('OPC-like')) {
        violin_plot_per_celltype(pathway_score = location_pathwayscore, 
                                celltype = celltype,
                                conditions = conditions)
    }

### Compare the sample distribution for each pathway and each cell type.

In [None]:
violin_plot_per_pathway_celltype <- function(pathway_score, pathway, conditions, celltype){
    # Compare the activity of one pathway within one cell type between two
    # conditions with a two-sample Kolmogorov-Smirnov test, and draw a
    # violin/box plot when the difference is significant (p <= 0.05, not
    # corrected for multiple testing).
    #
    # Args:
    #   pathway_score: data frame with at least the columns CellType, Pathway,
    #                  Activity and Condition.
    #   pathway:       name of the pathway to test (a value of Pathway).
    #   conditions:    the two Condition values to compare.
    #   celltype:      cell type to restrict the test to (a value of CellType).
    #
    # Returns:
    #   Invisibly, the printed plot when the test is significant, NULL
    #   otherwise. Relies on ggplot2 being attached by the calling script.
    keep <- pathway_score$CellType == celltype & pathway_score$Pathway == pathway
    pathway_score <- pathway_score[keep, ]
    t1 <- pathway_score$Activity[pathway_score$Condition == conditions[1]]
    t2 <- pathway_score$Activity[pathway_score$Condition == conditions[2]]

    # ks.test() stops with an error when a sample has no usable value, which
    # would abort the surrounding loops; warn and skip this comparison instead.
    if (sum(!is.na(t1)) < 1 || sum(!is.na(t2)) < 1) {
      warning("Skipping ", pathway, " / ", celltype,
              ": no activity scores for condition ",
              conditions[1], " and/or ", conditions[2], call. = FALSE)
      return(invisible(NULL))
    }
    res <- ks.test(t1, t2)

    # Use the full element name: `res$p` only works through partial matching.
    if (res$p.value <= 0.05){
      # print() shows escape sequences literally, so the message is kept on a
      # single line; the plot title below can still contain a line break.
      print(paste("p-val is: ", res$p.value, "within ", pathway, "signalling pathway", "for ", celltype))
      violin_plot <- ggplot(pathway_score, aes(x = Condition, y = Activity, color = Condition)) +
        geom_violin() +
        labs(title = paste("Pathway Activation Scores within ", pathway, "\nfor ", celltype),
             x = "Condition",
             y = "Activation Score") +
        geom_boxplot(width = 0.1) +
        theme(
          legend.text = element_text(size = 18),
          legend.title = element_text(size = 18),
          plot.title = element_text(size = 20),
          plot.subtitle = element_text(size = 20),
          axis.text = element_text(size = 25),
          axis.title = element_text(size = 25)
        )

      print(violin_plot)
    }
}


In [None]:
# Primary vs recurrence comparison for every pathway within each listed cell
# type (currently only 'OPC-like'). Only significant combinations are plotted.
conditions <- c('primary', 'recurrence')
for (pathway in unique(clinic_status_pathwayscore$Pathway)) {
    for (celltype in c('OPC-like')) {
        violin_plot_per_pathway_celltype(pathway_score = clinic_status_pathwayscore, 
                                pathway = pathway, 
                                conditions = conditions,
                                celltype = celltype)
    }
}

In [None]:
# Pons vs thalamic comparison for every pathway within each listed cell type
# (currently only 'OPC-like'). Only significant combinations are plotted.
conditions <- c('pons', 'thalamic')
for (pathway in unique(location_pathwayscore$Pathway)) {
    for (celltype in c('OPC-like')) {
        violin_plot_per_pathway_celltype(pathway_score = location_pathwayscore, 
                                pathway = pathway, 
                                conditions = conditions,
                                celltype = celltype)
    }
}