In [None]:
library(Seurat)
library(ape)
library(ggtree)
library(ggplot2)
library(dplyr)
library(tidyverse)


In [None]:
# Load the per-cell metadata table exported from the SCC/BCC object.
scc <- read.delim("/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_full_object_final_Nov26_metadata.txt", sep = "\t", stringsAsFactors = FALSE)

# Drop cells annotated "KC Cancer" that come from the normal sample P4_N.
# The rows are selected by annotation instead of by a hard-coded row number, so
# the filter stays correct if the metadata export is regenerated or reordered.
# NOTE(review): this replaces `scc[-45506, ]`; presumably row 45506 was exactly
# this cell -- confirm against the frozen export.
# which() ignores NA comparisons; the length guard is required because
# scc[-integer(0), ] would return zero rows rather than leave scc untouched.
p4n_kc_cancer_rows <- which(scc$Level2_Cancer == "KC Cancer" & scc$sample_ID == "P4_N")
if (length(p4n_kc_cancer_rows) > 0) {
  scc <- scc[-p4n_kc_cancer_rows, ]
}
head(scc)

In [None]:
# Show any rows still annotated "KC Cancer" in sample P4_N.
is_p4n_kc_cancer <- scc$Level2_Cancer == "KC Cancer" & scc$sample_ID == "P4_N"
scc[is_p4n_kc_cancer, ]

In [None]:
# Keep only the "KC Cancer" cells (NA annotations are excluded) and count them
# per sample.
sccc <- scc[which(scc$Level2_Cancer == "KC Cancer"), , drop = FALSE]
tally(group_by(sccc, sample_ID))

In [None]:
# Slim metadata table: cell type (Level2), sample and cancer status per cell.
meta <- with(
  scc,
  data.frame(
    Level2 = Level2_Cancer,
    sample_ident = sample_ID,
    cancer_status = cancer_status
  )
)
head(meta)
unique(meta$Level2)

In [None]:
# Display the last rows of the slim metadata table.
tail(meta)

In [None]:
# Collapse the Level2 annotation into broad lineages stored in
# meta$general_type: every keratinocyte subtype becomes "KC" and every immune
# subtype becomes "Immune". Labels not listed here are left unchanged.
# The labels are applied one after another as gsub() patterns, i.e. they match
# as substrings, so the order of the vectors below matters.
kc_labels <- c(
  "KC Basal", "KC Dysplastic", "KC Granular", "KC Hair",
  "KC Cornified", "KC IFN", "KC Cancer", "KC Differentiating"
)
immune_labels <- c(
  "B Cell", "T Cell", "Treg", "Macrophage", "Monocytes",
  "NK", "Plasma", "DC", "LC"
)

collapsed_type <- meta$Level2
for (kc_label in kc_labels) {
  collapsed_type <- gsub(kc_label, "KC", collapsed_type) #KC
}
for (immune_label in immune_labels) {
  collapsed_type <- gsub(immune_label, "Immune", collapsed_type) #imm
}
meta$general_type <- collapsed_type

unique(meta$general_type)

In [None]:
# Derive the patient ID by stripping the sample-type suffix from the sample
# name. Suffixes are removed sequentially in this order, so "_SCC1"/"_SCC2"
# are handled before the shorter "_SCC".
sample_suffixes <- c("_N", "_SCC1", "_SCC2", "_SCC", "_BCC", "_IEC")
meta$patient <- Reduce(
  function(ids, suffix) gsub(suffix, "", ids),
  sample_suffixes,
  meta$sample_ident
)
head(meta)

In [None]:
# Dot plot of cell-type abundance (number of cells per Level2 cell type and
# patient/status) for the SCC/BCC cohort, with a coloured status strip on top.
library(dplyr)
meta <- mutate(meta, patient_status = paste0(patient, "_", cancer_status))
#meta <- meta[!(meta$Level2 == "KC Cancer" & meta$sample_ID == "P4_N"), ]

counts <- meta
# remove zeroes
#counts <- counts[counts$Freq > 0,]

# Shorten the endothelial label BEFORE building the factor. gsub() returns a
# plain character vector, so renaming after factor() would silently discard
# the factor levels (levels() would be NULL) and with them the y-axis order.
counts$Level2 <- gsub("Endothelial Cell", "Endothelial", counts$Level2)
counts$general_type <- gsub("Endothelial Cell", "Endothelial", counts$general_type)

# relevel "Level2" celltypes: alphabetically by general_type, then by Level2
Level2_order <- unique(counts[, c("Level2", "general_type")])
Level2_order <- Level2_order[order(Level2_order$general_type, Level2_order$Level2), ]
# unique() keeps factor() from failing on duplicated levels should one Level2
# label ever appear under more than one general_type.
Level2_order <- unique(Level2_order$Level2)
counts$Level2 <- factor(counts$Level2, levels = Level2_order)
# relevel samples: all normal samples first, then all cancer samples
counts$patient_status <- factor(counts$patient_status, levels = c("P1_Normal", "P2_Normal", "P3_Normal", "P4_Normal", "P5_Normal", "P1_Cancer", "P2_Cancer", "P3_Cancer", "P4_Cancer", "P5_Cancer"))

# Point colours, keyed by general_type.
palette <- c(
  `Endothelial` = "orange",
  `Fibroblast` = "forestgreen",
  `Immune` = "yellowgreen",
  `KC` = "deeppink",
  `Melanocytes` = "saddlebrown", `KC Cancer`="black")

# Tile colours for the status strip.
palette_status <- c(`Cancer` = "deeppink3",
                    `Normal` = "turquoise3")

# One row per (cell type, lineage, patient_status) with the cell count in Freq.
# `name` is spelled out; the previous `n = "Freq"` only worked through partial
# argument matching.
counts2 <- as.data.frame(
  counts %>%
    group_by(Level2, general_type, patient_status) %>%
    tally(name = "Freq")
)

library(ggplot2)
dotplot_raw <- ggplot(counts2, aes(x = patient_status, y = Level2, color = general_type, size = Freq)) +
  # reversed so the first factor level is drawn at the top of the axis
  scale_y_discrete(limits = rev(levels(counts$Level2))) +
  geom_point() +
  scale_size(range = c(0.3, 9)) +
  scale_color_manual(values = palette) +
  scale_x_discrete(labels = c("P1", "P2", "P3", "P4", "P5", "P1", "P2", "P3", "P4", "P5")) +
  xlab("patient") +
  ylab("celltype") +
  theme_bw()

options(repr.plot.width = 8, repr.plot.height = 8)

library(ggmap)
library(patchwork)
# One tile per x-axis column: five normal columns followed by five cancer
# columns. make.unique() yields distinct ids so every column gets its own tile,
# and the factor keeps them in that left-to-right order.
my.labels <- data.frame(
  status_redundant = c(rep("normal", 5), rep("cancer", 5)),
  stringsAsFactors = FALSE
)
my.labels$status <- make.unique(my.labels$status_redundant)
my.labels$status <- factor(my.labels$status, my.labels$status)

labels <- my.labels %>%
  ggplot(aes(x = status, y = 1, fill = status)) +
  geom_tile() +
  # unnamed values are assigned to the fill levels in order
  scale_fill_manual(values = c(rep(unname(palette_status["Normal"]), 5), rep(unname(palette_status["Cancer"]), 5))) +
  theme_nothing()

labels + dotplot_raw + plot_layout(ncol = 1, heights = c(0.9, 20))


In [None]:
# "KC Cancer" cells only (NA annotations excluded), counted per sample.
meta_cancer <- meta[which(meta$Level2 == "KC Cancer"), , drop = FALSE]
tally(group_by(meta_cancer, sample_ident))

### Melanoma (Mel) data

In [None]:
# Load the melanoma per-cell metadata and reduce it to the same columns used
# for the SCC/BCC table. Level3_Cancer supplies the cell-type label, Level1 the
# lineage, and every cell is tagged with cancer_status "Cancer".
mel <- read.delim(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Mel/Mel_full_object_final_Nov26_metadata.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)
meta_mel <- with(
  mel,
  data.frame(
    Level2 = Level3_Cancer,
    general_type = Level1,
    sample_ident = orig.ident,
    cancer_status = "Cancer"
  )
)
head(meta_mel)

In [None]:
# Label each melanoma sample with its status by rewriting the sample name.
# Replacements are applied sequentially, in the order listed.
mel_status_labels <- c(
  MPS13 = "MPS13_Malignant",
  MPS42 = "MPS42_Benign",
  MPS43 = "MPS43_Benign"
)
mel_sample_status <- meta_mel$sample_ident
for (mel_sample in names(mel_status_labels)) {
  mel_sample_status <- gsub(mel_sample, mel_status_labels[[mel_sample]], mel_sample_status)
}
meta_mel$patient_status <- mel_sample_status
head(meta_mel)

In [None]:
# List the distinct cell-type and lineage labels present in the melanoma table.
unique(meta_mel$Level2)
unique(meta_mel$general_type)

In [None]:
# Dot plot of cell-type abundance for the melanoma samples.

# Shorten the endothelial labels (note the different capitalisation of the
# pattern used for each column).
meta_mel$Level2 <- gsub("Endothelial Cell", "Endothelial", meta_mel$Level2)
meta_mel$general_type <- gsub("Endothelial cell", "Endothelial", meta_mel$general_type)

counts_mel <- meta_mel
# remove zeroes
#counts <- counts[counts$Freq > 0,]

# relevel "Level2" celltypes: sorted by general_type; labels sharing a
# general_type keep their first-appearance order
celltype_lineage <- unique(counts_mel[, c("Level2", "general_type")])
Level2_order <- celltype_lineage$Level2[order(celltype_lineage$general_type)]
counts_mel$Level2 <- factor(counts_mel$Level2, levels = Level2_order)

# relevel samples
counts_mel$patient_status <- factor(
  counts_mel$patient_status,
  levels = c("MPS13_Malignant", "MPS42_Benign", "MPS43_Benign")
)

# Point colours, keyed by general_type.
palette <- c(
  `Endothelial` = "orange",
  `Fibroblast` = "forestgreen",
  `Immune` = "yellowgreen",
  `KC` = "deeppink",
  `Melanocytes` = "saddlebrown",
  `Other` = "royalblue2",
  `Melanoma` = "black"
)

palette_status <- c(
  `Cancer` = "deeppink3",
  `Normal` = "turquoise3"
)

# One row per (cell type, lineage, patient_status) with the cell count in Freq.
counts2_mel <- counts_mel %>%
  group_by(Level2, general_type, patient_status) %>%
  tally(name = "Freq") %>%
  as.data.frame()

options(repr.plot.width = 6, repr.plot.height = 8)
ggplot(
  counts2_mel,
  aes(x = patient_status, y = Level2, color = general_type, size = Freq)
) +
  # reversed so the first factor level is drawn at the top of the axis
  scale_y_discrete(limits = rev(levels(counts2_mel$Level2))) +
  geom_point() +
  scale_size(range = c(0.3, 9)) +
  scale_color_manual(values = palette) +
  scale_x_discrete(labels = c("MPS13", "MPS42", "MPS43")) +
  xlab("patient") +
  ylab("celltype") +
  theme_bw()

In [None]:
# Shorten the endothelial label in the SCC/BCC count table. gsub() returns a
# character vector, so counts2$Level2 is plain character after this line.
counts2$Level2<-gsub("Endothelial Cell","Endothelial",counts2$Level2)


In [None]:
# Stack the SCC/BCC and melanoma count tables, then harmonise the cell-type
# labels: capitalise "cell" and shorten "Endothelial Cell" to "Endothelial".
final_counts2 <- rbind(counts2, counts2_mel)
unique(final_counts2$Level2)

final_counts2$Level2 <- gsub(
  "Endothelial Cell", "Endothelial",
  gsub("cell", "Cell", final_counts2$Level2)
)

unique(final_counts2$Level2)

In [None]:
# Combined SCC/BCC + melanoma abundance dot plot with a status strip on top.

# Top-to-bottom order of the cell types on the y axis.
desired_order <- c("Endothelial","Fibroblast","LC","DC","T Cell","NK","Macrophage","Monocytes","Plasma","B Cell","KC Basal","KC Differentiating",
                   "KC Granular","KC Hair","KC Cornified","KC IFN","KC Dysplastic","KC Cancer","Melanocytes","Melanoma")

# factor() turns every label missing from desired_order into NA without any
# message, so report such labels instead of letting them vanish from the plot.
unlisted_celltypes <- setdiff(unique(as.character(final_counts2$Level2)), desired_order)
unlisted_celltypes <- unlisted_celltypes[!is.na(unlisted_celltypes)]
if (length(unlisted_celltypes) > 0) {
  warning(
    "Cell types not in desired_order will become NA: ",
    paste(unlisted_celltypes, collapse = ", "),
    call. = FALSE
  )
}

# Ensure Level2 is a factor and set the levels
final_counts2$Level2 <- factor(final_counts2$Level2, levels = desired_order)

# Point colours, keyed by general_type. Defined before the plot that uses it so
# this cell does not depend on a palette left over from an earlier cell.
palette <- c(
  `Endothelial` = "orange",
  `Fibroblast` = "forestgreen",
  `Immune` = "yellowgreen",
  `KC` = "deeppink",
  `Melanocytes` = "saddlebrown",
  `Other` = "royalblue2",`Melanoma` = "black")

# Tile colours for the status strip.
palette_status <- c(`Cancer SCC/BCC` = "deeppink3",
                    `Normal` = "turquoise3",`Melanoma`="yellow")

dotplot_raw_combined <- ggplot(final_counts2, aes(x = patient_status, y = Level2, color = general_type, size = Freq)) +
  # reversed so the first entry of desired_order is drawn at the top
  scale_y_discrete(limits = rev(levels(final_counts2$Level2))) +
  geom_point() +
  scale_size(range = c(0.3, 9)) +
  scale_color_manual(values = palette) +
  scale_x_discrete(labels = c("P1", "P2", "P3", "P4", "P5", "P1", "P2", "P3", "P4", "P5","MPS13","MPS42","MPS43")) +
  xlab("patient") +
  ylab("celltype") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))

# One tile per x-axis column, left to right: 5 normal, 5 SCC/BCC cancer and
# 3 melanoma columns. make.unique() gives each column its own fill level and
# the factor keeps them in that order.
my.labels <- data.frame(
  status_redundant = c(rep("Normal", 5), rep("Cancer SCC/BCC", 5), rep("Melanoma", 3)),
  stringsAsFactors = FALSE
)
my.labels$status <- make.unique(my.labels$status_redundant)
my.labels$status <- factor(my.labels$status, my.labels$status)

# Look the tile colours up from palette_status by status name. Unnamed values
# are assigned to the fill levels in order, so this vector must follow the tile
# order above. The previous hard-coded vector started with the cancer colour
# ("deeppink3") and therefore painted the Normal columns as cancer and the
# cancer columns as normal.
status_fill <- unname(palette_status[my.labels$status_redundant])

labels <- my.labels %>%
  ggplot(aes(x = status, y = 1, fill = status)) +
  geom_tile() +
  scale_fill_manual(values = status_fill) +
  theme_nothing()

options(repr.plot.width = 8, repr.plot.height = 8)
labels + dotplot_raw_combined + plot_layout(ncol = 1, heights = c(0.9, 20))


In [None]:
# Write the combined abundance figure to a PDF file.
pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/SCC_Mel_CT_abundance_dotplot.pdf")
# Print explicitly: a plot object is only drawn when it is printed, and
# top-level auto-printing does not happen when this code is run via source(),
# which would leave the PDF empty.
print(labels + dotplot_raw_combined + plot_layout(ncol = 1, heights = c(0.9, 20)))
dev.off()

### Differential abundance (DA) stats

In [None]:
## Creating mel seurat object
#--------------------------------------------------------------------------------------------------------------------------------
# Reads the raw melanoma count table and the matching metadata, then saves
# them together as a Seurat object.
library(Seurat)

message("Reading counts...")
x <- read.csv(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate/Mel_raw_counts.csv",
  header = TRUE
)
# The first column holds the row identifiers: promote it to row names and drop it.
rownames(x) <- x[[1]]
x[[1]] <- NULL
print(dim(x))
print(x[1:5, 1:5])

message("Reading metadata...")
m <- read.csv(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Mel/Mel_full_object_final_Nov26_metadata.txt",
  sep = "\t",
  header = TRUE
)
# Same convention as above; the id column is kept and renamed to "sample".
rownames(m) <- m[[1]]
names(m)[1] <- "sample"
print(dim(m))
print(head(m))

message("Writing seurat object...")
# The count table is transposed before being handed to Seurat
# (presumably it is stored cells x genes -- TODO confirm).
mel_seurat <- CreateSeuratObject(
  counts = t(x),
  meta.data = m,
  project = "seurat",
  min.cells = 0,
  min.features = 0
)
saveRDS(
  mel_seurat,
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Mel/Melanoma_seurat.Rds"
)

In [None]:
# Build the SCC/BCC Seurat object from the raw count table and the per-cell
# metadata, and save it to disk.
x <- read.csv(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate/SCC_BCC_raw_counts.csv",
  header = TRUE
)
# The first column holds the row identifiers: promote it to row names and drop it.
rownames(x) <- x[[1]]
x[[1]] <- NULL
print(dim(x))
print(x[1:5, 1:5])

message("Reading metadata...")
m <- read.delim(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_full_object_final_Nov26_metadata.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)

# Same convention as above; the id column is kept and renamed to "sample".
rownames(m) <- m[[1]]
names(m)[1] <- "sample"
print(dim(m))
print(head(m))

message("Writing seurat object...")
# The count table is transposed before being handed to Seurat
# (presumably it is stored cells x genes -- TODO confirm).
scc_bcc_seurat <- CreateSeuratObject(
  counts = t(x),
  meta.data = m,
  project = "seurat",
  min.cells = 0,
  min.features = 0
)
saveRDS(
  scc_bcc_seurat,
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_BCC_seurat.rds"
)


In [None]:
# Reload the two saved Seurat objects (SCC/BCC and melanoma) from disk.
scc_bcc<-readRDS("/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_BCC_seurat.rds")
mel<-readRDS("/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Mel/Melanoma_seurat.Rds")


In [None]:
# Cross-tabulate cells by cell type (rows) and sample (columns) for each
# cohort and store the result as a plain data frame of counts.
scc_celltype_by_sample <- table(scc_bcc[[c("Level2_Cancer", "sample_ID")]])
specific_counts_scc <- as.data.frame.matrix(scc_celltype_by_sample)

mel_celltype_by_sample <- table(mel[[c("Level2_Cancer", "orig.ident")]])
specific_counts_mel <- as.data.frame.matrix(mel_celltype_by_sample)

In [None]:
# Force the "KC Cancer" count for sample P4_N to zero, then display the table.
# NOTE(review): presumably because P4_N is a normal sample and should contain
# no cancer keratinocytes -- confirm.
specific_counts_scc["KC Cancer", "P4_N"] <- 0
specific_counts_scc

In [None]:
# List the cell-type row names of each count table (useful for checking that
# the labels agree before the two tables are merged).
unique(rownames(specific_counts_scc))
unique(rownames(specific_counts_mel))

In [None]:
# Combine the SCC/BCC and melanoma count tables side by side, matching rows by
# cell-type name. all = TRUE keeps cell types that occur in only one table;
# their missing entries are NA.
#cbind(specific_counts_scc, specific_counts_mel)
specific_counts <- merge(
  x = specific_counts_scc,
  y = specific_counts_mel,
  by = "row.names",
  all = TRUE
)

# merge() returns the matched names in a "Row.names" column: move them back
# into the row names and drop the helper column.
merged_celltypes <- specific_counts[["Row.names"]]
specific_counts[["Row.names"]] <- NULL
rownames(specific_counts) <- merged_celltypes
specific_counts

In [None]:
# Save the merged cell-type x sample count table as tab-separated text
# (row names are written, strings are not quoted).
write.table(specific_counts,"/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate/specific_counts_for_DA_SCC_Mel.txt",sep="\t",quote = FALSE)

In [None]:
# Differential abundance of cell types between Normal, SCC and Melanoma
# samples, tested with edgeR quasi-likelihood F-tests on the cell counts.
library(Seurat)
library(edgeR)
library(ggplot2)
library(gtools)  # For stars.pval

# Load and preprocess data
specific_counts <- read.csv("/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate/specific_counts_for_DA_SCC_Mel.txt", header = TRUE, sep="\t")
# Cell types absent from one cohort are NA after the merge; treat them as 0 cells.
specific_counts[is.na(specific_counts)] <- 0

meta <- read.csv("meta_for_DA_scc_mel.txt", sep="\t")
rownames(meta) <- meta$samples

# Put the sample metadata in the same order as the columns of the count table.
# The group labels and the design matrix are both built from `meta`, so a
# differently ordered metadata file would silently assign samples to the wrong
# disease group. If the names cannot be matched, fall back to the file order
# (the previous behaviour) but say so.
sample_ids <- colnames(specific_counts)
if (all(sample_ids %in% rownames(meta))) {
  meta <- meta[sample_ids, , drop = FALSE]
} else {
  warning(
    "Samples missing from meta$samples, keeping metadata in file order: ",
    paste(setdiff(sample_ids, rownames(meta)), collapse = ", "),
    call. = FALSE
  )
}

# Ensure disease is a factor with the correct levels ("Normal" is the reference)
meta$disease <- factor(meta$disease, levels = c("Normal", "SCC", "Melanoma"))

# Create DGEList object
y.ab <- DGEList(counts = specific_counts, group = meta$disease)
#keep <- filterByExpr(y.ab)
#y.ab <- y.ab[keep,]

# Design matrix for multiple group comparisons
# (columns: intercept, diseaseSCC, diseaseMelanoma)
design <- model.matrix(~ disease, data = meta)
colnames(design) <- make.names(colnames(design))

# Recompute dispersion
y.ab <- estimateDisp(y.ab, design)

# Fit model
fit.ab <- glmQLFit(y.ab, design, robust = TRUE)

# Perform tests for all comparisons
# Normal vs. SCC
res_normal_vs_scc <- glmQLFTest(fit.ab, coef = "diseaseSCC")
# Normal vs. Mel
res_normal_vs_mel <- glmQLFTest(fit.ab, coef = "diseaseMelanoma")
# SCC vs. Mel: the contrast is diseaseMelanoma - diseaseSCC, so a positive
# logFC means higher abundance in Melanoma than in SCC
res_scc_vs_mel <- glmQLFTest(fit.ab, contrast = c(0, -1, 1))

# Results for Normal vs. SCC
# (n = 20 caps each result table at the 20 top-ranked cell types)
summary(decideTests(res_normal_vs_scc))
topTags(res_normal_vs_scc, n = 20)
tagtable_normal_vs_scc <- topTags(res_normal_vs_scc, n = 20)$table
# significance stars derived from the FDR column
tagtable_normal_vs_scc$stats <- gtools::stars.pval(tagtable_normal_vs_scc$FDR)

# Results for Normal vs. Mel
summary(decideTests(res_normal_vs_mel))
topTags(res_normal_vs_mel, n = 20)
tagtable_normal_vs_mel <- topTags(res_normal_vs_mel, n = 20)$table
tagtable_normal_vs_mel$stats <- gtools::stars.pval(tagtable_normal_vs_mel$FDR)

# Results for SCC vs. Mel
summary(decideTests(res_scc_vs_mel))
topTags(res_scc_vs_mel, n = 20)
tagtable_scc_vs_mel <- topTags(res_scc_vs_mel, n = 20)$table
tagtable_scc_vs_mel$stats <- gtools::stars.pval(tagtable_scc_vs_mel$FDR)

# Save results if needed
write.table(tagtable_normal_vs_scc, "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate//results_normal_vs_scc.txt", sep = "\t", quote = FALSE)
write.table(tagtable_normal_vs_mel, "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate//results_normal_vs_mel.txt", sep = "\t", quote = FALSE)
write.table(tagtable_scc_vs_mel, "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/intermediate//results_scc_vs_mel.txt", sep = "\t", quote = FALSE)


In [None]:
# Display the Normal vs. SCC result table.
tagtable_normal_vs_scc

In [None]:
# Display the Normal vs. Melanoma result table.
tagtable_normal_vs_mel

In [None]:
# Display the SCC vs. Melanoma result table.
tagtable_scc_vs_mel