In [None]:
library(edgeR)
library(Seurat)
library(dplyr)
library(tidyr)

In [None]:
#  decideTestsDGE.R

#	Methods named for a decideTests() generic (presumably limma's), one per
#	edgeR result class. Both simply delegate to decideTestsDGE(); anything
#	passed through `...` is accepted but not forwarded.
decideTests.DGELRT <- function(object,adjust.method="BH",p.value=0.05,lfc=0,...)
{
	decideTestsDGE(
		object=object,
		adjust.method=adjust.method,
		p.value=p.value,
		lfc=lfc
	)
}
decideTests.DGEExact <- decideTests.DGELRT

decideTestsDGE <- function(object,adjust.method="BH",p.value=0.05,lfc=0)
#	Classify each gene of an edgeR test result as significant or not.
#	object: a DGEExact or DGELRT object whose $table holds PValue and either a
#	  single logFC column (one contrast) or several logFC* columns (F-test).
#	adjust.method: multiple-testing method handed to p.adjust().
#	p.value: cutoff applied to the adjusted p-values.
#	lfc: minimum absolute log-fold-change a gene must reach to stay significant.
#	Returns a one-column TestResults matrix coded -1/0/1 (single contrast) or
#	0/1 (F-test), with matching "levels" and "labels" attributes.
{
	# Only the two edgeR result classes are supported
	if( !is(object,"DGEExact") && !is(object,"DGELRT") ) {
		stop("Need DGEExact or DGELRT object")
	}

	tab <- object$table

	# Adjusted p-values -> 0/1 significance calls
	adj.p <- p.adjust(tab$PValue, method=adjust.method)
	calls <- as.integer(adj.p < p.value)

	# No single logFC column means the test covered several coefficients
	fc <- tab$logFC
	multi <- is.null(fc)

	if(multi) {
		# F-test: a gene survives only if at least one coefficient reaches lfc
		if(lfc>0) {
			fc.cols <- grep("^logFC",colnames(tab))
			below <- rowSums(abs(tab[,fc.cols]) >= lfc) == 0
			calls[below] <- 0L
		}
	} else {
		# Single contrast: sign the call by direction, then apply the lfc cutoff
		calls[calls & fc<0] <- -1L
		calls[abs(fc) < lfc] <- 0L
	}

	# One-column matrix named after the comparison (second group minus first)
	calls <- matrix(calls, ncol=1)
	row.names(calls) <- row.names(object)
	colnames(calls) <- paste(rev(object$comparison),collapse="-")

	# Record which codes are possible and what they mean
	if(multi) {
		code.levels <- c(0L,1L)
		code.labels <- c("NotSig","Sig")
	} else {
		code.levels <- c(-1L,0L,1L)
		code.labels <- c("Down","NotSig","Up")
	}
	attr(calls,"levels") <- code.levels
	attr(calls,"labels") <- code.labels

	new("TestResults", calls)
}

## SCC BCC Finding KC Cancer

In [None]:
# Load the SCC/BCC single-cell object and the tab-separated per-cell
# metadata table whose columns are copied onto the object below.
scc_bcc <- readRDS(
  "/QRISdata/Q2051/SCC_Paper/resources/data/scc_bcc_sc.rds"
)
m <- read.csv(
  "skin_atlas/SCC_final_object_Nov1_metadata.txt",
  sep = "\t"
)

In [None]:
# Align the metadata table to the cells of the Seurat object.
# `m$X` is used as the cell identifier column (presumably the barcodes).
matching_cells <- colnames(scc_bcc)

# Row i of `filtered_meta_df` corresponds to cell i of `scc_bcc`. A single
# match() does both the filtering and the reordering; cells absent from `m`
# become all-NA rows, so report them instead of letting NA annotations slip
# silently into the object.
meta_idx <- match(matching_cells, m$X)
if (anyNA(meta_idx)) {
  warning(
    sum(is.na(meta_idx)),
    " cell(s) of scc_bcc have no row in the metadata table",
    call. = FALSE
  )
}
filtered_meta_df <- m[meta_idx, ]



In [None]:
# Copy the curated annotation columns onto the Seurat metadata. This relies
# on filtered_meta_df being in the same row order as the cells of scc_bcc.
scc_bcc@meta.data$cancer_status_corrected <- filtered_meta_df$cancer_status
scc_bcc@meta.data$sample_ident_corrected <- filtered_meta_df$sample_ID
scc_bcc@meta.data$Level1 <- filtered_meta_df$Level1_Final
scc_bcc@meta.data$Level2 <- filtered_meta_df$Level2_Cancer
scc_bcc@meta.data$Level3 <- filtered_meta_df$Level3_Cancer
scc_bcc@meta.data$cell_types <- filtered_meta_df$Level3_final
scc_bcc@meta.data$leiden_KC <- filtered_meta_df$KC_leiden_R

# Successive subsets: Level1 starting with "KC" -> cancer_status_corrected
# containing "Cancer" -> X2CNV_mod0.5 containing "Cancer" or "Normal".
# NOTE(review): `value = TRUE` looks carried over from grep(); confirm that
# subset() does anything with it.
KC_cells <- rownames(scc_bcc@meta.data)[
  grep("^KC", scc_bcc@meta.data$Level1)
]
KC_only <- subset(scc_bcc, cells = KC_cells, value = TRUE)

cancer_samples <- rownames(KC_only@meta.data)[
  grep("Cancer", KC_only@meta.data$cancer_status_corrected)
]
cancer_only <- subset(KC_only, cells = cancer_samples, value = TRUE)

filt <- rownames(cancer_only@meta.data)[
  grep("Cancer|Normal", cancer_only@meta.data$`X2CNV_mod0.5`)
]
cancer <- subset(cancer_only, cells = filt, value = TRUE)

In [None]:
# List the metadata columns available on the KC-only subset
names(KC_only@meta.data)

In [None]:
# Pseudo-bulk differential expression on the KC-only object with edgeR's
# quasi-likelihood pipeline. Seurat2PB builds the pseudo-bulk object from the
# `sample` and `cluster` metadata columns named here.
y <- Seurat2PB(KC_only, sample = "cancer_status_corrected", cluster = "cell_types")
y <- normLibSizes(y)

# Grouping factor for the design. Despite its name, `donor` holds the values
# of the `sample` column of y$samples. The full element name `samples` is
# spelled out rather than relying on `$` partial matching.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`; each column is tested
# below and labelled "<level>_vs_others".
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per contrast column
qlf <- lapply(seq_len(ncls), function(i) {
  res <- glmQLFTest(fit, contrast = contr[, i])
  res$comparison <- paste0(levels(donor)[i], "_vs_others")
  res
})

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
#plotQLDisp(fit)
#dev.off()

# Number of Down / NotSig / Up genes for every comparison, side by side
dt <- lapply(qlf, function(res) summary(decideTestsDGE(res)))
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)



In [None]:
#top<-20
#topMarkers<-list()
#for(i in 1:ncls){ 
#  ord<-order(qlf[[i]]$table$PValue,decreasing=FALSE) 
#  up<-qlf[[i]]$table$logFC>0 
#  topMarkers[[i]]<-rownames(y)[ord[up][1:top]] 
#} 
#topMarkers<-unique(unlist(topMarkers)) 
#write.table(topMarkers,"KC_only_SCC_topmarkers_edger.txt",sep="\t",row.names = TRUE, quote = FALSE)

# Up to `top` up-regulated marker genes per comparison, ranked by p-value.
top <- 50
topMarkers <- lapply(seq_len(ncls), function(i) {
  tab <- qlf[[i]]$table
  ord <- order(tab$PValue, decreasing = FALSE)
  up <- tab$logFC > 0
  # `ord` is a permutation of gene indices while `up` is in gene order, so
  # the mask has to be permuted too (`up[ord]`). Indexing `ord[up]` would
  # keep ranks whose *position* happens to be an up-regulated gene and so
  # return genes that are not necessarily up-regulated.
  selected_genes <- rownames(y)[ord[up[ord]]]
  # Drop genes whose names start with RPS, RPL or LINC
  selected_genes <- selected_genes[!grepl("^RPS|^RPL|^LINC", selected_genes)]
  # Keep the best `top` genes that remain after filtering
  head(selected_genes, top)
})
topMarkers <- unique(unlist(topMarkers))

#write.table(qlf[[1]]$table,"KCs_cancer_samples_Tumor_vs_nonTumor_qlf1_edger_labels_corrected.txt",sep="\t",row.names = TRUE, quote = FALSE)
head(qlf[[1]]$table[topMarkers, ])

# Significance calls for every comparison (decideTests defaults: BH, 0.05)
dt2 <- lapply(qlf, decideTests)

# Genes called significant, in either direction, in the first comparison
significant_genes <- rownames(dt2[[1]])[dt2[[1]] != 0]

# View the names of significant genes
print(significant_genes)

# Test statistics for those genes; the raw p-value cutoff is kept from the
# original analysis as an extra guard
top_table <- qlf[[1]]$table[significant_genes, ]
top_table <- top_table[top_table$PValue <= 0.05, ]


In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Log-CPM per pseudo-bulk sample, annotated by cluster
lcpm <- cpm(y, log = TRUE)
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)
# One colour index per cluster level (0, 1, 2, ...). The vector is sized from
# the data: a fixed 0:24 leaves NA-named entries when there are fewer than 25
# levels and fails when there are more.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)
ann_colors

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the "cluster" text from sample names so matrix columns and
# annotation rows keep matching
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]

rownames(annot) <- gsub("cluster", "", rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression of the significant genes of the first comparison
heatmap_data <- lcpm[rownames(top_table), ]

# Row-scaled heatmap; rows are clustered, columns keep their order
heatmap <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on the heatmap
genes_on_heatmap <- rownames(heatmap_data)[heatmap$tree_row$order]

In [None]:
# Display the gene names in heatmap row order
genes_on_heatmap

In [None]:
# Keep only genes whose log-CPM is above 0.7 in every one of the first six
# columns of heatmap_data (a matrix), then redraw the heatmap for them.
heatmap_data_sub <- heatmap_data[
  rowSums(heatmap_data[, seq_len(6)] > 0.7) == 6,
]
heatmap2 <- pheatmap::pheatmap(
  lcpm[rownames(heatmap_data_sub), ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot,
  annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on the second heatmap
genes_on_heatmap2 <- rownames(heatmap_data_sub)[heatmap2$tree_row$order]

genes_on_heatmap2

In [None]:
# Transpose the filtered matrix so pseudo-bulk samples are rows and genes
# are columns
heatmap_data_transposed <- t(heatmap_data_sub)

# Plot the transposed heatmap. Every setting of the untransposed plot is
# mirrored onto the other axis: genes are still the scaled and clustered
# dimension (now columns), and the sample annotation moves to the rows.
# The transposed matrix itself is plotted; t() of the annotation data frame
# or of the colour list is not a valid pheatmap annotation input.
pheatmap::pheatmap(
  heatmap_data_transposed,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "column",
  cluster_rows = FALSE, border_color = "NA", fontsize_col = 5,
  show_rownames = FALSE,
  annotation_row = annot, annotation_colors = ann_colors
)


In [None]:
# Display the gene names of the filtered heatmap in row order
genes_on_heatmap2

In [None]:
# Number of genes (rows) with a value above 0.7 in all of the first six columns
dim(heatmap_data[rowSums(heatmap_data[, 1:6] > 0.7) == 6, ])


In [None]:
# Pseudo-bulk sample names (columns of the log-CPM matrix)
colnames(lcpm)

In [None]:
# NOTE(review): counts_matrix and metadata_df are not created anywhere in the
# visible notebook — confirm they exist in the session before running this cell
dim(counts_matrix)
dim(metadata_df)

In [None]:
# Display the full log-CPM matrix
lcpm

## Finding Cancer Mel

In [None]:
# Load the melanoma single-cell object and its per-cell metadata table,
# then list the metadata columns.
mel <- readRDS(
  "/QRISdata/Q2051/SCC_Paper/resources/data/melanoma_sc.rds"
)
m <- read.csv(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Melanoma_final_object_Nov10_metadata.csv"
)
names(m)

In [None]:
# Preview the first rows of the melanoma metadata table
head(m)

In [None]:
# Print the Seurat object summary and the metadata table dimensions
mel
dim(m)

In [None]:
# Index the metadata table by its BC column so rows can be looked up by cell
rownames(m) <- m$BC
#m$BC <- rownames(m)  # Create a cell_id column in m to be used for matching
# Mirror the cell names into a BC column on the Seurat metadata
mel@meta.data$BC <- rownames(mel@meta.data)
# Reorder the table to the cell order of `mel`
m <- m[colnames(mel), ]
dim(m)

In [None]:
# Copy the Level2 labels and leiden cluster ids from the aligned table
# onto the Seurat metadata
mel@meta.data$Level2_final <- m$Level2
mel@meta.data$mel_leiden <- m$mel_leiden


In [None]:
# Cells whose Level2_final label contains "Melanocytes"
# NOTE(review): `value = TRUE` looks carried over from grep(); confirm that
# subset() does anything with it.
melanocytes <- rownames(mel@meta.data)[
  grep("Melanocytes", mel@meta.data$Level2_final)
]
mel_only <- subset(mel, cells = melanocytes, value = TRUE)

In [None]:
# Pseudo-bulk differential expression on the melanocyte subset with edgeR's
# quasi-likelihood pipeline; pseudo-bulk samples are built from the
# orig.ident (sample) and mel_leiden (cluster) metadata columns.
y <- Seurat2PB(mel_only, sample = "orig.ident", cluster = "mel_leiden")
y <- normLibSizes(y)

# Grouping factor for the design. `donor` holds the values of the `sample`
# column of y$samples; the full element name `samples` is spelled out rather
# than relying on `$` partial matching.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`; each column is tested
# below and labelled "<level>_vs_others".
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per contrast column
qlf <- lapply(seq_len(ncls), function(i) {
  res <- glmQLFTest(fit, contrast = contr[, i])
  res$comparison <- paste0(levels(donor)[i], "_vs_others")
  res
})

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
#plotQLDisp(fit)
#dev.off()

# Number of Down / NotSig / Up genes for every comparison, side by side
dt <- lapply(qlf, function(res) summary(decideTestsDGE(res)))
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)



In [None]:
#top<-20
#topMarkers<-list()
#for(i in 1:ncls){ 
#  ord<-order(qlf[[i]]$table$PValue,decreasing=FALSE) 
#  up<-qlf[[i]]$table$logFC>0 
#  topMarkers[[i]]<-rownames(y)[ord[up][1:top]] 
#} 
#topMarkers<-unique(unlist(topMarkers)) 
#write.table(topMarkers,"KC_only_SCC_topmarkers_edger.txt",sep="\t",row.names = TRUE, quote = FALSE)

# Up to `top` up-regulated marker genes per comparison, ranked by p-value.
top <- 50
topMarkers <- lapply(seq_len(ncls), function(i) {
  tab <- qlf[[i]]$table
  ord <- order(tab$PValue, decreasing = FALSE)
  up <- tab$logFC > 0
  # `ord` is a permutation of gene indices while `up` is in gene order, so
  # the mask has to be permuted too (`up[ord]`); `ord[up]` would return genes
  # that are not necessarily up-regulated.
  selected_genes <- rownames(y)[ord[up[ord]]]
  # Drop genes whose names start with RPS, RPL or LINC
  selected_genes <- selected_genes[!grepl("^RPS|^RPL|^LINC", selected_genes)]
  # Keep the best `top` genes that remain after filtering
  head(selected_genes, top)
})
topMarkers <- unique(unlist(topMarkers))

#write.table(qlf[[1]]$table,"KCs_cancer_samples_Tumor_vs_nonTumor_qlf1_edger_labels_corrected.txt",sep="\t",row.names = TRUE, quote = FALSE)
head(qlf[[1]]$table[topMarkers, ])

# Significance calls for every comparison (decideTests defaults: BH, 0.05)
dt2 <- lapply(qlf, decideTests)

# Genes called significant, in either direction, in the first comparison
significant_genes <- rownames(dt2[[1]])[dt2[[1]] != 0]

# View the names of significant genes
print(significant_genes)

# Test statistics for those genes; the raw p-value cutoff is kept from the
# original analysis as an extra guard
top_table <- qlf[[1]]$table[significant_genes, ]
top_table <- top_table[top_table$PValue <= 0.05, ]


In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Log-CPM per pseudo-bulk sample, annotated by cluster
lcpm <- cpm(y, log = TRUE)
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)
# One colour index per cluster level (0, 1, 2, ...). The vector is sized from
# the data: a fixed 0:24 leaves NA-named entries when there are fewer than 25
# levels and fails when there are more.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)
ann_colors

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the "cluster" text from sample names so matrix columns and
# annotation rows keep matching
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]

rownames(annot) <- gsub("cluster", "", rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression of the significant genes of the first comparison
heatmap_data <- lcpm[rownames(top_table), ]

# Row-scaled heatmap; rows are clustered, columns keep their order
heatmap <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on the heatmap
genes_on_heatmap <- rownames(heatmap_data)[heatmap$tree_row$order]

In [None]:
# Pseudo-bulk sample names (columns of the log-CPM matrix)
colnames(lcpm)

In [None]:
# Keep genes whose log-CPM is above 7 in every one of the first 11 columns of
# heatmap_data (a matrix) and plot at most the first 150 of them. head() is
# used instead of `[1:150, ]` so that fewer than 150 qualifying genes no
# longer raises "subscript out of bounds".
heatmap_data_sub <- head(
  heatmap_data[rowSums(heatmap_data[, 1:11] > 7) == 11, ],
  150
)
heatmap2 <- pheatmap::pheatmap(
  lcpm[rownames(heatmap_data_sub), ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on the second heatmap
genes_on_heatmap2 <- rownames(heatmap_data_sub)[heatmap2$tree_row$order]

In [None]:
# Display the gene names of the filtered heatmap in row order
genes_on_heatmap2

In [None]:
# Column (pseudo-bulk sample) names of the heatmap matrix
colnames(heatmap_data)

In [None]:
# Genes whose value is above 0.5 in every one of the first 11 columns,
# followed by the size of that subset
n <- heatmap_data[
  rowSums(heatmap_data[, seq_len(11)] > 0.5) == 11,
]
dim(n)

In [None]:
# Mean value over rows 47-523 and the first 11 columns of heatmap_data
mean(heatmap_data[47:523,1:11])

In [None]:
# Same pipeline with the roles swapped: mel_leiden is passed as `sample`
# (and therefore drives the design) and orig.ident as `cluster`.
y <- Seurat2PB(mel_only, sample = "mel_leiden", cluster = "orig.ident")
y <- normLibSizes(y)

# Grouping factor for the design. `donor` holds the values of the `sample`
# column of y$samples; the full element name `samples` is spelled out rather
# than relying on `$` partial matching.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`; each column is tested
# below and labelled "<level>_vs_others".
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per contrast column
qlf <- lapply(seq_len(ncls), function(i) {
  res <- glmQLFTest(fit, contrast = contr[, i])
  res$comparison <- paste0(levels(donor)[i], "_vs_others")
  res
})

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
#plotQLDisp(fit)
#dev.off()

# Number of Down / NotSig / Up genes for every comparison, side by side
dt <- lapply(qlf, function(res) summary(decideTestsDGE(res)))
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)



In [None]:
#top<-20
#topMarkers<-list()
#for(i in 1:ncls){ 
#  ord<-order(qlf[[i]]$table$PValue,decreasing=FALSE) 
#  up<-qlf[[i]]$table$logFC>0 
#  topMarkers[[i]]<-rownames(y)[ord[up][1:top]] 
#} 
#topMarkers<-unique(unlist(topMarkers)) 
#write.table(topMarkers,"KC_only_SCC_topmarkers_edger.txt",sep="\t",row.names = TRUE, quote = FALSE)

# Up to `top` up-regulated marker genes per comparison, ranked by p-value.
top <- 50
topMarkers <- lapply(seq_len(ncls), function(i) {
  tab <- qlf[[i]]$table
  ord <- order(tab$PValue, decreasing = FALSE)
  up <- tab$logFC > 0
  # `ord` is a permutation of gene indices while `up` is in gene order, so
  # the mask has to be permuted too (`up[ord]`); `ord[up]` would return genes
  # that are not necessarily up-regulated.
  selected_genes <- rownames(y)[ord[up[ord]]]
  # Drop genes whose names start with RPS, RPL or LINC
  selected_genes <- selected_genes[!grepl("^RPS|^RPL|^LINC", selected_genes)]
  # Keep the best `top` genes that remain after filtering
  head(selected_genes, top)
})
topMarkers <- unique(unlist(topMarkers))

#write.table(qlf[[1]]$table,"KCs_cancer_samples_Tumor_vs_nonTumor_qlf1_edger_labels_corrected.txt",sep="\t",row.names = TRUE, quote = FALSE)
head(qlf[[1]]$table[topMarkers, ])

# Significance calls for every comparison (decideTests defaults: BH, 0.05)
dt2 <- lapply(qlf, decideTests)

# Genes called significant, in either direction, in the first comparison
significant_genes <- rownames(dt2[[1]])[dt2[[1]] != 0]

# View the names of significant genes
print(significant_genes)

# Test statistics for those genes; the raw p-value cutoff is kept from the
# original analysis as an extra guard
top_table <- qlf[[1]]$table[significant_genes, ]
top_table <- top_table[top_table$PValue <= 0.05, ]
options(repr.plot.width = 8, repr.plot.height = 15)

# Log-CPM per pseudo-bulk sample, annotated by cluster
lcpm <- cpm(y, log = TRUE)
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)
# One colour index per cluster level (0, 1, 2, ...). The vector is sized from
# the data: a fixed 0:24 leaves NA-named entries when there are fewer than 25
# levels and fails when there are more.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)
ann_colors

#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the "cluster" text from sample names so matrix columns and
# annotation rows keep matching
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]

rownames(annot) <- gsub("cluster", "", rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression of the significant genes of the first comparison
heatmap_datac <- lcpm[rownames(top_table), ]

# Row-scaled heatmap; rows are clustered, columns keep their order
heatmapc <- pheatmap::pheatmap(
  heatmap_datac,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on this heatmap. The names come from
# heatmap_datac, the matrix that was actually plotted; looking them up in the
# older heatmap_data would pair the row order with a different gene set.
genes_on_heatmapc <- rownames(heatmap_datac)[heatmapc$tree_row$order]

In [None]:
# Redisplay the heatmap object
heatmapc

In [None]:
# Pseudo-bulk sample names (columns of the log-CPM matrix)
colnames(lcpm)

In [None]:
# Compare melanocyte clusters 9 and 10 against all other clusters.
# The original cell used Python list syntax (`[9, 10]`), which does not parse
# in R, and indexed the Seurat object by row; cells are selected by name here.
in_target <- mel_only$mel_leiden %in% c(9, 10)
cluster_9_10 <- subset(mel_only, cells = colnames(mel_only)[in_target])
rest_of_clusters <- subset(mel_only, cells = colnames(mel_only)[!in_target])

# Assign group labels for differential expression analysis
mel_only$group <- ifelse(in_target, "MPS13", "Others")

# Perform differential expression analysis. `group.by` makes FindMarkers read
# the labels from the new `group` column; without it ident.1/ident.2 are looked
# up in the active identities, which were never set to these labels.
markers <- FindMarkers(
  mel_only,
  ident.1 = "MPS13",
  ident.2 = "Others",
  group.by = "group"
)

# View the top markers
head(markers)



In [None]:
## pseudo assignment

In [None]:

# Randomly split the cells of mel_only into pseudo-groups of near-equal size.

# Number of cells to label and number of groups to create
n_cells <- ncol(mel_only)
n_groups <- 5

# Fixed seed so the shuffle is reproducible
set.seed(42)

# Labels 1..n_groups recycled to one per cell, then shuffled
pseudo_groups <- sample(rep_len(seq_len(n_groups), n_cells))

# Store the labels as a metadata column and preview the first few
mel_only$pseudo_groups <- pseudo_groups
head(mel_only$pseudo_groups)


In [None]:
# Number of cells per pseudo-group and original sample
mel_only@meta.data %>%
  group_by(pseudo_groups, orig.ident) %>%
  tally()

In [None]:
# Pseudo-bulk differential expression using the random pseudo-groups as the
# `cluster` column, so each orig.ident contributes several pseudo-bulk samples.
y <- Seurat2PB(mel_only, sample = "orig.ident", cluster = "pseudo_groups")
y <- normLibSizes(y)

# Grouping factor for the design. `donor` holds the values of the `sample`
# column of y$samples; the full element name `samples` is spelled out rather
# than relying on `$` partial matching.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`; each column is tested
# below and labelled "<level>_vs_others".
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per contrast column
qlf <- lapply(seq_len(ncls), function(i) {
  res <- glmQLFTest(fit, contrast = contr[, i])
  res$comparison <- paste0(levels(donor)[i], "_vs_others")
  res
})

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
#plotQLDisp(fit)
#dev.off()

# Number of Down / NotSig / Up genes for every comparison, side by side
dt <- lapply(qlf, function(res) summary(decideTestsDGE(res)))
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)



In [None]:
#top<-20
#topMarkers<-list()
#for(i in 1:ncls){ 
#  ord<-order(qlf[[i]]$table$PValue,decreasing=FALSE) 
#  up<-qlf[[i]]$table$logFC>0 
#  topMarkers[[i]]<-rownames(y)[ord[up][1:top]] 
#} 
#topMarkers<-unique(unlist(topMarkers)) 
#write.table(topMarkers,"KC_only_SCC_topmarkers_edger.txt",sep="\t",row.names = TRUE, quote = FALSE)

# Up to `top` up-regulated marker genes per comparison, ranked by p-value.
top <- 50
topMarkers <- lapply(seq_len(ncls), function(i) {
  tab <- qlf[[i]]$table
  ord <- order(tab$PValue, decreasing = FALSE)
  up <- tab$logFC > 0
  # `ord` is a permutation of gene indices while `up` is in gene order, so
  # the mask has to be permuted too (`up[ord]`); `ord[up]` would return genes
  # that are not necessarily up-regulated.
  selected_genes <- rownames(y)[ord[up[ord]]]
  # Drop genes whose names start with RPS, RPL or LINC
  selected_genes <- selected_genes[!grepl("^RPS|^RPL|^LINC", selected_genes)]
  # Keep the best `top` genes that remain after filtering
  head(selected_genes, top)
})
topMarkers <- unique(unlist(topMarkers))

#write.table(qlf[[1]]$table,"KCs_cancer_samples_Tumor_vs_nonTumor_qlf1_edger_labels_corrected.txt",sep="\t",row.names = TRUE, quote = FALSE)
head(qlf[[1]]$table[topMarkers, ])

# Significance calls for every comparison (decideTests defaults: BH, 0.05)
dt2 <- lapply(qlf, decideTests)

# Genes called significant, in either direction, in the first comparison
significant_genes <- rownames(dt2[[1]])[dt2[[1]] != 0]

# View the names of significant genes
print(significant_genes)

# Test statistics for those genes; the raw p-value cutoff is kept from the
# original analysis as an extra guard
top_table <- qlf[[1]]$table[significant_genes, ]
top_table <- top_table[top_table$PValue <= 0.05, ]

In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Log-CPM per pseudo-bulk sample, annotated by cluster
lcpm <- cpm(y, log = TRUE)
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)
# One colour index per cluster level (0, 1, 2, ...). The vector is sized from
# the data: a fixed 0:24 leaves NA-named entries when there are fewer than 25
# levels and fails when there are more.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)
ann_colors

#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the "cluster" text from sample names so matrix columns and
# annotation rows keep matching
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]

rownames(annot) <- gsub("cluster", "", rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression of the significant genes of the first comparison
heatmap_datac <- lcpm[rownames(top_table), ]

# Row-scaled heatmap; rows are clustered, columns keep their order
heatmapc <- pheatmap::pheatmap(
  heatmap_datac,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on this heatmap. The names come from
# heatmap_datac, the matrix that was actually plotted; looking them up in the
# older heatmap_data would pair the row order with a different gene set.
genes_on_heatmapc <- rownames(heatmap_datac)[heatmapc$tree_row$order]

In [None]:
# Keep genes whose value is above 7 in every one of the first five columns of
# heatmap_data (a matrix) and plot at most the first 150 of them. head() is
# used instead of `[1:150, ]` so that fewer than 150 qualifying genes no
# longer raises "subscript out of bounds".
# NOTE(review): this filters `heatmap_data` (set by an earlier cell), not the
# `heatmap_datac` computed for the current pseudo-group analysis — confirm
# that is intended.
heatmap_data_sub <- head(
  heatmap_data[rowSums(heatmap_data[, 1:5] > 7) == 5, ],
  150
)
heatmap2 <- pheatmap::pheatmap(
  lcpm[rownames(heatmap_data_sub), ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE, border_color = "NA", fontsize_row = 5,
  show_colnames = FALSE,
  annotation_col = annot, annotation_colors = ann_colors
)

#save_pheatmap_pdf(heatmap,"phaseSplit_Condition.pdf",width = 10,height = 20)

# Gene names in the row order drawn on the second heatmap
genes_on_heatmap2 <- rownames(heatmap_data_sub)[heatmap2$tree_row$order]

In [None]:
# Pseudo-bulk sample names (columns of the log-CPM matrix)
colnames(lcpm)

In [None]:
# First 100 gene names in heatmap row order (entries are NA past the end of the vector)
genes_on_heatmap2[1:100]

In [None]:
# Flag each cell of mel_only as "Malignant" when its mel_leiden cluster is
# 9 or 10 and "Benign" otherwise. The logical test (FALSE/TRUE) plus one
# indexes the two labels directly.
mel_only$malignant <- c("Benign", "Malignant")[
  (mel_only$mel_leiden %in% c(9, 10)) + 1L
]



In [None]:
# Normalise, then use the malignant/benign labels as the active identities
mel_only <- NormalizeData(mel_only)
Idents(mel_only) <- mel_only$malignant

# Wilcoxon test: malignant versus benign cells
de_genes <- FindMarkers(
  mel_only,
  ident.1 = "Malignant",
  ident.2 = "Benign",
  test.use = "wilcox"
)
de_genes

# Volcano plot; points are coloured by whether the raw p-value is below 0.05
library(ggplot2)
ggplot(de_genes, aes(x = avg_log2FC, y = -log10(p_val), color = p_val < 0.05)) +
  geom_point(size = 2) +
  labs(
    title = "Volcano plot of DE genes",
    x = "Log2 Fold Change",
    y = "-Log10 p-value"
  )


In [None]:
# Genes that are both significant after multiple-testing adjustment and
# strongly up-regulated in malignant cells. Both conditions are applied
# together; filtering `de_genes` a second time, as before, discarded the
# adjusted p-value cutoff.
de_genes_sig <- de_genes[
  de_genes$p_val_adj < 0.05 & de_genes$avg_log2FC > 3,
]

de_genes_sig

In [None]:
# Names of the genes kept in de_genes_sig
rownames(de_genes_sig)

# SCC vs Melanoma

In [None]:
# Load the frozen SCC/BCC and melanoma Seurat objects
scc_bcc <- readRDS(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_BCC_seurat.rds"
)
mel <- readRDS(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/Mel/Melanoma_seurat.Rds"
)


In [None]:
# Distinct values of the KC_leiden_R column
unique(scc_bcc$KC_leiden_R)

In [None]:
# Cells of scc_bcc whose Level2_Cancer label contains "Cancer"
# NOTE(review): `value = TRUE` looks carried over from grep(); confirm that
# subset() does anything with it.
KCcancer <- subset(
  scc_bcc,
  cells = rownames(scc_bcc@meta.data)[
    grep("Cancer", scc_bcc@meta.data$Level2_Cancer)
  ],
  value = TRUE
)

# Cells of mel whose Level2_Cancer label contains "Melanoma"
melanoma <- rownames(mel@meta.data)[grep("Melanoma", mel$Level2_Cancer)]
Melanoma <- subset(mel, cells = melanoma, value = TRUE)

# Give both objects a common seurat_clusters column taken from their own
# leiden clustering, then merge them with prefixed cell names
Melanoma@meta.data$seurat_clusters <- Melanoma$mel_leiden
KCcancer@meta.data$seurat_clusters <- KCcancer$KC_leiden_R
KC_Mel_cancer <- merge(KCcancer, Melanoma, add.cell.ids = c("KC", "Mel"))

# The merged Level2_Cancer label serves as the cancer type
KC_Mel_cancer@meta.data$cancer_type <- KC_Mel_cancer$Level2_Cancer
#KC_Mel_cancer$cancer_type<-gsub("KC Basal|KC Hair|KC Differentiating|KC Cornified","KC",KC_Mel_cancer$cancer_type)
unique(KC_Mel_cancer$cancer_type)

In [None]:
# Pseudo-bulk differential expression between cancer types on the merged
# object. The count layers of the merged object are joined first; no `cluster`
# argument is given, so Seurat2PB falls back to its default cluster column.
KC_Mel_cancer <- JoinLayers(object = KC_Mel_cancer, layers = "counts")
y <- Seurat2PB(KC_Mel_cancer, sample = "cancer_type")
y <- normLibSizes(y)

# Grouping factor for the design. `donor` holds the values of the `sample`
# column of y$samples; the full element name `samples` is spelled out rather
# than relying on `$` partial matching.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`; each column is tested
# below and labelled "<level>_vs_others".
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per contrast column
qlf <- lapply(seq_len(ncls), function(i) {
  res <- glmQLFTest(fit, contrast = contr[, i])
  res$comparison <- paste0(levels(donor)[i], "_vs_others")
  res
})

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
plotQLDisp(fit)
#dev.off()

# Number of Down / NotSig / Up genes for every comparison, side by side
dt <- lapply(qlf, function(res) summary(decideTestsDGE(res)))
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)


In [None]:
# Cutoffs used to call a gene up- or down-regulated (adjust as needed)
logFC_threshold <- 2
fdr_threshold <- 0.05

# BH-adjusted p-values for the first comparison
qlf[[1]]$table$FDR <- p.adjust(qlf[[1]]$table$PValue, method = "BH")

# Three-way label per gene from the FDR and the direction of the fold change
qlf[[1]]$table$Significance <- ifelse(
  qlf[[1]]$table$FDR < fdr_threshold & qlf[[1]]$table$logFC > logFC_threshold,
  "Upregulated",
  ifelse(
    qlf[[1]]$table$FDR < fdr_threshold & qlf[[1]]$table$logFC < -logFC_threshold,
    "Downregulated",
    "Not Significant"
  )
)

# Gene names passing both cutoffs, split by direction.
# NOTE(review): the kc_/mel_ prefixes presume the first comparison is
# KC-versus-others — confirm against levels(donor).
kc_up_sig <- rownames(
  qlf[[1]]$table[
    qlf[[1]]$table$logFC > logFC_threshold & qlf[[1]]$table$FDR < fdr_threshold,
  ]
)
mel_up_sig <- rownames(
  qlf[[1]]$table[
    qlf[[1]]$table$logFC < -logFC_threshold & qlf[[1]]$table$FDR < fdr_threshold,
  ]
)
length(kc_up_sig)
length(mel_up_sig)

In [None]:
# Number of genes to take from each direction (up and down) per comparison
top <- 50

# For every comparison: sort by p-value, split by the sign of logFC, re-sort
# each half by fold-change magnitude (so p-value only breaks ties), drop
# RPS/RPL/LINC genes and keep the `top` most extreme names from each half.
topMarkers <- lapply(seq_len(ncls), function(i) {
  by_p <- qlf[[i]]$table
  by_p <- by_p[order(by_p$PValue, decreasing = FALSE), ]

  # Positive fold changes, largest first
  pos <- by_p[by_p$logFC > 0, ]
  pos <- pos[order(-pos$logFC), ]

  # Negative fold changes, most negative first
  neg <- by_p[by_p$logFC < 0, ]
  neg <- neg[order(neg$logFC), ]

  # Remove unwanted genes (names starting with RPS, RPL or LINC)
  pos <- pos[!grepl("^RPS|^RPL|^LINC", rownames(pos)), ]
  neg <- neg[!grepl("^RPS|^RPL|^LINC", rownames(neg)), ]

  c(head(rownames(pos), top), head(rownames(neg), top))
})

# Flatten across comparisons and drop duplicates
combined_topMarkers <- unique(unlist(topMarkers, use.names = FALSE))

# Total number of genes in the final list
length(combined_topMarkers)

# From here on `topMarkers` is the flat vector of unique gene names
topMarkers <- combined_topMarkers

In [None]:
# Test statistics of the first comparison for the selected marker genes
qlf[[1]]$table[topMarkers,]

In [None]:
# Log-CPM per pseudo-bulk sample, with the cluster as column annotation
lcpm <- cpm(y, log = TRUE)
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)
# One colour index per cluster level (0, 1, 2, ...). The vector is sized from
# the data: a fixed 0:16 leaves NA-named entries when there are fewer than 17
# levels and fails when there are more.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the "cluster" text from the pseudo-bulk sample names
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]


#rownames(annot)<-gsub("cluster","",rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression of the selected marker genes
heatmap_data <- lcpm[topMarkers, ]

# Row-scaled heatmap without column annotation; rows are clustered,
# columns keep their order
kc_mel <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Gene names in the row order drawn on the heatmap
genes_on_heatmap <- rownames(heatmap_data)[kc_mel$tree_row$order]


In [None]:
top <- 50  # Number of top genes to select from each direction (up and down)

# Only the first contrast is used. The previous version looped over 1:ncls but
# read qlf[[1]] on every pass, producing identical vectors that unique() then
# collapsed; a single pass gives the same gene list.
current_table <- qlf[[1]]$table

# The FDR column is only attached to qlf[[1]]$table by other cells. Compute it
# on this local copy when absent: order(NULL) would otherwise silently return
# an empty table and no genes would be selected.
if (is.null(current_table$FDR)) {
  current_table$FDR <- p.adjust(current_table$PValue, method = "BH")
}

# Order by FDR first. The logFC sorts below re-order the rows, so the FDR order
# only decides between genes with exactly equal logFC.
ordered_genes <- current_table[order(current_table$FDR, decreasing = FALSE), ]

# Upregulated genes (logFC > 0), largest logFC first.
upregulated <- ordered_genes[ordered_genes$logFC > 0, ]
upregulated <- upregulated[order(-upregulated$logFC), ]

# Downregulated genes (logFC < 0), most negative logFC first.
downregulated <- ordered_genes[ordered_genes$logFC < 0, ]
downregulated <- downregulated[order(downregulated$logFC), ]

# Remove unwanted genes (names starting with RPS, RPL or LINC).
upregulated <- upregulated[!grepl("^RPS|^RPL|^LINC", rownames(upregulated)), ]
downregulated <- downregulated[!grepl("^RPS|^RPL|^LINC", rownames(downregulated)), ]

# Top N gene names from each direction.
top_up <- head(rownames(upregulated), top)
top_down <- head(rownames(downregulated), top)

# De-duplicated union of both directions.
combined_topMarkers <- unique(c(top_up, top_down))

# Print the total number of genes in the final list
length(combined_topMarkers)

# `topMarkers` is the flat character vector used by the heatmap code below.
topMarkers <- combined_topMarkers



# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

library(pheatmap)

# Strip the text "cluster" from the column labels (this modifies `lcpm`).
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))

# Disabled: manual re-ordering of columns and annotation rows.
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]
#rownames(annot)<-gsub("cluster","",rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression matrix restricted to the selected marker genes.
heatmap_data <- lcpm[topMarkers, ]

# Row-scaled heatmap on a blue-white-red scale spanning [-2, 2]; columns keep
# their input order. This overwrites any earlier `kc_mel` object.
kc_mel <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Gene names in the order given by the row dendrogram of `kc_mel`.
genes_on_heatmap <- rownames(heatmap_data)[kc_mel$tree_row$order]


In [None]:
# Display the gene names in heatmap row order.
genes_on_heatmap

In [None]:
# Set plot dimensions (wide and short, because genes now run along the x axis)
options(repr.plot.width = 20, repr.plot.height = 5)

# After transposing, rows are the columns of `lcpm` and columns are genes.
heatmap_data_transposed <- t(heatmap_data)

# Generate the heatmap.
# Every per-axis option has to be swapped along with the data so that this is
# the same picture as the gene-by-row heatmap, just rotated:
#   * scale = "column"  -> z-scores are still computed per gene
#     (scale = "row" on the transposed matrix would standardise each sample
#     across genes instead);
#   * cluster_cols / cluster_rows -> genes are clustered, the other axis keeps
#     its input order;
#   * fontsize_col -> the font size applies to the gene labels that are shown.
kc_mel_flipped <- pheatmap::pheatmap(
  heatmap_data_transposed,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "column",
  cluster_rows = FALSE,
  cluster_cols = TRUE,
  border_color = "NA",
  fontsize_col = 8,
  show_colnames = TRUE,
  show_rownames = FALSE
)


In [None]:
# Display the column labels of the log-CPM matrix.
colnames(lcpm)

In [None]:
# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

# Row-scaled heatmap of the genes in `kc_up_sig` followed by `mel_up_sig`
# (both vectors are created in other cells).
kc_mel_sig <- pheatmap::pheatmap(
  lcpm[c(kc_up_sig, mel_up_sig), ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)



In [None]:
# Display the column labels of the log-CPM matrix.
colnames(lcpm)

In [None]:
# Display the currently selected marker genes.
topMarkers

In [None]:
# Cut-offs applied to the first contrast: absolute log fold change and raw
# (unadjusted) p-value.
logFC_threshold <- 2  # Change if necessary
pvalue_threshold <- 0.05  # Adjust based on significance level

# Genes with logFC above the threshold.
# NOTE(review): labelled "up in KC" on the assumption that qlf[[1]] is the
# KC-vs-others contrast - confirm against levels(donor).
kc_up <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, PValue < pvalue_threshold & logFC > logFC_threshold),
])

# Genes with logFC below minus the threshold (treated as up in MEL).
mel_up <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, PValue < pvalue_threshold & logFC < -logFC_threshold),
])

# Report how many genes fall into each set.
cat("Number of significantly upregulated genes in KC:", length(kc_up), "\n")
cat("Number of significantly downregulated genes in KC (up in MEL):", length(mel_up), "\n")


In [None]:
# Row-scaled heatmap of the `mel_up` genes; columns keep their input order.
pheatmap::pheatmap(
  lcpm[mel_up, ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Same plot settings for the `kc_up` genes.
pheatmap::pheatmap(
  lcpm[kc_up, ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)


In [None]:
# Small square plotting area for the notebook output.
options(repr.plot.width = 5, repr.plot.height = 5)

library(ggplot2)

# Benjamini-Hochberg adjusted p-values, stored on the first contrast's table.
qlf[[1]]$table$FDR <- p.adjust(qlf[[1]]$table$PValue, method = "BH")

# Cut-offs used to colour the points. Note that this overwrites the global
# `logFC_threshold` set by other cells.
logFC_threshold <- 5  # Fold change threshold (adjust as needed)
fdr_threshold <- 0.05 # FDR threshold for significance

# Three-way label per gene: passes the FDR cut-off and lies beyond the positive
# or negative logFC threshold, otherwise "Not Significant".
qlf[[1]]$table$Significance <- with(qlf[[1]]$table, {
  passes_fdr <- FDR < fdr_threshold
  ifelse(
    passes_fdr & logFC > logFC_threshold, "Upregulated",
    ifelse(passes_fdr & logFC < -logFC_threshold, "Downregulated", "Not Significant")
  )
})

# Plain data frame copy of the table for ggplot2.
volcano_data <- data.frame(qlf[[1]]$table)

# logFC on x, -log10 of the raw p-value on y, coloured by the label above.
volcano_plot <- ggplot(
  volcano_data,
  aes(x = logFC, y = -log10(PValue), color = Significance)
) +
  geom_point(alpha = 0.8, size = 2) +
  scale_color_manual(
    values = c("Upregulated" = "red", "Downregulated" = "blue", "Not Significant" = "grey")
  ) +
  theme_minimal() +
  labs(title = "Volcano Plot", x = "Log2 Fold Change", y = "-Log10 P-value") +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# Display the plot
print(volcano_plot)


In [None]:
# Show the first contrast's statistics for the selected marker genes.
qlf[[1]]$table[topMarkers,]

# Melanoma vs melanocytes

In [None]:
# Names of the cells whose Level2 annotation contains "Melanocytes".
mel_only <- rownames(mel@meta.data)[grep("Melanocytes", mel$Level2)]

# Replace the name vector with the Seurat object restricted to those cells.
# NOTE(review): `value = TRUE` looks like a leftover grep() argument; it is
# kept here because its effect inside subset() has not been verified.
mel_only <- subset(mel, cells = mel_only, value = TRUE)

# Display a summary of the subset object.
mel_only

In [None]:
# Display the per-cell values of the metadata field used as "sample" below.
mel_only$Level3_MPS13cancer

In [None]:
# Build the pseudo-bulk DGEList from the melanocyte subset, using
# `Level3_MPS13cancer` as the sample field and `mel_leiden` as the cluster
# field, then compute library-size normalisation factors.
y <- Seurat2PB(mel_only, sample = "Level3_MPS13cancer", cluster = "mel_leiden")
y <- normLibSizes(y)

# Grouping factor for the model. `samples` is spelled out in full: the previous
# `y$sample$sample` relied on `$` partial matching of element names.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

# Dispersion estimation and quasi-likelihood GLM fit.
y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`: the level's own
# coefficient gets 1, every other coefficient gets 1 / (1 - ncls), and the
# intercept row is then set to 0. (A stray unary "+" in front of the second
# matrix was removed; it had no effect.)
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per level, labelled "<level>_vs_others".
qlf <- list()
for (i in seq_len(ncls)) {
  qlf[[i]] <- glmQLFTest(fit, contrast = contr[, i])
  qlf[[i]]$comparison <- paste0(levels(donor)[i], "_vs_others")
}

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
plotQLDisp(fit)
#dev.off()

# Per-contrast summary of the decideTestsDGE calls, bound side by side.
dt <- lapply(lapply(qlf, decideTestsDGE), summary)
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)


In [None]:
# Benjamini-Hochberg adjusted p-values for the first contrast.
qlf[[1]]$table$FDR <- p.adjust(qlf[[1]]$table$PValue, method = "BH")

# Cut-offs for calling a gene significant.
logFC_threshold <- 2  # Fold change threshold (adjust as needed)
fdr_threshold <- 0.05 # FDR threshold for significance

# Three-way label per gene, stored alongside the test statistics.
qlf[[1]]$table$Significance <- with(qlf[[1]]$table, {
  passes_fdr <- FDR < fdr_threshold
  ifelse(
    passes_fdr & logFC > logFC_threshold, "Upregulated",
    ifelse(passes_fdr & logFC < -logFC_threshold, "Downregulated", "Not Significant")
  )
})

# Genes passing the FDR cut-off with logFC above the threshold.
# NOTE(review): named "norm_mel" on the assumption that positive logFC in
# qlf[[1]] means higher in normal melanocytes - confirm against levels(donor).
norm_mel_up_sig <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, FDR < fdr_threshold & logFC > logFC_threshold),
])
length(norm_mel_up_sig)

# Genes passing the FDR cut-off with logFC below minus the threshold.
cancer_mel_up_sig <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, FDR < fdr_threshold & logFC < -logFC_threshold),
])

# Sizes of both sets.
length(norm_mel_up_sig)
length(cancer_mel_up_sig)

In [None]:
# Size of `melanoma_up`. That vector is assigned in a cell further down the
# notebook (the raw p-value thresholding), so that cell has to be run first.
length(melanoma_up)

In [None]:
top <- 50  # Number of top genes to select from each direction (up and down)

# For every contrast in `qlf`, pick up to `top` genes with the largest positive
# logFC and up to `top` with the most negative logFC, after removing genes
# whose names start with RPS, RPL or LINC.
# NOTE(review): rows are first ordered by PValue, but the logFC sorts that
# follow re-order them, so the p-value order only separates genes with equal
# logFC and no significance filter is applied.
topMarkers <- lapply(1:ncls, function(i) {
  by_pvalue <- qlf[[i]]$table
  by_pvalue <- by_pvalue[order(by_pvalue$PValue, decreasing = FALSE), ]

  # Positive logFC, largest first.
  up <- by_pvalue[by_pvalue$logFC > 0, ]
  up <- up[order(-up$logFC), ]
  up <- up[!grepl("^RPS|^RPL|^LINC", rownames(up)), ]

  # Negative logFC, most negative first.
  down <- by_pvalue[by_pvalue$logFC < 0, ]
  down <- down[order(down$logFC), ]
  down <- down[!grepl("^RPS|^RPL|^LINC", rownames(down)), ]

  c(head(rownames(up), top), head(rownames(down), top))
})

# Flatten across contrasts and drop repeated genes.
combined_topMarkers <- unique(unlist(topMarkers, use.names = FALSE))

# Print the total number of genes in the final list
length(combined_topMarkers)

# `topMarkers` becomes the flat gene vector used by the heatmap cells.
topMarkers <- combined_topMarkers

In [None]:
# Log-CPM matrix for every pseudo-bulk column of `y`.
lcpm <- cpm(y, log = TRUE)

# One-column annotation frame holding the cluster label of each column of `y`.
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)

# One integer colour code (starting at 0) per cluster level. The range is
# derived from the factor instead of being hard-coded (previously 0:20), so the
# codes and the level names always have the same length.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)

In [None]:
# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the text "cluster" from the column labels (this modifies `lcpm`).
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]


#rownames(annot)<-gsub("cluster","",rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression matrix restricted to the selected marker genes.
heatmap_data <- lcpm[topMarkers, ]

# Row-scaled heatmap on a blue-white-red scale spanning [-2, 2]; columns keep
# their input order.
mel_c_vs_n <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Gene names in the row order of THIS heatmap. The previous code indexed with
# kc_mel$tree_row$order, i.e. the dendrogram of a different heatmap built from
# a different gene set, which yields a wrong (or out-of-range) ordering.
genes_on_heatmap <- rownames(heatmap_data)[mel_c_vs_n$tree_row$order]


In [None]:
# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

# Row-scaled heatmap of the FDR-filtered gene sets: `norm_mel_up_sig` first,
# then `cancer_mel_up_sig`.
mel_c_n_sig <- pheatmap::pheatmap(
  lcpm[c(norm_mel_up_sig, cancer_mel_up_sig), ],
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

In [None]:
# Display the column labels of the log-CPM matrix.
colnames(lcpm)

In [None]:
# Cut-offs applied to the first contrast: absolute log fold change and raw
# (unadjusted) p-value.
logFC_threshold <- 2  # Change if necessary
pvalue_threshold <- 0.05  # Adjust based on significance level

# Genes with logFC above the threshold.
# NOTE(review): labelled "up in normal melanocytes" on the assumption that
# positive logFC in qlf[[1]] means higher in normal melanocytes - confirm
# against levels(donor).
norm_melanocytes_up <- rownames(qlf[[1]]$table[qlf[[1]]$table$logFC > logFC_threshold & qlf[[1]]$table$PValue < pvalue_threshold, ])

# Genes with logFC below minus the threshold (treated as up in melanoma).
melanoma_up <- rownames(qlf[[1]]$table[qlf[[1]]$table$logFC < -logFC_threshold & qlf[[1]]$table$PValue < pvalue_threshold, ])

# Print summary. The second message used to say "in KC", a leftover from the
# keratinocyte analysis; it now names the comparison made in this section.
cat("Number of significantly upregulated genes in norm mel:", length(norm_melanocytes_up), "\n")
cat("Number of significantly downregulated genes in norm mel (up in cancer mel):", length(melanoma_up), "\n")


# KC Normal Vs Cancer

In [None]:
# Names of the cells whose Level2 annotation contains "KC".
kc_only <- rownames(scc_bcc@meta.data)[grep("KC", scc_bcc$Level2)]

# Replace the name vector with the Seurat object restricted to those cells.
# NOTE(review): `value = TRUE` looks like a leftover grep() argument; it is
# kept here because its effect inside subset() has not been verified.
kc_only <- subset(scc_bcc, cells = kc_only, value = TRUE)

# Display a summary of the subset object.
kc_only

In [None]:
# Labels that are collapsed into the single category "KC Normal".
normal_kc_pattern <- "KC Basal|KC Hair|KC Differentiating|KC Cornified|KC Dysplastic|KC IFN"

# Start from a copy of Level2_Cancer, then rewrite the matching labels.
kc_only@meta.data$cancer_type <- kc_only$Level2_Cancer
kc_only$cancer_type <- gsub(normal_kc_pattern, "KC Normal", kc_only$cancer_type)

# Show the categories that remain after collapsing.
unique(kc_only$cancer_type)

# Display the per-cell values of the field used as "cluster" for pseudo-bulking.
kc_only$KC_leiden_R

In [None]:
# Build the pseudo-bulk DGEList from the keratinocyte subset, using
# `cancer_type` as the sample field and `KC_leiden_R` as the cluster field,
# then compute library-size normalisation factors.
y <- Seurat2PB(kc_only, sample = "cancer_type", cluster = "KC_leiden_R")
y <- normLibSizes(y)

# Grouping factor for the model. `samples` is spelled out in full: the previous
# `y$sample$sample` relied on `$` partial matching of element names.
donor <- factor(y$samples$sample)
design <- model.matrix(~donor)
colnames(design) <- gsub("donor", "", colnames(design))
#colnames(design)<-gsub("cluster","",colnames(design))
colnames(design)[1] <- "Int"

# Dispersion estimation and quasi-likelihood GLM fit.
y <- estimateDisp(y, design, robust = TRUE)
y$common.dispersion
fit <- glmQLFit(y, design, robust = TRUE)

# Contrast matrix with one column per level of `donor`: the level's own
# coefficient gets 1, every other coefficient gets 1 / (1 - ncls), and the
# intercept row is then set to 0. (A stray unary "+" in front of the second
# matrix was removed; it had no effect.)
ncls <- nlevels(donor)
contr <- rbind(
  matrix(1 / (1 - ncls), ncls, ncls),
  matrix(0, ncol(design) - ncls, ncls)
)
diag(contr) <- 1
contr[1, ] <- 0
rownames(contr) <- colnames(design)
colnames(contr) <- colnames(design)

# One quasi-likelihood F-test per level, labelled "<level>_vs_others".
qlf <- list()
for (i in seq_len(ncls)) {
  qlf[[i]] <- glmQLFTest(fit, contrast = contr[, i])
  qlf[[i]]$comparison <- paste0(levels(donor)[i], "_vs_others")
}

options(repr.plot.width = 6, repr.plot.height = 6)
cluster <- as.factor(y$samples$cluster)
#pdf("MDS_edgeR_SCC_KC_only_corrected_labels.pdf")
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#legend("topleft",legend=paste0(levels(cluster)), pch=14,col=2:8,cex=0.8)
#dev.off()
#plotMDS(y,pch=16,col=c(2:8)[cluster],main="MDS") 
#pdf("dispersion.pdf")
#plotBCV(y)
#dev.off()

#pdf("QLdisp.pdf")
plotQLDisp(fit)
#dev.off()

# Per-contrast summary of the decideTestsDGE calls, bound side by side.
dt <- lapply(lapply(qlf, decideTestsDGE), summary)
dt.all <- do.call("cbind", dt)
head(dt.all)
#write.table(dt.all,"KC_only_cluster_wise_stats_labels_corrected.txt",sep="\t",quote=FALSE,row.names=TRUE)


In [None]:
# Benjamini-Hochberg adjusted p-values for the first contrast.
qlf[[1]]$table$FDR <- p.adjust(qlf[[1]]$table$PValue, method = "BH")

# Cut-offs for calling a gene significant.
logFC_threshold <- 2  # Fold change threshold (adjust as needed)
fdr_threshold <- 0.05 # FDR threshold for significance

# Three-way label per gene, stored alongside the test statistics.
qlf[[1]]$table$Significance <- with(qlf[[1]]$table, {
  passes_fdr <- FDR < fdr_threshold
  ifelse(
    passes_fdr & logFC > logFC_threshold, "Upregulated",
    ifelse(passes_fdr & logFC < -logFC_threshold, "Downregulated", "Not Significant")
  )
})

# Genes passing the FDR cut-off with logFC above / below the threshold.
# NOTE(review): here positive logFC is named "norm_kc", whereas the raw p-value
# cell for this same contrast assigns positive logFC to `kc_cancer_up`. One of
# the two namings must be inverted - check levels(donor)[1] to see which group
# qlf[[1]] compares against the others.
norm_kc_up_sig <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, FDR < fdr_threshold & logFC > logFC_threshold),
])
cancer_kc_up_sig <- rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, FDR < fdr_threshold & logFC < -logFC_threshold),
])

# Sizes of both sets.
length(norm_kc_up_sig)
length(cancer_kc_up_sig)

In [None]:
top <- 50  # Number of top genes to select from each direction (up and down)

# For every contrast in `qlf`, pick up to `top` genes with the largest positive
# logFC and up to `top` with the most negative logFC, after removing genes
# whose names start with RPS, RPL or LINC.
# NOTE(review): rows are first ordered by PValue, but the logFC sorts that
# follow re-order them, so the p-value order only separates genes with equal
# logFC and no significance filter is applied.
topMarkers <- lapply(1:ncls, function(i) {
  by_pvalue <- qlf[[i]]$table
  by_pvalue <- by_pvalue[order(by_pvalue$PValue, decreasing = FALSE), ]

  # Positive logFC, largest first.
  up <- by_pvalue[by_pvalue$logFC > 0, ]
  up <- up[order(-up$logFC), ]
  up <- up[!grepl("^RPS|^RPL|^LINC", rownames(up)), ]

  # Negative logFC, most negative first.
  down <- by_pvalue[by_pvalue$logFC < 0, ]
  down <- down[order(down$logFC), ]
  down <- down[!grepl("^RPS|^RPL|^LINC", rownames(down)), ]

  c(head(rownames(up), top), head(rownames(down), top))
})

# Flatten across contrasts and drop repeated genes.
combined_topMarkers <- unique(unlist(topMarkers, use.names = FALSE))

# Print the total number of genes in the final list
length(combined_topMarkers)

# `topMarkers` becomes the flat gene vector used by the heatmap cells.
topMarkers <- combined_topMarkers

In [None]:
# Log-CPM matrix for every pseudo-bulk column of `y`.
lcpm <- cpm(y, log = TRUE)

# One-column annotation frame holding the cluster label of each column of `y`.
annot <- data.frame(cluster = cluster)
rownames(annot) <- colnames(y)

# One integer colour code (starting at 0) per cluster level. The range is
# derived from the factor instead of being hard-coded (previously 0:25), so the
# codes and the level names always have the same length.
ann_colors <- list(cluster = seq_along(levels(cluster)) - 1L)
names(ann_colors$cluster) <- levels(cluster)

In [None]:
# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the text "cluster" from the column labels (this modifies `lcpm`).
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]


#rownames(annot)<-gsub("cluster","",rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression matrix restricted to the selected marker genes.
heatmap_data <- lcpm[topMarkers, ]

# Row-scaled heatmap on a blue-white-red scale spanning [-2, 2]; columns keep
# their input order.
kc_cvsn <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Gene names in the row order of THIS heatmap. The previous code indexed with
# kc_mel$tree_row$order, i.e. the dendrogram of a different heatmap built from
# a different gene set, which yields a wrong (or out-of-range) ordering.
genes_on_heatmap_kc_cvsn <- rownames(heatmap_data)[kc_cvsn$tree_row$order]


In [None]:
top <- 50  # Number of top genes to select from each direction (up and down)

# Only the first contrast is used. The previous version looped over 1:ncls but
# read qlf[[1]] on every pass, producing identical vectors that unique() then
# collapsed; a single pass gives the same gene list.
current_table <- qlf[[1]]$table

# The FDR column is only attached to qlf[[1]]$table by other cells. Compute it
# on this local copy when absent: order(NULL) would otherwise silently return
# an empty table and no genes would be selected.
if (is.null(current_table$FDR)) {
  current_table$FDR <- p.adjust(current_table$PValue, method = "BH")
}

# Order by FDR first. The logFC sorts below re-order the rows, so the FDR order
# only decides between genes with exactly equal logFC.
ordered_genes <- current_table[order(current_table$FDR, decreasing = FALSE), ]

# Upregulated genes (logFC > 0), largest logFC first.
upregulated <- ordered_genes[ordered_genes$logFC > 0, ]
upregulated <- upregulated[order(-upregulated$logFC), ]

# Downregulated genes (logFC < 0), most negative logFC first.
downregulated <- ordered_genes[ordered_genes$logFC < 0, ]
downregulated <- downregulated[order(downregulated$logFC), ]

# Remove unwanted genes (names starting with RPS, RPL or LINC).
upregulated <- upregulated[!grepl("^RPS|^RPL|^LINC", rownames(upregulated)), ]
downregulated <- downregulated[!grepl("^RPS|^RPL|^LINC", rownames(downregulated)), ]

# Top N gene names from each direction.
top_up <- head(rownames(upregulated), top)
top_down <- head(rownames(downregulated), top)

# De-duplicated union of both directions.
combined_topMarkers <- unique(c(top_up, top_down))

# Print the total number of genes in the final list
length(combined_topMarkers)

# `topMarkers` is the flat character vector used by the heatmap code below.
topMarkers <- combined_topMarkers



# Tall plotting area for the notebook output.
options(repr.plot.width = 8, repr.plot.height = 15)

library(pheatmap)
#o<-read.csv("KC_only_desired_order.txt",sep="\t",header=FALSE)
#desired_order<-o$V1
# Strip the text "cluster" from the column labels (this modifies `lcpm`).
colnames(lcpm) <- gsub("cluster", "", colnames(lcpm))
#desired_order<-c("0","1","11","2","4","6","3","5","7","8","9","10","12","13","14","15")
#lcpm_o <- lcpm[,match(desired_order, colnames(lcpm)), drop = FALSE]


#rownames(annot)<-gsub("cluster","",rownames(annot))
#annot_o <- annot[match(desired_order, rownames(annot)), , drop = FALSE]

# Expression matrix restricted to the selected marker genes.
heatmap_data <- lcpm[topMarkers, ]

# Row-scaled heatmap on a blue-white-red scale spanning [-2, 2]; columns keep
# their input order.
kc_c_n_sig <- pheatmap::pheatmap(
  heatmap_data,
  breaks = seq(-2, 2, length.out = 101),
  color = colorRampPalette(c("blue", "white", "red"))(100),
  scale = "row",
  cluster_cols = FALSE,
  border_color = "NA",
  fontsize_row = 8,
  show_colnames = FALSE
)

# Gene names in the row order of THIS heatmap. The previous code indexed with
# kc_mel$tree_row$order, i.e. the dendrogram of a different heatmap built from
# a different gene set. Note that this overwrites `genes_on_heatmap`.
genes_on_heatmap <- rownames(heatmap_data)[kc_c_n_sig$tree_row$order]


In [None]:
# Display the column labels of the log-CPM matrix.
colnames(lcpm)

In [None]:
# Cut-offs applied to the first contrast: absolute log fold change and raw
# (unadjusted) p-value.
logFC_threshold <- 2  # Change if necessary
pvalue_threshold <- 0.05  # Adjust based on significance level

# Genes with logFC above the threshold.
# NOTE(review): here positive logFC is named "kc_cancer", whereas the FDR-based
# cell for this same contrast assigns positive logFC to `norm_kc_up_sig`. One
# of the two namings must be inverted - check levels(donor)[1].
kc_cancer_up <- rownames(qlf[[1]]$table[qlf[[1]]$table$logFC > logFC_threshold & qlf[[1]]$table$PValue < pvalue_threshold, ])

# Genes with logFC below minus the threshold.
kc_norm_up <- rownames(qlf[[1]]$table[qlf[[1]]$table$logFC < -logFC_threshold & qlf[[1]]$table$PValue < pvalue_threshold, ])

# Print summary. The messages used to mention "MEL", a leftover from the
# KC-vs-melanocyte analysis; they now match the variable names used here.
cat("Number of significantly upregulated genes in KC cancer:", length(kc_cancer_up), "\n")
cat("Number of significantly downregulated genes in KC cancer (up in KC normal):", length(kc_norm_up), "\n")


In [None]:
# Display the six gene sets obtained with the raw p-value cut-off
# (each is assigned in its own thresholding cell).
norm_melanocytes_up
melanoma_up
kc_cancer_up
kc_norm_up
kc_up
mel_up

In [None]:
# Install VennDiagram only when it is missing, so re-running the notebook does
# not reinstall the package (or require network access) every time.
if (!requireNamespace("VennDiagram", quietly = TRUE)) {
  install.packages("VennDiagram")
}


In [None]:
library(VennDiagram)

# The four raw p-value gene sets to compare. The generic "ListN" labels are
# what appears on the diagram:
#   List1 = kc_up, List2 = kc_cancer_up, List3 = mel_up, List4 = melanoma_up
gene_lists <- list(
  List1 = kc_up,
  List2 = kc_cancer_up,
  List3 = mel_up,
  List4 = melanoma_up
)

# Build the diagram as a grid object; filename = NULL keeps it in memory
# instead of writing an image file.
venn.plot <- venn.diagram(
  x = gene_lists,
  filename = NULL, # Use NULL to draw in R's plotting window
  category.names = c("List1", "List2", "List3", "List4"),
  main = "4-Way Venn Diagram",
  fill = c("red", "blue", "green", "yellow"),
  alpha = 0.5,
  col = "black",
  cex = 1,
  cat.cex = 1,
  cat.col = "black"
)

# Draw it on a fresh page.
grid.newpage()
grid.draw(venn.plot)


In [None]:
# Square plotting area for the notebook output.
options(repr.plot.width = 6, repr.plot.height = 6)

# The four FDR-filtered gene sets to compare. The generic "ListN" labels are
# what appears on the diagram:
#   List1 = kc_up_sig, List2 = cancer_kc_up_sig,
#   List3 = mel_up_sig, List4 = cancer_mel_up_sig
gene_lists <- list(
  List1 = kc_up_sig,
  List2 = cancer_kc_up_sig,
  List3 = mel_up_sig,
  List4 = cancer_mel_up_sig
)

# Build the diagram as a grid object; filename = NULL keeps it in memory
# instead of writing an image file.
venn.plot <- venn.diagram(
  x = gene_lists,
  filename = NULL, # Use NULL to draw in R's plotting window
  category.names = c("List1", "List2", "List3", "List4"),
  main = "4-Way Venn Diagram",
  fill = c("red", "blue", "green", "yellow"),
  alpha = 0.5,
  col = "black",
  cex = 1,
  cat.cex = 1,
  cat.col = "black"
)

# Draw it on a fresh page.
grid.newpage()
grid.draw(venn.plot)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)

# Write the flipped KC-vs-MEL heatmap to a 20 x 5 inch PDF.
pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/KCcancer_vs_mel.pdf", width=20, height=5)
# Draw explicitly with print(): a bare object name is only rendered by
# top-level auto-printing, so the page would be empty if this code ran inside
# source(), a function or a loop.
print(kc_mel_flipped)
dev.off()

In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Write the cancer-vs-normal melanocyte heatmap (FDR-filtered genes) to an
# 8 x 15 inch PDF.
pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/cancer_vs_normal_mel_samples_sig.pdf", width=8, height=15)
# Draw explicitly with print(): a bare object name is only rendered by
# top-level auto-printing, so the page would be empty if this code ran inside
# source(), a function or a loop.
print(mel_c_n_sig)
dev.off()

In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Write the KC cancer-vs-normal heatmap to an 8 x 15 inch PDF.
pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/cancer_vs_normal_KCs.pdf", width=8, height=15)
# The previous code referenced `mel_cvsn`, which is never assigned in this
# notebook. The heatmap built for the KC cancer-vs-normal markers is `kc_cvsn`,
# which is what the output file name describes. print() draws it explicitly
# instead of relying on top-level auto-printing.
print(kc_cvsn)
dev.off()

In [None]:
# Gene sets to export. list1-list4 are the FDR-filtered sets; list5 and list6
# are the KC-vs-MEL sets selected on the raw p-value.
list1<-norm_mel_up_sig
list2<-cancer_mel_up_sig

list3<-cancer_kc_up_sig
list4<-norm_kc_up_sig
list5<-kc_up
list6<-mel_up

In [None]:
# A data frame needs equally long columns, so every gene vector is extended
# with NA up to the length of the longest one.
max_length <- max(lengths(list(list1, list2, list3, list4, list5, list6)))

# Append as many NAs as are needed to reach `n` elements.
pad_with_na <- function(genes, n) c(genes, rep(NA, n - length(genes)))

list1 <- pad_with_na(list1, max_length)
list2 <- pad_with_na(list2, max_length)
list3 <- pad_with_na(list3, max_length)
list4 <- pad_with_na(list4, max_length)
list5 <- pad_with_na(list5, max_length)
list6 <- pad_with_na(list6, max_length)

# One column per gene set.
gene_lists_df <- data.frame(
  norm_melanocytes_up = list1,
  melanoma_up = list2,
  kc_cancer_up = list3,
  kc_norm_up = list4,
  kc_up = list5,
  mel_up = list6,
  stringsAsFactors = FALSE
)

# Tab-separated export into the working directory; padding shows up as "NA".
write.table(gene_lists_df, "gene_lists_sig.txt", sep = "\t", row.names = FALSE, quote = FALSE)

# Preview the first rows.
head(gene_lists_df)

In [None]:
  # Gene sets to export, keyed by the name of the vector they come from.
# The previous version wrote kc_up_sig, cancer_kc_up_sig, mel_up_sig and
# cancer_mel_up_sig under the column headers norm_melanocytes_up, melanoma_up,
# kc_cancer_up and kc_norm_up, so the headers did not describe the contents.
# It also reused list5/list6 as left (already NA-padded) by the previous cell;
# kc_up and mel_up are now read directly.
gene_sets <- list(
  kc_up_sig = kc_up_sig,
  cancer_kc_up_sig = cancer_kc_up_sig,
  mel_up_sig = mel_up_sig,
  cancer_mel_up_sig = cancer_mel_up_sig,
  kc_up = kc_up,
  mel_up = mel_up
)

# A data frame needs equally long columns: extend every vector with NA up to
# the length of the longest one.
max_length <- max(lengths(gene_sets))
padded_sets <- lapply(gene_sets, function(genes) {
  c(genes, rep(NA, max_length - length(genes)))
})

# One column per gene set, named after the set.
gene_lists_df <- data.frame(padded_sets, stringsAsFactors = FALSE)

# Write the data frame to a tab-separated text file in the working directory.
write.table(gene_lists_df, "edger_gene_lists_sig.txt", sep = "\t", row.names = FALSE, quote = FALSE)

# Confirm the contents of the file
head(gene_lists_df)

In [None]:
# Benjamini-Hochberg adjusted p-values for the first contrast of whichever
# analysis `qlf` currently holds.
qlf[[1]]$table$FDR <- p.adjust(qlf[[1]]$table$PValue, method = "BH")

# Cut-offs for calling a gene significant.
logFC_threshold <- 2  # Fold change threshold (adjust as needed)
fdr_threshold <- 0.05 # FDR threshold for significance

# Three-way label per gene, stored alongside the test statistics.
qlf[[1]]$table$Significance <- with(qlf[[1]]$table, {
  passes_fdr <- FDR < fdr_threshold
  ifelse(
    passes_fdr & logFC > logFC_threshold, "Upregulated",
    ifelse(passes_fdr & logFC < -logFC_threshold, "Downregulated", "Not Significant")
  )
})

# Display the genes passing the FDR cut-off with logFC above the threshold.
rownames(qlf[[1]]$table[
  with(qlf[[1]]$table, FDR < fdr_threshold & logFC > logFC_threshold),
])

In [None]:
# Sizes of the FDR-filtered and raw p-value KC sets, and of their overlap.
length(kc_up_sig)
length(kc_up)
length(intersect(kc_up_sig,kc_up))

In [None]:
# How many of the FDR-filtered KC genes appear among the genes on the heatmap.
length(kc_up_sig)
length(genes_on_heatmap)
length(intersect(kc_up_sig,genes_on_heatmap))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 15)

# Write the `kc_mel` heatmap to an 8 x 15 inch PDF in the working directory.
# NOTE(review): the file is called "kc_mel_sig.pdf" but the object drawn is
# `kc_mel`, not the separately built `kc_mel_sig` - confirm which one is meant.
pdf("kc_mel_sig.pdf", width=8, height=15)
# Draw explicitly with print(): a bare object name is only rendered by
# top-level auto-printing, so the page would be empty if this code ran inside
# source(), a function or a loop.
print(kc_mel)
dev.off()

In [None]:
# Show the working directory, i.e. where files written with relative paths end up.
getwd()

In [None]:
# Genes present in both `kc_up_sig` and `cancer_kc_up_sig`.
intersect(kc_up_sig,cancer_kc_up_sig)

In [None]:
# Genes present in both `mel_up_sig` and `cancer_mel_up_sig`.
intersect(mel_up_sig,cancer_mel_up_sig)

In [None]:
# Genes in `mel_up_sig` that are not in `cancer_mel_up_sig`.
setdiff(mel_up_sig, cancer_mel_up_sig)