In [None]:
library(Seurat)
library(ape)
library(ggtree)
library(ggplot2)
library(dplyr)
library(tidyverse)


# SCC BCC

In [None]:
# Per-cell metadata table for the SCC object (tab-separated text).
scc <- read.delim(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_full_object_final_Nov26_metadata.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)

# Keep only the three annotation levels used to build the tree.
df <- data.frame(
  level1 = scc$Level1,
  level2 = scc$Level2_Cancer,
  level3 = scc$Level3_Cancer
)

# Count the rows for each (level1, level2, level3) combination, once.
# The counting is done on the raw labels and the spaces are stripped
# afterwards, so the row order is the same as before. ungroup() drops the
# grouping that tally() leaves behind so later steps see a plain tibble.
counts <- df %>%
  group_by(level1, level2, level3) %>%
  tally() %>%
  ungroup() %>%
  mutate(across(c(level1, level2, level3), function(x) gsub(" ", "", x)))
#counts

# Function to create Newick string including Level3
# Build a three-level Newick string from a table of annotation labels.
#
# Args:
#   df: data frame (or tibble) with columns `level1`, `level2` and `level3`.
#       Each row places a level3 label under its level2 and level1 parents.
#       Any other columns are ignored.
#
# Returns:
#   A single string such as "(((x,y)A1,(z)A2)A,((w)B1)B);". Level1 clades,
#   level2 clades and level3 children all appear in order of first
#   appearance. A table with no rows yields "();".
#
# Notes:
#   * Each level3 label is emitted once per (level1, level2) clade, so
#     repeated rows do not create duplicate tips.
#   * NA labels are written as the text "NA" at every level.
#   * Labels are inserted verbatim; characters that are special in Newick
#     (parentheses, commas, colons, semicolons) are not escaped.
create_newick_str <- function(df) {
  lvl1 <- as.character(df[["level1"]])
  lvl2 <- as.character(df[["level2"]])
  lvl3 <- as.character(df[["level3"]])

  level1_clades <- vapply(unique(lvl1), function(node) {
    # %in% (unlike ==) also matches NA, so NA groups keep their children.
    in_node <- lvl1 %in% node

    level2_clades <- vapply(unique(lvl2[in_node]), function(level2_node) {
      children <- unique(lvl3[in_node & lvl2 %in% level2_node])
      paste0("(", paste(children, collapse = ","), ")", level2_node)
    }, character(1), USE.NAMES = FALSE)

    paste0("(", paste(level2_clades, collapse = ","), ")", node)
  }, character(1), USE.NAMES = FALSE)

  paste0("(", paste(level1_clades, collapse = ","), ");")
}

# Serialise the label hierarchy as Newick text and echo it for inspection
newick_str <- create_newick_str(counts)
print(newick_str)

# Parse the Newick text into a tree object
tree <- ape::read.tree(text = newick_str)

# Expose the tally under the name `counts` (the plots map size to it)
counts[["counts"]] <- counts[["n"]]

# Draw a throwaway dendrogram so ggtree computes node coordinates
p <- ggtree(tree, layout = "dendrogram") +
  geom_tiplab(aes(label = label), hjust = -0.3)

# Take ggtree's per-node table and attach the counts wherever a node label
# equals a level3 label
tree_data <- left_join(
  p$data,
  counts,
  by = c("label" = "level3"),
  relationship = "many-to-many"
)

# Nodes with no matching level3 row fall back to a count of 1
tree_data$counts <- replace(tree_data$counts, is.na(tree_data$counts), 1)

# Fill colour for each cell-type label, keyed by the space-stripped label.
# Every name appears exactly once. The previous literal repeated 13 names;
# only the first occurrence of a name is ever found by `color_list[label]`
# or by scale_fill_manual(values = color_list), so the colours kept here
# are the ones that were already in effect.
color_list <- c(
  # Level-1/2 style labels
  "EndothelialCell" = "#FFA500",
  "Fibroblast" = "#458B00",
  "Imm_DC" = "#5F9EA0",
  "Imm_LC" = "#0000CD",
  "Imm_Macrophage" = "#EEEE00",
  "Imm_NK" = "#9ACD32",
  "Imm_TCell" = "#1874CD",
  "Imm_Treg" = "#00B2EE",
  #"pDC"="#8A2BE2",
  "KCBasal" = "#f16b6b",
  "KCCornified" = "#9a1f61",
  "KCDifferentiating" = "#9583bd",
  "KCHair" = "#eb2627",
  "KCDysplastic" = "#d8c0dd",
  "KCIFN" = "#f06ba8",
  "Melanocytes" = "#8B4513",  # "saddlebrown"
  "KCCancer" = "black",
  # Fine-grained immune labels
  "Imm_CD8Tem" = "#3c6191",
  "Imm_CD4Tcm" = "#79b5af",
  "Imm_CX3CR1+Mono/Mac" = "#8a7e29",
  "Imm_mRegDC" = "#809693",
  "Imm_CD14+Mono" = "#6a3a4c",
  "Imm_CD16-NK" = "#03fc39",
  "Imm_Plasma" = "#9e7e7a",
  "Imm_LCKI67+" = "#ff2f80",
  "Imm_CD16+NK" = "#9be0aa",
  "Imm_NKT" = "#7983b5",
  "Imm_TREM2+Mac" = "#f5f531",
  "Imm_BCell" = "#fed9b9",
  "Imm_CD169+Mac" = "#e0e09b",
  "Imm_IFN+Mac" = "#eef7a6",
  "Imm_PD-1+CTLA4+CD8Tcm" = "#a6f7ee",
  "Imm_PD-1+CD8Tem" = "#0aa6d8",
  "Imm_DC1/DC2" = "#00846f",
  "Ambiguous" = "grey",
  # Labels without the "Imm_" prefix
  "DC" = "#5f9d9e",
  "LC" = "#37479b",
  "Macrophage" = "#eae71d",
  "NK" = "#99ca3e",
  "TCell" = "#41baeb",
  "Plasma" = "#f1ea9d",
  "BCell" = "#fed9b9",
  "Treg" = "#bbe5f3",
  "Monocytes" = "#9cc7a1"
)

# Look up a colour for every node label; labels missing from color_list
# get NA
tree_data <- mutate(tree_data, color = color_list[tree_data$label])

# Wide, short canvas for the notebook display
options(repr.plot.width = 15, repr.plot.height = 2)

# Extent of the node x-coordinates, used to pad the axis limits below
x_min <- min(tree_data$x, na.rm = TRUE)
x_max <- max(tree_data$x, na.rm = TRUE)

#pdf("skin_atlas/reanalysis_figs/scc/scc_tree_3levels_imm_New.pdf", width=15, height = 2)
# Upper bound of the point-size scale
largest_count <- max(tree_data$counts, na.rm = TRUE)

# Blank out every x-axis element (text, ticks, title, line)
hidden_x_axis <- theme(
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.title.x = element_blank(),
  axis.line.x = element_blank()
)

# Dendrogram with one point per node: size = count, fill = label
tree_plot <- ggtree(tree, layout = "dendrogram") +
  geom_tiplab(aes(label = label), hjust = 10) +
  geom_point(
    data = tree_data,
    aes(x = x, y = y, size = counts, fill = label),
    shape = 21,
    color = "black"
  ) +
  theme_tree2() +
  scale_size_continuous(range = c(3, 10), limits = c(1, largest_count)) +
  coord_flip() +               # swap the axes
  xlim(x_min - 1, x_max + 1) + # one unit of padding on either side
  hidden_x_axis +
  scale_fill_manual(values = color_list)
tree_plot
#dev.off()

In [None]:
# Per-cell metadata table for the SCC object (tab-separated text).
scc <- read.delim(
  "/QRISdata/Q2051/SCC_Paper/resources/data/frozen_objects_Nov2024_PP/SCC_BCC/SCC_full_object_final_Nov26_metadata.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)

# Keep only the three annotation levels used to build the tree
# (this cell takes level2 from the plain Level2 column).
df <- data.frame(
  level1 = scc$Level1,
  level2 = scc$Level2,
  level3 = scc$Level3_Cancer
)

# Count the rows for each (level1, level2, level3) combination, once.
# The counting is done on the raw labels and the spaces are stripped
# afterwards, so the row order is the same as before. ungroup() drops the
# grouping that tally() leaves behind so later steps see a plain tibble.
counts <- df %>%
  group_by(level1, level2, level3) %>%
  tally() %>%
  ungroup() %>%
  mutate(across(c(level1, level2, level3), function(x) gsub(" ", "", x)))
#counts

# Function to create Newick string including Level3
# Build a three-level Newick string from a table of annotation labels.
#
# Args:
#   df: data frame (or tibble) with columns `level1`, `level2` and `level3`.
#       Each row places a level3 label under its level2 and level1 parents.
#       Any other columns are ignored.
#
# Returns:
#   A single string such as "(((x,y)A1,(z)A2)A,((w)B1)B);". Level1 clades,
#   level2 clades and level3 children all appear in order of first
#   appearance. A table with no rows yields "();".
#
# Notes:
#   * Each level3 label is emitted once per (level1, level2) clade, so
#     repeated rows do not create duplicate tips.
#   * NA labels are written as the text "NA" at every level.
#   * Labels are inserted verbatim; characters that are special in Newick
#     (parentheses, commas, colons, semicolons) are not escaped.
create_newick_str <- function(df) {
  lvl1 <- as.character(df[["level1"]])
  lvl2 <- as.character(df[["level2"]])
  lvl3 <- as.character(df[["level3"]])

  level1_clades <- vapply(unique(lvl1), function(node) {
    # %in% (unlike ==) also matches NA, so NA groups keep their children.
    in_node <- lvl1 %in% node

    level2_clades <- vapply(unique(lvl2[in_node]), function(level2_node) {
      children <- unique(lvl3[in_node & lvl2 %in% level2_node])
      paste0("(", paste(children, collapse = ","), ")", level2_node)
    }, character(1), USE.NAMES = FALSE)

    paste0("(", paste(level2_clades, collapse = ","), ")", node)
  }, character(1), USE.NAMES = FALSE)

  paste0("(", paste(level1_clades, collapse = ","), ");")
}

# Serialise the label hierarchy as Newick text and echo it for inspection
newick_str <- create_newick_str(counts)
print(newick_str)

# Parse the Newick text into a tree object
tree <- ape::read.tree(text = newick_str)

# Expose the tally under the name `counts` (the plots map size to it)
counts[["counts"]] <- counts[["n"]]

# Draw a throwaway dendrogram so ggtree computes node coordinates
p <- ggtree(tree, layout = "dendrogram") +
  geom_tiplab(aes(label = label), hjust = -0.3)

# Take ggtree's per-node table and attach the counts wherever a node label
# equals a level3 label
tree_data <- left_join(
  p$data,
  counts,
  by = c("label" = "level3"),
  relationship = "many-to-many"
)

# Nodes with no matching level3 row fall back to a count of 1
tree_data$counts <- replace(tree_data$counts, is.na(tree_data$counts), 1)

# Fill colour for each cell-type label, keyed by the space-stripped label.
# Every name appears exactly once. The previous literal repeated 13 names;
# only the first occurrence of a name is ever found by `color_list[label]`
# or by scale_fill_manual(values = color_list), so the colours kept here
# are the ones that were already in effect.
color_list <- c(
  # Level-1/2 style labels
  "EndothelialCell" = "#FFA500",
  "Fibroblast" = "#458B00",
  "Imm_DC" = "#5F9EA0",
  "Imm_LC" = "#0000CD",
  "Imm_Macrophage" = "#EEEE00",
  "Imm_NK" = "#9ACD32",
  "Imm_TCell" = "#1874CD",
  "Imm_Treg" = "#00B2EE",
  #"pDC"="#8A2BE2",
  "KCBasal" = "#f16b6b",
  "KCCornified" = "#9a1f61",
  "KCDifferentiating" = "#9583bd",
  "KCHair" = "#eb2627",
  "KCDysplastic" = "#d8c0dd",
  "KCIFN" = "#f06ba8",
  "Melanocytes" = "#8B4513",  # "saddlebrown"
  "KCCancer" = "black",
  # Fine-grained immune labels
  "Imm_CD8Tem" = "#3c6191",
  "Imm_CD4Tcm" = "#79b5af",
  "Imm_CX3CR1+Mono/Mac" = "#8a7e29",
  "Imm_mRegDC" = "#809693",
  "Imm_CD14+Mono" = "#6a3a4c",
  "Imm_CD16-NK" = "#03fc39",
  "Imm_Plasma" = "#9e7e7a",
  "Imm_LCKI67+" = "#ff2f80",
  "Imm_CD16+NK" = "#9be0aa",
  "Imm_NKT" = "#7983b5",
  "Imm_TREM2+Mac" = "#f5f531",
  "Imm_BCell" = "#fed9b9",
  "Imm_CD169+Mac" = "#e0e09b",
  "Imm_IFN+Mac" = "#eef7a6",
  "Imm_PD-1+CTLA4+CD8Tcm" = "#a6f7ee",
  "Imm_PD-1+CD8Tem" = "#0aa6d8",
  "Imm_DC1/DC2" = "#00846f",
  "Ambiguous" = "grey",
  # Labels without the "Imm_" prefix
  "DC" = "#5f9d9e",
  "LC" = "#37479b",
  "Macrophage" = "#eae71d",
  "NK" = "#99ca3e",
  "TCell" = "#41baeb",
  "Plasma" = "#f1ea9d",
  "BCell" = "#fed9b9",
  "Treg" = "#bbe5f3",
  "Monocytes" = "#9cc7a1"
)

# Look up a colour for every node label; labels missing from color_list
# get NA
tree_data <- mutate(tree_data, color = color_list[tree_data$label])

# Wide, short canvas for the notebook display
options(repr.plot.width = 15, repr.plot.height = 2)

# Extent of the node x-coordinates, used to pad the axis limits below
x_min <- min(tree_data$x, na.rm = TRUE)
x_max <- max(tree_data$x, na.rm = TRUE)

#pdf("skin_atlas/reanalysis_figs/scc/scc_tree_3levels_imm_New.pdf", width=15, height = 2)
# Upper bound of the point-size scale
largest_count <- max(tree_data$counts, na.rm = TRUE)

# Blank out every x-axis element (text, ticks, title, line)
hidden_x_axis <- theme(
  axis.text.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.title.x = element_blank(),
  axis.line.x = element_blank()
)

# Dendrogram with one point per node: size = count, fill = label
tree_plot <- ggtree(tree, layout = "dendrogram") +
  geom_tiplab(aes(label = label), hjust = 10) +
  geom_point(
    data = tree_data,
    aes(x = x, y = y, size = counts, fill = label),
    shape = 21,
    color = "black"
  ) +
  theme_tree2() +
  scale_size_continuous(range = c(3, 10), limits = c(1, largest_count)) +
  coord_flip() +               # swap the axes
  xlim(x_min - 1, x_max + 1) + # one unit of padding on either side
  hidden_x_axis +
  scale_fill_manual(values = color_list)
tree_plot
#dev.off()

In [None]:
# Write the dendrogram to PDF, reusing `tree`, `tree_data`, `x_min`,
# `x_max` and `color_list` from the cell above.
# The plot is built before the device is opened, so a failure while
# assembling it cannot leave a stray open device behind.
tree_plot <- ggtree(tree, layout = "dendrogram") +
  geom_tiplab(aes(label = label), hjust = 10) +
  geom_point(
    data = tree_data,
    aes(x = x, y = y, size = counts, fill = label),
    shape = 21,
    color = "black"
  ) +
  theme_tree2() +
  # Point size spans 3-10 for counts from 1 up to the largest count
  scale_size_continuous(
    range = c(3, 10),
    limits = c(1, max(tree_data$counts, na.rm = TRUE))
  ) +
  coord_flip() +               # swap the axes
  xlim(x_min - 1, x_max + 1) + # expand the x-axis limits
  theme(
    axis.text.x = element_blank(),  # remove x-axis text
    axis.ticks.x = element_blank(), # remove x-axis ticks
    axis.title.x = element_blank(), # remove x-axis title
    axis.line.x = element_blank()   # remove x-axis line
  ) +
  scale_fill_manual(values = color_list)

pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/scc/updated/SCC_tree_new_Level3_cancer.pdf", width=15, height = 2)
# print() draws explicitly instead of relying on top-level auto-printing;
# `finally` closes the device even if drawing fails. invisible() stops the
# plot from being auto-printed a second time after the device is closed.
invisible(tryCatch(
  print(tree_plot),
  finally = dev.off()
))

In [None]:
# Square canvas for the labelled reference view
options(repr.plot.width = 8, repr.plot.height = 8)
#pdf("/QRISdata/Q2051/SCC_Paper/resources/data/reanalysis_figs/scc/updated//scc_tree_3levels_imm_Labels_for_ref.pdf", width=8, height = 8)

# Upper bound of the point-size scale
largest_count <- max(tree_data$counts, na.rm = TRUE)

# Blank out every y-axis element (text, ticks, title, line)
hidden_y_axis <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank(),
  axis.line.y = element_blank()
)

# Rectangular layout of the same tree: size = count, fill = label
reference_plot <- ggtree(tree, layout = "rectangular") +
  geom_tiplab(aes(label = label), hjust = 1) +
  geom_point(
    data = tree_data,
    aes(x = x, y = y, size = counts, fill = label),
    shape = 21,
    color = "black"
  ) +
  theme_tree2() +
  scale_size_continuous(range = c(3, 10), limits = c(1, largest_count)) +
  hidden_y_axis +
  scale_fill_manual(values = color_list)
reference_plot
#dev.off()

In [None]:
# Keep the rows of `counts` in which any column contains "Cancer"
# (case-insensitive). Each column is tested as a whole and the per-column
# results are OR-ed together, which avoids apply() coercing the table to a
# character matrix.
has_cancer <- Reduce(
  `|`,
  lapply(counts, function(column) grepl("Cancer", column, ignore.case = TRUE))
)
subset_counts <- counts[has_cancer, ]

# View the subset
print(subset_counts)

In [None]:
# Display the `counts` column (the copy of tally()'s `n` made earlier)
counts$counts

In [None]:
# Display the full counts table
counts