# In [None]:
library(Seurat)
library(sceasy)
library(data.table)
library(readxl)
library(tidyverse)
library(anndata)
library(SeuratDisk)
library(maestro) 
library(circlize)
library(PlantPhoneDB) 
library(glmGamPoi) 
library(ggplotify) 
library(viridis) 
library(patchwork) 
library(gridExtra)
library(pheatmap)
library(arules)
library(infotheo) 
library(parmigene) 
library(igraph) 
library(reticulate) 
library(ggplot2)
library(cowplot)
library(ggsci)

library(future)

# ---- Parallel execution setup (future) ----

# NOTE(review): this only binds a variable named `future.seed` in the
# workspace; nothing visible in this script passes it on to future.
# Confirm whether a seed setting on the actual future calls was intended.
future.seed <- TRUE

# Print the evaluation strategies reported by the installed future package.
# `:::` reaches a non-exported helper of that package.
supported_strategies <- future:::supportedStrategies()
print(supported_strategies)

# Use the "multicore" strategy with 10 workers.
plan("multicore", workers = 10)

# Size limit for globals handed to a future: 80 * 1024^3 bytes (80 GiB).
options(future.globals.maxSize = 80 * 1024^3)

# ---- Output locations and input object ----

# Label of the sample collection; reused in directory and file names below.
folder <- '6phase'

# Working directory of the 36-cluster annotation (created when missing).
dataset <- paste0('/data/work/output/PlantPhone/Seurat/', folder, '/RNA_T_0.5/36clusters/Annotation/')
dir.create(dataset, recursive = TRUE, showWarnings = FALSE)

# Nested sub-directory that receives the annotation figures.
dataset0 <- paste0(dataset, 'Annotation/')
dir.create(dataset0, recursive = TRUE, showWarnings = FALSE)

# Restore the merged workspace image. The path is assembled from `dataset`
# and `folder` and resolves to
# <dataset>36clusters_merged_6phase.rdata. The file name must not carry a
# trailing quote character, otherwise load() cannot find the file.
# The code below expects this image to provide an object named `objs`.
load(file = paste0(dataset, '36clusters_merged_', folder, '.rdata'))

# Show the identity levels of the restored object.
levels(objs)

# Cluster labels "0" .. "35" as character strings, in numeric order.
order0 <- as.character(0:35)

# Store the cluster column as a factor whose levels follow `order0`;
# any value not listed in `order0` becomes NA.
objs$seurat_clusters <- factor(
  objs$seurat_clusters,
  levels = order0
)

# Make the re-levelled cluster column the active identity of every cell.
Idents(objs) <- objs@meta.data$seurat_clusters

# ---- Cell-type annotation of the clusters ----
# One label per identity level, matched by position: the k-th label is
# attached to the k-th element of levels(objs). The cluster numbers in the
# trailing comments assume the levels are "0".."35" in numeric order, which is
# how the identities were set from `order0` above.
new.cluster.ids <- c(
  "Rib/CZ",         # 0
  "Parenchyma",     # 1
  "Unknown",        # 2
  "Meristem2",      # 3
  "Meristem3",      # 4
  "Vessel",         # 5
  "Unknown",        # 6
  "Rib",            # 7
  "Rib",            # 8
  "LP/Parenchyma",  # 9
  "Meristem3",      # 10
  "CZ",             # 11
  "Meristem2",      # 12
  "FM-like",        # 13
  "Epidermis1",     # 14
  "Epidermis1",     # 15
  "BVB",            # 16
  "Rib/CZ",         # 17
  "CZ/Parenchyma",  # 18
  "BVB",            # 19
  "LP/Parenchyma",  # 20
  "Meristem1",      # 21
  "Epidermis2",     # 22
  "BVB",            # 23
  "PZ",             # 24
  "Epidermis1",     # 25
  "Meristem3",      # 26
  "Trichome",       # 27
  "LP/Parenchyma",  # 28
  "Trichome",       # 29
  "Parenchyma",     # 30
  "Trichome",       # 31
  "VP",             # 32
  "BVB",            # 33
  "SE-CC",          # 34
  "Parenchyma"      # 35
)

# Positional matching is only meaningful when there is exactly one label per
# identity level; stop early instead of silently mislabelling clusters.
stopifnot(length(new.cluster.ids) == length(levels(objs)))

# Name each label by the identity level it replaces, then rename.
names(new.cluster.ids) <- levels(objs)
objs <- RenameIdents(objs, new.cluster.ids)

# Keep the new identities as a metadata column for later grouping.
objs$assign.ident <- Idents(objs)

# Persist the re-annotated object next to the input image.
save(objs, file = paste0(dataset, '36clusters_reannotation_', folder, '.rdata'))

# 45-colour palette used for plots grouped by cluster number.
mycolor1 <- c(
  # first 32 entries
  c('#E41A1C', '#FF7F00', '#4DAF4A', '#984EA3', '#FFFF33', '#377EB8',
    '#A65628', '#F781BF', '#66C2A5', '#FC8D62', '#8DA0CB', '#E78AC3',
    '#A6D854', '#FFD92F', '#E5C494', '#B3B3B3', '#1B9E77', '#D95F02',
    '#7570B3', '#E7298A', '#66A61E', '#E6AB02', '#A6761D', '#666666',
    '#A6CEE3', '#1F78B4', '#B2DF8A', '#33A022', '#8C564B', '#17BECF',
    '#BCBD22', '#9467BD'),
  # 13 additional entries
  c('#FF8C00', '#FF4500', '#FF1493', '#DA70D6', '#8B0000', '#4682B4',
    '#800080', '#00CED1', '#FFD700', '#8B4513', '#C71585', '#FF6347',
    '#00FF7F')
)


# ---- UMAP coloured by cluster number ----
# Base plot: UMAP reduction, one colour and one text label per cluster.
pic1 <- DimPlot(
  objs,
  reduction = 'umap',
  group.by = 'seurat_clusters',
  cols = mycolor1,
  label = TRUE,
  label.size = 6,
  raster = FALSE
)

# Blank title, legend on the right with compact keys and small text,
# and larger points in the legend than in the panel.
pic1 <- pic1 +
  ggtitle("") +
  theme(
    legend.position = "right",
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8)
  ) +
  guides(color = guide_legend(override.aes = list(size = 3)))

# Inline display.
pic1

# 70 colours interpolated across an 11-colour base palette.
mycolor_expanded <- colorRampPalette(
  c('#1f77b4', '#ff7f0e', '#279e68', '#d62728', '#aa40fc', '#8c564b',
    '#e377c2', '#b5bd61', '#17becf', '#aec7e8', '#ffbb78')
)(70)

# Hand-picked 20-colour palette used for plots grouped by cell type.
mycolor_expanded1 <- c(
  '#1f77b4', '#ff7f0e', '#17becf', '#d62728',
  '#aa40fc', '#8c564b', '#e377c2', '#b5bd61',
  '#279e68', '#aec7e8', '#ffbb78', '#bd9e39',
  '#5254a3', '#6b6ecf', '#ad494a', '#9edae5',
  '#dbdb8d', '#f7b6d2', '#8c6d31', '#c7c7c7'
)

# `mycolor` ends up as an alias of the 20-colour palette.
mycolor <- mycolor_expanded1

# ---- UMAP coloured by annotated cell type ----
# Grouped by the `assign.ident` metadata column and drawn with the
# 20-colour palette; title blanked, legend on the right.
pic2 <- DimPlot(objs,
                group.by = 'assign.ident',
                raster = FALSE,
                label = TRUE,
                label.size = 4,
                reduction = 'umap',
                cols = mycolor_expanded1) +
  ggtitle("") +
  theme(
    legend.position = "right",
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8)
  ) +
  guides(color = guide_legend(override.aes = list(size = 3)))

# Inline display.
pic2

# Two-page PDF: cluster-number UMAP followed by cell-type UMAP.
# print() is explicit so the pages are drawn even when this script is run
# through source() or from inside a function, where bare plot objects are
# not auto-printed.
pdf(paste0(dataset0, 'Cluster_Annotated_', folder, '.pdf'), width = 10, height = 8)
print(pic1)
print(pic2)
dev.off()

# ---- Marker table ordering and dot plot ----

# Directory holding the marker tables (`dataset` already ends in "/").
dataset1 <- paste0(dataset, 'allmarker/')
dir.create(dataset1, recursive = TRUE, showWarnings = FALSE)

# Marker table produced elsewhere; must contain `cluster` and `gene` columns.
# (The variable says "top5" while the file name says "Top3".)
allmarker_top5 <- read.csv(paste0(dataset1, 'allmarker_RNA_T_0.5_Top3_6phase_WUS.csv'))

# Display order of the 36 clusters; clusters sharing a line carry the same
# cell-type label in the annotation. The variable shares its name with
# base::order(); calls such as order(x) still resolve to the function.
order <- c(
  '7', '8',
  '17', '0',
  '11',
  '18',
  '9', '20', '28',
  '24',
  '21',
  '3', '12',
  '4', '10', '26',
  '1', '35',
  '30',
  '14', '15', '25',
  '22',
  '27',
  '29',
  '31',
  '16', '19', '23', '33',
  '5',
  '32',
  '34',
  '13',
  '2', '6'
)

# Sort the marker rows by that cluster order.
allmarker_top5_sorted <- allmarker_top5 %>%
  mutate(cluster = factor(cluster, levels = order, ordered = TRUE)) %>%
  arrange(cluster)

# Write the sorted table and read the very same file back, so the path that
# is written and the path that is read can never drift apart.
sorted_marker_csv <- paste0(dataset1, 'allmarker_RNA_T_0.5_Top3_sorted_', folder, '.csv')
write.csv(allmarker_top5_sorted, file = sorted_marker_csv, row.names = FALSE)
allmarker_top5_sorted <- read.csv(sorted_marker_csv)

# Re-level the cluster column so the plot axis follows the same order.
objs$seurat_clusters <- factor(objs$seurat_clusters, levels = order)

DefaultAssay(objs) <- "RNA"

# Dot plot of the unique marker genes per cluster, genes on the vertical axis
# after coord_flip().
p4 <- DotPlot(
  objs,
  features = unique(allmarker_top5_sorted$gene),
  cols = c("grey", "red"),
  group.by = "seurat_clusters"
) +
  coord_flip() +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5)
  ) +
  labs(x = NULL, y = NULL) +
  guides(size = guide_legend("Percent Expression"))

# The device is opened only after the plot was built, and print() is explicit
# so the page is drawn even under source().
pdf(paste0(dataset1, 'Dotplot_', folder, '_allmarker_36cluster_sorted_WUS_top3.pdf'), height = 15, width = 15)
print(p4)
dev.off()


# ---- Cluster / cell-type lookup table ----
# Distinct (cluster, cell type) pairs found in the cell metadata.
unique_values <- objs@meta.data[, c("seurat_clusters", "assign.ident")]
unique_values <- unique_values[!duplicated(unique_values), , drop = FALSE]

# Rows ordered by the cell-type column.
sorted_df <- unique_values[order(unique_values[["assign.ident"]]), , drop = FALSE]

write.csv(
  sorted_df,
  file = paste0(dataset1, 'Celltype_cluster_RNA_T_0.5_', folder, '.csv'),
  row.names = FALSE
)

# ---- Cell-type UMAP, one panel per sample ----
pics1 <- DimPlot(objs,
                 group.by = 'assign.ident',
                 raster = FALSE,
                 label = FALSE,          # no text labels in the split panels
                 split.by = 'sample',
                 label.size = 4,
                 reduction = 'umap',
                 cols = mycolor_expanded1,
                 ncol = 6) +
  ggtitle("") +
  theme(
    legend.position = "right",          # keep the legend on the right
    legend.key.size = unit(0.5, "cm"),  # legend key size
    legend.text = element_text(size = 8) # legend text size
  ) +
  guides(color = guide_legend(override.aes = list(size = 3))) # larger legend points

# Wide single-row PDF. print() is explicit so the page is drawn even when the
# script is run through source(), where bare plot objects are not auto-printed.
pdf(paste0(dataset0, 'Cluster_Annotated_splited_', folder, '.pdf'), width = 36, height = 6)
print(pics1)
dev.off()

# ---- Cell-type composition per sample (stacked proportions) ----
# repr.plot.* sizes are presumably for inline notebook display.
options(repr.plot.width = 10, repr.plot.height = 4)

# One horizontal bar per sample; position = "fill" scales each bar to 1, so
# segments show the share of cells per cell type.
pics2 <- objs@meta.data %>%
  ggplot(aes(sample, fill = assign.ident, color = I('white'))) +
  geom_bar(position = "fill") +
  coord_flip() +
  theme_bw() +
  ylab("") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.title = element_text(size = 7.82, face = "bold"),
        axis.text = element_text(size = 7.82, color = 'black'),
        legend.text = element_text(size = 7.82),
        plot.title = element_text(size = 7.82, face = "bold"),
        axis.line = element_line(color = 'black'),
        legend.title = element_text(size = 7.82)) +
  scale_y_continuous(position = "right", expand = c(0, 0)) +
  scale_fill_manual(values = mycolor)

# Inline display.
pics2

# From here on `dataset` points at the nested figure directory, so every
# later path built from `dataset` lands there.
dataset <- dataset0

# print() is explicit so the page is drawn even under source().
pdf(paste0(dataset, 'Celltype_Cellratio_', folder, '.pdf'), width = 10, height = 6)
print(pics2)
dev.off()

# ---- Cluster composition per sample (stacked proportions) ----
# repr.plot.* sizes are presumably for inline notebook display.
options(repr.plot.width = 10, repr.plot.height = 4)

# Same layout as the cell-type composition plot, but filled by cluster number
# and coloured with the 45-colour palette.
pics3 <- objs@meta.data %>%
  ggplot(aes(sample, fill = seurat_clusters, color = I('white'))) +
  geom_bar(position = "fill") +
  coord_flip() +
  theme_bw() +
  ylab("") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.title = element_text(size = 7.82, face = "bold"),
        axis.text = element_text(size = 7.82, color = 'black'),
        legend.text = element_text(size = 7.82),
        plot.title = element_text(size = 7.82, face = "bold"),
        axis.line = element_line(color = 'black'),
        legend.title = element_text(size = 7.82)) +
  scale_y_continuous(position = "right", expand = c(0, 0)) +
  scale_fill_manual(values = mycolor1)

# print() is explicit so the page is drawn even when the script is run
# through source(), where bare plot objects are not auto-printed.
pdf(paste0(dataset, 'Cluster_cellratio_', folder, '.pdf'), width = 15, height = 5)
print(pics3)
dev.off()

# ---- Observed / expected cell counts per cell type and sample ----
# Contingency table: rows = cell types, columns = samples.
tbl <- table(objs@meta.data[["assign.ident"]], objs@meta.data[["sample"]])

# The chi-squared test is run for its table of expected counts.
res <- chisq.test(tbl)
expected <- res[["expected"]]

# Ratio of observed to expected counts, cell by cell of the table.
roe <- tbl / expected

# Show the raw counts and store them.
tbl

write.csv(
  tbl,
  file = paste0(dataset, 'Celltype_cellnum_', folder, '.csv'),
  row.names = TRUE
)

# Figure 5C
# ---- Ro/E bar chart per cell type ----

# repr.plot.* sizes are presumably for inline notebook display.
options(repr.plot.width = 6, repr.plot.height = 5)

# One fill colour per sample (six entries).
col_sample <- c(
  '#00FF00',
  '#00C800',
  '#FFDA33',
  '#FFA500',
  '#E64A19',
  '#8E24AA'
)

# Long-format Ro/E values drawn as dodged bars; the dashed line marks
# Ro/E = 1, i.e. observed count equal to expected count.
# dplyr::select is namespaced so another attached package's select() cannot
# be picked up by accident.
pic3_1 <- roe %>%
  as.data.frame() %>%
  dplyr::select(Cell = Var1, sample = Var2, roe = Freq) %>%
  ggplot(aes(Cell, roe, fill = sample)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  ylab('Ro/E') + xlab('') +
  theme_classic() +
  theme(axis.title = element_text(size = 7.82, face = "bold"),
        axis.text = element_text(size = 7.82, color = 'black'),
        axis.text.x = element_text(angle = 60, hjust = 1),
        legend.text = element_text(size = 7.82),
        plot.title = element_text(size = 7.82, face = "bold", color = 'black'),
        legend.position = "top") +
  geom_hline(aes(yintercept = 1), colour = "#990000", linetype = "dashed") +
  scale_fill_manual(values = col_sample)

# Inline display.
pic3_1

# Leftover note, presumably an alternative fill palette: c("lightblue", "purple")

# print() is explicit so the page is drawn even when the script is run
# through source(), where bare plot objects are not auto-printed.
pdf(paste0(dataset, 'Celltype_roe_', folder, '.pdf'), width = 8, height = 5)
print(pic3_1)
dev.off()

# ---- Ro/E per cluster and sample ----
# Contingency table: rows = clusters, columns = samples.
tbl_1 <- table(objs@meta.data$seurat_clusters, objs@meta.data$sample)

# The chi-squared test is run for its table of expected counts.
res1 <- chisq.test(tbl_1)

expected1 <- res1$expected
# Ratio of observed to expected counts.
roe1 <- tbl_1 / expected1

write.csv(tbl_1, file = paste0(dataset, 'Cluster_cellnum_', folder, '.csv'), row.names = TRUE)

# repr.plot.* sizes are presumably for inline notebook display.
options(repr.plot.width = 6, repr.plot.height = 5)

# Dodged bars of Ro/E per cluster, one bar per sample; dashed line at 1.
# dplyr::select is namespaced so another attached package's select() cannot
# be picked up by accident.
pic3_2 <- roe1 %>%
  as.data.frame() %>%
  dplyr::select(Cell = Var1, sample = Var2, roe = Freq) %>%
  ggplot(aes(Cell, roe, fill = sample)) +
  geom_bar(stat = 'identity', position = 'dodge') +
  ylab('Ro/E') + xlab('') +
  theme_classic() +
  theme(axis.title = element_text(size = 7.82, face = "bold"),
        axis.text = element_text(size = 7.82, color = 'black'),
        axis.text.x = element_text(angle = 60, hjust = 1),
        legend.text = element_text(size = 7.82),
        plot.title = element_text(size = 7.82, face = "bold", color = 'black'),
        legend.position = "top") +
  geom_hline(aes(yintercept = 1), colour = "#990000", linetype = "dashed") +
  scale_fill_manual(values = col_sample)

# Inline display.
pic3_2

# print() is explicit so the page is drawn even when the script is run
# through source(), where bare plot objects are not auto-printed.
pdf(paste0(dataset, 'Cluster_roe_', folder, '.pdf'), width = 10, height = 5)
print(pic3_2)
dev.off()


# ---- Marker genes computed separately within each sample ----
dataset3 <- paste0(dataset, 'allmarker_splitedseob/')
dir.create(dataset3, recursive = TRUE, showWarnings = FALSE)

# NOTE: despite its name, `celltypes` holds the distinct sample labels here.
celltypes <- unique(objs@meta.data$sample)

# For every sample, two marker tables are computed on the same subset and
# written to two different files, so the stringent table is not overwritten
# by the relaxed one. FindAllMarkers() compares whatever identities are
# active on `objs` at this point.
for (i in celltypes) {

  # Cells of the current sample only (subset once, reused for both runs).
  seob_i <- subset(
    objs,
    subset = sample == i
  )

  # Stringent run: up-regulated genes only, min.pct 0.5, logfc.threshold 0.5.
  markers_pos_i <- FindAllMarkers(seob_i, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5)
  write.csv(
    markers_pos_i,
    file = paste0(dataset3, 'allmarker_', i, '_splitedseob_onlypos_', folder, '.csv'),
    row.names = FALSE
  )

  # Relaxed run: both directions, no detection filter, logfc.threshold 0.1.
  # Keeps the established file name.
  markers_i <- FindAllMarkers(seob_i, only.pos = FALSE, min.pct = 0, logfc.threshold = 0.1)
  write.csv(
    markers_i,
    file = paste0(dataset3, 'allmarker_', i, '_splitedseob_', folder, '.csv'),
    row.names = FALSE
  )
}

# ---- Conserved markers per annotated cell type ----
# Switch the active identities to the cell-type annotation.
Idents(objs) <- objs@meta.data$assign.ident
celltypes <- unique(objs@meta.data$assign.ident)

dataset1 <- paste0(dataset, 'Conservedmarker/Celltypes/')
dir.create(dataset1, showWarnings = FALSE, recursive = TRUE)

# One CSV per cell type; markers are tested with cells grouped by `sample`.
for (i in as.character(celltypes)) {
  m <- FindConservedMarkers(objs, ident.1 = i, grouping.var = "sample")

  # Move the row names into a regular column
  # (presumably gene identifiers, given the column name).
  m1 <- tibble::rownames_to_column(m, var = "GeneID_ITAG4.1")

  # "/" in a cell-type label is replaced by "-" to get a usable file name.
  write.csv(
    m1,
    file = paste0(dataset1, 'Conservedmarker_', gsub("/", "-", i, fixed = TRUE), '_', folder, '.csv'),
    row.names = FALSE
  )
}

# ---- Conserved markers per cluster ----
dataset2 <- paste0(dataset, 'Conservedmarker/Clusters/')
dir.create(dataset2, showWarnings = FALSE, recursive = TRUE)

# Switch the active identities back to the cluster numbers.
Idents(objs) <- objs@meta.data$seurat_clusters

# NOTE: `celltypes` holds cluster labels in this section.
celltypes <- unique(objs@meta.data$seurat_clusters)

# One CSV per cluster; markers are tested with cells grouped by `sample`.
for (i in as.character(celltypes)) {
  m <- FindConservedMarkers(objs, ident.1 = i, grouping.var = "sample")

  # Move the row names into a regular column
  # (presumably gene identifiers, given the column name).
  m1 <- tibble::rownames_to_column(m, var = "GeneID_ITAG4.1")

  write.csv(
    m1,
    file = paste0(dataset2, 'Conservedmarker_', i, '_', folder, '.csv'),
    row.names = FALSE
  )
}