This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

# Load some useful functions


In [None]:
library(pagoda2)
library(Seurat)
library(ggthemes)
library(ggpubr)
library(dplyr)
library(dendextend)
# Name every element of a vector after its own value, so that lapply()-style
# calls over the result return a list keyed by the input values.
sn <- function(x) {
  stats::setNames(x, x)
}
# Load 10x count matrices from a named list of result folders.
#
# Args:
#   matrixPaths: named list (or named character vector) mapping a sample name
#     to a folder that contains matrix.mtx.gz, features.tsv.gz and
#     barcodes.tsv.gz.
#   n.cores: number of workers handed to mclapply(); defaults to 30, the value
#     that used to be hard-coded.
#
# Returns:
#   A named list with one dgCMatrix per sample. Rows are named after the second
#   column of features.tsv.gz, columns after "<sample name>_<barcode>".
#
# Stops with an error naming the affected samples when a worker fails.
t.load.10x.data <- function(matrixPaths, n.cores = 30) {
  # library() rather than require(): a missing package is an immediate error,
  # and both packages stay attached for the code that runs after this call.
  library(parallel)
  library(Matrix)
  loaded <- mclapply(sn(names(matrixPaths)), function(nam) {
    # `[[` extracts the path itself; `[` would pass a one-element list to paste()
    matrixPath <- matrixPaths[[nam]]
    # convert to the required sparse matrix representation
    x <- as(readMM(gzfile(paste(matrixPath, 'matrix.mtx.gz', sep = '/'))), 'dgCMatrix')
    cat(".")
    gs <- read.delim(gzfile(paste(matrixPath, 'features.tsv.gz', sep = '/')), header = FALSE)
    rownames(x) <- as.character(gs[, 2])
    cat(".")
    gs <- read.delim(gzfile(paste(matrixPath, 'barcodes.tsv.gz', sep = '/')), header = FALSE)
    cat(".")
    # prefix every barcode with the sample name so cells stay unique after merging
    colnames(x) <- paste(nam, as.character(gs[, 1]), sep = '_')
    x
  }, mc.cores = n.cores)

  # mclapply() reports a failed job as a "try-error" element instead of stopping
  failed <- vapply(loaded, inherits, logical(1), what = "try-error")
  if (any(failed)) {
    stop("failed to load 10x data for: ",
         paste(names(loaded)[failed], collapse = ", "), call. = FALSE)
  }
  loaded
}

# Compute a UMAP embedding of a PCA matrix with the Python `umap` module,
# which is loaded through reticulate.
#
# Args:
#   PCA: matrix-like object whose rows are the points to embed; it is converted
#     with as.matrix() before being handed to Python.
#   n_neighbors, min_dist: forwarded to umap.UMAP (n_neighbors as an integer).
#   max_dim: number of output dimensions (n_components).
#   seed.use: seed applied to both the R and the Python side; NULL skips seeding.
#
# Returns:
#   The fit_transform() result, carrying the row names of `PCA` and columns
#   named UMAP1 .. UMAP<max_dim>. The metric is fixed to "correlation".
doUMAP <- function(PCA,n_neighbors,min_dist,max_dim=2,seed.use=42){
  require(reticulate)
  seeding_requested <- !is.null(seed.use)
  if (seeding_requested) {
    set.seed(seed.use)
    py_set_seed(seed.use)
  }

  umap_module <- import("umap", delay_load = TRUE)
  reducer <- umap_module$UMAP(
    n_neighbors = as.integer(n_neighbors),
    n_components = as.integer(max_dim),
    metric = "correlation",
    min_dist = min_dist
  )

  embedding <- reducer$fit_transform(as.matrix(PCA))
  rownames(embedding) <- rownames(PCA)
  colnames(embedding) <- paste0("UMAP", 1:max_dim)

  embedding
}

# Build a Pagoda2 object from a count matrix and run the pipeline used in this
# notebook: variance adjustment, PCA, kNN graph, leiden clustering and UMAP.
#
# Args:
#   counts: count matrix handed to Pagoda2$new(); its row names are made unique
#     first.
#   n_neighbors, min_dist: UMAP parameters forwarded to doUMAP().
#   k: number of neighbours for makeKnnGraph().
#   npcs: number of PCs to compute (and to keep when selpc is FALSE).
#   do.log: passed to Pagoda2$new() as log.scale.
#   selpc: when TRUE, the number of PCs is chosen as the point of p2$misc$PCA$d
#     lying farthest from the straight line that joins the first and last point
#     (elbow heuristic); a diagnostic plot is drawn on a new device.
#   ...: further arguments for Pagoda2$new() (e.g. batch).
#
# Returns (invisibly):
#   The Pagoda2 object with reduction "PCA", clustering "leiden" and the
#   embedding p2$embeddings$PCA$UMAP.
p2wrapper <- function(counts,n_neighbors=30,min_dist=0.3,k=100,npcs=200,do.log=F,selpc=TRUE,...) {
  rownames(counts) <- make.unique(rownames(counts))
  # detectCores()/2 can be fractional (odd core count), below 1 (single core)
  # or NA (unknown); always hand pagoda2 a whole number >= 1.
  n_cores <- max(1L, floor(parallel::detectCores() / 2), na.rm = TRUE)
  p2 <- Pagoda2$new(counts, log.scale = do.log, n.cores = n_cores, ...)
  p2$adjustVariance(plot = TRUE, gam.k = 10)
  optpoint <- npcs
  if (selpc) {
    p2$calculatePcaReduction(nPcs = npcs, n.odgenes = NULL, maxit = 1000)
    x <- cbind(1:npcs, p2$misc$PCA$d)
    # straight line between the first and the last point of the curve
    line <- x[c(1, nrow(x)), ]
    proj <- princurve::project_to_curve(x, line)
    optpoint <- which.max(proj$dist_ind) - 1
    dev.new(width = 5, height = 4)
    par(mfrow = c(1, 1))
    plot(x, xlab = "PC", ylab = "Variance explained")
    abline(v = optpoint, lty = 2, col = 2)
    cat(paste0(optpoint, " PCs retained\n"))
  }

  p2$calculatePcaReduction(use.odgenes = TRUE, name = 'PCA', nPcs = optpoint, maxit = 1000)
  p2$makeKnnGraph(k = k, type = 'PCA', center = TRUE, distance = 'cosine')
  p2$getKnnClusters(method = conos::leiden.community, type = 'PCA', name = "leiden", resolution = .5)

  # Produce UMAP embedding
  cat("Computing UMAP... ")
  # drop = FALSE keeps the matrix (and its row names) even if one PC is retained
  p2$embeddings$PCA$UMAP <- doUMAP(PCA = p2$reductions$PCA[, seq_len(optpoint), drop = FALSE],
                                   n_neighbors = n_neighbors, min_dist = min_dist)
  cat("done\n")
  invisible(p2)
}


# Assemble a pagoda2 web application object for an already processed Pagoda2
# object.
#
# Args:
#   p2: Pagoda2 object holding a "PCA" reduction and a "leiden" clustering.
#   clus: grouping shown as "Leiden" metadata and used for the dendrogram;
#     defaults to p2$clusters$PCA$leiden.
#   app.title: title stored in the app metadata.
#   extraWebMetadata: optional list appended to the web metadata.
#   n.cores: cores used for the differential-expression aspects and the gene
#     kNN graph.
#
# Returns:
#   Whatever make.p2.app() returns.
p2webwrapper <- function(p2, clus = p2$clusters$PCA$leiden, app.title = "Pagoda2",
                         extraWebMetadata = NULL, n.cores = 4) {
  cat("Calculating hdea...\n")
  hdea <- p2$getHierarchicalDiffExpressionAspects(
    type = "PCA",
    clusterName = "leiden",
    z.threshold = 3,
    n.cores = n.cores
  )

  web_metadata <- list()
  web_metadata$leiden <- p2.metadata.from.factor(clus, displayname = "Leiden")
  web_metadata <- c(web_metadata, extraWebMetadata)

  de_genesets <- hierDiffToGenesets(hdea)
  app_info <- list(apptitle = app.title)

  cat("Making KNN graph...\n")
  p2$makeGeneKnnGraph(n.cores = n.cores)

  make.p2.app(
    p2,
    additionalMetadata = web_metadata,
    geneSets = de_genesets,
    dendrogramCellGroups = clus,
    show.clusters = FALSE,
    appmetadata = app_info
  )
}


# Load the files



In [None]:
# Root folder holding one 10x result directory per sample.
path <- "/home/lfaure/backup/"
#cd <- t.load.10x.data(list(E11_CTRL='_Data/ML5_CTRL/',E11_MUT="_Data/ML4_MUT/"))
# Named list <sample> -> "<path><sample>/" for the four samples analysed here.
sample_ids <- c("ML8", "ML9", "ML10", "ML11")
cd <- t.load.10x.data(as.list(setNames(paste0(path, sample_ids, "/"), sample_ids)))

#cd <- t.load.10x.data(list(ML9='_Data/ML9_E11-2/',ML10='_Data/ML10_E10/'))


# Filter cells



In [None]:
# Filter every sample with gene.vs.molecule.cell.filter() (max.cell.size = 1e5,
# min.cell.size = 5e3), one worker per sample, then merge the samples
# column-wise into a single matrix.
counts <- parallel::mclapply(
  cd,
  function(x) gene.vs.molecule.cell.filter(x, max.cell.size = 1e5, min.cell.size = 5e3),
  mc.cores = length(cd)
)
counts <- do.call(cbind, counts)

# Keep genes with a total of at least 10 counts over the retained cells.
counts <- counts[Matrix::rowSums(counts) >= 10, ]
# Genes whose name starts with "mt-" are treated as mitochondrial.
mito.genes <- grep(pattern = "^mt-", x = rownames(x = counts), value = TRUE)
# drop = FALSE keeps a matrix when exactly one gene matches; without it the
# subset collapses to a plain vector and colSums() fails.
percent.mito <- Matrix::colSums(counts[mito.genes, , drop = FALSE]) / Matrix::colSums(counts)
# Discard cells with 10% or more of their counts in mitochondrial genes.
counts <- counts[, percent.mito < .1]


# Make pagoda2 object



In [None]:
# Sample of origin of every cell: the part of the column name before the first
# "_" (the loader builds column names as "<sample>_<barcode>").
batch <- factor(sapply(strsplit(colnames(counts), "_"), "[[", 1))
names(batch) <- colnames(counts)
# Fixed 50 PCs (no elbow selection), k = 100 neighbours; `batch` is forwarded
# to Pagoda2$new() through `...`.
p2 <- p2wrapper(counts, k = 100, selpc = FALSE, npcs = 50, batch = batch)


In [None]:
# UMAP of all cells coloured by leiden cluster. Each cell is drawn twice: a
# larger black dot underneath acts as an outline for the coloured dot.
pl <- ggplot(data.frame(p2$embeddings$PCA$UMAP, leiden = p2$clusters$PCA$leiden)) +
  geom_point(aes(x = UMAP1, y = UMAP2), size = 1.5) +
  geom_point(aes(x = UMAP1, y = UMAP2, col = leiden), size = 1) +
  scale_color_manual(values = tableau_color_pal("Tableau 20")(14)[as.numeric(levels(factor(p2$clusters$PCA$leiden)))]) +
  theme_void() +
  theme(aspect.ratio = 1, legend.direction = "horizontal", legend.position = c(.8, .9))

ggsave("_Figures/ML8-11_leiden.png", pl, width = 8, height = 8, dpi = 300)


# Same embedding coloured by sample. `batch` used to be picked up from the
# global environment by aes(); it is now a column of the plotted data, matched
# to the embedding rows by cell name whenever the names allow it.
umap_cells <- rownames(p2$embeddings$PCA$UMAP)
batch_by_cell <- if (!is.null(umap_cells) && all(umap_cells %in% names(batch))) {
  batch[umap_cells]
} else {
  batch
}
pl <- ggplot(data.frame(p2$embeddings$PCA$UMAP,
                        leiden = p2$clusters$PCA$leiden,
                        batch = batch_by_cell)) +
  geom_point(aes(x = UMAP1, y = UMAP2), size = 1.5) +
  geom_point(aes(x = UMAP1, y = UMAP2, col = batch), size = 1) +
  theme_void() +
  theme(aspect.ratio = 1, legend.direction = "horizontal", legend.position = c(.8, .9))

ggsave("_Figures/ML8-11_batch.png", pl, width = 8, height = 8, dpi = 300)


In [None]:
# For every leiden cluster: draw 100 cells with replacement, walk through them
# one by one and record how many distinct genes have appeared among the 100
# highest-count genes of the cells seen so far. Repeated 100 times per cluster.
# The clusters are taken from the clustering itself instead of a fixed 1:14.
cluster_ids <- levels(factor(p2$clusters$PCA$leiden))
res <- list()
for (clu in cluster_ids) {
  cat(paste0("doing cluster ", clu, "\n"))
  # cells of this cluster; identical for every replicate, so computed once
  cellnames <- names(p2$clusters$PCA$leiden)[p2$clusters$PCA$leiden %in% clu]
  cumnew <- parallel::mclapply(1:100, function(x) {
    topgenes <- c()
    allgenes <- integer(100)
    sel <- sample(cellnames, 100, replace = TRUE)
    for (i in 1:100) {
      cell_counts <- p2$counts[sel[i], ]
      top100 <- names(cell_counts)[order(cell_counts, decreasing = TRUE)][1:100]
      topgenes <- unique(c(topgenes, top100))
      allgenes[i] <- length(topgenes)
    }
    allgenes
  }, mc.cores = 40)
  res[[clu]] <- do.call(rbind, lapply(1:100, function(x) data.frame(rep = x, val = cumnew[[x]], cum = 1:100)))
  res[[clu]]$cluster <- clu
}

cumres <- do.call(rbind, res)
# explicit levels keep the cluster order (and hence the colours) of the clustering
cumres$leiden <- factor(cumres$cluster, levels = cluster_ids)
# Mean curve per cluster with a bootstrapped 90% confidence ribbon.
# NOTE(review): `fun.y` is the older spelling of stat_summary()'s `fun`
# argument; left unchanged because the ggplot2 version in use is not known.
cumplot <- ggplot(cumres, aes(cum, val, fill = leiden, col = leiden)) +
  stat_summary(geom = "line", fun.y = mean) +
  stat_summary(geom = "ribbon", fun.data = mean_cl_boot, alpha = 0.3, fun.args = list(conf.int = .9)) +
  scale_color_tableau("Tableau 20") + scale_fill_tableau("Tableau 20") + theme_pubr() +
  ylab("cumulative number of new genes\n per cell sampled") + xlab("nCells") +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 100), breaks = c(0, 100)) +
  scale_y_continuous(limits = c(100, 400), expand = c(0, 0), breaks = c(100, 400)) +
  theme(legend.direction = "horizontal", legend.position = c(.5, .9), plot.margin = margin(10, 10, 10, 10, "pt"))

ggsave("_Figures/cumplot.png", cumplot, height = 5, width = 7, dpi = 600)


In [None]:
### props
# Sample of origin per cell (prefix of the cell name) and the developmental
# stage assigned to each sample.
batch <- sapply(strsplit(rownames(p2$counts), "_"), "[[", 1)
names(batch) <- rownames(p2$counts)
batch <- factor(batch)
devtime <- as.character(batch)
devtime[devtime %in% c("ML8", "ML9")] <- "E11.5"
devtime[devtime %in% c("ML10")] <- "E10.5"
devtime[devtime %in% c("ML11")] <- "E9.5"

names(devtime) <- names(batch)

clucompa <- data.frame(leiden = p2$clusters$PCA$leiden, devtime = devtime)
clucompa <- group_by(clucompa, leiden)
clucompa
ggplot(clucompa, aes(x = leiden, fill = devtime)) + geom_bar()
tcompa <- table(clucompa)
# Normalise each cluster row: divide the counts by the total number of cells of
# every stage, then rescale the row to sum to 1. The stage totals are taken
# once, before any row is overwritten; recomputing colSums() inside the loop
# would mix raw counts with rows that were already turned into proportions.
devtime_totals <- colSums(tcompa)
for (i in seq_len(nrow(tcompa))) {
  scaled <- tcompa[i, ] / devtime_totals
  tcompa[i, ] <- scaled / sum(scaled)
}
library(reshape2)
tcompa <- melt(tcompa)
tcompa$leiden <- as.character(tcompa$leiden)
tcompa$devtime <- factor(tcompa$devtime, levels = c("E9.5", "E10.5", "E11.5"))

dendro <- generateDendrogramOfGroups(p2, p2$clusters$PCA$leiden)
# Stacked bars ordered like the leaves of the cluster dendrogram.
pl <- ggplot(tcompa, aes(x = leiden, y = value, fill = devtime)) + geom_bar(stat = "identity") +
  theme_pubr() + ylab("normalised proportion") +
  scale_fill_viridis_d() + scale_x_discrete(limits = as.character(dendro$hc$order)) +
  theme(legend.position = "bottom") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0), breaks = c(0, 1))


dend <- as.dendrogram(dendro$hc) %>%
  set("branches_k_color", value = ggthemes::stata_pal()(5), k = 5)
  #dend=as.dendrogram(p2w$mainDendrogram$hc)
pa2 <- ggplot(as.ggdend(dend), horiz = FALSE, labels = FALSE) + theme_void()

# plot_grid() lives in cowplot, which this notebook never attaches
combi <- cowplot::plot_grid(pa2, pl, align = "v", nrow = 2, rel_heights = c(1, 3))

ggsave("_Figures/ML8-11_props.png", combi, width = 6, height = 6, dpi = 300)


# Label every cell "<cluster>_<branch>", where branch is the group (1..5) its
# cluster falls into when the dendrogram is cut into 5. paste0(..., collapse =
# "_") glued all labels of a branch into one long string; sep = "_" builds one
# label per cell.
# NOTE(review): which(cutted5 == i) yields positions in cutted5; this assumes
# position j corresponds to cluster j - confirm against the dendrogram labels.
clus_id <- as.numeric(p2$clusters$PCA$leiden)
clus <- as.character(clus_id)
cutted5 <- cutree(dend, k = 5)
for (i in 1:5) {
  in_branch <- clus_id %in% which(cutted5 == i)
  clus[in_branch] <- paste(clus_id[in_branch], i, sep = "_")
}


In [None]:
### diff expr
# Differential expression of every leiden cluster, plus a filtered version
# keeping rows with M > 1, fe > 0.6 and the `highest` flag set.
res <- p2$getDifferentialGenes(groups = p2$clusters$PCA$leiden)

resf <- lapply(res, function(x) x[x$M > 1 & x$fe > .6 & x$highest, ])

library(r2excel)
# create an Excel workbook. Both .xls and .xlsx file formats can be used.
wb <- createWorkbook(type = "xlsx")
wb_f <- createWorkbook(type = "xlsx")

# One sheet per cluster in each workbook; iterate over the result list itself
# instead of a fixed 1:14 and name each sheet after its list element.
for (l in seq_along(res)) {
  sheet <- createSheet(wb, sheetName = paste0("leiden ", names(res)[l]))
  sheet_f <- createSheet(wb_f, sheetName = paste0("leiden ", names(res)[l]))
  # write the filtered and the full table of this cluster
  xlsx.addTable(wb_f, sheet_f, resf[[l]], startCol = 1, row.names = TRUE, startRow = 1)
  xlsx.addTable(wb, sheet, res[[l]], startCol = 1, row.names = TRUE, startRow = 1)
}
saveWorkbook(wb_f, "ML8-11_tables_filtered.xlsx")
saveWorkbook(wb, "ML8-11_tables.xlsx")


# Filtered tables sorted by decreasing M, arranged in dendrogram leaf order.
resf_s <- lapply(resf, function(x) x[order(x$M, decreasing = TRUE), ])[dendro$hc$order]

# Up to four top genes per cluster; head() avoids the NA entries that
# rownames(x)[1:4] produces for tables with fewer than four rows.
# NOTE: `s` is the Seurat object created in the "Focus on ML10" cell further
# down; that cell has to be run before this plot.
top_genes <- unique(do.call(c, lapply(resf_s, function(x) head(rownames(x), 4))))
pl <- DotPlot(s, genes.plot = top_genes, group.by = "leiden", plot.legend = TRUE, do.return = TRUE) +
  theme_pubr() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  scale_y_discrete(limits = as.character(dendro$hc$order)) +
  theme(axis.title.y = element_blank(), axis.title.x = element_blank(),
        plot.margin = unit(c(0, 5.5, 5.5, 0), "pt"))

pa3 <- ggplot(as.ggdend(dend), horiz = TRUE, labels = FALSE) + theme_void() +
  theme(plot.margin = unit(c(-20, -20, -20, -20), "pt"))

# plot_grid() lives in cowplot, which this notebook never attaches
combi <- cowplot::plot_grid(pa3, pl, align = "hv", nrow = 1, axis = "tb", rel_widths = c(1, 5))

ggsave("_Figures/ML8-11_Dotplot2.png", combi, width = 12, height = 7, dpi = 300)


# Clusters 2 and 4 only: volcano plot from the second table of the result.
res2 <- p2$getDifferentialGenes(groups = p2$clusters$PCA$leiden[p2$clusters$PCA$leiden %in% c(2, 4)])

volc <- data.frame(pval = 2 * pnorm(-abs(res2[[2]]$Z)),
                   folchange = res2[[2]]$M,
                   genes = rownames(res2[[2]]))
volc$genes <- as.character(volc$genes)
# Label and highlight the first 10 and the last 11 rows of the table. The row
# count comes from the table itself instead of the hard-coded 7302.
n_genes <- nrow(volc)
labelled <- c(1:10, (n_genes - 10):n_genes)
volc$genes[-labelled] <- ""
# geom_text_repel() lives in ggrepel, which this notebook never attaches
pl <- ggplot(volc, aes(x = folchange, y = pval, label = genes)) + geom_point(color = "grey") +
  geom_point(data = volc[labelled, ], color = "red") +
  scale_y_log10() + theme_pubr() + geom_vline(xintercept = 0) +
  ggrepel::geom_text_repel(segment.size = .2, segment.color = "grey50", force = 10)


In [None]:
# For overdispersed pathway aspect number `a`: save a UMAP coloured by the
# aspect score as "aspect<a>.png" and return the pathways behind the aspect,
# sorted by decreasing z, with their short descriptions.
#
# Reads the globals `p2` (Pagoda2 object with pathway overdispersion results)
# and `p2w` (web app object, created in a later cell of this notebook).
extract.aspects <- function(a) {
  print(a)
  aspect_info <- p2$misc$pathwayODInfo[(p2$misc$pathwayOD$cnam[[a]]), ]
  aspect_info <- aspect_info[order(aspect_info$z, decreasing = TRUE), ]

  scores <- data.frame(p2$embeddings$PCA$UMAP, aspect = p2$reductions$pathwayPCA[a, ])
  # cells with the largest absolute score are drawn last
  draw_order <- order(abs(scores$aspect))
  pl <- ggplot(scores[draw_order, ]) +
    geom_point(aes(x = UMAP1, y = UMAP2), size = 1) +
    geom_point(aes(x = UMAP1, y = UMAP2, col = aspect), size = .75) +
    scale_color_gradient2(low = "darkgreen", mid = "white", high = "darkorange") +
    theme_void() +
    theme(aspect.ratio = 1, legend.position = "none")

  ggsave(paste0("aspect", a, ".png"), pl, width = 7, height = 7, dpi = 300)

  # the gene-set id is the second space-separated token of each row name
  set_ids <- sapply(strsplit(rownames(aspect_info), " "), "[", 2)
  descriptions <- sapply(p2w$geneSets[set_ids], function(x) x$properties$shortdescription)
  return(data.frame(description = descriptions, aspect_info))
}

aspects <- mclapply(1:nrow(p2$reductions$pathwayPCA), extract.aspects, mc.cores = 20)


In [None]:
# Re-run the pipeline separately on ML8+ML9, ML10 and ML11. Each object keeps
# its own clustering in `leiden` and receives the clustering of the joint
# analysis (`p2`) as `leiden2`, matched by cell name.
p2_ML8_9 <- p2wrapper(counts[, names(batch[batch %in% c("ML8", "ML9")])],
                      k = 50, selpc = FALSE, npcs = 50)
write.csv(p2_ML8_9$embeddings$PCA$UMAP, "_Output/UMAP_ML8-9.csv")
p2_ML8_9$clusters$PCA$leiden2 <- p2$clusters$PCA$leiden[names(p2_ML8_9$clusters$PCA$leiden)]
p2_ML10 <- p2wrapper(counts[, names(batch[batch %in% c("ML10")])],
                     k = 30, selpc = FALSE, npcs = 50)
p2_ML10$clusters$PCA$leiden2 <- p2$clusters$PCA$leiden[names(p2_ML10$clusters$PCA$leiden)]
# NOTE(review): p2$clusters$PCA$leiden2 is never assigned in the visible code
# (only the per-batch objects get a `leiden2`), so this may be NULL here -
# confirm whether p2$clusters$PCA$leiden was meant.
leiden2 <- p2$clusters$PCA$leiden2
# relabel value 4 as 2; every other value in 1:14 maps to itself
leiden2 <- plyr::mapvalues(leiden2, 1:14, c(1:3, 2, 5:14))



# NOTE(review): `cellmeta` is not created anywhere in the visible code; it has
# to exist in the session already.
cellmeta$labels <- p2.metadata.from.factor(leiden2, "labels", pal = cellmeta$leiden$palette[-4])



p2$clusters$PCA$labels <- leiden2
write.csv(p2_ML10$embeddings$PCA$UMAP, "_Output/UMAP_ML10.csv")
p2_ML11 <- p2wrapper(counts[, names(batch[batch %in% c("ML11")])],
                     k = 30, selpc = FALSE, npcs = 50)
p2_ML11$clusters$PCA$leiden2 <- p2$clusters$PCA$leiden[names(p2_ML11$clusters$PCA$leiden)]
write.csv(p2_ML11$embeddings$PCA$UMAP, "_Output/UMAP_ML11.csv")

save(p2, file = "_Output/p2_ML8-11.RData")
save(p2_ML8_9, file = "_Output/p2_ML8-9.RData")
save(p2_ML10, file = "_Output/p2_ML10.RData")
save(p2_ML11, file = "_Output/p2_ML11.RData")


# One UMAP per object, coloured by the joint clustering. Colours are picked
# from the 14-colour Tableau palette by cluster number, so a cluster keeps the
# same colour in every figure.
pl1 <- ggplot(data.frame(p2_ML11$embeddings$PCA$UMAP, leiden = p2_ML11$clusters$PCA$leiden2)) +
  geom_point(aes(x = UMAP1, y = UMAP2), size = 1.5) +
  geom_point(aes(x = UMAP1, y = UMAP2, col = leiden), size = 1) +
  scale_color_manual(values = tableau_color_pal("Tableau 20")(14)[as.numeric(levels(factor(p2_ML11$clusters$PCA$leiden2)))]) +
  theme_void() +
  theme(aspect.ratio = 1, legend.direction = "horizontal", legend.position = c(.8, .9))
ggsave("_Figures/ML11.png", pl1, width = 8, height = 8, dpi = 300)
pl2 <- ggplot(data.frame(p2_ML10$embeddings$PCA$UMAP, leiden = p2_ML10$clusters$PCA$leiden2)) +
  geom_point(aes(x = UMAP1, y = UMAP2), size = 1.5) +
  geom_point(aes(x = UMAP1, y = UMAP2, col = leiden), size = 1) +
  scale_color_manual(values = tableau_color_pal("Tableau 20")(14)[as.numeric(levels(factor(p2_ML10$clusters$PCA$leiden2)))]) +
  theme_void() +
  theme(aspect.ratio = 1, legend.direction = "horizontal", legend.position = c(.8, .9))
ggsave("_Figures/ML10.png", pl2, width = 8, height = 8, dpi = 300)
pl3 <- ggplot(data.frame(p2_ML8_9$embeddings$PCA$UMAP, leiden = p2_ML8_9$clusters$PCA$leiden2)) +
  geom_point(aes(x = UMAP1, y = UMAP2), size = 1.5) +
  geom_point(aes(x = UMAP1, y = UMAP2, col = leiden), size = 1) +
  scale_color_manual(values = tableau_color_pal("Tableau 20")(14)[as.numeric(levels(factor(p2_ML8_9$clusters$PCA$leiden2)))]) +
  theme_void() +
  theme(aspect.ratio = 1, legend.direction = "horizontal", legend.position = c(.8, .9))
ggsave("_Figures/ML8-9.png", pl3, width = 8, height = 8, dpi = 300)


In [None]:
# Pathway overdispersion on mouse GO sets, then export of the joint analysis as
# a pagoda2 web application.
p2$n.cores <- 3
go.env <- p2.generate.mouse.go(p2)
p2$testPathwayOverdispersion(
  setenv = go.env,
  recalculate.pca = FALSE,
  correlation.distance.threshold = 0.95
)
myGeneNames <- colnames(p2$counts)
goSets <- p2.generate.mouse.go.web(myGeneNames)
deSets <- get.de.geneset(p2, groups = p2$clusters$PCA$leiden, prefix = 'de_')
geneSets <- c(goSets, deSets)

# Prepare metadata to show on web app
library(ggthemes)
additionalMetadata <- list()

# leiden clusters, coloured with as many Tableau 20 colours as there are levels
leiden_pal <- tableau_color_pal(palette = "Tableau 20")(nlevels(p2$clusters$PCA$leiden))
additionalMetadata$leiden <- p2.metadata.from.factor(
  p2$clusters$PCA$leiden,
  displayname = 'Leiden', s = 0.7, v = 0.8, start = 0, end = 0.5,
  pal = leiden_pal
)

# developmental stage of every cell, derived from its sample
time <- as.character(batch)
time[time %in% c("ML8", "ML9")] <- "E11.5"
time[time == "ML10"] <- "E10.5"
time[time == "ML11"] <- "E9.5"
time <- factor(time)
# names are copied from the clustering, i.e. `batch` is taken to be in the same
# cell order as p2$clusters$PCA$leiden
names(time) <- names(p2$clusters$PCA$leiden)
additionalMetadata$time <- p2.metadata.from.factor(
  time,
  displayname = 'Dev Time', s = 0.7, v = 0.8, start = 0, end = 0.5
)

p2w <- make.p2.app(
  p2,
  dendrogramCellGroups = p2$clusters$PCA$leiden,
  additionalMetadata = additionalMetadata,
  geneSets = geneSets,
  show.clusters = FALSE # Hide the clusters that were used for the dendrogram from the metadata
)
p2w$serializeToStaticFast("_Output/p2w_ML8-11.bin")
saveRDS(p2w, "_Output/p2w_ML8-11.rds")

# Transfer the cluster labels of the ML10 object onto a second Pagoda2 object
# (p2_ML6_7) with conos, then plot the result and the per-cluster sample
# composition.
# NOTE(review): p2_ML6_7 is not created anywhere in the visible code; it has to
# exist in the session already.
library(conos)

# joint conos object over the two Pagoda2 objects
con <- Conos$new(list(ML6_7=p2_ML6_7,ML10=p2_ML10), n.cores=4)

con$buildGraph()

# panel plot coloured by the known ML10 labels
con$plotPanel(groups = p2_ML10$clusters$PCA$leiden2,embedding="UMAP")

# propagate the ML10 labels through the joint graph
new.label.info <- con$propagateLabels(labels = p2_ML10$clusters$PCA$leiden2, verbose=T )

# panels coloured by label uncertainty, then by the propagated labels
con$plotPanel(colors=new.label.info$uncertainty, show.legend=T, legend.title="Uncertainty", legend.pos=c(1, 0),embedding="UMAP")

con$plotPanel(groups=new.label.info$labels, show.legend=F,embedding = "UMAP")

# keep the propagated labels of the p2_ML6_7 cells as that object's `leiden2`
p2_ML6_7$clusters$PCA$leiden2=factor(new.label.info$labels[rownames(p2_ML6_7$counts)])

saveRDS(p2_ML6_7,"_Output/p2_ML6-7_epcam.rds")

# UMAP of p2_ML6_7 coloured by the transferred labels (black dot underneath as outline)
pl4=ggplot(data.frame(p2_ML6_7$embeddings$PCA$UMAP,leiden=p2_ML6_7$clusters$PCA$leiden2))+
    geom_point(aes(x=UMAP1,y=UMAP2),size=1.5)+geom_point(aes(x=UMAP1,y=UMAP2,col=leiden),size=1)+
  scale_color_manual(values=tableau_color_pal("Tableau 20")(14)[as.numeric(levels(factor(p2_ML6_7$clusters$PCA$leiden2)))])+
  theme_void()+theme(aspect.ratio = 1,legend.direction = "horizontal",legend.position = c(.8,.9))
ggsave("_Figures/ML6_7_epcam_leiden.png",pl4,width = 8,height = 8,dpi = 300)


# NOTE(review): the `batch` visible in this notebook is built from the cells of
# `p2`, not of p2_ML6_7 - confirm that a matching `batch` is in scope here.
pl5=ggplot(data.frame(p2_ML6_7$embeddings$PCA$UMAP,batch=batch))+
    geom_point(aes(x=UMAP1,y=UMAP2,col=batch),size=1,alpha=.7)+
  theme_void()+theme(aspect.ratio = 1,legend.direction = "horizontal",legend.position = c(.8,.9))
ggsave("_Figures/ML6_7_epcam_batches.png",pl5,width = 8,height = 8,dpi = 300)

# per cluster: number of cells from each sample (sample = cell-name prefix before "_")
cellcounts=sapply(levels(p2_ML6_7$clusters$PCA$leiden2),function(x)
  table(factor(sapply(strsplit(names(p2_ML6_7$clusters$PCA$leiden2[p2_ML6_7$clusters$PCA$leiden2%in%x]),"_"),"[[",1))))
# the 14th element is dropped before binding the per-cluster tables column-wise
cellcounts=do.call(cbind,cellcounts[-14])
# NOTE(review): 24524 and 32484 are presumably the total cell numbers of the two
# samples, used to correct for sample size - confirm. Each column is then
# rescaled to sum to 1.
prop=apply(cellcounts/c(24524,32484),2,function(x) x/sum(x))

# replace every cell's cluster label by the first row of `prop` for that cluster
clu=as.character(p2_ML6_7$clusters$PCA$leiden2)
for (i in colnames(prop)){clu[clu%in%(i)]=prop[1,i]}
# entries still equal to "9" after the loop are blanked
clu[clu%in%"9"]=NA
# NOTE(review): a label that has no column in `prop` is converted to its own
# cluster number here rather than to NA - confirm that this is intended.
clu=as.numeric(clu)

names(clu)=names(p2_ML6_7$clusters$PCA$leiden2)


compa=data.frame(p2_ML6_7$embeddings$PCA$UMAP,prop=clu[rownames(p2_ML6_7$embeddings$PCA$UMAP)])

# UMAP coloured by the normalised proportion; this overwrites the batch plot stored in pl5
pl5=ggplot(compa)+geom_point(aes(x=UMAP1,y=UMAP2,col=prop))+theme_bw()+
    scale_color_distiller(palette = "RdBu", limits = c(0.13,.87))+theme_void()+
  theme(aspect.ratio = 1,legend.direction = "horizontal",legend.position = c(.8,.9))

ggsave("_Figures/ML6_7_epcam_compo1.png",pl5,width = 8,height = 8,dpi = 300)

# second version: values above 0.8 are blanked and the colour scale is narrowed to 0.4-0.6
clu[clu>.8]=NA

compa=data.frame(p2_ML6_7$embeddings$PCA$UMAP,prop=clu[rownames(p2_ML6_7$embeddings$PCA$UMAP)])

pl6=ggplot(compa)+geom_point(aes(x=UMAP1,y=UMAP2,col=prop))+theme_bw()+
    scale_color_distiller(palette = "RdBu", limits = c(0.4,.6))+theme_void()+
  theme(aspect.ratio = 1,legend.direction = "horizontal",legend.position = c(.8,.9))

ggsave("_Figures/ML6_7_epcam_compo2.png",pl6,width = 8,height = 8,dpi = 300)


# Within selected clusters of p2_ML6_7, compare the samples against each other,
# write the ML6 table of every cluster to an Excel sheet and build one volcano
# plot per cluster.
# useMart()/getBM() (biomaRt), geom_text_repel() (ggrepel) and grid.arrange()
# (gridExtra) are namespace-qualified because the notebook never attaches
# those packages.
mart <- biomaRt::useMart(biomart = "ensembl", dataset = "mmusculus_gene_ensembl")

wb <- createWorkbook(type = "xlsx")
k <- 1
pl <- list()
genestostring_down <- list()
genestostring_up <- list()
for (cl in c(2, 4, 5, 6, 1, 3)) {
  selclu <- p2_ML6_7$clusters$PCA$leiden2 %in% cl
  clunames <- names(p2_ML6_7$clusters$PCA$leiden2)[selclu]
  # sample of every selected cell = cell-name prefix before "_"
  subbatch <- sapply(strsplit(clunames, "_"), "[", 1)
  names(subbatch) <- clunames
  res <- p2_ML6_7$getDifferentialGenes(groups = subbatch)
  res <- res$ML6

  sheet <- createSheet(wb, sheetName = paste0("leiden ", cl))
  xlsx.addTable(wb, sheet, res, startCol = 1, row.names = TRUE, startRow = 1)

  res$genes <- rownames(res)
  res$chr <- NA
  chr <- biomaRt::getBM(attributes = c("external_gene_name", "chromosome_name"),
                        filters = "external_gene_name", values = res$genes,
                        bmHeader = TRUE, mart = mart)
  # Match the returned names against the table and skip names that are not in
  # it; indexing data-frame rows by an unknown name makes the assignment fail.
  hit <- match(chr$`Gene name`, rownames(res))
  res$chr[hit[!is.na(hit)]] <- chr$`Chromosome/scaffold name`[!is.na(hit)]
  # Blank the label of genes matching the pattern, genes on chromosome Y and
  # genes with |M| < 0.3.
  # NOTE(review): the pattern is unanchored ("mt" matches anywhere in a name)
  # and "[s|l]" also matches a literal "|" - confirm this is intended.
  idxtorem <- grep("Rp[s|l]|mt|Hmg|Hba|Gm|Cox|Hbb",
                   rownames(res), value = FALSE)
  res$genes[idxtorem] <- ""
  res$genes[res$chr %in% "Y"] <- ""
  res$genes[abs(res$M) < .3] <- ""

  # labelled genes with negative / positive M
  down_genes <- res$genes[res$M < 0]
  up_genes <- res$genes[res$M > 0]
  genestostring_down[[k]] <- down_genes[!down_genes %in% ""]
  genestostring_up[[k]] <- up_genes[!up_genes %in% ""]


  volc <- data.frame(pval = 2 * pnorm(-abs(res$Z)), folchange = res$M, genes = (res$genes))
  pl[[k]] <- ggplot(volc, aes(x = folchange, y = pval, label = genes)) + geom_point(color = "grey") +
    geom_point(data = subset(volc, abs(folchange) > .3), color = "red") +
    scale_y_log10() + theme_pubr() + geom_vline(xintercept = 0) +
    ggrepel::geom_text_repel(segment.size = .2, segment.color = "grey50", force = 10) + ggtitle(cl)

  # res=res[abs(res$M)>.15,]
  # average=data.frame(ML6=colMeans(p2_ML6_7$counts[clunames[subbatch%in%"ML6"],]),
  #                    ML7=colMeans(p2_ML6_7$counts[clunames[subbatch%in%"ML7"],]))
  # #average=log1p(average)
  # average$genes=""
  # idxtorem=grep("Rp[s|l]|mt|Hmg|Hba|Gm|Cox|Hbb",
  #               rownames(res),value = F)
  # average[rownames(res)[-idxtorem],]$genes=rownames(res)[-idxtorem]
  # 
  # print(length(rownames(res)[-idxtorem]))
  # pl[[k]]=(ggplot(average,aes(x=ML6,y=ML7,label=genes))+
  #   geom_point(color = ifelse(average$genes == "", "grey50", "red"))+theme_pubr()+
  #   geom_text_repel()+ggtitle(cl))
  k <- k + 1
}

saveWorkbook(wb, "ML6-7_epcam_tables.xlsx")

ggsave("volcplots.png", do.call(gridExtra::grid.arrange, c(pl, ncol = 3, nrow = 2)), width = 18, height = 12, dpi = 300)

ggsave("final.png", do.call(gridExtra::grid.arrange, c(pl, ncol = 4, nrow = 2)), width = 20, height = 10, dpi = 300)

# Panels 1 and 8 were requested, but the loop builds only six plots, so
# pl[c(1, 8)] would hand a NULL panel to grid.arrange(). Keep the requested
# panels that actually exist.
final2_panels <- pl[intersect(c(1, 8), seq_along(pl))]
ggsave("final2.png", do.call(gridExtra::grid.arrange, c(final2_panels, ncol = 2, nrow = 1)), width = 16, height = 8, dpi = 300)


# Focus on ML10 (E10.5)



In [None]:
library(Seurat)

# Seurat object from the raw counts kept inside the Pagoda2 object (transposed
# before being handed to Seurat).
s <- CreateSeuratObject(Matrix::t(p2$misc$rawCounts))

# Seurat's built-in cc.genes lists hold human (upper-case) symbols, while this
# dataset uses mouse symbols (mitochondrial genes are matched with "^mt-" and
# the GO sets / biomart dataset used elsewhere are mouse). Convert the symbols
# to the mouse capitalisation ("MCM5" -> "Mcm5") so they can match row names.
# NOTE(review): this is a case conversion, not an orthologue mapping; the few
# genes whose mouse symbol differs from the human one will still be missed.
to_mouse_case <- function(symbols) {
  paste0(substr(symbols, 1, 1), tolower(substring(symbols, 2)))
}
s.genes <- to_mouse_case(cc.genes$s.genes)
g2m.genes <- to_mouse_case(cc.genes$g2m.genes)

s <- CellCycleScoring(s, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)


# reuse the pagoda2 UMAP so the Seurat plots share the same layout
s[["umap"]] <- CreateDimReducObject(embeddings = p2$embeddings$PCA$UMAP, key = "UMAP_", assay = DefaultAssay(s))


UMAPPlot(s)
