# **CellphoneDB**

## R: data

In [None]:
# Export the two input tables for CellphoneDB from the saved Seurat object:
# an expression table and a cell -> label metadata table.
pbmc <- readRDS("/disk213/xieqq/JINHUA138.sc/RDS/CellLineage.rds")

# Convert the RNA assay's `data` slot to a dense matrix and write it
# tab-separated; row and column names are written unquoted.
counts <- as.matrix(pbmc@assays$RNA@data)
write.table(counts, "cellphonedb_count.txt", sep = "\t", quote = FALSE)

# Metadata table: cell names (row names of meta.data) plus one label column.
# meta_data <- cbind(rownames(pbmc@meta.data), pbmc@meta.data[, "CellType", drop = FALSE])
meta_data <- cbind(rownames(pbmc@meta.data), pbmc@meta.data[, "CellLineage", drop = FALSE])
meta_data <- as.matrix(meta_data)
# Cells without a label get a placeholder so the label column has no NA.
# NOTE(review): "Unkown" looks like a typo for "Unknown"; it is kept as-is
# because the label is written to the metadata file and any existing
# CellphoneDB output was produced with this spelling - confirm before renaming.
meta_data[is.na(meta_data)] <- "Unkown"
write.table(meta_data, "cellphonedb_meta.txt", sep = "\t", quote = FALSE, row.names = FALSE)

## cellphonedb

In [None]:
# Run the CellphoneDB statistical analysis from the 4.CellLineage directory.
# Positional arguments are the metadata table and the count table; the file
# names match those written by the R export cell (presumably written to or
# copied into this directory - confirm). --counts-data=gene_name declares the
# kind of gene identifier used in the count table.
cd /disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/4.CellLineage
cellphonedb method statistical_analysis cellphonedb_meta.txt cellphonedb_count.txt --counts-data=gene_name

In [None]:
# NOTE(review): this cell works in 3.Large, not the 4.CellLineage directory
# used by the statistical_analysis cell - confirm which run is intended.
cd /disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large
# Both plot commands are called without explicit input/output paths, so they
# rely on the tool's defaults (presumably the ./out results folder - confirm).
cellphonedb plot dot_plot
cellphonedb plot heatmap_plot cellphonedb_meta.txt
# List the files now present under out/.
tree out

## R: plot

In [None]:
library(Seurat)
library(dplyr)
library(psych)
library(qgraph)
library(igraph)
library(tidyverse)
library(pheatmap)

In [None]:
# Load the cell-pair interaction counts (presumably written by
# `cellphonedb plot heatmap_plot` - confirm) and draw them as a graph.
mynet <- read.delim("./out/count_network.txt", check.names = FALSE)
# table(mynet$count)
# Keep only pairs with at least one interaction.
mynet <- mynet %>%
  filter(count > 0)
head(mynet)
unique(mynet$SOURCE)
# Build the graph from the edge table; remaining columns become edge attributes.
net <- graph_from_data_frame(mynet)
plot(net)

In [None]:
# Vertex fill colours for the network plot (eight entries).
allcolour <- c(
  "#7570b3", "#1b9e77", "#EE9B00", "#FF220C",
  "#6F5E5C", "#e7298a", "#CA6702", "#6699CC"
)
# Alternative palettes kept for reference:
# allcolour=c('#004B23','#95D5B2','#184E77','#1A759F','#38B000','#52B788','#D8F3DC','#34A0A4')
# allcolour=c('#52B788','#95D5B2','#007200','#D8F3DC','#38B000','#1A759F','#184E77','#34A0A4')

In [None]:
# Circular layout in which vertices are ordered by their community
# membership, so members of one community are placed next to each other.
karate_groups <- cluster_optimal(net)
vertex_order <- order(membership(karate_groups))
coords <- layout_in_circle(net, order = vertex_order)

# Edge width is proportional to the interaction count of the edge.
E(net)$width <- E(net)$count / 10
plot(
  net,
  layout = coords,
  edge.arrow.size = .1,
  edge.curved = 0,
  vertex.color = allcolour,
  vertex.frame.color = "#555555",
  vertex.label.color = "black",
  vertex.label.cex = .7
)

## R: dot plots

#### type

In [None]:
# Set the working directory; the relative file names passed to write.csv(),
# read.csv() and pdf() in the following cells resolve against it.
setwd("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB")

In [None]:
# Extract the per-interaction annotation columns of the 2.Small p-value table,
# sort them by interaction id and save them as a lookup table.
mypvals1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/pvalues.txt", check.names = FALSE)
mypvals_inf <- mypvals1 %>%
  dplyr::select(
    "id_cp_interaction", "interacting_pair",
    "partner_a", "partner_b",
    "gene_a", "gene_b",
    "secreted", "receptor_a", "receptor_b",
    "annotation_strategy", "is_integrin"
  ) %>%
  arrange(id_cp_interaction)
write.csv(mypvals_inf, "pldf_inf.csv")
head(mypvals_inf)

In [None]:
# Direction: other cell -> Enterocytes / Colonocytes.
# From each run keep the pair name plus the columns whose name ends with the
# target cell type: "|Enterocytes" from 2.Small, "|Colonocytes" from 3.Large.
mypvals1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Enterocytes"))
mymeans1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Enterocytes"))

mypvals2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Colonocytes"))
mymeans2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Colonocytes"))

# Row counts of the two mean tables, shown for a quick sanity check.
nrow(mymeans1)
nrow(mymeans2)
# Combine both runs side by side; pairs present in only one run are kept and
# get NA in the other run's columns.
mypvals <- full_join(mypvals1, mypvals2, by = "interacting_pair")
mymeans <- full_join(mymeans1, mymeans2, by = "interacting_pair")

In [None]:
# Reshape both wide tables (one column per cell-type pair) to long format.
# `id.vars` is stated explicitly: when it is omitted, melt() picks the id
# columns by column type, so a pair column that was not read as numeric
# (e.g. an all-NA column, which read.delim() types as logical) would be
# treated as an extra id column and the three-name colnames<- below would
# then mislabel the result.
meansdf <- reshape2::melt(mymeans, id.vars = "interacting_pair")
colnames(meansdf) <- c("interacting_pair", "CtoC", "means")

pvalsdf <- reshape2::melt(mypvals, id.vars = "interacting_pair")
colnames(pvalsdf) <- c("interacting_pair", "CtoC", "pvals")

# Composite "<pair>_<cell pair>" label, used as one of the merge keys later.
pvalsdf$joinlab <- paste0(pvalsdf$interacting_pair, "_", pvalsdf$CtoC)
meansdf$joinlab <- paste0(meansdf$interacting_pair, "_", meansdf$CtoC)

In [None]:
# Join p-values and means per (pair, cell pair), keep rows with p < 0.05
# (rows with a missing p-value are dropped) and write them to test.csv.
pldf <- merge(x = pvalsdf, y = meansdf, by = c("joinlab", "interacting_pair", "CtoC"))
pldf <- subset(pldf, pvals < 0.05)
write.csv(pldf, file = "test.csv")

In [None]:
# Significant interactions (p < 0.05) in the direction other cell -> EC.
pldf <- merge(pvalsdf, meansdf, by = c("joinlab", "interacting_pair", "CtoC"))
pldf <- pldf[which(pldf$pvals < 0.05), ]
# pldf <- pldf[-which(pldf$CtoC=="Colonocytes|Colonocytes"|pldf$CtoC=="Enterocytes|Enterocytes"),]
# Split "From|To" into two columns while keeping CtoC. The column is referred
# to by name (previously by position 3) so the step does not depend on the
# column order produced by merge().
pldf <- separate(pldf, col = CtoC, into = c("From", "To"), sep = "[|]", remove = FALSE)
# Key = interacting pair + sending cell type.
pldf$Info <- paste(pldf$interacting_pair, pldf$From, sep = "_")

# Keep only keys that occur more than once, i.e. the same pair from the same
# sending cell type is significant towards more than one target.
pldf <- subset(pldf, pldf$Info %in% pldf$Info[duplicated(pldf$Info)])
pldf <- arrange(pldf, Info, To)
write.csv(pldf, "pldf_same_toEC.csv")

length(pldf$Info[duplicated(pldf$Info)])
head(pldf)

In [None]:
# Direction: Enterocytes / Colonocytes -> other cell.
# From each run keep the pair name plus the columns whose name starts with the
# sending cell type: "Enterocytes|" from 2.Small, "Colonocytes|" from 3.Large.
mypvals1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Enterocytes|"))
mymeans1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Enterocytes|"))

mypvals2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Colonocytes|"))
mymeans2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Colonocytes|"))

# Row counts of the two mean tables, shown for a quick sanity check.
nrow(mymeans1)
nrow(mymeans2)
# Combine both runs side by side; pairs present in only one run are kept and
# get NA in the other run's columns.
mypvals <- full_join(mypvals1, mypvals2, by = "interacting_pair")
mymeans <- full_join(mymeans1, mymeans2, by = "interacting_pair")

In [None]:
# Reshape both wide tables (one column per cell-type pair) to long format.
# `id.vars` is stated explicitly: when it is omitted, melt() picks the id
# columns by column type, so a pair column that was not read as numeric
# (e.g. an all-NA column, which read.delim() types as logical) would be
# treated as an extra id column and the three-name colnames<- below would
# then mislabel the result.
meansdf <- reshape2::melt(mymeans, id.vars = "interacting_pair")
colnames(meansdf) <- c("interacting_pair", "CtoC", "means")

pvalsdf <- reshape2::melt(mypvals, id.vars = "interacting_pair")
colnames(pvalsdf) <- c("interacting_pair", "CtoC", "pvals")

# Composite "<pair>_<cell pair>" label, used as one of the merge keys later.
pvalsdf$joinlab <- paste0(pvalsdf$interacting_pair, "_", pvalsdf$CtoC)
meansdf$joinlab <- paste0(meansdf$interacting_pair, "_", meansdf$CtoC)

In [None]:
# Significant interactions (p < 0.05) in the direction EC -> other cell.
pldf <- merge(pvalsdf, meansdf, by = c("joinlab", "interacting_pair", "CtoC"))
pldf <- pldf[which(pldf$pvals < 0.05), ]
# pldf <- pldf[-which(pldf$CtoC=="Colonocytes|Colonocytes"|pldf$CtoC=="Enterocytes|Enterocytes"),]
# Split "From|To" into two columns while keeping CtoC. The column is referred
# to by name (previously by position 3) so the step does not depend on the
# column order produced by merge().
pldf <- separate(pldf, col = CtoC, into = c("From", "To"), sep = "[|]", remove = FALSE)
# Key = interacting pair + receiving cell type.
pldf$Info <- paste(pldf$interacting_pair, pldf$To, sep = "_")

# Keep only keys that occur more than once, i.e. the same pair towards the
# same receiving cell type is significant from more than one sender.
pldf <- subset(pldf, pldf$Info %in% pldf$Info[duplicated(pldf$Info)])
pldf <- arrange(pldf, Info, To)
write.csv(pldf, "pldf_same_ECto.csv")

length(pldf$Info[duplicated(pldf$Info)])
head(pldf)

In [None]:
# Stack the two per-direction tables, tagging each row with its direction.
plot_cols <- c("interacting_pair", "CtoC", "From", "To", "pvals", "means")

pldf1 <- read.csv("pldf_same_ECto.csv", row.names = 1, check.names = FALSE)
pldf1 <- pldf1[, plot_cols]
pldf1$Info <- "ECtoCell"

pldf2 <- read.csv("pldf_same_toEC.csv", row.names = 1, check.names = FALSE)
pldf2 <- pldf2[, plot_cols]
pldf2$Info <- "CelltoEC"

pldf <- rbind(pldf1, pldf2)
# A row present in both files is kept once, with the label of its first
# occurrence ("ECtoCell", since pldf1 is stacked first).
is_repeat <- duplicated(pldf[, plot_cols])
pldf <- pldf[!is_repeat, ]
write.csv(pldf, "pldf_same.csv")

In [None]:
# Display form of the pair names: every "_" becomes "/".
pldf$interacting_pair <- gsub("_", "/", pldf$interacting_pair, fixed = TRUE)

In [None]:
# Show the cell-pair labels present in pldf, for comparison with the x-axis
# order defined next.
unique(pldf$CtoC)
# x-axis order: the seven other cell types towards Colonocytes, then towards
# Enterocytes, then Colonocytes towards each of them, then Enterocytes.
other_cells <- c("BEST4enterocytes", "EECs", "Goblet", "Progenitor", "Stem", "TA", "Tuft")
x_order <- c(
  paste0(other_cells, "|Colonocytes"),
  paste0(other_cells, "|Enterocytes"),
  paste0("Colonocytes|", other_cells),
  paste0("Enterocytes|", other_cells)
)

In [None]:
# Print the distinct pair labels, then store them sorted in decreasing order
# as the y-axis limits.
unique(pldf$interacting_pair)
y_order <- pldf$interacting_pair %>%
  unique() %>%
  sort(decreasing = TRUE)

In [None]:
# Dot plot of the shared interactions: colour encodes the p-value, point size
# the mean; one facet per direction label in `Info`.
p <- ggplot(pldf, aes(x = CtoC, y = interacting_pair, color = pvals, size = means)) +
  geom_point(shape = 16) +
  # Fixed axis orders; x values not listed in x_order are not drawn.
  scale_x_discrete(limits = x_order) +
  scale_y_discrete(limits = y_order) +
  scale_color_gradient(limits = c(0, 0.05), low = "#08519C", high = "white") +
  labs(x = "", y = "", title = "", color = "P-value", size = "Means") +
  facet_wrap(Info ~ .) +
  theme_bw() +
  theme(
    axis.text.x = element_text(color = "black", family = "Times", size = 12, angle = 90, hjust = 1),
    axis.text.y = element_text(color = "black", family = "Times", size = 12),
    legend.text = element_text(color = "black", family = "Times", size = 12),
    legend.title = element_text(color = "black", family = "Times", size = 14),
    # panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
p
pdf(file = "CellphoneDB_same.pdf", width = 12, height = 10)
# Explicit print(): a bare `p` is only drawn through top-level auto-printing,
# so the PDF would stay empty when this code is run via source() or from
# inside a function or loop.
print(p)
dev.off()

#### top

In [None]:
# Set the working directory; the relative file names passed to write.csv(),
# read.csv() and pdf() in the following cells resolve against it.
setwd("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB")

In [None]:
# Direction: other cell -> Enterocytes / Colonocytes.
# From each run keep the pair name plus the columns whose name ends with the
# target cell type: "|Enterocytes" from 2.Small, "|Colonocytes" from 3.Large.
# NOTE(review): means are read from significant_means.txt here, whereas the
# opposite-direction cell of this section reads means.txt - confirm which
# file is intended.
mypvals1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Enterocytes"))
mymeans1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/significant_means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Enterocytes"))

mypvals2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Colonocytes"))
mymeans2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/significant_means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", ends_with("|Colonocytes"))

# Row counts of the two mean tables, shown for a quick sanity check.
nrow(mymeans1)
nrow(mymeans2)
# Combine both runs side by side; pairs present in only one run are kept and
# get NA in the other run's columns.
mypvals <- full_join(mypvals1, mypvals2, by = "interacting_pair")
mymeans <- full_join(mymeans1, mymeans2, by = "interacting_pair")

In [None]:
# Reshape both wide tables (one column per cell-type pair) to long format.
# `id.vars` is stated explicitly: when it is omitted, melt() picks the id
# columns by column type. The means table of this section can contain columns
# that are entirely NA; read.delim() types such a column as logical, melt()
# would treat it as an extra id column, and the three-name colnames<- below
# would then mislabel the result.
meansdf <- reshape2::melt(mymeans, id.vars = "interacting_pair")
colnames(meansdf) <- c("interacting_pair", "CtoC", "means")

pvalsdf <- reshape2::melt(mypvals, id.vars = "interacting_pair")
colnames(pvalsdf) <- c("interacting_pair", "CtoC", "pvals")

# Composite "<pair>_<cell pair>" label, used as one of the merge keys later.
pvalsdf$joinlab <- paste0(pvalsdf$interacting_pair, "_", pvalsdf$CtoC)
meansdf$joinlab <- paste0(meansdf$interacting_pair, "_", meansdf$CtoC)

In [None]:
# Top five significant interactions per cell pair, direction other cell -> EC.
pldf <- merge(pvalsdf, meansdf, by = c("joinlab", "interacting_pair", "CtoC"))
# which() drops comparisons that evaluate to NA; plain logical indexing would
# turn a row with a mean but a missing p-value into an all-NA row.
pldf <- pldf[which(!is.na(pldf$means) & pldf$pvals < 0.05), ]
# Split "From|To" while keeping CtoC; the column is referred to by name
# (previously by position 3) so the step does not depend on merge()'s
# column order.
pldf <- separate(pldf, col = CtoC, into = c("From", "To"), sep = "[|]", remove = FALSE)
pldf$Info <- paste(pldf$interacting_pair, pldf$To, sep = "_")

pldf <- arrange(pldf, CtoC, desc(means))
# Five largest means per cell pair (ties at the cut-off are all kept);
# ungroup() so later steps do not inherit the grouping.
pldf <- pldf %>%
  group_by(CtoC) %>%
  top_n(5, means) %>%
  ungroup()
write.csv(pldf, "pldf_sig_top5_toEC.csv")

length(pldf$Info[duplicated(pldf$Info)])
head(pldf)

In [None]:
# Direction: Enterocytes / Colonocytes -> other cell.
# From each run keep the pair name plus the columns whose name starts with the
# sending cell type: "Enterocytes|" from 2.Small, "Colonocytes|" from 3.Large.
# NOTE(review): means are read from means.txt here, whereas the
# opposite-direction cell of this section reads significant_means.txt -
# confirm which file is intended.
mypvals1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Enterocytes|"))
mymeans1 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/2.Small/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Enterocytes|"))

mypvals2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/pvalues.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Colonocytes|"))
mymeans2 <- read.delim("/disk213/xieqq/JINHUA138/Single_cell_analysis.12.CellphoneDB/3.Large/out/means.txt", check.names = FALSE) %>%
  dplyr::select("interacting_pair", starts_with("Colonocytes|"))

# Row counts of the two mean tables, shown for a quick sanity check.
nrow(mymeans1)
nrow(mymeans2)
# Combine both runs side by side; pairs present in only one run are kept and
# get NA in the other run's columns.
mypvals <- full_join(mypvals1, mypvals2, by = "interacting_pair")
mymeans <- full_join(mymeans1, mymeans2, by = "interacting_pair")

In [None]:
# Reshape both wide tables (one column per cell-type pair) to long format.
# `id.vars` is stated explicitly: when it is omitted, melt() picks the id
# columns by column type, so a pair column that was not read as numeric
# (e.g. an all-NA column, which read.delim() types as logical) would be
# treated as an extra id column and the three-name colnames<- below would
# then mislabel the result.
meansdf <- reshape2::melt(mymeans, id.vars = "interacting_pair")
colnames(meansdf) <- c("interacting_pair", "CtoC", "means")

pvalsdf <- reshape2::melt(mypvals, id.vars = "interacting_pair")
colnames(pvalsdf) <- c("interacting_pair", "CtoC", "pvals")

# Composite "<pair>_<cell pair>" label, used as one of the merge keys later.
pvalsdf$joinlab <- paste0(pvalsdf$interacting_pair, "_", pvalsdf$CtoC)
meansdf$joinlab <- paste0(meansdf$interacting_pair, "_", meansdf$CtoC)

In [None]:
# Top five significant interactions per cell pair, direction EC -> other cell.
pldf <- merge(pvalsdf, meansdf, by = c("joinlab", "interacting_pair", "CtoC"))
# which() drops comparisons that evaluate to NA; plain logical indexing would
# turn a row with a mean but a missing p-value into an all-NA row.
pldf <- pldf[which(!is.na(pldf$means) & pldf$pvals < 0.05), ]
# Split "From|To" while keeping CtoC; the column is referred to by name
# (previously by position 3) so the step does not depend on merge()'s
# column order.
pldf <- separate(pldf, col = CtoC, into = c("From", "To"), sep = "[|]", remove = FALSE)
pldf$Info <- paste(pldf$interacting_pair, pldf$From, sep = "_")

pldf <- arrange(pldf, CtoC, desc(means))
# Five largest means per cell pair (ties at the cut-off are all kept);
# ungroup() so later steps do not inherit the grouping.
pldf <- pldf %>%
  group_by(CtoC) %>%
  top_n(5, means) %>%
  ungroup()
write.csv(pldf, "pldf_sig_top5_ECto.csv")

length(pldf$Info[duplicated(pldf$Info)])
head(pldf)

In [None]:
# Stack the two per-direction top-5 tables, tagging each row with its direction.
plot_cols <- c("interacting_pair", "CtoC", "From", "To", "pvals", "means")

pldf1 <- read.csv("pldf_sig_top5_ECto.csv", row.names = 1, check.names = FALSE)
pldf1 <- pldf1[, plot_cols]
pldf1$Info <- "ECtoCell"

pldf2 <- read.csv("pldf_sig_top5_toEC.csv", row.names = 1, check.names = FALSE)
pldf2 <- pldf2[, plot_cols]
pldf2$Info <- "CelltoEC"

pldf <- rbind(pldf1, pldf2)
# A row present in both files is kept once, with the label of its first
# occurrence ("ECtoCell", since pldf1 is stacked first).
is_repeat <- duplicated(pldf[, plot_cols])
pldf <- pldf[!is_repeat, ]
write.csv(pldf, "pldf_sig_top5.csv")

In [None]:
# Display form of the pair names: every "_" becomes "/".
pldf$interacting_pair <- gsub("_", "/", pldf$interacting_pair, fixed = TRUE)
# Self-interactions of Colonocytes / Enterocytes are put in the "CelltoEC" facet.
pldf$Info[pldf$CtoC %in% c("Colonocytes|Colonocytes", "Enterocytes|Enterocytes")] <- "CelltoEC"

In [None]:
# Show the cell-pair labels present in pldf, for comparison with the x-axis
# order defined next.
unique(pldf$CtoC)
# x-axis order: the two self-interactions first, then the seven other cell
# types towards Colonocytes, towards Enterocytes, and finally Colonocytes and
# Enterocytes towards each of them.
other_cells <- c("BEST4enterocytes", "EECs", "Goblet", "Progenitor", "Stem", "TA", "Tuft")
x_order <- c(
  "Colonocytes|Colonocytes", "Enterocytes|Enterocytes",
  paste0(other_cells, "|Colonocytes"),
  paste0(other_cells, "|Enterocytes"),
  paste0("Colonocytes|", other_cells),
  paste0("Enterocytes|", other_cells)
)

In [None]:
# Print the distinct pair labels, then store them sorted in decreasing order.
unique(pldf$interacting_pair)
y_order <- pldf$interacting_pair %>%
  unique() %>%
  sort(decreasing = TRUE)

In [None]:
# Build the plotting table: sort by descending mean, then reorder rows to
# follow the x-axis order of their cell pair.
newdata <- subset(pldf, select = c("interacting_pair", "CtoC", "pvals", "means", "Info"))
newdata <- arrange(newdata, desc(means))
newdata <- newdata[order(match(newdata$CtoC, x_order)), ]
# Running row index; seq_len() (instead of 1:nrow()) also works for an empty
# table, where 1:0 would produce two values and the assignment would fail.
newdata$order <- seq_len(nrow(newdata))
# Only the first occurrence of each pair keeps its index; repeats become NA.
newdata$order[which(duplicated(newdata$interacting_pair))] <- NA

In [None]:
# Give every row the `order` value of the first row of its interacting pair
# and group the rows by pair, in order of first appearance. This replaces a
# loop that grew plot_data with rbind() and shadowed base::order / data.
first_row <- match(newdata$interacting_pair, newdata$interacting_pair)
plot_data <- as.data.frame(newdata)
plot_data$order <- newdata$order[first_row]
# order() leaves ties in their original order, so rows of one pair stay in the
# same relative order as in newdata.
plot_data <- plot_data[order(first_row), ]

In [None]:
# Dot plot of the top interactions: colour encodes the p-value, point size the
# mean; the y axis is sorted by the `order` column (smallest at the top), one
# facet per direction label in `Info`.
ego <- ggplot(plot_data, aes(CtoC, reorder(interacting_pair, -order))) +
  geom_point(aes(size = means, color = pvals), shape = 16) +
  scale_color_gradient(limits = c(0, 0.05), low = "#08519C", high = "white") +
  labs(x = "", y = "", title = "", size = "Means", color = "P-value") +
  scale_x_discrete(limits = x_order) +
  facet_wrap(Info ~ .) +
  theme_bw() +
  theme(
    axis.text.x = element_text(color = "black", family = "Times", size = 12, angle = 90, hjust = 1),
    axis.text.y = element_text(color = "black", family = "Times", size = 12),
    legend.text = element_text(color = "black", family = "Times", size = 12),
    legend.title = element_text(color = "black", family = "Times", size = 14),
    # panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
ego
pdf(file = "CellphoneDB_sig.pdf", width = 12, height = 8)
# Explicit print(): a bare `ego` is only drawn through top-level
# auto-printing, so the PDF would stay empty when this code is run via
# source() or from inside a function or loop.
print(ego)
dev.off()

## python: gene expression

In [None]:
# Subset `adata` to the cells labelled Colonocytes and Enterocytes respectively.
cell_type = adata.obs['CellType']
COL = adata[cell_type.isin(['Colonocytes'])]
ENT = adata[cell_type.isin(['Enterocytes'])]

In [None]:
# Genes to show in the dot plot, grouped under the two cell-type labels.
# Some genes are listed more than once; the two lists have equal length
# (presumably aligned pairwise as interaction partners - confirm).
colonocyte_genes = [
    'DSC2', 'COL17A1', 'COL17A1', 'LGALS9', 'EGFR', 'LGALS9',
    'CD46', 'FAM3C', 'LAMP1', 'GHSR', 'LRP5',
]
enterocyte_genes = [
    'DSG2', 'ITGA2', 'ITGB1', 'SORL1', 'AREG', 'PTPRK',
    'JAG1', 'CLEC2D', 'FAM3C', 'LEAP2', 'FAM3B',
]
marker_genes_dict = {
    'Colonocytes': colonocyte_genes,
    'Enterocytes': enterocyte_genes,
}
# Dot plot grouped by CellType, colour scale fixed to [0, 1], saved to file.
sc.pl.dotplot(
    adata,
    var_names=marker_genes_dict,
    groupby='CellType',
    cmap='Blues',
    vmin=0,
    vmax=1,
    save='Gene_sig_withinE.pdf',
)