/
LineageFunctions.R
137 lines (112 loc) · 3.95 KB
/
LineageFunctions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
lineageCluster<-function(InputData,genes){
res<-list()
resj<-list()
for(i in 1:length(InputData)){
InData<-InputData[[i]][genes,]
InData<-scale(t(na.omit(InData)))
for(j in 1:100){
resj[[j]]<-NbClust(InData, distance = "euclidean", min.nc=2, max.nc=15,
method = "kmeans", index = "silhouette")$All.index
}
res[[i]]<-matrix(unlist(resj),ncol=14,byrow=T)
}
names(res)<-names(InputData)
return(res)
}
RepeatKmeansAMI<-function(InputData,genes,nrepeat=100,k,annot){
res<-list()
kres<-list()
ami<-list()
totwith<-list()
betweens<-list()
for(i in 1:length(InputData)){
if(genes=="var"){
rvs<-rowVars(InputData[[i]],na.rm=T)
names(rvs)<-rownames(InputData[[i]])
rvs<-rvs[order(rvs,decreasing=TRUE)]
genes<-names(rvs)[1:500]
InData<-InputData[[i]][genes,]
}else{
InData<-InputData[[i]][genes,]}
InData<-scale(t(na.omit(InData)))
rownames(InData)<-colnames(InputData[[i]])
colnames(InData)<-genes
incCL<-intersect(rownames(InData),names(annot))
InData<-InData[incCL,]
annot<-annot[incCL]
InData<-InData[!is.na(annot[rownames(InData)]),]
annot<-annot[!is.na(annot)]
for(j in 1:nrepeat){
temp<-kmeans(InData,k)
kres[[j]]<-temp$cluster
ami[[j]]<-AMI(temp$cluster,annot[names(temp$cluster)])
totwith[[j]]<-temp$tot.withinss
betweens[[j]]<-temp$betweenss
}
res[[i]]<-list(ami=ami,totwithss=totwith,betweenss=betweens)
}
names(res)<-names(InputData)
return(res)
}
plotAMI<-function(AMIlist,plotcolours,pname){
batchnames<-names(AMIlist)
preProc<-names(AMIlist[[1]])
AMIData<-NULL
for(i in 1:length(AMIlist)){
for(j in 1:3){
temp<-data.frame(ami=unlist(AMIlist[[i]][[j]]$ami),preProc=rep(preProc[j],100),batch=rep(batchnames[i],100))
AMIData<-rbind(AMIData,temp)
}
}
AMIData$preProc<-factor(AMIData$preProc,levels=c("CCR","CCRJ","CERES"))
AMIData$batch<-factor(AMIData$batch,levels=c("ComBat","ComBatQN","ComBatPC1","ComBatPC2"))
dodge <- position_dodge(width = 1)
AMIplot<-ggplot(AMIData,aes(x=preProc,y=ami,fill=batch))+scale_fill_manual(values=plotcolours)+theme(axis.text.x=element_text(angle=45))+ylab("AMI")+xlab("")+geom_boxplot(position = dodge,width=0.5)+theme_bw()+theme(legend.position = "none",legend.background = element_blank(),legend.title=element_blank())
pdf(paste0(dir.Results,"/LineageAMI",pname,".pdf"))
print(AMIplot)
dev.off()
}
classPerfLineage<-function(dataset,qualityTH=Inf,QC=NULL,weights=NULL,geneset=NULL,distmetric="Cor",lineagelabel,withRange=1){
if(!is.null(geneset)){
dataset<-dataset[intersect(rownames(dataset),geneset),]
}
dataset<-na.omit(dataset)
clnames<-colnames(dataset)
labnames<-intersect(clnames,names(lineagelabel))
dataset<-dataset[,labnames]
lineagelabel<-lineagelabel[labnames]
if(distmetric=="Cor"){
cdist<-as.matrix(1-cor(dataset))
}
if(distmetric=="Euc"){
cdist<-as.matrix(dist(t(dataset),method = 'euclidean'))
}
if(distmetric=="Jacc"){
cdist<-as.matrix(dist(t(dataset),method="binary"))
}
if(!is.null(weights)){
weights<-weights[rownames(dataset)]
weights[weights<0]<-0
weightmat<-matrix(weights,nrow=nrow(dataset),ncol=ncol(dataset))
cdist<-as.matrix(1-cor.wt(dataset,w=weights)$r)
}
ncls<-ncol(cdist)
cdist<-cdist[names(lineagelabel),names(lineagelabel)]
MATCH<-NULL
for(i in 1:ncls){
lineage<-lineagelabel[i]
neighbours<-colnames(cdist)[setdiff(order(cdist[i,]),i)]
closestLineage<-which(lineagelabel[neighbours]==lineage)[1]
mRow<-rep(0,length(neighbours))
mRow[closestLineage]<-1
MATCH<-rbind(MATCH,mRow)
}
MATCH<-MATCH+0
rownames(MATCH)<-rownames(cdist)
curv<-cumsum(colSums(MATCH))/ncol(cdist)
return(list(MATCHmat=MATCH,CURV=curv,nb=neighbours))
}
getLRTgenes<-function(normLRTlist,thresh=200){
glist<-lapply(normLRTlist,function(x) names(x[[3]])[x[[3]]>thresh])
return(unique(unlist(glist)))
}