-
Notifications
You must be signed in to change notification settings - Fork 0
/
wholeproc_code.R
124 lines (91 loc) · 3.35 KB
/
wholeproc_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
load("dati_bbstest.RData")
source("modes_functions.R")
library(ks); library(ClusterR);library(snow); library(snowfall); library(ggplot2)
rm(hgridbs)
#####################
# #phase 1: variable selection
ngiri <- 1000
nvar <- 3
p <- rep(1/NCOL(datib),NCOL(datib))
keep <- matrix(NA,nrow=ngiri,ncol=nvar)
poss <- matrix(NA,nrow=ngiri,ncol=1)
for (i in 1:ngiri) {
keep[i,] <- sample(1:ncol(datib),size=nvar,prob=p)
}
set.seed(12345)
sub <- matrix(sample(1:nrow(datib),nrow(datibs), rep=F), ncol=1)
sfInit(parallel=TRUE,cpus=3)
sfExport("datib","datibs","keep","poss","ngiri", "sub")
sfLibrary(ks)
poss <- sfSapply(1:ngiri, function(x) {
hpib <- Hns(datib[sub,keep[x,]])
poss[x] <- kde.test(datib[sub,keep[x,]],datibs[,keep[x,]],H1=hpib,H2=hpib)$pvalue
})
whichplow <- which(poss<0.01)
tot.examined <- table(keep)
tot.relevant <- table(keep[whichplow,])
prop.relevant <- tot.relevant/tot.examined
barplot(prop.relevant)
save.image("finale.Rdata")
#####################
########################################################################
# phase 2: background density estimation
var.sel<- c(7,23)
datib.red <- datib[,var.sel]
datib.red <- scale(datib.red)
datibs.red <- datibs[,var.sel]
datibs.red <- scale(datibs.red, attributes(datib.red)$`scaled:center`, attributes(datib.red)$`scaled:scale`)
plot(kde(datib.red))
plot(kde(datibs.red))
plot(kde(datitest.red))
clb <- kms(datib.red, tol.clust=0.05)
clb$nclust
clb$H
plot(kde(datib.red, H=clb$H))
save.image("finale.Rdata")
################################################
#phase 3: hbs selection
hgridbs <- c(0.010, 0.015, 0.035, 0.060, 0.070, 0.080, 0.090, 0.100, 0.120, 0.140, 0.160, 0.180,
0.200, 0.250, 0.300, 0.350, 0.400, 0.450, 0.500)
fm <- nc <- numeric(length(hgridbs))
d<- 2
temp<- list()
print(c(date()))
for (i in 1:(length(hgridbs)-1)) {
temp[[i]] <- kms(x = datibs.red, y=rbind(datib.red, datibs.red), H=diag(hgridbs[i],d))
fm[i]<- external_validation(temp[[i]]$lab[1:20000], clb$lab, method = "fowlkes_mallows_index")
nc[i] <- temp[[i]]$nclust
print(c(i,date()))
print(table(temp[[i]]$lab))
print(fm[i])
save.image("finale.Rdata")}
fm[length(hgridbs)] <- 1
nc[length(hgridbs)] <- 1
whichhopt <- which(fm==max(fm[nc>1]))
save.image("finale.Rdata")
# phase 5: testing significance of the signal mode
out.mode2 = modetest.fun3(datX=data.matrix(datibs.red), datY=data.matrix(datitest.red),
bw=hgridbs[whichhopt]^0.5, modes=matrix(temp[[whichhopt]]$mode[,2],ncol=2), alpha=0.0001, nboot = 1000)
modes = out.mode2$modes
CI = out.mode2$CIlead
CI
allCI = out.mode2$allCI
save.image("finale.Rdata")
#pdf("test.pdf")
par(mar=c(3,2,1,1))
eigenport.fun(allci=allCI)
#dev.off()
#################################################
# phase 6: final estimation and classification
#################################################
# plot of hat f_bs
cont=c(10,20,30,44,60,80)
plot(kde(datibs.red, H=diag(hgridbs[whichhopt],2)), approx.cont=F, cont=cont)
#classification of X_bs
table(temp[[whichhopt]]$lab[20001:30000], label)
external_validation(temp[[whichhopt]]$lab[20001:30000], label, method = "fowlkes_mallows_index")
#classification of test data
clbstest <- kms(x = datibs.red, y=datitest.red, H=diag(hgridbs[whichhopt],2))
table(clbstest$lab, labeltest)
external_validation(clbstest$lab, labeltest, method = "fowlkes_mallows_index")
save.image("finale.Rdata")