Skip to content

Commit

Permalink
updated rope to +- 0.01
Browse files Browse the repository at this point in the history
pulled SignedRank and SignRank
pulled analyze_uci
  • Loading branch information
gcorani authored and gcorani committed May 11, 2016
1 parent de4a699 commit c8622c3
Show file tree
Hide file tree
Showing 5 changed files with 283 additions and 19 deletions.
30 changes: 13 additions & 17 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
/GC

hierarchical/BayesianSignedRank.R

hierarchical/analyze_uci_with_rope.R

hierarchical/BayesianSignTest.R

*.csv

hierarchical/find-pars.R
*.Rdata

*.hpp

hierarchical/getMargLik.R
*.cpp

*.Rdata

hierarchical/predictive_halfCval.R
*.rda

hierarchical/predictiveComparison.R
hierarchical/12-1000-student1

hierarchical/plot_posterior.r
*.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_11

hierarchical/MultipleDsetsRopeSignedRank.R
hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_12

hierarchical/multiple_dsets_generative.R
hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_13

hierarchical/hierarchical-t-test.rda
hierarchical/delta00Std00.003_dsets_2_delta_acc_cauchy_samples_sizes_50simulationID_14

*.hpp
hierarchical/12-1000-student4

*.cpp
hierarchical/12-1000-student2

hierarchical/12-1000-student3
27 changes: 27 additions & 0 deletions R/BayesianSignTest.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
BayesianSignTest <- function(diffVector,rope_min,rope_max) {

library(MCMCpack)

samples <- 3000

#build the vector 0.5 1 1 ....... 1
weights <- c(0.5,rep(1,length(diffVector)))

#add the fake first observation in 0
diffVector <- c (0, diffVector)


#for the moment we implement the sign test. Signedrank will follows
probLeft <- mean (diffVector < rope_min)
probRope <- mean (diffVector > rope_min & diffVector < rope_max)
probRight <- mean (diffVector > rope_max)



results = list ("probLeft"=probLeft, "probRope"=probRope,
"probRight"=probRight)

return (results)

}

48 changes: 48 additions & 0 deletions R/BayesianSignedRank.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
BayesianSignedRank <- function(diffVector,rope_min,rope_max) {

library(MCMCpack)

samples <- 30000

#build the vector 0.5 1 1 ....... 1
weights <- c(0.5,rep(1,length(diffVector)))

#add the fake first observation in 0
diffVector <- c (0, diffVector)

sampledWeights <- rdirichlet(samples,weights)

winLeft <- vector(length = samples)
winRope <- vector(length = samples)
winRight <- vector(length = samples)

for (rep in 1:samples){
currentWeights <- sampledWeights[rep,]
for (i in 1:length(currentWeights)){
for (j in 1:length(currentWeights)){
product= currentWeights[i] * currentWeights[j]
if (diffVector[i]+diffVector[j] > (2*rope_max) ) {
winRight[rep] <- winRight[rep] + product
}
else if (diffVector[i]+diffVector[j] > (2*rope_min) ) {
winRope[rep] <- winRope[rep] + product
}
else {
winLeft[rep] <- winLeft[rep] + product
}
}
}
maxWins=max(winRight[rep],winRope[rep],winLeft[rep])
winners = (winRight[rep]==maxWins)*1 + (winRope[rep]==maxWins)*1 + (winLeft[rep]==maxWins)*1
winRight[rep] <- (winRight[rep]==maxWins)*1/winners
winRope[rep] <- (winRope[rep]==maxWins)*1/winners
winLeft[rep] <- (winLeft[rep]==maxWins)*1/winners
}


results = list ("winLeft"=mean(winLeft), "winRope"=mean(winRope),
"winRight"=mean(winRight) )

return (results)

}
193 changes: 193 additions & 0 deletions hierarchical/analyze_uci_with_rope.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
analyze_uci_with_rope <- function() {

library(MASS)
library(matrixStats)
library(rstan)
rstan_options(auto_write = TRUE)

options(mc.cores = parallel::detectCores())
source ("hierarchical_test.R")

std_upper_bound <- 1000
standardization <- 1 #we run on standardized data

#utiliy function which gets the average score of the given classifier on each data set
getAvgScore <- function (currentClassifier,score) {
avg_score <- vector(length = how_many_dsets, mode = "double");
for ( ii in 1:how_many_dsets ){
avg_score[ii] <- mean ( score [classifierId==currentClassifier & dsetId==dsetsList[ii]] )
}
return (avg_score)
}

load("uci_data.RData")

#output parameters

#check arguments
if (std_upper_bound<=1){
stop("std_std_upper_bound should be larger than 1")
}

samplingType="student" #default choice
if (! ( (samplingType=="normal") | (samplingType=="student") )){
stop("wrong samplingType")
}

#for each couple of classifiers: p_value_sign_rank,prob_classANextDelta,prob_ropeNextDelta,prob_classBNextDelta

dsetId<- uci_classification$DatasetID
classifierId <- uci_classification$ClassifierID
#we run on accuracy
score <- uci_classification$Percent.correct

foldID <- uci_classification$Key.Fold
nFolds <- max (foldID)
rho=1/nFolds

filename <- paste("uciResults_StdUpper",as.character(std_upper_bound),"samplingType",samplingType, sep="-")
if (rho==0) {
filename <- paste(filename,"noCorrel",sep="-")
}

Rdata_filename <- paste (filename,"Rdata",sep=".")


runID <- uci_classification$Key.Run
#the fold ID discriminates between different folds and different runs
foldID <- runID*10+foldID-10

rope_min <- -0.01
rope_max <- 0.01

how_many_classifiers <- max(unique(classifierId))

classifier_names <- c('naive Bayes','aode','hnb','j48','j48_grafted') #hard-coded
how_many_comparisons <- how_many_classifiers*(how_many_classifiers-1)/2 #number of pairwise comparisons

#fields to be filled during the multiple comparisons
p_value_sign_rank <- vector(length = how_many_comparisons, mode = "double")
p_value_t_test <- vector(length = how_many_comparisons, mode = "double")
median_difference <- vector(length = how_many_comparisons, mode = "double")
prob_classANextDelta <- vector(length = how_many_comparisons, mode = "double")
prob_ropeNextDelta <- vector(length = how_many_comparisons, mode = "double")
prob_classBNextDelta <- vector(length = how_many_comparisons, mode = "double")
prob_positiveNextDelta<- vector(length = how_many_comparisons, mode = "double")
prob_negativeNextDelta<- vector(length = how_many_comparisons, mode = "double")
classifierI <- vector(length = how_many_comparisons, mode = "integer")
classifierJ <- vector(length = how_many_comparisons, mode = "integer")


dsetsList <- unique(dsetId);

how_many_dsets <- length(dsetsList)

hierarchicalResults <- list()
counter <- 1
for (i in 1: (how_many_classifiers-1) ) {
for (j in (i+1) : how_many_classifiers){
show(c(i,j))

classifierI[counter] <- i
classifierJ[counter] <- j

#run the signed rank
avgScoreI <- getAvgScore (i,score)
avgScoreJ <- getAvgScore (j,score)
median_difference[counter] <- median(avgScoreI-avgScoreJ)
wilcoxonStat <- wilcox.test (avgScoreI-avgScoreJ,alternative = "two.sided");
p_value_sign_rank[counter] <- wilcoxonStat$p.value
tTestStat <- t.test (avgScoreI-avgScoreJ,alternative = "two.sided");
p_value_t_test[counter] <- tTestStat$p.value

#prepare the data for the hierarchical test
results <- cbind (classifierId, dsetId, score, foldID)
resultsI <- results[classifierId==i,]
resultsJ <- results[classifierId==j,]

#sort by dataset and then by fold
#resultsI<-mat.sort(resultsI, c(2,4))
#resultsJ<-mat.sort(resultsJ, c(2,4))

#check results are consistently paired as for dataset and foldID
#data are already properly sorted, so this control pases
#otherwise we should sort the matrixes
stopifnot( mean (resultsI[,c(2,4)]==resultsJ[,c(2,4)]) == 1)
diffIJ <- cbind (resultsI[,2] , resultsI[,3]-resultsJ[,3])


#build matrix of results to be parsed by hierarchical test
x<-matrix(ncol = max(foldID), nrow = how_many_dsets )
for (dsetIdx in 1:how_many_dsets) {
tmp <- diffIJ [diffIJ[,1] == dsetIdx,]
x[dsetIdx,] <- t (tmp [,2])
}


#run the hierarchical test
#we do not provide a simulation ID as this is run locally
startTime<-Sys.time()
simulationID <- paste(as.character(i*10 + j),as.character(std_upper_bound),samplingType,sep = "-")

hierarchicalResults[[counter]] <- hierarchical.test (x,rho,rope_min,rope_max,simulationID,std_upper_bound,samplingType)

stopTime<-Sys.time()
show(startTime-stopTime)

#classA is the first classifier, classB the second classifier
#delta = acc(classA) - acc(classB)
#thus in the posterior the right tail of delta is the tail in favor
#of classA

prob_classANextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$right
prob_ropeNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$rope
prob_classBNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$left

prob_positiveNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$positive
prob_negativeNextDelta[counter] <- hierarchicalResults[[counter]]$nextDelta$negative
#waic not useful, we do not track it
#waic[counter] <- hierarchicalResults[[counter]]$waic

counter <- counter + 1
}
}

classifierIString <-vector(length = how_many_comparisons, mode = "character")
classifierJString <-vector(length = how_many_comparisons, mode = "character")

classifier_names <- c('naive Bayes','aode','hnb','j48','j48_grafted');

results_matrix<-data.frame(
classifierI=classifierI,
classifierJ=classifierJ,
median_difference=median_difference,
p_value_sign_rank=p_value_sign_rank,
p_value_t_test=p_value_t_test,
prob_classANextDelta=prob_classANextDelta,
prob_ropeNextDelta=prob_ropeNextDelta,
prob_classBNextDelta=prob_classBNextDelta,
prob_positiveNextDelta=prob_positiveNextDelta,
prob_negativeNextDelta=prob_negativeNextDelta
)

csv_filename <- paste (filename,"csv",sep=".")

write.matrix(results_matrix, file = csv_filename, sep = ",")

results <- list()
results[[1]] <- list('classifierI'=classifierI,
'classifierJ'=classifierJ,
'p_value_sign_rank'=p_value_sign_rank,
'median_difference'=median_difference,
'prob_classANextDelta'=prob_classANextDelta,
'prob_ropeNextDelta'=prob_ropeNextDelta,
'prob_classBNextDelta'=prob_classBNextDelta
)


save(hierarchicalResults, file = Rdata_filename)

return (results)

}

4 changes: 2 additions & 2 deletions hierarchical/multiple_dsets_rope.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
filename <- paste(file_str0,'.csv',sep = "")
log_filename <- paste('log_',file_str0,'.csv',sep = "")

rope_min <- -0.005;
rope_max <- 0.005;
rope_min <- -0.01;
rope_max <- 0.01;
#set the seed for reproducible results
set.seed(simulation_ID)

Expand Down

0 comments on commit c8622c3

Please sign in to comment.