# Analysis dataset by dataset: DensityPlot of data and posterior plot for Bayesian correlated t-test

In [21]:
using DataFrames
using Distributions
using Gadfly
using Fontconfig
using Cairo

# Load the project-local helper scripts (presumably defining the functions of
# the same names that the cells below call -- confirm against those files).
include("Tests/ttest_correlated.jl")
include("Tests/Bttest_correlated.jl")
include("Plots/plot_posterior_ttest.jl")
include("Plots/plot_data.jl")
include("Data/get_accuracies.jl")

# Read the comma-delimited lookup tables and the accuracy results.
# NOTE(review): no `using DelimitedFiles` is visible, so this notebook appears
# to target a Julia release where `readdlm` is exported from Base; on
# Julia 0.7+ it lives in the DelimitedFiles stdlib -- confirm before upgrading.
ClassID         = readdlm("Data/ClassifierID.dat", ',')
ClassNames      = readdlm("Data/ClassifierNames.dat", ',')
DatasetID       = readdlm("Data/DatasetID.dat", ',')
DatasetNames    = readdlm("Data/DatasetNames.dat", ',')
Percent_correct = readdlm("Data/Percent_correct.dat", ',')

# Correlation value handed to both correlated t-tests in the cells below
# (presumably 1 / number of cross-validation folds -- confirm).
rho = 1 / 10

# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 17.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 17       # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the accuracies of both classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.02 and 0.02 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.02, 0.02)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.015 and 0.015 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.015, 0.015)
display(p1)



Comparison of nbc vs. aode
in dataset hepatitis

p-value 0.04757324719614943 and confidence interval [-0.004218660763367625,-2.31392366323753e-5]



Parameters of the posterior mean=-0.002120899768821926, dev.std=0.0010572234264579394 and dof=99.0





In [None]:
# Scratch cell: print mur and sigmar as left in notebook scope by the most
# recently executed Bttest_correlated call. The two values are written back to
# back with no separator between them.
println(mur, sigmar)

In [22]:
# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 9.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 9        # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the accuracies of both classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.25 and 0.15 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.25, 0.15)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.15 and 0.015 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.15, 0.015)
display(p1)

Comparison of nbc vs. aode
in dataset hepatitis

p-value 0.0007237706429719712 and confidence interval [-0.11401869524225203,-0.03135650475774799]

Parameters of the posterior mean=-0.07268759207705232, dev.std=0.020829926223337843 and dof=99.0



In [23]:
# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 20.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 20       # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the accuracies of both classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.25 and 0.15 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.25, 0.15)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.15 and 0.015 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.15, 0.015)
display(p1)

Comparison of nbc vs. aode
in dataset hepatitis

p-value 3.884739189329373e-11 and confidence interval [-0.04107227830466183,-0.02376092169533815]

Parameters of the posterior mean=-0.032416596466590945, dev.std=0.00436226369443885 and dof=99.0



In [39]:
# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 2.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 2        # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the accuracies of both classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.09 and 0.05 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.09, 0.05)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.03 and 0.03 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.03, 0.03)
display(p1)

Comparison of nbc vs. aode
in dataset hepatitis

p-value 0.6216862298080181 and confidence interval [-0.013066403395896506,0.007848603395896511]

Parameters of the posterior mean=-0.0026088997156299564, dev.std=0.005270342447276726 and dof=99.0



In [30]:
# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 3.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 3        # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the accuracies of both classifiers on the selected dataset.
acci, accj = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.15 and 0.15 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.15, 0.15)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.05 and 0.05 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.05, 0.05)
display(p1)

Comparison of nbc vs. aode
in dataset hepatitis

p-value 0.5980328575663212 and confidence interval [-0.012840718785713117,0.022174918785713132]

Parameters of the posterior mean=0.004667099491286156, dev.std=0.008823540094666928 and dof=99.0





In [37]:
# Compare classifier 1 (nbc) against classifier 2 (aode) on dataset 17,
# using only a 16-entry slice of the accuracy vectors.
cl1, cl2 = 1, 2    # nbc, aode
dataset = 17       # index of the dataset under study
println("Comparison of ", ClassNames[cl1, 1], " vs. ", ClassNames[cl2, 1])
println("in dataset ", DatasetNames[dataset, 1])
println()

# Fetch the full accuracy vectors, then keep entries 85 through 100 of each.
accix, accjx = get_accuracies(cl1, cl2, dataset, ClassID, DatasetID, Percent_correct)
acci, accj = accix[85:100], accjx[85:100]

# Two-sided frequentist correlated t-test on the accuracy differences.
pvalue, ci = ttest_correlated(acci - accj, 0, rho, 0, 0.05)
println("p-value $pvalue and confidence interval $ci")
println()

# Density plot of the accuracy differences
# (-0.02 and 0.02 are presumably the x-axis limits -- confirm in plot_data).
p = plot_data(cl1, cl2, dataset, acci - accj, -0.02, 0.02)
display(p)

# Bayesian correlated t-test; the call receives -rope and rope (presumably the
# bounds of the region of practical equivalence) together with hdi_prob.
rope, hdi_prob = 0.01, 0.95
mur, sigmar, dofr, p_r, p_l, p_rope, hdi =
    Bttest_correlated(acci - accj, rho, 0, -rope, rope, hdi_prob)
println("Parameters of the posterior mean=$(mur[1]), dev.std=$(sigmar[1]) and dof=$dofr")
println()

# Plot the posterior described by mur, sigmar and dofr
# (-0.02 and 0.02 are presumably the x-axis limits -- confirm).
p1 = plot_posterior_ttest(cl1, cl2, dataset, mur, sigmar, dofr, -0.02, 0.02)
display(p1)

Comparison of nbc vs. aode
in dataset hepatitis

p-value 0.0771793058137019 and confidence interval [-0.005630687865129066,0.0003269378651290563]

Parameters of the posterior mean=-0.0026518745856446008, dev.std=0.0013975524697515825 and dof=15.0



