In [1]:
library(lme4)
library(MASS)
library(plyr) 
library(knitr)
library(multcomp)
library(FSA)
library(car)
library(multcomp)
library(PMCMRplus)
# library(PMCMR)
library(rcompanion)
library(glmmTMB)

Loading required package: Matrix

Loading required package: mvtnorm

Loading required package: survival

Loading required package: TH.data


Attaching package: 'TH.data'


The following object is masked from 'package:MASS':

    geyser


## FSA v0.9.4. See citation('FSA') if used in publication.
## Run fishR() for related website and fishR('IFAR') for related book.


Attaching package: 'FSA'


The following object is masked from 'package:plyr':

    mapvalues


Loading required package: carData

Registered S3 methods overwritten by 'car':
  method       from
  hist.boot    FSA 
  confint.boot FSA 


Attaching package: 'car'


The following object is masked from 'package:FSA':

    bootCase


"Package version inconsistency detected.
TMB was built with Matrix version 1.5.4
Current Matrix version is 1.5.3
Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package"
"Package version inconsiste

In [2]:
file_path = '8_24_data_analysis.csv'
data <- read.csv(file_path)

In [3]:
print.model.summary <- function(model){
    print('***********Coefficients of model*******************')
    print(summary(model)$coef)
    print('***************************************************')
}

In [4]:
print.anova_stats <- function(model, data, p_val){
    stats = anova(model)
    df_total <- length(data$id)
    df_vec <- stats$npar
    print(stats)
    f_vec <- stats$"F value"
    for (i in 1:length(df_vec)) {
      p <- pf(f_vec[i], df_vec[i], df_total-df_vec[i], lower.tail = FALSE, log.p = FALSE)
      if(p < p_val){
        (message(rownames(stats)[i],":  F(",df_vec[i],",",df_total,")=",f_vec[i],"     p = ",p))
          }

    }
}

In [5]:
#+++++++++++++++++++++++++
# Function to calculate the mean and the standard deviation
  # for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
  #to be summariezed
# groupnames : vector of column names to be used as
  # grouping variables
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE)/sqrt(length(data)))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- rename(data_sum, c("mean" = varname))
 return(data_sum)
}

## Performance

### 1. Score or Total Reward

In [6]:
# rANOVA fails test assuptions (homoscedasticity)
m <- lmer(score ~ factor(data$condition) + data$round 
          + factor(data$gender) + data$age + data$experience
          + data$extraversion + data$agreeableness + data$conscientiousness + data$neuroticism + data$intellect 
          + (1|data$id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
leveneTest(data$score ~ factor(data$condition) * factor(data$round))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)

# print(Anova(m))
print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.98238, p-value = 0.05183


Unnamed: 0_level_0,Df,F value,Pr(>F)
Unnamed: 0_level_1,<int>,<dbl>,<dbl>
group,5,12.48147,4.535702e-10
,144,,


Analysis of Variance Table
                       npar Sum Sq Mean Sq  F value
factor(data$condition)    2 317269  158634 245.9917
data$round                1    173     173   0.2680
factor(data$gender)       2   3900    1950   3.0238
data$age                  1   1703    1703   2.6401
data$experience           1     97      97   0.1503
data$extraversion         1   1150    1150   1.7838
data$agreeableness        1    143     143   0.2221
data$conscientiousness    1    193     193   0.2992
data$neuroticism          1      4       4   0.0061
data$intellect            1     97      97   0.1499


factor(data$condition):  F(2,150)=245.991664953917     p = 8.76770580686296e-48



In [7]:
kruskal.test(data$score ~ factor(data$condition))


	Kruskal-Wallis rank sum test

data:  data$score by factor(data$condition)
Kruskal-Wallis chi-squared = 109.9, df = 2, p-value < 2.2e-16


In [8]:
dunnTest(data$score, factor(data$condition))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




           Comparison          Z      P.unadj        P.adj
1 Heuristic - Inverse   5.593690 2.222934e-08 4.445868e-08
2   Heuristic - POMCP  -4.881515 1.052741e-06 1.052741e-06
3     Inverse - POMCP -10.475205 1.122942e-25 3.368827e-25

In [9]:
Hadj <- unname(kruskal.test(data$score ~ factor(data$condition))$statistic)
n <- sum(table(data$score, factor(data$condition)))
e2 <- Hadj*(n+1)/(n^2-1)
print(Hadj)  # H-value (test statistic => chi-squared)
print(e2)  # Effect Size

[1] 109.899
[1] 0.7375771


### 2. Extra Steps

In [10]:
kruskal.test(data$extra_step ~ factor(data$condition))
kruskal.test(data$extra_step ~ factor(data$map))
kruskal.test(data$extra_step ~ data$condition_n_map)


	Kruskal-Wallis rank sum test

data:  data$extra_step by factor(data$condition)
Kruskal-Wallis chi-squared = 92.198, df = 2, p-value < 2.2e-16



	Kruskal-Wallis rank sum test

data:  data$extra_step by factor(data$map)
Kruskal-Wallis chi-squared = 22.467, df = 5, p-value = 0.0004266



	Kruskal-Wallis rank sum test

data:  data$extra_step by data$condition_n_map
Kruskal-Wallis chi-squared = 116.86, df = 17, p-value < 2.2e-16


In [11]:
dunnTest(data$extra_step, factor(data$condition))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




           Comparison         Z      P.unadj        P.adj
1 Heuristic - Inverse -4.306995 1.654872e-05 1.654872e-05
2   Heuristic - POMCP  5.278576 1.301918e-07 2.603835e-07
3     Inverse - POMCP  9.585571 9.194757e-22 2.758427e-21

### 3. Falling Number

In [12]:
kruskal.test(data$falling ~ factor(data$condition)) 
kruskal.test(data$falling ~ data$round)
kruskal.test(data$falling ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$falling by factor(data$condition)
Kruskal-Wallis chi-squared = 114.07, df = 2, p-value < 2.2e-16



	Kruskal-Wallis rank sum test

data:  data$falling by data$round
Kruskal-Wallis chi-squared = 0.011877, df = 1, p-value = 0.9132



	Kruskal-Wallis rank sum test

data:  data$falling by data$condition_n_round
Kruskal-Wallis chi-squared = 114.09, df = 5, p-value < 2.2e-16


In [13]:
dunnTest(data$falling, factor(data$condition))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




           Comparison         Z      P.unadj        P.adj
1 Heuristic - Inverse -5.613110 1.987226e-08 3.974451e-08
2   Heuristic - POMCP  5.062524 4.137411e-07 4.137411e-07
3     Inverse - POMCP 10.675634 1.323508e-26 3.970525e-26

### 4. Detection Number

In [14]:
kruskal.test(data$detection ~ factor(data$condition)) 
kruskal.test(data$detection ~ data$round)
kruskal.test(data$detection ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$detection by factor(data$condition)
Kruskal-Wallis chi-squared = 10.668, df = 2, p-value = 0.004824



	Kruskal-Wallis rank sum test

data:  data$detection by data$round
Kruskal-Wallis chi-squared = 0.011302, df = 1, p-value = 0.9153



	Kruskal-Wallis rank sum test

data:  data$detection by data$condition_n_round
Kruskal-Wallis chi-squared = 11.008, df = 5, p-value = 0.05121


In [15]:
dunnTest(data$detection, factor(data$condition))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




           Comparison         Z     P.unadj       P.adj
1 Heuristic - Inverse 0.3242317 0.745762656 0.745762656
2   Heuristic - POMCP 2.9768040 0.002912701 0.008738103
3     Inverse - POMCP 2.6525724 0.007988100 0.015976200

### 5. Interruption Number

In [16]:
kruskal.test(data$interrupt_fog ~ factor(data$condition))
kruskal.test(data$interrupt_no_fog ~ factor(data$condition))
kruskal.test(data$slippery_interrupt_no_fog ~ factor(data$condition))
kruskal.test(data$slippery_interrupt_fog ~ factor(data$condition))


	Kruskal-Wallis rank sum test

data:  data$interrupt_fog by factor(data$condition)
Kruskal-Wallis chi-squared = 7.7432, df = 2, p-value = 0.02082



	Kruskal-Wallis rank sum test

data:  data$interrupt_no_fog by factor(data$condition)
Kruskal-Wallis chi-squared = 24.781, df = 2, p-value = 4.158e-06



	Kruskal-Wallis rank sum test

data:  data$slippery_interrupt_no_fog by factor(data$condition)
Kruskal-Wallis chi-squared = 56.412, df = 2, p-value = 5.627e-13



	Kruskal-Wallis rank sum test

data:  data$slippery_interrupt_fog by factor(data$condition)
Kruskal-Wallis chi-squared = 101.8, df = 2, p-value < 2.2e-16


## Subjective Metrics

### 1. User Preference  -> TODO

In [17]:
rank_data <- read.csv("rank_data_user_study_2.csv")

In [18]:
kruskal.test(rank_data$rank ~ factor(rank_data$condition)) 


	Kruskal-Wallis rank sum test

data:  rank_data$rank by factor(rank_data$condition)
Kruskal-Wallis chi-squared = 45.405, df = 2, p-value = 1.382e-10


In [19]:
dunnTest(rank_data$rank, factor(rank_data$condition))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




           Comparison         Z      P.unadj        P.adj
1 heuristic - inverse  4.370858 1.237593e-05 2.475185e-05
2   heuristic - POMCP -2.255927 2.407522e-02 2.407522e-02
3     inverse - POMCP -6.626785 3.430775e-11 1.029232e-10

In [20]:
Hadj <- unname(kruskal.test(rank_data$rank ~ factor(rank_data$condition_n_explanation))$statistic)
n <- sum(table(rank_data$rank, factor(rank_data$condition_n_explanation)))
e2 <- Hadj*(n+1)/(n^2-1)
print(Hadj)  # H-value (test statistic => chi-squared)
print(e2)  # Effect Size

[1] 45.40525
[1] 0.5279681


### 2. Trust

In [21]:
# ANOVA Models
m <- lmer(trust ~ factor(condition) 
          + extra_step 
          + interrupt_fog 
          + interrupt_no_fog 
          + detection 
          + score
          + falling 
          + (1|data$id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
leveneTest(data$trust ~ factor(data$condition))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)

print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.99505, p-value = 0.8945


Unnamed: 0_level_0,Df,F value,Pr(>F)
Unnamed: 0_level_1,<int>,<dbl>,<dbl>
group,2,0.7831285,0.4588711
,147,,


Analysis of Variance Table
                  npar Sum Sq Mean Sq F value
factor(condition)    2 979.89  489.95 70.7864
extra_step           1 145.41  145.41 21.0090
interrupt_fog        1  73.24   73.24 10.5821
interrupt_no_fog     1  73.41   73.41 10.6064
detection            1   5.95    5.95  0.8603
score                1   0.64    0.64  0.0925
falling              1   1.22    1.22  0.1765


factor(condition):  F(2,150)=70.7863871926483     p = 2.68716994573963e-22

extra_step:  F(1,150)=21.0090145338664     p = 9.6032930930485e-06

interrupt_fog:  F(1,150)=10.5821491287885     p = 0.00141264525421937

interrupt_no_fog:  F(1,150)=10.6063811085248     p = 0.00139547756583688



In [22]:
m.null <- lmer(trust ~ (1|data$id), data = data)  
m.full <- lmer(trust ~ factor(gender) + age + experience
          + extraversion
               + agreeableness
               + conscientiousness
               + neuroticism
          + intellect 
          + interaction_attitude
               + social_influence_attitude
               + emotion_attitude 
          + factor(condition)
          + round
          + factor(condition) * round
          + extra_step 
          + interrupt_fog 
          + interrupt_no_fog 
          + slippery_interrupt_fog 
          + slippery_interrupt_no_fog
#           + longer_interrupt_fog 
#           + longer_interrupt_no_fog
          + detection 
          + score
          + falling
          + factor(map)
          + factor(condition) * factor(map)
          + (1|id)
        , data = data)  
shapiro.test(residuals(m.full))  # p value should be greater than 0.05 to satisfy normality assumptions
print(leveneTest(data$trust ~ factor(data$condition) * factor(data$map)* factor(data$gender)*factor(data$round)))
# step.model<- step(m.full)
# print(step.model)
# AIC(lmer(trust ~ age + social_influence_attitude + factor(condition) + longer_interrupt_fog + longer_interrupt_no_fog + score + (1 | id),data=data))
# AIC(lmer(trust ~ age + social_influence_attitude + factor(condition) + interrupt_fog + interrupt_no_fog + (1 | id),data=data))


	Shapiro-Wilk normality test

data:  residuals(m.full)
W = 0.99289, p-value = 0.6668


Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group 75  0.7203 0.9205
      74               


In [23]:
m.final = lmer(trust ~ age + social_influence_attitude + factor(condition) + interrupt_fog + interrupt_no_fog + (1 | id), data=data)
print.anova_stats(m.final, data, 0.05)
summary(glht(m.final, linfct=mcp("factor(condition)"="Tukey")))

Analysis of Variance Table
                          npar Sum Sq Mean Sq F value
age                          1  35.84   35.84  5.2875
social_influence_attitude    1  41.00   41.00  6.0482
factor(condition)            2 979.89  489.95 72.2816
interrupt_fog                1 159.55  159.55 23.5377
interrupt_no_fog             1 105.23  105.23 15.5245


age:  F(1,150)=5.28745410485787     p = 0.0228687873441123

social_influence_attitude:  F(1,150)=6.04815619076246     p = 0.0150651533930515

factor(condition):  F(2,150)=72.281630010508     p = 1.25632709042779e-22

interrupt_fog:  F(1,150)=23.5376872119746     p = 3.05544281482474e-06

interrupt_no_fog:  F(1,150)=15.5244810661451     p = 0.00012479675683519




	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: lmer(formula = trust ~ age + social_influence_attitude + factor(condition) + 
    interrupt_fog + interrupt_no_fog + (1 | id), data = data)

Linear Hypotheses:
                         Estimate Std. Error z value Pr(>|z|)    
Inverse - Heuristic == 0  -4.2335     0.5336  -7.934  < 1e-05 ***
POMCP - Heuristic == 0     2.2289     0.5319   4.191 7.65e-05 ***
POMCP - Inverse == 0       6.4625     0.5232  12.352  < 1e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)


### 3. Willingness

In [24]:
m <- lmer(willingness ~ factor(condition) 
          + extra_step 
          + interrupt_fog 
          + interrupt_no_fog 
          + detection 
          + score
          + falling 
          + (1|data$id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
leveneTest(data$willingness ~ factor(data$condition))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)


print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.99228, p-value = 0.5955


Unnamed: 0_level_0,Df,F value,Pr(>F)
Unnamed: 0_level_1,<int>,<dbl>,<dbl>
group,2,3.180011,0.04446108
,147,,


Analysis of Variance Table
                  npar Sum Sq Mean Sq F value
factor(condition)    2 574.84 287.420 42.0186
extra_step           1 109.88 109.883 16.0640
interrupt_fog        1 114.42 114.422 16.7276
interrupt_no_fog     1  19.75  19.748  2.8870
detection            1   5.06   5.063  0.7402
score                1  12.56  12.560  1.8361
falling              1   0.09   0.088  0.0129


factor(condition):  F(2,150)=42.0185585114203     p = 3.53276569330806e-15

extra_step:  F(1,150)=16.0640277021977     p = 9.64470445399461e-05

interrupt_fog:  F(1,150)=16.7276367940066     p = 7.03701060081353e-05



In [25]:
m.null <- lmer(willingness ~ (1|data$id), data = data)  
m.full <- lmer(willingness ~ factor(gender) + age + experience
          + extraversion
               + agreeableness
               + conscientiousness
               + neuroticism
          + intellect 
          + interaction_attitude
               + social_influence_attitude
               + emotion_attitude 
          + factor(condition)
          + round
          + factor(condition) * round
          + extra_step 
#           + interrupt_fog
#           + interrupt_no_fog
          + longer_interrupt_fog 
          + longer_interrupt_no_fog
          + slippery_interrupt_fog 
          + slippery_interrupt_no_fog
          + detection 
          + score
          + falling
          + map
          + factor(condition) * map
          + (1|id)
        , data = data)  

# step.model<- step(m.full)
# print(step.model)
# AIC(lmer(willingness ~ factor(gender) + age + conscientiousness + emotion_attitude + longer_interrupt_fog + longer_interrupt_no_fog + score + (1 | id),data=data))
# AIC(lmer(willingness ~ factor(gender) + age + conscientiousness + emotion_attitude + interrupt_fog + score + (1 | id),data=data))

In [26]:
m.final = lmer(willingness ~ factor(condition) + factor(gender) + age + conscientiousness + emotion_attitude + interrupt_fog + (1 | id), data=data)
# AIC(m.final)
AIC(lmer(willingness ~ factor(condition) + factor(gender) + age + conscientiousness + emotion_attitude + interrupt_fog + (1 | id), data=data))
print.anova_stats(m.final, data, 0.05)
summary(glht(m.final, linfct=mcp("factor(condition)"="Tukey")))

Analysis of Variance Table
                  npar Sum Sq Mean Sq F value
factor(condition)    2 574.84 287.420 40.5220
factor(gender)       2  14.90   7.450  1.0504
age                  1 131.17 131.167 18.4927
conscientiousness    1  44.98  44.977  6.3411
emotion_attitude     1  82.59  82.589 11.6438
interrupt_fog        1 181.90 181.903 25.6457


factor(condition):  F(2,150)=40.5220215888093     p = 9.23338265029413e-15

age:  F(1,150)=18.4926501200538     p = 3.06972118200302e-05

conscientiousness:  F(1,150)=6.34114912660418     p = 0.0128532670787324

emotion_attitude:  F(1,150)=11.6437990286291     p = 0.000829555554060164

interrupt_fog:  F(1,150)=25.6457081130333     p = 1.19520017963514e-06




	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: lmer(formula = willingness ~ factor(condition) + factor(gender) + 
    age + conscientiousness + emotion_attitude + interrupt_fog + 
    (1 | id), data = data)

Linear Hypotheses:
                         Estimate Std. Error z value Pr(>|z|)    
Inverse - Heuristic == 0  -3.0031     0.5356  -5.607  < 1e-04 ***
POMCP - Heuristic == 0     2.0343     0.5327   3.819 0.000423 ***
POMCP - Inverse == 0       5.0374     0.5351   9.414  < 1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)


### 4. Robot Likeability

In [27]:
m <- lmer(likeability ~ factor(condition_n_explanation) 
          + extra_step 
          + interrupt_fog 
          + interrupt_no_fog 
          + detection 
          + score
          + falling 
          + (1|data$id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
leveneTest(data$likeability ~ factor(data$condition))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)

print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.98949, p-value = 0.3244


Unnamed: 0_level_0,Df,F value,Pr(>F)
Unnamed: 0_level_1,<int>,<dbl>,<dbl>
group,2,2.176468,0.1170838
,147,,


Analysis of Variance Table
                                npar Sum Sq Mean Sq F value
factor(condition_n_explanation)    3 998.49  332.83 30.0600
extra_step                         1 171.97  171.97 15.5319
interrupt_fog                      1 140.79  140.79 12.7157
interrupt_no_fog                   1  92.10   92.10  8.3177
detection                          1   0.56    0.56  0.0507
score                              1  56.86   56.86  5.1357
falling                            1   2.88    2.88  0.2597


factor(condition_n_explanation):  F(3,150)=30.0599719450828     p = 3.25157950466497e-15

extra_step:  F(1,150)=15.5319281758672     p = 0.000124352583994118

interrupt_fog:  F(1,150)=12.7157332743235     p = 0.000487842700685365

interrupt_no_fog:  F(1,150)=8.31767270680381     p = 0.00450833024321424

score:  F(1,150)=5.13568252672391     p = 0.0248789746196446



In [28]:
m.null <- lmer(likeability ~ (1|data$id), data = data)  
m.full <- lmer(likeability ~ factor(gender) + age + experience
          + extraversion
               + agreeableness
               + conscientiousness
               + neuroticism
          + intellect 
          + interaction_attitude
               + social_influence_attitude
               + emotion_attitude 
          + factor(condition)
          + round
          + factor(condition) * round
          + extra_step 
#           + longer_interrupt_fog 
#           + longer_interrupt_no_fog
          + slippery_interrupt_fog 
          + slippery_interrupt_no_fog
          + interrupt_fog 
          + interrupt_no_fog
          + detection 
          + score
          + falling
          + map
          + factor(condition) * map
          + (1|id)
        , data = data)  

# step.model<- step(m.full)
# print(step.model)
# AIC(lmer(likeability ~ score + falling + (1 | id), data=data))
# AIC(lmer(likeability ~ factor(condition) + interrupt_fog + interrupt_no_fog + score + (1 | id), data=data))

In [29]:
m.final = lmer(likeability ~ factor(condition) + interrupt_fog + interrupt_no_fog  +  (1 | id), data=data)
# AIC(m.final)
print.anova_stats(m.final, data, 0.05)
summary(glht(m.final, linfct=mcp("factor(condition)"="Tukey")))

Analysis of Variance Table
                  npar Sum Sq Mean Sq F value
factor(condition)    2 963.21  481.61  42.429
interrupt_fog        1 295.27  295.27  26.013
interrupt_no_fog     1 143.30  143.30  12.624


factor(condition):  F(2,150)=42.4290348599577     p = 2.72026524975747e-15

interrupt_fog:  F(1,150)=26.0127886532734     p = 1.01642876912418e-06

interrupt_no_fog:  F(1,150)=12.6241464491835     p = 0.000510356088824544




	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: lmer(formula = likeability ~ factor(condition) + interrupt_fog + 
    interrupt_no_fog + (1 | id), data = data)

Linear Hypotheses:
                         Estimate Std. Error z value Pr(>|z|)    
Inverse - Heuristic == 0  -5.1672     0.6908  -7.480   <1e-04 ***
POMCP - Heuristic == 0     1.1651     0.6884   1.692    0.208    
POMCP - Inverse == 0       6.3323     0.6771   9.352   <1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
