In [23]:
library(lme4)
library(MASS)
library(plyr) 
library(knitr)
library(multcomp)
library(FSA)
library(car)
# library(PMCMRplus)
# library(PMCMR)
# library(rcompanion)
# library(glmmTMB)

In [24]:
file_path = 'combined_data_5_20_data_analysis.csv'
data <- read.csv(file_path)

In [25]:
print.model.summary <- function(model){
    print('***********Coefficients of model*******************')
    print(summary(model)$coef)
    print('***************************************************')
}

In [26]:
print.anova_stats <- function(model, data, p_val){
    stats = anova(model)
    df_total <- length(data$Id)
    df_vec <- stats$npar
    print(stats)
    f_vec <- stats$"F value"
    for (i in 1:length(df_vec)) {
      p <- pf(f_vec[i], df_vec[i], df_total-df_vec[i], lower.tail = FALSE, log.p = FALSE)
      if(p < p_val){
        (message(rownames(stats)[i],":  F(",df_vec[i],",",df_total,")=",f_vec[i],"     p = ",p))
          }

    }
}

In [27]:
#+++++++++++++++++++++++++
# Function to calculate the mean and the standard deviation
  # for each group
#+++++++++++++++++++++++++
# data : a data frame
# varname : the name of a column containing the variable
  #to be summariezed
# groupnames : vector of column names to be used as
  # grouping variables
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE)/sqrt(length(data)))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- rename(data_sum, c("mean" = varname))
 return(data_sum)
}

## Performance

### 1. Score or Total Reward

In [28]:
# rANOVA fails test assuptions
m <- lmer(score ~ factor(data$condition_n_explanation) + data$round 
          + factor(data$gender) + data$age + data$experience
          + data$extraversion + data$agreeableness + data$conscientiousness + data$neuroticism + data$intellect 
          + (1|data$Id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
# leveneTest(data$trust ~ factor(data$condition_n_explanation) * factor(data$round))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)

# print(Anova(m))
print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.97211, p-value = 2.016e-05


Analysis of Variance Table
                                     npar Sum Sq Mean Sq F value
factor(data$condition_n_explanation)    4 101035 25258.8 19.7664
data$round                              1    362   362.0  0.2833
factor(data$gender)                     1   2036  2036.4  1.5936
data$age                                1    550   550.4  0.4307
data$experience                         1     13    12.7  0.0099
data$extraversion                       1   6546  6545.7  5.1223
data$agreeableness                      1    293   292.6  0.2290
data$conscientiousness                  1   3306  3305.6  2.5868
data$neuroticism                        1   3326  3325.5  2.6024
data$intellect                          1   1674  1674.2  1.3102


factor(data$condition_n_explanation):  F(4,290)=19.7663504411515     p = 2.21865136535073e-14

data$extraversion:  F(1,290)=5.12233605425222     p = 0.0243615583089973



In [29]:
kruskal.test(data$score ~ factor(data$condition_n_explanation)) 
kruskal.test(data$score ~ data$round)
kruskal.test(data$score ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$score by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 58.159, df = 4, p-value = 7.066e-12



	Kruskal-Wallis rank sum test

data:  data$score by data$round
Kruskal-Wallis chi-squared = 0.059397, df = 1, p-value = 0.8075



	Kruskal-Wallis rank sum test

data:  data$score by data$condition_n_round
Kruskal-Wallis chi-squared = 61.52, df = 9, p-value = 6.824e-10


In [30]:
dunnTest(data$score, factor(data$condition_n_explanation))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




                                Comparison          Z      P.unadj        P.adj
1              control - control_w_explain -0.6261877 5.311918e-01 5.311918e-01
2                      control - interrupt  2.7616483 5.751040e-03 2.875520e-02
3            control_w_explain - interrupt  3.3878359 7.044639e-04 4.931247e-03
4            control - interrupt_w_explain  0.6394755 5.225137e-01 1.000000e+00
5  control_w_explain - interrupt_w_explain  1.2656632 2.056337e-01 6.169011e-01
6          interrupt - interrupt_w_explain -2.1221728 3.382323e-02 1.352929e-01
7                   control - no_interrupt  6.0060867 1.900546e-09 1.710492e-08
8         control_w_explain - no_interrupt  6.6322744 3.305534e-11 3.305534e-10
9                 interrupt - no_interrupt  3.2444384 1.176825e-03 7.060948e-03
10      interrupt_w_explain - no_interrupt  5.3666112 8.022975e-08 6.418380e-07

In [31]:
Hadj <- unname(kruskal.test(data$score ~ factor(data$condition_n_explanation))$statistic)
n <- sum(table(data$score, factor(data$condition_n_explanation)))
e2 <- Hadj*(n+1)/(n^2-1)
print(Hadj)  # H-value (test statistic => chi-squared)
print(e2)  # Effect Size

[1] 58.15909
[1] 0.2012425


### 2. Extra Steps

In [32]:
# rANOVA fails test assuptions
m <- lmer(extra_step ~ factor(data$condition_n_explanation) + data$round 
          + factor(data$gender) + data$age + data$experience
          + data$extraversion + data$agreeableness + data$conscientiousness + data$neuroticism + data$intellect 
          + (1|data$Id), data)

# # Test for model assumptions
shapiro.test(residuals(m))  # p value should be greater than 0.05 to satisfy normality assumptions
# leveneTest(data$trust ~ factor(data$condition_n_explanation) * factor(data$round))  # p value should be greater than 0.05 to satisfy homoscedasticity assumptions (only use categorical variables)

# print(Anova(m))
print.anova_stats(m, data, 0.05)


	Shapiro-Wilk normality test

data:  residuals(m)
W = 0.97548, p-value = 7.126e-05


Analysis of Variance Table
                                     npar Sum Sq Mean Sq F value
factor(data$condition_n_explanation)    4 33.317  8.3292 14.6585
data$round                              1  1.410  1.4103  2.4820
factor(data$gender)                     1  0.187  0.1866  0.3285
data$age                                1  0.443  0.4430  0.7797
data$experience                         1  0.063  0.0631  0.1111
data$extraversion                       1  0.803  0.8031  1.4133
data$agreeableness                      1  0.628  0.6277  1.1046
data$conscientiousness                  1  1.169  1.1689  2.0571
data$neuroticism                        1  0.806  0.8062  1.4188
data$intellect                          1  1.199  1.1993  2.1107


factor(data$condition_n_explanation):  F(4,290)=14.6585212837656     p = 6.6334236632083e-11



In [33]:
kruskal.test(data$extra_step ~ factor(data$condition_n_explanation)) 
kruskal.test(data$extra_step ~ data$round)
kruskal.test(data$extra_step ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$extra_step by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 52.712, df = 4, p-value = 9.791e-11



	Kruskal-Wallis rank sum test

data:  data$extra_step by data$round
Kruskal-Wallis chi-squared = 1.3531, df = 1, p-value = 0.2447



	Kruskal-Wallis rank sum test

data:  data$extra_step by data$condition_n_round
Kruskal-Wallis chi-squared = 56.937, df = 9, p-value = 5.197e-09


In [34]:
dunnTest(data$extra_step, factor(data$condition_n_explanation))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




                                Comparison          Z      P.unadj        P.adj
1              control - control_w_explain  1.0308837 3.025954e-01 6.051907e-01
2                      control - interrupt -4.3732280 1.224228e-05 8.569594e-05
3            control_w_explain - interrupt -5.4041117 6.513036e-08 5.861732e-07
4            control - interrupt_w_explain -2.3845414 1.710042e-02 8.550211e-02
5  control_w_explain - interrupt_w_explain -3.4154252 6.368253e-04 3.820952e-03
6          interrupt - interrupt_w_explain  1.9886865 4.673581e-02 1.402074e-01
7                   control - no_interrupt -4.7258987 2.290998e-06 1.832799e-05
8         control_w_explain - no_interrupt -5.7567824 8.573229e-09 8.573229e-08
9                 interrupt - no_interrupt -0.3526707 7.243353e-01 7.243353e-01
10      interrupt_w_explain - no_interrupt -2.3413573 1.921377e-02 7.685508e-02

### 3. Fallling Number

In [35]:
kruskal.test(data$falling_num ~ factor(data$condition_n_explanation)) 
kruskal.test(data$falling_num ~ data$round)
kruskal.test(data$falling_num ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$falling_num by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 76.961, df = 4, p-value = 7.665e-16



	Kruskal-Wallis rank sum test

data:  data$falling_num by data$round
Kruskal-Wallis chi-squared = 0.33692, df = 1, p-value = 0.5616



	Kruskal-Wallis rank sum test

data:  data$falling_num by data$condition_n_round
Kruskal-Wallis chi-squared = 80.273, df = 9, p-value = 1.427e-13


In [36]:
dunnTest(data$falling_num, factor(data$condition_n_explanation))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




                                Comparison          Z      P.unadj        P.adj
1              control - control_w_explain  0.2577293 7.966158e-01 7.966158e-01
2                      control - interrupt -3.3841972 7.138671e-04 3.569336e-03
3            control_w_explain - interrupt -3.6419265 2.706053e-04 1.623632e-03
4            control - interrupt_w_explain -1.5720916 1.159293e-01 2.318586e-01
5  control_w_explain - interrupt_w_explain -1.8298209 6.727672e-02 2.691069e-01
6          interrupt - interrupt_w_explain  1.8121056 6.996988e-02 2.099096e-01
7                   control - no_interrupt -7.2992825 2.893064e-13 2.603758e-12
8         control_w_explain - no_interrupt -7.5570118 4.124347e-14 4.124347e-13
9                 interrupt - no_interrupt -3.9150853 9.037226e-05 6.326058e-04
10      interrupt_w_explain - no_interrupt -5.7271909 1.021073e-08 8.168582e-08

### 4. Detection Number (averaged across both rounds)

In [37]:
kruskal.test(data$detection_num ~ factor(data$condition_n_explanation)) 
kruskal.test(data$detection_num ~ data$round)
kruskal.test(data$detection_num ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$detection_num by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 11.822, df = 4, p-value = 0.01873



	Kruskal-Wallis rank sum test

data:  data$detection_num by data$round
Kruskal-Wallis chi-squared = 0.0042673, df = 1, p-value = 0.9479



	Kruskal-Wallis rank sum test

data:  data$detection_num by data$condition_n_round
Kruskal-Wallis chi-squared = 12.623, df = 9, p-value = 0.1804


In [38]:
dunnTest(data$detection_num, factor(data$condition_n_explanation))

Dunn (1964) Kruskal-Wallis multiple comparison

  p-values adjusted with the Holm method.




                                Comparison           Z     P.unadj      P.adj
1              control - control_w_explain -0.04442458 0.964565966 0.96456597
2                      control - interrupt -2.18513422 0.028879007 0.23103205
3            control_w_explain - interrupt -2.14070964 0.032297461 0.22608223
4            control - interrupt_w_explain -1.71923140 0.085572246 0.51343347
5  control_w_explain - interrupt_w_explain -1.67480681 0.093972142 0.46986071
6          interrupt - interrupt_w_explain  0.46590282 0.641285060 1.00000000
7                   control - no_interrupt -2.59828285 0.009369128 0.09369128
8         control_w_explain - no_interrupt -2.55385827 0.010653661 0.09588295
9                 interrupt - no_interrupt -0.41314863 0.679497722 1.00000000
10      interrupt_w_explain - no_interrupt -0.87905145 0.379373378 1.00000000

### 5. Interruption Number

In [39]:
kruskal.test(data$slippery_interruption_num_fog ~ factor(data$condition_n_explanation)) 
kruskal.test(data$interruption_num_fog ~ data$extra_step)
kruskal.test(data$slippery_interruption_num_fog ~ data$condition_n_round)
kruskal.test(data$slippery_interruption_num_non_fog ~ factor(data$condition_n_explanation)) 
kruskal.test(data$slippery_interruption_num_non_fog ~ data$round)
kruskal.test(data$slippery_interruption_num_non_fog ~ data$condition_n_round)


	Kruskal-Wallis rank sum test

data:  data$slippery_interruption_num_fog by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 122.46, df = 4, p-value < 2.2e-16



	Kruskal-Wallis rank sum test

data:  data$interruption_num_fog by data$extra_step
Kruskal-Wallis chi-squared = 229.53, df = 180, p-value = 0.007392



	Kruskal-Wallis rank sum test

data:  data$slippery_interruption_num_fog by data$condition_n_round
Kruskal-Wallis chi-squared = 130, df = 9, p-value < 2.2e-16



	Kruskal-Wallis rank sum test

data:  data$slippery_interruption_num_non_fog by factor(data$condition_n_explanation)
Kruskal-Wallis chi-squared = 125.32, df = 4, p-value < 2.2e-16



	Kruskal-Wallis rank sum test

data:  data$slippery_interruption_num_non_fog by data$round
Kruskal-Wallis chi-squared = 0.84537, df = 1, p-value = 0.3579



	Kruskal-Wallis rank sum test

data:  data$slippery_interruption_num_non_fog by data$condition_n_round
Kruskal-Wallis chi-squared = 127.4, df = 9, p-value < 2.2e-16


## Subjective Metrics

### 1. User Preference

In [40]:
rank_data <- read.csv("rank_data_user_study_1.csv")

"cannot open file 'rank_data_user_study_1.csv': No such file or directory"


ERROR: Error in file(file, "rt"): cannot open the connection
