# Simulation of defaced vs original manual ratings

Simulate 12 raters each rating 580 original images. The ratings are randomly sampled from {1,2,3,4} and randomly distributed across subjects. To introduce a bias in the ratings of defaced images, we add +1 to a predefined precentage of the ratings on original images. The percentage of scan affected varies between raters. 

In [8]:
set.seed(1234)

n_rated <- 580 #nbr of subjects rated per rater
#n_rated <- 130 #nbr of subjects rated per rater
n_sub <- 580 #nbr of subjects available in the dataset
n_rater <- 12 #nbr of raters

manual_original <- matrix(, nrow = n_sub, ncol = n_rater)
manual_defaced <- matrix(, nrow = n_sub, ncol = n_rater)

#Define for each rater the percentage of biased ratings
perc_biased <- c(2,10,10,30,40,40,50,50,60,70,90,90)
for (i in 1:n_rater) {
    #Each rater rates subjects picked at random
    ind_sub <- sample(1:n_sub, n_rated, replace = F)
    #random original ratings sampled from {1,2,3,4}
    ratings <- sample(1:4, n_rated, replace = T)
    manual_original[ind_sub, i] <- ratings
    
    #To simulate a positive bias towards defaced data, we improve the ratings of a 
    #predefined percentage of the original scans
    ind_rat <- sample(1:n_rated, round(n_rated*perc_biased[i]/100), replace = F)
    ratings_biased <- ratings
    ratings_biased[ind_rat] <- ratings_biased[ind_rat] + 1
    #The scale stops at 4 so clip higher values to 4 
    ratings_biased[ratings_biased == 5] <- 4
    
    #Set the biased ratings as the ratings on the defaced condition
    manual_defaced[ind_sub, i] <- ratings_biased
}

manual_original_vec <- c(t(manual_original))
manual_defaced_vec <- c(t(manual_defaced))
                  
defaced <- rep(c(0, 1), times = n_rater*n_sub)
sub <- rep(rep(1:n_sub, each=2), times = n_rater)
rater <- rep(1:n_rater, each=n_sub*2)

#Convert to dataframe to use in regression
df <- data.frame(rater = rater, sub = sub)
df$defaced <- factor(defaced, levels = 0:1, labels = c("original", "defaced"))
df$ratings <- factor(c(manual_original_vec, manual_defaced_vec), levels = 1:4, labels = c("excluded", "poor", "good", "excellent"))

#Write dataframe to file
write.csv(df,"SimulatedDefacedRatings.csv", row.names = FALSE)

In [20]:
df

rater,sub,defaced,ratings
<int>,<int>,<fct>,<fct>
1,1,original,excellent
1,1,defaced,excluded
1,2,original,excellent
1,2,defaced,good
1,3,original,good
1,3,defaced,excellent
1,4,original,excluded
1,4,defaced,excluded
1,5,original,poor
1,5,defaced,poor
