-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_Read_XML_Data.R
97 lines (71 loc) · 3.92 KB
/
01_Read_XML_Data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Preamble etc. -----------------------------------------------------------
# NOTE(review): clearing the workspace and hard-coding the working directory
# tie this script to one machine; both are kept because the file paths used
# further down are relative to these directories.
rm(list = ls())
# I need to update this path when finishing the testing
setwd("/Users/howquez/Documents/002_UNI/UCPH/016_Master Thesis/05_Data/01_RawData")
library(XML)
library(methods)
library(plyr)
# Every entry of the raw-data folder is read as an XML file further down.
# list.files() without full.names returns bare file names, so there is no
# "./" prefix that would have to be stripped with a regular expression.
filenames <- list.files()
if (length(filenames) == 0) {
  stop("No input files found in ", getwd(), call. = FALSE)
}
# Show which files are about to be read.
print(filenames)
# Merge XML Files Into Data Frame -----------------------------------------
# Parse one XML file and return a single data frame that places the columns
# produced by xmlToDataFrame() on the whole document next to the columns
# built from the document's //User/Answers nodes.
#   FileName: path of the XML file to read.
# The result is returned invisibly, like the value of an assignment.
parse_xml <- function(FileName) {
  doc <- xmlParse(FileName)
  answer_nodes <- getNodeSet(doc, "//User/Answers")
  combined <- cbind(xmlToDataFrame(doc), xmlToDataFrame(nodes = answer_nodes))
  invisible(combined)
}
# Apply parse_xml to every file and combine the results into one data frame.
Data <- ldply(filenames, parse_xml)
# Add Session ID ----------------------------------------------------------
# Number of participants per session (sessionLength): one entry per element
# of filenames, counting the //User/Answers nodes found in that file.
sessionLength <- vapply(
  filenames,
  function(path) length(getNodeSet(xmlParse(file = path), "//User/Answers")),
  integer(1),
  USE.NAMES = FALSE
)
# Data is built by running over filenames in order, so the session index of a
# row is the index of its file. Refuse to label anything if the counted
# participants do not account for every row, because the labels would then be
# attached to the wrong rows.
if (sum(sessionLength) != nrow(Data)) {
  stop(
    "Participant counts per file (", sum(sessionLength),
    ") do not match the number of rows in Data (", nrow(Data), ")",
    call. = FALSE
  )
}
# Add a session column: file index j repeated sessionLength[j] times. A file
# with zero participants simply contributes no rows.
Data$Session <- rep(seq_along(filenames), times = sessionLength)
# Drop Useless Variables --------------------------------------------------
# Columns to keep, in output order. Revise this vector once the final
# variables (and their names) are programmed.
usefull <- c(
  "Session", "Username", "Group", "Role", "Completed",
  "score2", "score", "PersonAProb", "screenChoice",
  "Reciprocity1", "Reciprocity2", "Reciprocity3", "Reciprocity4",
  "PayA2", "PayB2", "PayA1", "PayB1",
  "PaymentA", "PaymentB"
)
Data <- Data[, usefull, drop = FALSE]
# Tidy Up -----------------------------------------------------------------
# Drop incomplete observations. %in% treats a missing Completed value as
# "not completed" instead of producing an all-NA row.
Data <- Data[Data$Completed %in% "true", , drop = FALSE]

# Tidy data such that, eventually, one row contains one observation: every
# Role 1 row receives selected values from a Role 2 row, and only the Role 1
# rows are kept. which() drops rows whose Role is missing.
role1 <- Data[which(Data$Role == 1), , drop = FALSE]
role2 <- Data[which(Data$Role == 2), , drop = FALSE]

# Pair rows by Session and Group instead of by position, so that a dropped or
# reordered participant cannot shift values into another group.
# NOTE(review): assumes both members of a pair carry the same Session and
# Group values -- confirm against the raw data.
partner <- match(
  paste(role1$Session, role1$Group, sep = "|"),
  paste(role2$Session, role2$Group, sep = "|")
)
if (anyNA(partner)) {
  warning(
    sum(is.na(partner)), " Role 1 row(s) have no Role 2 row in the same ",
    "Session/Group; the copied columns are NA for them",
    call. = FALSE
  )
}

# New columns (PrinProd, RA1..RA4) are appended; PersonAProb, PaymentA, PayA1
# and PayA2 are overwritten with the Role 2 values.
role1$PrinProd    <- role2$score2[partner]
role1$PersonAProb <- role2$PersonAProb[partner]
role1$PaymentA    <- role2$PaymentA[partner]
role1$PayA1       <- role2$PayA1[partner]
role1$PayA2       <- role2$PayA2[partner]
role1$RA1         <- role2$Reciprocity1[partner]
role1$RA2         <- role2$Reciprocity2[partner]
role1$RA3         <- role2$Reciprocity3[partner]
role1$RA4         <- role2$Reciprocity4[partner]

# Role is constant after the filter above, so drop it.
Data <- role1[, names(role1) != "Role", drop = FALSE]
# View, Save and Reload Data ----------------------------------------------
# Write a new .csv file. write.csv() stores the row names as the first
# column; the file layout is left as it is in case other scripts read it.
setwd("/Users/howquez/Documents/002_UNI/UCPH/016_Master Thesis/05_Data")
write.csv(Data, file = "02_ProcessedData/experimentData.csv")
# Load it again and drop that leading row-name column.
experimentData <- read.csv("02_ProcessedData/experimentData.csv", header = TRUE)
experimentData <- experimentData[, -1, drop = FALSE]
# and delete everything else
rm(list = setdiff(ls(), "experimentData"))
# before viewing it (the data viewer is only opened in an interactive session)
if (interactive()) {
  View(experimentData)
}