In [1]:
# Load the training and test sets from the sibling "data" directory.
# file.path() assembles the relative path with the platform's separator.
train.data <- read.csv(file = file.path("..", "data", "training_data.csv"))
test.data <- read.csv(file = file.path("..", "data", "test_data.csv"))

## Boosting

In [3]:
# Build the modelling data frame:
#   1. drop the first three columns of train.data (presumably identifiers,
#      Intensity and the response -- confirm against the CSV header),
#   2. remove predictors with zero variance,
#   3. attach Intensity as a factor and the response VALENCE.PLEASANTNESS.
set.seed(5)
x <- train.data[, -c(1, 2, 3), drop = FALSE]

# Flag constant columns one column at a time. vapply() avoids coercing the
# whole data frame to a single-typed matrix (which apply() does), and isTRUE()
# turns an NA variance into FALSE so the logical mask below never contains NA.
# Non-numeric columns are left untouched.
idx.zero.var <- vapply(
  x,
  function(col) is.numeric(col) && isTRUE(var(col) == 0),
  logical(1)
)
# drop = FALSE keeps x a data frame even if only one column survives.
x <- x[, !idx.zero.var, drop = FALSE]

# Attach Intensity as a factor ([[ ]] avoids the partial matching of `$`).
x$Intensity <- as.factor(train.data[["Intensity"]])

# `data` holds the predictors plus the response used by the model formula.
data <- x
data$VALENCE.PLEASANTNESS <- train.data[["VALENCE.PLEASANTNESS"]]


In [4]:
# Train / validation split (two thirds / one third) and xgboost inputs.
# xgboost does not accept data frames, so the predictors are converted to a
# sparse design matrix first.
library(xgboost)
library(Matrix)

# Re-seed right before sampling so the split is reproducible even when this
# cell is re-run on its own; with the same seed and no random draws in between
# this gives the same split as seeding in the preprocessing cell.
set.seed(5)
len <- nrow(x)
# seq_len() is safe for len == 0, and floor() makes explicit the truncation
# that sample() would otherwise apply silently to a fractional size.
idx.train <- sample(seq_len(len), floor(2 * len / 3))

# Build the design matrix once and split it by rows, which guarantees that the
# training and validation matrices share exactly the same columns.
design <- sparse.model.matrix(VALENCE.PLEASANTNESS ~ . - 1, data = data)
# Rows containing NA are dropped while the model frame is built; fail loudly
# instead of silently misaligning the rows with the response vector.
stopifnot(nrow(design) == nrow(data))

train.x <- design[idx.train, , drop = FALSE]
validation.x <- design[-idx.train, , drop = FALSE]
train.y <- data$VALENCE.PLEASANTNESS[idx.train]
validation.y <- data$VALENCE.PLEASANTNESS[-idx.train]

In [5]:
# Fit boosted regression trees on the training split: squared-error objective,
# learning rate (eta) 0.01, trees of depth at most 2, 500 boosting rounds.
# The round count is passed under its full name `nrounds`; the previous
# spelling `nround` only worked through R's partial argument matching.
boost.heart <- xgboost(
  train.x,
  label = train.y,
  objective = "reg:squarederror",
  eta = 0.01,
  max_depth = 2,
  nrounds = 500
)

[1]	train-rmse:48.127544 
[2]	train-rmse:47.746429 
[3]	train-rmse:47.369846 
[4]	train-rmse:46.997787 
[5]	train-rmse:46.628361 
[6]	train-rmse:46.264954 
[7]	train-rmse:45.904148 
[8]	train-rmse:45.549248 
[9]	train-rmse:45.198635 
[10]	train-rmse:44.850327 
[11]	train-rmse:44.507858 
[12]	train-rmse:44.169567 
[13]	train-rmse:43.833401 
[14]	train-rmse:43.503033 
[15]	train-rmse:43.176708 
[16]	train-rmse:42.852329 
[17]	train-rmse:42.533710 
[18]	train-rmse:42.219006 
[19]	train-rmse:41.906116 
[20]	train-rmse:41.598976 
[21]	train-rmse:41.293633 
[22]	train-rmse:40.993671 
[23]	train-rmse:40.697559 
[24]	train-rmse:40.405052 
[25]	train-rmse:40.114166 
[26]	train-rmse:39.828815 
[27]	train-rmse:39.546772 
[28]	train-rmse:39.264969 
[29]	train-rmse:38.990009 
[30]	train-rmse:38.716423 
[31]	train-rmse:38.448151 
[32]	train-rmse:38.179821 
[33]	train-rmse:37.918335 
[34]	train-rmse:37.659760 
[35]	train-rmse:37.402504 
[36]	train-rmse:37.150524 
[37]	train-rmse:36.898293 
[38]	train

In [6]:
# Score the training split with the fitted booster and compute its mean
# squared error.
prediction.train <- predict(boost.heart, train.x)
MSE.train <- mean((train.y - prediction.train)^2)

# Same for the held-out validation split.
prediction.validation <- predict(boost.heart, validation.x)
MSE.validation <- mean((validation.y - prediction.validation)^2)


In [7]:
# Display the training and validation mean squared errors; a large gap between
# the two would point to overfitting on the training split.
MSE.train
MSE.validation