# In [175]:
require(dplyr)
require(tidyr)
require(ggplot2)
# Collect the running times reported in the experiment logs into `reportDf`,
# one row per (MethodName, Zone, Period).

# Define which log files you want to report the running time for.
LogFiles <- c("RunExperiment_GEFCom2012_noRandomForest.log",
              "RunExperiment_Hvaler.log")

# Split off the first field matched by `pattern`; the pattern must end with the
# "|" delimiter. Returns the field without its trailing "|" (`field`) and the
# text that follows the delimiter (`rest`). Vectorised over `text`.
peelField <- function(text, pattern) {
  hit <- regexpr(pattern, text)
  first <- as.integer(hit)
  width <- attr(hit, "match.length")
  list(field = substr(text, first, first + width - 2), # -2 drops the "|"
       rest = substr(text, first + width, nchar(text)))
}

# Parse report lines shaped like "<method>|<zone>|<period>|...<time>" into a
# data frame with columns MethodName, Zone, Period and Time (numeric).
# Anything in front of the method name is skipped by the regex search.
parseTimingLines <- function(text) {
  fieldPattern <- "(\\w|\\.|\\s)*\\|"
  method <- peelField(text, "\\w*\\|")
  zone <- peelField(method$rest, fieldPattern)
  period <- peelField(zone$rest, fieldPattern)

  # The time is the trailing run of digits/dots. It has no "|" after it, so the
  # whole match is kept (first + width - 1); subtracting 2 here would cut off
  # the last digit of every reported time.
  hit <- regexpr("(\\.|\\d)*$", period$rest)
  first <- as.integer(hit)
  width <- attr(hit, "match.length")

  data.frame(MethodName = method$field,
             Zone = zone$field,
             Period = period$field,
             Time = as.numeric(substr(period$rest, first, first + width - 1)),
             stringsAsFactors = FALSE)
}

# Lines with four pipe-separated fields: every method except random forest.
fourFieldPattern <- "\\w*\\|(\\w|\\.|\\s)*\\|(\\w|\\.|\\s)*\\|(\\w|\\.|\\s)*$"
# Lines with five pipe-separated fields: random forest, one line per horizon.
fiveFieldPattern <- paste0("\\w*\\|(\\w|\\.|\\s)*\\|(\\w|\\.|\\s)*\\|",
                           "(\\w|\\.|\\s)*\\|(\\w|\\.|\\s)*$")

# Empty template so the result keeps its columns even if no line matches.
reportDf <- data.frame(MethodName = character(), Zone = character(),
                       Period = character(), Time = numeric(),
                       stringsAsFactors = FALSE)

reportParts <- vector("list", length(LogFiles))
for (i in seq_along(LogFiles)) {
  logFile <- LogFiles[[i]]
  # sep = "!" is used so that each log line ends up in a single column.
  df <- read.table(logFile, stringsAsFactors = FALSE, sep = "!", header = FALSE)
  names(df) <- c("Text")

  # Time report for all methods except random forest. The pattern is not
  # anchored at the start, so five-field lines match it too; they are excluded
  # because only they contain the string "horizon".
  isOtherMethod <- grepl(fourFieldPattern, df$Text) & !grepl("horizon", df$Text)
  otherMethodDf <- parseTimingLines(df$Text[isOtherMethod])

  # Time report for random forest, which has a different model for each
  # horizon and uses all 8 cores to train one model.
  randomForestDf <- parseTimingLines(df$Text[grepl(fiveFieldPattern, df$Text)])
  if (nrow(randomForestDf) > 0) {
    randomForestDf <- randomForestDf %>%
      group_by(MethodName, Zone, Period) %>%
      summarize(Time = mean(Time) * 24 * 8) %>% # 24 horizons, 8 cores
      ungroup()
  }

  reportParts[[i]] <- bind_rows(otherMethodDf, randomForestDf)
}
reportDf <- bind_rows(c(list(reportDf), reportParts))
    
#df %>% unite(Zone.Period, Zone, Period, sep = ".") %>%spread(Zone.Period, Time)


# In [186]:
# Draw a box plot of the running time per method and save it as
# "running_time.pdf" (7 x 5 inches).

# Fix the order of the methods on the x axis. This is optional and only
# affects the layout of the plot; method names not listed here become NA.
methodOrder <- c("averageARIMA", "OriginalDSHW", "ModifiedDSHW",
                 "semiParametric", "TBATS", "randomforest")
reportDf$MethodName <- factor(reportDf$MethodName, levels = methodOrder,
                              ordered = TRUE)

runningTimePlot <- ggplot(reportDf, aes(x = factor(MethodName), y = Time)) +
  geom_boxplot() +
  # ggtitle("Running Time") +
  labs(x = "") +
  scale_y_continuous(name = "CPU Time (seconds)",
                     breaks = seq(from = 0, to = 1500, by = 200)) +
  theme(axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12, angle = 45, hjust = 1))

pdf("running_time.pdf", width = 7, height = 5)
# Print explicitly: a ggplot object is only drawn when printed, and top-level
# auto-printing does not happen when this file is run through source(), which
# would leave the PDF empty.
print(runningTimePlot)
dev.off()

# In [174]:
# Save the collected running times as CSV, without a row-name column.
write.csv(x = reportDf, file = "running_time_report.csv", row.names = FALSE)

# In [180]:
# Preview the y-axis break positions used for the running-time plot.
seq(from = 0, to = 1500, by = 200)