### Measure and report all predictions stored in CSV files with appropriate names

All CSV files whose names follow the format "method_name_horizon_n.csv" will be tested. If a file name has no horizon part, the measurement is reported for all horizons.

In [79]:
library(tidyr)
library(dplyr)
library(lubridate)

# ---- Hvaler: file locations ----
HvalerDir <- "Hvaler/"
HvalerPredictionDir <- "Hvaler/Predictions/"
HvalerCompleteFile <- "Hvaler/imputed_complete.csv"
HvalerTrainingFile <- "Hvaler/training_set.csv"

# ---- Hvaler: column schema (one timestamp column + 21 numeric columns) ----
HvalerClasses <- c("POSIXct", rep("numeric", times = 21))
HvalerZones <- paste0("subs.", 1:20)

# ---- GEFCom2012: file locations ----
GEFCom2012Dir <- "GEFCom2012/"
GEFCom2012PredictionDir <- "GEFCom2012/Predictions/"
GEFCom2012CompleteFile <- "GEFCom2012/complete.csv"
GEFCom2012TrainingFile <- "GEFCom2012/training_set.csv"

# ---- GEFCom2012: column schema (one timestamp column + 32 numeric columns) ----
GEFCom2012Classes <- c("POSIXct", rep("numeric", times = 32))
GEFComZones <- paste0("zone.", 1:20)

In [80]:
# Measure for Hvaler
# Load the complete (ground-truth) table and the training table with the same
# column schema, then start the report with one row per prediction file.
completeDf <- read.csv(
  HvalerCompleteFile,
  stringsAsFactors = FALSE, colClasses = HvalerClasses
)
trainingDf <- read.csv(
  HvalerTrainingFile,
  stringsAsFactors = FALSE, colClasses = HvalerClasses
)
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# ".csv" extension so that only CSV files are picked up.
reportDf <- data.frame(
  files = list.files(path = HvalerPredictionDir, pattern = "\\.csv$"),
  stringsAsFactors = FALSE
)

In [81]:
# Derive `method` and `horizon` from each prediction file name.
#   "<method>_horizon_<n>.csv" -> method = "<method>", horizon = n
#   "<method>.csv"             -> method = "<method>", horizon = -1
# The dot in ".csv" is escaped so it only matches a literal dot; an unescaped
# "." would match any character and truncate names that merely contain "csv".
reportDf <- reportDf %>%
  mutate(
    # Drop everything from the first "_horizon_" or ".csv" onwards.
    method = sub("(_horizon_|\\.csv).*$", "", files),
    # Text between the first "_horizon_" and the following ".csv"; files
    # without a horizon part get the sentinel -1.
    horizon = as.numeric(ifelse(
      grepl("_horizon_", files, fixed = TRUE),
      sub("^.*?_horizon_(.*?)\\.csv.*$", "\\1", files, perl = TRUE),
      "-1"
    ))
  ) %>%
  arrange(method, horizon)

# Rows of the training table that contain at least one NA are treated as the
# test rows: predictions are scored against the complete table on those rows.
testingIdx <- !complete.cases(trainingDf)

# One numeric result column per zone; every cell is filled in below.
for (zone in HvalerZones) {
  reportDf[[zone]] <- rep(NA_real_, nrow(reportDf))
}

# seq_len() makes the loop a no-op when no prediction files were found
# (seq(1, 0) would iterate over c(1, 0) and fail).
for (i in seq_len(nrow(reportDf))) {
  file <- reportDf$files[i]
  predictionDf <- read.csv(
    paste0(HvalerPredictionDir, file),
    stringsAsFactors = FALSE, colClasses = HvalerClasses
  )
  for (zone in HvalerZones) {
    predicted <- predictionDf[[zone]]
    # A zone is scored only when its prediction column exists and has no NA
    # anywhere; otherwise the report cell is NA.
    if (is.null(predicted) || anyNA(predicted)) {
      MAPE <- NA_real_
    } else {
      actual <- completeDf[[zone]][testingIdx]
      # Mean absolute percentage error, expressed as a fraction (not x100).
      MAPE <- mean(abs(predicted[testingIdx] - actual) / actual)
    }
    reportDf[[zone]][i] <- MAPE
  }
}

In [82]:
# Display the report: file name, method, horizon and one MAPE column per zone.
reportDf

Unnamed: 0,files,method,horizon,subs.1,subs.2,subs.3,subs.4,subs.5,subs.6,subs.7,⋯,subs.11,subs.12,subs.13,subs.14,subs.15,subs.16,subs.17,subs.18,subs.19,subs.20
1,average_baseline.csv,average_baseline,-1,0.189019626907884,0.259929745968079,0.171966572535935,0.292253119754064,0.227894353405942,1.30131637657179,0.223565982761755,⋯,0.294223044590854,0.155332555793375,0.176020273467689,0.214705034879139,0.3053220764972,0.294635078576918,0.293813779522868,0.181520745368565,0.170860708861476,0.171458038697331
2,randomforest_horizon_1.csv,randomforest,1,0.0498881168113542,,,,,,,⋯,,,,,,,,,,
3,randomforest_horizon_2.csv,randomforest,2,0.0645283370199427,,,,,,,⋯,,,,,,,,,,
4,randomforest_horizon_3.csv,randomforest,3,0.0714777202273151,,,,,,,⋯,,,,,,,,,,
5,randomforest_horizon_4.csv,randomforest,4,0.0743663796251404,,,,,,,⋯,,,,,,,,,,
6,randomforest_horizon_5.csv,randomforest,5,0.074964677045497,,,,,,,⋯,,,,,,,,,,
7,randomforest_horizon_6.csv,randomforest,6,0.0748716340129152,,,,,,,⋯,,,,,,,,,,
8,randomforest_horizon_7.csv,randomforest,7,0.0756768017547219,,,,,,,⋯,,,,,,,,,,
9,randomforest_horizon_8.csv,randomforest,8,0.0756396750546237,,,,,,,⋯,,,,,,,,,,
10,randomforest_horizon_9.csv,randomforest,9,0.0763447642802556,,,,,,,⋯,,,,,,,,,,


In [None]:
# Load the complete GEFCom2012 table using the column classes declared in
# GEFCom2012Classes.
df <- read.csv(
  file = GEFCom2012CompleteFile,
  colClasses = GEFCom2012Classes,
  stringsAsFactors = FALSE
)

In [48]:
# Start position of "_horizon_" in each file name (-1 where it does not occur).
m <- regexpr("_horizon_", reportDf$files)

In [40]:
# Inspect the match positions stored in `m`.
m