### This file calls all prediction methods on the GEFCom2012 and Hvaler datasets
Use the MeasurePerformance.ipynb notebook to measure the performance of the produced predictions.

In [40]:
library(tidyr)
library(dplyr)
library(lubridate)
library(xts)
library("doParallel")
library("foreach")
source("Lib/PredictRandomForest.R")
source("Lib/PredictDSHW.R")
source("Lib/PredictSemiParametricArima.R")
source("Lib/PredictTBATS.R")
source("Lib/PredictAverageARIMABaseline.R")

In [41]:
# ---- Hvaler: configuration and data loading ----
# Input files and the directory the predictors are given as output location.
HvalerTrainingFile <- "Hvaler/training_set.csv"
HvalerCompleteFile <- "Hvaler/imputed_complete.csv"
OutputDir <- "Hvaler/Predictions/"

# Column classes passed to read.csv: one POSIXct column followed by 21 numeric columns.
HvalerClasses <- c("POSIXct", rep("numeric", 21))

# Number of workers handed to registerDoParallel() further down.
NCores <- 8

# Series to forecast (subs.1 .. subs.20), temperature column, and horizons 1..24.
Zones <- paste0("subs.", 1:20)
Temperatures <- "T01"
Horizons <- 1:24

trainingDf <- read.csv(
    HvalerTrainingFile,
    stringsAsFactors = FALSE,
    colClasses = HvalerClasses
)
completeDf <- read.csv(
    HvalerCompleteFile,
    stringsAsFactors = FALSE,
    colClasses = HvalerClasses
)


In [None]:
# Average-ARIMA baseline for every Hvaler zone and horizon (plotting disabled).
predictAverageARIMABaselineParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    plotResult = FALSE
)

In [None]:
# WARNING: this cell runs for several hours.
# Random-forest predictions for Hvaler; nDataPoints = -1 is passed through unchanged.
predictRandomForestParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Temperatures,
    Horizons,
    nDataPoints = -1,
    plotResult = FALSE
)

In [None]:
# Long-running: standard (modifiedDSHW = FALSE) DSHW predictions for Hvaler,
# one foreach task per zone on NCores workers.
registerDoParallel(NCores)
# The iterator is named `zone` so it no longer shadows the global `Zones` vector.
# tryCatch(..., finally = ) shuts the implicit cluster down even when a task
# fails, so an error does not leave worker processes behind.
x <- tryCatch(
    foreach(zone = Zones, .combine = c) %dopar%
        predictDSHW(OutputDir, trainingDf, completeDf, zone, Horizons, modifiedDSHW = FALSE, plotResult = FALSE),
    finally = stopImplicitCluster()
)

In [None]:
# Long-running: DSHW predictions for Hvaler with modifiedDSHW switched on.
predictDSHWParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    modifiedDSHW = TRUE,
    plotResult = FALSE
)

In [None]:
# Long-running: semi-parametric ARIMA predictions for Hvaler using the temperature columns.
predictSemiParametricArimaParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Temperatures,
    Horizons,
    plotResult = FALSE
)

In [None]:
# Long-running: TBATS predictions for Hvaler.
predictTBATSParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    plotResult = FALSE
)

In [None]:
# ---- GEFCom2012: configuration and data loading ----
# From here on OutputDir, Zones, Temperatures, Horizons, trainingDf and
# completeDf refer to the GEFCom2012 data instead of Hvaler.
GEFCom2012TrainingFile <- "GEFCom2012/training_set.csv"
GEFCom2012CompleteFile <- "GEFCom2012/complete.csv"
OutputDir <- "GEFCom2012/Predictions/"

# Column classes passed to read.csv: one POSIXct column followed by 32 numeric columns.
GEFCom2012Classes <- c("POSIXct", rep("numeric", 32))

# Series to forecast (zone.1 .. zone.20), temperature columns T01 .. T11, horizons 1..24.
Zones <- paste0("zone.", 1:20)
Temperatures <- sprintf("T%02d", 1:11)
Horizons <- 1:24

trainingDf <- read.csv(
    GEFCom2012TrainingFile,
    stringsAsFactors = FALSE,
    colClasses = GEFCom2012Classes
)
completeDf <- read.csv(
    GEFCom2012CompleteFile,
    stringsAsFactors = FALSE,
    colClasses = GEFCom2012Classes
)

In [None]:
# Average-ARIMA baseline for every GEFCom2012 zone and horizon (plotting disabled).
# The argument is spelled `plotResult`, as in every other predictor call in this
# notebook; R matches argument names case-sensitively, so `PlotResult` would not
# bind to that parameter.
predictAverageARIMABaselineParallel(OutputDir, trainingDf, completeDf, Zones, Horizons, plotResult = FALSE)

In [None]:
# WARNING: this cell runs for several hours.
# Random-forest predictions for GEFCom2012; nDataPoints = -1 is passed through unchanged.
predictRandomForestParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Temperatures,
    Horizons,
    nDataPoints = -1,
    plotResult = FALSE
)

In [None]:
# DSHW predictions for GEFCom2012 with modifiedDSHW switched off.
predictDSHWParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    modifiedDSHW = FALSE,
    plotResult = FALSE
)

In [None]:
# DSHW predictions for GEFCom2012 with modifiedDSHW switched on.
predictDSHWParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    modifiedDSHW = TRUE,
    plotResult = FALSE
)

In [None]:
# Semi-parametric ARIMA predictions for GEFCom2012 using the temperature columns.
predictSemiParametricArimaParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Temperatures,
    Horizons,
    plotResult = FALSE
)

In [None]:
# TBATS predictions for GEFCom2012.
predictTBATSParallel(
    OutputDir,
    trainingDf,
    completeDf,
    Zones,
    Horizons,
    plotResult = FALSE
)

In [None]:
# Install ranger only when it is not already available, so re-running this
# cell does not trigger a fresh download and reinstall.
if (!requireNamespace("ranger", quietly = TRUE)) {
    install.packages("ranger")
}

In [None]:
# Install bbemkr only when it is not already available, so re-running this
# cell does not trigger a fresh download and reinstall.
if (!requireNamespace("bbemkr", quietly = TRUE)) {
    install.packages("bbemkr")
}

In [None]:
# Install doParallel and foreach only when they are missing, so re-running
# this cell is a no-op once both are present. local() keeps the helper
# variables out of the notebook's global environment.
local({
    needed <- c("doParallel", "foreach")
    available <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
    absent <- needed[!available]
    if (length(absent) > 0) {
        install.packages(absent)
    }
})

In [8]:
# Left-join the complete data onto the training rows, matching on DateTime.
x <- trainingDf %>%
    left_join(completeDf, by = "DateTime")

In [10]:
# Start from a copy of the training frame; later cells modify x, not trainingDf.
x <- trainingDf

In [20]:
# For each subs.1 .. subs.20 column, keep the training value where it is present
# and take the value at the same row position of completeDf where it is NA.
zones <- sprintf("subs.%d", 1:20)
for (zone in zones) {
    x[[zone]] <- ifelse(
        is.na(trainingDf[[zone]]),
        completeDf[[zone]],
        trainingDf[[zone]]
    )
}

In [21]:
# Number of rows of x that still contain at least one NA.
nrow(x) - sum(complete.cases(x))

In [18]:
# Preview the first rows of x in which subs.2 is NA.
head(x[which(is.na(x[["subs.2"]])), ])

Unnamed: 0,DateTime,subs.1,subs.2,subs.3,subs.4,subs.5,subs.6,subs.7,subs.8,subs.9,⋯,subs.12,subs.13,subs.14,subs.15,subs.16,subs.17,subs.18,subs.19,subs.20,T01
11305,2013-04-16 00:00:00,203.110000222921,,,,,,,,,⋯,,,,,,,,,,40.1
11306,2013-04-16 01:00:00,206.179999148473,,,,,,,,,⋯,,,,,,,,,,40.28
11307,2013-04-16 02:00:00,201.460000077263,,,,,,,,,⋯,,,,,,,,,,39.92
11308,2013-04-16 03:00:00,204.359999302775,,,,,,,,,⋯,,,,,,,,,,40.28
11309,2013-04-16 04:00:00,203.52000034973,,,,,,,,,⋯,,,,,,,,,,40.1
11310,2013-04-16 05:00:00,234.179999619722,,,,,,,,,⋯,,,,,,,,,,40.1


In [19]:
# Display the current contents of `zones`.
zones