### Measure and report all predictions stored in CSV files with appropriate names

All CSV files whose names follow the format "method_name_horizon_n.csv" will be tested. Without the horizon in the file name, the measurement will be reported for all horizons.

See Lib/GenerateReport.R for more information.

In [64]:
library(tidyr)
library(dplyr)
library(lubridate)
source("Lib/GenerateReport.R")


# ---- Hvaler dataset: locations, column classes and zone column names ----
HvalerDir <- "Hvaler/"
HvalerPredictionDir <- paste0(HvalerDir, "Predictions/")
HvalerCompleteFile <- paste0(HvalerDir, "imputed_complete.csv")
HvalerTrainingFile <- paste0(HvalerDir, "training_set.csv")

# One timestamp column followed by 21 numeric columns
HvalerClasses <- c("POSIXct", rep_len("numeric", 21))
HvalerZones <- paste0("subs.", seq_len(20))

# ---- GEFCom2012 dataset: locations, column classes and zone column names ----
GEFCom2012Dir <- "GEFCom2012/"
GEFCom2012PredictionDir <- paste0(GEFCom2012Dir, "Predictions/")
GEFCom2012CompleteFile <- paste0(GEFCom2012Dir, "complete.csv")
GEFCom2012TrainingFile <- paste0(GEFCom2012Dir, "training_set.csv")
# One timestamp column followed by 32 numeric columns
GEFCom2012Classes <- c("POSIXct", rep_len("numeric", 32))
GEFComZones <- paste0("zone.", seq_len(20))

In [65]:
# Measure for Hvaler: read the complete and training sets with the declared
# column classes, then hand them to generateReport() together with the
# dataset directories and zone names.
completeDf <- read.csv(
  HvalerCompleteFile,
  stringsAsFactors = FALSE,
  colClasses = HvalerClasses
)
trainingDf <- read.csv(
  HvalerTrainingFile,
  stringsAsFactors = FALSE,
  colClasses = HvalerClasses
)
reportHvalerDf <- generateReport(
  completeDf, trainingDf,
  HvalerDir, HvalerPredictionDir,
  HvalerZones, HvalerClasses
)

[1] "Processing Hvaler/Predictions/average_baseline.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_1.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_2.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_3.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_4.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_5.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_6.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_7.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_8.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_9.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_10.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_11.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_12.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_13.csv ..."
[1] "Processing Hvaler/Predictions/averageARIMA_horizon_14.csv .

In [8]:
# Measure for GEFCom2012: read the complete and training sets with the declared
# column classes, then hand them to generateReport() together with the
# dataset directories and zone names.
completeDf <- read.csv(
  GEFCom2012CompleteFile,
  stringsAsFactors = FALSE,
  colClasses = GEFCom2012Classes
)
trainingDf <- read.csv(
  GEFCom2012TrainingFile,
  stringsAsFactors = FALSE,
  colClasses = GEFCom2012Classes
)
reportGEFCom2012Df <- generateReport(
  completeDf, trainingDf,
  GEFCom2012Dir, GEFCom2012PredictionDir,
  GEFComZones, GEFCom2012Classes
)

[1] "Processing GEFCom2012/Predictions/average_baseline.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_1.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_2.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_3.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_4.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_5.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_6.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_7.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_8.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_9.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_10.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_11.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_12.csv ..."
[1] "Processing GEFCom2012/Predictions/averageARIMA_horizon_13.csv ..."
[1] "Pro

In [108]:
#Plot median accuracy on GEFCom2012 dataset
require(ggplot2)
require(gridExtra)
# Extract the legend grob of a plot so one legend can be shared by two plots.
#
# a.gplot: a ggplot object that draws a legend.
# Returns the first grob named "guide-box" from the plot's gtable.
# Stops with an explicit error when the plot has no such grob.
g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  # vapply keeps the result a character vector even when there are no grobs
  grobNames <- vapply(tmp$grobs, function(x) x$name, character(1))
  leg <- which(grobNames == "guide-box")
  if (length(leg) == 0) {
    stop("g_legend: plot has no \"guide-box\" grob, so there is no legend to extract",
         call. = FALSE)
  }
  # Take the first match: `[[` with several indices would index recursively
  tmp$grobs[[leg[1]]]
}
                      
# Median acc per method and horizon (horizons 1 to 24) over all zone columns.
#
# reportDf: a report data frame with `method` and `horizon` columns plus one
#           column per zone whose name matches "(zone|subs.)".
# Returns one row per (method, horizon) with the median of the zone values.
medianAccByHorizon <- function(reportDf) {
  reportDf %>%
    filter(horizon >= 1 & horizon <= 24) %>%
    # Zone columns are located on the data frame actually being reshaped;
    # the original looked them up on an unrelated global named `reportDf`.
    gather(zone, acc, which(grepl("(zone|subs.)", names(reportDf)))) %>%
    select(method, zone, horizon, acc) %>%
    group_by(method, horizon) %>%
    summarize(acc = median(acc, na.rm = TRUE))
}

# Line-and-point plot of acc against horizon, one series per method.
#
# plotDf: output of medianAccByHorizon(); title: plot title.
horizonPlot <- function(plotDf, title) {
  ggplot(data = plotDf,
         aes(x = horizon, y = acc, group = method, shape = method)) +
    geom_line(linetype = 2) +
    geom_point(size = 2, fill = "white") +
    theme_bw() +
    labs(y = "MAPE %", x = "Horizon") +
    ggtitle(title) +
    scale_y_continuous(breaks = seq(0, 0.3, 0.01), labels = scales::percent)
}

plotGEFCom2012Df <- medianAccByHorizon(reportGEFCom2012Df)
# The legend is placed on top of p1 only so that g_legend() can extract it
p1 <- horizonPlot(plotGEFCom2012Df, "GEFCOM2012") +
  theme(legend.position = "top")

plotHvalerDf <- medianAccByHorizon(reportHvalerDf)
p2 <- horizonPlot(plotHvalerDf, "Hvaler")

commonLegend <- g_legend(p1)

# Open the device only once everything is ready to draw, and always close it
# so a failure while drawing does not leave the PDF device open.
pdf("Horizon_vs_Acc.pdf", width = 7, height = 4)
p3 <- tryCatch(
  grid.arrange(arrangeGrob(p1 + theme(legend.position = "none"),
                           p2 + theme(legend.position = "none"),
                           nrow = 1),
               commonLegend, nrow = 2, heights = c(10, 1)),
  finally = dev.off()
)

In [111]:
# Rank plot

# Reshape a report to long format: one row per (method, zone, horizon) with
# the zone's value in `acc`, restricted to horizons 1 to 24.
#
# reportDf: a report data frame with `method` and `horizon` columns plus one
#           column per zone whose name matches "(zone|subs.)".
zoneAccLong <- function(reportDf) {
  reportDf %>%
    filter(horizon >= 1 & horizon <= 24) %>%
    # Zone columns are located on the data frame actually being reshaped;
    # the original looked them up on an unrelated global named `reportDf`.
    gather(zone, acc, which(grepl("(zone|subs.)", names(reportDf)))) %>%
    select(method, zone, horizon, acc)
}

plotGEFCom2012Df <- zoneAccLong(reportGEFCom2012Df)
plotHvalerDf <- zoneAccLong(reportHvalerDf)

# Mean acc per method and zone over both datasets, ranked by ascending acc.
# summarize() drops the last grouping level, so the data stays grouped by
# method and min_rank() ranks the zones within each method.
plotDf <- rbind(plotGEFCom2012Df, plotHvalerDf) %>%
  group_by(method, zone) %>%
  # The argument is `na.rm`; the original `rm.na` was silently ignored by
  # mean(), so any NA made the whole group mean NA.
  summarize(acc = mean(acc, na.rm = TRUE)) %>%
  arrange(acc) %>%
  mutate(rank = min_rank(acc))

# Plot the ranked data frame (the original passed `df`, which is the
# stats::df function unless a global of that name happens to exist).
rankPlot <- ggplot(data = plotDf,
                   aes(x = rank, y = acc, group = method, shape = method)) +
  geom_line(linetype = 2) +
  geom_point(size = 2, fill = "white") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(y = "MAPE %", x = "Rank") +
  scale_y_continuous(breaks = seq(0, 0.3, 0.01), limits = c(0.04, 0.3),
                     labels = scales::percent)

pdf("Rank_vs_Acc.pdf", width = 6, height = 5)
# print() explicitly so the plot is drawn even when this code is sourced
# rather than run interactively; always close the device afterwards.
tryCatch(print(rankPlot), finally = dev.off())

“Removed 8 rows containing missing values (geom_point).”