#### Tidy Hvaler dataset

In [33]:
library(tidyr)
library(dplyr)
library(lubridate)
library(xts)
# Input files, given relative to the current working directory.
top20File <- "Hvaler/top_20.csv"
tempFile <- "Hvaler/temperature_2010_2014.csv"

# Column types of the top-20 file, by position: timestamp, consumption value,
# substation id. Consumption is read directly as numeric (no comma-decimal
# conversion is applied here); the substation id is kept as text.
# NOTE(review): the "POSIXct" column class parses timestamps without an
# explicit time zone, so the session time zone presumably applies -- confirm.
top20Classes <- c("POSIXct", "numeric", "character")

# Load the long-format table and give its three columns fixed names.
df <- setNames(
  read.csv(top20File, stringsAsFactors = FALSE, colClasses = top20Classes),
  c("DateTime", "Consumption", "NETTSTNR")
)

In [34]:
# Reshape the long table to wide form: one row per DateTime and one column per
# substation (NETTSTNR) holding its Consumption; absent combinations become NA.
tidyDf <- df %>% spread(NETTSTNR, Consumption, fill = NA, convert = FALSE)

# Replace the real substation ids with arbitrary labels subs.1, subs.2, ...,
# numbered in the column order produced by spread().
names(tidyDf) <- c("DateTime", paste0("subs.", seq_len(ncol(tidyDf) - 1)))

# Convert to a time series indexed by DateTime. drop = FALSE keeps the value
# columns as a data frame even when there is only one substation, so the
# "subs.N" column name survives the round trip through xts.
xt <- xts(x = tidyDf[, -1, drop = FALSE], order.by = tidyDf[["DateTime"]])

# Keep only observations from 2012 and 2013.
xt <- xt["2012/2013"]
if (nrow(xt) == 0) {
  # Without this guard the seq() call below fails with an obscure error.
  stop("no observations fall within 2012-2013", call. = FALSE)
}

# Merge with an empty hourly series running from the first to the last
# retained timestamp, so every hour in that span gets a row (NA where no
# reading exists). Hours before the first or after the last observation are
# not added.
xt <- merge(xt, xts(, seq(start(xt), end(xt), by = "hour")), all = TRUE)

# Back to a plain data frame with the timestamp as an ordinary column.
tidyDf <- data.frame(DateTime = index(xt), coredata(xt))
#head(tidyDf)

#### Read temperature and join to make complete dataset

In [35]:
# Column types of the temperature file, by position: one timestamp column
# followed by three numeric columns.
tempClasses <- c("POSIXct", rep("numeric", times = 3))

# Load the temperature table and give its four columns fixed names.
tempDf <- setNames(
  read.csv(tempFile, stringsAsFactors = FALSE, colClasses = tempClasses),
  c("DateTime", "T01", "Min", "Max")
)

# Keep only the timestamp and T01, rescaling T01 by 9/5 and adding 32.
# NOTE(review): this is the Celsius-to-Fahrenheit formula, so T01 is
# presumably recorded in degrees Celsius -- confirm against the data source.
tempDf <- tempDf %>% select(DateTime, T01)
tempDf <- tempDf %>% mutate(T01 = T01 * 9/5 + 32)

# Attach the temperature to every consumption row; rows of tidyDf with no
# matching DateTime in tempDf get NA for T01.
completeDf <- left_join(tidyDf, tempDf, by = "DateTime")

In [36]:
# Treat every cell that is exactly 0 as missing.
# NOTE(review): the comparison runs over the whole data frame, so it covers
# the DateTime and T01 columns as well as the subs.* consumption columns.
# If only zero consumption should count as missing, restrict the mask to
# those columns.
completeDf <- replace(completeDf, completeDf == 0, NA)

# Save the combined table as CSV without a row-name column.
write.csv(completeDf, file = "Hvaler/complete.csv", row.names = FALSE)