In [1]:
#' Keep the trailing portion of each field's rows.
#'
#' Splits `data` by the distinct values of its `field` column. For a group
#' with `n` rows, the rows at positions `as.integer(n * cutoff)` through `n`
#' are kept (in their existing order) and the earlier ones are dropped. The
#' kept pieces are stacked back into a single data frame with
#' `plyr::ldply()`.
#'
#' Position 0 selects nothing in R, so `cutoff = 0` keeps every row, while
#' `cutoff = 1` keeps only the last row of each group.
#'
#' @param data A data frame with a `field` column. Rows are not re-sorted, so
#'   they should already be in the desired (presumably chronological) order.
#' @param cutoff A single number in [0, 1]: the fraction of each group's
#'   leading rows to skip.
#' @return A data frame with the same columns as `data`, containing the kept
#'   rows of every group. An input with zero rows yields an empty data frame.
make_tuning_df <- function(data, cutoff){
    # library() stops immediately if plyr is missing; require() would only warn
    # and let the function fail later at the ldply() call.
    library(plyr)

    # Values outside [0, 1] would either index past the end of a group
    # (yielding all-NA rows) or mix negative and positive indices.
    stopifnot(
        is.numeric(cutoff),
        length(cutoff) == 1,
        !is.na(cutoff),
        cutoff >= 0,
        cutoff <= 1
    )

    stations <- unique(data$field)
    stations_list <- vector("list", length(stations))

    # seq_along() makes this a no-op for empty input; 1:length(x) would
    # iterate over c(1, 0) and fail on the zero index.
    for (i in seq_along(stations)) {
        print(as.character(stations[i]))
        station_rows <- subset(data, field == stations[i])
        numrows <- nrow(station_rows)
        first_kept <- as.integer(numrows * cutoff)
        stations_list[[i]] <- station_rows[first_kept:numrows, ]
    }

    ldply(stations_list, data.frame)
}


In [2]:
#' Impute missing sensor readings station by station.
#'
#' For every distinct value of `data$station`, the station's rows are taken
#' in their existing order and each measurement column is gap-filled:
#'
#' * `air_temp_1`, `dewpoint`, `rel_hum`, `eight_in_soil_temp`,
#'   `solar_Watts_m2.y`, `leaf_wet`, `wind_speed`, `wind_gust` and
#'   `two_in_soil_temp` are wrapped in a `forecast::msts()` series with a
#'   seasonal period of 96 observations and filled with
#'   `imputeTS::na.seasplit(algorithm = 'interpolation')`.
#' * `vwc` is filled with `imputeTS::na.interpolation(option = 'spline')`
#'   and is not treated as seasonal.
#'
#' @param data A data frame with `station`, `datetime` and the measurement
#'   columns listed above. Rows are not re-sorted; they are presumably
#'   expected to be chronological and evenly spaced within each station.
#' @return A data frame holding the imputed rows of all stations, stacked with
#'   `plyr::ldply()`. An input with zero rows yields an empty data frame.
ts_impute <- function(data){
  # library() stops immediately when a package is missing; require() would
  # only warn and let the function fail later. The attach order is unchanged.
  # dplyr is not called by the code below; it is still attached so the session
  # ends up with the same packages on the search path as before.
  library(plyr)
  library(forecast)
  library(dplyr)
  library(imputeTS)

  # Columns imputed with the seasonal-split approach.
  seasonal_cols <- c(
    'air_temp_1', 'dewpoint', 'rel_hum', 'eight_in_soil_temp',
    'solar_Watts_m2.y', 'leaf_wet', 'wind_speed', 'wind_gust',
    'two_in_soil_temp'
  )
  # Seasonal period in observations. 96 is the number of 15-minute steps in a
  # day -- assumes 15-minute sampling; TODO confirm against the data source.
  seasonal_period <- 96

  stations <- unique(data$station)
  stations_list <- vector("list", length(stations))

  # seq_along() makes this a no-op for empty input; 1:length(x) would
  # iterate over c(1, 0) and fail on the zero index.
  for (i in seq_along(stations)) {
    print(as.character(stations[i]))
    station_rows <- subset(data, station == stations[i])
    n_obs <- nrow(station_rows)

    # The series start is the earliest timestamp converted to a plain number.
    date_range <- range(station_rows$datetime, na.rm = TRUE)
    start <- as.numeric(date_range[1])

    # NOTE(review): newer imputeTS releases appear to name these functions
    # na_seasplit() / na_interpolation(); the dotted names are kept so the
    # code still runs on the version it was written against -- confirm.
    for (col in seasonal_cols) {
      col_ts <- msts(
        station_rows[[col]],
        start = start,
        seasonal.periods = seasonal_period
      )
      # Indexing turns the imputed series back into a plain vector.
      station_rows[[col]] <-
        na.seasplit(col_ts, algorithm = 'interpolation')[seq_len(n_obs)]
    }

    station_rows$vwc <- na.interpolation(station_rows$vwc, option = 'spline')

    stations_list[[i]] <- station_rows
  }

  ldply(stations_list, data.frame)
}

In [3]:
#' Expand every station onto a gap-free 15-minute time grid.
#'
#' For each distinct value of `data$station`, a sequence of timestamps is
#' built from the station's earliest to its latest `datetime` in 15-minute
#' steps, and the station's rows are right-joined onto that sequence. Grid
#' timestamps with no matching observation therefore appear as rows whose
#' measurement columns are `NA`; observations whose `datetime` is not on the
#' grid are dropped by the join. `county` and `station` are then filled in on
#' every row, and each station's rows are sorted by `datetime`.
#'
#' @param data A data frame with `station`, `county` and `datetime` columns.
#'   `datetime` must be a date-time vector that `seq()` can step through
#'   (e.g. POSIXct).
#' @return A data frame holding the expanded rows of all stations, stacked
#'   with `plyr::ldply()`. An input with zero rows yields an empty data frame.
add_full_timeseries <- function(data){
  # library() stops immediately when a package is missing; require() would
  # only warn. lubridate is not called by the code below; it is still attached
  # so the session ends up with the same packages on the search path.
  library(plyr)
  library(lubridate)

  stations <- unique(data$station)
  stations_list <- vector("list", length(stations))

  # seq_along() makes this a no-op for empty input; 1:length(x) would
  # iterate over c(1, 0) and fail on the zero index.
  for (i in seq_along(stations)) {
    print(stations[i])
    station_rows <- data[data[, 'station'] == stations[i], ] # subset

    county <- unique(station_rows$county)
    station <- unique(station_rows$station)

    # na.rm = TRUE so a missing timestamp does not turn the whole range into NA.
    date_range <- range(station_rows$datetime, na.rm = TRUE)

    # Use the generated sequence directly. Converting it to text and parsing
    # it back is lossy: the text form can omit the seconds or the midnight
    # time, and re-parsing uses the session time zone rather than the data's.
    date_seq <- data.frame(
      datetime = seq(date_range[1], date_range[2], by = '15 mins')
    )

    # Namespaced call: this function never attaches dplyr itself, and plyr
    # masks several dplyr names when both are attached.
    station_rows <- dplyr::right_join(station_rows, date_seq, by = 'datetime')

    # The join may place grid rows without a match after the matched rows;
    # sorting restores chronological order.
    station_rows <- station_rows[order(station_rows$datetime), ]

    # Rows introduced by the join carry NA here; fill them in.
    station_rows$county <- county
    station_rows$station <- station

    stations_list[[i]] <- station_rows
  }

  ldply(stations_list, data.frame)
}