In [None]:
# Install/load required packages
if (!requireNamespace("forecast", quietly = TRUE)) install.packages("forecast", repos = "https://cloud.r-project.org")
if (!requireNamespace("jsonlite", quietly = TRUE)) install.packages("jsonlite", repos = "https://cloud.r-project.org")
if (!requireNamespace("pryr", quietly = TRUE)) install.packages("pryr", repos = "https://cloud.r-project.org")

library(forecast)
library(jsonlite)
library(pryr)

# Paths ----
# Everything is resolved against the parent of the current working directory.
project_root <- normalizePath(file.path(getwd(), ".."))
data_dir <- file.path(project_root, "data")
out_dir <- file.path(project_root, "output", "r")
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

# File selection ----
# Glob matched against the file names found in data_dir,
# e.g. "linear_*_*.csv" or "season_*_*.csv".
name_glob <- "*.csv"

# Model configuration ----
# Global defaults; process_one() overrides the seasonal settings for files
# whose name starts with "season_".
ycol <- "y"                 # column holding the series values
trend_type <- "A"           # "A" additive, "N" none
seasonal_type <- "N"        # seasonal letter used when a seasonal period is set
seasonal_periods <- NA      # NA: no seasonal period for non-"season_" files
train_ratio <- 0.7          # currently unused: process_one() holds out 30% for testing

#' Read one CSV file and extract the target column as a numeric series.
#'
#' @param path Path to a CSV file readable by `read.csv()`.
#' @param ycol Name of the column holding the series values.
#' @return A list with `series` (the `ycol` column coerced with
#'   `as.numeric()`) and `df` (the full data frame as read).
read_series <- function(path, ycol = "y") {
  # Fail early and name the offending path: read.csv() on a missing file only
  # reports "cannot open the connection", which is unhelpful in a batch run.
  if (!file.exists(path)) {
    stop(sprintf("File not found: %s", path), call. = FALSE)
  }
  df <- tryCatch(
    read.csv(path, stringsAsFactors = FALSE),
    error = function(e) {
      stop(
        sprintf("Could not read %s: %s", path, conditionMessage(e)),
        call. = FALSE
      )
    }
  )
  if (!(ycol %in% colnames(df))) {
    stop(sprintf("Column '%s' not found in %s", ycol, path))
  }
  # The data frame is returned alongside the series so callers keep access to
  # any other columns of the file.
  list(series = as.numeric(df[[ycol]]), df = df)
}

#' Write forecast values to a CSV named after the series, horizon and season.
#'
#' The file is called `<stem>_hw_h<H>_s<S>.csv`, where `H` is the number of
#' forecast values and `S` is the seasonal period (0 when it is `NA`).
#'
#' @param stem Base name used as the file-name prefix.
#' @param idx_vals Values written to the `index` column.
#' @param yhat Forecast values; coerced to numeric for the `value` column.
#' @param out_dir Directory the CSV is written into.
#' @param seasonal_periods_current Seasonal period, or `NA` for none.
#' @return The path of the written CSV file.
save_forecast_csv <- function(stem, idx_vals, yhat, out_dir, seasonal_periods_current) {
  # A missing seasonal period is encoded as 0 in the file name.
  season_tag <- replace(
    seasonal_periods_current,
    is.na(seasonal_periods_current),
    0
  )
  file_name <- sprintf("%s_hw_h%d_s%d.csv", stem, length(yhat), season_tag)
  out_path <- file.path(out_dir, file_name)

  forecast_tbl <- data.frame(index = idx_vals, value = as.numeric(yhat))
  write.csv(forecast_tbl, out_path, row.names = FALSE)
  out_path
}

#' Fit an ETS model to one CSV series, forecast the hold-out and save results.
#'
#' The series is split into a leading training part and a trailing test part.
#' `forecast::ets()` is fitted on the training part, the test horizon is
#' forecast, and three artifacts are written into the global `out_dir`:
#' a forecast CSV, a `<stem>_hw_metrics.json` file and a PNG plot.
#'
#' Reads the globals `ycol`, `seasonal_periods`, `trend_type`,
#' `seasonal_type` and `out_dir`, and calls `read_series()` and
#' `save_forecast_csv()`.
#'
#' @param csv_path Path to the input CSV file.
#' @param test_ratio Fraction of the series held out as the test set. The
#'   default 0.3 matches the previously hard-coded split.
#' @return Called for its side effects; returns `NULL` invisibly.
process_one <- function(csv_path, test_ratio = 0.3) {
  stopifnot(
    is.numeric(test_ratio), length(test_ratio) == 1L,
    test_ratio > 0, test_ratio < 1
  )

  sr <- read_series(csv_path, ycol)
  y <- sr$series
  n <- length(y)
  # With fewer than two points there is no way to have both a train and a
  # test part (and 1:0 would silently index c(1, 0)).
  if (n < 2L) {
    stop(sprintf("Need at least 2 observations, got %d", n), call. = FALSE)
  }

  # Hold out test_ratio of the series: at least one point, but always leave
  # at least one point for training.
  h <- min(max(1L, round(n * test_ratio)), n - 1L)
  split <- n - h
  train <- y[seq_len(split)]
  test <- y[(split + 1):n]

  # Files whose name starts with "season_" get a fixed period of 24 and an
  # additive seasonal component; all others use the global configuration.
  stem <- tools::file_path_sans_ext(basename(csv_path))
  is_season <- startsWith(stem, "season_")
  sp_local <- if (is_season) {
    24
  } else if (!is.na(seasonal_periods) && seasonal_periods > 1) {
    seasonal_periods
  } else {
    NA
  }
  seasonal_type_local <- if (is_season) "A" else seasonal_type

  # Build ts object for ETS with local frequency
  train_ts <- if (!is.na(sp_local) && sp_local > 1) {
    ts(train, frequency = sp_local)
  } else {
    ts(train)
  }

  # Model string is <error><trend><seasonal>. The error component is fixed to
  # additive: reusing trend_type there would produce an invalid "N" error
  # type whenever the trend is disabled.
  seasonal_letter <- if (is.na(sp_local)) "N" else seasonal_type_local
  model_str <- sprintf("A%s%s", trend_type, seasonal_letter)

  # Measure time and memory using system.time + pryr::mem_change
  fit_mem_change <- mem_change({
    fit_time <- system.time({
      fit <- tryCatch(
        forecast::ets(train_ts, model = model_str),
        error = function(e) {
          # Report the fallback so the metrics are not silently attributed to
          # the requested model.
          message(sprintf(
            "ets(model = '%s') failed for %s (%s); using automatic selection",
            model_str, basename(csv_path), conditionMessage(e)
          ))
          forecast::ets(train_ts)
        }
      )
    })
  })
  train_time_s <- as.numeric(fit_time["elapsed"])
  mem_used_fit_bytes <- as.numeric(fit_mem_change)

  # Forecast
  pred_mem_change <- mem_change({
    pred_time <- system.time({
      fcast <- forecast::forecast(fit, h = h)
    })
  })
  predict_time_s <- as.numeric(pred_time["elapsed"])
  mem_used_pred_bytes <- as.numeric(pred_mem_change)

  # Index values saved with the forecast run from split to n - 1, i.e. one
  # less than the 1-based positions of the test points.
  # NOTE(review): presumably zero-based on purpose — confirm with consumers.
  idx_vals <- split:(split + h - 1)
  out_path <- save_forecast_csv(stem, idx_vals, fcast$mean, out_dir, sp_local)

  # Metrics
  y_pred <- as.numeric(fcast$mean)
  err <- test - y_pred
  rmse <- sqrt(mean(err^2))
  mae <- mean(abs(err))
  # Zero actuals are excluded from MAPE to avoid division by zero.
  mape <- mean(abs(err / ifelse(test == 0, NA, test)), na.rm = TRUE) * 100

  # Map NA/NaN to a plain NA so that undefined metrics are passed to the
  # JSON writer the same way regardless of how they arose.
  na_or_number <- function(x) if (is.na(x)) NA else as.numeric(x)

  metrics <- list(
    file = normalizePath(csv_path),
    n_total = n,
    n_train = length(train),
    n_test = h,
    train_time_s = train_time_s,
    predict_time_s = predict_time_s,
    mem_used_fit_bytes = as.integer(mem_used_fit_bytes),
    mem_used_pred_bytes = as.integer(mem_used_pred_bytes),
    rmse = na_or_number(rmse),
    mae = na_or_number(mae),
    mape_pct = na_or_number(mape),
    forecast_csv = normalizePath(out_path)
  )

  metrics_path <- file.path(out_dir, sprintf("%s_hw_metrics.json", stem))
  write(jsonlite::toJSON(metrics, pretty = TRUE, auto_unbox = TRUE), metrics_path)

  # Plot on a common axis 1..n. The device is closed in `finally` so that a
  # plotting error does not leave an open png device behind for later files.
  png(file.path(out_dir, sprintf("%s_hw_plot_h%d.png", stem, h)), width = 900, height = 450)
  tryCatch({
    test_x <- (split + 1):n
    # Include the forecast in the y-range so it is not clipped.
    plot(seq_len(n), c(train, test), type = "n",
         ylim = range(c(train, test, y_pred), finite = TRUE),
         main = sprintf("Holt-Winters forecast (%s)", basename(csv_path)),
         xlab = "t", ylab = "y")
    lines(seq_len(split), train, type = "o", col = "black")
    lines(test_x, test, type = "o", col = "red")
    lines(test_x, y_pred, type = "o", col = "blue")
    abline(v = split, col = "gray60", lty = 2)
    legend("topleft",
           legend = c("train", "test", "forecast", sprintf("test≈%.0f%%", 100 * h / n)),
           col = c("black", "red", "blue", "gray60"),
           lty = c(1, 1, 1, 2), pch = c(1, 1, 1, NA))
  }, finally = dev.off())

  message(sprintf("Done: %s → %s", basename(csv_path), basename(out_path)))
}

In [8]:
# Process every file in data_dir whose name matches name_glob. A failure in
# one file is reported and does not stop the remaining files.
files <- list.files(path = data_dir, pattern = glob2rx(name_glob), full.names = TRUE)
if (length(files) == 0L) {
  # Otherwise a wrong data_dir or glob would look like a successful run.
  message(sprintf("No files matching '%s' found in %s", name_glob, data_dir))
}
for (f in sort(files)) {
  tryCatch(
    process_one(f),
    error = function(e) {
      message(sprintf("Failed %s: %s", basename(f), conditionMessage(e)))
    }
  )
}

Done: AR_100_0.csv → AR_100_0_hw_h39_s0.csv

Done: AR_1000_0.csv → AR_1000_0_hw_h390_s0.csv

Done: AR_1000_0.csv → AR_1000_0_hw_h390_s0.csv

Done: AR_10000_0.csv → AR_10000_0_hw_h3900_s0.csv

Done: AR_10000_0.csv → AR_10000_0_hw_h3900_s0.csv

Done: AR_10000_01.csv → AR_10000_01_hw_h3900_s0.csv

Done: AR_10000_01.csv → AR_10000_01_hw_h3900_s0.csv

Done: AR_10000_02.csv → AR_10000_02_hw_h3900_s0.csv

Done: AR_10000_02.csv → AR_10000_02_hw_h3900_s0.csv

Done: AR_10000_03.csv → AR_10000_03_hw_h3900_s0.csv

Done: AR_10000_03.csv → AR_10000_03_hw_h3900_s0.csv

Done: AR_50_0.csv → AR_50_0_hw_h20_s0.csv

Done: AR_50_0.csv → AR_50_0_hw_h20_s0.csv

Done: AR_500_0.csv → AR_500_0_hw_h195_s0.csv

Done: AR_500_0.csv → AR_500_0_hw_h195_s0.csv

Done: linear_100_0.csv → linear_100_0_hw_h39_s0.csv

Done: linear_100_0.csv → linear_100_0_hw_h39_s0.csv

Done: linear_1000_0.csv → linear_1000_0_hw_h390_s0.csv

Done: linear_1000_0.csv → linear_1000_0_hw_h390_s0.csv

Done: linear_10000_0.csv → linear_10000_0_h