From ec94389525292a2e9b84a9b76caae5b897be272e Mon Sep 17 00:00:00 2001
From: Alex Axthelm <alex@alexaxthelm.com>
Date: Tue, 15 Nov 2022 10:31:42 +0100
Subject: [PATCH] Initial Commit

---
 R/get_csv_specs.R                             | 726 ++++++++++++++++++
 README.md                                     |  29 +
 phase-1_combine-portfolios.R                  | 430 +++++++++++
 phase-2_run-pacta-docker.R                    |  79 ++
 phase-3_combine-results.R                     | 124 +++
 template.yml                                  |  16 +
 .../run-like-constructiva-flags.sh            | 141 ++++
 7 files changed, 1545 insertions(+)
 create mode 100755 R/get_csv_specs.R
 create mode 100644 README.md
 create mode 100755 phase-1_combine-portfolios.R
 create mode 100644 phase-2_run-pacta-docker.R
 create mode 100644 phase-3_combine-results.R
 create mode 100644 template.yml
 create mode 100755 transitionmonitor_docker/run-like-constructiva-flags.sh

diff --git a/R/get_csv_specs.R b/R/get_csv_specs.R
new file mode 100755
index 0000000..1f6dd58
--- /dev/null
+++ b/R/get_csv_specs.R
@@ -0,0 +1,726 @@
+# - [X] absolute filepath
+# - [X] encoding of the filepath
+# - [X] file name
+# - [X] file extension
+# - [X] file size
+# - [X] file is not empty
+# - [X] file exists?
+# - [X] file is readable?
+# - [X] file is writable?
+# - [X] file is not binary?
+# - [X] number of rows
+# - [X] does it have a newline at the end
+# - [X] determine best guess encoding
+# - [ ] determine if best guess encoding is used and then converted to UTF-8 there are non-UTF-8 characters
+# - [X] determine delimiter
+# - [X] determine decimal mark
+# - [X] determine grouping mark
+# - [X] determine number of columns
+# - [ ] determine/guess if a header is included
+# - [X] determine if column names match expectation
+# - [ ] is investor_name column all strings
+# - [ ] is portfolio_name column all strings
+# - [ ] is isin column all strings
+# - [X] is isin column all valid isins (Luhn)
+# - [ ] is isin column all isins that are in our financial data
+# - [ ] is market_value all numeric
+# - [ ] is market_value all numeric after using determined decimal and grouping mark
+# - [ ] is market value negative
+# - [ ] is currency column all strings
+# - [ ] is currency column all 3 character
+# - [ ] is currency column all alpha strings
+# - [ ] is currency column all uppercase
+# - [x] is currency column all valid, current ISO 4217 alpha currency codes
+# - [ ] is currency column all currency codes that exist in our currency exchange rate data
+
+get_csv_specs <- function(files, expected_colnames = c("Investor.Name", "Portfolio.Name", "ISIN", "MarketValue", "Currency")) {
+  alert_by_type <- function(type, ...) {
+    switch(
+      type,
+      info = cli::cli_alert_info(...),
+      warning = cli::cli_alert_warning(...),
+      danger = cli::cli_alert_danger(...),
+      success = cli::cli_alert_success(...),
+    )
+  }
+
+  report_alert_files <- function(msg, bullets, type = "info", info = NULL) {
+    cli::cli_div(theme = list(`.indented` = list(`margin-left` = 2), `.file` = list(color = "blue")))
+    on.exit(cli::cli_end(), add = TRUE)
+
+    cli::cli({
+      alert_by_type(type, msg)
+      if (!is.null(info)) {
+        alert_by_type("info", info, class = "indented")
+      }
+      if (length(bullets) > 10L) {
+        abbreviated <- c(bullets[1:10], paste0("\u2026 and {.strong ", length(bullets) - 10, " more}"))
+        cli::cli_bullets(abbreviated, class = "file indented")
+      } else {
+        cli::cli_bullets(bullets, class = "file indented")
+      }
+    })
+  }
+
+  if (length(files) == 1 && fs::is_dir(files)) {
+    files <- file.path(files, list.files(files))
+  }
+  files_df <- tibble::tibble(input = files, id = seq_along(files))
+
+  files_df$filename <- basename(files_df$input)
+  files_df$extension <- tools:::file_ext(files_df$input)
+
+  files_df$filepath <- fs::path_abs(fs::path_expand(files_df$input))
+
+  files_df$file_exists <- unname(fs::file_exists(files_df$filepath))
+
+  if (all(files_df$file_exists)) {
+    cli::cli_alert_success("all files exist")
+  } else if (all(!files_df$file_exists)) {
+    cli::cli_alert_danger("none of the files exist")
+    cli::cli_abort("the {.fun get_csv_specs} function had to stop because none of the files could be used")
+  } else {
+    alert_files <- files_df$filename[!files_df$file_exists]
+    report_alert_files("the following files do not exist and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$file_exists, ]
+  }
+
+  files_df$file_size <- unname(fs::file_size(files_df$filepath))
+
+  if (all(files_df$file_size > 0)) {
+    cli::cli_alert_success("all files have a size > 0")
+  } else if (all(files_df$file_size == 0)) {
+    cli::cli({
+      cli::cli_alert_danger("all of the files have a size of 0")
+      cli::cli_alert_info("this might mean that they are un-downloaded Dropbox files or are empty files", class = "indented")
+    })
+    cli::cli_abort("the {.fun get_csv_specs} function had to stop because none of the files could be used")
+  } else {
+    alert_files <- files_df$filename[files_df$file_size == 0]
+    report_alert_files("the following files have a size of 0 and will not be considered further:", alert_files, type = "danger", info = "this might mean that they are un-downloaded Dropbox files or are empty files")
+    files_df <- files_df[files_df$file_size > 0, ]
+  }
+
+
+  files_df$file_read_access <- unname(fs::file_access(files_df$filepath, mode = "read"))
+
+  if (all(files_df$file_read_access)) {
+    cli::cli_alert_success("all files have read access")
+  } else if (all(!files_df$file_read_access)) {
+    cli::cli_alert_danger("all of the files do not have read access")
+    cli::cli_abort("the {.fun get_csv_specs} function had to stop because none of the files could be used")
+  } else {
+    alert_files <- files_df$filename[!files_df$file_read_access]
+    report_alert_files("the following files do not have read access and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$file_read_access, ]
+  }
+
+
+  files_df$file_write_access <- unname(fs::file_access(files_df$filepath, mode = "write"))
+
+  files_df$mime_encoding <- vapply(files_df$filepath, function(x) system2("file", args = c("-b", "--mime-encoding", shQuote(x)), stdout = TRUE), character(1))
+
+  if (all(files_df$mime_encoding != "binary")) {
+    cli::cli_alert_success("all files are not binary files")
+  } else if (all(files_df$mime_encoding == "binary")) {
+    cli::cli_alert_danger("all of the files are binary files")
+    cli::cli_abort("the {.fun get_csv_specs} function had to stop because none of the files could be used")
+  } else {
+    alert_files <- files_df$filename[files_df$mime_encoding == "binary"]
+    report_alert_files("the following files are binary files and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$mime_encoding != "binary", ]
+  }
+
+
+  files_df$content_type <- guess_content_types(files_df$filepath)
+
+  if (all(grepl("/csv$|/comma-separated-values$", files_df$content_type))) {
+    cli::cli_alert_success("all files are CSV files")
+  } else if (all(!grepl("/csv$|/comma-separated-values$", files_df$content_type))) {
+    cli::cli_alert_danger("all of the files are not CSV files")
+    cli::cli_abort("the {.fun get_csv_specs} function had to stop because none of the files could be used")
+  } else {
+    alert_files <- files_df$filename[!grepl("/csv$|/comma-separated-values$", files_df$content_type)]
+    report_alert_files("the following files are not CSV files and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[grepl("/csv$|/comma-separated-values$", files_df$content_type), ]
+  }
+
+  files_df$filepath_is_ascii <- stringi::stri_enc_isascii(files_df$filepath)
+
+  detected_filepath_encoding_tbls <- stringi::stri_enc_detect(files_df$filepath)
+  files_df$filepath_encoding <- vapply(detected_filepath_encoding_tbls, function(x) { x$Encoding[[1]] }, character(1))
+
+  files_df$filepath_declared_encoding <- Encoding(files_df$filepath)
+
+  # if (all(files_df$filepath_is_ascii)) {
+  #   cli::cli_alert_success("all filepaths are in ASCII")
+  # } else {
+  #   # cli::cli_alert_warning("the following filepaths are not in ASCII")
+  #   # cli::cli_bullets(setNames(files_df$filepath[!files_df$filepath_is_ascii], " "))
+  #   cli::cli_text("the following filepaths are not in ASCII: {.file {files_df$filepath[!files_df$filepath_is_ascii]}}")
+  # }
+
+  # if (any(files_df$filepath_encoding == "UTF-8")) {
+  #   # cli::cli_alert_warning("the following filepaths are in UTF-8, which can cause trouble, especially on Windows")
+  #   cli::cli_text("the following filepaths are in UTF-8, which can cause trouble, especially on Windows: {.file {files_df$filepath[!files_df$filepath_is_ascii]}}")
+  # }
+
+
+  files_df$file_encoding <- pacta.portfolio.analysis::guess_file_encoding(files_df$filepath)
+
+  if (all(grepl("ascii|utf-8", files_df$file_encoding, ignore.case = TRUE))) {
+    cli::cli_alert_success("all files are encoded in ASCII or UTF-8")
+  } else {
+    alert_files <- files_df$filename[!grepl("ascii|utf-8", files_df$file_encoding, ignore.case = TRUE)]
+    report_alert_files("the following files are not encoded in ASCII or UTF-8:", alert_files, type = "warning", info = "this can be adapted to automatically by the {.fun read_portfolio_csv} function")
+  }
+
+  files_df$num_of_lines <- guess_num_of_lines(files_df$filepath)
+
+  files_df$last_line_has_newline <- has_newline_at_end(files_df$filepath)
+
+  if (all(files_df$last_line_has_newline)) {
+    cli::cli_alert_success("all files have a newline at the end")
+  } else {
+    alert_files <- files_df$filename[!files_df$last_line_has_newline]
+    report_alert_files("the following files do not have a newline at the end:", alert_files, type = "warning", info = "this can be adapted to automatically by the {.fun read_portfolio_csv} function")
+  }
+
+  files_df$delimiter <- pacta.portfolio.analysis::guess_delimiter(files_df$filepath)
+
+  if (all(files_df$delimiter == ",")) {
+    cli::cli_alert_success(paste0("all files use {.strong ", cli::style_inverse(","), "} for a delimiter"))
+  } else {
+    alert_files <- files_df$filename[files_df$delimiter != ","]
+    report_alert_files(paste0("the following files do not use {.strong ", cli::style_inverse(","), "} for a delimiter:"), alert_files, type = "warning", info = "this can be adapted to automatically by the {.fun read_portfolio_csv} function")
+  }
+
+  files_df$read_without_error <- validate_read_without_error(files_df$filepath, files_df$file_encoding, files_df$delimiter)
+
+  if (all(files_df$read_without_error == TRUE)) {
+    cli::cli_alert_success(paste0("all files can be read without error"))
+  } else if (any(files_df$read_without_error == FALSE)) {
+    alert_files <- files_df$filename[isFALSE(files_df$read_without_error)]
+    report_alert_files("the following files can not be read without error and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$read_without_error == TRUE, ]
+  }
+
+
+  files_df$num_of_columns <- guess_num_of_columns(files_df$filepath, files_df$file_encoding, files_df$delimiter)
+
+  if (all(files_df$num_of_columns == 5L)) {
+    cli::cli_alert_success(paste0("all files have {.strong 5} columns"))
+  } else if (any(files_df$num_of_columns > 5L)) {
+    alert_files <- files_df$filename[files_df$num_of_columns > 5L]
+    report_alert_files("the following files have more than {.strong 5} columns:", alert_files, type = "warning", info = "this can usually be adapted to automatically by the {.fun read_portfolio_csv} function")
+  } else if (any(files_df$num_of_columns < 4L)) {
+    alert_files <- files_df$filename[files_df$num_of_columns < 4L]
+    report_alert_files("the following files have less than {.strong 4} columns and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$num_of_columns >= 4L, ]
+  }
+
+
+
+  files_df$decimal_mark <- pacta.portfolio.analysis:::guess_decimal_mark(files_df$filepath)
+
+  if (all(!is.na(files_df$decimal_mark)) && all(files_df$decimal_mark == ".")) {
+    cli::cli_alert_success(paste0("all files use {.strong ", cli::style_inverse("."), "} for a decimal mark"))
+  } else {
+    alert_files <- files_df$filename[is.na(files_df$decimal_mark) || grepl("^[.]$", files_df$decimal_mark)]
+    report_alert_files(paste0("the following files do not use {.strong ", cli::style_inverse("."), "} for a decimal mark"), alert_files, type = "warning", info = "this can be adapted to automatically by the {.fun read_portfolio_csv} function")
+  }
+
+  files_df$grouping_mark <- pacta.portfolio.analysis:::guess_grouping_mark(filepaths = files_df$filepath)
+
+  if (all(!is.na(files_df$grouping_mark) && files_df$grouping_mark == ",")) {
+    cli::cli_alert_success(paste0("all files use {.strong ", cli::style_inverse(","), "} for a grouping mark"))
+  } else {
+    alert_files <- files_df$filename[is.na(files_df$grouping_mark) || grepl("^[,]$", files_df$grouping_mark)]
+    report_alert_files(paste0("the following files do not use {.strong ", cli::style_inverse(","), "} for a grouping mark"), alert_files, type = "warning", info = "this can be adapted to automatically by the {.fun read_portfolio_csv} function")
+  }
+
+  files_df$tokenizer <- get_tokenizers(files_df$filepath, files_df$file_encoding, files_df$delimiter)
+
+  files_df$fields_per_line <- get_fields_per_line(files_df$filepath, files_df$tokenizer)
+  files_df$has_consistent_fields_per_line <- vapply(X = files_df$fields_per_line, FUN = function(x) all(x == x[1]), FUN.VALUE = logical(1))
+
+  if (all(files_df$has_consistent_fields_per_line == TRUE)) {
+    cli::cli_alert_success(paste0("all files have a consistent number of fields per line"))
+  } else if (any(files_df$has_consistent_fields_per_line == FALSE)) {
+    alert_files <- files_df$filename[files_df$has_consistent_fields_per_line == FALSE]
+    report_alert_files("the following files do not have a consistent number of fields per line and will not be considered further:", alert_files, type = "danger")
+    files_df <- files_df[files_df$has_consistent_fields_per_line == TRUE, ]
+  }
+
+  files_df$column_names <- get_column_names(files_df$filepath, files_df$file_encoding, files_df$delimiter)
+
+  files_df$has_expected_colnames <- vapply(X = files_df$column_names, FUN = function(x) isTRUE(all.equal(target = expected_colnames, current = x)), FUN.VALUE = logical(1))
+
+  if (all(files_df$has_expected_colnames == TRUE)) {
+    cli::cli_alert_success(paste0("all files have the expected column names"))
+  } else if (all(files_df$has_expected_colnames == FALSE)) {
+    cli::cli_alert_warning("none of the files have the expected column names")
+  } else if (any(files_df$has_expected_colnames == FALSE)) {
+    alert_files <- files_df$filename[files_df$has_expected_colnames == FALSE]
+    report_alert_files("the following files do not have the expected column names:", alert_files, type = "warning")
+  }
+
+  files_df$readr_locale <- get_locales(encodings = files_df$file_encoding, decimal_marks = files_df$decimal_mark, grouping_marks = files_df$grouping_mark)
+
+  investor_name_colname <- expected_colnames[[1]]
+  portfolio_name_colname <- expected_colnames[[2]]
+  isin_colname <- expected_colnames[[3]]
+  market_value_colname <- expected_colnames[[4]]
+  currency_colname <- expected_colnames[[5]]
+  test <- purrr::map(seq_along(files_df$filepath), ~ suppressWarnings(suppressMessages(readr::read_delim(files_df$filepath[.x], delim = files_df$delimiter[.x], locale = files_df$readr_locale[.x][[1]], progress = FALSE, show_col_types = FALSE))))
+
+  files_df$investor_name_is_string <- vapply(test, function(x) is.character(x[[investor_name_colname]]), logical(1))
+
+  files_df$portfolio_name_is_string <- vapply(test, function(x) is.character(x[[portfolio_name_colname]]), logical(1))
+
+  files_df$market_value_is_numeric <- vapply(test, function(x) is.numeric(x[[market_value_colname]]), logical(1))
+
+  files_df$market_value_has_negatives <- vapply(test, function(x) any(x[[market_value_colname]] < 0), logical(1))
+
+  files_df$market_value_has_nas <- vapply(test, function(x) any(is.na(x[[market_value_colname]])), logical(1))
+
+  files_df$valid_iso4217c_codes <- vapply(test, function(x) all(is_valid_currency_code(x[[currency_colname]])), FUN.VALUE = logical(1), USE.NAMES = FALSE)
+
+  if (all(files_df$valid_iso4217c_codes == TRUE)) {
+    cli::cli_alert_success(paste0("all files have only valid iso4217c currency codes"))
+  } else if (any(files_df$valid_iso4217c_codes == FALSE)) {
+    alert_files <- files_df$filename[files_df$valid_iso4217c_codes == FALSE]
+    report_alert_files("the following files have some invalid iso4217c currency codes:", alert_files, type = "warning")
+  }
+
+  files_df$has_invalid_isins <- vapply(test, function(x) any(is_valid_isin(x[[isin_colname]]) == FALSE), FUN.VALUE = logical(1), USE.NAMES = FALSE)
+
+  if (all(files_df$has_invalid_isins == FALSE)) {
+    cli::cli_alert_success(paste0("all files have only valid ISINs"))
+  } else if (any(files_df$has_invalid_isins == TRUE)) {
+    alert_files <- files_df$filename[files_df$has_invalid_isins == TRUE]
+    report_alert_files("the following files have some invalid ISINs:", alert_files, type = "warning")
+  }
+
+
+  invisible(files_df)
+}
+
+
+has_newline_at_end <- function(filepaths) {
+  filepaths <- pacta.portfolio.analysis:::simplify_if_one_col_df(filepaths)
+  stopifnot("`filepaths` must be a character vector" = typeof(filepaths) == "character")
+  filepaths <- pacta.portfolio.analysis:::canonize_path(filepaths)
+
+  vapply(
+    X = filepaths,
+    FUN = function(filepath) {
+      if (!is_file_accessible(filepath) || !is_text_file(filepath)) {
+        return(NA)
+      }
+
+      con <- file(filepath, "rb")
+      on.exit(close(con))
+
+      not_at_end <- TRUE
+      chars <- ""
+      while (not_at_end) {
+        prev_chars <- chars
+        chars <- readChar(con, nchars = 2048L, useBytes = TRUE)
+        if (length(chars) == 0L) { not_at_end <- FALSE }
+      }
+
+      grepl("[\n\r]$", prev_chars)
+    },
+    FUN.VALUE = logical(1),
+    USE.NAMES = FALSE
+  )
+}
+
+
+guess_ <- function(filepaths, encodings) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+
+    },
+    FUN.VALUE = character(1)
+  )
+}
+
+
+check_column_names <- function(filepaths, encodings, delimiters) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      readr::read_delim(file = filepaths[[i]], locale = readr::locale(encoding = encodings[[i]]), delim = delimiters[[i]], n_max = 1L, col_names = FALSE, show_col_types = FALSE, progress = FALSE)
+    },
+    FUN.VALUE = integer(1)
+  )
+}
+
+get_column_names <- function(filepaths, encodings, delimiters) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      list(
+        names(
+          suppressMessages(
+            readr::read_delim(
+              file = filepaths[[i]],
+              delim = delimiters[[i]],
+              locale = readr::locale(encoding = encodings[[i]]),
+              n_max = 1L,
+              trim_ws = TRUE,
+              col_types = readr::cols(.default = "c"),
+              show_col_types = FALSE,
+              progress = FALSE
+            )
+          )
+        )
+      )
+    },
+    FUN.VALUE = list(1)
+  )
+}
+
+
+get_fields_per_line <- function(filepaths, tokenizers) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      list(
+        readr::count_fields(
+          file = filepaths[[i]],
+          tokenizer = tokenizers[[i]]
+        )
+      )
+    },
+    FUN.VALUE = list(1)
+  )
+}
+
+
+get_locales <- function(encodings, decimal_marks, grouping_marks) {
+  vapply(
+    X = seq_along(encodings),
+    FUN = function(i) {
+      if (is.na(decimal_marks[[i]])) { decimal_marks[[i]] <- "."}
+      if (is.na(grouping_marks[[i]])) { grouping_marks[[i]] <- ","}
+      list(
+        readr::locale(
+          encoding = encodings[[i]],
+          decimal_mark = decimal_marks[[i]],
+          grouping_mark = grouping_marks[[i]]
+        )
+      )
+    },
+    FUN.VALUE = list(1)
+  )
+}
+
+
+get_tokenizers <- function(filepaths, encodings, delimiters) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      list(
+        readr::tokenizer_delim(
+          delim = delimiters[[i]],
+          quote = "\"",
+          na = "NA",
+          quoted_na = TRUE,
+          comment = "",
+          trim_ws = TRUE,
+          escape_double = TRUE,
+          escape_backslash = FALSE,
+          skip_empty_rows = TRUE
+        )
+      )
+    },
+    FUN.VALUE = list(1)
+  )
+}
+
+
+guess_content_types <- function(filepaths) {
+  vapply(
+    X = filepaths,
+    FUN = function(x) {
+      wand::guess_content_type(x)[[1]]
+    },
+    FUN.VALUE = character(1))
+}
+
+
+# guess_encodings <- function(filepaths) {
+#   file_encodings <-
+#     vapply(X = filepaths,
+#            FUN = function(x) {
+#              readr::guess_encoding(file = x, n_max = -1, threshold = 0)$encoding[[1]]
+#            },
+#            FUN.VALUE = character(1),
+#            USE.NAMES = FALSE)
+#
+#   raw_lines <- lapply(X = filepaths, FUN = readr::read_lines_raw)
+#   has_cp850_chars <-
+#     vapply(
+#       X = raw_lines,
+#       FUN = function(x) {
+#         any(
+#           vapply(
+#             X = x,
+#             FUN = function(y) {
+#               any(y == as.raw(0x94))
+#               },
+#             FUN.VALUE = logical(1)
+#           )
+#         )
+#       },
+#       FUN.VALUE = logical(1)
+#     )
+#
+#   ifelse(has_cp850_chars, "cp850", file_encodings)
+# }
+
+
+# guess_decimal_mark <- function(filepath, encoding, delimiter) {
+#   decimal_mark <- ifelse(delimiter == ";", ",", ".")
+#   grouping_mark <- ifelse(decimal_mark == ",", ".", ",")
+#
+#   cust_locale <-
+#     readr::locale(
+#       decimal_mark = decimal_mark,
+#       grouping_mark = grouping_mark,
+#       encoding = encoding
+#     )
+#
+#   char_data <-
+#     readr::read_delim(
+#       file = filepath,
+#       delim = delimiter,
+#       locale = cust_locale,
+#       trim_ws = TRUE,
+#       col_types = readr::cols(.default = "c"),
+#       col_names = TRUE,
+#       progress = FALSE
+#     )
+#
+#   all_num_chars <- char_data[[4]]
+#
+#   grp_mrk_com_regex <- "^((?![,]).)*$|[,][[:digit:]]{3}[^[:digit:]]|[,][[:digit:]]{3}$"
+#   grp_mrk_dot_regex <- "^((?![.]).)*$|[.][[:digit:]]{3}[^[:digit:]]|[.][[:digit:]]{3}$"
+#
+#   has_comma <- any(grepl("[,]", all_num_chars))
+#   has_dot <- any(grepl("[.]", all_num_chars))
+#
+#   comma_only_before_3 <- all(has_comma, grepl(grp_mrk_com_regex, all_num_chars, perl = TRUE))
+#   dot_only_before_3 <- all(has_dot, grepl(grp_mrk_dot_regex, all_num_chars, perl = TRUE))
+#
+#   nums_have_space <- any(grepl(" ", char_data[[4]]))
+#
+#   if (comma_only_before_3 && !dot_only_before_3) {
+#     decimal_mark <- "."
+#   } else if (dot_only_before_3 && !comma_only_before_3) {
+#     decimal_mark <- ","
+#   } else if (has_comma && !comma_only_before_3 && !dot_only_before_3) {
+#     decimal_mark <- ","
+#   } else if (has_dot && !dot_only_before_3 && !comma_only_before_3) {
+#     decimal_mark <- "."
+#   }
+#
+#   decimal_mark
+# }
+#
+#
+# guess_decimal_marks <- function(filepaths, encodings, delimiters) {
+#   vapply(
+#     X = seq_along(filepaths),
+#     FUN = function(i) {
+#       guess_decimal_mark(
+#         filepath = filepaths[[i]],
+#         encoding = encodings[[i]],
+#         delimiter = delimiters[[i]]
+#       )
+#     },
+#     FUN.VALUE = character(1)
+#   )
+# }
+
+
+# guess_delimiters <- function(filepaths, encodings) {
+#   vapply(
+#     X = seq_along(filepaths),
+#     FUN = function(i) {
+#       line1 <- readr::read_lines(file = filepaths[[i]], locale = readr::locale(encoding = encodings[[i]]), n_max = 1, progress = FALSE)
+#       commas <- stringr::str_count(line1, ",")
+#       semicolons <- stringr::str_count(line1, ";")
+#       delim <- ifelse(commas > semicolons, ",", ";")
+#     },
+#     FUN.VALUE = character(1)
+#   )
+# }
+
+
+# guess_grouping_mark <- function(filepath, encoding, delimiter) {
+#   decimal_mark <- ifelse(delimiter == ";", ",", ".")
+#   grouping_mark <- ifelse(decimal_mark == ",", ".", ",")
+#
+#   cust_locale <-
+#     readr::locale(
+#       decimal_mark = decimal_mark,
+#       grouping_mark = grouping_mark,
+#       encoding = encoding
+#     )
+#
+#   char_data <-
+#     readr::read_delim(
+#       file = filepath,
+#       delim = delimiter,
+#       locale = cust_locale,
+#       trim_ws = TRUE,
+#       col_types = readr::cols(.default = "c"),
+#       col_names = TRUE,
+#       progress = FALSE
+#     )
+#
+#   all_num_chars <- char_data[[4]]
+#
+#   grp_mrk_com_regex <- "^((?![,]).)*$|[,][[:digit:]]{3}[^[:digit:]]|[,][[:digit:]]{3}$"
+#   grp_mrk_dot_regex <- "^((?![.]).)*$|[.][[:digit:]]{3}[^[:digit:]]|[.][[:digit:]]{3}$"
+#
+#   has_comma <- any(grepl("[,]", all_num_chars))
+#   has_dot <- any(grepl("[.]", all_num_chars))
+#
+#   comma_only_before_3 <- all(has_comma, grepl(grp_mrk_com_regex, all_num_chars, perl = TRUE))
+#   dot_only_before_3 <- all(has_dot, grepl(grp_mrk_dot_regex, all_num_chars, perl = TRUE))
+#
+#   nums_have_space <- any(grepl(" ", char_data[[4]]))
+#
+#   if (comma_only_before_3 && !dot_only_before_3) {
+#     grouping_mark <- ","
+#     decimal_mark <- "."
+#   } else if (dot_only_before_3 && !comma_only_before_3) {
+#     grouping_mark <- "."
+#     decimal_mark <- ","
+#   } else if (has_comma && !comma_only_before_3 && !dot_only_before_3) {
+#     grouping_mark <- "."
+#     decimal_mark <- ","
+#   } else if (has_dot && !dot_only_before_3 && !comma_only_before_3) {
+#     grouping_mark <- ","
+#     decimal_mark <- "."
+#   }
+#
+#   if (nums_have_space) { grouping_mark <- " " }
+#
+#   grouping_mark
+# }
+#
+#
+# guess_grouping_marks <- function(filepaths, encodings, delimiters) {
+#   vapply(
+#     X = seq_along(filepaths),
+#     FUN = function(i) {
+#       guess_grouping_mark(
+#         filepath = filepaths[[i]],
+#         encoding = encodings[[i]],
+#         delimiter = delimiters[[i]]
+#       )
+#     },
+#     FUN.VALUE = character(1)
+#   )
+# }
+
+
+guess_num_of_columns <- function(filepaths, encodings, delimiters) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      ncol(readr::read_delim(file = filepaths[[i]], locale = readr::locale(encoding = encodings[[i]]), delim = delimiters[[i]], n_max = 1L, col_names = FALSE, show_col_types = FALSE, progress = FALSE))
+    },
+    FUN.VALUE = integer(1)
+  )
+}
+
+
+guess_num_of_lines <- function(filepaths) {
+  vapply(
+    X = filepaths,
+    FUN = function(filepath) {
+      length(readr::read_lines(filepath, n_max = -1L, lazy = TRUE, progress = FALSE))
+    },
+    FUN.VALUE = integer(1)
+  )
+}
+
+
+# validate_isins <- function(isins) {
+#   is_luhn <- function(x) {
+#     digits <- suppressWarnings(as.numeric(rev(unlist(strsplit(x, "")))))
+#     odd <- seq_along(digits) %% 2 == 1
+#     s1 <- sum(digits[odd])
+#     s2 <- digits[!odd] * 2
+#     s2 <- sum(s2 %% 10 + s2 %/% 10)
+#     sum(s1, s2) %% 10 == 0
+#   }
+#
+#   isins <- toupper(isins)
+#   isins <- gsub(pattern = "[[:blank:]]", replacement = "", isins)
+#   valid_struct <- grepl("^[[:upper:]]{2}[[:alnum:]]{9}[[:digit:]]$", isins)
+#
+#   valid_luhn <-
+#     vapply(
+#       X = isins,
+#       FUN = function(x) {
+#         x <- stringi::stri_replace_all_fixed(x, LETTERS, seq_along(LETTERS) + 9L,
+#                                              vectorize_all = FALSE)
+#         out <- vapply(x, is_luhn, FUN.VALUE = logical(1L), USE.NAMES = FALSE)
+#         out[is.na(out)] <- FALSE
+#         out
+#       },
+#       FUN.VALUE = logical(1),
+#       USE.NAMES = FALSE
+#     )
+#
+#   valid_struct & valid_luhn
+# }
+
+
+# validate_isins_list <- function(isins_list) {
+#   if (class(isins_list) != "list") isins_list <- list(isins_list)
+#   lapply(X = isins_list, FUN = validate_isins)
+# }
+#
+#
+# validate_iso4217c <- function(codes_list) {
+#   if (!is.list(codes_list)) codes_list <- list(codes_list)
+#   vapply(
+#     X = codes_list,
+#     FUN = function(codes) {
+#       all(toupper(unique(codes)) %in% countrycode::codelist$iso4217c)
+#     },
+#     FUN.VALUE = logical(1)
+#   )
+# }
+
+
+validate_read_without_error <- function(filepaths, encodings, delimiters) {
+  vapply(
+    X = seq_along(filepaths),
+    FUN = function(i) {
+      out <- tryCatch(
+        suppressWarnings(suppressMessages(
+          readr::read_delim(
+            file = filepaths[[i]],
+            delim = delimiters[[i]],
+            locale = readr::locale(encoding = encodings[[i]]),
+            progress = FALSE,
+            show_col_types = FALSE
+          )
+        ))
+      )
+      !any(class(out) == "error")
+    },
+    FUN.VALUE = logical(1)
+  )
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0244a6a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,29 @@
+# Meta Report Data Creator
+
+## Instructions
+
+In each of the R files (`phase-1`, `pahse-2`, and `phase-3`), update the lines defining:
+
+* `data_path <- <path to unzipped directory from constructiva>`
+* `output_dir <- <path to where meta results will live>`
+
+Then run 
+```bash
+Rscript phase-1_combine-portfolios.R
+```
+
+If you encounter errors, you will need to modify the portfolio `csv` files to correct the errors.
+
+
+Then run 
+```bash
+Rscript phase-2_run-pacta.R
+```
+
+which takes a long time.
+
+```bash
+Rscript phase-3_combine-results.R
+```
+
+combines the results into peer files (meta + org in one set, user_id in the other)
diff --git a/phase-1_combine-portfolios.R b/phase-1_combine-portfolios.R
new file mode 100755
index 0000000..1373aba
--- /dev/null
+++ b/phase-1_combine-portfolios.R
@@ -0,0 +1,430 @@
+# load required packages -------------------------------------------------------
+
+suppressPackageStartupMessages({
+  require(R.utils, quietly = TRUE, warn.conflicts = FALSE)
+  require(tibble, quietly = TRUE)
+  require(fs, quietly = TRUE)
+  require(cli, quietly = TRUE)
+  require(stringi, quietly = TRUE)
+  require(wand, quietly = TRUE)
+  require(stringr, quietly = TRUE)
+  require(pacta.portfolio.analysis, quietly = TRUE) # must install with # devtools::install_github("RMI-PACTA/pacta.portfolio.analysis")
+
+  library(dplyr, warn.conflicts = FALSE)
+  library(devtools, quietly = TRUE, warn.conflicts = FALSE)
+  library(purrr)
+  library(stringr)
+  library(fs)
+  library(r2dii.utils)  # must install with # devtools::install_github("2DegreesInvesting/r2dii.utils")
+  library(readr)
+  library(yaml)
+  library(config)
+  library(here)
+})
+
+cfg <- config::get(file = commandArgs(trailingOnly = TRUE))
+
+source(here::here("R", "get_csv_specs.R"))
+
+
+# manually set certain values and paths ----------------------------------------
+
+# WARNING!!! These filepaths are easy to mess up. You're much better off
+# copy-pasting them from your filesystem rather than trying to manually edit
+# bits of it. Seriously. Trust me.
+
+output_dir <- cfg$output_dir # this will likely not work on Windows, so change it!
+combined_portfolio_results_output_dir <- file.path(output_dir, "combined", "portfolio_level")
+combined_user_results_output_dir <- file.path(output_dir, "combined", "user_level")
+combined_orgtype_results_output_dir <- file.path(output_dir, "combined", "orgtype_level")
+
+data_path <- cfg$data_path
+portfolios_path <- file.path(data_path, "portfolios")
+portfolios_meta_csv <- file.path(data_path, "portfolios.csv")
+users_meta_csv <- file.path(data_path, "users.csv")
+
+project_code <- cfg$project_code
+default_language <- cfg$default_language
+
+project_prefix <- cfg$project_prefix
+holdings_date <- cfg$holdings_date
+
+bogus_csvs_to_be_ignored <- cfg$bogus_csvs_to_be_ignored  # if none, this should be c()
+if (is.null(bogus_csvs_to_be_ignored)) {bogus_csvs_to_be_ignored <- c()}
+users_to_be_ignored <- cfg$users_to_be_ignored  # if none, this should be c()
+if (is.null(users_to_be_ignored)) {users_to_be_ignored <- c()}
+
+
+# check paths and directories --------------------------------------------------
+
+dir.create(output_dir, showWarnings = FALSE)
+dir.create(combined_portfolio_results_output_dir, showWarnings = FALSE, recursive = TRUE)
+dir.create(file.path(combined_portfolio_results_output_dir, "30_Processed_inputs"), showWarnings = FALSE)
+dir.create(file.path(combined_portfolio_results_output_dir, "40_Results"), showWarnings = FALSE)
+dir.create(combined_user_results_output_dir, showWarnings = FALSE, recursive = TRUE)
+dir.create(file.path(combined_user_results_output_dir, "30_Processed_inputs"), showWarnings = FALSE)
+dir.create(file.path(combined_user_results_output_dir, "40_Results"), showWarnings = FALSE)
+dir.create(combined_orgtype_results_output_dir, showWarnings = FALSE, recursive = TRUE)
+dir.create(file.path(combined_orgtype_results_output_dir, "30_Processed_inputs"), showWarnings = FALSE)
+dir.create(file.path(combined_orgtype_results_output_dir, "40_Results"), showWarnings = FALSE)
+
+stopifnot(dir.exists(output_dir))
+stopifnot(dir.exists(portfolios_path))
+stopifnot(file.exists(portfolios_meta_csv))
+stopifnot(file.exists(users_meta_csv))
+stopifnot(dir.exists(combined_portfolio_results_output_dir))
+stopifnot(dir.exists(file.path(combined_portfolio_results_output_dir, "30_Processed_inputs")))
+stopifnot(dir.exists(file.path(combined_portfolio_results_output_dir, "40_Results")))
+stopifnot(dir.exists(combined_user_results_output_dir))
+stopifnot(dir.exists(file.path(combined_user_results_output_dir, "30_Processed_inputs")))
+stopifnot(dir.exists(file.path(combined_user_results_output_dir, "40_Results")))
+stopifnot(dir.exists(combined_orgtype_results_output_dir))
+stopifnot(dir.exists(file.path(combined_orgtype_results_output_dir, "30_Processed_inputs")))
+stopifnot(dir.exists(file.path(combined_orgtype_results_output_dir, "40_Results")))
+
+
+# set needed values
+
+pacta_directories <- c("00_Log_Files", "10_Parameter_File", "20_Raw_Inputs", "30_Processed_Inputs", "40_Results", "50_Outputs")
+
+
+# prepare a list of all the CSVs to import -------------------------------------
+
+portfolio_csvs <- list.files(portfolios_path, pattern = "[.]csv$", full.names = TRUE)
+
+
+# read in meta data CSVs -------------------------------------------------------
+
+portfolios_meta <- read_csv(portfolios_meta_csv, show_col_types = FALSE)
+users_meta <- read_csv(users_meta_csv, show_col_types = FALSE)
+
+if ("organization_type.id" %in% names(users_meta)) {
+  users_meta <- select(users_meta, id, organization_type = organization_type.id)
+} else {
+  users_meta <- select(users_meta, id, organization_type)
+}
+
+
+# remove child portfolios -------------------------------------------------
+
+child_ids <- portfolios_meta$id[!is.na(portfolios_meta$parent)]
+portfolio_csvs <- portfolio_csvs[!tools::file_path_sans_ext(basename(portfolio_csvs)) %in% child_ids]
+
+
+# remove unsubmitted CSVs ------------------------------------------------------
+
+unsubmitted_ids <- portfolios_meta$id[portfolios_meta$submitted == 0]
+portfolio_csvs <- portfolio_csvs[!tools::file_path_sans_ext(basename(portfolio_csvs)) %in% unsubmitted_ids]
+
+
+# remove bogus CSVs ------------------------------------------------------------
+
+portfolio_csvs <- portfolio_csvs[! tools::file_path_sans_ext(basename(portfolio_csvs)) %in% bogus_csvs_to_be_ignored]
+
+
+# read in all the specs and remove unusable CSVs -------------------------------
+
+specs <- get_csv_specs(portfolio_csvs)
+saveRDS(specs, file.path(output_dir, paste0(project_prefix, "_csv_specs.rds")))
+portfolio_csvs <- specs$filepath
+
+
+# read in all the CSVs ---------------------------------------------------------
+
+data <- read_portfolio_csv(portfolio_csvs)
+
+
+# add meta data to full data and save it ---------------------------------------
+
+data <-
+  data %>%
+  mutate(port_id = suppressWarnings(as.numeric(tools::file_path_sans_ext(basename(filepath))))) %>%
+  left_join(portfolios_meta[, c("id", "user_id")], by = c(port_id = "id")) %>%
+  left_join(users_meta[, c("id", "organization_type")], by = c(user_id = "id"))
+
+data <-
+  data %>%
+  filter(!is.na(port_id)) %>%
+  filter(!is.na(user_id))
+
+# remove users from analysis
+data <- data %>%
+  filter(!(user_id %in% users_to_be_ignored))
+
+# `write_csv()` sometimes fails on Windows and is not necessary, so commented out until solved
+# write_csv(data, file = file.path(output_dir, paste0(project_prefix, "_full.csv")))
+saveRDS(data, file.path(output_dir, paste0(project_prefix, "_full.rds")))
+
+
+# prepare meta PACTA project ---------------------------------------------------
+
+meta_output_dir <- file.path(output_dir, "meta")
+dir.create(meta_output_dir, showWarnings = FALSE)
+
+dir_create(file.path(meta_output_dir, pacta_directories))
+
+data %>%
+  mutate(portfolio_name = "Meta Portfolio") %>%
+  mutate(investor_name = "Meta Investor") %>%
+  select(investor_name, portfolio_name, isin, market_value, currency) %>%
+  group_by_all() %>% ungroup(market_value) %>%
+  summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+  write_csv(file = file.path(meta_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_meta.csv")))
+
+config_list <-
+  list(
+    default = list(
+      parameters = list(
+        portfolio_name = "Meta Portfolio",
+        investor_name = "Meta Investor",
+        peer_group = paste0(project_prefix, "_meta"),
+        language = default_language,
+        project_code = project_code,
+        holdings_date = holdings_date
+      )
+    )
+  )
+write_yaml(config_list, file = file.path(meta_output_dir, "10_Parameter_File", paste0(project_prefix, "_meta", "_PortfolioParameters.yml")))
+
+
+# slices for per user_id -------------------------------------------------------
+
+users_output_dir <- file.path(output_dir, "user_id")
+dir.create(users_output_dir, showWarnings = FALSE)
+
+all_user_ids <- unique(data$user_id)
+
+for (user_id in all_user_ids) {
+  user_data <- data %>% dplyr::filter(user_id == .env$user_id)
+
+  investor_name <- encodeString(as.character(unique(user_data$investor_name)))
+  if (length(investor_name) > 1) {
+    investor_name <- investor_name[[1]]
+    user_data <- user_data %>% mutate(investor_name = .env$investor_name)
+  }
+
+  user_data <- user_data %>% mutate(portfolio_name = .env$investor_name)
+
+  peer_group <- unique(user_data$organization_type)
+  if (length(peer_group) > 1) { peer_group <- peer_group[[1]] }
+
+  config_list <-
+    list(
+      default = list(
+        parameters = list(
+          portfolio_name = as.character(investor_name),
+          investor_name = as.character(investor_name),
+          peer_group = peer_group,
+          language = default_language,
+          project_code = project_code,
+          holdings_date = holdings_date
+        )
+      )
+    )
+
+  user_id_output_dir <- file.path(users_output_dir, paste0(project_prefix, "_user_", user_id))
+  dir_create(file.path(user_id_output_dir, pacta_directories))
+
+  write_yaml(config_list, file = file.path(user_id_output_dir, "10_Parameter_File", paste0(project_prefix, "_user_", user_id, "_PortfolioParameters.yml")))
+
+  user_data %>%
+    select(investor_name, portfolio_name, isin, market_value, currency) %>%
+    group_by_all() %>% ungroup(market_value) %>%
+    summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+    write_csv(file.path(user_id_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_user_", user_id, ".csv")))
+}
+
+
+# slices for per organization_type ---------------------------------------------
+
+orgs_output_dir <- file.path(output_dir, "organization_type")
+dir.create(orgs_output_dir, showWarnings = FALSE)
+
+all_org_types <- unique(data$organization_type)
+
+for (org_type in all_org_types) {
+  org_data <-
+    data %>%
+    dplyr::filter(organization_type == .env$org_type) %>%
+    mutate(investor_name = .env$org_type) %>%
+    mutate(portfolio_name = .env$org_type)
+
+  config_list <-
+    list(
+      default = list(
+        parameters = list(
+          portfolio_name = org_type,
+          investor_name = org_type,
+          peer_group = org_type,
+          language = default_language,
+          project_code = project_code,
+          holdings_date = holdings_date
+        )
+      )
+    )
+
+  org_type_output_dir <- file.path(orgs_output_dir, paste0(project_prefix, "_org_", org_type))
+  dir_create(file.path(org_type_output_dir, pacta_directories))
+
+  write_yaml(config_list, file = file.path(org_type_output_dir, "10_Parameter_File", paste0(project_prefix, "_org_", org_type, "_PortfolioParameters.yml")))
+
+  org_data %>%
+    select(investor_name, portfolio_name, isin, market_value, currency) %>%
+    group_by_all() %>% ungroup(market_value) %>%
+    summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+    write_csv(file.path(org_type_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_org_", org_type, ".csv")))
+}
+
+# slices for per port_id -------------------------------------------------------
+
+ports_output_dir <- file.path(output_dir, "port_id")
+dir.create(ports_output_dir, showWarnings = FALSE)
+
+all_port_ids <- unique(data$port_id)
+
+for (port_id in all_port_ids) {
+  port_data <-
+    data %>%
+    dplyr::filter(port_id == .env$port_id) %>%
+    mutate(portfolio_name = as.character(.env$port_id))
+
+  portfolio_name <- encodeString(as.character(unique(port_data$portfolio_name)))
+  if (length(portfolio_name) > 1) { portfolio_name <- port_id }
+
+  investor_name <- encodeString(as.character(unique(port_data$investor_name)))
+  if (length(investor_name) > 1) { investor_name <- investor_name[[1]] }
+
+  peer_group <- unique(port_data$organization_type)
+  if (length(peer_group) > 1) { peer_group <- peer_group[[1]] }
+
+  config_list <-
+    list(
+      default = list(
+        parameters = list(
+          portfolio_name = as.character(portfolio_name),
+          investor_name = as.character(investor_name),
+          peer_group = peer_group,
+          language = default_language,
+          project_code = project_code,
+          holdings_date = holdings_date
+        )
+      )
+    )
+
+  port_id_output_dir <- file.path(ports_output_dir, paste0(project_prefix, "_port_", port_id))
+  dir_create(file.path(port_id_output_dir, pacta_directories))
+
+  write_yaml(config_list, file = file.path(port_id_output_dir, "10_Parameter_File", paste0(project_prefix, "_port_", port_id, "_PortfolioParameters.yml")))
+
+  port_data %>%
+    select(investor_name, portfolio_name, isin, market_value, currency) %>%
+    mutate(investor_name = .env$investor_name) %>%
+    mutate(portfolio_name = .env$portfolio_name) %>%
+    group_by_all() %>% ungroup(market_value) %>%
+    summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+    write_csv(file.path(port_id_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_port_", port_id, ".csv")))
+}
+
+
+# slices for per port_id -------------------------------------------------------
+
+ports_output_dir <- file.path(output_dir, "port_id")
+dir.create(ports_output_dir, showWarnings = FALSE)
+
+all_port_ids <- unique(data$port_id)
+
+for (port_id in all_port_ids) {
+  port_data <-
+    data %>%
+    dplyr::filter(port_id == .env$port_id) %>%
+    mutate(portfolio_name = as.character(.env$port_id))
+
+  portfolio_name <- encodeString(as.character(unique(port_data$portfolio_name)))
+  if (length(portfolio_name) > 1) { portfolio_name <- port_id }
+
+  investor_name <- encodeString(as.character(unique(port_data$investor_name)))
+  if (length(investor_name) > 1) { investor_name <- investor_name[[1]] }
+
+  peer_group <- unique(port_data$organization_type)
+  if (length(peer_group) > 1) { peer_group <- peer_group[[1]] }
+
+  config_list <-
+    list(
+      default = list(
+        parameters = list(
+          portfolio_name = as.character(portfolio_name),
+          investor_name = as.character(investor_name),
+          peer_group = peer_group,
+          language = default_language,
+          project_code = project_code,
+          holdings_date = holdings_date
+        )
+      )
+    )
+
+  port_id_output_dir <- file.path(ports_output_dir, paste0(project_prefix, "_port_", port_id))
+  dir_create(file.path(port_id_output_dir, pacta_directories))
+
+  write_yaml(config_list, file = file.path(port_id_output_dir, "10_Parameter_File", paste0(project_prefix, "_port_", port_id, "_PortfolioParameters.yml")))
+
+  port_data %>%
+    select(investor_name, portfolio_name, isin, market_value, currency) %>%
+    mutate(investor_name = .env$investor_name) %>%
+    mutate(portfolio_name = .env$portfolio_name) %>%
+    group_by_all() %>% ungroup(market_value) %>%
+    summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+    write_csv(file.path(port_id_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_port_", port_id, ".csv")))
+}
+
+
+# slices for per port_id -------------------------------------------------------
+
+ports_output_dir <- file.path(output_dir, "port_id")
+dir.create(ports_output_dir, showWarnings = FALSE)
+
+all_port_ids <- unique(data$port_id)
+
+for (port_id in all_port_ids) {
+  port_data <-
+    data %>%
+    dplyr::filter(port_id == .env$port_id) %>%
+    mutate(portfolio_name = as.character(.env$port_id))
+
+  portfolio_name <- encodeString(as.character(unique(port_data$portfolio_name)))
+  if (length(portfolio_name) > 1) { portfolio_name <- port_id }
+
+  investor_name <- encodeString(as.character(unique(port_data$investor_name)))
+  if (length(investor_name) > 1) { investor_name <- investor_name[[1]] }
+
+  peer_group <- unique(port_data$organization_type)
+  if (length(peer_group) > 1) { peer_group <- peer_group[[1]] }
+
+  config_list <-
+    list(
+      default = list(
+        parameters = list(
+          portfolio_name = as.character(portfolio_name),
+          investor_name = as.character(investor_name),
+          peer_group = peer_group,
+          language = default_language,
+          project_code = project_code,
+          holdings_date = holdings_date
+        )
+      )
+    )
+
+  port_id_output_dir <- file.path(ports_output_dir, paste0(project_prefix, "_port_", port_id))
+  dir_create(file.path(port_id_output_dir, pacta_directories))
+
+  write_yaml(config_list, file = file.path(port_id_output_dir, "10_Parameter_File", paste0(project_prefix, "_port_", port_id, "_PortfolioParameters.yml")))
+
+  port_data %>%
+    select(investor_name, portfolio_name, isin, market_value, currency) %>%
+    mutate(investor_name = .env$investor_name) %>%
+    mutate(portfolio_name = .env$portfolio_name) %>%
+    group_by_all() %>% ungroup(market_value) %>%
+    summarise(market_value = sum(market_value, na.rm = TRUE), .groups = "drop") %>%
+    write_csv(file.path(port_id_output_dir, "20_Raw_Inputs", paste0(project_prefix, "_port_", port_id, ".csv")))
+}
+
+
diff --git a/phase-2_run-pacta-docker.R b/phase-2_run-pacta-docker.R
new file mode 100644
index 0000000..3e61af3
--- /dev/null
+++ b/phase-2_run-pacta-docker.R
@@ -0,0 +1,79 @@
+library("dplyr")
+library("tibble")
+library("purrr")
+library("tidyr")
+library("fs")
+library("here")
+library("config")
+
+
+cfg <- config::get(file = commandArgs(trailingOnly = TRUE))
+
+output_dir <- cfg$output_dir # this will likely not work on Windows, so change it!
+project_prefix <- cfg$project_prefix
+group_types <- cfg$group_types
+
+pacta_directories <- c("00_Log_Files", "10_Parameter_File", "20_Raw_Inputs", "30_Processed_Inputs", "40_Results", "50_Outputs")
+
+if (is.null(group_types)){
+  group_types <- c(
+    "meta",
+    "organization_type",
+    "user_id" # ,
+    # "port_id"
+  )
+}
+
+all_paths <- tibble(type = setdiff(group_types, "meta")) %>% #so the meta 10/20/30 dirs don't get expanded
+  mutate(path = purrr::map(type, ~ list.dirs(file.path(output_dir, .x), recursive = FALSE))) %>%
+  tidyr::unnest(path) %>%
+  bind_rows(tibble(type = "meta", path = file.path(output_dir, "meta"))) %>%
+  mutate(portfolio_name_ref_all = case_when(
+    type == "meta" ~ paste0(project_prefix, "_meta"),
+    TRUE ~ basename(path)
+  )) %>%
+  mutate(type = factor(type, ordered = TRUE, levels = group_types)) %>%
+  arrange(type, portfolio_name_ref_all) %>%
+  filter(!portfolio_name_ref_all %in% c(
+    ""
+  ))
+
+script_path <- here("transitionmonitor_docker", "run-like-constructiva-flags.sh")
+working_dir <- here("working_dir")
+user_dir <- here("user_results")
+dir_create(file.path(user_dir, "4"))
+stopifnot(file.exists(script_path))
+stopifnot(dir.exists(user_dir))
+
+for ( i in seq(1, nrow(all_paths)) ){
+  this_row <- all_paths[i, ]
+  message(paste(Sys.time(), this_row$type, this_row$portfolio_name_ref_all, "--", i, "/", nrow(all_paths)))
+  message(paste("  ", this_row$path))
+  these_dirs <- file.path(this_row$path, pacta_directories)
+  stopifnot(all(dir.exists(these_dirs)))
+  has_results <- (length(list.files(file.path(this_row$path, "40_Results"))) > 0)
+  if (has_results){
+    message("  Results already exist, skipping")
+  } else {
+    message("  Running PACTA")
+    if (dir.exists(working_dir)) {
+      dir_delete(working_dir)
+    }
+    dir_create(working_dir)
+    portfolio_name_ref_all <- this_row$portfolio_name_ref_all
+    dir_copy(this_row$path, file.path(working_dir, portfolio_name_ref_all), overwrite = TRUE)
+    tic <- Sys.time()
+    system2(
+      command = script_path,
+      args = c(
+        paste0("-p ", "\"", this_row$portfolio_name_ref_all, "\""),
+	paste("-w", working_dir),
+	paste("-y", user_dir),
+	"-r /bound/bin/run-r-scripts-results-only"
+      )
+    )
+    message(paste("  ", format.difftime(Sys.time() - tic)))
+    dir_delete(this_row$path)
+    dir_copy(file.path(working_dir, portfolio_name_ref_all), this_row$path, overwrite = TRUE)
+  }
+}
diff --git a/phase-3_combine-results.R b/phase-3_combine-results.R
new file mode 100644
index 0000000..a1e6d55
--- /dev/null
+++ b/phase-3_combine-results.R
@@ -0,0 +1,124 @@
+library("tidyverse")
+library("config")
+
+cfg <- config::get(file = commandArgs(trailingOnly = TRUE))
+
+output_dir <- cfg$output_dir
+combined_portfolio_results_output_dir <- file.path(output_dir, "combined", "portfolio_level")
+combined_user_results_output_dir <- file.path(output_dir, "combined", "user_level")
+combined_orgtype_results_output_dir <- file.path(output_dir, "combined", "orgtype_level")
+
+data_path <- cfg$data_path
+portfolios_path <- file.path(data_path, "portfolios")
+portfolios_meta_csv <- file.path(data_path, "portfolios.csv")
+users_meta_csv <- file.path(data_path, "users.csv")
+
+project_code <- cfg$project_code
+default_language <- cfg$default_language
+
+project_prefix <- cfg$project_prefix
+holdings_date <- cfg$holdings_date
+
+users <- read_csv(users_meta_csv) %>%
+  select(
+    user_id = "id",
+    type_id = "organization_type.id",
+    type = "organization_type.translationKey"
+  ) %>% mutate_all(as.character)
+
+data_filenames <-
+  c(
+    "Bonds_results_portfolio.rds",
+    "Bonds_results_company.rds",
+    "Bonds_results_map.rds",
+    "Equity_results_portfolio.rds",
+    #"Equity_results_company.rds",
+    "Equity_results_map.rds"
+  )
+
+input_filenames <- c(
+    "overview_portfolio.rds",
+    "total_portfolio.rds",
+    "emissions.rds",
+    "audit_file.rds"
+  )
+
+all_filenames <- tibble(filename = data_filenames, num_dir = "40_Results") %>%
+  bind_rows(tibble(filename = input_filenames, num_dir = "30_Processed_inputs"))
+
+#--combine org-level results--
+
+org_paths <- tibble(path = list.files(file.path(output_dir, "organization_type"), recursive = TRUE, full.names = TRUE)) %>%
+bind_rows(tibble(path = list.files(file.path(output_dir, "meta"), recursive = TRUE, full.names = TRUE))) %>%
+mutate(
+  filename = basename(path),
+  filedir = dirname(path)
+) %>%
+filter(filename %in% all_filenames$filename) %>%
+mutate(org = str_extract(string = as.character(filedir), pattern = "(?<=_org_)\\d+|meta")) %>% #lookahead group. https://stackoverflow.com/a/46788230
+left_join(distinct(select(users, type_id, type)), by = c("org" = "type_id")) %>%
+print()
+
+for (j in seq(1, nrow(all_filenames))){
+  this_filetype <- all_filenames[j, ]
+  file_paths <- org_paths %>% filter(filename == this_filetype$filename)
+  all_results <- NULL
+  for (i in seq(1, nrow(file_paths))){
+    this_file <- file_paths[i, ]
+    message(paste("processing", this_file$filename, "for org", this_file$org, "--", i, "/", nrow(file_paths),  "--", j, "/", nrow(all_filenames)))
+    content <- readRDS(this_file$path) %>%
+      mutate(
+        investor_name = this_file$type,
+        portfolio_name = this_file$type
+      )
+    all_results <- bind_rows(all_results, content)
+  }
+ saveRDS(all_results, file.path(combined_orgtype_results_output_dir, this_filetype$num_dir, this_filetype$filename))
+}
+
+#--combine user-level results--
+
+user_paths <- tibble(path = list.files(file.path(output_dir, "user_id"), recursive = TRUE, full.names = TRUE)) %>%
+mutate(
+  filename = basename(path),
+  filedir = dirname(path)
+) %>%
+filter(filename %in% all_filenames$filename) %>%
+mutate(user = str_extract(string = as.character(filedir), pattern = "(?<=_user_)\\d+")) %>% #lookahead group. https://stackoverflow.com/a/46788230
+left_join(distinct(select(users, user_id, type)), by = c("user" = "user_id")) %>%
+print()
+
+for (j in seq(1, nrow(all_filenames))){
+  this_filetype <- all_filenames[j, ]
+  file_paths <- user_paths %>% filter(filename == this_filetype$filename)
+  all_results <- NULL
+  for (i in seq(1, nrow(file_paths))){
+    this_file <- file_paths[i, ]
+    message(paste("processing", this_file$filename, "for user", this_file$user, "--", i, "/", nrow(file_paths),  "--", j, "/", nrow(all_filenames)))
+    message(paste("  ", nrow(all_results)))
+    content <- readRDS(this_file$path) %>%
+      mutate(
+        investor_name = this_file$type,
+        portfolio_name = this_file$user
+      )
+    all_results <- bind_rows(all_results, content)
+  }
+  saveRDS(all_results, file.path(combined_user_results_output_dir, this_filetype$num_dir, this_filetype$filename))
+}
+
+file.copy(
+  from = file.path(combined_orgtype_results_output_dir, "40_Results", "Equity_results_portfolio.rds"),
+  to = file.path(output_dir, paste0(project_code, "_peers_equity_results_portfolio.rds"))
+)
+file.copy(
+  from = file.path(combined_orgtype_results_output_dir, "40_Results", "Bonds_results_portfolio.rds"),
+  to = file.path(output_dir, paste0(project_code, "_peers_bonds_results_portfolio.rds"))
+)
+file.copy(
+  from = file.path(combined_user_results_output_dir, "40_Results", "Equity_results_portfolio.rds"),
+  to = file.path(output_dir, paste0(project_code, "_peers_equity_results_portfolio_ind.rds"))
+)
+file.copy(
+  from = file.path(combined_user_results_output_dir, "40_Results", "Bonds_results_portfolio.rds"),
+  to = file.path(output_dir, paste0(project_code, "_peers_bonds_results_portfolio_ind.rds"))
+)
diff --git a/template.yml b/template.yml
new file mode 100644
index 0000000..0977402
--- /dev/null
+++ b/template.yml
@@ -0,0 +1,16 @@
+---
+default:
+  output_dir: "<absolute_path>"
+  data_path: "<absolute_path_to_raw_portfolios>"
+  project_code: "PA2022XX"
+  default_language: "EN"
+  project_prefix: "PA2022XX"
+  holdings_date: "2021Q4"
+  bogus_csvs_to_be_ignored:
+  # yaml list
+  # - 1
+  # - 2
+  users_to_be_ignored:
+  # yaml list
+  # - 1
+  # - 2
diff --git a/transitionmonitor_docker/run-like-constructiva-flags.sh b/transitionmonitor_docker/run-like-constructiva-flags.sh
new file mode 100755
index 0000000..edee456
--- /dev/null
+++ b/transitionmonitor_docker/run-like-constructiva-flags.sh
@@ -0,0 +1,141 @@
+#! /bin/bash
+
+usage() {
+  echo "Usage: $0  -p <portfolio name string>" 1>&2
+  echo "Optional flags:" 1>&2
+  # t for tag
+  echo "[-t <docker image tag>] (default latest)" 1>&2
+  echo "[-u <userId>] (default 4)" 1>&2
+  echo "[-m <docker image>] (default rmi_pacta)" 1>&2
+  # a for pacta.portfolio.analysis
+  echo "[-a <path to local pacta.portfolio.analysis repo>] (default docker internal)" 1>&2
+  # d for pacta-data
+  echo "[-d <path to local pacta-data repo>] (default docker internal)" 1>&2
+  # c for pacta.interactive.report
+  echo "[-c <path to local pacta.interactive.report repo>] (default docker internal)" 1>&2
+  # r for run
+  echo "[-r <container command>] (default /bound/bin/run-r-scripts "portfolio name string")" 1>&2
+  # x for architecture?
+  echo "[-x <target platform>] (default linux/x86_64)" 1>&2
+  # w for working_dirs
+  echo "[-w <path to directory caontaing the users working directory>] (default ./working_dirs)" 1>&2
+  # y for user_results
+  echo "[-y <path to directory caontaing the user results directory>] (default ./user_results)" 1>&2
+  # v for verbose
+  echo "[-v] (verbose mode)" 1>&2
+  echo "[-i] run container in interactive, tty mode (docker run -it)" 1>&2
+  exit 1;
+}
+
+while getopts p:t:u:a:d:c:d:r:x:w:y:m:vi flag
+do
+  case "${flag}" in
+    u) userId=${OPTARG};;
+    p) portfolioIdentifier=${OPTARG};;
+    m) docker_image=${OPTARG};;
+    t) tag=${OPTARG};;
+    a) pa_repo=${OPTARG};;
+    d) data_repo=${OPTARG};;
+    c) cir_repo=${OPTARG};;
+    r) docker_command=${OPTARG};;
+    x) target_platform=${OPTARG};;
+    w) working_dirs=${OPTARG};;
+    y) user_results=${OPTARG};;
+    v) verbose=1;;
+    i) interactive=1;;
+    *) usage;;
+  esac
+done
+
+if [ -z "${portfolioIdentifier}" ]; then
+  usage
+fi
+
+if [ -z "${tag}" ]; then
+  tag="latest"
+fi
+
+if [ -z "${userId}" ]; then
+  userId="4"
+fi
+
+if [ -z "${target_platform}" ]; then
+  target_platform="linux/x86_64"
+fi
+
+if [ -z "${working_dirs}" ]; then
+  working_dirs="$(pwd)"/working_dirs
+fi
+
+if [ -z "${user_results}" ]; then
+  user_results="$(pwd)/user_results"
+fi
+
+if [ -z "${docker_image}" ]; then
+  docker_image="rmi_pacta"
+fi
+
+if [ -z "${docker_command}" ]; then
+  docker_command="/bound/bin/run-r-scripts"
+fi
+
+yellow () {
+  printf "\033[33m$1\033[0m\n"
+}
+
+resultsFolder="$working_dirs"/"$portfolioIdentifier"
+userFolder="$user_results"/"$userId"
+
+args=(
+  "--rm"
+  --platform "$target_platform"
+  "--pull=never"
+  --network none
+  --user 1000:1000
+  # "--memory=16g"
+)
+
+if [ "${target_platform}" != "linux/arm64" ]; then
+  args+=("--memory-swappiness=0")
+fi
+
+if [ -n "${interactive}" ]; then
+  args+=("-it")
+fi
+
+if [ -n "${pa_repo}" ]; then
+  args+=(--mount "type=bind,source=${pa_repo},target=/bound")
+fi
+
+if [ -n "${data_repo}" ]; then
+  args+=(--mount "type=bind,source=${data_repo},target=/pacta-data")
+fi
+
+if [ -n "${cir_repo}" ]; then
+  args+=(--mount "type=bind,source=${cir_repo},target=/pacta.interactive.report")
+fi
+
+args+=(
+  --mount "type=bind,source=${resultsFolder},target=/bound/working_dir"
+  --mount "type=bind,readonly,source=${userFolder},target=/user_results"
+)
+args+=("$docker_image:$tag")
+if [ "$docker_command" = "bash" ]; then
+  args+=("$docker_command")
+else
+  args+=("$docker_command" "$portfolioIdentifier")
+fi
+
+if [ -n "${verbose}" ]; then
+  yellow "docker run \\ "
+  for arg in "${args[@]}"; do
+    yellow "  $arg \\ "
+  done
+  yellow ""
+fi
+
+echo Running Docker Container
+
+docker run "${args[@]}"
+
+exit 0