diff --git a/7a_temp_coop_munge.yml b/7a_temp_coop_munge.yml index eff88fea..62a1039c 100644 --- a/7a_temp_coop_munge.yml +++ b/7a_temp_coop_munge.yml @@ -15,6 +15,7 @@ sources: - 7a_temp_coop_munge/src/data_merge_fxns.R - 7a_temp_coop_munge/src/crosswalk_fxns.R - lib/src/require_local.R + - lib/src/generic_utils.R targets: @@ -22,12 +23,25 @@ targets: depends: - 7a_temp_coop_munge/out/all_coop_dat_linked.feather.ind +# create the trigger file, which will not be checked into git +# if we use a trigger file elsewhere, this could be called just once, in 1_fetch. +# I see no reason to have different trigger files for different sections of the pipeline +# if this is used elsewhere, since the intent is to keep modifying it +# or the trigger file could be checked into git so that people building this part +# for the first time don't get "Can't build implicit target 7a_temp_coop_munge/tmp/always_stale_time.txt" +# if this recipe is missing and the file doesn't exist + 7a_temp_coop_munge/tmp/always_stale_time.txt: + command: make_file_stale(target_name) + +# here, trigger_file is used to keep this parser_file list _always stale_, so it always gets rebuilt. 
7a_temp_coop_munge/tmp/parser_files.yml: command: list_coop_files(target_name, - dirpath = I('7a_temp_coop_munge/src/data_parsers'), dummy = I('2021-09-13')) + dirpath = I('7a_temp_coop_munge/src/data_parsers'), + trigger_file = '7a_temp_coop_munge/tmp/always_stale_time.txt') coop_parsers: - command: find_parser(coop_wants, '7a_temp_coop_munge/tmp/parser_files.yml') + command: find_parser(coop_wants, '7a_temp_coop_munge/tmp/parser_files.yml', + trigger_file = '7a_temp_coop_munge/tmp/always_stale_time.txt') coop_munge_taskplan: command: create_coop_munge_taskplan(wants = coop_wants, parsers = coop_parsers) diff --git a/7a_temp_coop_munge/src/crosswalk_fxns.R b/7a_temp_coop_munge/src/crosswalk_fxns.R index 46a971d0..22271511 100644 --- a/7a_temp_coop_munge/src/crosswalk_fxns.R +++ b/7a_temp_coop_munge/src/crosswalk_fxns.R @@ -1,4 +1,5 @@ -list_coop_files <- function(fileout, dirpath, dummy){ +list_coop_files <- function(fileout, dirpath, trigger_file){ + make_file_stale(trigger_file) scipiper::sc_indicate(fileout, data_file = list.files(dirpath, full.names = TRUE)) } diff --git a/7a_temp_coop_munge/src/parsing_task_fxns.R b/7a_temp_coop_munge/src/parsing_task_fxns.R index a73e7fa1..483746d8 100644 --- a/7a_temp_coop_munge/src/parsing_task_fxns.R +++ b/7a_temp_coop_munge/src/parsing_task_fxns.R @@ -1,5 +1,13 @@ -find_parser <- function(coop_wants, parser_filehash) { - +#' @param trigger_file is a file that will always be modified when it is used +#' +#' @details whenever `trigger_file` is used as an input, the function needs to call +#' make_file_stale(trigger_file) to modify the file, keeping that input always stale +#' +#' in this function, the trigger_file is used because we're building a hash table of the files in a directory, +#' and since we can't rely on a directory as a dependency, we want to check changes to this directory +#' in a greedy way (i.e., every time). 
+find_parser <- function(coop_wants, parser_filehash, trigger_file) { + make_file_stale(trigger_file) parser_files <- yaml::yaml.load_file(parser_filehash) %>% names() parser_env <- new.env() sapply(parser_files, source, parser_env) diff --git a/7a_temp_coop_munge/tmp/parser_files.yml b/7a_temp_coop_munge/tmp/parser_files.yml index 5d1837ae..c0d3f7fc 100644 --- a/7a_temp_coop_munge/tmp/parser_files.yml +++ b/7a_temp_coop_munge/tmp/parser_files.yml @@ -5,7 +5,7 @@ 7a_temp_coop_munge/src/data_parsers/parse_indiana_files.R: f2a81bb1cbcb1767d4b58418c5b4fa7e 7a_temp_coop_munge/src/data_parsers/parse_manualentry_files.R: c9a32776901f585ca9140228f19511ad 7a_temp_coop_munge/src/data_parsers/parse_micorps_files.R: cbb2d98f8a9b987149c28da5e5f2f122 -7a_temp_coop_munge/src/data_parsers/parse_missouri_files.R: e27494d559c3a3a39574f441e910e665 +7a_temp_coop_munge/src/data_parsers/parse_missouri_files.R: baf12ef6aab287dfa26c635dcf771e10 7a_temp_coop_munge/src/data_parsers/parse_mndnr_files.R: 57b1b027d9ba7c1e821cd2fc7c8e7bdc 7a_temp_coop_munge/src/data_parsers/parse_mndow_coop_files.R: 6a6c233b3d6da67ee765bc11cb193d61 7a_temp_coop_munge/src/data_parsers/parse_redlake_files.R: 8496c673c467c09219bfdb6d85ffa964 diff --git a/7b_temp_merge/out/source_metadata_for_release.csv.ind b/7b_temp_merge/out/source_metadata_for_release.csv.ind index 846a1d28..1d1bbdfc 100644 --- a/7b_temp_merge/out/source_metadata_for_release.csv.ind +++ b/7b_temp_merge/out/source_metadata_for_release.csv.ind @@ -1,2 +1,2 @@ -hash: 03e6cfda99af1b92b1bb210c4aaa6caa +hash: 18e611c05b9a37c1a1277e059419d724 diff --git a/build/status/N2FfdGVtcF9jb29wX211bmdlL2xvZy83YV90ZW1wX2Nvb3BfbXVuZ2VfdGFza3MuaW5k.yml b/build/status/N2FfdGVtcF9jb29wX211bmdlL2xvZy83YV90ZW1wX2Nvb3BfbXVuZ2VfdGFza3MuaW5k.yml index 4b4e10a3..062c496f 100644 --- a/build/status/N2FfdGVtcF9jb29wX211bmdlL2xvZy83YV90ZW1wX2Nvb3BfbXVuZ2VfdGFza3MuaW5k.yml +++ b/build/status/N2FfdGVtcF9jb29wX211bmdlL2xvZy83YV90ZW1wX2Nvb3BfbXVuZ2VfdGFza3MuaW5k.yml @@ -2,11 
+2,11 @@ version: 0.3.0 name: 7a_temp_coop_munge/log/7a_temp_coop_munge_tasks.ind type: file hash: 4a83a5fe0ac58c5f1086b7113747ec92 -time: 2021-12-02 21:30:18 UTC +time: 2021-12-03 17:30:32 UTC depends: coop_munge_taskplan: 7484abdf608d66130428839ae8689053 7a_temp_coop_munge_tasks.yml: a5d7b8331b77f242faa8e6c59cee1157 - 7a_temp_coop_munge/tmp/parser_files.yml: 41f66e5aa9bdcd9b5211e20882d8240a + 7a_temp_coop_munge/tmp/parser_files.yml: 0b5c793c466b44a1802a6aeb72cf23e3 fixed: d47fa28c71f538b3724b78d50a486af1 code: functions: {} diff --git a/build/status/N2JfdGVtcF9tZXJnZS9vdXQvc291cmNlX21ldGFkYXRhX2Zvcl9yZWxlYXNlLmNzdi5pbmQ.yml b/build/status/N2JfdGVtcF9tZXJnZS9vdXQvc291cmNlX21ldGFkYXRhX2Zvcl9yZWxlYXNlLmNzdi5pbmQ.yml index 2a36e5d3..8d9c88ba 100644 --- a/build/status/N2JfdGVtcF9tZXJnZS9vdXQvc291cmNlX21ldGFkYXRhX2Zvcl9yZWxlYXNlLmNzdi5pbmQ.yml +++ b/build/status/N2JfdGVtcF9tZXJnZS9vdXQvc291cmNlX21ldGFkYXRhX2Zvcl9yZWxlYXNlLmNzdi5pbmQ.yml @@ -1,10 +1,10 @@ version: 0.3.0 name: 7b_temp_merge/out/source_metadata_for_release.csv.ind type: file -hash: 86e2757ec368bd11bf900a08b467fde9 -time: 2021-12-01 23:34:55 UTC +hash: 5e8277da52545d7d590e1bec493a9a06 +time: 2021-12-03 17:46:06 UTC depends: - 7b_temp_merge/out/temp_data_with_sources.feather.ind: 2d3d7b033f909ab1644923fe00e08be1 + 7b_temp_merge/out/temp_data_with_sources.feather.ind: 18cae4ab1a4e59cd68a8b4961d6d3f65 7b_temp_merge/in/source_metadata.csv.ind: 4fad5461061fbd7343eb58c534fdf787 fixed: 70be80eb410015414c94b5cc0eccdf43 code: diff --git a/lib/src/generic_utils.R b/lib/src/generic_utils.R new file mode 100644 index 00000000..2ba4fab1 --- /dev/null +++ b/lib/src/generic_utils.R @@ -0,0 +1,13 @@ + +#' write a unique value to a text file when this function is called. 
+#' Replaces existing text in the file +#' +#' @param filepath a text filepath that can be written to +#' +#' @details this function is used to keep a dependency stale always, assuming this function is called whenever +#' that file is referenced and there is at least a paired target use of the file. +#' The file is kept stale by writing the current time (with timezone offset) and a random number. The random +#' number is used because this function could be called rapidly several times within the same second. +make_file_stale <- function(filepath){ + cat(file = filepath, paste0(format(Sys.time(), '%m/%d/%y %H:%M:%S %z; random ID: '), sample(1E6, 1))) +}