Skip to content

Commit

Permalink
Merge pull request #242 from jread-usgs/always_stale
Browse files Browse the repository at this point in the history
Create an "always stale" target that generates the hash table of parser functions
  • Loading branch information
Jordan S Read authored Dec 6, 2021
2 parents faf0551 + 27885bc commit b00d549
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 12 deletions.
18 changes: 16 additions & 2 deletions 7a_temp_coop_munge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,33 @@ sources:
- 7a_temp_coop_munge/src/data_merge_fxns.R
- 7a_temp_coop_munge/src/crosswalk_fxns.R
- lib/src/require_local.R
- lib/src/generic_utils.R

targets:

7a_temp_coop_munge:
depends:
- 7a_temp_coop_munge/out/all_coop_dat_linked.feather.ind

# create the trigger file, which will not be checked into git
# if we use a trigger file elsewhere, this could be called just once, in 1_fetch.
# I see no reason to have different trigger files for different sections of the pipeline
# if this is used elsewhere, since the intent is to keep modifying it
# or the trigger file could be checked into git so that people building this part
# for the first time don't get "Can't build implicit target 7a_temp_coop_munge/tmp/always_stale_time.txt"
# if this recipe is missing and the file doesn't exist
7a_temp_coop_munge/tmp/always_stale_time.txt:
command: make_file_stale(target_name)

# here, trigger_file is used to keep this parser_file list _always stale_, so it always gets rebuilt.
7a_temp_coop_munge/tmp/parser_files.yml:
command: list_coop_files(target_name,
dirpath = I('7a_temp_coop_munge/src/data_parsers'), dummy = I('2021-09-13'))
dirpath = I('7a_temp_coop_munge/src/data_parsers'),
trigger_file = '7a_temp_coop_munge/tmp/always_stale_time.txt')

coop_parsers:
command: find_parser(coop_wants, '7a_temp_coop_munge/tmp/parser_files.yml')
command: find_parser(coop_wants, '7a_temp_coop_munge/tmp/parser_files.yml',
trigger_file = '7a_temp_coop_munge/tmp/always_stale_time.txt')

coop_munge_taskplan:
command: create_coop_munge_taskplan(wants = coop_wants, parsers = coop_parsers)
Expand Down
3 changes: 2 additions & 1 deletion 7a_temp_coop_munge/src/crosswalk_fxns.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
list_coop_files <- function(fileout, dirpath, dummy){
#' Record the files found in a directory in an indicator file
#'
#' @param fileout path of the indicator file to write, passed straight to
#'   `scipiper::sc_indicate()`
#' @param dirpath directory whose files (listed with full paths) are recorded
#' @param trigger_file path of the "always stale" file; it is rewritten on every
#'   call via `make_file_stale()` so that a target depending on it is rebuilt
#'   each time
list_coop_files <- function(fileout, dirpath, trigger_file) {
  # touch the trigger first so this input is modified whenever it is used
  make_file_stale(trigger_file)

  files_in_dir <- list.files(dirpath, full.names = TRUE)
  scipiper::sc_indicate(fileout, data_file = files_in_dir)
}

Expand Down
12 changes: 10 additions & 2 deletions 7a_temp_coop_munge/src/parsing_task_fxns.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
find_parser <- function(coop_wants, parser_filehash) {

#' @param trigger_file is a file that will always be modified when it is used
#'
#' @details whenever `trigger_file` is used as an input, the function needs to call
#' make_file_stale(trigger_file) to modify the file, keeping that input always stale
#'
#' in this function, the trigger_file is used because we're building a hash table of the files in a directory,
#' and since we can't rely on a directory as a dependency, we want to check changes to this directory
#' in a greedy way (i.e., every time).
find_parser <- function(coop_wants, parser_filehash, trigger_file) {
make_file_stale(trigger_file)
parser_files <- yaml::yaml.load_file(parser_filehash) %>% names()
parser_env <- new.env()
sapply(parser_files, source, parser_env)
Expand Down
2 changes: 1 addition & 1 deletion 7a_temp_coop_munge/tmp/parser_files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
7a_temp_coop_munge/src/data_parsers/parse_indiana_files.R: f2a81bb1cbcb1767d4b58418c5b4fa7e
7a_temp_coop_munge/src/data_parsers/parse_manualentry_files.R: c9a32776901f585ca9140228f19511ad
7a_temp_coop_munge/src/data_parsers/parse_micorps_files.R: cbb2d98f8a9b987149c28da5e5f2f122
7a_temp_coop_munge/src/data_parsers/parse_missouri_files.R: e27494d559c3a3a39574f441e910e665
7a_temp_coop_munge/src/data_parsers/parse_missouri_files.R: baf12ef6aab287dfa26c635dcf771e10
7a_temp_coop_munge/src/data_parsers/parse_mndnr_files.R: 57b1b027d9ba7c1e821cd2fc7c8e7bdc
7a_temp_coop_munge/src/data_parsers/parse_mndow_coop_files.R: 6a6c233b3d6da67ee765bc11cb193d61
7a_temp_coop_munge/src/data_parsers/parse_redlake_files.R: 8496c673c467c09219bfdb6d85ffa964
Expand Down
2 changes: 1 addition & 1 deletion 7b_temp_merge/out/source_metadata_for_release.csv.ind
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
hash: 03e6cfda99af1b92b1bb210c4aaa6caa
hash: 18e611c05b9a37c1a1277e059419d724

Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ version: 0.3.0
name: 7a_temp_coop_munge/log/7a_temp_coop_munge_tasks.ind
type: file
hash: 4a83a5fe0ac58c5f1086b7113747ec92
time: 2021-12-02 21:30:18 UTC
time: 2021-12-03 17:30:32 UTC
depends:
coop_munge_taskplan: 7484abdf608d66130428839ae8689053
7a_temp_coop_munge_tasks.yml: a5d7b8331b77f242faa8e6c59cee1157
7a_temp_coop_munge/tmp/parser_files.yml: 41f66e5aa9bdcd9b5211e20882d8240a
7a_temp_coop_munge/tmp/parser_files.yml: 0b5c793c466b44a1802a6aeb72cf23e3
fixed: d47fa28c71f538b3724b78d50a486af1
code:
functions: {}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
version: 0.3.0
name: 7b_temp_merge/out/source_metadata_for_release.csv.ind
type: file
hash: 86e2757ec368bd11bf900a08b467fde9
time: 2021-12-01 23:34:55 UTC
hash: 5e8277da52545d7d590e1bec493a9a06
time: 2021-12-03 17:46:06 UTC
depends:
7b_temp_merge/out/temp_data_with_sources.feather.ind: 2d3d7b033f909ab1644923fe00e08be1
7b_temp_merge/out/temp_data_with_sources.feather.ind: 18cae4ab1a4e59cd68a8b4961d6d3f65
7b_temp_merge/in/source_metadata.csv.ind: 4fad5461061fbd7343eb58c534fdf787
fixed: 70be80eb410015414c94b5cc0eccdf43
code:
Expand Down
13 changes: 13 additions & 0 deletions lib/src/generic_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

#' Write a unique value to a text file every time this function is called.
#' Replaces any existing text in the file.
#'
#' @param filepath a text filepath that can be written to. If its parent
#'   directory does not exist yet, it is created first.
#'
#' @return `NULL`, invisibly; the function is called for its side effect of
#'   rewriting `filepath`.
#'
#' @details this function is used to keep a dependency always stale, assuming
#' this function is called whenever that file is referenced and there is at
#' least a paired target use of the file.
#' The file is kept stale by writing the current time (with timezone offset)
#' and a random number. The random number is used because this function could
#' be called rapidly several times within the same second.
#' Note that drawing the random number with `sample()` advances R's random
#' number generator state.
make_file_stale <- function(filepath) {
  # The trigger file may live in a directory that is not tracked in version
  # control; without this, cat() fails because it cannot open the file.
  if (is.character(filepath)) {
    parent_dir <- dirname(filepath)
    if (!dir.exists(parent_dir)) {
      dir.create(parent_dir, recursive = TRUE, showWarnings = FALSE)
    }
  }

  time_stamp <- format(Sys.time(), '%m/%d/%y %H:%M:%S %z; random ID: ')
  cat(paste0(time_stamp, sample(1E6, 1)), file = filepath)
  invisible(NULL)
}

0 comments on commit b00d549

Please sign in to comment.