diff --git a/R/match_name.R b/R/match_name.R index 21e31867..99f01ce8 100644 --- a/R/match_name.R +++ b/R/match_name.R @@ -92,7 +92,12 @@ named_tibble <- function(names) { } minimum_names_of_match_name <- function(loanbook) { - pattern <- collapse_pipe(glue("^name_{level_root()}")) + pattern <- collapse_pipe( + c( + glue("^name_{level_root()}"), + glue("^id_{level_root()}") + ) + ) name_starts_with_name_level <- grep(pattern, names(loanbook), value = TRUE) unique(c( @@ -166,18 +171,33 @@ some_is_one <- function(x) { tidy_match_name_result <- function(data) { level_cols <- data %>% - names_matching(level = get_level_columns()) + names_matching(level = level_root()) + + id_cols <- sub("name_", "id_", level_cols) + # FIXME # 83, here is where we get multiple UP data %>% + tidyr::pivot_longer( - cols = level_cols, + cols = id_cols, names_to = "level_lbk", - values_to = "name_lbk" + values_to = "id_lbk2", + names_prefix = "id_" ) %>% mutate( - level_lbk = sub("^name_", "", .data$level_lbk), - level_lbk = sub("_lbk$", "", .data$level_lbk), + id_lbk = .data$id_lbk2, id_lbk2 = NULL, + level_lbk = sub("_lbk$", "", .data$level_lbk) + ) %>% + + tidyr::pivot_longer( + cols = level_cols, + names_to = "level_lbk2", + values_to = "name_lbk", + names_prefix = "name_" ) %>% + # FIXME: This is hacky, I'm not sure why these levels don't correspond + # to id_lbk but the existing level_lbk does + mutate(level_lbk2 = NULL) %>% remove_suffix("_lbk") } @@ -186,10 +206,6 @@ names_matching <- function(x, level) { grep(pattern, names(x), value = TRUE) } -get_level_columns <- function() { - c("direct_", "intermediate_", "ultimate_") -} - remove_suffix <- function(data, suffix) { set_names(data, ~ sub(suffix, "", .x)) } diff --git a/R/restructure_loanbook_for_matching.R b/R/restructure_loanbook_for_matching.R index 8c332cd5..8dd8d968 100644 --- a/R/restructure_loanbook_for_matching.R +++ b/R/restructure_loanbook_for_matching.R @@ -64,9 +64,18 @@ restructure_loanbook_for_matching <- function(data, overwrite = NULL) { call. = FALSE ) data %>% + # FIXME: Maybe map uniquify_id_column over level_root(), then reduce rbind + # and may map prefix of intermediate_parent_1, _2, _n to IP1, IP2, IPn + # My only concent is that I haven't come up yet with a way to test if this + # really matters uniquify_id_column(id_column = "id_direct_loantaker", prefix = "C") %>% uniquify_id_column(id_column = "id_ultimate_parent", prefix = "UP") %>% + may_add_sector_and_borderline() %>% + + # FIXME: Here is where we loose intermediate_parent columns + # fix input_cols_for_prepare_loanbook() to use all level_root columsn and + # not fail is some is missing. select(input_cols_for_prepare_loanbook(), .data$sector) %>% identify_loans_by_sector_and_level() %>% identify_loans_by_name_and_source() %>% @@ -122,6 +131,8 @@ check_prepare_loanbook_data <- function(data) { } input_cols_for_prepare_loanbook <- function() { + # FIXME: This should not be hard coded but taken from the input loanbook, as + # matching the values of level_root() c( "id_direct_loantaker", "name_direct_loantaker", diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R index 40cf1ea1..a60f35fe 100644 --- a/tests/testthat/test-match_name.R +++ b/tests/testthat/test-match_name.R @@ -4,9 +4,6 @@ library(r2dii.dataraw) test_that("match_name has expected names", { expected <- c( "id_loan", - "id_direct_loantaker", - "id_intermediate_parent_1", - "id_ultimate_parent", "loan_size_outstanding", "loan_size_outstanding_currency", "loan_size_credit_limit", @@ -50,15 +47,6 @@ test_that("match_name has expected names", { expect_named(match_name(lbk45, ald_demo), expected) }) -test_that("match_name names are as loanbook (except missing, plus new names)", { - lbk <- slice(loanbook_demo, 1) - out <- match_name(lbk, ald_demo) - - expected <- setdiff(names(lbk), paste0("name_", out$level)) - actual <- names(out)[seq_along(expected)] - expect_equal(actual, expected) -}) - test_that("match_name takes unprepared loanbook and ald datasets", { expect_error( match_name(slice(loanbook_demo, 1), ald_demo), @@ -136,7 +124,11 @@ test_that("match_name outputs expected names found in loanbook (after tweaks)", # `level` stores values that used to be columns in the input loanbook except # they lack the prefix "name_" All other names should be the same - tweaked <- c(glue("name_{unique(out$level)}"), names(out)) + tweaked <- c( + glue("name_{unique(out$level)}"), + glue("id_{unique(out$level)}"), + names(out) + ) expect_length(setdiff(names(loanbook_demo), tweaked), 0L) }) @@ -253,3 +245,39 @@ test_that("match_nanme works with slice(loanbook_demo, 1)", { "no match" ) }) + +test_that("match_name outputs id consistent with level", { + all_rows_of_level_have_expected_id <- function(loanbook, matched, this_level) { + prefix <- strsplit( + unique(loanbook[[paste0("id_", this_level, collapse = "")]]), split = "" + )[[1]][[1]] + + matched %>% + dplyr::filter(.data$level %in% this_level) %>% + pull(id) %>% + unique() %>% + startsWith(prefix) + } + + loanbook <- slice(loanbook_demo, 3) + matched <- loanbook %>% match_name(ald_demo) + + expect_true( + all_rows_of_level_have_expected_id( + loanbook, matched, + "direct_loantaker" + ) + ) + expect_true( + all_rows_of_level_have_expected_id( + loanbook, matched, + "intermediate_parent_1" + ) + ) + expect_true( + all_rows_of_level_have_expected_id( + loanbook, matched, + "ultimate_parent" + ) + ) +})