Merge 2a5d3df into f9f179a

2DegreesInvesting · Jan 10, 2020 · 4f09219 · 4f09219
2 parents f9f179a + 2a5d3df
commit 4f09219
Show file tree

Hide file tree

Showing 3 changed files with 78 additions and 23 deletions.
diff --git a/R/match_name.R b/R/match_name.R
@@ -92,7 +92,12 @@ named_tibble <- function(names) {
 }
 
 minimum_names_of_match_name <- function(loanbook) {
-  pattern <- collapse_pipe(glue("^name_{level_root()}"))
+  pattern <- collapse_pipe(
+    c(
+      glue("^name_{level_root()}"),
+      glue("^id_{level_root()}")
+    )
+  )
   name_starts_with_name_level <- grep(pattern, names(loanbook), value = TRUE)
 
   unique(c(
@@ -166,18 +171,33 @@ some_is_one <- function(x) {
 
 tidy_match_name_result <- function(data) {
   level_cols <- data %>%
-    names_matching(level = get_level_columns())
+    names_matching(level = level_root())
+
+  id_cols <- sub("name_", "id_", level_cols)
 
+  # FIXME # 83, here is where we get multiple UP
   data %>%
+
     tidyr::pivot_longer(
-      cols = level_cols,
+      cols = id_cols,
       names_to = "level_lbk",
-      values_to = "name_lbk"
+      values_to = "id_lbk2",
+      names_prefix = "id_"
     ) %>%
     mutate(
-      level_lbk = sub("^name_", "", .data$level_lbk),
-      level_lbk = sub("_lbk$", "", .data$level_lbk),
+      id_lbk = .data$id_lbk2, id_lbk2 = NULL,
+      level_lbk = sub("_lbk$", "", .data$level_lbk)
+    ) %>%
+
+    tidyr::pivot_longer(
+      cols = level_cols,
+      names_to = "level_lbk2",
+      values_to = "name_lbk",
+      names_prefix = "name_"
     ) %>%
+    # FIXME: This is hacky, I'm not sure why these levels don't correspond
+    # to id_lbk but the existing level_lbk does
+    mutate(level_lbk2 = NULL) %>%
     remove_suffix("_lbk")
 }
 
@@ -186,10 +206,6 @@ names_matching <- function(x, level) {
   grep(pattern, names(x), value = TRUE)
 }
 
-get_level_columns <- function() {
-  c("direct_", "intermediate_", "ultimate_")
-}
-
 remove_suffix <- function(data, suffix) {
   set_names(data, ~ sub(suffix, "", .x))
 }

diff --git a/R/restructure_loanbook_for_matching.R b/R/restructure_loanbook_for_matching.R
@@ -64,9 +64,18 @@ restructure_loanbook_for_matching <- function(data, overwrite = NULL) {
     call. = FALSE
   )
   data %>%
+    # FIXME: Maybe map uniquify_id_column over level_root(), then reduce rbind
+    # and may map prefix of intermediate_parent_1, _2, _n to IP1, IP2, IPn
+    # My only concent is that I haven't come up yet with a way to test if this
+    # really matters
     uniquify_id_column(id_column = "id_direct_loantaker", prefix = "C") %>%
     uniquify_id_column(id_column = "id_ultimate_parent", prefix = "UP") %>%
+
     may_add_sector_and_borderline() %>%
+
+    # FIXME: Here is where we loose intermediate_parent columns
+    # fix input_cols_for_prepare_loanbook() to use all level_root columsn and
+    # not fail is some is missing.
     select(input_cols_for_prepare_loanbook(), .data$sector) %>%
     identify_loans_by_sector_and_level() %>%
     identify_loans_by_name_and_source() %>%
@@ -122,6 +131,8 @@ check_prepare_loanbook_data <- function(data) {
 }
 
 input_cols_for_prepare_loanbook <- function() {
+  # FIXME: This should not be hard coded but taken from the input loanbook, as
+  # matching the values of level_root()
   c(
     "id_direct_loantaker",
     "name_direct_loantaker",

diff --git a/tests/testthat/test-match_name.R b/tests/testthat/test-match_name.R
@@ -4,9 +4,6 @@ library(r2dii.dataraw)
 test_that("match_name has expected names", {
   expected <- c(
     "id_loan",
-    "id_direct_loantaker",
-    "id_intermediate_parent_1",
-    "id_ultimate_parent",
     "loan_size_outstanding",
     "loan_size_outstanding_currency",
     "loan_size_credit_limit",
@@ -50,15 +47,6 @@ test_that("match_name has expected names", {
   expect_named(match_name(lbk45, ald_demo), expected)
 })
 
-test_that("match_name names are as loanbook (except missing, plus new names)", {
-  lbk <- slice(loanbook_demo, 1)
-  out <- match_name(lbk, ald_demo)
-
-  expected <- setdiff(names(lbk), paste0("name_", out$level))
-  actual <- names(out)[seq_along(expected)]
-  expect_equal(actual, expected)
-})
-
 test_that("match_name takes unprepared loanbook and ald datasets", {
   expect_error(
     match_name(slice(loanbook_demo, 1), ald_demo),
@@ -136,7 +124,11 @@ test_that("match_name outputs expected names found in loanbook (after tweaks)",
 
   # `level` stores values that used to be columns in the input loanbook except
   # they lack the prefix "name_" All other names should be the same
-  tweaked <- c(glue("name_{unique(out$level)}"), names(out))
+  tweaked <- c(
+    glue("name_{unique(out$level)}"),
+    glue("id_{unique(out$level)}"),
+    names(out)
+  )
 
   expect_length(setdiff(names(loanbook_demo), tweaked), 0L)
 })
@@ -253,3 +245,39 @@ test_that("match_nanme works with slice(loanbook_demo, 1)", {
     "no match"
   )
 })
+
+test_that("match_name outputs id consistent with level", {
+  all_rows_of_level_have_expected_id <- function(loanbook, matched, this_level) {
+    prefix <- strsplit(
+      unique(loanbook[[paste0("id_", this_level, collapse = "")]]), split = ""
+    )[[1]][[1]]
+
+    matched %>%
+      dplyr::filter(.data$level %in% this_level) %>%
+      pull(id) %>%
+      unique() %>%
+      startsWith(prefix)
+  }
+
+  loanbook <- slice(loanbook_demo, 3)
+  matched <- loanbook %>% match_name(ald_demo)
+
+  expect_true(
+    all_rows_of_level_have_expected_id(
+      loanbook, matched,
+      "direct_loantaker"
+    )
+  )
+  expect_true(
+    all_rows_of_level_have_expected_id(
+      loanbook, matched,
+      "intermediate_parent_1"
+    )
+  )
+  expect_true(
+    all_rows_of_level_have_expected_id(
+      loanbook, matched,
+      "ultimate_parent"
+    )
+  )
+})