Skip to content
This repository has been archived by the owner on Sep 30, 2022. It is now read-only.

Commit

Permalink
Merge 2a5d3df into f9f179a
Browse files Browse the repository at this point in the history
  • Loading branch information
maurolepore committed Jan 10, 2020
2 parents f9f179a + 2a5d3df commit 4f09219
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 23 deletions.
36 changes: 26 additions & 10 deletions R/match_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ named_tibble <- function(names) {
}

minimum_names_of_match_name <- function(loanbook) {
pattern <- collapse_pipe(glue("^name_{level_root()}"))
pattern <- collapse_pipe(
c(
glue("^name_{level_root()}"),
glue("^id_{level_root()}")
)
)
name_starts_with_name_level <- grep(pattern, names(loanbook), value = TRUE)

unique(c(
Expand Down Expand Up @@ -166,18 +171,33 @@ some_is_one <- function(x) {

tidy_match_name_result <- function(data) {
level_cols <- data %>%
names_matching(level = get_level_columns())
names_matching(level = level_root())

id_cols <- sub("name_", "id_", level_cols)

# FIXME # 83, here is where we get multiple UP
data %>%

tidyr::pivot_longer(
cols = level_cols,
cols = id_cols,
names_to = "level_lbk",
values_to = "name_lbk"
values_to = "id_lbk2",
names_prefix = "id_"
) %>%
mutate(
level_lbk = sub("^name_", "", .data$level_lbk),
level_lbk = sub("_lbk$", "", .data$level_lbk),
id_lbk = .data$id_lbk2, id_lbk2 = NULL,
level_lbk = sub("_lbk$", "", .data$level_lbk)
) %>%

tidyr::pivot_longer(
cols = level_cols,
names_to = "level_lbk2",
values_to = "name_lbk",
names_prefix = "name_"
) %>%
# FIXME: This is hacky, I'm not sure why these levels don't correspond
# to id_lbk but the existing level_lbk does
mutate(level_lbk2 = NULL) %>%
remove_suffix("_lbk")
}

Expand All @@ -186,10 +206,6 @@ names_matching <- function(x, level) {
grep(pattern, names(x), value = TRUE)
}

get_level_columns <- function() {
c("direct_", "intermediate_", "ultimate_")
}

remove_suffix <- function(data, suffix) {
set_names(data, ~ sub(suffix, "", .x))
}
Expand Down
11 changes: 11 additions & 0 deletions R/restructure_loanbook_for_matching.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,18 @@ restructure_loanbook_for_matching <- function(data, overwrite = NULL) {
call. = FALSE
)
data %>%
# FIXME: Maybe map uniquify_id_column over level_root(), then reduce rbind
# and may map prefix of intermediate_parent_1, _2, _n to IP1, IP2, IPn
# My only concent is that I haven't come up yet with a way to test if this
# really matters
uniquify_id_column(id_column = "id_direct_loantaker", prefix = "C") %>%
uniquify_id_column(id_column = "id_ultimate_parent", prefix = "UP") %>%

may_add_sector_and_borderline() %>%

# FIXME: Here is where we loose intermediate_parent columns
# fix input_cols_for_prepare_loanbook() to use all level_root columsn and
# not fail is some is missing.
select(input_cols_for_prepare_loanbook(), .data$sector) %>%
identify_loans_by_sector_and_level() %>%
identify_loans_by_name_and_source() %>%
Expand Down Expand Up @@ -122,6 +131,8 @@ check_prepare_loanbook_data <- function(data) {
}

input_cols_for_prepare_loanbook <- function() {
# FIXME: This should not be hard coded but taken from the input loanbook, as
# matching the values of level_root()
c(
"id_direct_loantaker",
"name_direct_loantaker",
Expand Down
54 changes: 41 additions & 13 deletions tests/testthat/test-match_name.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@ library(r2dii.dataraw)
test_that("match_name has expected names", {
expected <- c(
"id_loan",
"id_direct_loantaker",
"id_intermediate_parent_1",
"id_ultimate_parent",
"loan_size_outstanding",
"loan_size_outstanding_currency",
"loan_size_credit_limit",
Expand Down Expand Up @@ -50,15 +47,6 @@ test_that("match_name has expected names", {
expect_named(match_name(lbk45, ald_demo), expected)
})

test_that("match_name names are as loanbook (except missing, plus new names)", {
lbk <- slice(loanbook_demo, 1)
out <- match_name(lbk, ald_demo)

expected <- setdiff(names(lbk), paste0("name_", out$level))
actual <- names(out)[seq_along(expected)]
expect_equal(actual, expected)
})

test_that("match_name takes unprepared loanbook and ald datasets", {
expect_error(
match_name(slice(loanbook_demo, 1), ald_demo),
Expand Down Expand Up @@ -136,7 +124,11 @@ test_that("match_name outputs expected names found in loanbook (after tweaks)",

# `level` stores values that used to be columns in the input loanbook except
# they lack the prefix "name_" All other names should be the same
tweaked <- c(glue("name_{unique(out$level)}"), names(out))
tweaked <- c(
glue("name_{unique(out$level)}"),
glue("id_{unique(out$level)}"),
names(out)
)

expect_length(setdiff(names(loanbook_demo), tweaked), 0L)
})
Expand Down Expand Up @@ -253,3 +245,39 @@ test_that("match_nanme works with slice(loanbook_demo, 1)", {
"no match"
)
})

test_that("match_name outputs id consistent with level", {
all_rows_of_level_have_expected_id <- function(loanbook, matched, this_level) {
prefix <- strsplit(
unique(loanbook[[paste0("id_", this_level, collapse = "")]]), split = ""
)[[1]][[1]]

matched %>%
dplyr::filter(.data$level %in% this_level) %>%
pull(id) %>%
unique() %>%
startsWith(prefix)
}

loanbook <- slice(loanbook_demo, 3)
matched <- loanbook %>% match_name(ald_demo)

expect_true(
all_rows_of_level_have_expected_id(
loanbook, matched,
"direct_loantaker"
)
)
expect_true(
all_rows_of_level_have_expected_id(
loanbook, matched,
"intermediate_parent_1"
)
)
expect_true(
all_rows_of_level_have_expected_id(
loanbook, matched,
"ultimate_parent"
)
)
})

0 comments on commit 4f09219

Please sign in to comment.