Skip to content

Commit

Permalink
Merge pull request #359 from JaseZiv/fix-351
Browse files Browse the repository at this point in the history
Generalize player table scraping in `fb_league_stats()`
  • Loading branch information
JaseZiv committed Jan 18, 2024
2 parents a12f152 + 3a5113a commit 703904a
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 35 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: worldfootballR
Title: Extract and Clean World Football (Soccer) Data
Version: 0.6.5.0002
Version: 0.6.5.0003
Authors@R: c(
person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")),
person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* `fb_league_stats()` not returning `opponent` table. (0.6.5.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/355)
* `tm_player_bio()` not returning values in the `player_valuation`, `max_player_valuation` and `max_player_valuation_date` fields. Unfortunately, `max_player_valuation` and `max_player_valuation_date` fields are no able to be scraped at this release (0.6.5.0002) [#357](https://github.com/JaseZiv/worldfootballR/issues/357)

* `fb_league_stats()` not returning `player` table when hidden on page load. (0.6.5.0003) [#351](https://github.com/JaseZiv/worldfootballR/issues/351)

***

# worldfootballR 0.6.5
Expand Down
29 changes: 18 additions & 11 deletions R/chromote-fbref.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l
unlist(self$session$DOM$querySelectorAll(self$root_id, css)$nodeIds)
},

call_node_method = function(node_id) {
js_fun <- paste0("function() { return this.outerHTML}")
call_node_method = function(node_id, method, ...) {
js_fun <- paste0("function() { return this", method, "}")
obj_id <- self$object_id(node_id)
self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id)
self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...)
},

object_id = function(node_id) {
Expand All @@ -41,14 +41,21 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l
#' @importFrom purrr map_chr
#' @importFrom xml2 xml_children read_html
#' @noRd
worldfootballr_html_page <- function(x) {
stopifnot(identical(class(x), c("WorldfootballRDynamicPage", "R6")))
nodes <- x$find_nodes("table")

elements <- purrr::map_chr(nodes, function(node_id) {
json <- x$call_node_method(node_id)
json$result$value
})
worldfootballr_html_player_table <- function(session) {
stopifnot(identical(class(session), c("WorldfootballRDynamicPage", "R6")))

## find element "above" commented out table
node_id0 <- session$find_nodes("#stats_shooting_sh")
## skip 1 for the div "placeholder"
node_id <- node_id0 + 2L

elements <- session$call_node_method(node_id, ".textContent")[["result"]][["value"]]
n_elements <- length(elements)
if (n_elements != 1) {
warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_elements))
return(NULL)
}

html <- paste0("<html>", paste0(elements, collapse = "\n"), "</html>")
xml2::read_html(html)
}
39 changes: 19 additions & 20 deletions R/fb_league_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,34 +36,33 @@
.frequency = "once",
.frequency_id = "fb_league_stats-player"
)

session <- worldfootballr_chromote_session(url)
page <- worldfootballr_html_page(session)
player_table <- worldfootballr_html_player_table(session)
session$session$close(wait_ = FALSE)
elements <- xml2::xml_children(xml2::xml_children(page))
tables <- rvest::html_table(elements)

n_tables <- length(tables)
if (n_tables != 3) {
warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_tables))
if (is.null(player_table)) {
return(tibble::tibble())
}
renamed_table <- .rename_fb_cols(tables[[3]])
renamed_table <- renamed_table[renamed_table$Rk != "Rk", ]
renamed_table <- .add_player_href(
renamed_table,
parent_element = elements[[3]],

player_table_elements <- xml2::xml_children(xml2::xml_children(player_table))
parsed_player_table <- rvest::html_table(player_table_elements)
renamed_player_table <- .rename_fb_cols(parsed_player_table[[1]])
renamed_player_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ]
renamed_player_table <- .add_player_href(
renamed_player_table,
parent_element = player_table_elements,
player_xpath = ".//tbody/tr/td[@data-stat='player']/a"
)
}

suppressMessages(
readr::type_convert(
clean_table,
guess_integer = TRUE,
na = "",
trim_ws = TRUE
suppressMessages(
readr::type_convert(
renamed_player_table,
guess_integer = TRUE,
na = "",
trim_ws = TRUE
)
)
)
}
}


Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-fbref.R
Original file line number Diff line number Diff line change
Expand Up @@ -406,16 +406,16 @@ test_that("fb_league_stats() for players works", {
testthat::skip_on_cran()
testthat::skip_on_ci()
expected_player_shooting_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "Gls_Standard", "Sh_Standard", "SoT_Standard", "SoT_percent_Standard", "Sh_per_90_Standard", "SoT_per_90_Standard", "G_per_Sh_Standard", "G_per_SoT_Standard", "Dist_Standard", "FK_Standard", "PK_Standard", "PKatt_Standard", "xG_Expected", "npxG_Expected", "npxG_per_Sh_Expected", "G_minus_xG_Expected", "np:G_minus_xG_Expected", "Matches", "url")
epl_player_shooting_22 <- fb_league_stats(
single_player_shooting_22 <- fb_league_stats(
country = "BRA",
gender = "M",
season_end_year = 2022,
tier = "1st",
stat_type = "shooting",
team_or_player = "player"
)
expect_gt(nrow(epl_player_shooting_22), 0)
expect_setequal(colnames(epl_player_shooting_22), expected_player_shooting_cols)
expect_gt(nrow(single_player_shooting_22), 0)
expect_setequal(colnames(single_player_shooting_22), expected_player_shooting_cols)

expected_player_misc_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "CrdY", "CrdR", "2CrdY", "Fls", "Fld", "Off", "Crs", "Int", "TklW", "PKwon", "PKcon", "OG", "Recov", "Won_Aerial Duels", "Lost_Aerial Duels", "Won_percent_Aerial Duels", "Matches", "url")
## testing a lot would take too long, so just test multiple years since that is the most likely input param to have multiple values
Expand Down

0 comments on commit 703904a

Please sign in to comment.