Skip to content

Commit

Permalink
stabilize code base
Browse files Browse the repository at this point in the history
  • Loading branch information
KoderKow committed Aug 20, 2023
1 parent 3786249 commit 6004b22
Show file tree
Hide file tree
Showing 19 changed files with 295 additions and 135 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
^_kaggle_data$
^FUNDING\.yml$
^dev_notes\.R
^dev$
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ Imports:
tibble,
usethis
Suggests:
covr,
knitr,
rmarkdown
VignetteBuilder:
knitr
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
1 change: 0 additions & 1 deletion FUNDING.yml

This file was deleted.

5 changes: 2 additions & 3 deletions R/api.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ kgl_api_get <- function(path, ..., body = NULL, auth = kgl_auth()) {
r <- httr::GET(
url = get_url,
body = body,
auth,
httr::verbose()
auth
)

## check status
Expand Down Expand Up @@ -100,7 +99,7 @@ kgl_request <- function(
endpoint,
...,
body = NULL
) {
) {
resp <-
.kaggle_base_url %>%
httr2::request() %>%
Expand Down
1 change: 0 additions & 1 deletion R/auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ kgl_auth <- function(req = NULL, username = NULL, key = NULL, creds_file = NULL)

## alternatively, users can provide path to (or text of) creds_file
} else if (!is.null(creds_file)) {

## read and parse kaggle.json creds file if it exists
if (file.exists(creds_file)) {
con <- file(creds_file)
Expand Down
10 changes: 5 additions & 5 deletions R/competition-submissions.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ kgl_competitions_list_submissions <- function(
id,
page = 1,
clean_response = TRUE
) {
) {
encoded_params <- url_encode(id)

endpoint <- glue::glue("/competitions/submissions/list/{encoded_params}")
Expand Down Expand Up @@ -48,7 +48,7 @@ kgl_competitions_list_submissions <- function(
kgl_competitions_submissions_url <- function(
id,
file
) {
) {
## TODO Check if file is a dataset, if it is, prompt user to save it.

content_length <- file.size(file)
Expand Down Expand Up @@ -85,7 +85,7 @@ kgl_competitions_submissions_url <- function(
kgl_competitions_submissions_upload <- function(
file,
create_url
) {
) {
content_length <- file.size(file)

last_modified_date_utc <-
Expand Down Expand Up @@ -119,7 +119,7 @@ kgl_competitions_submissions_submit <- function(
id,
blob_file_tokens,
submission_description
) {
) {
encoded_params <- url_encode(id)

endpoint <- glue::glue("competitions/submissions/submit/{encoded_params}")
Expand Down Expand Up @@ -160,7 +160,7 @@ kgl_competition_submit <- function(
id,
file,
submission_description = ""
) {
) {
url_result <- kgl_competitions_submissions_url(
id = id,
file = file
Expand Down
6 changes: 4 additions & 2 deletions R/competitions-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ kgl_competitions_data_download <- function(
file_name,
output_dir = ".",
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.string(id),
assertthat::is.string(file_name),
Expand All @@ -59,6 +59,8 @@ kgl_competitions_data_download <- function(
assertthat::is.flag(clean_response)
)

cat(" - Downloading", file_name, "\n")

if (!fs::file_exists(output_dir)) {
stop("output_dir does not exist!")
}
Expand Down Expand Up @@ -110,7 +112,7 @@ kgl_competitions_data_download_all <- function(
id,
output_dir = ".",
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.string(id),
assertthat::is.string(output_dir),
Expand Down
4 changes: 2 additions & 2 deletions R/competitions-leaderboard.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ kgl_competitions_leaderboard_download <- function(
id,
output_dir = ".",
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.string(id),
assertthat::is.string(output_dir),
Expand Down Expand Up @@ -55,7 +55,7 @@ kgl_competitions_leaderboard_download <- function(
kgl_competitions_leaderboard_view <- function(
id,
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.string(id),
assertthat::is.flag(clean_response)
Expand Down
2 changes: 1 addition & 1 deletion R/competitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ kgl_competitions_list <- function(
page = 1,
search = NULL,
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.number(page),
is.null(search) || assertthat::is.string(search),
Expand Down
8 changes: 4 additions & 4 deletions R/datasets.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ kgl_datasets_list <- function(
search = NULL,
owner_dataset = NULL,
clean_response = TRUE
) {
) {
assertthat::assert_that(
assertthat::is.number(page),
is.null(search) || assertthat::is.string(search),
Expand Down Expand Up @@ -130,7 +130,7 @@ kgl_datasets_download <- function(
owner_dataset,
fileName,
datasetVersionNumber = NULL
) {
) {
owner_dataset_clean <- owner_dataset_parser(owner_dataset)
owner_slug <- owner_dataset_clean[1]
dataset_slug <- owner_dataset_clean[2]
Expand All @@ -156,7 +156,7 @@ kgl_datasets_upload_file <- function(
fileName,
contentLength,
lastModifiedDateUtc
) {
) {
contentLength <- file.size(fileName)
lastModifiedDateUtc <- format(
file.info(fileName)$mtime,
Expand All @@ -183,7 +183,7 @@ kgl_datasets_upload_file <- function(
kgl_datasets_create_version <- function(
owner_dataset,
datasetNewVersionRequest
) {
) {
owner_dataset_clean <- owner_dataset_parser(owner_dataset)
owner_slug <- owner_dataset_clean[1]
dataset_slug <- owner_dataset_clean[2]
Expand Down
13 changes: 7 additions & 6 deletions R/kgl_flow.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,13 @@ kgl_flow <- function(id = NULL) {

usethis::ui_todo(
"These files will be downloaded:
{v_needs_to_download}."
{v_needs_to_download}
------"
)

d_info <-
v_to_download %>%
purrr::map_dfr(~ {
purrr::map(~ {
d <- kgl_competitions_data_download(
id = competition_id,
file_name = .x,
Expand Down Expand Up @@ -143,10 +144,10 @@ kgl_flow <- function(id = NULL) {
data_list %>%
dplyr::mutate(name = name_old) %>%
dplyr::select(-name_old) %>%
dplyr::left_join(
y = d_info,
by = "name"
) %>%
# dplyr::left_join(
# y = d_info,
# by = "name"
# ) %>%
saveRDS(file = path_meta)
}
} else {
Expand Down
1 change: 0 additions & 1 deletion R/kgl_flow_leaderboard.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#' kgl_flow_leaderboard()
#' }
kgl_flow_leaderboard <- function() {

# usethis::ui_todo("Validating Project and Competition ID")

## Set up directories and paths
Expand Down
9 changes: 5 additions & 4 deletions R/kgl_flow_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#'
#' If all of the files in `_kaggle_data` are in **CSV** format (excluding the meta directory), then this function will load all of the Kaggle data into the current environment (or the environment of your choosing) via `readr::read_csv()`.
#'
#' @inheritDotParams readr::read_csv
#' @param envir Environment in which to place the loaded Kaggle data.
#'
#' @return Nothing.
Expand All @@ -14,7 +15,7 @@
#'
#' kgl_flow_load()
#' }
kgl_flow_load <- function(..., prefix = "", envir = parent.frame()) {
kgl_flow_load <- function(..., envir = parent.frame()) {
# d_meta <- kgl_flow_meta()

dir_kgl <- usethis::proj_path(.kgl_dir)
Expand All @@ -36,9 +37,9 @@ kgl_flow_load <- function(..., prefix = "", envir = parent.frame()) {
fs::path_file() %>%
fs::path_ext_remove()

if (prefix != "") {
v_file_names <- paste0(prefix, "_", v_file_names)
}
# if (prefix != "") {
# v_file_names <- paste0(prefix, "_", v_file_names)
# }

l_d <-
v_files_to_iterate_over %>%
Expand Down
4 changes: 2 additions & 2 deletions R/kgl_flow_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ validator_path_competition_id <- function(
competition_id,
dir_path,
dir_meta
) {
) {
if (fs::file_exists(path_competition_id)) {
logged_competition_id <- readLines(path_competition_id)[2]

Expand Down Expand Up @@ -56,7 +56,7 @@ kgl_flow_data_download <- function(
competition_id,
file_name,
dir_name
) {
) {
get_url <- glue::glue("competitions/data/download/{competition_id}/{file_name}")

get_request <- kgl_api_get(get_url)
Expand Down
13 changes: 8 additions & 5 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,24 @@
#' @param x Output from kaggle function
#' @return Print out of summary info and a tibble of the data.
kgl_as_tbl <- function(x) {
x <-
d <-
x %>%
purrr::map_dfr(~ .x %||% NA) %>%
tibble::as_tibble(
.name_repair = snakecase::to_snake_case
) %>%
readr::type_convert(col_types = readr::cols())
)

return(x)
if (nrow(d) > 0) {
d <- readr::type_convert(d, col_types = readr::cols())
}

return(d)
}

url_encode <- function(...) {
list(...) %>%
purrr::map_chr(as.character) %>%
purrr::map_chr(URLencode, reserved = TRUE) %>%
purrr::map_chr(utils::URLencode, reserved = TRUE) %>%
stringr::str_c(collapse = "/")
}

Expand Down
30 changes: 24 additions & 6 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ You can install the dev version of **{kaggler}** from [CRAN](https://github.com/

```{r, eval = FALSE}
## install kaggler package from github
devtools::install_packages("koderkow/kaggler")
devtools::install_github("koderkow/kaggler")
```

# API Authorization
Expand All @@ -54,10 +54,6 @@ Browse or search for Kaggle competitions.
comps1 <- kgl_competitions_list()
comps1
## it's paginated, so to see page two:
comps2 <- kgl_competitions_list(page = 2)
comps2
## search by keyword for competitions
imagecomps <- kgl_competitions_list(search = "image")
imagecomps
Expand All @@ -67,12 +63,34 @@ You can look up the datalist for a given Kaggle competition using the API.

```{r}
## data list for a given competition
c1_datalist <- kgl_competitions_data_list(comps1$ref[1])
c1_datalist <- kgl_competitions_data_list("titanic")
c1_datalist
```

For a more in-depth walkthrough visit the [Kaggle API](https://koderkow.github.io/kaggler/articles/kaggle-api.html) page.

# Kaggle Flow

This is an **experimental** and **opinionated** reproducible workflow
for working with Kaggle competitions.

``` r
library(kaggler)

kgl_flow("titanic")

#> • These files will be downloaded:
#> - 'gender_submission'
#> - 'test'
#> - 'train'.
#> • Downloading 'gender_submission.csv'...
#> • Downloading 'test.csv'...
#> • Downloading 'train.csv'...
```

For a more in-depth walkthrough visit the [Kaggle
Flow](https://koderkow.github.io/kaggler/articles/kgl-flow.html) page.

## Note(s)

- The base of this package was cloned from the original at [{kaggler}](https://github.com/mkearney/kaggler). I have decided to take the developer's work and continue their amazing development! Major props and recognition go out to the original developer(s) of this package.
Expand Down
Loading

0 comments on commit 6004b22

Please sign in to comment.