Skip to content

Commit

Permalink
resolve merge conflicts in #111
Browse files Browse the repository at this point in the history
Merge branch 'dev' into issue105

# Conflicts:
#	DESCRIPTION
#	R/RSocrata.R
  • Loading branch information
nicklucius committed Oct 26, 2016
2 parents 16de299 + 981c824 commit 0fea2f6
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 7 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Description: Provides easier interaction with
format and manages throttling by 'Socrata'.
Users can upload data to Socrata portals directly
from R.
Version: 1.7.1-18
Date: 2016-10-25
Version: 1.7.1-20
Date: 2016-10-26
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
Maintainer: "Tom Schenk Jr." <developers@cityofchicago.org>
Depends:
Expand Down
41 changes: 38 additions & 3 deletions R/RSocrata.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,21 +269,56 @@ read.socrata <- function(url, app_token = NULL, email = NULL, password = NULL,
validUrl <- validateUrl(url, app_token) # check url syntax, allow human-readable Socrata url
parsedUrl <- httr::parse_url(validUrl)
mimeType <- mime::guess_type(parsedUrl$path)
if (!is.null(names(parsedUrl$query))) { # check if URL has any queries
## if there is a query, check for $order within the query
orderTest <- any(names(parsedUrl$query) == "$order")
if(!orderTest) # sort by Socrata unique identifier
validUrl <- paste(validUrl, if(is.null(parsedUrl$query)) {'?'} else {"&"}, '$order=:id', sep='')
}
else {
validUrl <- paste(validUrl, {'?'}, '$order=:id', sep='')
parsedUrl <- httr::parse_url(validUrl) # reparse because URL now has a query
}
if(!(mimeType %in% c('text/csv','application/json')))
stop("Error in read.socrata: ", mimeType, " not a supported data format.")
response <- getResponse(validUrl, email, password)
page <- getContentAsDataFrame(response)
result <- page
dataTypes <- getSodaTypes(response)
while (nrow(page) > 0) { # more to come maybe?
# parse any $limit out of the URL
if(is.null(parsedUrl$query$`$limit`) & is.null(parsedUrl$query$`$LIMIT`))
limitProvided <- FALSE
else {
names(parsedUrl$query) <- tolower(names(parsedUrl$query))
userLimit <- as.integer(parsedUrl$query$`$limit`)
limitProvided <- TRUE
##remove LIMIT from URL
parsedUrl$query <- parsedUrl$query[-which(names(parsedUrl$query) == '$limit')]
validUrl <- httr::build_url(parsedUrl)
}
# PAGE through data and combine
# if $limit is <= 1000, do not page
# if $limit > 1000, page only until limit is met
# if no limit $provided, loop until all data is paged
while (nrow(page) > 0) {
if(limitProvided)
if(userLimit < 1000) break
else if(userLimit - nrow(result) <= 1000) {
query <- paste(validUrl, if(is.null(parsedUrl$query)) {'?'} else {"&"},
'$limit=', (userLimit - nrow(result)),'&$offset=', nrow(result), sep='')
response <- getResponse(query, email, password)
page <- getContentAsDataFrame(response)
result <- rbind.fill(result, page) # accumulate
break
}
query <- paste(validUrl, if(is.null(parsedUrl$query)) {'?'} else {"&"}, '$offset=', nrow(result), sep='')
response <- getResponse(query, email, password)
page <- getContentAsDataFrame(response)
result <- rbind.fill(result, page) # accumulate
}
# convert Socrata calendar dates to posix format
for(columnName in colnames(result)[!is.na(dataTypes[fieldName(colnames(result))])
& (dataTypes[fieldName(colnames(result))] == 'calendar_date'
for(columnName in colnames(result)[!is.na(dataTypes[fieldName(colnames(result))])
& (dataTypes[fieldName(colnames(result))] == 'calendar_date'
| dataTypes[fieldName(colnames(result))] == 'floating_timestamp')]) {
result[[columnName]] <- posixify(result[[columnName]])
}
Expand Down
82 changes: 80 additions & 2 deletions tests/testthat/test-all.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,91 @@ test_that("format is not supported", {
expect_error(read.socrata('http://soda.demo.socrata.com/resource/4334-bgaj.xml'))
})

test_that("read Socrata JSON with missing fields (issue 19)", {
test_that("read Socrata JSON with missing fields (issue 19 - bind within page)", {
## Define and test issue 19
df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json")
expect_error(df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json"), NA,
info = "https://github.com/Chicago/RSocrata/issues/19")
expect_equal(78, nrow(df), label="rows", info = "https://github.com/Chicago/RSocrata/issues/19")
expect_equal(9, ncol(df), label="columns", info = "https://github.com/Chicago/RSocrata/issues/19")
})

test_that("read Socrata JSON with missing fields (issue 19 - binding pages together)", {
## Define and test issue 19
expect_error(df <- read.socrata(paste0("https://data.smgov.net/resource/ia9m-wspt.json?",
"$where=incident_date>'2010-12-15'%20AND%20incident_date<'2011-01-15'"))
, NA, info = "https://github.com/Chicago/RSocrata/issues/19")
expect_equal(7927, nrow(df), label="rows", info = "https://github.com/Chicago/RSocrata/issues/19")
expect_equal(18, ncol(df), label="columns", info = "https://github.com/Chicago/RSocrata/issues/19")
})

test_that("Accept a URL with a $limit= clause and properly limit the results", {
## Define and test issue 83
df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.json?$LIMIT=500") # uppercase
expect_equal(500, nrow(df), label="rows",
info = "$LIMIT in uppercase https://github.com/Chicago/RSocrata/issues/83")
df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.json?$limit=500") # lowercase
expect_equal(500, nrow(df), label="rows",
info = "$limit in lowercase https://github.com/Chicago/RSocrata/issues/83")
df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.json?$LIMIT=1001&$order=:id") # uppercase
expect_equal(1001, nrow(df), label="rows",
info = "$LIMIT in uppercase with 2 queries https://github.com/Chicago/RSocrata/issues/83")
df <- read.socrata("http://soda.demo.socrata.com/resource/4334-bgaj.json?$limit=1001&$order=:id") # lowercase
expect_equal(1001, nrow(df), label="rows lowercase",
info = "$LIMIT in lowercase with 2 queries https://github.com/Chicago/RSocrata/issues/83")
})

test_that("If URL has no queries, insert $order:id into URL", {
## Define and test issue 15
## Ensure that the $order=:id is inserted when no other query parameters are used.
df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json")
expect_equal("21.5", df$percent_aged_under_18_or_over_64[7],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("38", df$percent_aged_under_18_or_over_64[23],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("40.4", df$percent_aged_under_18_or_over_64[36],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("36.1", df$percent_aged_under_18_or_over_64[42],
info = "https://github.com/Chicago/RSocrata/issues/15")

})

test_that("If URL has an $order clause, do not insert ?$order:id into URL", {
## Define and test issue 15
## Ensure that $order=:id is not used when other $order parameters are requested by the user.
df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json?$order=hardship_index")
expect_equal("35.3", df$percent_aged_under_18_or_over_64[7],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("37.6", df$percent_aged_under_18_or_over_64[23],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("38.5", df$percent_aged_under_18_or_over_64[36],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("32", df$percent_aged_under_18_or_over_64[42],
info = "https://github.com/Chicago/RSocrata/issues/15")
})

test_that("If URL has only non-order query parameters, insert $order:id into URL", {
## Define and test issue 15
## Ensure that $order=:id is inserted when other (non-$order) arguments are used.
df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json?$limit=50")
expect_equal("21.5", df$percent_aged_under_18_or_over_64[7],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("38", df$percent_aged_under_18_or_over_64[23],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("40.4", df$percent_aged_under_18_or_over_64[36],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("36.1", df$percent_aged_under_18_or_over_64[42],
info = "https://github.com/Chicago/RSocrata/issues/15")
df <- read.socrata("https://data.cityofchicago.org/resource/kn9c-c2s2.json?$where=hardship_index>20")
expect_equal("34", df$percent_aged_under_18_or_over_64[7],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("30.7", df$percent_aged_under_18_or_over_64[23],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("41.2", df$percent_aged_under_18_or_over_64[36],
info = "https://github.com/Chicago/RSocrata/issues/15")
expect_equal("42.9", df$percent_aged_under_18_or_over_64[42],
info = "https://github.com/Chicago/RSocrata/issues/15")
})

context("Checks the validity of 4x4")

test_that("is 4x4", {
Expand Down

0 comments on commit 0fea2f6

Please sign in to comment.