Commit

Fixes #105 and improves error message for conflicting app_token arguments
Tom Schenk Jr committed Oct 25, 2016
1 parent 08dc5e0 commit 16de299
Showing 2 changed files with 122 additions and 122 deletions.
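To make the fix concrete, here is a minimal sketch of the intended caller-side behavior after this commit, using a hypothetical domain, dataset identifier, and tokens (none of them appear in the diff): when the URL already embeds a $$app_token query parameter and a token is also passed through the `app_token=` argument, the token in the URL is kept and the clearer warning introduced below is raised.

library(RSocrata)

# Hypothetical resource URL that already carries an app token in its query string
url_with_token <- "https://data.example.com/resource/abcd-1234.csv?$$app_token=TOKEN_IN_URL"

# Conflicting tokens: the URL's token wins and the warning now names the `app_token=` argument
df <- read.socrata(url_with_token, app_token = "TOKEN_FROM_ARGUMENT")

# No conflict: the token supplied through `app_token=` is added to the request URL
df <- read.socrata("https://data.example.com/resource/abcd-1234.csv",
                   app_token = "TOKEN_FROM_ARGUMENT")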
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -10,8 +10,8 @@ Description: Provides easier interaction with
format and manages throttling by 'Socrata'.
Users can upload data to Socrata portals directly
from R.
-Version: 1.7.1-17
-Date: 2016-10-19
+Version: 1.7.1-18
+Date: 2016-10-25
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
Maintainer: "Tom Schenk Jr." <developers@cityofchicago.org>
Depends:
240 changes: 120 additions & 120 deletions R/RSocrata.R
@@ -16,7 +16,7 @@
#' @author Hugh J. Devlin \email{Hugh.Devlin@@cityofchicago.org}
#' @noRd
logMsg <- function(s) {
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%OS3 "), as.character(sys.call(-1))[1], ": ", s, '\n', sep='')
}

#' Checks the validity of the syntax for a potential Socrata dataset Unique Identifier, also known as a 4x4.
@@ -30,12 +30,12 @@ logMsg <- function(s) {
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
#' @export
isFourByFour <- function(fourByFour) {
fourByFour <- as.character(fourByFour)
if(nchar(fourByFour) != 9)
return(FALSE)
if(regexpr("[[:alnum:]]{4}-[[:alnum:]]{4}", fourByFour) == -1)
return(FALSE)
TRUE
}

#' Convert, if necessary, URL to valid REST API URL supported by Socrata.
@@ -54,32 +54,33 @@ isFourByFour <- function(fourByFour) {
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
#' @export
validateUrl <- function(url, app_token) {
url <- as.character(url)
parsedUrl <- httr::parse_url(url)
if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname) | is.null(parsedUrl$path))
stop(url, " does not appear to be a valid URL.")
if(!is.null(app_token)) { # Handles the addition of API token and resolves invalid uses
if(is.null(parsedUrl$query[["$$app_token"]])) {
token_inclusion <- "valid_use"
} else {
token_inclusion <- "already_included" }
switch(token_inclusion,
-  "already_included"={ # Token already included in url argument
-    warning(url, " already contains an API token in url. Ignoring user-defined token.")
-  },
-  "valid_use"={ # app_token argument is used, not duplicative.
-    parsedUrl$query[["app_token"]] <- as.character(paste("%24%24app_token=", app_token, sep=""))
-  })
+  "already_included"={ # Token already included in url argument
+    warning(url, " already contains an API token in url. Ignoring token supplied in the `app_token=` argument.")
+  },
+  "valid_use"={ # app_token argument is used, not duplicative.
+    parsedUrl$query$`$$app_token` <- as.character(app_token)
+  }
+)
}
if(substr(parsedUrl$path, 1, 9) == 'resource/') {
return(httr::build_url(parsedUrl)) # resource url already
}
fourByFour <- basename(parsedUrl$path)
if(!isFourByFour(fourByFour))
stop(fourByFour, " is not a valid Socrata dataset unique identifier.")
else {
parsedUrl$path <- paste('resource/', fourByFour, '.csv', sep="")
httr::build_url(parsedUrl)
}
}

@@ -96,7 +97,7 @@ validateUrl <- function(url, app_token) {
#' @examples
#' fieldName("Number.of.Stations") # number_of_stations
fieldName <- function(humanName) {
tolower(gsub('\\.', '_', as.character(humanName)))
}

#' Convert Socrata calendar_date string to POSIX
@@ -106,30 +107,30 @@ fieldName <- function(humanName) {
#' @export
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org}
posixify <- function(x) {
x <- as.character(x)
if (length(x)==0) return(x)
## Define regex patterns for short and long date formats (CSV) and ISO 8601 (JSON),
## which are the three formats that are supplied by Socrata.
patternShortCSV <- paste0("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$")
patternLongCSV <- paste0("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}",
"[[:digit:]]{1,2}:[[:digit:]]{1,2}:[[:digit:]]{1,2}",
"AM|PM", "$")
patternJSON <- paste0("^[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}T",
"[[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2}.[[:digit:]]{3}","$")
## Find number of matches with grep
nMatchesShortCSV <- grep(pattern = patternShortCSV, x)
nMatchesLongCSV <- grep(pattern = patternLongCSV, x)
nMatchesJSON <- grep(pattern = patternJSON, x)
## Parse as the most likely calendar date format. CSV short/long ties go to short format
if(length(nMatchesLongCSV) > length(nMatchesShortCSV)){
return(as.POSIXct(strptime(x, format="%m/%d/%Y %I:%M:%S %p"))) # long date-time format
} else if (length(nMatchesJSON) == 0){
return(as.POSIXct(strptime(x, format="%m/%d/%Y"))) # short date format
}
if(length(nMatchesJSON) > 0){
as.POSIXct(x, format = "%Y-%m-%dT%H:%M:%S") # JSON format
}
}

#' Convert Socrata money fields to numeric
@@ -155,24 +156,24 @@ no_deniro <- function(x) {
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org}
#' @noRd
getResponse <- function(url, email = NULL, password = NULL) {
if(is.null(email) && is.null(password)){
response <- httr::GET(url)
} else { # email and password are not NULL
response <- httr::GET(url, httr::authenticate(email, password))
}
# status <- httr::http_status(response)
if(response$status_code != 200) {
msg <- paste("Error in httr GET:", response$status_code, response$headers$statusmessage, url)
if(!is.null(response$headers$`content-length`) && (response$headers$`content-length` > 0)) {
details <- httr::content(response)
msg <- paste(msg, details$code[1], details$message[1])
}
logMsg(msg)
}
httr::stop_for_status(response)
return(response)
}

#' Content parsers
@@ -221,9 +222,9 @@ getContentAsDataFrame <- function(response) {
#' @noRd
getSodaTypes <- function(response) { UseMethod('response') }
getSodaTypes <- function(response) {
result <- jsonlite::fromJSON(response$headers[['x-soda2-types']])
names(result) <- jsonlite::fromJSON(response$headers[['x-soda2-fields']])
return(result)
}

#' Get a full Socrata data set as an R data frame
@@ -265,39 +266,39 @@ getSodaTypes <- function(response) {
#' @export
read.socrata <- function(url, app_token = NULL, email = NULL, password = NULL,
stringsAsFactors = FALSE) {
validUrl <- validateUrl(url, app_token) # check url syntax, allow human-readable Socrata url
parsedUrl <- httr::parse_url(validUrl)
mimeType <- mime::guess_type(parsedUrl$path)
if(!(mimeType %in% c('text/csv','application/json')))
stop("Error in read.socrata: ", mimeType, " not a supported data format.")
response <- getResponse(validUrl, email, password)
page <- getContentAsDataFrame(response)
result <- page
dataTypes <- getSodaTypes(response)
while (nrow(page) > 0) { # more to come maybe?
query <- paste(validUrl, if(is.null(parsedUrl$query)) {'?'} else {"&"}, '$offset=', nrow(result), sep='')
response <- getResponse(query, email, password)
page <- getContentAsDataFrame(response)
result <- rbind.fill(result, page) # accumulate
}
# convert Socrata calendar dates to posix format
for(columnName in colnames(result)[!is.na(dataTypes[fieldName(colnames(result))])
& (dataTypes[fieldName(colnames(result))] == 'calendar_date'
| dataTypes[fieldName(colnames(result))] == 'floating_timestamp')]) {
result[[columnName]] <- posixify(result[[columnName]])
}
for(columnName in colnames(result)[!is.na(dataTypes[fieldName(colnames(result))]) & dataTypes[fieldName(colnames(result))] == 'money']) {
result[[columnName]] <- no_deniro(result[[columnName]])
}
# convert logical fields to character
for(columnName in colnames(result)) {
if(typeof(result[,columnName]) == "logical")
result[,columnName] <- as.character(result[,columnName])
}
if(stringsAsFactors){
result <- data.frame(unclass(result), stringsAsFactors = stringsAsFactors)
}
return(result)
}

#' List datasets available from a Socrata domain
@@ -312,17 +313,17 @@ read.socrata <- function(url, app_token = NULL, email = NULL, password = NULL,
#' @importFrom httr parse_url
#' @export
ls.socrata <- function(url) {
url <- as.character(url)
parsedUrl <- httr::parse_url(url)
if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname))
stop(url, " does not appear to be a valid URL.")
parsedUrl$path <- "data.json"
df <- jsonlite::fromJSON(httr::build_url(parsedUrl))
df <- as.data.frame(df$dataset)
df$issued <- as.POSIXct(df$issued)
df$modified <- as.POSIXct(df$modified)
df$theme <- as.character(df$theme)
return(df)
}


@@ -343,18 +344,18 @@ ls.socrata <- function(url) {
checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) {
if(http_verb == "POST"){
response <- httr::POST(url,
body = json_data_to_upload,
httr::authenticate(email, password),
httr::add_headers("X-App-Token" = app_token,
"Content-Type" = "application/json")) #, verbose())
} else if(http_verb == "PUT"){
response <- httr::PUT(url,
body = json_data_to_upload,
httr::authenticate(email, password),
httr::add_headers("X-App-Token" = app_token,
"Content-Type" = "application/json")) # , verbose())
}

return(response)
}

@@ -396,14 +397,13 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email,
} else {
stop("update_mode must be UPSERT or REPLACE")
}

# convert dataframe to JSON
dataframe_as_json_string <- jsonlite::toJSON(dataframe)

# do the actual upload
response <- checkUpdateResponse(dataframe_as_json_string, dataset_json_endpoint, http_verb, email, password, app_token)

return(response)

}
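A design note on the token handling in validateUrl above: the removed line stored a pre-encoded "%24%24app_token=..." string as a query value, while the added line stores the raw token under the `$$app_token` query name and leaves percent-encoding to httr::build_url(). A small sketch of the difference, with a hypothetical token (only httr is assumed; not part of the diff):

library(httr)

parsedUrl <- parse_url("https://data.example.com/resource/abcd-1234.csv")

# New approach: raw token stored under the `$$app_token` name; build_url() escapes
# query names and values, so the query should come out as %24%24app_token=MY_TOKEN
parsedUrl$query$`$$app_token` <- "MY_TOKEN"
build_url(parsedUrl)

# Old approach, for comparison: a pre-encoded "key=value" string stored as a value;
# build_url() would escape it a second time, yielding a malformed app_token parameter
parsedUrl$query$`$$app_token` <- NULL
parsedUrl$query[["app_token"]] <- paste("%24%24app_token=", "MY_TOKEN", sep = "")
build_url(parsedUrl)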
