Skip to content

Commit

Permalink
Replace encoding iso-8859-1:1998 with latin1
Browse files (browse the repository at this point in the history)
and add argument "check.encoding"
  • Loading branch information
hsonne committed Mar 26, 2024
1 parent a5df320 commit 198bef1
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 4 deletions.
3 changes: 1 addition & 2 deletions NAMESPACE
Expand Up @@ -11,12 +11,11 @@ export(readEuCodedFile)
export(readEuCodedFiles)
export(writeEuCodedFile)
export(writeEuCodedFiles)
importFrom(kwb.utils,.logok)
importFrom(kwb.utils,.logstart)
importFrom(kwb.utils,catAndRun)
importFrom(kwb.utils,catIf)
importFrom(kwb.utils,isTryError)
importFrom(kwb.utils,orderBy)
importFrom(kwb.utils,readLinesWithEncoding)
importFrom(kwb.utils,selectColumns)
importFrom(kwb.utils,setColumns)
importFrom(kwb.utils,stopFormatted)
Expand Down
16 changes: 14 additions & 2 deletions R/readEuCodedFile.R
Expand Up @@ -25,11 +25,15 @@
#' @param warn if \code{TRUE}, warnings are shown (e.g. if not all #A-header
#' fields were found)
#' @param dbg if \code{TRUE}, debug messages are shown, else not
#' @param check.encoding logical indicating whether or not to check if the
#' encoding string that is given in the \code{#A1} header of the file is
#' "known". The default is \code{TRUE}, i.e. the check is performed and an
#' error is thrown if the encoding is not in the list of known encodings.
#' @param \dots further arguments to be passed to
#' \code{kwb.en13508.2:::getObservationRecordsFromEuLines}
#' @return list with elements \code{header.info}, \code{inspections},
#' \code{observations}
#' @importFrom kwb.utils catAndRun catIf isTryError .logstart .logok
#' @importFrom kwb.utils catAndRun readLinesWithEncoding
#' @export
#'
readEuCodedFile <- function(
Expand All @@ -41,6 +45,7 @@ readEuCodedFile <- function(
simple.algorithm = TRUE,
warn = TRUE,
dbg = TRUE,
check.encoding = TRUE,
...
)
{
Expand All @@ -54,9 +59,16 @@ readEuCodedFile <- function(
# If not explicitly given, use the encoding as given in the #A1 header
if (is.null(file.encoding)) {
file.encoding <- readFileEncodingFromHeader(input.file)

# Replace "iso-8859-1:1998" with "latin1"
# (see https://de.wikipedia.org/wiki/ISO_8859-1:
# "ISO 8859-1, genauer ISO/IEC 8859-1, auch bekannt als Latin-1 [...]")
file.encoding <- gsub("^iso-8859-1:1998$", "latin1", file.encoding)
}

stopOnInvalidEncoding(file.encoding)
if (check.encoding) {
stopOnInvalidEncoding(file.encoding)
}

eu_lines <- run(
sprintf("Reading %s assuming %s encoding", input.file, file.encoding),
Expand Down
6 changes: 6 additions & 0 deletions man/readEuCodedFile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 198bef1

Please sign in to comment.