# A formatted list of OpenRefine commands used to format the data

This R script parses the OpenRefine JSON command export and lists, step by step, what I did to refine these data and reconcile them
against Wikidata and other resources to produce the enhanced dataset.

In [5]:
library(jsonlite)
library(dplyr)
library(knitr)
# Read the JSON file
# fromJSON() accepts a JSON string, URL or file; a path that does not exist
# would be parsed as literal JSON text and fail with a misleading parse
# error, so check for the file explicitly and stop with a clear message.
json_file <- "./openrefine/json/openrefine-commands.json"
if (!file.exists(json_file)) {
  stop("OpenRefine command export not found: ", json_file, call. = FALSE)
}
# flatten = TRUE asks jsonlite to flatten nested data frames into a single
# non-nested data frame.
json_data <- fromJSON(json_file, flatten = TRUE)

#' Extract the column name, expression and description of one operation.
#'
#' @param op A list-like record for a single operation. The fields
#'   `columnName`, `expression` and `description` are read from it.
#' @return A data.frame with the columns `colname`, `expression` and
#'   `description` (strings are not converted to factors). A field that is
#'   absent (NULL) or of length zero is returned as `NA`, so every call
#'   yields the same three columns.
extract_info <- function(op) {
  # `[[` matches names exactly; `$` on a list matches partially and could
  # silently pick up a similarly named field.
  field <- function(name) {
    value <- op[[name]]
    if (is.null(value) || length(value) == 0L) {
      return(NA_character_)
    }
    value
  }

  data.frame(
    colname = field("columnName"),
    expression = field("expression"),
    description = field("description"),
    stringsAsFactors = FALSE
  )
}

# Build one output row per operation: each row of json_data is handed to
# extract_info(), and the resulting data frames are bound together.
formatted_data <- json_data %>%
  rowwise() %>%
  do(extract_info(.)) %>%
  # Drop the row-wise grouping so later verbs work on whole columns rather
  # than being evaluated once per row.
  ungroup()


# Print the formatted data
print(formatted_data)

# Convert to markdown table
# Missing values are replaced with empty strings in every column before the
# table is rendered.
markdown_table <- formatted_data %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), ""))) %>%
  knitr::kable(format = "markdown")


# Save the markdown table to a file
output_file <- "./openrefine-commands.md"
writeLines(markdown_table, con = output_file)
message("The commands have been formatted to a markdown table")

# A tibble: 132 × 3
# Rowwise:
   colname                    expression   description                          
   <chr>                      <chr>        <chr>                                
 1 designated_site_name       value.trim() Text transform on cells in column de…
 2 heritage_category          value.trim() Text transform on cells in column he…
 3 local_planning_authority   value.trim() Text transform on cells in column lo…
 4 site_type                  value.trim() Text transform on cells in column si…
 5 site_sub_type              value.trim() Text transform on cells in column si…
 6 county                     value.trim() Text transform on cells in column co…
 7 district_or_borough        value.trim() Text transform on cells in column di…
 8 parish                     value.trim() Text transform on cells in column pa…
