PecanProject · mdietze · Aug 12, 2020 · Aug 1, 2020 · Aug 1, 2020 · Aug 1, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -49,6 +49,7 @@ For more information about this file see also [Keep a Changelog](http://keepacha
 
 ### Added
 
+- New functionality to the PEcAn API to GET information about PFTs, formats & sites, submit workflows in XML or JSON formats & download relevant inputs/outputs/files related to runs & workflows (#2674 #2665 #2662 #2655)
 - Functions to send/receive messages to/from rabbitmq.
 - Documentation in [DEV-INTRO.md](DEV-INTRO.md) on development in a docker environment (#2553)
 - PEcAn API that can be used to talk to PEcAn servers. Endpoints to GET the details about the server that user is talking to, PEcAn models, workflows & runs. Authetication enabled. (#2631)

diff --git a/apps/api/R/auth.R b/apps/api/R/auth.R
@@ -51,12 +51,16 @@ validate_crypt_pass <- function(username, crypt_pass) {
 #* @return Appropriate response
 #* @author Tezan Sahu
 authenticate_user <- function(req, res) {
+  # Fix CORS issues
+  res$setHeader("Access-Control-Allow-Origin", "*")
+
   # If the API endpoint that do not require authentication
   if (
+    Sys.getenv("AUTH_REQ") == FALSE ||
     grepl("swagger", req$PATH_INFO, ignore.case = TRUE) || 
     grepl("openapi.json", req$PATH_INFO, fixed = TRUE) ||
-    grepl("ping", req$PATH_INFO, ignore.case = TRUE) ||
-    grepl("status", req$PATH_INFO, ignore.case = TRUE))
+    grepl("/api/ping", req$PATH_INFO, ignore.case = TRUE) ||
+    grepl("/api/status", req$PATH_INFO, ignore.case = TRUE))
   {
     req$user$userid <- NA
     req$user$username <- ""

diff --git a/apps/api/R/entrypoint.R b/apps/api/R/entrypoint.R
@@ -31,6 +31,14 @@ root$mount("/api/sites", sites_pr)
 pfts_pr <- plumber::plumber$new("pfts.R")
 root$mount("/api/pfts", pfts_pr)
 
+# The endpoints mounted here are related to details of PEcAn formats
+formats_pr <- plumber::plumber$new("formats.R")
+root$mount("/api/formats", formats_pr)
+
+# The endpoints mounted here are related to details of PEcAn inputs
+inputs_pr <- plumber::plumber$new("inputs.R")
+root$mount("/api/inputs", inputs_pr)
+
 # The endpoints mounted here are related to details of PEcAn workflows
 workflows_pr <- plumber::plumber$new("workflows.R")
 root$mount("/api/workflows", workflows_pr)

diff --git a/apps/api/R/formats.R b/apps/api/R/formats.R
@@ -0,0 +1,89 @@
+library(dplyr)
+
+#' Retrieve the details of a PEcAn format, based on format_id
+#' @param format_id Format ID (character)
+#' @param res Plumber response object; its status is set to 404 when no format matches
+#' @return Format details as a list (including `format_variables`), or an error list
+#' @author Tezan Sahu
+#* @get /<format_id>
+getFormat <- function(format_id, res){
+
+  dbcon <- PEcAn.DB::betyConnect()
+  # Close the connection on every exit path, including query errors
+  on.exit(PEcAn.DB::db.close(dbcon), add = TRUE)
+
+  # Look up the requested format by id
+  Format <- tbl(dbcon, "formats") %>%
+    select(format_id = id, name, notes, header, mimetype_id) %>%
+    filter(format_id == !!format_id)
+
+  # Attach the mimetype string in place of its id
+  Format <- tbl(dbcon, "mimetypes") %>%
+    select(mimetype_id = id, mimetype = type_string) %>%
+    inner_join(Format, by = "mimetype_id") %>%
+    select(-mimetype_id)
+
+  qry_res <- Format %>% collect()
+
+  if (nrow(qry_res) == 0) {
+    res$status <- 404
+    return(list(error="Format not found"))
+  }
+
+  # Convert the response from tibble to a named list of its columns
+  response <- as.list(qry_res)
+
+  # Variables attached to this format; an empty format-specific `unit` is
+  # replaced by the `units` column of the variables table
+  format_vars <- tbl(dbcon, "formats_variables") %>%
+    select(name, unit, format_id, variable_id) %>%
+    filter(format_id == !!format_id)
+  format_vars <- tbl(dbcon, "variables") %>%
+    select(variable_id = id, description, units) %>%
+    inner_join(format_vars, by="variable_id") %>%
+    mutate(unit = ifelse(unit %in% "", units, unit)) %>%
+    select(-variable_id, -format_id, -units) %>%
+    collect()
+
+  response$format_variables <- format_vars
+  return(response)
+}
+
+#########################################################################
+
+#' Search for PEcAn format(s) containing wildcards for filtering
+#' @param format_name Format name search string (character)
+#' @param mimetype Mime type search string (character)
+#' @param ignore_case Logical. If `TRUE` (default) use case-insensitive search otherwise, use case-sensitive search
+#' @return Formats subset matching the format name & mime type search strings
+#' @author Tezan Sahu
+#* @get /
+searchFormats <- function(format_name="", mimetype="", ignore_case=TRUE, res){
+  format_name <- URLdecode(format_name)
+  mimetype <- URLdecode(mimetype)
+
+  dbcon <- PEcAn.DB::betyConnect()
+  # Close the connection on every exit path, including query errors
+  on.exit(PEcAn.DB::db.close(dbcon), add = TRUE)
+
+  Formats <- tbl(dbcon, "formats") %>%
+    select(format_id = id, format_name=name, mimetype_id) %>%
+    filter(grepl(!!format_name, format_name, ignore.case=ignore_case))
+
+  Formats <- tbl(dbcon, "mimetypes") %>%
+    select(mimetype_id = id, mimetype = type_string) %>%
+    inner_join(Formats, by = "mimetype_id") %>%
+    filter(grepl(!!mimetype, mimetype, ignore.case=ignore_case)) %>%
+    select(-mimetype_id) %>%
+    arrange(format_id)
+
+  qry_res <- Formats %>% collect()
+
+  if (nrow(qry_res) == 0) {
+    res$status <- 404
+    return(list(error="Format(s) not found"))
+  }
+
+  # Return the matching formats together with how many were found
+  return(list(formats=qry_res, count = nrow(qry_res)))
+}
diff --git a/apps/api/R/general.R b/apps/api/R/general.R
@@ -20,6 +20,7 @@ status <- function() {
 
   dbcon <- PEcAn.DB::betyConnect()
   res <- list(host_details = PEcAn.DB::dbHostInfo(dbcon))
+  res$host_details$authentication_required = get_env_var("AUTH_REQ")
 
   res$pecan_details <- list(
     version = get_env_var("PECAN_VERSION"), 

diff --git a/apps/api/R/get.file.R b/apps/api/R/get.file.R
@@ -0,0 +1,38 @@
+library(dplyr)
+
+#' Read a file as raw bytes; if AUTH_REQ is TRUE, first check the run's workflow is owned by the user
+#' @param filepath Path of the file; the name of its parent directory is used as the run id
+#' @param userid Id of the requesting user
+#' @return list(file_contents = <raw vector>), or list(status = "Error", message = ...)
+get.file <- function(filepath, userid) {
+  # Check if the file path is valid
+  if(! file.exists(filepath)){
+    return(list(status = "Error", message = "File not found"))
+  }
+
+  # The run id is the name of the (normalized) directory holding the file
+  run_id <- basename(normalizePath(dirname(filepath)))
+
+  if(Sys.getenv("AUTH_REQ") == TRUE) {
+    dbcon <- PEcAn.DB::betyConnect()
+    # Close the connection even if one of the queries below fails
+    on.exit(PEcAn.DB::db.close(dbcon), add = TRUE)
+    Run <- tbl(dbcon, "runs") %>%
+      filter(id == !!run_id)
+    Run <- tbl(dbcon, "ensembles") %>%
+      select(ensemble_id=id, workflow_id) %>%
+      full_join(Run, by="ensemble_id")  %>%
+      filter(id == !!run_id)
+    user_id <- tbl(dbcon, "workflows") %>%
+      select(workflow_id=id, user_id) %>% full_join(Run, by="workflow_id")  %>%
+      filter(id == !!run_id) %>%
+      pull(user_id)
+    # An unknown run (no rows) or a missing owner (NA) must be refused, not crash
+    if(length(user_id) != 1 || ! isTRUE(user_id == userid)) {
+      return(list(status = "Error", message = "Access forbidden"))
+    }
+  }
+
+  # Read the data in binary form & return it
+  return(list(file_contents = readBin(filepath, 'raw', n = file.info(filepath)$size)))
+}
diff --git a/apps/api/R/inputs.R b/apps/api/R/inputs.R
@@ -0,0 +1,185 @@
+library(dplyr)
+
+#' Search for Inputs containing wildcards for filtering
+#' @param req Plumber request object, used to build the next/previous page URLs
+#' @param model_id Model Id (character)
+#' @param site_id Site Id (character)
+#' @param format_id Format Id (character)
+#' @param host_id Host (machine) Id (character)
+#' @param offset Number of matching inputs to skip (non-negative whole number)
+#' @param limit Page size; one of 10, 20, 50, 100 or 500
+#' @param res Plumber response object; status is set to 400 or 404 on failure
+#' @return Information about Inputs based on model, site, format & host
+#' @author Tezan Sahu
+#* @get /
+searchInputs <- function(req, model_id=NULL, site_id=NULL, format_id=NULL, host_id=NULL, offset=0, limit=50, res){
+  # Query parameters arrive as strings; a malformed offset must yield 400, not a crash
+  offset <- suppressWarnings(as.numeric(offset))
+  if ((! limit %in% c(10, 20, 50, 100, 500)) || is.na(offset) || offset < 0 || offset %% 1 != 0) {
+    res$status <- 400
+    return(list(error = "Invalid value for parameter"))
+  }
+  limit <- as.numeric(limit)
+
+  dbcon <- PEcAn.DB::betyConnect()
+  # Close the connection on every exit path, including query errors
+  on.exit(PEcAn.DB::db.close(dbcon), add = TRUE)
+
+  # Join each input with its file, host, format, mimetype & site details
+  inputs <- tbl(dbcon, "inputs") %>%
+    select(input_name=name, id, site_id, format_id, start_date, end_date)
+
+  inputs <- tbl(dbcon, "dbfiles") %>%
+    select(file_name, file_path, container_type, id=container_id, machine_id) %>%
+    inner_join(inputs, by = "id") %>%
+    filter(container_type == 'Input') %>%
+    select(-container_type)
+
+  inputs <- tbl(dbcon, "machines") %>%
+    select(hostname, machine_id=id) %>%
+    inner_join(inputs, by='machine_id')
+
+  inputs <- tbl(dbcon, "formats") %>%
+    select(format_id = id, format_name = name, mimetype_id) %>%
+    inner_join(inputs, by='format_id')
+
+  inputs <- tbl(dbcon, "mimetypes") %>%
+    select(mimetype_id = id, mimetype = type_string) %>%
+    inner_join(inputs, by='mimetype_id') %>%
+    select(-mimetype_id)
+
+  inputs <- tbl(dbcon, "sites") %>%
+    select(site_id = id, sitename) %>%
+    inner_join(inputs, by='site_id')
+
+  # Restrict to formats flagged as inputs for the requested model's modeltype
+  if(! is.null(model_id)) {
+    inputs <- tbl(dbcon, "modeltypes_formats") %>%
+      select(tag, modeltype_id, format_id, input) %>%
+      inner_join(inputs, by='format_id') %>%
+      filter(input) %>%
+      select(-input)
+
+    inputs <- tbl(dbcon, "models") %>%
+      select(model_id = id, modeltype_id, model_name, revision) %>%
+      inner_join(inputs, by='modeltype_id') %>%
+      filter(model_id == !!model_id) %>%
+      select(-modeltype_id, -model_id)
+  }
+
+  if(! is.null(site_id)) {
+    inputs <- inputs %>%
+      filter(site_id == !!site_id)
+  }
+
+  if(! is.null(format_id)) {
+    inputs <- inputs %>%
+      filter(format_id == !!format_id)
+  }
+
+  if(! is.null(host_id)) {
+    inputs <- inputs %>%
+      filter(machine_id == !!host_id)
+  }
+
+  qry_res <- inputs %>%
+    select(-site_id, -format_id, -machine_id) %>%
+    distinct() %>%
+    arrange(id) %>%
+    collect()
+
+  total <- nrow(qry_res)
+  if (total == 0 || offset >= total) {
+    res$status <- 404
+    return(list(error="Input(s) not found"))
+  }
+
+  # Build the URL of another page of this same search: existing offset/limit
+  # parameters are dropped and re-appended, every other parameter is kept
+  page_url <- function(new_offset) {
+    qs <- if (is.null(req$QUERY_STRING)) "" else sub("^\\?", "", req$QUERY_STRING)
+    params <- strsplit(qs, "&", fixed = TRUE)[[1]]
+    params <- params[nzchar(params) & ! grepl("^(offset|limit)=", params)]
+    params <- c(params, paste0("offset=", format(new_offset, scientific = FALSE)), paste0("limit=", limit))
+    paste0(
+      req$rook.url_scheme, "://",
+      req$HTTP_HOST,
+      "/api/inputs",
+      req$PATH_INFO,
+      "?", paste(params, collapse = "&")
+    )
+  }
+
+  # Keep only the requested page
+  last_row <- min(offset + limit, total)
+  qry_res <- qry_res[(offset + 1):last_row, ]
+
+  result <- list(inputs = qry_res)
+  result$count <- nrow(qry_res)
+  if (total > offset + limit) {
+    result$next_page <- page_url(offset + limit)
+  }
+  if (offset != 0) {
+    result$prev_page <- page_url(max(0, offset - limit))
+  }
+
+  return(result)
+}
+
+#################################################################################################
+
+#' Download the input specified by the id
+#' @param input_id Input id (character)
+#' @param filename Optional filename specified if the id points to a folder instead of file (character)
+#' If this is passed with an id that actually points to a file, this name will be ignored
+#' @param req Plumber request object (unused)
+#' @param res Plumber response object; status is set to 400 or 404 on failure
+#' @return Input file specified by user, as a raw vector
+#' @author Tezan Sahu
+#* @serializer contentType list(type="application/octet-stream")
+#* @get /<input_id>
+downloadInput <- function(input_id, filename="", req, res){
+  dbcon <- PEcAn.DB::betyConnect()
+  # Close the connection on every exit path, including query errors
+  on.exit(PEcAn.DB::db.close(dbcon), add = TRUE)
+  db_hostid <- PEcAn.DB::dbHostInfo(dbcon)$hostid
+
+  # This is just for temporary testing due to the existing issue in dbHostInfo()
+  db_hostid <- ifelse(db_hostid == 99, 99000000001, db_hostid)
+
+  input <- tbl(dbcon, "dbfiles") %>%
+    select(file_name, file_path, container_id, machine_id, container_type) %>%
+    filter(machine_id == !!db_hostid) %>%
+    filter(container_type == "Input") %>%
+    filter(container_id == !!input_id) %>%
+    collect()
+
+  if (nrow(input) == 0) {
+    res$status <- 404
+    return()
+  }
+
+  # Generate the full file path using the file_path & file_name
+  filepath <- paste0(input$file_path, "/", input$file_name)
+
+  # If the id points to a directory, 'filename' must name a file inside it
+  if(dir.exists(filepath)) {
+    # No filename, or one that climbs out of the directory via "..": 400 Bad Request
+    if(filename == "" || ".." %in% strsplit(filename, "[/\\\\]")[[1]]) {
+      res$status <- 400
+      return()
+    }
+    # file.path() inserts the separator that plain concatenation left out
+    filepath <- file.path(filepath, filename)
+  }
+
+  # If the file doesn't exist (or is itself a directory), return 404 error
+  if(! file.exists(filepath) || dir.exists(filepath)){
+    res$status <- 404
+    return()
+  }
+
+  # Read the data in binary form & return it
+  bin <- readBin(filepath,'raw', n = file.info(filepath)$size)
+  return(bin)
+}
diff --git a/apps/api/R/models.R b/apps/api/R/models.R
@@ -50,6 +50,8 @@ getModel <- function(model_id, res){
 #' @author Tezan Sahu
 #* @get /
 searchModels <- function(model_name="", revision="", ignore_case=TRUE, res){
+  model_name <- URLdecode(model_name)
+  revision <- URLdecode(revision)
 
   dbcon <- PEcAn.DB::betyConnect()
 

diff --git a/apps/api/R/pfts.R b/apps/api/R/pfts.R
@@ -49,6 +49,10 @@ getPfts <- function(pft_id, res){
 #' @author Tezan Sahu
 #* @get /
 searchPfts <- function(pft_name="", pft_type="", model_type="", ignore_case=TRUE, res){
+  pft_name <- URLdecode(pft_name)
+  pft_type <- URLdecode(pft_type)
+  model_type <- URLdecode(model_type)
+
   if(! pft_type %in% c("", "plant", "cultivar")){
     res$status <- 400
     return(list(error = "Invalid pft_type"))