diff --git a/doc/about.rst b/doc/about.rst index 984cc17b..1e10cca4 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -1,3 +1,7 @@ +##### +About +##### + What is Loom? ============= diff --git a/doc/examples/add_then_multiply/add_then_multiply.yaml b/doc/examples/add_then_multiply/add_then_multiply.yaml new file mode 100644 index 00000000..7f953a99 --- /dev/null +++ b/doc/examples/add_then_multiply/add_then_multiply.yaml @@ -0,0 +1,48 @@ +name: add_then_multiply +inputs: + - type: integer + channel: a + data: + contents: 3 + - type: integer + channel: b + data: + contents: 5 + - type: integer + channel: c + data: + contents: 7 +outputs: + - type: integer + channel: result +steps: + - name: add + command: print({{ a }} + {{ b }}, end='') + environment: + docker_image: python + interpreter: python + inputs: + - type: integer + channel: a + - type: integer + channel: b + outputs: + - type: integer + channel: ab_sum + source: + stream: stdout + - name: multiply + command: print({{ c }} * {{ ab_sum }}, end='') + environment: + docker_image: python + interpreter: python + inputs: + - type: integer + channel: ab_sum + - type: integer + channel: c + outputs: + - type: integer + channel: result + source: + stream: stdout diff --git a/doc/examples/building_blocks/add.yaml b/doc/examples/building_blocks/add.yaml new file mode 100644 index 00000000..53f6cf5d --- /dev/null +++ b/doc/examples/building_blocks/add.yaml @@ -0,0 +1,15 @@ +name: add +command: print({{ a }} + {{ b }}, end='') +environment: + docker_image: python +interpreter: python +inputs: + - type: integer + channel: a + - type: integer + channel: b +outputs: + - type: integer + channel: ab_sum + source: + stream: stdout diff --git a/doc/examples/building_blocks/building_blocks.yaml b/doc/examples/building_blocks/building_blocks.yaml new file mode 100644 index 00000000..027410b2 --- /dev/null +++ b/doc/examples/building_blocks/building_blocks.yaml @@ -0,0 +1,20 @@ +name: building_blocks +inputs: + - type: integer + 
channel: a + data: + contents: 3 + - type: integer + channel: b + data: + contents: 5 + - type: integer + channel: c + data: + contents: 7 +outputs: + - type: integer + channel: result +steps: + - add$5b4c0b69984d50284d0a3c38f735c532 + - multiply$32b277628f359b590304f69202cd6f86 diff --git a/doc/examples/building_blocks/multiply.yaml b/doc/examples/building_blocks/multiply.yaml new file mode 100644 index 00000000..e24a6e34 --- /dev/null +++ b/doc/examples/building_blocks/multiply.yaml @@ -0,0 +1,15 @@ +name: multiply +command: print({{ c }} * {{ ab_sum }}, end='') +environment: + docker_image: python +interpreter: python +inputs: + - type: integer + channel: ab_sum + - type: integer + channel: c +outputs: + - type: integer + channel: result + source: + stream: stdout diff --git a/doc/examples/capitalize_words/capitalize_words.yaml b/doc/examples/capitalize_words/capitalize_words.yaml new file mode 100644 index 00000000..78465b04 --- /dev/null +++ b/doc/examples/capitalize_words/capitalize_words.yaml @@ -0,0 +1,15 @@ +name: capitalize_words +command: echo -n {{word}} | awk '{print toupper($0)}' +environment: + docker_image: ubuntu:latest +inputs: + - channel: words + as_channel: word + type: string + data: + contents: [aardvark,aback,abacus,abaft] +outputs: + - channel: wordoutput + type: string + source: + stream: stdout diff --git a/doc/examples/hello_world/hello_world.yaml b/doc/examples/hello_world/hello_world.yaml index 521cc854..4d683263 100644 --- a/doc/examples/hello_world/hello_world.yaml +++ b/doc/examples/hello_world/hello_world.yaml @@ -1,4 +1,3 @@ ---- name: hello_world inputs: - type: file diff --git a/doc/examples/join_array_of_words/join_array_of_words.yaml b/doc/examples/join_array_of_words/join_array_of_words.yaml new file mode 100644 index 00000000..9639a89f --- /dev/null +++ b/doc/examples/join_array_of_words/join_array_of_words.yaml @@ -0,0 +1,15 @@ +name: join_array_of_words +command: echo -n {{wordarray}} +environment: + docker_image: 
ubuntu:latest +inputs: + - channel: wordarray + type: string + mode: gather + data: + contents: [aardvark,aback,abacus,abaft] +outputs: + - channel: wordoutput + type: string + source: + stream: stdout diff --git a/doc/examples/join_two_words/join_two_words.yaml b/doc/examples/join_two_words/join_two_words.yaml new file mode 100644 index 00000000..0ae06908 --- /dev/null +++ b/doc/examples/join_two_words/join_two_words.yaml @@ -0,0 +1,18 @@ +name: join_two_words +command: echo {{word1}} {{word2}} +environment: + docker_image: ubuntu:latest +inputs: + - channel: word1 + type: string + data: + contents: hello + - channel: word2 + type: string + data: + contents: world +outputs: + - channel: output_text + type: string + source: + stream: stdout diff --git a/doc/examples/nested_hello_world/README.txt b/doc/examples/nested_hello_world/README.txt deleted file mode 100644 index 56e0d6e6..00000000 --- a/doc/examples/nested_hello_world/README.txt +++ /dev/null @@ -1,39 +0,0 @@ -This example shows how with nested templates, children -can be defined separately from their parents. - -To run this workflow: - -1. If running for the first time, start the server - - loom server start --settings-file local.conf - -2. Import the input files - - loom import files hello.txt world.txt - -3. Import the child templates - - loom import template steps/hello_step.yaml - loom import template steps/world_step.yaml - loom import template steps/final_step.yaml - -4. Import the parent template - - loom import template hello_world.yaml - -5. Select inputs and execute the run - - loom run hello_world hello=hello.txt world=world.txt - -6. Monitor the run from the commandline - - loom show run hello_world --detail - -7. Monitor the run in the browser - - loom browser - -8. 
If you want to delete the Loom server - - loom server delete - diff --git a/doc/examples/nested_hello_world/hello.txt b/doc/examples/nested_hello_world/hello.txt deleted file mode 100644 index ce013625..00000000 --- a/doc/examples/nested_hello_world/hello.txt +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/doc/examples/nested_hello_world/hello_world.yaml b/doc/examples/nested_hello_world/hello_world.yaml deleted file mode 100644 index ce3752dd..00000000 --- a/doc/examples/nested_hello_world/hello_world.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: hello_world -inputs: -- type: file - hint: Enter the 'hello' file - channel: hello -- type: file - hint: Enter the 'world' file - channel: world -- type: string - channel: final_punc - data: - contents: "!!" -outputs: -- type: string - channel: hello_world -steps: -- hello_step -- world_step -- final_step diff --git a/doc/examples/nested_hello_world/steps/final_step.yaml b/doc/examples/nested_hello_world/steps/final_step.yaml deleted file mode 100644 index e1bb4040..00000000 --- a/doc/examples/nested_hello_world/steps/final_step.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: final_step -command: echo $(cat {{ hello_cap }} {{ world_cap }})'{{ final_punc}}' | tee {{ hello_world }} | tee /dev/fd/2 -environment: - docker_image: ubuntu -resources: - cores: '1' - memory: '1' -inputs: - - type: file - channel: hello_cap - group: 0 - - type: file - channel: world_cap - group: 0 - - type: string - channel: final_punc - group: 1 -outputs: - - type: string - channel: hello_world - source: - filename: hello_world.out diff --git a/doc/examples/nested_hello_world/steps/hello_step.yaml b/doc/examples/nested_hello_world/steps/hello_step.yaml deleted file mode 100644 index bf9dd9c6..00000000 --- a/doc/examples/nested_hello_world/steps/hello_step.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: hello_step -command: cat {{ hello }} | tr '[a-z]' '[A-Z]' | tee {{ hello_cap }} | tee /dev/fd/2 -environment: - docker_image: ubuntu -resources: 
- cores: '1' - memory: '1' -inputs: - - type: file - channel: hello -outputs: - - type: file - channel: hello_cap - source: - filename: hello_cap.txt diff --git a/doc/examples/nested_hello_world/steps/world_step.yaml b/doc/examples/nested_hello_world/steps/world_step.yaml deleted file mode 100644 index 2c19fbc5..00000000 --- a/doc/examples/nested_hello_world/steps/world_step.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: world_step -command: cat {{ world }} | tr '[a-z]' '[A-Z]' | tee {{ world_cap }} | tee /dev/fd/2 -environment: - docker_image: ubuntu -resources: - cores: '1' - memory: '1' -inputs: - - type: file - channel: world -outputs: - - type: file - channel: world_cap - source: - filename: world_cap.txt diff --git a/doc/examples/nested_hello_world/world.txt b/doc/examples/nested_hello_world/world.txt deleted file mode 100644 index cc628ccd..00000000 --- a/doc/examples/nested_hello_world/world.txt +++ /dev/null @@ -1 +0,0 @@ -world diff --git a/doc/examples/search_file/beowulf.txt b/doc/examples/search_file/beowulf.txt new file mode 100644 index 00000000..ea78824c --- /dev/null +++ b/doc/examples/search_file/beowulf.txt @@ -0,0 +1,13 @@ +Lo! the Spear-Danes' glory through splendid achievements +The folk-kings' former fame we have heard of, +How princes displayed then their prowess-in-battle. +Oft Scyld the Scefing from scathers in numbers +From many a people their mead-benches tore. +Since first he found him friendless and wretched, +The earl had had terror: comfort he got for it, +Waxed 'neath the welkin, world-honor gained, +Till all his neighbors o'er sea were compelled to +Bow to his bidding and bring him their tribute: +An excellent atheling! After was borne him +A son and heir, young in his dwelling, +Whom God-Father sent to solace the people. 
diff --git a/doc/examples/search_file/lorem_ipsum.txt b/doc/examples/search_file/lorem_ipsum.txt new file mode 100644 index 00000000..04de33e4 --- /dev/null +++ b/doc/examples/search_file/lorem_ipsum.txt @@ -0,0 +1,9 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing +elit, sed do eiusmod tempor incididunt ut labore et +dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip +ex ea commodo consequat. Duis aute irure dolor in +reprehenderit in voluptate velit esse cillum dolore +eu fugiat nulla pariatur. Excepteur sint occaecat +cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum. diff --git a/doc/examples/search_file/search_file.yaml b/doc/examples/search_file/search_file.yaml new file mode 100644 index 00000000..ed657244 --- /dev/null +++ b/doc/examples/search_file/search_file.yaml @@ -0,0 +1,23 @@ +name: search_file +command: grep {{pattern}} {{file_to_search}} +environment: + docker_image: ubuntu:latest +inputs: + - channel: file_to_search + type: file + data: + contents: lorem_ipsum.txt + - channel: pattern + type: string + data: + contents: dolor +outputs: + - channel: matches + type: string + mode: scatter + source: + stream: stdout + parser: + type: delimited + options: + delimiter: "\n" diff --git a/doc/examples/word_scoring/word_scoring.yaml b/doc/examples/sentence_scoring/sentence_scoring.yaml similarity index 70% rename from doc/examples/word_scoring/word_scoring.yaml rename to doc/examples/sentence_scoring/sentence_scoring.yaml index 1349a189..f546f3e1 100644 --- a/doc/examples/word_scoring/word_scoring.yaml +++ b/doc/examples/sentence_scoring/sentence_scoring.yaml @@ -1,7 +1,6 @@ ---- -name: word_scoring +name: sentence_scoring inputs: -- channel: wordlist +- channel: sentence type: string hint: Input text to be broken into words and letters data: @@ -11,12 +10,12 @@ outputs: type: integer steps: - name: split_into_words - command: echo {{ wordlist }} + command: 
echo {{ sentence }} inputs: - - channel: wordlist + - channel: sentence type: string outputs: - - channel: word + - channel: words mode: scatter type: string source: @@ -28,17 +27,15 @@ steps: trim: true environment: docker_image: ubuntu - resources: - cores: '1' - memory: '1' - name: split_into_letters interpreter: python command: print(' '.join([letter for letter in '{{ word }}'])) inputs: - - channel: word + - channel: words + as_channel: word type: string outputs: - - channel: letter + - channel: letters type: string mode: scatter source: @@ -50,47 +47,39 @@ steps: trim: true environment: docker_image: python - resources: - cores: '1' - memory: '1' - name: letter_to_integer interpreter: python command: print(ord( '{{ letter }}' ), end='') inputs: - - channel: letter + - channel: letters + as_channel: letter type: string outputs: - - channel: letter_value + - channel: letter_values type: integer source: stream: stdout environment: docker_image: python - resources: - cores: '1' - memory: '1' - name: sum_word interpreter: python - command: print({{ letter_value|join(' + ') }}, end='') + command: print({{ letter_values|join(' + ') }}, end='') inputs: - - channel: letter_value + - channel: letter_values type: integer mode: gather outputs: - - channel: word_value + - channel: word_values type: integer source: stream: stdout environment: docker_image: python - resources: - cores: '1' - memory: '1' - name: multiply_sentence interpreter: python - command: print({{ word_value|join(' * ') }}, end='') + command: print({{ word_values|join(' * ') }}, end='') inputs: - - channel: word_value + - channel: word_values type: integer mode: gather outputs: @@ -100,6 +89,3 @@ steps: stream: stdout environment: docker_image: python - resources: - cores: '1' - memory: '1' diff --git a/doc/examples/split_words_into_array/split_words_into_array.yaml b/doc/examples/split_words_into_array/split_words_into_array.yaml new file mode 100644 index 00000000..5d1c7962 --- /dev/null +++ 
b/doc/examples/split_words_into_array/split_words_into_array.yaml @@ -0,0 +1,24 @@ +name: split_words_into_array +command: echo -n {{text}} +environment: + docker_image: ubuntu:latest +inputs: + - channel: text + type: string + data: + contents: > + Lorem ipsum dolor sit amet, consectetur adipiscing + elit, sed do eiusmod tempor incididunt ut labore et dolore + magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation + ullamco laboris nisi ut aliquip ex ea commodo consequat. +outputs: + - channel: wordlist + type: string + mode: scatter + source: + stream: stdout + parser: + type: delimited + options: + delimiter: " " + trim: True diff --git a/doc/examples/word_combinations/README.txt b/doc/examples/word_combinations/README.txt deleted file mode 100644 index 6af01060..00000000 --- a/doc/examples/word_combinations/README.txt +++ /dev/null @@ -1,37 +0,0 @@ -This template shows how to create parallel workflows -by using nonscalar inputs, defining scatter/gather modes -on outputs/inputs respectively. It shows how to combine array -inputs with cross-product behavior by giving the inputs different -groups. (Inputs within the same group combine as a dot-product). -It also shows how "gather depth" can be set to flatten input data -that is more than scalar or 1-dimensional (in this case gather(2). - -To run this workflow: - -1. If running for the first time, start the server - - loom server start --settings-file local.conf - -2. Import the template - - loom import template word_combinations.yaml - -3. Execute the run - - loom run word_combinations - - # or to override the default input values: - - loom run word_combinations adjectives="[hard,soft]" nouns="[ball,sell,wood]" - -4. Monitor the run from the commandline - - loom show run word_combinations --detail - -5. Monitor the run in the browser - - loom browser - -6. 
If you want to delete the Loom server - - loom server delete diff --git a/doc/examples/word_combinations/word_combinations.yaml b/doc/examples/word_combinations/word_combinations.yaml index c6cfffa1..682f8354 100644 --- a/doc/examples/word_combinations/word_combinations.yaml +++ b/doc/examples/word_combinations/word_combinations.yaml @@ -1,4 +1,3 @@ ---- name: word_combinations inputs: - channel: adjectives @@ -14,18 +13,21 @@ outputs: type: file steps: - name: combine_words - command: echo "{{adjectives}} {{nouns}}" > {{word_pair_files}} + command: echo "{{adjective}} {{noun}}" > {{word_pair_file}} environment: docker_image: ubuntu inputs: - channel: adjectives + as_channel: adjective type: string group: 0 - channel: nouns + as_channel: noun type: string group: 1 outputs: - channel: word_pair_files + as_channel: word_pair_file type: file source: filename: word_pair.txt diff --git a/doc/examples/word_scoring/README.txt b/doc/examples/word_scoring/README.txt deleted file mode 100644 index 4ef81063..00000000 --- a/doc/examples/word_scoring/README.txt +++ /dev/null @@ -1,40 +0,0 @@ -This template shows how Loom data is not restricted to -scalars or arrays but also supports higher dimensionality, -and here this is used to create a -scatter-scatter-gather-gather pattern. In this example, -a sentence is split into words, which are split into -letters. Each letter is converted to an integer score. -When a "gather" is applied to sum the scores letters, -it merges only the last level of split, creating a separate -sum for each word. A second gather is applied to calculate -the product of the scores of all the words. - -To run this workflow: - -1. If running for the first time, start the server - - loom server start --settings-file local.conf - -2. Import the template - - loom import template word_scoring.yaml - -3. Execute the run - - loom run word_scoring - - # or to override the default input value: - - loom run word_scoring wordlist="my own crazy input here" - -4. 
Monitor the run from the commandline - - loom show run word_scoring --detail - -5. Monitor the run in the browser - - loom browser - -6. If you want to delete the Loom server - - loom server delete diff --git a/doc/index.rst b/doc/index.rst index 60de7320..1391c8d6 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -13,3 +13,4 @@ Welcome to Loom's documentation! releasenotes about installing + templates diff --git a/doc/installing.rst b/doc/installing.rst index 5c070a60..a80f2b57 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -1,5 +1,7 @@ +.. _getting-started: + ############### -Getting started +Getting Started ############### This guide walks you through installing the Loom client, using it to launch a Loom server either on your local machine or on Google Cloud Platform, and running a workflow. diff --git a/doc/releasenotes.rst b/doc/releasenotes.rst index 1d4c0e51..7b1e7b8d 100644 --- a/doc/releasenotes.rst +++ b/doc/releasenotes.rst @@ -1,8 +1,9 @@ +############# Release Notes -============= +############# 0.4.0 ---------- +============= * Parallel workflows * Deprecated fixed inputs, replaced with optional and overridable "data" field on standard inputs * User-defined run names, using the optional "--name" flag with "loom run" @@ -11,25 +12,25 @@ Release Notes * Outputs can now use "glob" source in addition to "filename" and "stream" 0.3.8 ---------- +============= * Run overview shows nested runs, tasks, and task-attempts 0.3.7 ---------- +============= * Retries for upload/download from Google Storage 0.3.6 ---------- +============= * Runs have "waiting" status until they start * Runs are no longer changed to "killed" if they already completed * Input/output detail routes on runs 0.3.5 ---------- +============= * Critical bugfix for 0.3.4 0.3.4 ---------- +============= * Pre-configure Kibana * Disable X-Pack features in Kibana and Elasticsearch * Handle several sporadic failures from gcloud services @@ -37,25 +38,24 @@ Release Notes * Prune 
docker data volumes 0.3.3 ----------- +============= * Critical bugfix for 0.3.2 that prevented use on Google Cloud 0.3.2 ----------- +============= * Fluentd for logging, with kibana+elasticsearch for log viewing * Nested templates by reference * API documentation with swagger * Reduced lag time in running tasks 0.3.1 ----------- +============= * Allow substitution in template output filenames * Added LOOM_PRESERVE_ON_FAILURE and LOOM_PRESERVE_ALL flags for debugging * Several bugfixes 0.3.0 ------------ - +============= * User-configurable playbooks * Non-reverse-compatible simplifications to API * Reduced server response times @@ -64,13 +64,11 @@ Release Notes * Retry tasks if process stops responding 0.2.1 ------------ - +============= * Use release-specific DOCKER_TAG in default settings 0.2.0 ------------ - +============= * Loom can create a server locally or on Google Cloud Platform * Accepts workflow templates in JSON or YAML format * Web portal provides a brower interface for viewing templates, files, and runs diff --git a/doc/templates.rst b/doc/templates.rst new file mode 100644 index 00000000..2283df90 --- /dev/null +++ b/doc/templates.rst @@ -0,0 +1,414 @@ +###################################### +Loom Templates +###################################### + +To run an analysis on Loom, you must first have a template that defines the analysis steps and their relative arrangement (input/output dependencies, scatter-gather patterns). An analysis run may then be initiated by assigning input data to an existing template. + +A Loom template is defined in a yaml or json file and then imported to the Loom server. + +********* +Examples +********* + +To run these examples, you will need access to a running Loom server. See :ref:`getting-started` for help launching a Loom server either locally or in the cloud. + +join_two_words +============== + +*simplest example, a.k.a. 
hello world* + +This example illustrates the minimal set of features in a Loom template: name, command, environment (defined by a docker image), and input/output definitions. + +We use the optional "data" field on the inputs to assign default values. + +*join_two_words.yaml:* + +.. literalinclude:: examples/join_two_words/join_two_words.yaml + +:download:`join_two_words.yaml ` + +The command "echo {{word1}} {{word2}}" makes use of Jinja2_ notation to substitute input values. "{{word1}}" in the command will be substituted with the value provided on the "word1" input channel. For inputs of type "string", "integer", "boolean", and "float", the value substituted is a string representation of the data. For inputs of type "file", the filename is substituted. The full set of Jinja2 features may be used, including filters, conditional statements, and loops. + +.. _Jinja2: http://jinja.pocoo.org/docs/ + +**Run the join_two_words example** + +:: + + loom import template join_two_words.yaml + + # Run with default input data + loom run join_two_words + + # Run with custom input data + loom run join_two_words word1=foo word2=bar + +capitalize_words +================ + +*array data, iterating over an array input* + +This template illustrates the concept of non-scalar data (in this case a 1-dimensional array). The default mode for inputs is "no_gather", which means that rather than gather all the objects into an array to be processed together in a single task, Loom will iterate over the array and execute the command once for each data object, in separate tasks. + +Here we capitalize each word in the array. The output from each task executed is a string, but since many tasks are executed, the output is an array of strings. + +Note the use of "as_channel" on the input definition. 
Since our input channel is an array we named the channel with the plural "words", but since this run executes a separate task for each element in the array, it may be confusing to refer to "{{words}}" inside the command. It improves readability to use "as_channel: word". + +*capitalize_words.yaml:* + +.. literalinclude:: examples/capitalize_words/capitalize_words.yaml + +:download:`capitalize_words.yaml ` + +**Run the capitalize_words example** + +:: + + loom import template capitalize_words.yaml + + # Run with default input data + loom run capitalize_words + + # Run with custom input data + loom run capitalize_words words=[uno,dos,tres] + +join_array_of_words +=================== + +*array data, gather mode on an input* + +Earlier we saw how to join two words, each defined on a separate input. But what if we want to join an arbitrary number of words? + +This example has a single input, whose default value is an array of words. By setting the mode of this input as "gather", instead of iterating as in the last example we will execute a single task that receives the full list of words as an input. + +In this example we merge the strings and output the result as a string. + +*join_array_of_words.yaml:* + +.. literalinclude:: examples/join_array_of_words/join_array_of_words.yaml + +:download:`join_array_of_words.yaml ` + +**Run the join_array_of_words example** + +:: + + loom import template join_array_of_words.yaml + + # Run with default input data + loom run join_array_of_words + + # Run with custom input data + loom run join_array_of_words wordarray=[uno,dos,tres] + +split_words_into_array +====================== + +*array data, scatter mode on an output, output parsers* + +This example is the reverse of the previous example. We begin with a scalar string of space-separated words, and split them into an array. + +To generate an array output from a single task, we set the output mode to "scatter". + +We also need to instruct Loom how to split the text in stdout into an array. 
For this we use a parser that uses the space character as the delimiter and trims any extra whitespace characters from the words. + +*split_words_into_array.yaml:* + +.. literalinclude:: examples/split_words_into_array/split_words_into_array.yaml + +:download:`split_words_into_array.yaml ` + +**Run the split_words_into_array example** + +:: + + loom import template split_words_into_array.yaml + + # Run with default input data + loom run split_words_into_array + + # Run with custom input data + loom run split_words_into_array text="one two three" + +add_then_multiply +================= + +*multistep templates, connecting inputs and outputs, custom interpreter* + +All the previous examples have involved just one step. Here we show how to define more than one step in a template. + +Also, since we are doing math in this example, it is easier to use python than bash, so we introduce the concept of custom interpreters. + +Notice how the flow of data is defined using shared channel names between inputs and outputs. On the top-level template "add_then_multiply" we define input channels "a", "b", and "c". These are used by the steps "add" ("a" and "b") and "multiply" ("c"). There is also an output from "add" called "ab_sum" that serves as an input for "multiply". Finally, the output from "multiply", called "result" is passed up to "add_then_multiply" as a top-level output. + +*add_then_multiply.yaml:* + +.. literalinclude:: examples/add_then_multiply/add_then_multiply.yaml + +:download:`add_then_multiply.yaml ` + +**Run the add_then_multiply example** + +:: + + loom import template add_then_multiply.yaml + + # Run with default input data + loom run add_then_multiply + + # Run with custom input data + loom run add_then_multiply a=1 b=2 c=3 + +building_blocks +=============== + +*reusing templates* + +Let's look at another way to write the previous workflow. The "add" and "multiply" steps can be defined as stand-alone workflows. 
After they are defined, we can create a template that includes those templates as steps. + +*add.yaml:* + +.. literalinclude:: examples/building_blocks/add.yaml + +*multiply.yaml:* + +.. literalinclude:: examples/building_blocks/multiply.yaml + +*building_blocks.yaml:* + +.. literalinclude:: examples/building_blocks/building_blocks.yaml + +:download:`add.yaml ` + +:download:`multiply.yaml ` + +:download:`building_blocks.yaml ` + +**Run the building_blocks example** + +:: + + # Import child templates before the parent that references them + loom import template add.yaml + loom import template multiply.yaml + loom import template building_blocks.yaml + + # Run with default input data + loom run building_blocks + + # Run with custom input data + loom run building_blocks a=1 b=2 c=3 + +search_file +=========== + +*file inputs* + +Most of these examples use non-file inputs for convenience, but files can be used as inputs and outputs much like other data types. + +In this example, the "lorem_ipsum.txt" input file should be imported prior to importing the "search_file.yaml" template that references it. + +*lorem_ipsum.txt:* + +.. literalinclude:: examples/search_file/lorem_ipsum.txt + +*search_file.yaml:* + +.. literalinclude:: examples/search_file/search_file.yaml + +:download:`lorem_ipsum.txt ` + +:download:`search_file.yaml ` + +Here is an alternative text file not referenced in the template. We can override the default input file and specify beowulf.txt as the input when starting a run. + +*beowulf.txt:* + +.. 
literalinclude:: examples/search_file/beowulf.txt + +:download:`beowulf.txt ` + +**Run the search_file example** + +:: + + # Import the file before the template that references it + loom import file lorem_ipsum.txt + loom import template search_file.yaml + + # Run with default input data + loom run search_file + + # Run with custom input data + loom import file beowulf.txt + loom run search_file pattern=we file_to_search=beowulf.txt\$20b8f89484673eae4f121801e1fec28c + +word_combinations +================= + +*scatter-gather, input groups, output mode gather(n)* + +When a template step has two inputs rather than one, iteration can be done in two ways: + +* collated iteration: [a,b] + [c,d] => [a+c,b+d] +* combinatorial iteration: [a,b] + [c,d] => [a+c, a+d, b+c, b+d] + +With more than two inputs, we could employ some combination of these two approaches. + +"groups" provide a flexible way to define when to use collated or combinatorial iteration. Each input has an integer group ID (the default is 0). All inputs with a common group ID will be combined with collation. Between groups, combinatorial iteration is used. + +In this example, we iterate over two inputs, one with an array of adjectives and one with an array of nouns. Since the inputs have different group IDs, we iterate over all possible combinations of word pairs (combinatorial). + +*word_combinations.yaml:* + +.. literalinclude:: examples/word_combinations/word_combinations.yaml + +:download:`word_combinations.yaml ` + +You may have noticed that we gather the input "word_pair_files" with "mode: gather(2)". This is because word_pair_files is not just an array, but an array of arrays. We wish to gather it to full depth. You may wish to modify this example to use "mode: gather" (or equivalently "mode: gather(1)") to see how it affects the result. 
+ +**Run the word_combinations example** + +:: + + loom import template word_combinations.yaml + + # Run with default input data + loom run word_combinations + + # Run with custom input data + loom run word_combinations adjectives=[little,green] nouns=[men,pickles,apples] + +sentence_scoring +================ + +Why should we bother differentiating between "gather" and "gather(2)"? This example illustrates why, by showing how to construct a scatter-scatter-gather-gather workflow. On the first gather, we do not fully gather the results into an array, but only gather the last level of nested arrays. This lets us group data for the letters in each word while keeping data for different words separate. On the second gather, we combine the data for each word to get an overall result for the sentence. + +*sentence_scoring.yaml:* + +.. literalinclude:: examples/sentence_scoring/sentence_scoring.yaml + +:download:`sentence_scoring.yaml ` + +**Run the sentence_scoring example** + +:: + + loom import template sentence_scoring.yaml + + # Run with default input data + loom run sentence_scoring + + # Run with custom input data + loom run sentence_scoring sentence='To infinity and beyond' + +******* +Schemas +******* + +Template schema +=============== + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +name yes string 'calculate_error' +inputs no [] [Input] ['channel': 'input1', 'type': 'string'] +outputs no [] [Output] ['channel': 'output1', 'type': 'string', 'source': {'stream': 'stdout'}] +command* yes string 'echo {{input1}}' +interpreter* no /bin/bash -euo pipefail string '/usr/bin/python' +resources* no null +environment* yes string {'docker_image': 'ubuntu:latest'} +steps+ no [] [Template|string] see examples in previous section +============ ======== ======================= ================= =============== + +\* only on 
executable steps (leaf nodes) + +\+ only on container steps (non-leaf nodes) + +Input schema +============ + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +channel yes string 'sampleid' +type yes string 'file' +mode* no no_gather string 'gather' +group* no 0 integer 2 +hint no string 'Enter a quality threshold' +data no null DataNode {'contents': [3,7,12]} +============ ======== ======================= ================= =============== + +\* only on executable steps (leaf nodes) + +DataNode schema +=============== + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +contents yes see notes below +============ ======== ======================= ================= =============== + +DataNode contents can be a valid data value of any type. They can also be a list, or nested lists of any of these types, provided all items are of the same type and at the same nested depth. 
+ +========= ================================================ ================================== +data type valid DataNode contents examples invalid DataNode contents examples +========= ================================================ ================================== +integer 172 +float 3.98 +string 'sx392' +boolean true +file myfile.txt +file myfile.txt$9dd4e461268c8034f5c8564e155c67a6 +file $9dd4e461268c8034f5c8564e155c67a6 +file myfile.txt\@ef62b731-e714-4b82-b1a7-057c1032419e +file myfile.txt\@ef62b7 +file \@ef62b7 +integer [2,3] +integer [[2,2],[2,3,5],[17]] +integer [2,'three'] (mismatched types) +integer [[2,2],[2,3,[5,17]]] (mismatched depths) +========= ================================================ ================================== + +Output schema +============= + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +channel yes string 'sampleid' +type yes string 'file' +mode* no no_gather string 'gather' +parser* no null OutputParser {'type': 'delimited', 'options': {'delimiter': ','} +source* yes OutputSource {'glob': '*.dat'} +============ ======== ======================= ================= =============== + +\* only on executable steps (leaf nodes) + +OutputParser schema +=================== + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +type yes string 'delimited' +options no ParserOptions {'delimiter':' ','trim':true} +============ ======== ======================= ================= =============== + +OutputSource schema +=================== + +============ ======== ======================= ================= =============== +field required default type example +============ ======== ======================= ================= =============== +filename* 
false string 'out.txt' +stream* false string 'stderr' +glob+ false string '*.txt' +filenames+ false string ['out1.txt','out2.txt'] +============ ======== ======================= ================= =============== + +\* When used with outputs with "scatter" mode, an OutputParser is required + +\+ Only for outputs with "scatter" mode. (No parser required.) diff --git a/loomengine/client/importer.py b/loomengine/client/importer.py index efd0e4ba..5d516400 100755 --- a/loomengine/client/importer.py +++ b/loomengine/client/importer.py @@ -7,7 +7,7 @@ verify_has_connection_settings, parse_as_json_or_yaml from loomengine.utils.filemanager import FileManager from loomengine.utils.connection import Connection -from loomengine.utils.exceptions import DuplicateFileError +from loomengine.utils.exceptions import DuplicateFileError, DuplicateTemplateError class AbstractImporter(object): """Common functions for the various subcommands under 'loom import' @@ -71,12 +71,20 @@ def run(self): return self.import_template(self.args.template, self.args.comments, self.filemanager, - self.connection) + self.connection, + self.args.force_duplicates) + @classmethod - def import_template(cls, template_file, comments, filemanager, connection): + def import_template(cls, template_file, comments, + filemanager, connection, force_duplicates=False): print 'Importing template from "%s".' 
% filemanager.normalize_url( template_file) (template, source_url) = cls._get_template(template_file, filemanager) + if not force_duplicates: + try: + filemanager.verify_no_template_duplicates(template) + except DuplicateTemplateError as e: + raise SystemExit(e.message) if comments: template.update({'import_comments': comments}) if source_url: @@ -106,6 +114,7 @@ def _warn_for_fixed_inputs(cls, template): @classmethod def _get_template(cls, template_file, filemanager): + md5 = filemanager.calculate_md5(template_file) try: (template_text, source_url) = filemanager.read_file(template_file) except Exception as e: @@ -113,7 +122,7 @@ def _get_template(cls, template_file, filemanager): % (template_file, str(e))) template = parse_as_json_or_yaml(template_text) try: - template.update + template.update({'md5': md5}) except AttributeError: raise SystemExit( 'ERROR! Template at "%s" could not be parsed into a dict.' @@ -153,7 +162,7 @@ def get_parser(cls, parser=None): file_subparser.add_argument('-d', '--force-duplicates', action='store_true', default=False, help='Force upload even if another file with '\ - 'the same md5 exists') + 'the same name and md5 exists') hidden_file_subparser = subparsers.add_parser('files') FileImporter.get_parser(hidden_file_subparser) @@ -162,12 +171,19 @@ def get_parser(cls, parser=None): action='store_true', default=False) template_subparser = subparsers.add_parser('template', help='import a template') + template_subparser.add_argument('-d', '--force-duplicates', action='store_true', + default=False, + help='Force upload even if another template with '\ + 'the same name and md5 exists') + TemplateImporter.get_parser(template_subparser) template_subparser.set_defaults(SubSubcommandClass=TemplateImporter) hidden_template_subparser = subparsers.add_parser('templates') TemplateImporter.get_parser(hidden_template_subparser) hidden_template_subparser.set_defaults(SubSubcommandClass=TemplateImporter) + 
hidden_template_subparser.add_argument('--force-duplicates', '-d', + action='store_true', default=False) return parser diff --git a/loomengine/client/show.py b/loomengine/client/show.py index 0e12c255..28c9afe0 100755 --- a/loomengine/client/show.py +++ b/loomengine/client/show.py @@ -152,6 +152,7 @@ def _render_template(self, template): if self.args.detail: text = '---------------------------------------\n' text += 'Template: %s\n' % template_identifier + text += ' - md5: %s\n' % template.get('md5') text += ' - Imported: %s\n' % \ render_time(template['datetime_created']) if template.get('inputs'): diff --git a/loomengine/master/api/migrations/0001_initial.py b/loomengine/master/api/migrations/0001_initial.py index 92f531e0..e76ed6ef 100644 --- a/loomengine/master/api/migrations/0001_initial.py +++ b/loomengine/master/api/migrations/0001_initial.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Generated by Django 1.11 on 2017-07-14 01:47 +# Generated by Django 1.11 on 2017-07-14 01:50 from __future__ import unicode_literals import api.models @@ -143,6 +143,7 @@ class Migration(migrations.Migration): ('type', models.CharField(choices=[(b'boolean', b'Boolean'), (b'file', b'File'), (b'float', b'Float'), (b'integer', b'Integer'), (b'string', b'String')], max_length=255)), ('mode', models.CharField(blank=True, max_length=255)), ('group', models.IntegerField(blank=True, null=True)), + ('as_channel', models.CharField(blank=True, max_length=255, null=True)), ('data_node', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='api.DataNode')), ('run', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='inputs', to='api.Run')), ], @@ -158,6 +159,7 @@ class Migration(migrations.Migration): ('mode', models.CharField(blank=True, max_length=255)), ('source', jsonfield.fields.JSONField(blank=True)), ('parser', jsonfield.fields.JSONField(blank=True, 
validators=[api.models.validators.OutputParserValidator.validate_output_parser])), + ('as_channel', models.CharField(blank=True, max_length=255, null=True)), ('data_node', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='api.DataNode')), ('run', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='outputs', to='api.Run')), ], @@ -305,6 +307,7 @@ class Migration(migrations.Migration): ('channel', models.CharField(max_length=255)), ('type', models.CharField(choices=[(b'boolean', b'Boolean'), (b'file', b'File'), (b'float', b'Float'), (b'integer', b'Integer'), (b'string', b'String')], max_length=255)), ('mode', models.CharField(max_length=255)), + ('as_channel', models.CharField(blank=True, max_length=255, null=True)), ('data_node', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='api.DataNode')), ('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inputs', to='api.Task')), ], @@ -323,6 +326,7 @@ class Migration(migrations.Migration): ('mode', models.CharField(max_length=255)), ('source', jsonfield.fields.JSONField(blank=True)), ('parser', jsonfield.fields.JSONField(blank=True, validators=[api.models.validators.OutputParserValidator.validate_output_parser])), + ('as_channel', models.CharField(blank=True, max_length=255, null=True)), ('data_node', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='api.DataNode')), ('task', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='outputs', to='api.Task')), ], @@ -338,6 +342,7 @@ class Migration(migrations.Migration): ('_change', models.IntegerField(default=0)), ('uuid', models.CharField(default=api.models.uuidstr, editable=False, max_length=255, unique=True)), ('name', models.CharField(max_length=255, validators=[api.models.validators.TemplateValidator.validate_name])), + ('md5', 
models.CharField(blank=True, max_length=32, null=True, validators=[api.models.validators.validate_md5])), ('is_leaf', models.BooleanField()), ('datetime_created', models.DateTimeField(default=django.utils.timezone.now, editable=False)), ('command', models.TextField(blank=True)), @@ -366,6 +371,7 @@ class Migration(migrations.Migration): ('hint', models.CharField(blank=True, max_length=1000)), ('mode', models.CharField(blank=True, max_length=255)), ('group', models.IntegerField(blank=True, null=True)), + ('as_channel', models.CharField(blank=True, max_length=255, null=True)), ('data_node', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='api.DataNode')), ('template', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inputs', to='api.Template')), ], diff --git a/loomengine/master/api/models/input_calculator.py b/loomengine/master/api/models/input_calculator.py index 4d1bc169..7498582b 100644 --- a/loomengine/master/api/models/input_calculator.py +++ b/loomengine/master/api/models/input_calculator.py @@ -175,7 +175,6 @@ def create_from_data_channel(cls, data_channel, target_path=None, gather_depth=0 """Scan the data tree on the given data_channel to create a corresponding InputSetGenerator tree. """ - # If target_path is given, any data above that path will be ignored. # The path [] represents the root node, so this default value scans # the whole tree @@ -189,7 +188,7 @@ def create_from_data_channel(cls, data_channel, target_path=None, gather_depth=0 # attach it to the task. If gather depth is 1 or 0, the flattened # clone is an unchanged copy. 
flat_data_node = data_node.flattened_clone() - input_item = InputItem(flat_data_node, data_channel.channel, mode=data_channel.mode) + input_item = InputItem(flat_data_node, data_channel.channel, data_channel.as_channel, mode=data_channel.mode) generator._add_input_item(data_path, input_item) return generator @@ -296,8 +295,9 @@ class InputItem(object): refrain from creating a new ArrayDataObject for no reason. """ - def __init__(self, data_node, channel, mode): + def __init__(self, data_node, channel, as_channel, mode): self.channel = channel + self.as_channel = as_channel self.data_node = data_node self.mode = mode diff --git a/loomengine/master/api/models/runs.py b/loomengine/master/api/models/runs.py index 05c20c1f..6fabdbe2 100644 --- a/loomengine/master/api/models/runs.py +++ b/loomengine/master/api/models/runs.py @@ -202,10 +202,16 @@ def _connect_input_to_template_data(self, input): return template_input = self.template.inputs.get(channel=input.channel) + if template_input.data_node is None: raise ValidationError( "No input data available on channel %s" % input.channel) - template_input.data_node.clone(seed=input.data_node) + if input.data_node is None: + data_node = template_input.data_node.clone() + input.setattrs_and_save_with_retries({'data_node': data_node}) + else: + template_input.data_node.clone(seed=input.data_node) + def _has_user_input(self, channel): try: @@ -377,7 +383,6 @@ def _claim_for_postprocessing(self): @classmethod def postprocess(cls, run_uuid): run = Run.objects.get(uuid=run_uuid) - if run.postprocessing_status == 'complete': # Nothing more to do return @@ -415,6 +420,7 @@ def initialize_inputs(self): run_input = RunInput.objects.create( run=self, channel=input.channel, + as_channel=input.as_channel, type=input.type, group=input.group, mode=input.mode) @@ -436,6 +442,7 @@ def initialize_outputs(self): kwargs = {'run': self, 'type': output.get('type'), 'channel': output.get('channel'), + 'as_channel': output.get('as_channel'), 'source': 
output.get('source'), 'parser': output.get('parser') } @@ -560,6 +567,7 @@ class Meta: blank=True) mode = models.CharField(max_length=255, blank=True) group = models.IntegerField(null=True, blank=True) + as_channel = models.CharField(max_length=255, null=True, blank=True) def is_ready(self, data_path=None): if self.data_node: @@ -583,6 +591,7 @@ class Meta: parser = jsonfield.JSONField( validators=[validators.OutputParserValidator.validate_output_parser], blank=True) + as_channel = models.CharField(max_length=255, null=True, blank=True) class RunConnectorNode(DataChannel): diff --git a/loomengine/master/api/models/task_attempts.py b/loomengine/master/api/models/task_attempts.py index bfe074e6..6cdabb4f 100644 --- a/loomengine/master/api/models/task_attempts.py +++ b/loomengine/master/api/models/task_attempts.py @@ -139,8 +139,10 @@ def _render_output_source(self, task_output_source): input_context = self.task.get_input_context() stream = task_output_source.get('stream') - filename = render_string_or_list( - task_output_source.get('filename'), + filename = render_from_template( + task_output_source.get('filename'), input_context) + filenames = render_string_or_list( + task_output_source.get('filenames'), input_context) glob = render_from_template( task_output_source.get('glob'), input_context) @@ -150,6 +152,8 @@ def _render_output_source(self, task_output_source): output_source['stream'] = stream if filename: output_source['filename'] = filename + if filenames: + output_source['filenames'] = filenames if glob: output_source['glob'] = glob return output_source diff --git a/loomengine/master/api/models/tasks.py b/loomengine/master/api/models/tasks.py index 0b3550fc..8e481435 100644 --- a/loomengine/master/api/models/tasks.py +++ b/loomengine/master/api/models/tasks.py @@ -150,12 +150,14 @@ def create_from_input_set(cls, input_set, run): TaskInput.objects.create( task=task, channel=input_item.channel, + as_channel=input_item.as_channel, type=input_item.type, 
mode=input_item.mode, data_node = input_item.data_node) for run_output in run.outputs.all(): task_output = TaskOutput.objects.create( channel=run_output.channel, + as_channel=run_output.as_channel, type=run_output.type, task=task, mode=run_output.mode, @@ -182,24 +184,32 @@ def create_and_activate_attempt(self): def get_input_context(self): context = {} for input in self.inputs.all(): + if input.as_channel: + channel = input.as_channel + else: + channel = input.channel if input.data_node.is_leaf: - context[input.channel] = input.data_node\ - .substitution_value + context[channel] = input.data_node\ + .substitution_value else: - context[input.channel] = ArrayInputContext( + context[channel] = ArrayInputContext( input.data_node.substitution_value, input.type ) return context def get_output_context(self, input_context): + # This returns a value only for Files, where the filename + # is known beforehand and may be used in the command. + # For other types, nothing is added to the context. context = {} for output in self.outputs.all(): - # This returns a value only for Files, where the filename - # is known beforehand and may be used in the command. - # For other types, nothing is added to the context. 
+ if output.as_channel: + channel = output.as_channel + else: + channel = output.channel if output.source.get('filename'): - context[output.channel] = render_string_or_list( + context[channel] = render_string_or_list( output.source.get('filename'), input_context) return context @@ -228,6 +238,7 @@ class TaskInput(DataChannel): related_name='inputs', on_delete=models.CASCADE) mode = models.CharField(max_length=255) + as_channel = models.CharField(max_length=255, null=True, blank=True) class TaskOutput(DataChannel): @@ -240,6 +251,7 @@ class TaskOutput(DataChannel): parser = jsonfield.JSONField( validators=[validators.OutputParserValidator.validate_output_parser], blank=True) + as_channel = models.CharField(max_length=255, null=True, blank=True) def push_data(self, data_path): # Copy data from the TaskAttemptOutput to the TaskOutput diff --git a/loomengine/master/api/models/templates.py b/loomengine/master/api/models/templates.py index 035509ca..9779b86d 100644 --- a/loomengine/master/api/models/templates.py +++ b/loomengine/master/api/models/templates.py @@ -26,12 +26,15 @@ class Template(BaseModel): NAME_FIELD = 'name' + HASH_FIELD = 'md5' ID_FIELD = 'uuid' uuid = models.CharField(default=uuidstr, editable=False, unique=True, max_length=255) name = models.CharField(max_length=255, validators=[validators.TemplateValidator.validate_name]) + md5 = models.CharField(null=True, blank=True, + max_length=32, validators=[validators.validate_md5]) is_leaf = models.BooleanField() datetime_created = models.DateTimeField(default=timezone.now, editable=False) @@ -95,6 +98,7 @@ class TemplateInput(DataChannel): hint = models.CharField(max_length=1000, blank=True) mode = models.CharField(max_length=255, blank=True) group = models.IntegerField(null=True, blank=True) + as_channel = models.CharField(max_length=255, null=True, blank=True) class Meta: app_label = 'api' diff --git a/loomengine/master/api/models/validators.py b/loomengine/master/api/models/validators.py index 
ffbe9802..79ac5d3e 100644 --- a/loomengine/master/api/models/validators.py +++ b/loomengine/master/api/models/validators.py @@ -222,6 +222,7 @@ def validate_outputs(cls, value): "type": "object", "properties": { "channel": {"type": "string"}, + "as_channel": {"type": "string"}, "type": { "type": "string", "enum": ["file", "boolean", "string", "float", "integer"]}, @@ -231,13 +232,13 @@ def validate_outputs(cls, value): "properties": { "stream": {"type": "string", "enum": ["stdout", "stderr"]}, - "filename": { + "filename": {"type": "string"}, + "filenames": { "oneOf": [ {"type": "string"}, {"type": "array", "items": {"type": "string"}} - ] - }, + ]}, "glob": {"type": "string"} } }, diff --git a/loomengine/master/api/serializers/data_objects.py b/loomengine/master/api/serializers/data_objects.py index d6381eb7..df8f8d46 100644 --- a/loomengine/master/api/serializers/data_objects.py +++ b/loomengine/master/api/serializers/data_objects.py @@ -58,7 +58,7 @@ def create(self, validated_data): if not isinstance(value, dict): # If it's a string, treat it as a data_object identifier and # look it up. 
- data_objects = DataObject.filter_by_name_or_id(value) + data_objects = DataObject.filter_by_name_or_id_or_hash(value) if data_objects.count() == 0: raise serializers.ValidationError( 'No matching DataObject found for "%s"' % value) diff --git a/loomengine/master/api/serializers/runs.py b/loomengine/master/api/serializers/runs.py index 8bcf01d2..d097a268 100644 --- a/loomengine/master/api/serializers/runs.py +++ b/loomengine/master/api/serializers/runs.py @@ -26,10 +26,11 @@ class RunInputSerializer(DataChannelSerializer): class Meta: model = RunInput - fields = ('type', 'channel', 'data', 'mode', 'group') + fields = ('type', 'channel', 'as_channel', 'data', 'mode', 'group') mode = serializers.CharField() group = serializers.IntegerField() + as_channel = serializers.CharField(required=False) def to_representation(self, instance): return strip_empty_values( @@ -40,11 +41,12 @@ class RunOutputSerializer(DataChannelSerializer): class Meta: model = RunOutput - fields = ('type', 'channel', 'data', 'mode', 'source', 'parser') + fields = ('type', 'channel', 'as_channel', 'data', 'mode', 'source', 'parser') mode = serializers.CharField() source = serializers.JSONField(required=False) parser = serializers.JSONField(required=False) + as_channel = serializers.CharField(required=False) def to_representation(self, instance): return strip_empty_values( diff --git a/loomengine/master/api/serializers/tasks.py b/loomengine/master/api/serializers/tasks.py index cce6130d..6a295eab 100644 --- a/loomengine/master/api/serializers/tasks.py +++ b/loomengine/master/api/serializers/tasks.py @@ -20,20 +20,22 @@ class TaskInputSerializer(DataChannelSerializer): class Meta: model = TaskInput - fields = ('data', 'type', 'channel', 'mode') + fields = ('data', 'type', 'channel', 'as_channel', 'mode') mode = serializers.CharField() + as_channel = serializers.CharField(required=False) class TaskOutputSerializer(DataChannelSerializer): class Meta: model = TaskOutput - fields = ('type', 'channel', 
'data', 'mode', 'source', 'parser') + fields = ('type', 'channel', 'as_channel', 'data', 'mode', 'source', 'parser') mode = serializers.CharField() source = serializers.JSONField(required=False) parser = serializers.JSONField(required=False) + as_channel = serializers.CharField(required=False) class TaskSerializer(serializers.HyperlinkedModelSerializer): diff --git a/loomengine/master/api/serializers/templates.py b/loomengine/master/api/serializers/templates.py index 9c5c9087..7aaf6a28 100644 --- a/loomengine/master/api/serializers/templates.py +++ b/loomengine/master/api/serializers/templates.py @@ -40,12 +40,13 @@ class TemplateInputSerializer(DataChannelSerializer): class Meta: model = TemplateInput - fields = ('type', 'channel', 'data', 'hint', 'mode', 'group') + fields = ('type', 'channel', 'as_channel', 'data', 'hint', 'mode', 'group') hint = serializers.CharField(required=False) mode = serializers.CharField(required=False) group = serializers.IntegerField(required=False) data = serializers.JSONField(required=False) # Override to make non-required + as_channel = serializers.CharField(required=False) _template_serializer_fields = ( @@ -53,6 +54,7 @@ class Meta: 'url', '_template_id', 'name', + 'md5', 'datetime_created', 'command', 'comments', @@ -81,6 +83,7 @@ class Meta: view_name='template-detail', lookup_field='uuid') name = serializers.CharField(required=False) + md5 = serializers.CharField(required=False) datetime_created = serializers.DateTimeField( format='iso-8601', required=False) is_leaf = serializers.BooleanField(required=False) @@ -206,7 +209,7 @@ def _reload(self, ModelClass, models): return models def _lookup_by_id(self, template_id): - matches = Template.filter_by_name_or_id(template_id) + matches = Template.filter_by_name_or_id_or_hash(template_id) if matches.count() < 1: raise serializers.ValidationError( 'No template found that matches value "%s"' % template_id) @@ -300,6 +303,7 @@ class SummaryTemplateSerializer(TemplateSerializer): 
view_name='template-detail', lookup_field='uuid') name = serializers.CharField(required=False) + md5 = serializers.CharField(required=False) datetime_created = serializers.DateTimeField(format='iso-8601') is_leaf = serializers.BooleanField(required=False) steps = RecursiveField(many=True, required=False) diff --git a/loomengine/master/api/views.py b/loomengine/master/api/views.py index a573e93e..863b2a06 100644 --- a/loomengine/master/api/views.py +++ b/loomengine/master/api/views.py @@ -233,7 +233,7 @@ def get_queryset(self): imported = 'imported' in self.request.query_params Serializer = self.get_serializer_class() if query_string: - queryset = models.Template.filter_by_name_or_id(query_string) + queryset = models.Template.filter_by_name_or_id_or_hash(query_string) else: queryset = models.Template.objects.all() if imported: diff --git a/loomengine/portal/partials/run-contents.html b/loomengine/portal/partials/run-contents.html index 88af4518..8488fd0b 100644 --- a/loomengine/portal/partials/run-contents.html +++ b/loomengine/portal/partials/run-contents.html @@ -1,6 +1,6 @@
- + diff --git a/loomengine/utils/exceptions.py b/loomengine/utils/exceptions.py index 8250e53c..5a4a62be 100644 --- a/loomengine/utils/exceptions.py +++ b/loomengine/utils/exceptions.py @@ -42,3 +42,6 @@ class ValidationError(Error): class DuplicateFileError(Error): pass + +class DuplicateTemplateError(Error): + pass diff --git a/loomengine/utils/filemanager.py b/loomengine/utils/filemanager.py index 177ef208..3ab3f079 100644 --- a/loomengine/utils/filemanager.py +++ b/loomengine/utils/filemanager.py @@ -521,7 +521,7 @@ def _create_file_data_object_for_import(self, source, comments, md5 = source.calculate_md5() if not force_duplicates: - self._verify_no_duplicates(md5) + self._verify_no_file_duplicates(filename, md5) value = { 'filename': filename, @@ -537,24 +537,20 @@ def _create_file_data_object_for_import(self, source, comments, 'value': value }) - def _verify_no_duplicates(self, md5): + def _verify_no_file_duplicates(self, filename, md5): files = self.connection.get_data_object_index( - query_string='$%s' % md5, type='file') + query_string='%s$%s' % (filename, md5), type='file') if len(files) == 0: return - md5 = files[0].get('md5') matches = [] for file in files: - value = file.get('value') - try: - filename = value.get('filename') - except AttributeError: - filename = '' - matches.append('%s@%s' % (filename, file.get('uuid'))) + matches.append('%s@%s' % (file['value'].get('filename'), file.get('uuid'))) raise DuplicateFileError( - 'ERROR! One or more files with md5 %s already exist: "%s". '\ - 'Use "--force-duplicates" if you want to create another copy.' - % (md5, '", "'.join(matches))) + 'ERROR! The name and md5 hash "%s$%s" is already in use by one ' + 'or more files: "%s". '\ + 'Use "--force-duplicates" to create another copy, but if you '\ + 'do you will have to use @uuid to reference these files.' + % (filename, md5, '", "'.join(matches))) def import_result_file(self, task_attempt_output, source_url): logger.info('Calculating md5 on file "%s"...' 
% source_url) @@ -741,3 +737,24 @@ def write_to_file(self, url, content): def normalize_url(self, url): return _urlparse(url).geturl() + + def calculate_md5(self, url): + return Source(url, self.settings).calculate_md5() + + def verify_no_template_duplicates(self, template): + md5 = template.get('md5') + name = template.get('name') + + templates = self.connection.get_template_index( + query_string='%s$%s' % (name, md5)) + if len(templates) == 0: + return + matches = [] + for template in templates: + matches.append('%s@%s' % (template.get('name'), template.get('uuid'))) + raise DuplicateTemplateError( + 'ERROR! The name and md5 hash "%s$%s" is already in use by one ' + 'or more templates: "%s". '\ + 'Use "--force-duplicates" to create another copy, but if you '\ + 'do you will have to use @uuid to reference these templates.' + % (name, md5, '", "'.join(matches))) diff --git a/loomengine/worker/outputs.py b/loomengine/worker/outputs.py index 979c2e93..4a43b2f8 100644 --- a/loomengine/worker/outputs.py +++ b/loomengine/worker/outputs.py @@ -25,7 +25,7 @@ def save(self): class FileListScatterOutput(BaseOutput): def save(self): - filename_list = self.output['source']['filename'] + filename_list = self.output['source']['filenames'] file_path_list = [ os.path.join( self.settings['WORKING_DIR'], filename) @@ -68,7 +68,7 @@ def save(self): class FileListContentsScatterOutput(FileContentsOutput): def save(self): - filename_list = self.output['source']['filename'] + filename_list = self.output['source']['filenames'] if not isinstance(filename_list, list): filename_list = filename_list.split(' ') contents_list = [] @@ -159,21 +159,19 @@ def _get_output_info(output): assert mode in ['scatter', 'no_scatter'], \ 'output has invalid mode "%s"' % mode filename_source = output['source'].get('filename') + filename_list_source = output['source'].get('filenames') glob_source = output['source'].get('glob') stream_source = output['source'].get('stream') - assert 
sum([bool(filename_source), bool(glob_source), bool(stream_source)]) == 1, \ - 'exactly one type of source is required: "%s"' % output['source'] + assert sum([bool(filename_source), bool(glob_source), bool(stream_source), bool(filename_list_source)]) == 1, \ + 'exactly one type of source is required: "%s"' % output['source'] if glob_source: source_type = 'glob' elif stream_source: source_type = 'stream' elif filename_source: - if isinstance(filename_source, list): - source_type = 'file_list' - elif len(filename_source.split(' ')) > 1: - source_type = 'file_list' - else: - source_type = 'filename' + source_type = 'filename' + elif filename_list_source: + source_type = 'filenames' return (data_type, mode, source_type) def TaskAttemptOutput(output, task_attempt): @@ -185,9 +183,9 @@ def TaskAttemptOutput(output, task_attempt): if data_type == 'file': if mode == 'scatter': - assert source_type in ['file_list', 'glob'], \ + assert source_type in ['filenames', 'glob'], \ 'source type "%s" not allowed' % source_type - if source_type == 'file_list': + if source_type == 'filenames': return FileListScatterOutput(output, task_attempt) return GlobScatterOutput(output, task_attempt) else: @@ -197,11 +195,11 @@ def TaskAttemptOutput(output, task_attempt): return FileOutput(output, task_attempt) else: # data_type is non-file if mode == 'scatter': - assert source_type in ['filename', 'file_list', 'glob', 'stream'], \ + assert source_type in ['filename', 'filenames', 'glob', 'stream'], \ 'source type "%s" not allowed' % source_type if source_type == 'filename': return FileContentsScatterOutput(output, task_attempt) - if source_type == 'file_list': + if source_type == 'filenames': return FileListContentsScatterOutput(output, task_attempt) if source_type == 'glob': return GlobContentsScatterOutput(output, task_attempt) diff --git a/loomengine/worker/parsers.py b/loomengine/worker/parsers.py index 320de567..cbdabca9 100644 --- a/loomengine/worker/parsers.py +++ 
b/loomengine/worker/parsers.py @@ -10,6 +10,8 @@ def __init__(self, options): self.trim = options.get('trim', False) def parse(self, text): + if self.trim: + text = text.strip() text_array = text.split(self.delimiter) if self.trim: text_array = [item.strip() for item in text_array] diff --git a/loomengine/worker/task_execution_manager.py b/loomengine/worker/task_execution_manager.py index 361e9ae6..87e45159 100755 --- a/loomengine/worker/task_execution_manager.py +++ b/loomengine/worker/task_execution_manager.py @@ -508,9 +508,9 @@ def get_stdout_logger(name, log_level_string): # pip entrypoint requires a function with no arguments def main(): - attempt = TaskAttempt() - attempt.run_with_heartbeats(attempt.main) + #attempt.run_with_heartbeats(attempt.main) + attempt.main() if __name__=='__main__': diff --git a/loomengine/worker/test/all_types_inputs_outputs.yaml b/loomengine/worker/test/all_types_inputs_outputs.yaml index e43e48d9..55ff7088 100644 --- a/loomengine/worker/test/all_types_inputs_outputs.yaml +++ b/loomengine/worker/test/all_types_inputs_outputs.yaml @@ -17,7 +17,7 @@ steps: mode: scatter source: # We have duplicate filename to make things difficult - filename: + filenames: - file1.txt - file1.txt - file2.txt @@ -52,7 +52,7 @@ steps: channel: out_274893 mode: scatter source: - filename: "{{file_list}}" + filenames: "{{file_list}}" # Non-file types as input @@ -190,7 +190,7 @@ steps: source: filename: file.txt -# Output is a list of files selected by filename +# Output is a list of files selected by filenames - name: file_list_scatter_output command: echo one > file1.txt; echo two > file2.txt; @@ -201,7 +201,7 @@ steps: type: file mode: scatter source: - filename: + filenames: - file1.txt - file2.txt @@ -248,7 +248,7 @@ steps: delimiter: " " trim: true -# A list of strings taken from a list of files selected by filename +# A list of strings taken from a list of files selected by filenames - name: file_contents_scatter_output command: echo one > 
file1.txt; echo two > file2.txt; @@ -259,7 +259,7 @@ steps: type: string mode: scatter source: - filename: + filenames: - file1.txt - file2.txt