In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# automatically before each cell executes, so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# to be used as subworkflow
import nlppln
import ochre
import os

# The working directory handed to the workflow generator can be overridden with
# the CWL_WORKING_DIR environment variable; when it is unset the original
# location is used, so existing setups keep working unchanged.
working_dir = os.environ.get('CWL_WORKING_DIR', '/home/jvdzwaan/cwl-working-dir/')

with nlppln.WorkflowGenerator(working_dir=working_dir) as wf:
    wf.load(steps_dir=ochre.cwl_path())
    # Show the loaded steps so the step names used below can be checked.
    print(wf.list_steps())

    wf.set_documentation('This workflow is meant to be used as a subworkflow.')

    # Workflow inputs.
    gs_raw = wf.add_input(gs_files='File[]')
    ocr_raw = wf.add_input(ocr_files='File[]')
    language = wf.add_input(language='string')
    lowercase = wf.add_input(lowercase='boolean?')
    align_m = wf.add_input(align_m='string?')
    align_c = wf.add_input(align_c='string?')

    # Normalize whitespace and punctuation, scattering the step over the files
    # of the `gs` texts (presumably the gold standard) and of the OCR texts.
    gs = wf.normalize_whitespace_punctuation(
        meta_in=gs_raw,
        scatter=['meta_in'],
        scatter_method='dotproduct')
    ocr = wf.normalize_whitespace_punctuation(
        meta_in=ocr_raw,
        scatter=['meta_in'],
        scatter_method='dotproduct')

    # align_texts_wf yields three outputs; only the alignments are used below.
    alignments, _changes, _metadata = wf.align_texts_wf(
        gs=gs, ocr=ocr, align_c=align_c, align_m=align_m)

    # Run the `pattern` step on every normalized gs text; its output is passed
    # on as the `saf` input of the sentence-mapping step.
    gs_saf = wf.pattern(
        in_file=gs,
        language=language,
        scatter='in_file',
        scatter_method='dotproduct')

    # Alignments and saf files are scattered together with 'dotproduct', i.e.
    # they are paired element by element.
    sentences = wf.create_sentence_mappings(
        alignments=alignments,
        saf=gs_saf,
        lowercase=lowercase,
        scatter=['alignments', 'saf'],
        scatter_method='dotproduct')

    wf.add_outputs(sentences=sentences)

    # Store the workflow among ochre's CWL steps so that other workflows in this
    # notebook can load it from there.
    wf.save(os.path.join(ochre.cwl_path(), 'sentence-mapping-wf.cwl'), wd=True)

In [None]:
# create sentence mappings for directory
import nlppln
import ochre
import os

# The working directory handed to the workflow generator can be overridden with
# the CWL_WORKING_DIR environment variable; when it is unset the original
# location is used, so existing setups keep working unchanged.
working_dir = os.environ.get('CWL_WORKING_DIR', '/home/jvdzwaan/cwl-working-dir/')

with nlppln.WorkflowGenerator(working_dir=working_dir) as wf:
    wf.load(steps_dir=ochre.cwl_path())
    # Show the loaded steps so the step names used below can be checked.
    print(wf.list_steps())

    # Workflow inputs; the output directory name falls back to 'sentences'.
    gs_dir = wf.add_input(gs_dir='Directory')
    ocr_dir = wf.add_input(ocr_dir='Directory')
    language = wf.add_input(language='string')
    lowercase = wf.add_input(lowercase='boolean?')
    align_m = wf.add_input(align_m='string?')
    align_c = wf.add_input(align_c='string?')
    out_dir_name = wf.add_input(out_dir_name='string?', default='sentences')

    # List the contents of both input directories.
    gs_listing = wf.ls(in_dir=gs_dir)
    ocr_listing = wf.ls(in_dir=ocr_dir)

    # NOTE(review): sentence_mapping_wf is presumably the subworkflow saved as
    # 'sentence-mapping-wf.cwl' in ochre's CWL directory; it has to exist there
    # before this cell runs, otherwise the step is not available after wf.load().
    sentence_files = wf.sentence_mapping_wf(
        gs_files=gs_listing,
        ocr_files=ocr_listing,
        language=language,
        lowercase=lowercase,
        align_c=align_c,
        align_m=align_m)

    # Collect the sentence mapping files in a single output directory.
    sentence_dir = wf.save_files_to_dir(dir_name=out_dir_name, in_files=sentence_files)

    wf.add_outputs(sentence_dir=sentence_dir)
    wf.save(os.path.join(ochre.cwl_path(), 'sentence-mapping-dir.cwl'), pack=True)