In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import nlppln
import ochre
import os

# The ochre CWL directory is both the source of the steps library and the
# destination of the generated workflow file.
cwl_dir = ochre.cwl_path()

with nlppln.WorkflowGenerator(working_dir='/home/jvdzwaan/cwl-working-dir/') as wf:
    wf.load(steps_dir=cwl_dir)
    print(wf.list_steps())

    # Workflow inputs: two file arrays and the names for the merged outputs.
    gold_standard = wf.add_input(gs='File[]')
    ocr_texts = wf.add_input(ocr='File[]')
    metadata_fname = wf.add_input(align_m='string', default='merged_metadata.csv')
    changes_fname = wf.add_input(align_c='string', default='merged_changes.csv')

    # Scatter `align` pairwise (dotproduct) over the OCR and gold standard files.
    changes, metadata = wf.align(
        file1=ocr_texts,
        file2=gold_standard,
        scatter=['file1', 'file2'],
        scatter_method='dotproduct'
    )

    # Pass the scattered metadata and changes outputs to merge_json,
    # producing one merged file for each.
    merged_metadata = wf.merge_json(in_files=metadata, name=metadata_fname)
    merged_changes = wf.merge_json(in_files=changes, name=changes_fname)

    # Scatter `char_align` over matching (gold standard, OCR, metadata) triples.
    char_alignments = wf.char_align(
        gs_text=gold_standard,
        metadata=metadata,
        ocr_text=ocr_texts,
        scatter=['gs_text', 'ocr_text', 'metadata'],
        scatter_method='dotproduct'
    )

    wf.add_outputs(alignments=char_alignments)
    wf.add_outputs(metadata=merged_metadata)
    wf.add_outputs(changes=merged_changes)

    workflow_file = os.path.join(cwl_dir, 'align-texts-wf.cwl')
    wf.save(workflow_file, wd=True)

In [None]:
# Workflow: align all files in a gold standard directory with the files in an OCR directory
import nlppln
import ochre
import os

# The ochre CWL directory is both the source of the steps library and the
# destination of the generated workflow file.
cwl_dir = ochre.cwl_path()

with nlppln.WorkflowGenerator(working_dir='/home/jvdzwaan/cwl-working-dir/') as wf:
    wf.load(steps_dir=cwl_dir)
    print(wf.list_steps())

    # Workflow inputs: the two directories and the name of the output directory.
    gs_dir = wf.add_input(gs='Directory')
    ocr_dir = wf.add_input(ocr='Directory')
    out_dir_name = wf.add_input(align_dir_name='string', default='align')

    # List the contents of both directories with the `ls` step.
    gs_listing = wf.ls(in_dir=gs_dir)
    ocr_listing = wf.ls(in_dir=ocr_dir)

    # Run the align-texts subworkflow; only its alignments output is used here.
    alignments, _changes, _metadata = wf.align_texts_wf(gs=gs_listing, ocr=ocr_listing)

    # Pass the alignment files to save_files_to_dir under the requested name.
    align_dir = wf.save_files_to_dir(dir_name=out_dir_name, in_files=alignments)

    wf.add_outputs(align=align_dir)

    workflow_file = os.path.join(cwl_dir, 'align-dir.cwl')
    wf.save(workflow_file, pack=True)

In [None]:
# Workflow: align only the test files selected (via a data division file) from the gold standard and OCR directories
import nlppln
import ochre
import os

# The ochre CWL directory is both the source of the steps library and the
# destination of the generated workflow file.
cwl_dir = ochre.cwl_path()

with nlppln.WorkflowGenerator(working_dir='/home/jvdzwaan/cwl-working-dir/') as wf:
    wf.load(steps_dir=cwl_dir)
    print(wf.list_steps())

    # Workflow inputs: both text directories, the data division file, an
    # optional division name, and the name of the output directory.
    gs_texts_dir = wf.add_input(gs_dir='Directory')
    ocr_texts_dir = wf.add_input(ocr_dir='Directory')
    division_file = wf.add_input(data_div='File')
    division_name = wf.add_input(div_name='string?')
    out_dir_name = wf.add_input(align_dir_name='string', default='align')

    # Run select_test_files once per directory with the same data division.
    gs_test_files = wf.select_test_files(
        datadivision=division_file,
        name=division_name,
        in_dir=gs_texts_dir
    )
    ocr_test_files = wf.select_test_files(
        datadivision=division_file,
        name=division_name,
        in_dir=ocr_texts_dir
    )

    # Run the align-texts subworkflow; only its alignments output is used here.
    alignments, _changes, _metadata = wf.align_texts_wf(gs=gs_test_files, ocr=ocr_test_files)

    # Pass the alignment files to save_files_to_dir under the requested name.
    align_dir = wf.save_files_to_dir(dir_name=out_dir_name, in_files=alignments)

    wf.add_outputs(align=align_dir)

    workflow_file = os.path.join(cwl_dir, 'align-test-files.cwl')
    wf.save(workflow_file, pack=True)