From 305cbf56357f9787c9b9373f545b8a9646c5f139 Mon Sep 17 00:00:00 2001
From: DavidHuber
Date: Thu, 11 Apr 2024 18:23:24 +0000
Subject: [PATCH] Add basic sed capability to wxflow.

---
 src/wxflow/file_utils.py | 45 +++++++++++++++++++++++++++++++++++++++++--
 tests/test_file_utils.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/src/wxflow/file_utils.py b/src/wxflow/file_utils.py
index a8220fc..4033dbb 100644
--- a/src/wxflow/file_utils.py
+++ b/src/wxflow/file_utils.py
@@ -1,5 +1,6 @@
 from logging import getLogger
 
+from .executable import which
 from .fsutils import cp, mkdir
 
 __all__ = ['FileHandler']
@@ -17,8 +18,8 @@ class FileHandler:
 
     NOTE
     ----
-    "action" can be one of mkdir", "copy", etc.
-    Corresponding "act" would be ['dir1', 'dir2'], [['src1', 'dest1'], ['src2', 'dest2']]
+    "action" can be one of "mkdir", "copy", "sed_replace", etc.
+    Corresponding "act" would be ['dir1', 'dir2'], [['src1', 'dest1'], ['src2', 'dest2']], [['s/search_term/replace_term/', 'src', 'dest']]
 
     Attributes
     ----------
@@ -37,6 +38,7 @@ def sync(self):
         sync_factory = {
             'copy': self._copy_files,
             'mkdir': self._make_dirs,
+            'sed_replace': self._sed_replace_files,
         }
         # loop through the configuration keys
         for action, files in self.config.items():
@@ -75,3 +77,42 @@ def _make_dirs(dirlist):
         for dd in dirlist:
             mkdir(dd)
             logger.info(f'Created {dd}')
+
+    @staticmethod
+    def _sed_replace_files(sedlist):
+        """Function to run sed search and replace on a set of files
+
+        `sedlist` should be in the form:
+        - [s/search/replace/, src, dest]
+
+        If `src` and `dest` are the same path, the file is edited in place
+        (`sed -i`); otherwise the output of sed is written to `dest`.
+
+        Parameters
+        ----------
+        sedlist : list
+                List of lists of [pattern, src, dest]
+
+        Raises
+        ------
+        ValueError
+                If an entry does not have exactly three elements
+        """
+
+        sed = which("sed")
+
+        for sublist in sedlist:
+            if len(sublist) != 3:
+                raise ValueError(
+                    f"List must be of the form ['pattern', 'src', 'dest'], not {sublist}")
+
+            pattern, src, dest = sublist
+
+            # Check for in-place search/replace
+            if src == dest:
+                sed("-i", pattern, src, output=str.split, error=str.split)
+                logger.info(f'Performed sed -i {pattern} {src}')
+            else:
+                # Write the sed output to the destination file
+                sed(pattern, src, output=dest, error=str.split)
+                logger.info(f'Performed sed {pattern} {src} > {dest}')
diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
index 3710881..d5afcc9 100644
--- a/tests/test_file_utils.py
+++ b/tests/test_file_utils.py
@@ -65,3 +65,52 @@ def test_copy(tmp_path):
     # Check if files were indeed copied
     for ff in dest_files:
         assert os.path.isfile(ff)
+
+
+def test_sed_replace(tmp_path):
+    """
+    Test for sed search and replace on files:
+    Parameters
+    ----------
+    tmp_path - pytest fixture
+    """
+
+    input_dir_path = tmp_path / 'my_input_dir'
+
+    # Create the input directory
+    config = {'mkdir': [input_dir_path]}
+    FileHandler(config).sync()
+
+    # Put files containing the search text in input_dir_path
+    src_files = [input_dir_path / 'a.txt', input_dir_path / 'b.txt']
+    for ff in src_files:
+        ff.write_text("Search text")
+
+    # Create output_dir_path and expected file names
+    output_dir_path = tmp_path / 'my_output_dir'
+    config = {'mkdir': [output_dir_path]}
+    FileHandler(config).sync()
+    # Create one new file and replace in place for the second
+    dest_files = [output_dir_path / 'a.txt', input_dir_path / 'b.txt']
+
+    sed_list = []
+    pattern = "s/Search text/Text found and replaced/"
+    for src, dest in zip(src_files, dest_files):
+        sed_list.append([pattern, str(src), str(dest)])
+
+    # Create config object for FileHandler
+    config = {'sed_replace': sed_list}
+
+    # Run the search and replace
+    FileHandler(config).sync()
+
+    # Check that the destination files exist and contain the replaced text
+    for ff in dest_files:
+        assert os.path.isfile(ff)
+        with open(ff) as f:
+            lines = f.readlines()
+            assert len(lines) == 1
+            assert "Text found and replaced" in lines[0]
+
+    # The source of the non-in-place replacement must be left untouched
+    assert src_files[0].read_text() == "Search text"