From d7c537bddef2cf7036e66c164b56319760ca5914 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Tue, 8 Oct 2024 09:46:58 -0400 Subject: [PATCH 01/16] smarter overwrite for Volume and Image save. Tests for overwrite behavior. --- src/aspire/image/image.py | 26 ++++++++++++++-- src/aspire/volume/volume.py | 27 +++++++++++++---- tests/test_image.py | 59 +++++++++++++++++++++++++++++++++++++ tests/test_volume.py | 59 +++++++++++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 8 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 57b32041cd..8c1be9ed2e 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -1,5 +1,6 @@ import logging import os +from datetime import datetime from warnings import catch_warnings, filterwarnings, simplefilter, warn import matplotlib.pyplot as plt @@ -484,11 +485,32 @@ def filter(self, filter): def rotate(self): raise NotImplementedError - def save(self, mrcs_filepath, overwrite=False): + def save(self, mrcs_filepath, overwrite=None): + """ + Save Image to disk as mrcs file + + :param filename: Filepath where Image will be saved. + :param overwrite: Options to control overwrite behavior (default is None): + - True: Overwrites the existing file if it exists. + - False: Raises an error if the file exists. + - None: Renames the old file by appending a time/date stamp. + """ if self.stack_ndim > 1: raise NotImplementedError("`save` is currently limited to 1D image stacks.") - with mrcfile.new(mrcs_filepath, overwrite=overwrite) as mrc: + if overwrite is None and os.path.exists(mrcs_filepath): + # If the file exists, append the timestamp to the old file and rename it + base, ext = os.path.splitext(mrcs_filepath) + timestamp = datetime.now().strftime("%y%m%d_%H%M%S") + renamed_filepath = f"{base}_{timestamp}{ext}" + logger.info( + f"Found existing file with name {mrcs_filepath}. Renaming existing file as {renamed_filepath}." + ) + + # Rename the existing file by appending the timestamp + os.rename(mrcs_filepath, renamed_filepath) + + with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: # original input format (the image index first) mrc.set_data(self._data.astype(np.float32)) # Note assigning voxel_size must come after `set_data` diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index 0ed31b8b74..bd46e64dae 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -1,5 +1,7 @@ import logging +import os import warnings +from datetime import datetime import mrcfile import numpy as np @@ -635,21 +637,34 @@ def rotate(self, rot_matrices, zero_nyquist=True): def denoise(self): raise NotImplementedError - def save(self, filename, overwrite=False): + def save(self, filename, overwrite=None): """ Save volume to disk as mrc file - :param filename: Filepath where volume will be saved - - :param overwrite: Option to overwrite file when set to True. - Defaults to overwrite=False. + :param filename: Filepath where volume will be saved. + :param overwrite: Options to control overwrite behavior (default is None): + - True: Overwrites the existing file if it exists. + - False: Raises an error if the file exists. + - None: Renames the old file by appending a time/date stamp. """ if self.stack_ndim > 1: raise NotImplementedError( "`save` is currently limited to 1D Volume stacks." ) - with mrcfile.new(filename, overwrite=overwrite) as mrc: + if overwrite is None and os.path.exists(filename): + # If the file exists, append the timestamp to the old file and rename it + base, ext = os.path.splitext(filename) + timestamp = datetime.now().strftime("%y%m%d_%H%M%S") + renamed_filepath = f"{base}_{timestamp}{ext}" + logger.info( + f"Found existing file with name {filename}. Renaming existing file as {renamed_filepath}." + ) + + # Rename the existing file by appending the timestamp + os.rename(filename, renamed_filepath) + + with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: mrc.set_data(self._data.astype(np.float32)) # Note assigning voxel_size must come after `set_data` if self.pixel_size is not None: diff --git a/tests/test_image.py b/tests/test_image.py index 89fbde4a84..3e1664ad77 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -353,6 +353,65 @@ def test_asnumpy_readonly(): vw[0, 0, 0] = 123 +def test_save_overwrite(caplog): + """ + Test that the overwrite flag behaves as expected. + - overwrite=True: Overwrites the existing file. + - overwrite=False: Raises an error if the file exists. + - overwrite=None: Renames the existing file and saves the new one. + """ + # Create a tmp dir for this test output + with tempfile.TemporaryDirectory() as tmpdir_name: + # tmp filename + mrc_path = os.path.join(tmpdir_name, "og.mrc") + base, ext = os.path.splitext(mrc_path) + + # Create and save the first image + im1 = Image(np.ones((1, 8, 8), dtype=np.float32)) + im1.save(mrc_path, overwrite=True) + + # Case 1: overwrite=True (should overwrite the existing file) + im2 = Image(2 * np.ones((1, 8, 8), dtype=np.float32)) + im2.save(mrc_path, overwrite=True) + + # Load and check if im2 has overwritten im1 + im2_loaded = Image.load(mrc_path) + np.testing.assert_allclose(im2.asnumpy(), im2_loaded.asnumpy()) + + # Case 2: overwrite=False (should raise an overwrite error) + im3 = Image(3 * np.ones((1, 8, 8), dtype=np.float32)) + + with pytest.raises( + ValueError, + match="File '.*' already exists; set overwrite=True to overwrite it", + ): + im3.save(mrc_path, overwrite=False) + + # Case 3: overwrite=None (should rename the existing file and save im3 with original filename) + with caplog.at_level(logging.INFO): + im3.save(mrc_path, overwrite=None) + + # Check that the existing file was renamed and logged + assert f"Found existing file with name {mrc_path}" in caplog.text + + # Find the renamed file by checking the directory contents + renamed_file = None + for filename in os.listdir(tmpdir_name): + if filename.startswith("og_") and filename.endswith(".mrc"): + renamed_file = os.path.join(tmpdir_name, filename) + break + + assert renamed_file is not None, "Renamed file not found" + + # Load and check that im3 was saved to the original path + im3_loaded = Image.load(mrc_path) + np.testing.assert_allclose(im3.asnumpy(), im3_loaded.asnumpy()) + + # Also check that the renamed file still contains im2's data + im2_loaded_renamed = Image.load(renamed_file) + np.testing.assert_allclose(im2.asnumpy(), im2_loaded_renamed.asnumpy()) + + def test_corrupt_mrc_load(caplog): """ Test that corrupt mrc files are logged as expected. diff --git a/tests/test_volume.py b/tests/test_volume.py index 1f55645e10..254e2fca0c 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -296,6 +296,65 @@ def test_save_load(vols_1): assert vols_loaded_double.pixel_size is None, "Pixel size should be None" +def test_save_overwrite(caplog): + """ + Test that the overwrite flag behaves as expected. + - overwrite=True: Overwrites the existing file. + - overwrite=False: Raises an error if the file exists. + - overwrite=None: Renames the existing file and saves the new one. + """ + # Create a tmp dir for this test output + with tempfile.TemporaryDirectory() as tmpdir_name: + # tmp filename + mrc_path = os.path.join(tmpdir_name, "og.mrc") + base, ext = os.path.splitext(mrc_path) + + # Create and save the first image + vol1 = Volume(np.ones((1, 8, 8, 8), dtype=np.float32)) + vol1.save(mrc_path, overwrite=True) + + # Case 1: overwrite=True (should overwrite the existing file) + vol2 = Volume(2 * np.ones((1, 8, 8, 8), dtype=np.float32)) + vol2.save(mrc_path, overwrite=True) + + # Load and check if vol2 has overwritten vol1 + vol2_loaded = Volume.load(mrc_path) + np.testing.assert_allclose(vol2.asnumpy(), vol2_loaded.asnumpy()) + + # Case 2: overwrite=False (should raise an overwrite error) + vol3 = Volume(3 * np.ones((1, 8, 8, 8), dtype=np.float32)) + + with pytest.raises( + ValueError, + match="File '.*' already exists; set overwrite=True to overwrite it", + ): + vol3.save(mrc_path, overwrite=False) + + # Case 3: overwrite=None (should rename the existing file and save vol3 with original filename) + with caplog.at_level(logging.INFO): + vol3.save(mrc_path, overwrite=None) + + # Check that the existing file was renamed and logged + assert f"Found existing file with name {mrc_path}" in caplog.text + + # Find the renamed file by checking the directory contents + renamed_file = None + for filename in os.listdir(tmpdir_name): + if filename.startswith("og_") and filename.endswith(".mrc"): + renamed_file = os.path.join(tmpdir_name, filename) + break + + assert renamed_file is not None, "Renamed file not found" + + # Load and check that vol3 was saved to the original path + vol3_loaded = Volume.load(mrc_path) + np.testing.assert_allclose(vol3.asnumpy(), vol3_loaded.asnumpy()) + + # Also check that the renamed file still contains vol2's data + vol2_loaded_renamed = Volume.load(renamed_file) + np.testing.assert_allclose(vol2.asnumpy(), vol2_loaded_renamed.asnumpy()) + + def test_volume_pixel_size(vols_2): """ Test volume is storing pixel_size attribute. From 9b7320e96792babd9c2685697f738738ac81e580 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Tue, 8 Oct 2024 16:08:59 -0400 Subject: [PATCH 02/16] smarter overwrite for ImageSource save. Tests for overwrite behavior. --- src/aspire/source/image.py | 28 +++++++++++++-- tests/test_simulation.py | 70 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/src/aspire/source/image.py b/src/aspire/source/image.py index 84cf26e1f9..68b196df9f 100644 --- a/src/aspire/source/image.py +++ b/src/aspire/source/image.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from collections import OrderedDict from collections.abc import Iterable +from datetime import datetime import mrcfile import numpy as np @@ -976,7 +977,7 @@ def save( starfile_filepath, batch_size=512, save_mode=None, - overwrite=False, + overwrite=None, ): """ Save the output metadata to STAR file and/or images to MRCS file. @@ -988,10 +989,31 @@ def save( while `batch_size>=1` implies stack MRC extension `.mrcs`. :param save_mode: Whether to save all images in a `single` or multiple files in batch size. Default is multiple, supply `'single'` for single mode. - :param overwrite: Option to overwrite the output MRC files. + :param overwrite: Options to control overwrite behavior (default is None): + - True: Overwrites the existing file if it exists. + - False: Raises an error if the file exists. + - None: Renames the old file by appending a time/date stamp. :return: A dictionary containing "starfile"--the path to the saved starfile-- and "mrcs", a list of the saved particle stack MRC filenames. """ + if overwrite is None and os.path.exists(starfile_filepath): + # If the file exists, append the timestamp to the old file and rename it + base, ext = os.path.splitext(starfile_filepath) + timestamp = datetime.now().strftime("%y%m%d_%H%M%S") + renamed_filepath = f"{base}_{timestamp}{ext}" + logger.info( + f"Found existing file with name {starfile_filepath}. Renaming existing file as {renamed_filepath}." + ) + + # Retrieve original ImageSource and save with new starfile name. + from aspire.source import RelionSource + + src = RelionSource(starfile_filepath) + src.save(renamed_filepath, overwrite=False) + + # Allow overwriting old files. + overwrite = True + logger.info("save metadata into STAR file") filename_indices = self.save_metadata( starfile_filepath, @@ -1005,7 +1027,7 @@ def save( starfile_filepath, filename_indices=filename_indices, batch_size=batch_size, - overwrite=overwrite, + overwrite=bool(overwrite), ) # return some information about the saved files info = {"starfile": starfile_filepath, "mrcs": unique_filenames} diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 2331339265..17a1f43800 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -666,6 +666,76 @@ def test_cached_image_accessors(): ) +def test_save_overwrite(caplog): + """ + Test that the overwrite flag behaves as expected. + - overwrite=True: Overwrites the existing file. + - overwrite=False: Raises an error if the file exists. + - overwrite=None: Renames the existing file and saves the new one. + """ + sim1 = Simulation(seed=1) + sim2 = Simulation(seed=2) + sim3 = Simulation(seed=3) + + # Create a tmp dir for this test output + with tempfile.TemporaryDirectory() as tmpdir_name: + # tmp filename + starfile = os.path.join(tmpdir_name, "og.star") + base, ext = os.path.splitext(starfile) + + sim1.save(starfile, overwrite=True) + + # Case 1: overwrite=True (should overwrite the existing file) + sim2.save(starfile, overwrite=True) + + # Load and check if sim2 has overwritten sim1 + sim2_loaded = RelionSource(starfile) + np.testing.assert_allclose( + sim2.images[:].asnumpy(), + sim2_loaded.images[:].asnumpy(), + atol=utest_tolerance(sim2.dtype), + ) + + # Case 2: overwrite=False (should raise an overwrite error) + with raises( + ValueError, + match="File '.*' already exists; set overwrite=True to overwrite it", + ): + sim2.save(starfile, overwrite=False) + + # case 3: overwrite=None (should rename the existing file and save im3 with original filename) + with caplog.at_level(logging.INFO): + sim3.save(starfile, overwrite=None) + + # Check that the existing file was renamed and logged + assert f"Found existing file with name {starfile}" in caplog.text + + # Find the renamed file by checking the directory contents + renamed_file = None + for filename in os.listdir(tmpdir_name): + if filename.startswith("og_") and filename.endswith(".star"): + renamed_file = os.path.join(tmpdir_name, filename) + break + + assert renamed_file is not None, "Renamed file not found" + + # Load and check that sim3 was saved to the original path + sim3_loaded = RelionSource(starfile) + np.testing.assert_allclose( + sim3.images[:].asnumpy(), + sim3_loaded.images[:].asnumpy(), + atol=utest_tolerance(sim3.dtype), + ) + + # Also check that the renamed file still contains sim2's data + sim2_loaded_renamed = RelionSource(renamed_file) + np.testing.assert_allclose( + sim2.images[:].asnumpy(), + sim2_loaded_renamed.images[:].asnumpy(), + atol=utest_tolerance(sim2.dtype), + ) + + def test_mismatched_pixel_size(): """ Confirm raises error when explicit Simulation and CTFFilter pixel sizes mismatch. From 893a4b3a1d492571e6a405456a1f19567ecf9191 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 9 Oct 2024 08:59:32 -0400 Subject: [PATCH 03/16] cleanup --- tests/test_image.py | 8 ++++---- tests/test_volume.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_image.py b/tests/test_image.py index 3e1664ad77..825acdaac1 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -360,6 +360,10 @@ def test_save_overwrite(caplog): - overwrite=False: Raises an error if the file exists. - overwrite=None: Renames the existing file and saves the new one. """ + im1 = Image(np.ones((1, 8, 8), dtype=np.float32)) + im2 = Image(2 * np.ones((1, 8, 8), dtype=np.float32)) + im3 = Image(3 * np.ones((1, 8, 8), dtype=np.float32)) + # Create a tmp dir for this test output with tempfile.TemporaryDirectory() as tmpdir_name: # tmp filename @@ -367,11 +371,9 @@ def test_save_overwrite(caplog): base, ext = os.path.splitext(mrc_path) # Create and save the first image - im1 = Image(np.ones((1, 8, 8), dtype=np.float32)) im1.save(mrc_path, overwrite=True) # Case 1: overwrite=True (should overwrite the existing file) - im2 = Image(2 * np.ones((1, 8, 8), dtype=np.float32)) im2.save(mrc_path, overwrite=True) # Load and check if im2 has overwritten im1 @@ -379,8 +381,6 @@ def test_save_overwrite(caplog): np.testing.assert_allclose(im2.asnumpy(), im2_loaded.asnumpy()) # Case 2: overwrite=False (should raise an overwrite error) - im3 = Image(3 * np.ones((1, 8, 8), dtype=np.float32)) - with pytest.raises( ValueError, match="File '.*' already exists; set overwrite=True to overwrite it", diff --git a/tests/test_volume.py b/tests/test_volume.py index 254e2fca0c..3812356be0 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -303,6 +303,10 @@ def test_save_overwrite(caplog): - overwrite=False: Raises an error if the file exists. - overwrite=None: Renames the existing file and saves the new one. """ + vol1 = Volume(np.ones((1, 8, 8, 8), dtype=np.float32)) + vol2 = Volume(2 * np.ones((1, 8, 8, 8), dtype=np.float32)) + vol3 = Volume(3 * np.ones((1, 8, 8, 8), dtype=np.float32)) + # Create a tmp dir for this test output with tempfile.TemporaryDirectory() as tmpdir_name: # tmp filename @@ -310,11 +314,9 @@ def test_save_overwrite(caplog): base, ext = os.path.splitext(mrc_path) # Create and save the first image - vol1 = Volume(np.ones((1, 8, 8, 8), dtype=np.float32)) vol1.save(mrc_path, overwrite=True) # Case 1: overwrite=True (should overwrite the existing file) - vol2 = Volume(2 * np.ones((1, 8, 8, 8), dtype=np.float32)) vol2.save(mrc_path, overwrite=True) # Load and check if vol2 has overwritten vol1 @@ -322,8 +324,6 @@ def test_save_overwrite(caplog): np.testing.assert_allclose(vol2.asnumpy(), vol2_loaded.asnumpy()) # Case 2: overwrite=False (should raise an overwrite error) - vol3 = Volume(3 * np.ones((1, 8, 8, 8), dtype=np.float32)) - with pytest.raises( ValueError, match="File '.*' already exists; set overwrite=True to overwrite it", From d0dfd907b48c04075546897b245549f12aa03581 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 9 Oct 2024 10:10:59 -0400 Subject: [PATCH 04/16] Add check_metadata to test. --- tests/test_simulation.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 17a1f43800..1b9947ff73 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -696,6 +696,9 @@ def test_save_overwrite(caplog): atol=utest_tolerance(sim2.dtype), ) + # Check that metadata is unchanged. + check_metadata(sim2, sim2_loaded) + # Case 2: overwrite=False (should raise an overwrite error) with raises( ValueError, @@ -726,6 +729,7 @@ def test_save_overwrite(caplog): sim3_loaded.images[:].asnumpy(), atol=utest_tolerance(sim3.dtype), ) + check_metadata(sim3, sim3_loaded) # Also check that the renamed file still contains sim2's data sim2_loaded_renamed = RelionSource(renamed_file) @@ -734,6 +738,23 @@ def test_save_overwrite(caplog): sim2_loaded_renamed.images[:].asnumpy(), atol=utest_tolerance(sim2.dtype), ) + check_metadata(sim2, sim2_loaded_renamed) + + +def check_metadata(sim_src, relion_src): + """ + Helper function to test if metadata fields in a Simulation match + those in a RelionSource. + """ + for k, v in sim_src._metadata.items(): + try: + # First try allclose. Loaded metadata might be strings, + # so we cast to the same type as v. + np.testing.assert_allclose( + v, np.array(relion_src._metadata[k]).astype(type(v[0])) + ) + except: + np.testing.assert_array_equal(v, relion_src._metadata[k]) def test_mismatched_pixel_size(): From 68a87e2e1d0184cbf2276488441b8d992ed9853f Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 9 Oct 2024 10:21:22 -0400 Subject: [PATCH 05/16] fix try/except --- tests/test_simulation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 1b9947ff73..0fc45a9a12 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -748,13 +748,12 @@ def check_metadata(sim_src, relion_src): """ for k, v in sim_src._metadata.items(): try: - # First try allclose. Loaded metadata might be strings, - # so we cast to the same type as v. + np.testing.assert_array_equal(v, relion_src._metadata[k]) + except AssertionError: + # Loaded metadata might be strings so recast. np.testing.assert_allclose( v, np.array(relion_src._metadata[k]).astype(type(v[0])) ) - except: - np.testing.assert_array_equal(v, relion_src._metadata[k]) def test_mismatched_pixel_size(): From 434785661f9627f3c97f91ca1dafe5d16b4ef224 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 9 Oct 2024 11:15:08 -0400 Subject: [PATCH 06/16] rename_file function. --- src/aspire/image/image.py | 21 +++++++++------------ src/aspire/source/image.py | 10 ++-------- src/aspire/utils/__init__.py | 1 + src/aspire/utils/misc.py | 23 +++++++++++++++++++++++ src/aspire/volume/volume.py | 14 +++----------- tests/test_image.py | 2 +- tests/test_simulation.py | 2 +- tests/test_volume.py | 2 +- 8 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 8c1be9ed2e..3198b95dc2 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -1,6 +1,5 @@ import logging import os -from datetime import datetime from warnings import catch_warnings, filterwarnings, simplefilter, warn import matplotlib.pyplot as plt @@ -13,7 +12,13 @@ import aspire.volume from aspire.nufft import anufft, nufft from aspire.numeric import fft, xp -from aspire.utils import FourierRingCorrelation, anorm, crop_pad_2d, grid_2d +from aspire.utils import ( + FourierRingCorrelation, + anorm, + crop_pad_2d, + grid_2d, + rename_file, +) from aspire.volume import SymmetryGroup logger = logging.getLogger(__name__) @@ -499,16 +504,8 @@ def save(self, mrcs_filepath, overwrite=None): raise NotImplementedError("`save` is currently limited to 1D image stacks.") if overwrite is None and os.path.exists(mrcs_filepath): - # If the file exists, append the timestamp to the old file and rename it - base, ext = os.path.splitext(mrcs_filepath) - timestamp = datetime.now().strftime("%y%m%d_%H%M%S") - renamed_filepath = f"{base}_{timestamp}{ext}" - logger.info( - f"Found existing file with name {mrcs_filepath}. Renaming existing file as {renamed_filepath}." - ) - - # Rename the existing file by appending the timestamp - os.rename(mrcs_filepath, renamed_filepath) + # If the file exists, append a timestamp to the old file and rename it + rename_file(mrcs_filepath) with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: # original input format (the image index first) diff --git a/src/aspire/source/image.py b/src/aspire/source/image.py index 68b196df9f..5990a111df 100644 --- a/src/aspire/source/image.py +++ b/src/aspire/source/image.py @@ -5,7 +5,6 @@ from abc import ABC, abstractmethod from collections import OrderedDict from collections.abc import Iterable -from datetime import datetime import mrcfile import numpy as np @@ -29,7 +28,7 @@ PowerFilter, ) from aspire.storage import MrcStats, StarFile -from aspire.utils import Rotation, grid_2d, support_mask, trange +from aspire.utils import Rotation, grid_2d, rename_file, support_mask, trange from aspire.volume import IdentitySymmetryGroup, SymmetryGroup logger = logging.getLogger(__name__) @@ -998,12 +997,7 @@ def save( """ if overwrite is None and os.path.exists(starfile_filepath): # If the file exists, append the timestamp to the old file and rename it - base, ext = os.path.splitext(starfile_filepath) - timestamp = datetime.now().strftime("%y%m%d_%H%M%S") - renamed_filepath = f"{base}_{timestamp}{ext}" - logger.info( - f"Found existing file with name {starfile_filepath}. Renaming existing file as {renamed_filepath}." - ) + renamed_filepath = rename_file(starfile_filepath, move=False) # Retrieve original ImageSource and save with new starfile name. from aspire.source import RelionSource diff --git a/src/aspire/utils/__init__.py b/src/aspire/utils/__init__.py index e691e0ba5e..e9f82bdc2a 100644 --- a/src/aspire/utils/__init__.py +++ b/src/aspire/utils/__init__.py @@ -28,6 +28,7 @@ inverse_r, J_conjugate, powerset, + rename_file, sha256sum, support_mask, fuzzy_mask, diff --git a/src/aspire/utils/misc.py b/src/aspire/utils/misc.py index d0c30b9f90..82e0ef526c 100644 --- a/src/aspire/utils/misc.py +++ b/src/aspire/utils/misc.py @@ -5,7 +5,9 @@ import hashlib import importlib.resources import logging +import os import sys +from datetime import datetime from itertools import chain, combinations import numpy as np @@ -48,6 +50,27 @@ def importlib_path(package, resource): return p +def rename_file(filepath, move=True): + """ + Rename a file by appending a timestamp to the end of the filename. + + :param filepath: Filepath to rename. + :param move: Option to rename the file on disk. + + :return: If move=False, returns filepath with timestamp appended. + """ + base, ext = os.path.splitext(filepath) + timestamp = datetime.now().strftime("%y%m%d_%H%M%S") + renamed_filepath = f"{base}_{timestamp}{ext}" + logger.info(f"Renaming {filepath} as {renamed_filepath}.") + + # Rename the existing file by appending the timestamp. + if move: + os.rename(filepath, renamed_filepath) + else: + return renamed_filepath + + def abs2(x): """ Compute complex modulus squared. diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index bd46e64dae..b17c278fbd 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -1,7 +1,6 @@ import logging import os import warnings -from datetime import datetime import mrcfile import numpy as np @@ -18,6 +17,7 @@ grid_2d, grid_3d, mat_to_vec, + rename_file, vec_to_mat, ) from aspire.volume import IdentitySymmetryGroup, SymmetryGroup @@ -653,16 +653,8 @@ def save(self, filename, overwrite=None): ) if overwrite is None and os.path.exists(filename): - # If the file exists, append the timestamp to the old file and rename it - base, ext = os.path.splitext(filename) - timestamp = datetime.now().strftime("%y%m%d_%H%M%S") - renamed_filepath = f"{base}_{timestamp}{ext}" - logger.info( - f"Found existing file with name {filename}. Renaming existing file as {renamed_filepath}." - ) - - # Rename the existing file by appending the timestamp - os.rename(filename, renamed_filepath) + # If the file exists, append a timestamp to the old file and rename it + rename_file(filename) with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: mrc.set_data(self._data.astype(np.float32)) diff --git a/tests/test_image.py b/tests/test_image.py index 825acdaac1..f6bfc64f3c 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -392,7 +392,7 @@ def test_save_overwrite(caplog): im3.save(mrc_path, overwrite=None) # Check that the existing file was renamed and logged - assert f"Found existing file with name {mrc_path}" in caplog.text + assert f"Renaming {mrc_path}" in caplog.text # Find the renamed file by checking the directory contents renamed_file = None diff --git a/tests/test_simulation.py b/tests/test_simulation.py index 0fc45a9a12..944e2e7c06 100644 --- a/tests/test_simulation.py +++ b/tests/test_simulation.py @@ -711,7 +711,7 @@ def test_save_overwrite(caplog): sim3.save(starfile, overwrite=None) # Check that the existing file was renamed and logged - assert f"Found existing file with name {starfile}" in caplog.text + assert f"Renaming {starfile}" in caplog.text # Find the renamed file by checking the directory contents renamed_file = None diff --git a/tests/test_volume.py b/tests/test_volume.py index 3812356be0..0300bd3e71 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -335,7 +335,7 @@ def test_save_overwrite(caplog): vol3.save(mrc_path, overwrite=None) # Check that the existing file was renamed and logged - assert f"Found existing file with name {mrc_path}" in caplog.text + assert f"Renaming {mrc_path}" in caplog.text # Find the renamed file by checking the directory contents renamed_file = None From e091e7e8c7deb60375519a29d924b54bc76d0ee2 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 9 Oct 2024 11:58:10 -0400 Subject: [PATCH 07/16] rename_file test. --- src/aspire/image/image.py | 2 +- src/aspire/utils/misc.py | 6 +++--- src/aspire/volume/volume.py | 2 +- tests/test_utils.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 3198b95dc2..eaca3a4abb 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -505,7 +505,7 @@ def save(self, mrcs_filepath, overwrite=None): if overwrite is None and os.path.exists(mrcs_filepath): # If the file exists, append a timestamp to the old file and rename it - rename_file(mrcs_filepath) + _ = rename_file(mrcs_filepath) with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: # original input format (the image index first) diff --git a/src/aspire/utils/misc.py b/src/aspire/utils/misc.py index 82e0ef526c..49a31e12d7 100644 --- a/src/aspire/utils/misc.py +++ b/src/aspire/utils/misc.py @@ -57,7 +57,7 @@ def rename_file(filepath, move=True): :param filepath: Filepath to rename. :param move: Option to rename the file on disk. - :return: If move=False, returns filepath with timestamp appended. + :return: filepath with timestamp appended. """ base, ext = os.path.splitext(filepath) timestamp = datetime.now().strftime("%y%m%d_%H%M%S") @@ -67,8 +67,8 @@ def rename_file(filepath, move=True): # Rename the existing file by appending the timestamp. if move: os.rename(filepath, renamed_filepath) - else: - return renamed_filepath + + return renamed_filepath def abs2(x): diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index b17c278fbd..8f53eaa731 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -654,7 +654,7 @@ def save(self, filename, overwrite=None): if overwrite is None and os.path.exists(filename): # If the file exists, append a timestamp to the old file and rename it - rename_file(filename) + _ = rename_file(filename) with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: mrc.set_data(self._data.astype(np.float32)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 040f427758..5d48820b21 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import tempfile import warnings from contextlib import contextmanager +from datetime import datetime import matplotlib import numpy as np @@ -20,6 +21,7 @@ num_procs_suggestion, physical_core_cpu_suggestion, powerset, + rename_file, utest_tolerance, virtual_core_cpu_suggestion, ) @@ -112,6 +114,32 @@ def test_get_full_version_unexpected(monkeypatch): assert get_full_version() == __version__ + ".x" +def test_rename_file(): + with tempfile.TemporaryDirectory() as tmpdir_name: + filepath = os.path.join(tmpdir_name, "test_file.name") + base, ext = os.path.splitext(filepath) + + # Create file on disk. + with open(filepath, "w") as f: + f.write("Test file") + + # Case 1: move=False should return the new file name with apended timestamp. + renamed_file = rename_file(filepath, move=False) + timestamp = datetime.now().strftime("%y%m%d_%H%M") + assert renamed_file.startswith(f"{base}_{timestamp}") and renamed_file.endswith( + f"{ext}" + ) + + # Case 2: move=True (default) should rename file on disk. + renamed_file = rename_file(filepath) + + # Check that the original file no longer exists. + assert not os.path.exists(filepath) + + # Check that the new file exists on disk with the expected name. + assert os.path.exists(renamed_file) + + def test_power_set(): ref = sorted([(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]) s = range(1, 4) From cad0273e074f8d46088259ffcfb70d64740d3c2d Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Fri, 18 Oct 2024 09:07:30 -0400 Subject: [PATCH 08/16] rename_file --> timestamp_filename. Use mock datetime. --- src/aspire/image/image.py | 4 ++-- src/aspire/source/image.py | 4 ++-- src/aspire/utils/__init__.py | 2 +- src/aspire/utils/misc.py | 2 +- src/aspire/volume/volume.py | 4 ++-- tests/test_image.py | 27 ++++++++++++++++----------- tests/test_utils.py | 8 ++++---- 7 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index eaca3a4abb..bc292191ba 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -17,7 +17,7 @@ anorm, crop_pad_2d, grid_2d, - rename_file, + timestamp_filename, ) from aspire.volume import SymmetryGroup @@ -505,7 +505,7 @@ def save(self, mrcs_filepath, overwrite=None): if overwrite is None and os.path.exists(mrcs_filepath): # If the file exists, append a timestamp to the old file and rename it - _ = rename_file(mrcs_filepath) + _ = timestamp_filename(mrcs_filepath) with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: # original input format (the image index first) diff --git a/src/aspire/source/image.py b/src/aspire/source/image.py index 5990a111df..623f7cb713 100644 --- a/src/aspire/source/image.py +++ b/src/aspire/source/image.py @@ -28,7 +28,7 @@ PowerFilter, ) from aspire.storage import MrcStats, StarFile -from aspire.utils import Rotation, grid_2d, rename_file, support_mask, trange +from aspire.utils import Rotation, grid_2d, support_mask, timestamp_filename, trange from aspire.volume import IdentitySymmetryGroup, SymmetryGroup logger = logging.getLogger(__name__) @@ -997,7 +997,7 @@ def save( """ if overwrite is None and os.path.exists(starfile_filepath): # If the file exists, append the timestamp to the old file and rename it - renamed_filepath = rename_file(starfile_filepath, move=False) + renamed_filepath = timestamp_filename(starfile_filepath, move=False) # Retrieve original ImageSource and save with new starfile name. from aspire.source import RelionSource diff --git a/src/aspire/utils/__init__.py b/src/aspire/utils/__init__.py index e9f82bdc2a..10e5e5c855 100644 --- a/src/aspire/utils/__init__.py +++ b/src/aspire/utils/__init__.py @@ -28,7 +28,7 @@ inverse_r, J_conjugate, powerset, - rename_file, + timestamp_filename, sha256sum, support_mask, fuzzy_mask, diff --git a/src/aspire/utils/misc.py b/src/aspire/utils/misc.py index 49a31e12d7..845607d9e5 100644 --- a/src/aspire/utils/misc.py +++ b/src/aspire/utils/misc.py @@ -50,7 +50,7 @@ def importlib_path(package, resource): return p -def rename_file(filepath, move=True): +def timestamp_filename(filepath, move=True): """ Rename a file by appending a timestamp to the end of the filename. diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index 8f53eaa731..a26878de33 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -17,7 +17,7 @@ grid_2d, grid_3d, mat_to_vec, - rename_file, + timestamp_filename, vec_to_mat, ) from aspire.volume import IdentitySymmetryGroup, SymmetryGroup @@ -654,7 +654,7 @@ def save(self, filename, overwrite=None): if overwrite is None and os.path.exists(filename): # If the file exists, append a timestamp to the old file and rename it - _ = rename_file(filename) + _ = timestamp_filename(filename) with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: mrc.set_data(self._data.astype(np.float32)) diff --git a/tests/test_image.py b/tests/test_image.py index f6bfc64f3c..e62eb0d267 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -1,6 +1,8 @@ import logging import os.path import tempfile +from datetime import datetime +from unittest import mock import mrcfile import numpy as np @@ -388,20 +390,23 @@ def test_save_overwrite(caplog): im3.save(mrc_path, overwrite=False) # Case 3: overwrite=None (should rename the existing file and save im3 with original filename) - with caplog.at_level(logging.INFO): - im3.save(mrc_path, overwrite=None) + # Mock datetime to return a fixed timestamp. + mock_timestamp = datetime(1879, 3, 14, 12, 0, 0).strftime("%y%m%d_%H%M%S") + with mock.patch("aspire.utils.misc.datetime") as mock_datetime: + mock_datetime.now.return_value = datetime(1879, 3, 14, 12, 0, 0) + mock_datetime.strftime = datetime.strftime - # Check that the existing file was renamed and logged - assert f"Renaming {mrc_path}" in caplog.text + with caplog.at_level(logging.INFO): + im3.save(mrc_path, overwrite=None) - # Find the renamed file by checking the directory contents - renamed_file = None - for filename in os.listdir(tmpdir_name): - if filename.startswith("og_") and filename.endswith(".mrc"): - renamed_file = os.path.join(tmpdir_name, filename) - break + # Check that the existing file was renamed and logged + assert f"Renaming {mrc_path}" in caplog.text - assert renamed_file is not None, "Renamed file not found" + # Construct the expected renamed filename using the mock timestamp + renamed_file = f"{base}_{mock_timestamp}{ext}" + + # Assert that the renamed file exists + assert os.path.exists(renamed_file), "Renamed file not found" # Load and check that im3 was saved to the original path im3_loaded = Image.load(mrc_path) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5d48820b21..93f5885506 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,7 +21,7 @@ num_procs_suggestion, physical_core_cpu_suggestion, powerset, - rename_file, + timestamp_filename, utest_tolerance, virtual_core_cpu_suggestion, ) @@ -114,7 +114,7 @@ def test_get_full_version_unexpected(monkeypatch): assert get_full_version() == __version__ + ".x" -def test_rename_file(): +def test_timestamp_filename(): with tempfile.TemporaryDirectory() as tmpdir_name: filepath = os.path.join(tmpdir_name, "test_file.name") base, ext = os.path.splitext(filepath) @@ -124,14 +124,14 @@ def test_rename_file(): f.write("Test file") # Case 1: move=False should return the new file name with apended timestamp. - renamed_file = rename_file(filepath, move=False) + renamed_file = timestamp_filename(filepath, move=False) timestamp = datetime.now().strftime("%y%m%d_%H%M") assert renamed_file.startswith(f"{base}_{timestamp}") and renamed_file.endswith( f"{ext}" ) # Case 2: move=True (default) should rename file on disk. - renamed_file = rename_file(filepath) + renamed_file = timestamp_filename(filepath) # Check that the original file no longer exists. assert not os.path.exists(filepath) From f843b22e4e269a4584d5acacc6208222390be5fd Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Fri, 18 Oct 2024 09:26:05 -0400 Subject: [PATCH 09/16] mock datetime in test_volume.py --- tests/test_image.py | 5 +++-- tests/test_volume.py | 28 +++++++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/tests/test_image.py b/tests/test_image.py index e62eb0d267..8aca556729 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -391,9 +391,9 @@ def test_save_overwrite(caplog): # Case 3: overwrite=None (should rename the existing file and save im3 with original filename) # Mock datetime to return a fixed timestamp. - mock_timestamp = datetime(1879, 3, 14, 12, 0, 0).strftime("%y%m%d_%H%M%S") + mock_datetime_value = datetime(1879, 3, 14, 12, 0, 0) with mock.patch("aspire.utils.misc.datetime") as mock_datetime: - mock_datetime.now.return_value = datetime(1879, 3, 14, 12, 0, 0) + mock_datetime.now.return_value = mock_datetime_value mock_datetime.strftime = datetime.strftime with caplog.at_level(logging.INFO): @@ -403,6 +403,7 @@ def test_save_overwrite(caplog): assert f"Renaming {mrc_path}" in caplog.text # Construct the expected renamed filename using the mock timestamp + mock_timestamp = mock_datetime_value.strftime("%y%m%d_%H%M%S") renamed_file = f"{base}_{mock_timestamp}{ext}" # Assert that the renamed file exists diff --git a/tests/test_volume.py b/tests/test_volume.py index 0300bd3e71..dd62798e5b 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -2,7 +2,9 @@ import os import tempfile import warnings +from datetime import datetime from itertools import product +from unittest import mock import numpy as np import pytest @@ -331,20 +333,24 @@ def test_save_overwrite(caplog): vol3.save(mrc_path, overwrite=False) # Case 3: overwrite=None (should rename the existing file and save vol3 with original filename) - with caplog.at_level(logging.INFO): - vol3.save(mrc_path, overwrite=None) + # Mock datetime to return a fixed timestamp + mock_datetime_value = datetime(1768, 3, 21, 12, 0, 0) + with mock.patch("aspire.utils.misc.datetime") as mock_datetime: + mock_datetime.now.return_value = mock_datetime_value + mock_datetime.strftime = datetime.strftime - # Check that the existing file was renamed and logged - assert f"Renaming {mrc_path}" in caplog.text + with caplog.at_level(logging.INFO): + vol3.save(mrc_path, overwrite=None) - # Find the renamed file by checking the directory contents - renamed_file = None - for filename in os.listdir(tmpdir_name): - if filename.startswith("og_") and filename.endswith(".mrc"): - renamed_file = os.path.join(tmpdir_name, filename) - break + # Check that the existing file was renamed and logged + assert f"Renaming {mrc_path}" in caplog.text - assert renamed_file is not None, "Renamed file not found" + # Construct the expected renamed filename using the mock timestamp + mock_timestamp = mock_datetime_value.strftime("%y%m%d_%H%M%S") + renamed_file = f"{base}_{mock_timestamp}{ext}" + + # Assert that the renamed file exists + assert os.path.exists(renamed_file), "Renamed file not found" # Load and check that vol3 was saved to the original path vol3_loaded = Volume.load(mrc_path) From 310e54dd817a8fa59a5864388a706d162b018ca7 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Fri, 18 Oct 2024 10:16:01 -0400 Subject: [PATCH 10/16] add try/except block to catch bad filename --- src/aspire/utils/misc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/aspire/utils/misc.py b/src/aspire/utils/misc.py index 845607d9e5..23f0490350 100644 --- a/src/aspire/utils/misc.py +++ b/src/aspire/utils/misc.py @@ -66,7 +66,11 @@ def timestamp_filename(filepath, move=True): # Rename the existing file by appending the timestamp. if move: - os.rename(filepath, renamed_filepath) + try: + os.rename(filepath, renamed_filepath) + except FileNotFoundError: + logger.warning(f"File '{filepath}' not found, could not rename.") + return None return renamed_filepath From d1bb47e77e52af3a1f56f871e3c2c6bf6e5a66b8 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Fri, 18 Oct 2024 10:38:15 -0400 Subject: [PATCH 11/16] add try/except case to test_timestamp_filename. Also use mock datetime. --- tests/test_utils.py | 50 +++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 93f5885506..483e09fd54 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,6 +4,7 @@ import warnings from contextlib import contextmanager from datetime import datetime +from unittest import mock import matplotlib import numpy as np @@ -114,7 +115,7 @@ def test_get_full_version_unexpected(monkeypatch): assert get_full_version() == __version__ + ".x" -def test_timestamp_filename(): +def test_timestamp_filename(caplog): with tempfile.TemporaryDirectory() as tmpdir_name: filepath = os.path.join(tmpdir_name, "test_file.name") base, ext = os.path.splitext(filepath) @@ -123,21 +124,44 @@ def test_timestamp_filename(): with open(filepath, "w") as f: f.write("Test file") - # Case 1: move=False should return the new file name with apended timestamp. - renamed_file = timestamp_filename(filepath, move=False) - timestamp = datetime.now().strftime("%y%m%d_%H%M") - assert renamed_file.startswith(f"{base}_{timestamp}") and renamed_file.endswith( - f"{ext}" - ) + # Mock datetime to return a fixed timestamp. + mock_datetime_value = datetime(1707, 4, 15, 12, 0, 0) + mock_timestamp = mock_datetime_value.strftime("%y%m%d_%H%M%S") + + with mock.patch("aspire.utils.misc.datetime") as mock_datetime: + mock_datetime.now.return_value = mock_datetime_value + mock_datetime.strftime = datetime.strftime + + # Case 1: move=False should return the new file name with appended timestamp. + renamed_file = timestamp_filename(filepath, move=False) + assert renamed_file == f"{base}_{mock_timestamp}{ext}" + + # Case 2: move=True (default) should rename file on disk. + with caplog.at_level(logging.INFO): + renamed_file = timestamp_filename(filepath) + + # Check log for renaming operation. + assert f"Renaming {filepath} as {renamed_file}" in caplog.text + + # Check that the original file no longer exists. + assert not os.path.exists(filepath) + + # Check that the new file exists on disk with the expected name. + assert os.path.exists(renamed_file) - # Case 2: move=True (default) should rename file on disk. - renamed_file = timestamp_filename(filepath) + # Case 3: Test when the file does not exist. + non_existent_file = os.path.join(tmpdir_name, "non_existent_file.name") + with caplog.at_level(logging.WARNING): + result = timestamp_filename(non_existent_file) - # Check that the original file no longer exists. - assert not os.path.exists(filepath) + # Check that None is returned since the file doesn't exist. + assert result is None - # Check that the new file exists on disk with the expected name. - assert os.path.exists(renamed_file) + # Check log for the warning about file not found. + assert ( + f"File '{non_existent_file}' not found, could not rename." + in caplog.text + ) def test_power_set(): From becccbd3ba938c1de9d7ae2e34a641fb310cf54b Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Fri, 18 Oct 2024 15:28:17 -0400 Subject: [PATCH 12/16] change mock datetimes --- tests/test_image.py | 2 +- tests/test_utils.py | 2 +- tests/test_volume.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_image.py b/tests/test_image.py index 8aca556729..593f781e7d 100644 --- a/tests/test_image.py +++ b/tests/test_image.py @@ -391,7 +391,7 @@ def test_save_overwrite(caplog): # Case 3: overwrite=None (should rename the existing file and save im3 with original filename) # Mock datetime to return a fixed timestamp. - mock_datetime_value = datetime(1879, 3, 14, 12, 0, 0) + mock_datetime_value = datetime(2024, 10, 18, 12, 0, 0) with mock.patch("aspire.utils.misc.datetime") as mock_datetime: mock_datetime.now.return_value = mock_datetime_value mock_datetime.strftime = datetime.strftime diff --git a/tests/test_utils.py b/tests/test_utils.py index 483e09fd54..8c8721a75a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -125,7 +125,7 @@ def test_timestamp_filename(caplog): f.write("Test file") # Mock datetime to return a fixed timestamp. - mock_datetime_value = datetime(1707, 4, 15, 12, 0, 0) + mock_datetime_value = datetime(2024, 10, 18, 12, 0, 0) mock_timestamp = mock_datetime_value.strftime("%y%m%d_%H%M%S") with mock.patch("aspire.utils.misc.datetime") as mock_datetime: diff --git a/tests/test_volume.py b/tests/test_volume.py index dd62798e5b..d220860784 100644 --- a/tests/test_volume.py +++ b/tests/test_volume.py @@ -334,7 +334,7 @@ def test_save_overwrite(caplog): # Case 3: overwrite=None (should rename the existing file and save vol3 with original filename) # Mock datetime to return a fixed timestamp - mock_datetime_value = datetime(1768, 3, 21, 12, 0, 0) + mock_datetime_value = datetime(2024, 10, 18, 12, 0, 0) with mock.patch("aspire.utils.misc.datetime") as mock_datetime: mock_datetime.now.return_value = mock_datetime_value mock_datetime.strftime = datetime.strftime From edcaac3f84a741bbc0265340cefc42b13e5a4ffb Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 23 Oct 2024 11:15:11 -0400 Subject: [PATCH 13/16] Smart overwrite for MicrographSimulation. Supporting tests. --- src/aspire/source/micrograph.py | 15 ++++-- tests/test_micrograph_simulation.py | 80 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/aspire/source/micrograph.py b/src/aspire/source/micrograph.py index 2d654401b5..bf351fab23 100644 --- a/src/aspire/source/micrograph.py +++ b/src/aspire/source/micrograph.py @@ -10,7 +10,7 @@ from aspire.source import Simulation from aspire.source.image import _ImageAccessor from aspire.storage import StarFile -from aspire.utils import Random, grid_2d +from aspire.utils import Random, grid_2d, timestamp_filename from aspire.volume import Volume logger = logging.getLogger(__name__) @@ -44,7 +44,7 @@ def __len__(self): """ return self.micrograph_count - def save(self, path, name_prefix="micrograph", overwrite=True): + def save(self, path, name_prefix="micrograph", overwrite=None): """ Save micrographs to `path`. @@ -54,11 +54,18 @@ def save(self, path, name_prefix="micrograph", overwrite=True): :param path: Directory to save data. :param name_prefix: Optional, name prefix string for micrograph files. - :param overwrite: Optional, bool. Allow writing to existing directory, - and overwriting existing files. + :param overwrite: Options to control overwrite behavior (default is None): + - True: Overwrites the existing path if it exists. + - False: Raises an error if the path exists. + - None: Renames the old path by appending a time/date stamp. :return: List of saved `.mrc` files. """ + if overwrite is None and os.path.exists(path): + # If the directory exists, append a timestamp to existing directory. + _ = timestamp_filename(path) + overwrite = True + # Make dir if does not exist. Path(path).mkdir(parents=True, exist_ok=overwrite) diff --git a/tests/test_micrograph_simulation.py b/tests/test_micrograph_simulation.py index 58f77869a9..66297e0d09 100644 --- a/tests/test_micrograph_simulation.py +++ b/tests/test_micrograph_simulation.py @@ -2,6 +2,8 @@ import logging import os import tempfile +from datetime import datetime +from unittest import mock import numpy as np import pytest @@ -303,6 +305,84 @@ def test_sim_save(): ) +def test_save_overwrite(caplog): + """ + Tests MicrographSimulation.save functionality. + + Specifically tests interoperability with CentersCoordinateSource + """ + + v = AsymmetricVolume(L=16, C=1, dtype=np.float64).generate() + ctfs = [ + RadialCTFFilter( + pixel_size=4, voltage=200, defocus=15000, Cs=2.26, alpha=0.07, B=0 + ) + ] + + mg_sim = MicrographSimulation( + volume=v, + particles_per_micrograph=3, + interparticle_distance=v.resolution, + micrograph_count=2, + micrograph_size=512, + ctf_filters=ctfs, + ) + + mg_sim_new = MicrographSimulation( + volume=v, + particles_per_micrograph=4, + interparticle_distance=v.resolution, + micrograph_count=3, + micrograph_size=512, + ctf_filters=ctfs, + ) + + with tempfile.TemporaryDirectory() as tmp_output_dir: + path = os.path.join(tmp_output_dir, "test") + + # Write MRC and STAR files + save_paths_1 = mg_sim.save(path, overwrite=True) + + # Case 1: overwrite=True (should overwrite the existing file) + save_paths_2 = mg_sim.save(path, overwrite=True) + np.testing.assert_array_equal(save_paths_1, save_paths_2) + + # Case2: overwrite=False (should raise error) + with pytest.raises( + FileExistsError, + match="File exists: '.*'", + ): + _ = mg_sim.save(path, overwrite=False) + + # Case 3: overwrite=None (should rename the existing directory) + mock_datetime_value = datetime(2024, 10, 18, 12, 0, 0) + with mock.patch("aspire.utils.misc.datetime") as mock_datetime: + mock_datetime.now.return_value = mock_datetime_value + mock_datetime.strftime = datetime.strftime + + with caplog.at_level(logging.INFO): + _ = mg_sim_new.save(path, overwrite=None) + + # Check that the existing directory was renamed and logged + assert f"Renaming {path}" in caplog.text + assert os.path.exists(path), "Directory not found" + + # Construct the expected renamed directory using the mock timestamp + mock_timestamp = mock_datetime_value.strftime("%y%m%d_%H%M%S") + renamed_dir = f"{path}_{mock_timestamp}" + + # Assert that the renamed file exists + assert os.path.exists(renamed_dir), "Renamed directory not found" + + # Load renamed directory and check images against orignal sim. + mg_src = DiskMicrographSource(renamed_dir) + np.testing.assert_allclose(mg_src.asnumpy(), mg_sim.asnumpy()) + + # Load new directory and check images against orignal sim. + mg_src_new = DiskMicrographSource(path) + np.testing.assert_allclose(mg_src_new.asnumpy(), mg_sim_new.asnumpy()) + + def test_bad_amplitudes(vol_fixture): """ Test incorrect `particle_amplitudes` argument raises. From b2a8500b7a6b8e1881dfafe49036940df9ed92c4 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Wed, 23 Oct 2024 15:21:36 -0400 Subject: [PATCH 14/16] Remove check raise message. Different message on windows. --- tests/test_micrograph_simulation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_micrograph_simulation.py b/tests/test_micrograph_simulation.py index 66297e0d09..14f1212e0d 100644 --- a/tests/test_micrograph_simulation.py +++ b/tests/test_micrograph_simulation.py @@ -348,10 +348,7 @@ def test_save_overwrite(caplog): np.testing.assert_array_equal(save_paths_1, save_paths_2) # Case2: overwrite=False (should raise error) - with pytest.raises( - FileExistsError, - match="File exists: '.*'", - ): + with pytest.raises(FileExistsError): _ = mg_sim.save(path, overwrite=False) # Case 3: overwrite=None (should rename the existing directory) From 642dc1631e47d0ae40c6f039b639fc3079c53451 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 24 Oct 2024 08:53:45 -0400 Subject: [PATCH 15/16] Rename method. --- src/aspire/image/image.py | 4 ++-- src/aspire/source/image.py | 4 ++-- src/aspire/source/micrograph.py | 4 ++-- src/aspire/utils/__init__.py | 2 +- src/aspire/utils/misc.py | 2 +- src/aspire/volume/volume.py | 4 ++-- tests/test_utils.py | 10 +++++----- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index bc292191ba..f46be29f29 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -17,7 +17,7 @@ anorm, crop_pad_2d, grid_2d, - timestamp_filename, + rename_with_timestamp, ) from aspire.volume import SymmetryGroup @@ -505,7 +505,7 @@ def save(self, mrcs_filepath, overwrite=None): if overwrite is None and os.path.exists(mrcs_filepath): # If the file exists, append a timestamp to the old file and rename it - _ = timestamp_filename(mrcs_filepath) + _ = rename_with_timestamp(mrcs_filepath) with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: # original input format (the image index first) diff --git a/src/aspire/source/image.py b/src/aspire/source/image.py index 623f7cb713..0778a92a02 100644 --- a/src/aspire/source/image.py +++ b/src/aspire/source/image.py @@ -28,7 +28,7 @@ PowerFilter, ) from aspire.storage import MrcStats, StarFile -from aspire.utils import Rotation, grid_2d, support_mask, timestamp_filename, trange +from aspire.utils import Rotation, grid_2d, rename_with_timestamp, support_mask, trange from aspire.volume import IdentitySymmetryGroup, SymmetryGroup logger = logging.getLogger(__name__) @@ -997,7 +997,7 @@ def save( """ if overwrite is None and os.path.exists(starfile_filepath): # If the file exists, append the timestamp to the old file and rename it - renamed_filepath = timestamp_filename(starfile_filepath, move=False) + renamed_filepath = rename_with_timestamp(starfile_filepath, move=False) # Retrieve original ImageSource and save with new starfile name. from aspire.source import RelionSource diff --git a/src/aspire/source/micrograph.py b/src/aspire/source/micrograph.py index bf351fab23..5aa35e389c 100644 --- a/src/aspire/source/micrograph.py +++ b/src/aspire/source/micrograph.py @@ -10,7 +10,7 @@ from aspire.source import Simulation from aspire.source.image import _ImageAccessor from aspire.storage import StarFile -from aspire.utils import Random, grid_2d, timestamp_filename +from aspire.utils import Random, grid_2d, rename_with_timestamp from aspire.volume import Volume logger = logging.getLogger(__name__) @@ -63,7 +63,7 @@ def save(self, path, name_prefix="micrograph", overwrite=None): if overwrite is None and os.path.exists(path): # If the directory exists, append a timestamp to existing directory. - _ = timestamp_filename(path) + _ = rename_with_timestamp(path) overwrite = True # Make dir if does not exist. diff --git a/src/aspire/utils/__init__.py b/src/aspire/utils/__init__.py index 10e5e5c855..c4b46bcdec 100644 --- a/src/aspire/utils/__init__.py +++ b/src/aspire/utils/__init__.py @@ -28,7 +28,7 @@ inverse_r, J_conjugate, powerset, - timestamp_filename, + rename_with_timestamp, sha256sum, support_mask, fuzzy_mask, diff --git a/src/aspire/utils/misc.py b/src/aspire/utils/misc.py index 23f0490350..a3f9917024 100644 --- a/src/aspire/utils/misc.py +++ b/src/aspire/utils/misc.py @@ -50,7 +50,7 @@ def importlib_path(package, resource): return p -def timestamp_filename(filepath, move=True): +def rename_with_timestamp(filepath, move=True): """ Rename a file by appending a timestamp to the end of the filename. diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index a26878de33..fad3003fc0 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -17,7 +17,7 @@ grid_2d, grid_3d, mat_to_vec, - timestamp_filename, + rename_with_timestamp, vec_to_mat, ) from aspire.volume import IdentitySymmetryGroup, SymmetryGroup @@ -654,7 +654,7 @@ def save(self, filename, overwrite=None): if overwrite is None and os.path.exists(filename): # If the file exists, append a timestamp to the old file and rename it - _ = timestamp_filename(filename) + _ = rename_with_timestamp(filename) with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: mrc.set_data(self._data.astype(np.float32)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8c8721a75a..a81ccaf11a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -22,7 +22,7 @@ num_procs_suggestion, physical_core_cpu_suggestion, powerset, - timestamp_filename, + rename_with_timestamp, utest_tolerance, virtual_core_cpu_suggestion, ) @@ -115,7 +115,7 @@ def test_get_full_version_unexpected(monkeypatch): assert get_full_version() == __version__ + ".x" -def test_timestamp_filename(caplog): +def test_rename_with_timestamp(caplog): with tempfile.TemporaryDirectory() as tmpdir_name: filepath = os.path.join(tmpdir_name, "test_file.name") base, ext = os.path.splitext(filepath) @@ -133,12 +133,12 @@ def test_timestamp_filename(caplog): mock_datetime.strftime = datetime.strftime # Case 1: move=False should return the new file name with appended timestamp. - renamed_file = timestamp_filename(filepath, move=False) + renamed_file = rename_with_timestamp(filepath, move=False) assert renamed_file == f"{base}_{mock_timestamp}{ext}" # Case 2: move=True (default) should rename file on disk. with caplog.at_level(logging.INFO): - renamed_file = timestamp_filename(filepath) + renamed_file = rename_with_timestamp(filepath) # Check log for renaming operation. assert f"Renaming {filepath} as {renamed_file}" in caplog.text @@ -152,7 +152,7 @@ def test_timestamp_filename(caplog): # Case 3: Test when the file does not exist. non_existent_file = os.path.join(tmpdir_name, "non_existent_file.name") with caplog.at_level(logging.WARNING): - result = timestamp_filename(non_existent_file) + result = rename_with_timestamp(non_existent_file) # Check that None is returned since the file doesn't exist. assert result is None From ee7c5a14d8507fd5fdf214e64cd858a451cb83a8 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 24 Oct 2024 10:53:23 -0400 Subject: [PATCH 16/16] Remove bool(overwrite) --- src/aspire/image/image.py | 4 +++- src/aspire/source/image.py | 5 ++++- src/aspire/volume/volume.py | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index f46be29f29..757362870d 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -506,8 +506,10 @@ def save(self, mrcs_filepath, overwrite=None): if overwrite is None and os.path.exists(mrcs_filepath): # If the file exists, append a timestamp to the old file and rename it _ = rename_with_timestamp(mrcs_filepath) + elif overwrite is None: + overwrite = False - with mrcfile.new(mrcs_filepath, overwrite=bool(overwrite)) as mrc: + with mrcfile.new(mrcs_filepath, overwrite=overwrite) as mrc: # original input format (the image index first) mrc.set_data(self._data.astype(np.float32)) # Note assigning voxel_size must come after `set_data` diff --git a/src/aspire/source/image.py b/src/aspire/source/image.py index 0778a92a02..0287e9875c 100644 --- a/src/aspire/source/image.py +++ b/src/aspire/source/image.py @@ -1008,6 +1008,9 @@ def save( # Allow overwriting old files. overwrite = True + elif overwrite is None: + overwrite = False + logger.info("save metadata into STAR file") filename_indices = self.save_metadata( starfile_filepath, @@ -1021,7 +1024,7 @@ def save( starfile_filepath, filename_indices=filename_indices, batch_size=batch_size, - overwrite=bool(overwrite), + overwrite=overwrite, ) # return some information about the saved files info = {"starfile": starfile_filepath, "mrcs": unique_filenames} diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index fad3003fc0..eae985a794 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -655,8 +655,10 @@ def save(self, filename, overwrite=None): if overwrite is None and os.path.exists(filename): # If the file exists, append a timestamp to the old file and rename it _ = rename_with_timestamp(filename) + elif overwrite is None: + overwrite = False - with mrcfile.new(filename, overwrite=bool(overwrite)) as mrc: + with mrcfile.new(filename, overwrite=overwrite) as mrc: mrc.set_data(self._data.astype(np.float32)) # Note assigning voxel_size must come after `set_data` if self.pixel_size is not None: