Skip to content

Commit

Permalink
Refactor / replace os.path with pathlib in many places (#985)
Browse files Browse the repository at this point in the history
* first adjusts for pathlib, always import repack_fields now, undo some redundant typehints in specs.py

* additional adjusts

* large refactoring of output_directory for pathlib

* bugfixes, test adjustments to use Paths, small adjusts to docs

* tiny fix

* tiny fix

* really need to figure out sim_input_dir and gen_input_dir logic. tries fixing that validator again. need to avoid passing through Path('.')

* don't need this multi-worker-unit-test block that was just experimental

* why is actions failing but not local?

* Autoformatting

* Incrementing actions version

* cast workflow_dir_path to Path on registering to location stack

* we always have a workflow dir technically. if not workflow_***, it's the current working dir

* removing redundant cast to Path

---------

Co-authored-by: Jeffrey Larson <jmlarson@anl.gov>
  • Loading branch information
jlnav and jmlarson1 committed May 2, 2023
1 parent 6f0e87c commit 5909f42
Show file tree
Hide file tree
Showing 16 changed files with 138 additions and 129 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
shell: bash -l {0}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Setup conda - Python ${{ matrix.python-version }}
uses: conda-incubator/setup-miniconda@v2
with:
Expand Down Expand Up @@ -271,5 +271,5 @@ jobs:
if: contains(github.base_ref, 'main')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: crate-ci/typos@v1.0.4
7 changes: 5 additions & 2 deletions docs/history_output_logging.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Output Management
=================

Each of the following described output files and directories can be placed in a run-specific
directory by setting ``libE_specs["use_workflow_dir"] = True``.

Default Log Files
~~~~~~~~~~~~~~~~~
The history array :ref:`H<funcguides-history>` and
Expand Down Expand Up @@ -71,10 +74,10 @@ or in other directories. This is helpful for taking advantage of scratch spaces
organizing I/O by application run.

* ``"sim_dirs_make"``: ``[bool] = False``. Enables per-simulation directories with default
settings. Directories are placed in ``./ensemble`` by default.
settings. Directories are placed in ``ensemble`` by default.

* ``"gen_dirs_make"``: ``[bool] = False``. Enabled per-generator instance directories with
default settings. Directories are placed in ``./ensemble`` by default.
default settings. Directories are placed in ``ensemble`` by default.

* ``"ensemble_dir_path"``: ``[str] = "./ensemble"``. Specifies where each worker places its
calculation directories. If ``"sim_dirs_make"`` or ``"gen_dirs_make"`` are ``False`` respectively,
Expand Down
1 change: 0 additions & 1 deletion libensemble/comms/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ def manager_logging_config(specs={}):
logconfig = LogConfig.config

if not logconfig.logger_set:

if specs.get("use_workflow_dir"): # placing logfiles in separate directory
logconfig.set_directory(specs.get("workflow_dir_path"))

Expand Down
1 change: 1 addition & 0 deletions libensemble/executors/mpi_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import logging

from libensemble.executors.executor import jassert
from libensemble.resources import mpi_resources

Expand Down
7 changes: 4 additions & 3 deletions libensemble/libE.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@
import socket
import sys
import traceback
from pathlib import Path
from typing import Callable, Dict

import numpy as np
Expand Down Expand Up @@ -742,12 +743,12 @@ def libE_tcp_worker(sim_specs, gen_specs, libE_specs):
# ==================== Additional Internal Functions ===========================


def _dump_on_abort(hist, persis_info, save_H=True, path=os.getcwd()):
def _dump_on_abort(hist, persis_info, save_H=True, path=Path.cwd()):
"""Dump history and persis_info on abort"""
logger.error("Manager exception raised .. aborting ensemble:")
logger.error(f"Dumping ensemble history with {hist.sim_ended_count} sims evaluated:")

if save_H:
np.save(os.path.join(path, "libE_history_at_abort_" + str(hist.sim_ended_count) + ".npy"), hist.trim_H())
with open(os.path.join(path, "libE_persis_info_at_abort_" + str(hist.sim_ended_count) + ".pickle"), "wb") as f:
np.save(Path(path / Path("libE_history_at_abort_" + str(hist.sim_ended_count) + ".npy")), hist.trim_H())
with Path(path / Path("libE_persis_info_at_abort_" + str(hist.sim_ended_count) + ".pickle")).open("wb") as f:
pickle.dump(persis_info, f)
2 changes: 1 addition & 1 deletion libensemble/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def __init__(
wrk["zero_resource_worker"] = True

try:
temp_EnsembleDirectory.make_copyback_check()
temp_EnsembleDirectory.make_copyback()
except OSError as e: # Ensemble dir exists and isn't empty.
logger.manager_warning(_USER_CALC_DIR_WARNING.format(temp_EnsembleDirectory.ensemble_dir))
self._kill_workers()
Expand Down
1 change: 0 additions & 1 deletion libensemble/resources/env_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def __init__(
nodelist_env_lsf: Optional[str] = None,
nodelist_env_lsf_shortform: Optional[str] = None,
) -> None:

"""Initializes a new EnvResources instance
Determines the environment variables to query for resource
Expand Down
63 changes: 37 additions & 26 deletions libensemble/specs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import random
import secrets
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, List, Optional, Tuple, Union

import numpy as np
from pydantic import BaseConfig, BaseModel, Field, root_validator, validator
Expand Down Expand Up @@ -259,7 +258,7 @@ class LibeSpecs(BaseModel):
is specified.
"""

ensemble_dir_path: Optional[str] = "ensemble"
ensemble_dir_path: Optional[Union[str, Path]] = Path("ensemble")
"""
Path to main ensemble directory containing calculation directories. Can serve
as single working directory for workers, or contain calculation directories
Expand All @@ -280,13 +279,17 @@ class LibeSpecs(BaseModel):
By default all workers operate within the top-level ensemble directory
"""

sim_dir_copy_files: Optional[List[str]] = []
""" Paths to files or directories to copy into each simulation or ensemble directory """
sim_dir_copy_files: Optional[List[Union[str, Path]]] = []
""" Paths to files or directories to copy into each simulation or ensemble directory.
List of strings or pathlib.Path objects
"""

sim_dir_symlink_files: Optional[List[str]] = []
""" Paths to files or directories to symlink into each simulation directory """
sim_dir_symlink_files: Optional[List[Union[str, Path]]] = []
""" Paths to files or directories to symlink into each simulation directory.
List of strings or pathlib.Path objects
"""

sim_input_dir: Optional[str] = ""
sim_input_dir: Optional[Union[str, Path]] = None
"""
Copy this directory and its contents for each simulation-specific directory.
If not using calculation directories, contents are copied to the ensemble directory
Expand All @@ -298,13 +301,17 @@ class LibeSpecs(BaseModel):
By default all workers operate within the top-level ensemble directory
"""

gen_dir_copy_files: Optional[List[str]] = []
""" Paths to files or directories to copy into each generator or ensemble directory """
gen_dir_copy_files: Optional[List[Union[str, Path]]] = []
""" Paths to files or directories to copy into each generator or ensemble directory.
List of strings or pathlib.Path objects
"""

gen_dir_symlink_files: Optional[List[str]] = []
""" Paths to files or directories to symlink into each generator directory """
gen_dir_symlink_files: Optional[List[Union[str, Path]]] = []
""" Paths to files or directories to symlink into each generator directory.
List of strings or pathlib.Path objects
"""

gen_input_dir: Optional[str] = ""
gen_input_dir: Optional[Union[str, Path]] = None
"""
Copy this directory and its contents for each generator-instance-specific directory.
If not using calculation directories, contents are copied to the ensemble directory
Expand Down Expand Up @@ -464,7 +471,7 @@ class Config:
arbitrary_types_allowed = True

@validator("comms")
def check_valid_comms_type(cls, value: str) -> str:
def check_valid_comms_type(cls, value):
assert value in ["mpi", "local", "tcp"], "Invalid comms type"
return value

Expand All @@ -475,23 +482,27 @@ def set_platform_specs_to_class(cls, value: Union[Platform, dict]) -> Platform:
return value

@validator("sim_input_dir", "gen_input_dir")
def check_input_dir_exists(cls, value: str) -> str:
if len(value):
assert os.path.exists(value), "libE_specs['{}'] does not refer to an existing path.".format(value)
def check_input_dir_exists(cls, value):
if value:
if isinstance(value, str):
value = Path(value)
assert value.exists(), "value does not refer to an existing path"
assert value != Path("."), "Value can't refer to the current directory ('.' or Path('.'))."
return value

@validator("sim_dir_copy_files", "sim_dir_symlink_files", "gen_dir_copy_files", "gen_dir_symlink_files")
def check_inputs_exist(cls, value: List[str]) -> List[str]:
def check_inputs_exist(cls, value):
value = [Path(path) for path in value]
for f in value:
assert os.path.exists(f), "'{}' in libE_specs['{}'] does not refer to an existing path.".format(f, value)
assert f.exists(), f"'{f}' in Value does not refer to an existing path."
return value

@root_validator
def check_any_workers_and_disable_rm_if_tcp(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def check_any_workers_and_disable_rm_if_tcp(cls, values):
return _check_any_workers_and_disable_rm_if_tcp(values)

@root_validator
def set_defaults_on_mpi(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def set_defaults_on_mpi(cls, values):
if values.get("comms") == "mpi":
from mpi4py import MPI

Expand All @@ -500,7 +511,7 @@ def set_defaults_on_mpi(cls, values: Dict[str, Any]) -> Dict[str, Any]:
return values

@root_validator
def set_workflow_dir(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def set_workflow_dir(cls, values):
if values.get("use_workflow_dir") and len(str(values.get("workflow_dir_path"))) <= 1:
values["workflow_dir_path"] = Path(
"./workflow_" + secrets.token_hex(3)
Expand Down Expand Up @@ -541,21 +552,21 @@ class Config:
arbitrary_types_allowed = True

@root_validator
def check_exit_criteria(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def check_exit_criteria(cls, values):
return _check_exit_criteria(values)

@root_validator
def check_output_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def check_output_fields(cls, values):
return _check_output_fields(values)

@root_validator
def set_ensemble_nworkers(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def set_ensemble_nworkers(cls, values):
if values.get("libE_specs"):
values["nworkers"] = values["libE_specs"].nworkers
return values

@root_validator
def check_H0(cls, values: Dict[str, Any]) -> Dict[str, Any]:
def check_H0(cls, values):
if values.get("H0") is not None:
return _check_H0(values)
return values
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def sim_f_noreturn(In):


if __name__ == "__main__":

nworkers, is_manager, libE_specs, _ = parse_args()

sim_specs = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@

# Fixed resources (one resource set per worker)
from libensemble.gen_funcs.sampling import uniform_random_sample as gen_f
from libensemble.libE import libE
from libensemble.tools import add_unique_random_streams, parse_args

# Uncomment for var resources
# from libensemble.gen_funcs.sampling import uniform_random_sample_with_variable_resources as gen_f


# Uncomment for var resources
# from libensemble.gen_funcs.sampling import uniform_random_sample_with_variable_resources as gen_f

from libensemble.libE import libE
from libensemble.tools import add_unique_random_streams, parse_args

# Parse number of workers, comms type, etc. from arguments
nworkers, is_manager, libE_specs, _ = parse_args()
Expand Down
17 changes: 12 additions & 5 deletions libensemble/tests/unit_tests/test_loc_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import shutil
import tempfile
from pathlib import Path

from libensemble.utils.loc_stack import LocationStack

Expand All @@ -19,7 +20,7 @@ def test_location_stack():

try:
# Record where we started
start_dir = os.getcwd()
start_dir = Path(os.getcwd())

# Set up directory for clone
clone_dirname = os.path.join(tmp_dirname, "basedir")
Expand All @@ -31,7 +32,7 @@ def test_location_stack():
s = LocationStack()

# Register a valid location
tname = s.register_loc(0, "testdir", prefix=tmp_dirname, copy_files=[test_fname])
tname = s.register_loc(0, Path("lstestdir"), prefix=Path(tmp_dirname), copy_files=[Path(test_fname)])
assert os.path.isdir(tname), f"New directory {tname} was not created."
assert os.path.isfile(
os.path.join(tname, "test.txt")
Expand All @@ -42,7 +43,7 @@ def test_location_stack():
assert d is None, "Dir stack not correctly register None at location 1."

# Register a dummy location (del should not work)
d = s.register_loc(2, os.path.join(tmp_dirname, "dummy"))
d = s.register_loc(2, Path(os.path.join(tmp_dirname, "lsdummy")))
assert ~os.path.isdir(d), "Directory stack registration of dummy should not create dir."

# Push unregistered location (we should not move)
Expand All @@ -56,7 +57,8 @@ def test_location_stack():

# Push registered location (we should move
s.push_loc(0)
assert s.stack == [None, start_dir], "Directory stack is incorrect." "Wanted [None, {}], got {}.".format(
stack = s.stack
assert stack == [None, start_dir], "Directory stack is incorrect." "Wanted [None, {}], got {}.".format(
start_dir, s.stack
)
assert os.path.samefile(
Expand All @@ -74,7 +76,8 @@ def test_location_stack():

# Context for moving again
with s.loc(0):
assert s.stack == [None, start_dir], "Directory stack is incorrect." "Wanted [None, {}], got {}.".format(
stack = s.stack
assert stack == [None, start_dir], "Directory stack is incorrect." "Wanted [None, {}], got {}.".format(
start_dir, s.stack
)
assert os.path.samefile(
Expand Down Expand Up @@ -106,3 +109,7 @@ def test_location_stack():

finally:
shutil.rmtree(tmp_dirname)


if __name__ == "__main__":
test_location_stack()
7 changes: 4 additions & 3 deletions libensemble/tests/unit_tests/test_sim_dir_properties.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import shutil
from pathlib import Path

import numpy as np

Expand Down Expand Up @@ -43,7 +44,7 @@ def test_copy_back(tmp_path):
back their work into a directory created by the manager."""

inputdir = tmp_path / "calc"
copybackdir = "./calc"
copybackdir = "./calc_back"
inputfile = tmp_path / "calc/file"

for dire in [inputdir, copybackdir]:
Expand Down Expand Up @@ -71,7 +72,7 @@ def test_copy_back(tmp_path):
libE_specs = {"sim_dirs_make": True, "ensemble_dir_path": inputdir, "ensemble_copy_back": True}

ls = LocationStack()
ls.register_loc("test", inputfile)
ls.register_loc("test", Path(inputfile))
ed = EnsembleDirectory(libE_specs, ls)
ed.copy_back()
assert "file" in os.listdir(copybackdir), "File not copied back to starting dire"
Expand Down Expand Up @@ -193,7 +194,7 @@ def test_workflow_dir_copyback(tmp_path):
ed = EnsembleDirectory(libE_specs, ls)
copybackdir = ed.copybackdir

assert "./fake_workflow" in copybackdir, "workflow_dir wasn't considered as destination for copyback"
assert "fake_workflow" in str(copybackdir), "workflow_dir wasn't considered as destination for copyback"

ed.copy_back()
assert "file" in os.listdir(copybackdir), "File not copied back to starting dire"
Expand Down

0 comments on commit 5909f42

Please sign in to comment.