Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
03b21a0
first draft with double serialization
tphung3 Dec 10, 2024
6556d39
fix bug
tphung3 Dec 10, 2024
72e9e00
deduplicate fn serialization
tphung3 Dec 13, 2024
10486b2
finish serialization deduplication
tphung3 Dec 13, 2024
ee39165
fix bug dedup double serial
tphung3 Dec 13, 2024
79079b0
add option for non-tmp staging dir
tphung3 Dec 13, 2024
a8104e7
context feature added
tphung3 Dec 14, 2024
a6e609d
add _ to hidden variable
tphung3 Dec 16, 2024
5b190bb
use 1 mapping only
tphung3 Dec 16, 2024
4681cd0
check monitoring first
tphung3 Dec 16, 2024
97b3fe7
fix lint issues
tphung3 Dec 16, 2024
3cd8608
fix bug in mapping of function names in executor
tphung3 Dec 16, 2024
433917e
fix flake8
tphung3 Dec 16, 2024
edf192d
add annotation
tphung3 Dec 16, 2024
6c9388b
new way to detect monitoring code
tphung3 Dec 16, 2024
59dd532
add run_parsl_function
tphung3 Sep 18, 2025
62db46d
fixes to update head
tphung3 Oct 27, 2025
1f2ba3e
comment out debug code
tphung3 Oct 30, 2025
bd766e7
use tmp dir defaults to False
tphung3 Oct 30, 2025
a03757d
fix dir name
tphung3 Oct 30, 2025
f93da84
make parents dir if needed
tphung3 Oct 30, 2025
51a047f
makedirs not mkdir
tphung3 Oct 30, 2025
0c327bb
remove context refs during args serialization
tphung3 Nov 3, 2025
7bcb6ef
remove debug code and clean it a bit
tphung3 Nov 3, 2025
9fb00d5
add input files support for function context/library
tphung3 Nov 6, 2025
056dcc4
fix lint errors
tphung3 Nov 6, 2025
a0cc793
ignore cloudpickle import error
tphung3 Nov 6, 2025
75823c2
add ignore error type
tphung3 Nov 6, 2025
2595333
add tests of function context in taskvine
tphung3 Nov 8, 2025
2e0f9f8
sort imports
tphung3 Nov 8, 2025
0b30fef
explicit config
tphung3 Nov 8, 2025
3ca1675
syntax
tphung3 Nov 8, 2025
23fd926
fix test
tphung3 Nov 8, 2025
63d4a12
fix lint
tphung3 Nov 8, 2025
bd26ed0
restrict to taskvine only
tphung3 Nov 8, 2025
efe9ef1
fix lint
tphung3 Nov 8, 2025
d033a54
try restrict tests to taskvine
tphung3 Nov 8, 2025
ee803e6
remove config parametrize
tphung3 Nov 8, 2025
058f42d
remove param config
tphung3 Nov 8, 2025
abcbffd
use 1st config
tphung3 Nov 8, 2025
3bebee1
fix config
tphung3 Nov 8, 2025
831e4b6
dont load config
tphung3 Nov 8, 2025
12a9be7
rerun
tphung3 Nov 8, 2025
f57a957
turn on shared fs
tphung3 Nov 8, 2025
79886a8
add res spec
tphung3 Nov 8, 2025
821daf4
local compute
tphung3 Nov 8, 2025
753a127
remove monitoring detection code
tphung3 Nov 20, 2025
2ba5f41
remove require_taskvine decorator
tphung3 Nov 20, 2025
6f2a148
add docstring for helper function
tphung3 Nov 20, 2025
7736704
remove redundant serverless output check
tphung3 Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 103 additions & 11 deletions parsl/executors/taskvine/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import uuid
from concurrent.futures import Future
from datetime import datetime
from typing import List, Literal, Optional, Union
from typing import Dict, List, Literal, Optional, Union

# Import other libraries
import typeguard
Expand Down Expand Up @@ -84,8 +84,12 @@ class TaskVineExecutor(BlockProviderExecutor, putils.RepresentationMixin):
pre-warmed forked python process.
Default is 'regular'.

use_tmp_dir_for_staging: bool
Whether to use tmp dir for staging functions, arguments, and results.
Default is False.

manager_config: TaskVineManagerConfig
Configuration for the TaskVine manager. Default
Configuration for the TaskVine manager.

factory_config: TaskVineFactoryConfig
Configuration for the TaskVine factory.
Expand All @@ -105,6 +109,7 @@ def __init__(self,
label: str = "TaskVineExecutor",
worker_launch_method: Union[Literal['provider'], Literal['factory'], Literal['manual']] = 'factory',
function_exec_mode: Union[Literal['regular'], Literal['serverless']] = 'regular',
use_tmp_dir_for_staging: bool = False,
manager_config: TaskVineManagerConfig = TaskVineManagerConfig(),
factory_config: TaskVineFactoryConfig = TaskVineFactoryConfig(),
provider: Optional[ExecutionProvider] = None,
Expand Down Expand Up @@ -135,6 +140,7 @@ def __init__(self,
self.label = label
self.worker_launch_method = worker_launch_method
self.function_exec_mode = function_exec_mode
self.use_tmp_dir_for_staging = use_tmp_dir_for_staging
self.manager_config = manager_config
self.factory_config = factory_config
self.storage_access = storage_access
Expand Down Expand Up @@ -183,6 +189,13 @@ def __init__(self,
# Path to directory that holds all tasks' data and results.
self._function_data_dir = ""

# Mapping of function names to function details.
# Currently the values include function objects, path to serialized functions,
# path to serialized function contexts, and whether functions are serialized.
# Helpful to detect inconsistencies in serverless functions.
# Helpful to deduplicate the same function.
self._map_func_names_to_func_details: Dict[str, Dict] = {}

# Helper scripts to prepare package tarballs for Parsl apps
self._package_analyze_script = shutil.which("poncho_package_analyze")
self._package_create_script = shutil.which("poncho_package_create")
Expand Down Expand Up @@ -229,8 +242,12 @@ def __create_data_and_logging_dirs(self):
# Create directories for data and results
log_dir = os.path.join(run_dir, self.label)
os.makedirs(log_dir)
tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-'
self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix)

if self.use_tmp_dir_for_staging:
tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-'
self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix).name
else:
self._function_data_dir = os.path.join(log_dir, 'function')

# put TaskVine logs outside of a Parsl run as TaskVine caches between runs while
# Parsl does not.
Expand All @@ -240,7 +257,7 @@ def __create_data_and_logging_dirs(self):

# factory logs go with manager logs regardless
self.factory_config.scratch_dir = self.manager_config.vine_log_dir
logger.debug(f"Function data directory: {self._function_data_dir.name}, log directory: {log_dir}")
logger.debug(f"Function data directory: {self._function_data_dir}, log directory: {log_dir}")
logger.debug(
f"TaskVine manager log directory: {self.manager_config.vine_log_dir}, "
f"factory log directory: {self.factory_config.scratch_dir}")
Expand Down Expand Up @@ -307,7 +324,7 @@ def _path_in_task(self, executor_task_id, *path_components):
'map': Pickled file with a dict between local parsl names, and remote taskvine names.
"""
task_dir = "{:04d}".format(executor_task_id)
return os.path.join(self._function_data_dir.name, task_dir, *path_components)
return os.path.join(self._function_data_dir, task_dir, *path_components)

def submit(self, func, resource_specification, *args, **kwargs):
"""Processes the Parsl app by its arguments and submits the function
Expand All @@ -330,11 +347,25 @@ def submit(self, func, resource_specification, *args, **kwargs):
Keyword arguments to the Parsl app
"""

# a Parsl function must have a name
if func.__name__ is None:
raise ValueError('A Parsl function must have a name')

logger.debug(f'Got resource specification: {resource_specification}')

# Default execution mode of apps is regular
exec_mode = resource_specification.get('exec_mode', self.function_exec_mode)

if exec_mode == 'serverless':
if func.__name__ not in self._map_func_names_to_func_details:
self._map_func_names_to_func_details[func.__name__] = {'func_obj': func}
else:
if id(func) != id(self._map_func_names_to_func_details[func.__name__]['func_obj']):
logger.warning('Inconsistency in a serverless function call detected.\
A function name cannot point to two different function objects.\
Falling back to executing it as a regular task.')
exec_mode = 'regular'

# Detect resources and features of a submitted Parsl app
cores = None
memory = None
Expand Down Expand Up @@ -365,7 +396,7 @@ def submit(self, func, resource_specification, *args, **kwargs):
self._executor_task_counter += 1

# Create a per task directory for the function, argument, map, and result files
os.mkdir(self._path_in_task(executor_task_id))
os.makedirs(self._path_in_task(executor_task_id), exist_ok=True)

input_files = []
output_files = []
Expand Down Expand Up @@ -398,22 +429,74 @@ def submit(self, func, resource_specification, *args, **kwargs):
argument_file = None
result_file = None
map_file = None
function_context_file = None
function_context_input_files = {}

# Get path to files that will contain the pickled function,
# arguments, result, and map of input and output files
function_file = self._path_in_task(executor_task_id, "function")
if exec_mode == 'serverless':
if 'function_file' not in self._map_func_names_to_func_details[func.__name__]:
function_file = os.path.join(self._function_data_dir, func.__name__, 'function')
os.makedirs(os.path.join(self._function_data_dir, func.__name__))
self._map_func_names_to_func_details[func.__name__].update({'function_file': function_file, 'is_serialized': False})
else:
function_file = self._map_func_names_to_func_details[func.__name__]['function_file']
else:
function_file = self._path_in_task(executor_task_id, "function")
argument_file = self._path_in_task(executor_task_id, "argument")
result_file = self._path_in_task(executor_task_id, "result")
map_file = self._path_in_task(executor_task_id, "map")

logger.debug("Creating executor task {} with function at: {}, argument at: {}, \
and result to be found at: {}".format(executor_task_id, function_file, argument_file, result_file))
if exec_mode == 'serverless':
if 'function_context' in resource_specification:
if 'function_context_file' not in self._map_func_names_to_func_details[func.__name__]:
function_context = resource_specification.get('function_context')
function_context_args = resource_specification.get('function_context_args', [])
function_context_kwargs = resource_specification.get('function_context_kwargs', {})
function_context_file = os.path.join(self._function_data_dir, func.__name__, 'function_context')

self._cloudpickle_serialize_object_to_file(function_context_file,
[function_context,
function_context_args,
function_context_kwargs])
self._map_func_names_to_func_details[func.__name__].update({'function_context_file': function_context_file})
else:
function_context_file = self._map_func_names_to_func_details[func.__name__]['function_context_file']
function_context_input_files = resource_specification.get('function_context_input_files', {})

logger.debug("Creating executor task {} with function at: {}, argument at: {}, and result to be found at: {}".format(executor_task_id,
function_file,
argument_file,
result_file))

# Serialize function object and arguments, separately
self._serialize_object_to_file(function_file, func)
if exec_mode == 'regular' or not self._map_func_names_to_func_details[func.__name__]['is_serialized']:
self._serialize_object_to_file(function_file, func)
if exec_mode == 'serverless':
self._map_func_names_to_func_details[func.__name__]['is_serialized'] = True

# Delete references of function context information from resource_specification
# as they are not needed to be transferred to remote nodes.
# They are restored when the kwargs serialization is done.
if exec_mode == 'serverless':
function_context = kwargs['parsl_resource_specification'].pop('function_context', None)
function_context_args = kwargs['parsl_resource_specification'].pop('function_context_args', [])
function_context_kwargs = kwargs['parsl_resource_specification'].pop('function_context_kwargs', {})
function_context_input_files = kwargs['parsl_resource_specification'].pop('function_context_input_files', {})

args_dict = {'args': args, 'kwargs': kwargs}
self._serialize_object_to_file(argument_file, args_dict)

if exec_mode == 'serverless':
if function_context:
kwargs['parsl_resource_specification']['function_context'] = function_context
if function_context_args:
kwargs['parsl_resource_specification']['function_context_args'] = function_context_args
if function_context_kwargs:
kwargs['parsl_resource_specification']['function_context_kwargs'] = function_context_kwargs
if function_context_input_files:
kwargs['parsl_resource_specification']['function_context_input_files'] = function_context_input_files

# Construct the map file of local filenames at worker
self._construct_map_file(map_file, input_files, output_files)

Expand All @@ -431,6 +514,7 @@ def submit(self, func, resource_specification, *args, **kwargs):
category = func.__name__ if self.manager_config.autocategory else 'parsl-default'

task_info = ParslTaskToVine(executor_id=executor_task_id,
func_name=func.__name__,
exec_mode=exec_mode,
category=category,
input_files=input_files,
Expand All @@ -439,6 +523,8 @@ def submit(self, func, resource_specification, *args, **kwargs):
function_file=function_file,
argument_file=argument_file,
result_file=result_file,
function_context_file=function_context_file,
function_context_input_files=function_context_input_files,
cores=cores,
memory=memory,
disk=disk,
Expand Down Expand Up @@ -493,6 +579,12 @@ def _serialize_object_to_file(self, path, obj):
while written < len(serialized_obj):
written += f_out.write(serialized_obj[written:])

def _cloudpickle_serialize_object_to_file(self, path, obj):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We talked about this somewhere before, but I can't remember where: you should be using the Parsl serialization libraries, not cloudpickle, unless you have a specific reason that needs different serialization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The object I serialize is a list containing a function and other Python objects. https://github.com/Parsl/parsl/pull/3724/files#diff-c5ce2bce42f707d31639e986d8fea5c00d31b5eead8fa510f7fe7e3181e67ccfR458-R461

Because it is a list, Parsl's serialize uses methods_for_data to serialize it, which eventually uses pickle, and pickle can't serialize a function by value. So I'm using cloudpickle serialization only for this case. What do you think?

"""Takes any object and serializes it to the file path."""
import cloudpickle # type: ignore[import-not-found]
with open(path, 'wb') as f:
cloudpickle.dump(obj, f)

def _construct_map_file(self, map_file, input_files, output_files):
""" Map local filepath of parsl files to the filenames at the execution worker.
If using a shared filesystem, the filepath is mapped to its absolute filename.
Expand Down
Loading