Skip to content

Commit

Permalink
mpi/pmix - use container spooldir
Browse files Browse the repository at this point in the history
When containers are active, a per-task spooldir is created and should be
used by PMIx. PMIx sets the environment variable PMIX_SERVER_TMPDIR, which
must match the ContainerPath in oci.conf.

Bug 15449
  • Loading branch information
naterini authored and wickberg committed Dec 7, 2022
1 parent f667da1 commit 9985efc
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 6 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ documents those changes that are of interest to users and administrators.
-- oci.conf - add MountSpoolDir.
-- slurmstepd - create a per-task spool directory for each container to avoid
conflicts when there is more than one container task on any given node.
-- mpi/pmix - set PMIX_SERVER_TMPDIR to the container spool directory instead
of the slurmd spool directory; using the latter caused PMIx to fail to load in
containers.

* Changes in Slurm 22.05.7
==========================
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/mpi/pmix/pmixp_client.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ extern int pmixp_libpmix_init(void)
}

/* TODO: must be deleted in future once info-key approach harden */
setenv(PMIXP_PMIXLIB_TMPDIR, pmixp_info_tmpdir_lib(), 1);
setenv(PMIXP_PMIXLIB_TMPDIR, _pmixp_info_client_tmpdir_lib(), 1);

/*
if( pmixp_fixrights(pmixp_info_tmpdir_lib(),
Expand Down
24 changes: 19 additions & 5 deletions src/plugins/mpi/pmix/pmixp_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,12 +432,23 @@ static int _env_set(const stepd_step_rec_t *step, char ***env)
*/
_parse_pmix_conf_env(env, slurm_pmix_conf.env);

_pmixp_job_info.server_addr_unfmt =
xstrdup(slurm_conf.slurmd_spooldir);
if (step->container) {
_pmixp_job_info.server_addr_unfmt =
xstrdup(step->container->spool_dir);
_pmixp_job_info.client_lib_tmpdir =
xstrdup(step->container->mount_spool_dir);
} else {
_pmixp_job_info.server_addr_unfmt =
xstrdup(slurm_conf.slurmd_spooldir);
}

debug2("set _pmixp_job_info.server_addr_unfmt = %s",
_pmixp_job_info.server_addr_unfmt);

_pmixp_job_info.lib_tmpdir = slurm_conf_expand_slurmd_path(
_pmixp_job_info.server_addr_unfmt,
_pmixp_job_info.hostname, NULL);
if (!_pmixp_job_info.lib_tmpdir)
_pmixp_job_info.lib_tmpdir = slurm_conf_expand_slurmd_path(
_pmixp_job_info.server_addr_unfmt,
_pmixp_job_info.hostname, NULL);

xstrfmtcat(_pmixp_job_info.server_addr_unfmt,
"/stepd.slurm.pmix.%d.%d",
Expand All @@ -457,6 +468,9 @@ static int _env_set(const stepd_step_rec_t *step, char ***env)

if (p){
_pmixp_job_info.cli_tmpdir_base = xstrdup(p);
} else if (step->container) {
_pmixp_job_info.cli_tmpdir_base = xstrdup(
step->container->spool_dir);
} else if (slurm_pmix_conf.cli_tmpdir_base)
_pmixp_job_info.cli_tmpdir_base =
xstrdup(slurm_pmix_conf.cli_tmpdir_base);
Expand Down
10 changes: 10 additions & 0 deletions src/plugins/mpi/pmix/pmixp_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ typedef struct {
int timeout;
char *cli_tmpdir, *cli_tmpdir_base;
char *lib_tmpdir;
char *client_lib_tmpdir; /* path to lib_tmpdir on client */
char *server_addr_unfmt;
char *spool_dir;
uid_t uid;
Expand Down Expand Up @@ -124,6 +125,15 @@ static inline char *pmixp_info_tmpdir_lib(void)
return _pmixp_job_info.lib_tmpdir;
}

/*
 * Lib tempdir as seen by the client.
 *
 * Returns _pmixp_job_info.client_lib_tmpdir when it is set; otherwise falls
 * back to the value returned by pmixp_info_tmpdir_lib().
 */
static inline char *_pmixp_info_client_tmpdir_lib(void)
{
	char *client_dir = _pmixp_job_info.client_lib_tmpdir;

	return client_dir ? client_dir : pmixp_info_tmpdir_lib();
}

/* Dealing with I/O */
void pmixp_info_io_set(eio_handle_t *h);
eio_handle_t *pmixp_info_io(void);
Expand Down

0 comments on commit 9985efc

Please sign in to comment.