Permalink
Browse files

Add REQUEST_ADD_EXTERN_PID option to add a pid to the slurmstepd's extern step.
  • Loading branch information...
dannyauble committed Nov 12, 2015
1 parent 2270ece commit d53755af26125ae968ff0ef0c842e6c852df8037
Showing with 75 additions and 2 deletions.
  1. +2 −0 NEWS
  2. +1 −1 src/common/slurm_xlator.h
  3. +22 −0 src/common/stepd_api.c
  4. +8 −1 src/common/stepd_api.h
  5. +42 −0 src/slurmd/slurmstepd/req.c
View
2 NEWS
@@ -30,6 +30,8 @@ documents those changes that are of interest to users and administrators.
-- jobcomp/elasticsearch plugin: Add array_job_id and array_task_id fields.
-- Remove duplicate #define IS_NODE_POWER_UP.
-- Added SchedulerParameters option of max_script_size.
-- Add REQUEST_ADD_EXTERN_PID option to add pid to the slurmstepd's extern
step.
* Changes in Slurm 15.08.3
==========================
@@ -415,7 +415,7 @@
#define stepd_available slurm_stepd_available
#define stepd_connect slurm_stepd_connect
#define stepd_get_uid slurm_stepd_get_uid
#define stepd_add_extern_pid slurm_stepd_add_extern_pid
#endif /* USE_ALIAS */
View
@@ -73,6 +73,7 @@
strong_alias(stepd_available, slurm_stepd_available);
strong_alias(stepd_connect, slurm_stepd_connect);
strong_alias(stepd_get_uid, slurm_stepd_get_uid);
strong_alias(stepd_add_extern_pid, slurm_stepd_add_extern_pid);
static bool
_slurm_authorized_user()
@@ -728,6 +729,27 @@ stepd_pid_in_container(int fd, uint16_t protocol_version, pid_t pid)
return false;
}
/*
 * Ask a slurmstepd to attach a pid to the "extern" step of its job, i.e.
 * register the pid with the jobacct_gather and proctrack plugins.
 *
 * IN fd - descriptor the request is written to and the reply is read from
 * IN protocol_version - not referenced by this function
 * IN pid - process id to add to the extern step
 * RET the return code read back from fd, or SLURM_ERROR when the rwfail
 *     path is taken
 */
extern int stepd_add_extern_pid(int fd, uint16_t protocol_version, pid_t pid)
{
	int request = REQUEST_ADD_EXTERN_PID;
	int reply;

	/* Message type first, then its single-field payload. */
	safe_write(fd, &request, sizeof(request));
	safe_write(fd, &pid, sizeof(pid));

	/* The stepd answers with one int: its return code. */
	safe_read(fd, &reply, sizeof(reply));

	debug("Leaving stepd_add_extern_pid");
	return reply;

	/*
	 * NOTE(review): no explicit goto targets this label, so the
	 * safe_write()/safe_read() macros presumably jump here on I/O
	 * failure - confirm against their definitions.
	 */
rwfail:
	return SLURM_ERROR;
}
/*
* Return the process ID of the slurmstepd.
*/
View
@@ -70,7 +70,8 @@ typedef enum {
REQUEST_STEP_COMPLETION_V2,
REQUEST_STEP_MEM_LIMITS,
REQUEST_STEP_UID,
REQUEST_STEP_NODEID
REQUEST_STEP_NODEID,
REQUEST_ADD_EXTERN_PID
} step_msg_t;
typedef enum {
@@ -208,6 +209,12 @@ extern List stepd_available(const char *directory, const char *nodename);
*/
bool stepd_pid_in_container(int fd, uint16_t protocol_version, pid_t pid);
/*
 * Add a pid to the "extern" step of a job, meaning add it to the
 * jobacct_gather and proctrack plugins.
 *
 * IN fd - descriptor the request is written to and the reply is read from
 *         (presumably an open connection to the job's slurmstepd - confirm
 *         against the caller)
 * IN protocol_version - accepted for consistency with the other stepd_*
 *         calls; the implementation in stepd_api.c does not reference it
 * IN pid - process id to add to the extern step
 * RET the return code sent back over fd, or SLURM_ERROR if the exchange
 *     with the stepd fails
 */
extern int stepd_add_extern_pid(int fd, uint16_t protocol_version, pid_t pid);
/*
* Return the process ID of the slurmstepd.
*/
@@ -87,6 +87,7 @@ static int _handle_signal_container(int fd, stepd_step_rec_t *job, uid_t uid);
static int _handle_checkpoint_tasks(int fd, stepd_step_rec_t *job, uid_t uid);
static int _handle_attach(int fd, stepd_step_rec_t *job, uid_t uid);
static int _handle_pid_in_container(int fd, stepd_step_rec_t *job);
static int _handle_add_extern_pid(int fd, stepd_step_rec_t *job);
static int _handle_daemon_pid(int fd, stepd_step_rec_t *job);
static int _handle_notify_job(int fd, stepd_step_rec_t *job, uid_t uid);
static int _handle_suspend(int fd, stepd_step_rec_t *job, uid_t uid);
@@ -564,6 +565,10 @@ _handle_request(int fd, stepd_step_rec_t *job, uid_t uid, gid_t gid)
debug("Handling REQUEST_JOB_NOTIFY");
rc = _handle_notify_job(fd, job, uid);
break;
case REQUEST_ADD_EXTERN_PID:
debug("Handling REQUEST_ADD_EXTERN_PID");
rc = _handle_add_extern_pid(fd, job);
break;
default:
error("Unrecognized request: %d", req);
rc = SLURM_FAILURE;
@@ -1203,6 +1208,43 @@ _handle_pid_in_container(int fd, stepd_step_rec_t *job)
return SLURM_FAILURE;
}
/*
 * Handle REQUEST_ADD_EXTERN_PID: read a pid from the requester and add it
 * to this step's proctrack container and jobacct_gather accounting.
 *
 * Only valid when this slurmstepd is running the "extern" step
 * (job->stepid == SLURM_EXTERN_CONT); any other step is rejected.
 *
 * IN fd - descriptor the pid is read from and the return code written to
 * IN job - step record of this slurmstepd
 * RET SLURM_SUCCESS if the request was read and a reply was written
 *     (whatever return code that reply carried), SLURM_FAILURE if the
 *     read or write on fd failed. The outcome of the add itself is
 *     reported to the requester as an int written to fd.
 */
static int
_handle_add_extern_pid(int fd, stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;
	pid_t pid;
	jobacct_id_t jobacct_id;

	safe_read(fd, &pid, sizeof(pid_t));

	if (job->stepid != SLURM_EXTERN_CONT) {
		error("_handle_add_extern_pid: non-extern step (%u) given for job %u.",
		      job->stepid, job->jobid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	debug("_handle_add_extern_pid for job %u.%u, pid %d",
	      job->jobid, job->stepid, pid);

	/*
	 * Report plugin failures to the requester instead of silently
	 * answering SLURM_SUCCESS. A pid that could not be tracked is not
	 * handed to accounting either.
	 */
	if (proctrack_g_add(job, pid) != SLURM_SUCCESS) {
		error("_handle_add_extern_pid: job %u can't add pid %d to the proctrack plugin in the extern step.",
		      job->jobid, pid);
		rc = SLURM_FAILURE;
		goto send_it;
	}

	/*
	 * The node id is used for both the task id and the node id
	 * (presumably because the extern step has no task ranks of its
	 * own - confirm).
	 */
	jobacct_id.taskid = job->nodeid;
	jobacct_id.nodeid = job->nodeid;
	jobacct_id.job = job;

	if (jobacct_gather_add_task(pid, &jobacct_id, 1) != SLURM_SUCCESS) {
		error("_handle_add_extern_pid: job %u can't add pid %d to the jobacct_gather plugin in the extern step.",
		      job->jobid, pid);
		rc = SLURM_FAILURE;
	}

send_it:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));

	debug("Leaving _handle_add_extern_pid");
	return SLURM_SUCCESS;

	/*
	 * NOTE(review): reached only through the safe_read()/safe_write()
	 * macros, which presumably jump here on I/O failure - confirm.
	 */
rwfail:
	return SLURM_FAILURE;
}
static int
_handle_daemon_pid(int fd, stepd_step_rec_t *job)
{

0 comments on commit d53755a

Please sign in to comment.