diff --git a/NEWS b/NEWS index 7d1cdfe8c76..efae140763a 100644 --- a/NEWS +++ b/NEWS @@ -7,8 +7,12 @@ documents those changes that are of interest to users and admins. -- Rebuild slurmctld's job select_jobinfo->node_bitmap on restart/reconfigure of the daemon rather than restoring the bitmap since the nodes in a system can change (be added or removed). - -- Add configuration option "--with-cpusetdir=PATH" for non-standard locations. + -- Add configuration option "--with-cpusetdir=PATH" for non-standard + locations. -- Get new multi-core data structures working on BlueGene systems. + -- Modify PMI_Get_clique_ranks() to return an array of integers rather + than a char * to satisfy PMI standard. Correct logic in + PMI_Get_clique_size() for when srun --overcommit option is used. * Changes in SLURM 1.4.0-pre4 ============================= diff --git a/RELEASE_NOTES b/RELEASE_NOTES index f65393c2201..72a31966675 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,5 +1,5 @@ RELEASE NOTES FOR SLURM VERSION 1.4 -6 October 2008 +12 November 2008 (after SLURM 1.4.0-pre4 released) IMPORTANT NOTE: @@ -22,6 +22,11 @@ HIGHLIGHTS order to effectively preempt or gang schedule jobs. * A new configuration parameter, PrologSlurmctld, can be used to support the booting of different operating systems for each job. +* Preemption of jobs from lower priority partitions in order to execute jobs + in higher priority partitions is now supported. The jobs from the lower + priority partition will resume once preempting job completes. + NOTE: Supported only in SelectType=select/cons_res only as of 12 November + 2008. CONFIGURATION FILE CHANGES (see "man slurm.conf" for details) * DefMemPerTask has been removed. Use DefMemPerCPU or DefMemPerNode instead. @@ -44,3 +49,7 @@ COMMAND CHANGES (see man pages for details) * --task-mem and --job-mem options have been removed from salloc, sbatch and srun. Use --mem-per-cpu or --mem instead. 
* --ctrl-comm-ifhn-addr option has been removed from the srun command. + +OTHER CHANGES +* The libpmi function PMI_Get_clique_ranks() has been changed to return an + array of integers rather than a string to satisfy PMI standard. diff --git a/slurm/pmi.h b/slurm/pmi.h index 9621077da9a..6740a8b6859 100644 --- a/slurm/pmi.h +++ b/slurm/pmi.h @@ -389,7 +389,7 @@ communicate through IPC mechanisms (e.g., shared memory) and other network mechanisms. @*/ -int PMI_Get_clique_ranks( char ranks[], int length); +int PMI_Get_clique_ranks( int ranks[], int length); /*@ PMI_Abort - abort the process group associated with this process diff --git a/src/api/pmi.c b/src/api/pmi.c index d0a7754beb1..3ddac7b7243 100644 --- a/src/api/pmi.c +++ b/src/api/pmi.c @@ -711,9 +711,14 @@ int PMI_Get_clique_size( int *size ) if (size == NULL) return PMI_ERR_INVALID_ARG; - env = getenv("SLURM_CPUS_ON_NODE"); + env = getenv("SLURM_GTIDS"); if (env) { - *size = atoi(env); + int i, tids=1; + for (i=0; env[i]; i++) { + if (env[i] == ',') + tids++; + } + *size = tids; return PMI_SUCCESS; } return PMI_FAIL; @@ -742,7 +747,7 @@ communicate through IPC mechanisms (e.g., shared memory) and other network mechanisms. 
@*/ -int PMI_Get_clique_ranks( char ranks[], int length ) +int PMI_Get_clique_ranks( int ranks[], int length ) { char *env; @@ -754,7 +759,19 @@ int PMI_Get_clique_ranks( char ranks[], int length ) env = getenv("SLURM_GTIDS"); if (env) { - strcpy(ranks, env); + int i = 0; + char *tid, *tids, *last; + tids = strdup(env); + tid = strtok_r(tids, ",", &last); + while (tid) { + if (i >= length) { + free(tids); + return PMI_ERR_INVALID_LENGTH; + } + ranks[i++] = atoi(tid); + tid = strtok_r(NULL, ",", &last); + } + free(tids); return PMI_SUCCESS; } diff --git a/testsuite/expect/test7.2.prog.c b/testsuite/expect/test7.2.prog.c index e3b2e34df5b..612c0e6cb8a 100644 --- a/testsuite/expect/test7.2.prog.c +++ b/testsuite/expect/test7.2.prog.c @@ -50,6 +50,7 @@ main (int argc, char **argv) { int i, j, rc; int nprocs, procid; + int clique_size, *clique_ranks = NULL; char *nprocs_ptr, *procid_ptr; int pmi_rank, pmi_size, kvs_name_len, key_len, val_len; PMI_BOOL initialized; @@ -120,6 +121,24 @@ main (int argc, char **argv) exit(1); } + if ((rc = PMI_Get_clique_size(&clique_size)) != PMI_SUCCESS) { + printf("FAILURE: PMI_Get_clique_size: %d, task %d\n", + rc, pmi_rank); + exit(1); + } + clique_ranks = malloc(sizeof(int) * clique_size); + if ((rc = PMI_Get_clique_ranks(clique_ranks, clique_size)) != + PMI_SUCCESS) { + printf("FAILURE: PMI_Get_clique_ranks: %d, task %d\n", + rc, pmi_rank); + exit(1); + } +#if _DEBUG + for (i=0; i