Skip to content

Commit 55e62ab

Browse files
author
Moe Jette
committed
Add support for AIX systems:
* Memory leak in slurm_cred.c, added EVP_MD_CTX_cleanup(). * Pthread stack size too small on AIX, resulting in stack corruption and ugly failure modes. Added slurm_attr_init to macros.h to explicitly set the stack size for all pthreads. * /dev/urandom is not present on AIX; use rand() instead, as needed, when constructing a credential. This is used in "srun --join". * getsockopt(Socket, Level, SO_ERROR, &err, OptionLength) sometimes returns an error code of -1. This causes an assert failure in slurmd/io.c:_update_error_state(). * Function aliasing is not working on AIX. It is being turned off via a variable in config.h and "#if" logic in macros.h and slurm_xlator.h. * dlopen fails if plugins reference any functions not present in the caller. This may be fixed by adding the LDFLAG "-Wl,-bgcbypass:1000" for the slurm commands (avoids garbage collection of unused functions). * read() sometimes generates an EAGAIN error, which was not handled in some places. * vsnprintf() for a NULL string prints "" instead of "(null)" as produced by snprintf(). More format printing was added to log.c to produce more consistent log messages. * poll() takes a timeout of -1 for unlimited rather than any negative number. Modified logic that was always multiplying by 1000 to convert sec to msec. * getopt_long keyword table was not NULL terminated, resulting in a segfault with an invalid command-line argument in most commands. * xmalloc module assert failures were not generating a core file. Changed "fatal();abort();" to "error();abort();". * Changed msg timeout from 3 sec to 5 sec. Running everything on a single AIX node was very slow.
1 parent 887ccd8 commit 55e62ab

28 files changed

+168
-149
lines changed

configure.ac

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,12 @@ CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
103103
LIBS="$PTHREAD_LIBS $LIBS"
104104

105105
case "$host" in
106-
*-*-aix*) LDFLAGS="$LDFLAGS -Wl,-brtl -Wl,-bexpall " ;;
106+
*-*-aix*) LDFLAGS="$LDFLAGS -Wl,-brtl -Wl,-bexpall"
107+
CMD_LDFLAGS="-Wl,-bgcbypass:1000" # keep all common functions
108+
AC_DEFINE(USE_ALIAS, 0, [Define slurm_ prefix function aliases for plusins]) ;;
109+
*) AC_DEFINE(USE_ALIAS, 1, [Define slurm_ prefix function aliases for plugins]) ;;
107110
esac
111+
AC_SUBST(CMD_LDFLAGS)
108112

109113
AC_SLURM_SEMAPHORE
110114

slurm/bnr.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ typedef int BNR_gid;
1313
#define BNR_SUCCESS 0
1414
#define BNR_ERROR 1
1515

16-
int BNR_Init(BNR_gid *mygid);
17-
int BNR_Put(BNR_gid gid, char *attr, char *val);
18-
int BNR_Fence(BNR_gid gid);
19-
int BNR_Get(BNR_gid gid, char *attr, char *val);
20-
int BNR_Finalize();
21-
int BNR_Rank(BNR_gid group, int *myrank);
22-
int BNR_Nprocs(BNR_gid group, int *nprocs);
16+
extern int BNR_Init(BNR_gid *mygid);
17+
extern int BNR_Put(BNR_gid gid, char *attr, char *val);
18+
extern int BNR_Fence(BNR_gid gid);
19+
extern int BNR_Get(BNR_gid gid, char *attr, char *val);
20+
extern int BNR_Finalize();
21+
extern int BNR_Rank(BNR_gid group, int *myrank);
22+
extern int BNR_Nprocs(BNR_gid group, int *nprocs);
2323

src/common/cbuf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ cbuf_get_fd (void *dstbuf, int *psrcfd, int len)
12751275

12761276
do {
12771277
n = read(*psrcfd, dstbuf, len);
1278-
} while ((n < 0) && (errno == EINTR));
1278+
} while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN)));
12791279
return(n);
12801280
}
12811281

src/common/log.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,28 @@ static char *vxstrfmt(const char *fmt, va_list ap)
414414
} else
415415
xstrcat(buf, "%d");
416416
break;
417+
case 'u':
418+
if (unprocessed == 0) {
419+
snprintf(tmp, sizeof(tmp), "%u",
420+
va_arg(ap, int));
421+
xstrcat(buf, tmp);
422+
} else
423+
xstrcat(buf, "%u");
424+
break;
425+
case 'l':
426+
if ((unprocessed == 0) && (*(p+1) == 'u')) {
427+
snprintf(tmp, sizeof(tmp), "%lu",
428+
va_arg(ap, long unsigned));
429+
xstrcat(buf, tmp);
430+
p++;
431+
} else if ((unprocessed==0) && (*(p+1)=='d')) {
432+
snprintf(tmp, sizeof(tmp), "%ld",
433+
va_arg(ap, long int));
434+
xstrcat(buf, tmp);
435+
p++;
436+
} else
437+
xstrcat(buf, "%l");
438+
break;
417439
default: /* try to handle the rest */
418440
xstrcatchar(buf, '%');
419441
xstrcatchar(buf, *p);

src/common/macros.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,17 +192,47 @@ typedef enum {false, true} bool;
192192
} \
193193
} _STMT_END
194194

195+
# ifdef PTHREAD_SCOPE_SYSTEM
196+
# define slurm_attr_init(attr) \
197+
_STMT_START { \
198+
if (pthread_attr_init(attr)) \
199+
fatal("pthread_attr_init: %m"); \
200+
/* we want 1:1 threads if there is a choice */ \
201+
if (pthread_attr_setscope(attr, PTHREAD_SCOPE_SYSTEM)) \
202+
error("pthread_attr_setscope: %m"); \
203+
if (pthread_attr_setstacksize(attr, 1024*1024)) \
204+
error("pthread_attr_setstacksize: %m"); \
205+
} _STMT_END
206+
# else
207+
# define slurm_attr_init(attr) \
208+
_STMT_START { \
209+
if (pthread_attr_init(attr)) \
210+
fatal("pthread_attr_init: %m"); \
211+
if (pthread_attr_setstacksize(attr, 1024*1024)) \
212+
error("pthread_attr_setstacksize: %m"); \
213+
} _STMT_END
214+
# endif
215+
195216
#else /* !WITH_PTHREADS */
196217

197218
# define slurm_mutex_init(mutex)
198219
# define slurm_mutex_destroy(mutex)
199220
# define slurm_mutex_lock(mutex)
200221
# define slurm_mutex_unlock(mutex)
222+
# define slurm_attr_init(attr)
201223

202224
#endif /* WITH_PTHREADS */
203225

204226
#ifndef strong_alias
205-
# define strong_alias(name, aliasname) \
206-
extern __typeof (name) aliasname __attribute ((alias (#name)))
227+
# if USE_ALIAS
228+
# define strong_alias(name, aliasname) \
229+
extern __typeof (name) aliasname __attribute ((alias (#name)))
230+
# else
231+
/* dummy function definition,
232+
* confirm "aliasname" is free and waste "name" */
233+
# define strong_alias(name, aliasname) \
234+
extern void aliasname(int name)
235+
# endif
207236
#endif
237+
208238
#endif /* !_MACROS_H */

src/common/slurm_cred.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@
3131

3232
#include <slurm/slurm_errno.h>
3333

34-
#include <stdarg.h>
3534
#include <fcntl.h>
35+
#include <stdarg.h>
36+
#include <stdlib.h>
37+
#include <sys/time.h>
3638

3739
/*
3840
* OpenSSL includes
@@ -395,14 +397,21 @@ slurm_cred_faker(slurm_cred_arg_t *arg)
395397
cred->ctime = time(NULL);
396398
cred->siglen = SLURM_IO_KEY_SIZE;
397399

398-
if ((fd = open("/dev/urandom", O_RDONLY)) < 0)
399-
error ("unable to open /dev/random: %m");
400-
401400
cred->signature = xmalloc(cred->siglen * sizeof(char));
402-
read(fd, cred->signature, cred->siglen);
403401

404-
if (close(fd) < 0)
405-
error ("close(/dev/random): %m");
402+
if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
403+
read(fd, cred->signature, cred->siglen);
404+
if (close(fd) < 0)
405+
error ("close(/dev/urandom): %m");
406+
} else { /* Note: some systems lack this file */
407+
unsigned int i;
408+
struct timeval tv;
409+
gettimeofday(&tv, NULL);
410+
i = (unsigned int) (tv.tv_sec + tv.tv_usec);
411+
srand((unsigned int) i);
412+
for (i=0; i<cred->siglen; i++)
413+
cred->signature[i] = (rand() & 0xff);
414+
}
406415

407416
slurm_mutex_unlock(&cred->mutex);
408417
return cred;
@@ -966,6 +975,7 @@ _slurm_cred_sign(slurm_cred_ctx_t ctx, slurm_cred_t cred)
966975
rc = SLURM_ERROR;
967976
}
968977

978+
EVP_MD_CTX_cleanup(&ectx);
969979
free_buf(buffer);
970980

971981
return rc;

src/common/slurm_protocol_interface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
#include "src/common/pack.h"
6666
#include "src/common/slurm_protocol_common.h"
6767

68-
#define SLURM_MESSAGE_TIMEOUT_MSEC_STATIC 3000
68+
#define SLURM_MESSAGE_TIMEOUT_MSEC_STATIC 5000
6969

7070
/****************\
7171
** Data Types **

src/common/slurm_protocol_socket_implementation.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ int _slurm_send_timeout(slurm_fd fd, char *buf, size_t size,
252252
goto done;
253253
}
254254
if ((rc = poll(&ufds, 1, timeout)) <= 0) {
255-
if ((rc == 0) || (errno == EINTR))
255+
if ((rc == 0) || (errno == EINTR) || (errno == EAGAIN))
256256
continue;
257257
else {
258258
debug("_slurm_send_timeout at %d of %d, "
@@ -329,7 +329,7 @@ int _slurm_recv_timeout(slurm_fd fd, char *buffer, size_t size,
329329
}
330330

331331
if ((rc = poll(&ufds, 1, timeout)) <= 0) {
332-
if ((errno == EINTR) || (rc == 0))
332+
if ((errno == EINTR) || (errno == EAGAIN) || (rc == 0))
333333
continue;
334334
else {
335335
debug("_slurm_recv_timeout at %d of %d, "

src/common/slurm_protocol_util.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ void init_header(header_t * header, slurm_msg_type_t msg_type,
6262
header->version = SLURM_PROTOCOL_VERSION;
6363
header->flags = flags;
6464
header->msg_type = msg_type;
65+
header->body_length = 0; /* over-written later */
6566
}
6667

6768
/*

src/common/slurm_xlator.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
* All SLURM functions referenced from the switch and auth plugins should
1212
* have aliases established. Functions not referenced from the plugins
13-
* need not be aliased.
13+
* need not be aliased.
1414
*
1515
* To use this header file:
1616
* 1. In the module containing the exported function code, establish an
@@ -24,6 +24,8 @@
2424
* and remove other slurm header files (they should all be in this header).
2525
* This logic will have the plugin link only to the function names with
2626
* the "slurm_" prefix.
27+
*
28+
* NOTE: Not all operating systems support this function aliasing (e.g. AIX).
2729
*****************************************************************************
2830
* Copyright (C) 2004 The Regents of the University of California.
2931
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -51,6 +53,12 @@
5153
#ifndef __SLURM_XLATOR_H__
5254
#define __SLURM_XLATOR_H__
5355

56+
#if HAVE_CONFIG_H
57+
# include "config.h"
58+
#endif
59+
60+
#if USE_ALIAS
61+
5462
/* arg_desc.[ch] functions*/
5563
#define arg_count slurm_arg_count
5664
#define arg_idx_by_name slurm_arg_idx_by_name
@@ -233,6 +241,8 @@
233241
#define xstrdup slurm_xstrdup
234242
#define xbasename slurm_xbasename
235243

244+
#endif /* USE_ALIAS */
245+
236246
/* Include the function definitions after redefining their names. */
237247
#include "src/common/arg_desc.h"
238248
#include "src/common/bitstring.h"

0 commit comments

Comments
 (0)