Skip to content

Commit

Permalink
Fix usleep() bug causing excessive host CPU consumption:
Browse files Browse the repository at this point in the history
The root cause of the bug was in my incorrect #define of MAX_TOD_UPDATE_USECS in featall.h, which was #defined to 1000000 (one million) instead of 999999, thereby causing the TXF rubato_thread to do a usleep for 1000000 (one million) microseconds, which, being invalid, was causing it to not sleep at all and instead return immediately with an error, causing it to loop continuously without sleeping, consuming all available host CPU due to the timer threads being the highest priority thread in Hercules.

comm3705.c also had a similar bug wherein it was using "min(1000000,delay)" for one of its usleep().

In addition to fixing the previously mentioned bug, I have also introduced a new USLEEP() macro which calls a new "herc_usleep()" function that will report any usleep() error that might occur. (I purposely did not code it to fix (hide) the problem but rather to just report it so that the caller can thus be fixed to not make invalid usleep calls.)

This should close GitHub Issue #589 "Auxiliary threads eating up CPU time" as well as the long running "CPU utilization on Raspberry Pi" thread that took place in the main Hercules group recently.
  • Loading branch information
Fish-Git committed Aug 5, 2023
1 parent cddce71 commit 87d04ed
Show file tree
Hide file tree
Showing 35 changed files with 85 additions and 65 deletions.
2 changes: 1 addition & 1 deletion cckddasd.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ int rc, i; /* Return code, Loop index */
while (cckd->ras)
{
release_lock(&cckdblk.ralock);
usleep(1);
USLEEP(1);
obtain_lock(&cckdblk.ralock);
}
release_lock(&cckdblk.ralock);
Expand Down
2 changes: 1 addition & 1 deletion cckddasd64.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ int rc, i; /* Return code, Loop index */
while (cckd->ras)
{
release_lock(&cckdblk.ralock);
usleep(1);
USLEEP(1);
obtain_lock(&cckdblk.ralock);
}
release_lock(&cckdblk.ralock);
Expand Down
4 changes: 2 additions & 2 deletions cgibin.c
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ int msgcount = 22;
panel_command(command);
// Wait a bit before proceeding in case
// the command issues a lot of messages
usleep(50000);
USLEEP(50000);
}

if((value = cgi_variable(webblk,"msgcount")))
Expand Down Expand Up @@ -1623,7 +1623,7 @@ void cgibin_api_v1_syslog(WEBBLK *webblk)
panel_command(command);
// Wait a bit before proceeding in case
// the command issues a lot of messages
usleep(50000);
USLEEP(50000);
hprintf(webblk->sock,"{\"command\": \"%s\",",command);
}
else
Expand Down
4 changes: 2 additions & 2 deletions channel.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ typedef struct PREFETCH PREFETCH;
#define IODELAY(_dev) \
do { \
if (sysblk.iodelay > 0 && (_dev)->devchar[10] == 0x20) \
usleep(sysblk.iodelay); \
USLEEP(sysblk.iodelay); \
} while(0)
#else
#define IODELAY(_dev)
Expand Down Expand Up @@ -4151,7 +4151,7 @@ do { \
#ifdef FEATURE_S370_CHANNEL
if (dev->devtype == 0x2703)
if (dev->commadpt->lnctl == COMMADPT_LNCTL_ASYNC)
usleep(5000);
USLEEP(5000);
#endif

#if DEBUG_DUMP
Expand Down
3 changes: 2 additions & 1 deletion cmdtab.h
Original file line number Diff line number Diff line change
Expand Up @@ -1828,7 +1828,8 @@
"When the z/Arch Transactional-Execution Facility *is* installed and\n" \
"enabled the minimum and default intervals are 200 and 400 microseconds.\n" \
"\n" \
"The maximum allowed interval is 1000000 microseconds (one second).\n" \
"The maximum allowed interval is "QSTR( MAX_TOD_UPDATE_USECS )" microseconds (one microsecond\n" \
"less than one second).\n" \
"\n" \
"Also note that due to host system limitations and/or design, some\n" \
"hosts may round up and/or coalesce short microsecond intervals to a\n" \
Expand Down
4 changes: 2 additions & 2 deletions comm3705.c
Original file line number Diff line number Diff line change
Expand Up @@ -1158,7 +1158,7 @@ static void *telnet_thread(void *vca)
ca->hangup = 0;
for (;;)
{
usleep(50000);
USLEEP(50000);
if (ca->hangup)
break;
/* read_socket has changed from 3.04 to 3.06 - we need old way */
Expand Down Expand Up @@ -1224,7 +1224,7 @@ static void *commadpt_thread(void *vca)
release_lock(&ca->lock);
if(ca->ackspeed == 0) delay = 50000 + (ca->unack_attn_count * 100000); /* Max's reliable algorithm */
else delay = (ca->unack_attn_count * ca->unack_attn_count + 1) * ca->ackspeed; /* much faster but TCAM hates it */
usleep(min(1000000,delay)); /* go to sleep, max. 1 second */
USLEEP(min((int)(ONE_MILLION-1),delay)); /* go to sleep, max. 1 second */
obtain_lock(&ca->lock);
make_sna_requests2(ca);
make_sna_requests3(ca);
Expand Down
2 changes: 1 addition & 1 deletion commadpt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1474,7 +1474,7 @@ static void *commadpt_thread(void *vca)
if(ca->inbfr.havedata || ca->eol_flag)
{
if (ca->term == COMMADPT_TERM_2741) {
usleep(10000);
USLEEP(10000);
}
ca->curpending=COMMADPT_PEND_IDLE;
signal_condition(&ca->ipc);
Expand Down
4 changes: 2 additions & 2 deletions console.c
Original file line number Diff line number Diff line change
Expand Up @@ -3632,7 +3632,7 @@ int prev_rlen3270;
{
// "COMM: pselect() failed: %s"
CONERROR( HHC90508, "D", strerror( select_errno ));
usleep( 50000 ); // (wait a bit; maybe it'll fix itself??)
USLEEP( 50000 ); // (wait a bit; maybe it'll fix itself??)
}
continue;

Expand Down Expand Up @@ -3696,7 +3696,7 @@ int prev_rlen3270;
{
// "COMM: accept() failed: %s"
CONERROR( HHC90509, "D", strerror( accept_errno ));
usleep( 50000 ); // (wait a bit; maybe it'll fix itself??)
USLEEP( 50000 ); // (wait a bit; maybe it'll fix itself??)
}
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion ctc_ctci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,7 @@ static void* CTCI_ReadThread( void* arg )
// Don't use sched_yield() here; use an actual non-dispatchable
// delay instead so as to allow another [possibly lower priority]
// thread to 'read' (remove) some packet(s) from our frame buffer.
usleep( CTC_DELAY_USECS ); // (wait a bit before retrying...)
USLEEP( CTC_DELAY_USECS ); // (wait a bit before retrying...)
}
}

Expand Down
8 changes: 4 additions & 4 deletions ctc_lcs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1691,7 +1691,7 @@ static void UpdatePortStarted( int bStarted, DEVBLK* pDEVBLK, PLCSPORT pLCSPORT
release_lock( &pLCSPORT->PortEventLock );

PTT_DEBUG( "UPDTPORT pause 150", 000, pDEVBLK->devnum, pLCSPORT->bPort );
usleep( 150*1000 );
USLEEP( 150*1000 );
}

// ====================================================================
Expand Down Expand Up @@ -2191,7 +2191,7 @@ static void LCS_EnqueueReplyFrame( PLCSDEV pLCSDEV, PLCSCMDHDR pReply, size_t iS
// Wait for LCS_Read to empty the buffer...

ASSERT( ENOBUFS == errno );
usleep( CTC_DELAY_USECS );
USLEEP( CTC_DELAY_USECS );
}
PTT_TIMING( "af repNQ", 0, iSize, 0 );
PTT_DEBUG( "ENQ RepFrame EXIT ", pReply->bCmdCode, pDEVBLK->devnum, bPort );
Expand Down Expand Up @@ -2798,7 +2798,7 @@ static void LCS_EnqueueEthFrame( PLCSPORT pLCSPORT, PLCSDEV pLCSDEV, BYTE* pData
// Wait for LCS_Read to empty the buffer...

ASSERT( ENOBUFS == errno );
usleep( CTC_DELAY_USECS );
USLEEP( CTC_DELAY_USECS );
}
PTT_TIMING( "af enqueue", 0, iSize, 0 );
PTT_DEBUG( "ENQ EthFrame EXIT ", 000, pDEVBLK->devnum, bPort );
Expand Down Expand Up @@ -4150,7 +4150,7 @@ static void* LCS_AttnThread( void* arg)
{

// Wait a small (but increasing) amount of time.
usleep(interval);
USLEEP(interval);

//?? // is there still something in our frame buffer?
//?? if (!pLCSDEV->fDataPending && !pLCSDEV->fReplyPending)
Expand Down
2 changes: 1 addition & 1 deletion ctc_ptp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2263,7 +2263,7 @@ void* ptp_read_thread( void* arg )
// Don't use schedyield() here; use an actual non-dispatchable
// delay instead so as to allow another [possibly lower priority]
// thread to 'read' (remove) the packet(s) from the read buffer.
usleep( PTP_DELAY_USECS ); // (wait a bit before retrying...)
USLEEP( PTP_DELAY_USECS ); // (wait a bit before retrying...)

continue;

Expand Down
10 changes: 5 additions & 5 deletions ctcadpt.c
Original file line number Diff line number Diff line change
Expand Up @@ -1929,7 +1929,7 @@ static int CTCE_Init( DEVBLK *dev, int argc, char *argv[] )
dev->allocated = 0;
RELEASE_DEVLOCK( dev );
{
usleep( 700000 );
USLEEP( 700000 );
}
OBTAIN_DEVLOCK( dev );
dev->allocated = 1;
Expand Down Expand Up @@ -2945,7 +2945,7 @@ static void* CTCE_RecvThread( void* argp )
{
CTCE_Info.de_ready_attn_rc = device_attention( pDEVBLK, CSW_DE );
}
while ((CTCE_Info.de_ready_attn_rc == 3) && (usleep( 100 ) == 0));
while ((CTCE_Info.de_ready_attn_rc == 3) && (USLEEP( 100 ) == 0));
}
OBTAIN_DEVLOCK( pDEVBLK );

Expand Down Expand Up @@ -2995,7 +2995,7 @@ static void* CTCE_RecvThread( void* argp )
// which needs to be configured using the CTCE option ATTNDELAY <nnn>
if( pDEVBLK->ctce_attn_delay && CTCE_Info.busy_waits == 0 )
{
usleep( pDEVBLK->ctce_attn_delay );
USLEEP( pDEVBLK->ctce_attn_delay );
}

CTCE_Info.working_attn_rc = device_attention( pDEVBLK, CSW_ATTN );
Expand All @@ -3014,7 +3014,7 @@ static void* CTCE_RecvThread( void* argp )
{
i = i * 2;
}
usleep(i);
USLEEP(i);

// Cancel the ATTN in case a CCW program
// has started in the mean time.
Expand Down Expand Up @@ -3632,7 +3632,7 @@ static void* CTCE_ConnectThread( void* argp )
if ( rc < 0 )
{
close_socket( fd );
usleep(connect_retry_interval * 1000) ;
USLEEP(connect_retry_interval * 1000) ;
}
OBTAIN_DEVLOCK( dev );

Expand Down
2 changes: 1 addition & 1 deletion dasdconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ static void delayed_exit (int exit_code)
{
/* Delay exiting is to give the system
* time to display the error message. */
usleep(100000);
USLEEP(100000);
exit(exit_code);
}

Expand Down
2 changes: 1 addition & 1 deletion dasdconv64.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ static void delayed_exit (int exit_code)
{
/* Delay exiting is to give the system
* time to display the error message. */
usleep(100000);
USLEEP(100000);
exit(exit_code);
}

Expand Down
2 changes: 1 addition & 1 deletion diagnose.c
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ U32 code;
for (i=0; i < secs; ++i)
SLEEP(1); /* (sleep one second at a time) */
if (usecs)
usleep(usecs); /* (remaining microseconds, if any) */
USLEEP(usecs); /* (remaining microseconds, if any) */
break;
}

Expand Down
2 changes: 1 addition & 1 deletion dyngui.c
Original file line number Diff line number Diff line change
Expand Up @@ -2177,7 +2177,7 @@ HDL_FINAL_SECTION
if (sysblk.shutdown)
{
bDoneProcessing = TRUE; // (tell main loop to stop processing)
usleep(100000); // (brief delay to give GUI time
USLEEP(100000); // (brief delay to give GUI time
// to display ALL shutdown msgs)
}
}
Expand Down
2 changes: 1 addition & 1 deletion featall.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@

#define MIN_TOD_UPDATE_USECS 50 /* Min TOD updt freq (usecs) */
#define DEF_TOD_UPDATE_USECS 50 /* Def TOD updt freq (usecs) */
#define MAX_TOD_UPDATE_USECS 1000000 /* Max TOD updt freq (usecs) */
#define MAX_TOD_UPDATE_USECS 999999 /* Max TOD updt freq (usecs) */

#define MAX_DEVICE_THREAD_IDLE_SECS 300 /* 5 Minute thread timeout */
//efine OPTION_LONG_HOSTINFO /* Detailed host & logo info */
Expand Down
2 changes: 1 addition & 1 deletion hao.c
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ static void* hao_thread(void* dummy)
// "HAO thread waiting for logger facility to become active"
WRMSG( HHC00090, "W" );
}
usleep( 50 * 1000 ); /* (wait for a bit) */
USLEEP( 50 * 1000 ); /* (wait for a bit) */
}

if (!sysblk.shutdown && did_waiting_msg)
Expand Down
6 changes: 3 additions & 3 deletions hsccmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ static void* quit_thread( void* arg )
// messages at least twice to ensure that the "exit"
// command has time to be echoed to the screen.

usleep( quitdelay_usecs );
USLEEP( quitdelay_usecs );

// Now proceed with a normal shutdown, which waits for
// the guest to quiesce itself beforehand (if appropriate)
Expand Down Expand Up @@ -1987,7 +1987,7 @@ static void try_scsi_refresh( DEVBLK* dev )
gen_parms.dev = dev;

VERIFY( dev->tmh->generic( &gen_parms ) == 0 ); // (maybe update status)
usleep( 10 * 1000 ); // (let thread start/end)
USLEEP( 10 * 1000 ); // (let thread start/end)
}

/*-------------------------------------------------------------------*/
Expand Down Expand Up @@ -7536,7 +7536,7 @@ char **save_argv = NULL;
|| (dev->scsw.flag3 & SCSW3_SC_PEND)))
{
RELEASE_DEVLOCK( dev );
usleep(5000);
USLEEP(5000);
OBTAIN_DEVLOCK( dev );
}
}
Expand Down
6 changes: 3 additions & 3 deletions hscmisc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1455,7 +1455,7 @@ static void do_shutdown_now()
// (hack to prevent minor message glitch during shutdown)
fflush( stdout );
fflush( stderr );
usleep( 10000 );
USLEEP( 10000 );

ASSERT( !sysblk.shutfini ); // (sanity check)
sysblk.shutfini = FALSE; // (shutdown NOT finished yet)
Expand All @@ -1467,7 +1467,7 @@ static void do_shutdown_now()
for (n=0; sysblk.devtnbr && n < 100; ++n)
{
signal_condition( &sysblk.ioqcond );
usleep( 10000 );
USLEEP( 10000 );
}
}

Expand All @@ -1477,7 +1477,7 @@ static void do_shutdown_now()
// (hack to prevent minor message glitch during shutdown)
fflush( stdout );
fflush( stderr );
usleep( 10000 );
USLEEP( 10000 );

#if !defined( _MSVC_ )
logger_unredirect();
Expand Down
15 changes: 15 additions & 0 deletions hscutl.c
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,21 @@ DLL_EXPORT void list_all_symbols()
WRMSG( HHC02199, "I", tok->var, tok->val ? tok->val : "" );
}

/* Hercules microsecond sleep */
DLL_EXPORT int herc_usleep( useconds_t usecs )
{
int rc;
if ((rc = usleep( usecs )) != 0)
{
int save_errno = errno;
// "Error in function %s: %s"
WRMSG( HHC00075, "E", "usleep()", strerror( save_errno ));
errno = save_errno;
rc = -1;
}
return rc;
}

/* Subtract 'beg_timeval' from 'end_timeval' yielding 'dif_timeval' */
/* Return code: success == 0, error == -1 (difference was negative) */

Expand Down
3 changes: 3 additions & 0 deletions hscutl.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,9 @@ strlcat(char *dst, const char *src, size_t siz);
#define STRLCPY( dst, src ) strlcpy( (dst), (src), sizeof(dst) )
#define STRLCAT( dst, src ) strlcat( (dst), (src), sizeof(dst) )

#define USLEEP( _u ) herc_usleep( _u )
HUT_DLL_IMPORT int herc_usleep( useconds_t usecs );

/* Subtract/add gettimeofday struct timeval */
HUT_DLL_IMPORT int timeval_subtract (struct timeval *beg_timeval, struct timeval *end_timeval, struct timeval *dif_timeval);
HUT_DLL_IMPORT int timeval_add (struct timeval *dif_timeval, struct timeval *accum_timeval);
Expand Down
3 changes: 2 additions & 1 deletion html/hercconf.html
Original file line number Diff line number Diff line change
Expand Up @@ -1620,7 +1620,8 @@ <h3>System parameters</h3>
When the z/Architecure Transactional-Execution Facility <i><u>is</u></i> installed and
enabled, the minimum and default intervals are 200 and 400 microseconds.
<p>
The maximum allowed interval is 1000000 microseconds (one second).
The maximum allowed interval is 999999 microseconds
(one microsecond less than one second).
<p>
Also note that due to host system limitations and/or design, some
hosts may round up and/or
Expand Down
Loading

0 comments on commit 87d04ed

Please sign in to comment.