Skip to content

Commit

Permalink
PRD: MNFG thresholding for RCD parity error reconfig loops
Browse files Browse the repository at this point in the history
Change-Id: Ie0282529d66cbe4b3169ad7ee601dbd2cb49f779
CQ: SW392001
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/42136
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
  • Loading branch information
zane131 committed Jun 22, 2017
1 parent d1924a3 commit 5c7c983
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 23 deletions.
20 changes: 8 additions & 12 deletions src/usr/diag/mdia/mdia.C
Expand Up @@ -117,20 +117,16 @@ errlHndl_t runStep(const TargetHandleList & i_targetList)

doStepCleanup(globals);

if ( nullptr != top &&
0 != top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() )
// If this step completes without the need for a reconfig due to an RCD
// parity error, clear all RCD parity error counters.
ATTR_RECONFIGURE_LOOP_type attr = top->getAttr<ATTR_RECONFIGURE_LOOP>();
if ( 0 == (attr & RECONFIGURE_LOOP_RCD_PARITY_ERROR) )
{
// Reset the RCD parity error reconfig loop counter if this step
// completes without an RCD parity error. Note that PRD will only set
// the RCD parity error flag if there is an RCD parity error and the
// total count of reconfig loops is under threshold. At threshold, a
// part will be deconfigured, forcing a reconfig, but the RCD parity
// error flag will not be set to ensure this code is activated and the
// count is reset.
ATTR_RECONFIGURE_LOOP_type attr = top->getAttr<ATTR_RECONFIGURE_LOOP>();
if ( 0 == (attr & RECONFIGURE_LOOP_RCD_PARITY_ERROR) )
TargetHandleList trgtList; getAllChiplets( trgtList, TYPE_MCA );
for ( auto & trgt : trgtList )
{
top->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(0);
if ( 0 != trgt->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() )
trgt->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(0);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/usr/diag/prdf/plat/mem/prdfP9Mca.C
Expand Up @@ -109,7 +109,7 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
// documented below.

// Nothing more to do if this is a checkstop attention.
if ( CHECK_STOP != io_sc.service_data->getPrimaryAttnType() )
if ( CHECK_STOP == io_sc.service_data->getPrimaryAttnType() )
return SUCCESS;

#ifdef __HOSTBOOT_RUNTIME // TPS only supported at runtime.
Expand Down Expand Up @@ -160,7 +160,7 @@ int32_t RcdParityError( ExtensibleChip * i_mcaChip,
{
// Recovery is disabled. Issue a reconfig loop. Make the error log
// predictive if threshold is reached.
if ( rcdParityErrorReconfigLoop() )
if ( rcdParityErrorReconfigLoop(i_mcaChip->getTrgt()) )
io_sc.service_data->setServiceCall();
}
else
Expand Down
25 changes: 18 additions & 7 deletions src/usr/diag/prdf/plat/prdfPlatServices_ipl.C
Expand Up @@ -39,6 +39,7 @@

//#include <prdfCenDqBitmap.H> TODO RTC 164707
#include <prdfMemScrubUtils.H>
#include <prdfMfgThresholdMgr.H>

#include <diag/mdia/mdia.H>
#include <config.h>
Expand Down Expand Up @@ -107,15 +108,25 @@ int32_t mdiaSendEventMsg( TargetHandle_t i_trgt,

//------------------------------------------------------------------------------

bool rcdParityErrorReconfigLoop()
bool rcdParityErrorReconfigLoop( TargetHandle_t i_trgt )
{
TargetHandle_t top = getSystemTarget();

// Check the current reconfig count.
uint8_t allowed = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOPS_ALLOWED>();
uint8_t count = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>();
// Get the current reconfig count and increment.
uint8_t count = i_trgt->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>() + 1;

if ( count <= allowed )
// Get the reconfig threshold and check MNFG threshold, if needed.
uint8_t th = top->getAttr<ATTR_RCD_PARITY_RECONFIG_LOOPS_ALLOWED>() + 1;
if ( mfgMode() )
{
uint8_t mnfgTh = MfgThresholdMgr::getInstance()->
getThreshold(ATTR_MNFG_TH_RCD_PARITY_ERRORS);
if ( mnfgTh < th )
th = mnfgTh;
}

// If the count is under threshold, trigger a reconfig loop.
if ( count < th )
{
// Set the RCD parity error flag in the reconfig loop attribute. This
// will trigger a reconfig loop at the end of the current istep.
Expand All @@ -126,8 +137,8 @@ bool rcdParityErrorReconfigLoop()
top->setAttr<ATTR_RECONFIGURE_LOOP>(attr);
}

// Increment the count.
top->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(++count);
// Write the new count to the attribute.
i_trgt->setAttr<ATTR_RCD_PARITY_RECONFIG_LOOP_COUNT>(count);

return false;
}
Expand Down
3 changes: 2 additions & 1 deletion src/usr/diag/prdf/plat/prdfPlatServices_ipl.H
Expand Up @@ -65,10 +65,11 @@ int32_t mdiaSendEventMsg( TARGETING::TargetHandle_t i_trgt,

/**
* @brief Initiates a reconfig loop due to an RCD parity error.
* @param i_trgt An MCA target.
* @return True if the number of allowed reconfig loops has been exceeded.
* False otherwise.
*/
bool rcdParityErrorReconfigLoop();
bool rcdParityErrorReconfigLoop( TARGETING::TargetHandle_t i_trgt );

/**
* @brief Invokes the restore DRAM repairs hardware procedure.
Expand Down
2 changes: 1 addition & 1 deletion src/usr/targeting/common/xmltohb/target_types.xml
Expand Up @@ -705,7 +705,6 @@
<attribute><id>MNFG_TH_MEMORY_IUES</id></attribute>
<attribute><id>MNFG_TH_MEMORY_IMPES</id></attribute>
<attribute><id>RCD_PARITY_RECONFIG_LOOPS_ALLOWED</id></attribute>
<attribute><id>RCD_PARITY_RECONFIG_LOOP_COUNT</id></attribute>
<attribute><id>OPT_MEMMAP_GROUP_POLICY</id></attribute>
<attribute><id>BRAZOS_RX_FIFO_OVERRIDE</id></attribute>
<attribute><id>MRW_MBA_CACHELINE_INTERLEAVE_MODE_CONTROL</id></attribute>
Expand Down Expand Up @@ -1939,6 +1938,7 @@
<attribute><id>VPD_OVERRIDE_MW_ENABLE</id></attribute>
<attribute><id>VPD_OVERRIDE_MW</id></attribute>
<attribute><id>PRD_HWP_PLID</id></attribute>
<attribute><id>RCD_PARITY_RECONFIG_LOOP_COUNT</id></attribute>
</targetType>

<targetType>
Expand Down

0 comments on commit 5c7c983

Please sign in to comment.