From 910b8651331e9678eced95c74fe1bd9d8eed7bad Mon Sep 17 00:00:00 2001 From: Caleb Palmer Date: Thu, 29 Jun 2017 08:25:46 -0500 Subject: [PATCH] PRD: Mask mainline NCE/TCE during TD Change-Id: I4d1eb0e7e52e0ae78649a552a809d194d0c6acc6 CQ: SW394335 Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/42672 Tested-by: Jenkins Server Reviewed-by: Benjamin J. Weisenbeck Reviewed-by: Brian J. Stegmiller Reviewed-by: Zane C. Shelley Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/42858 Tested-by: Jenkins OP Build CI Tested-by: Jenkins OP HW Tested-by: FSP CI Jenkins --- .../diag/prdf/common/plat/p9/p9_mca_regs.rule | 8 + src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C | 206 +++++++++++++++++- src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H | 22 ++ src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C | 9 + 4 files changed, 239 insertions(+), 6 deletions(-) diff --git a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule index 16c6c601949..4f457a149db 100644 --- a/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule +++ b/src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule @@ -51,6 +51,14 @@ access write_only; }; + register MCAECCFIR_MASK_AND + { + name "P9 MCA target MCAECCFIR MASK AND"; + scomaddr 0x07010a04; + capture group never; + access write_only; + }; + register MCAECCFIR_MASK_OR { name "P9 MCA target MCAECCFIR MASK OR"; diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C index ea725091818..fbd3526c4ac 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.C @@ -330,14 +330,14 @@ uint32_t MemTdCtlr::analyzeCmdComplete( bool & o_errorsFound, #ifdef __HOSTBOOT_RUNTIME - // If the queue is still empty then it is possible that background - // scrubbing only stopped for FFDC. In that case, simply resume the - // command instead of starting a new one. 
if ( iv_queue.empty() ) { - // It is possible to get here if we were running a TD procedure - // and the PRD service is reset. Therefore, we must check if - // background scrubbing was actually configured. + // The queue is empty so it is possible that background scrubbing + // only stopped for FFDC. Simply resume the command instead of + // starting a new one. Note that it is possible to get here if we + // were running a TD procedure and the PRD service is reset. + // Therefore, we must check if background scrubbing was actually + // configured. bool isBgScrub; o_rc = isBgScrubConfig( iv_chip, isBgScrub ); if ( SUCCESS != o_rc ) @@ -349,6 +349,21 @@ uint32_t MemTdCtlr::analyzeCmdComplete( bool & o_errorsFound, if ( isBgScrub ) iv_resumeBgScrub = true; } + else + { + // The analyzeCmdComplete() function is only called if there was a + // command complete attention and there were no TD procedures + // currently in progress. At this point, there are new TD procedures + // in the queue so we want to mask certain fetch attentions to avoid + // the complication of handling the attentions during the TD + // procedures. + o_rc = maskEccAttns(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "maskEccAttns() failed" ); + break; + } + } #endif @@ -361,6 +376,185 @@ uint32_t MemTdCtlr::analyzeCmdComplete( bool & o_errorsFound, //------------------------------------------------------------------------------ +template<> +uint32_t MemTdCtlr::maskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr::maskEccAttns] " + + uint32_t o_rc = SUCCESS; + + // Loop through all MCAs. 
+ for ( uint32_t ps = 0; ps < MAX_PORT_PER_MCBIST; ps++ ) + { + ExtensibleChip * mcaChip = getConnectedChild( iv_chip, TYPE_MCA, ps ); + SCAN_COMM_REGISTER_CLASS * mask = + mcaChip->getRegister( "MCAECCFIR_MASK_OR" ); + + mask->clearAllBits(); + mask->SetBit(8); // Mainline read NCE + mask->SetBit(9); // Mainline read TCE + + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on MCAECCFIR_MASK_OR" ); + break; + } + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t MemTdCtlr::unmaskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr::unmaskEccAttns] " + + uint32_t o_rc = SUCCESS; + + // Memory CEs were masked at the beginning of the TD procedure, so + // clear and unmask them. Also, it is possible that memory UEs have + // thresholded so clear and unmask them as well. + + // Loop through all MCAs. + for ( uint32_t ps = 0; ps < MAX_PORT_PER_MCBIST; ps++ ) + { + ExtensibleChip * mcaChip = getConnectedChild( iv_chip, TYPE_MCA, ps ); + SCAN_COMM_REGISTER_CLASS * fir = + mcaChip->getRegister( "MCAECCFIR_AND" ); + SCAN_COMM_REGISTER_CLASS * mask = + mcaChip->getRegister( "MCAECCFIR_MASK_AND" ); + + fir->setAllBits(); mask->setAllBits(); + + // Don't clear the NCE and TCE attentions if specified to save the mask + // in the iv_saveEccMask array. 
+ if ( !iv_saveEccMask[ps] ) + { + fir->ClearBit(8); mask->ClearBit(8); // Mainline read NCE + fir->ClearBit(9); mask->ClearBit(9); // Mainline read TCE + } + fir->ClearBit(14); mask->ClearBit(14); // Mainline read UE + + o_rc = fir->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on MCAECCFIR_AND" ); + break; + } + + o_rc = mask->Write(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "Write() failed on MCAECCFIR_MASK_AND" ); + break; + } + } + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t MemTdCtlr::maskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr::maskEccAttns] " + + uint32_t o_rc = SUCCESS; + + // TODO RTC 176901 + //do + //{ + // // Don't want to handle memory CEs during any TD procedures, so + // // mask them. + + // const char * reg_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR_MASK_OR" + // : "MBA1_MBSECCFIR_MASK_OR"; + // SCAN_COMM_REGISTER_CLASS * reg = iv_membChip->getRegister(reg_str); + + // reg->clearAllBits(); + // reg->SetBit(16); // fetch NCE + // reg->SetBit(17); // fetch RCE + // reg->SetBit(43); // prefetch UE + + // o_rc = reg->Write(); + // if ( SUCCESS != o_rc ) + // { + // PRDF_ERR( PRDF_FUNC "Write() failed on %s", reg_str ); + // break; + // } + + // iv_fetchAttnsMasked = true; + + //} while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + +template<> +uint32_t MemTdCtlr::unmaskEccAttns() +{ + #define PRDF_FUNC "[MemTdCtlr::unmaskEccAttns] " + + uint32_t o_rc = SUCCESS; + + // TODO RTC 176901 + //do + //{ + // // Memory CEs were masked at the beginning of the TD procedure, so + // // clear and unmask them. Also, it is possible that memory UEs have + // // thresholded so clear and unmask them as well. + + // const char * fir_str = (0 == iv_mbaPos) ? 
"MBA0_MBSECCFIR_AND" + // : "MBA1_MBSECCFIR_AND"; + // const char * msk_str = (0 == iv_mbaPos) ? "MBA0_MBSECCFIR_MASK_AND" + // : "MBA1_MBSECCFIR_MASK_AND"; + + // SCAN_COMM_REGISTER_CLASS * fir = iv_membChip->getRegister( fir_str ); + // SCAN_COMM_REGISTER_CLASS * msk = iv_membChip->getRegister( msk_str ); + + // fir->setAllBits(); msk->setAllBits(); + // fir->ClearBit(16); msk->ClearBit(16); // fetch NCE + // fir->ClearBit(17); msk->ClearBit(17); // fetch RCE + // fir->ClearBit(19); msk->ClearBit(19); // fetch UE + // fir->ClearBit(43); msk->ClearBit(43); // prefetch UE + + // o_rc = fir->Write(); + // if ( SUCCESS != o_rc ) + // { + // PRDF_ERR( PRDF_FUNC "Write() failed on %s", fir_str ); + // break; + // } + + // o_rc = msk->Write(); + // if ( SUCCESS != o_rc ) + // { + // PRDF_ERR( PRDF_FUNC "Write() failed on %s", msk_str ); + // break; + // } + + // iv_fetchAttnsMasked = false; + + //} while (0); + + return o_rc; + + #undef PRDF_FUNC +} + +//------------------------------------------------------------------------------ + // Avoid linker errors with the template. template class MemTdCtlr; template class MemTdCtlr; diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H index 78db9102c34..4fe1005285a 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr.H @@ -189,6 +189,23 @@ class MemTdCtlr void collectStateCaptureData( STEP_CODE_DATA_STRUCT & io_sc, const char * i_startEnd ); + /** + * @brief Masks NCE and TCE ECC attentions. + * @note Only intended to be used just before starting a new TD procedure. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t maskEccAttns(); + + /** + * @brief Clears and unmasks NCE and TCE ECC attentions. + * @note maskEccAttns() will not mask fetch UEs, however, this function + * will unmask them because it is possible that UEs exceeded + * threshold and were masked by the rule code. 
+ * @note Only intended to be used just after completing a TD procedure. + * @return Non-SUCCESS if an internal function fails, SUCCESS otherwise. + */ + uint32_t unmaskEccAttns(); + #ifdef __HOSTBOOT_RUNTIME /** @@ -230,6 +247,11 @@ class MemTdCtlr * diagnostics are complete. */ TdRankListEntry iv_stoppedRank; + /** An array to track whether we want to keep the NCE and TCE ECC attentions + * for each port masked at the end of the TD procedures or not. True if we + * keep them masked, false if we unmask and clear them. */ + bool iv_saveEccMask[MAX_PORT_PER_MCBIST] = { false }; + #ifdef __HOSTBOOT_RUNTIME /** True if the TD controller has been initialized. False otherwise. */ diff --git a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C index 7cc6e58034a..f82253a58c7 100644 --- a/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C +++ b/src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_rt.C @@ -285,6 +285,15 @@ uint32_t MemTdCtlr::defaultStep( STEP_CODE_DATA_STRUCT & io_sc ) } else { + + // Unmask the ECC attentions that were explicitly masked during the + // TD procedure. + o_rc = unmaskEccAttns(); + if ( SUCCESS != o_rc ) + { + PRDF_ERR( PRDF_FUNC "unmaskEccAttns() failed" ); + } + // A TD procedure has completed. Restart background scrubbing on the // next rank.