Skip to content

Commit

Permalink
PRD: rule file updates for MCA unit
Browse files Browse the repository at this point in the history
Change-Id: Ib37b1ec290081428a9c627ec1683568382d3612f
RTC: 169104
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36316
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Reviewed-by: Benjamin J. Weisenbeck <bweisenb@us.ibm.com>
Reviewed-by: Caleb N. Palmer <cnpalmer@us.ibm.com>
Reviewed-by: Zane C. Shelley <zshelle@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/36515
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
  • Loading branch information
zane131 committed Feb 16, 2017
1 parent 2fc2af3 commit a4c7097
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 27 deletions.
51 changes: 47 additions & 4 deletions src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.C
Expand Up @@ -49,6 +49,51 @@ namespace MemEcc

//------------------------------------------------------------------------------

/**
 * MCA flavor of the memory UE callout. When the DDRPHYFIR can be read and
 * reports one of the attentions checked below, the UE is treated as a
 * side-effect and the MCA itself is called out. In every other case
 * (including a failed register read) the rank is called out.
 */
template<>
void calloutMemUe<TYPE_MCA>( ExtensibleChip * i_chip, const MemRank & i_rank,
                             STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[MemEcc::calloutMemUe] "

    PRDF_ASSERT( TYPE_MCA == i_chip->getType() );

    // Stays false unless the FIR is readable and shows a relevant attention.
    bool isSideEffect = false;

    SCAN_COMM_REGISTER_CLASS * phyFir = i_chip->getRegister( "DDRPHYFIR" );
    if ( SUCCESS == phyFir->Read() )
    {
        // DDRPHYFIR[54:55,57:59]: six bit field starting at bit 54, filtered
        // with 0x37.
        isSideEffect = ( 0 != (phyFir->GetBitFieldJustified(54,6) & 0x37) );
    }
    else
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on DDRPHYFIR: i_chip=0x%08x",
                  i_chip->getHuid() );
    }

    if ( isSideEffect )
    {
        // The UE is a side-effect, so the MCA takes the callout.
        io_sc.service_data->SetCallout( i_chip->getTrgt() );
    }
    else
    {
        // Nothing points at the MCA (or the FIR was unreadable), so fall back
        // to the rank.
        MemoryMru rankMru ( i_chip->getTrgt(), i_rank,
                            MemoryMruData::CALLOUT_RANK );
        io_sc.service_data->SetCallout( rankMru );
    }

    #undef PRDF_FUNC
}

/**
 * MBA flavor of the memory UE callout. No side-effect check is made here; the
 * rank is always the callout.
 */
template<>
void calloutMemUe<TYPE_MBA>( ExtensibleChip * i_chip, const MemRank & i_rank,
                             STEP_CODE_DATA_STRUCT & io_sc )
{
    PRDF_ASSERT( TYPE_MBA == i_chip->getType() );

    // Build the rank level MRU for this MBA and add it to the callout list.
    MemoryMru rankMru ( i_chip->getTrgt(), i_rank,
                        MemoryMruData::CALLOUT_RANK );
    io_sc.service_data->SetCallout( rankMru );
}

//------------------------------------------------------------------------------

#ifdef __HOSTBOOT_RUNTIME

template<TARGETING::TYPE T>
Expand Down Expand Up @@ -258,11 +303,9 @@ uint32_t analyzeFetchUe( ExtensibleChip * i_chip,
D db = static_cast<D>(i_chip->getDataBundle());
db->iv_ueTable.addEntry( UE_TABLE::FETCH_UE, addr );

// Callout the rank.
// Make the hardware callout.
MemRank rank = addr.getRank();
MemoryMru memmru ( i_chip->getTrgt(), rank,
MemoryMruData::CALLOUT_RANK );
io_sc.service_data->SetCallout( memmru );
calloutMemUe<T>( i_chip, rank, io_sc );

#ifdef __HOSTBOOT_RUNTIME

Expand Down
13 changes: 12 additions & 1 deletion src/usr/diag/prdf/common/plat/mem/prdfMemEccAnalysis.H
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016 */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -40,6 +40,17 @@ namespace PRDF
namespace MemEcc
{

/**
 * @brief Makes the hardware callout for a memory UE, first checking whether
 *        the UE is a side-effect of another attention.
 *
 * Specialized per target type:
 *  - TYPE_MCA: if DDRPHYFIR[54:55,57:59] can be read and any of those bits is
 *    on, the MCA is called out. Otherwise (including a failed register read)
 *    the rank is called out.
 *  - TYPE_MBA: the rank is always called out.
 *
 * @param i_chip MCA or MBA. Its type must match the template parameter T.
 * @param i_rank Target rank.
 * @param io_sc  The step code data struct; the callout is added to its
 *               service data.
 */
template<TARGETING::TYPE T>
void calloutMemUe( ExtensibleChip * i_chip, const MemRank & i_rank,
STEP_CODE_DATA_STRUCT & io_sc );

/**
* @brief Analyzes a fetch MPE attention.
* @param i_chip MCA or MBA.
Expand Down
83 changes: 82 additions & 1 deletion src/usr/diag/prdf/common/plat/mem/prdfP9Mca_common.C
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016 */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -80,6 +80,87 @@ int32_t PostAnalysis( ExtensibleChip * i_chip, STEP_CODE_DATA_STRUCT & io_sc )
}
PRDF_PLUGIN_DEFINE( p9_mca, PostAnalysis );

//##############################################################################
//
// DDRPHYFIR
//
//##############################################################################

/**
 * @brief  DDRPHYFIR[54:55,57:59] MCA/UE algorithm
 * @param  i_chip MCA chip.
 * @param  io_sc  The step code data struct.
 * @return PRD_NO_CLEAR_FIR_BITS when the attention must be left on (it was
 *         at threshold or a UE bit was on in the MCAECCFIR), SUCCESS
 *         otherwise.
 */
int32_t mcaUeAlgorithm( ExtensibleChip * i_chip,
                        STEP_CODE_DATA_STRUCT & io_sc )
{
    #define PRDF_FUNC "[p9_mca::mcaUeAlgorithm] "

    // When the attention has reached threshold, or a mainline or maintenance
    // UE is on together with the attention:
    //  - The error log is made predictive.
    //  - The attention is masked.
    //  - The attention is NOT cleared. Maintenance and memory UE analysis
    //    look at it later to decide that the MCA should be called out instead
    //    of the DIMMs. This unconventional process is needed because
    //    maintenance UEs are always masked (handled manually in maintenance
    //    command complete attentions) and memory UEs get unmasked any time
    //    Targeted Diagnostics completes on that area of memory. So there is
    //    never a true way to permanently mask the UEs.

    bool keepAttnAndMask = io_sc.service_data->IsAtThreshold();

    if ( !keepAttnAndMask )
    {
        // Not at threshold, so check MCAECCFIR[14] and MCAECCFIR[34]
        // (presumably the mainline and maintenance UE bits).
        SCAN_COMM_REGISTER_CLASS * eccFir = i_chip->getRegister("MCAECCFIR");
        if ( SUCCESS == eccFir->Read() )
        {
            keepAttnAndMask = eccFir->IsBitSet(14) || eccFir->IsBitSet(34);
        }
        else
        {
            PRDF_ERR( PRDF_FUNC "Read() failed on MCAECCFIR: i_chip=0x%08x",
                      i_chip->getHuid() );
        }
    }

    // Neither condition holds: nothing to mask, normal FIR clearing applies.
    if ( !keepAttnAndMask )
    {
        return SUCCESS;
    }

    // Find which of DDRPHYFIR[54:55,57:59] are active so they can be masked.
    SCAN_COMM_REGISTER_CLASS * phyFir = i_chip->getRegister("DDRPHYFIR");
    if ( SUCCESS != phyFir->Read() )
    {
        PRDF_ERR( PRDF_FUNC "Read() failed on DDRPHYFIR: i_chip=0x%08x",
                  i_chip->getHuid() );

        // Nothing can be masked, but the attention is still left on.
        return PRD_NO_CLEAR_FIR_BITS;
    }

    const uint64_t activeBits = phyFir->GetBitFieldJustified(54, 6) & 0x37;

    // Put only the active bits in the mask OR register and write it out.
    SCAN_COMM_REGISTER_CLASS * maskOr =
                                i_chip->getRegister("DDRPHYFIR_MASK_OR");
    maskOr->clearAllBits();
    maskOr->SetBitFieldJustified( 54, 6, activeBits );

    if ( SUCCESS != maskOr->Write() )
    {
        PRDF_ERR( PRDF_FUNC "Write() failed on DDRPHYFIR_MASK_OR: "
                  "i_chip=0x%08x", i_chip->getHuid() );
    }

    return PRD_NO_CLEAR_FIR_BITS;

    #undef PRDF_FUNC
}
PRDF_PLUGIN_DEFINE( p9_mca, mcaUeAlgorithm );

//##############################################################################
//
// MCAECCFIR
Expand Down
8 changes: 7 additions & 1 deletion src/usr/diag/prdf/common/plat/p9/p9_common_actions.rule
Expand Up @@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
# Contributors Listed Below - COPYRIGHT 2016
# Contributors Listed Below - COPYRIGHT 2016,2017
# [+] International Business Machines Corp.
#
#
Expand Down Expand Up @@ -83,6 +83,12 @@ actionclass threshold5phour
threshold( field(5 / hour) );
};

/** Threshold of 5 per day */
actionclass threshold5pday
{
threshold( field(5 / day) );
};

################################################################################
# Threshold and Mask policy
################################################################################
Expand Down
18 changes: 9 additions & 9 deletions src/usr/diag/prdf/common/plat/p9/p9_mca.rule
Expand Up @@ -414,7 +414,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 )
/** MCAECCFIR[17]
* Mainline read IUE
*/
(rMCAECCFIR, bit(17)) ? defaultMaskedError;
(rMCAECCFIR, bit(17)) ? mainline_iue_handling;

/** MCAECCFIR[18]
* Mainline read IRCD
Expand All @@ -424,7 +424,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 )
/** MCAECCFIR[19]
* Mainline read IMPE
*/
(rMCAECCFIR, bit(19)) ? defaultMaskedError;
(rMCAECCFIR, bit(19)) ? mainline_impe_handling;

/** MCAECCFIR[20:27]
* Maintenance MPE
Expand Down Expand Up @@ -479,7 +479,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 )
/** MCAECCFIR[37]
* Maintenance IUE
*/
(rMCAECCFIR, bit(37)) ? defaultMaskedError;
(rMCAECCFIR, bit(37)) ? maintenance_iue_handling;

/** MCAECCFIR[38]
* Maintenance IRCD
Expand All @@ -489,7 +489,7 @@ group gMCAECCFIR filter singlebit, cs_root_cause( 14 )
/** MCAECCFIR[39]
* Maintenance IMPE
*/
(rMCAECCFIR, bit(39)) ? defaultMaskedError;
(rMCAECCFIR, bit(39)) ? maintenance_impe_handling;

/** MCAECCFIR[40]
* spare
Expand Down Expand Up @@ -625,12 +625,12 @@ group gDDRPHYFIR filter singlebit, cs_root_cause
/** DDRPHYFIR[54]
* Non-recoverable FSM error
*/
(rDDRPHYFIR, bit(54)) ? defaultMaskedError;
(rDDRPHYFIR, bit(54)) ? mca_ue_algorithm_th_5perDay;

/** DDRPHYFIR[55]
* Full bus impact Register Parity Error
*/
(rDDRPHYFIR, bit(55)) ? defaultMaskedError;
(rDDRPHYFIR, bit(55)) ? mca_ue_algorithm_th_1;

/** DDRPHYFIR[56]
* DDRPHY Parity errors
Expand All @@ -640,17 +640,17 @@ group gDDRPHYFIR filter singlebit, cs_root_cause
/** DDRPHYFIR[57]
* FSM errors
*/
(rDDRPHYFIR, bit(57)) ? defaultMaskedError;
(rDDRPHYFIR, bit(57)) ? mca_ue_algorithm_th_5perDay;

/** DDRPHYFIR[58]
* Register parity error impacting 16 bits
*/
(rDDRPHYFIR, bit(58)) ? defaultMaskedError;
(rDDRPHYFIR, bit(58)) ? mca_ue_algorithm_th_1;

/** DDRPHYFIR[59]
* Register parity error impacting 8 bits
*/
(rDDRPHYFIR, bit(59)) ? defaultMaskedError;
(rDDRPHYFIR, bit(59)) ? mca_ue_algorithm_th_1;

/** DDRPHYFIR[60]
* Register PE 4 bit impact
Expand Down
23 changes: 22 additions & 1 deletion src/usr/diag/prdf/common/plat/p9/p9_mca_actions.rule
Expand Up @@ -5,7 +5,7 @@
#
# OpenPOWER HostBoot Project
#
# Contributors Listed Below - COPYRIGHT 2016
# Contributors Listed Below - COPYRIGHT 2016,2017
# [+] International Business Machines Corp.
#
#
Expand Down Expand Up @@ -76,3 +76,24 @@ actionclass rcd_parity_error
funccall("RcdParityError"); # Run TPS on TH for all MCA ranks
};

actionclass mainline_iue_handling { TBDDefaultCallout; }; # TODO RTC 165383
actionclass mainline_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384
actionclass maintenance_iue_handling { TBDDefaultCallout; }; # TODO RTC 165383
actionclass maintenance_impe_handling { TBDDefaultCallout; }; # TODO RTC 165384

/** MCA/UE algorithm, threshold 5 per day */
actionclass mca_ue_algorithm_th_5perDay
{
calloutSelfMed;
threshold5pday;
funccall("mcaUeAlgorithm"); # must be called last
};

/** MCA/UE algorithm, threshold 1 */
actionclass mca_ue_algorithm_th_1
{
calloutSelfMed;
threshold1;
funccall("mcaUeAlgorithm"); # must be called last
};

13 changes: 13 additions & 0 deletions src/usr/diag/prdf/common/plat/p9/p9_mca_regs.rule
Expand Up @@ -30,6 +30,7 @@
############################################################################
# P9 MCA target MCAECCFIR
############################################################################

register MCAECCFIR_AND
{
name "P9 MCA target MCAECCFIR AND";
Expand All @@ -38,6 +39,18 @@
access write_only;
};

############################################################################
# P9 MCA target DDRPHYFIR
############################################################################

register DDRPHYFIR_MASK_OR
{
name "P9 MCA target DDRPHYFIR MASK OR";
scomaddr 0x07011005;
capture group never;
access write_only;
};

############################################################################
# P9 Hardware Mark Stores
############################################################################
Expand Down
6 changes: 3 additions & 3 deletions src/usr/diag/prdf/plat/mem/prdfMemTdCtlr_ipl.C
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2016 */
/* Contributors Listed Below - COPYRIGHT 2016,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand All @@ -30,6 +30,7 @@
#include <prdfMemTdCtlr.H>

// Platform includes
#include <prdfMemEccAnalysis.H>
#include <prdfMemMark.H>
#include <prdfMemoryMru.H>
#include <prdfMemScrubUtils.H>
Expand Down Expand Up @@ -137,8 +138,7 @@ uint32_t __checkEcc( ExtensibleChip * i_chip, const MemRank & i_rank,
io_sc.service_data->setSignature( huid, PRDFSIG_MaintUE );

// Add the rank to the callout list.
MemoryMru mm { trgt, i_rank, MemoryMruData::CALLOUT_RANK };
io_sc.service_data->SetCallout( mm );
MemEcc::calloutMemUe<T>( i_chip, i_rank, io_sc );

// Make the error log predictive.
io_sc.service_data->setServiceCall();
Expand Down

0 comments on commit a4c7097

Please sign in to comment.