Skip to content

Commit

Permalink
Disable bad MCAs based on CRP0:Lx keyword data
Browse files Browse the repository at this point in the history
The CRP0 record has 8 keywords (L1-L8) that correspond to the 8 ports
(MCA targets) for the chip.  One of the fields inside each keyword indicates
whether the port is disabled.  When that field is set, the MCA is marked
non-functional, but the MCA remains present.

Change-Id: I2c7c89c9567ec9f048a426b2204b18e10ccda601
RTC:166354
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/34256
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
Reviewed-by: Christian R. Geddes <crgeddes@us.ibm.com>
Reviewed-by: William G. Hoffa <wghoffa@us.ibm.com>
  • Loading branch information
mgloff authored and wghoffa committed Feb 13, 2017
1 parent 3192051 commit 54896a9
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 102 deletions.
7 changes: 2 additions & 5 deletions src/include/usr/hwas/common/hwasCommon.H
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ const uint32_t VPD_VINI_PR_DATA_LENGTH = 8; //@deprecrated
* vector. The caller is responsible for allocating and de-allocating the
* space.
*
* @param[in] i_proc processor target to read the Lx keyword from
* @param[in] i_mca MCA target indicating which Lx keyword to read
* @param[out] o_lxData pointer to area that will hold the Lx keyword
* read from VPD; must be malloc'ed by the caller,
Expand All @@ -279,13 +278,11 @@ const uint32_t VPD_VINI_PR_DATA_LENGTH = 8; //@deprecrated
* @return errlHndl_t valid errlHndl_t handle if there was an error
* NULL if no errors;
*/
errlHndl_t platReadLx(const TARGETING::TargetHandle_t &i_proc,
const TARGETING::TargetHandle_t &i_mca,
errlHndl_t platReadLx(const TARGETING::TargetHandle_t &i_mca,
void *o_lxData);

// constants the platReadLx will use for looking at the VPD data
const uint32_t VPD_CRP0_LX_HDR_LENGTH = 1;
const uint32_t VPD_CRP0_LX_DATA_LENGTH = 256;
const uint32_t VPD_CRP0_LX_HDR_DATA_LENGTH = 256;

const uint32_t VPD_CRP0_LX_FREQ_INDEP_INDEX = 8;
const uint32_t VPD_CRP0_LX_PORT_DISABLED = 0;
Expand Down
1 change: 1 addition & 0 deletions src/include/usr/hwas/hwasplatreasoncodes.H
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ namespace HWAS
RC_GARD_REPOSITORY_FULL = HWAS_COMP_ID | 0x82,
RC_BAD_CHIPID = HWAS_COMP_ID | 0x83,
RC_BAD_LX = HWAS_COMP_ID | 0x84,
RC_BAD_MCA = HWAS_COMP_ID | 0x85,
};
};

Expand Down
88 changes: 39 additions & 49 deletions src/usr/hwas/common/hwas.C
Original file line number Diff line number Diff line change
Expand Up @@ -2495,67 +2495,57 @@ void calculateEffectiveEC()
errlHndl_t markDisabledMcas()
{
errlHndl_t l_errl = nullptr;
uint8_t lxData[HWAS::VPD_CRP0_LX_DATA_LENGTH];
uint8_t lxData[HWAS::VPD_CRP0_LX_HDR_DATA_LENGTH];

HWAS_INF("markDisabledMcas entry");

do
{
//Get all functional chips
TARGETING::TargetHandleList l_procList;
getAllChips(l_procList, TYPE_PROC);
//Get the functional MCAs
TargetHandleList l_mcaList;
getAllChiplets(l_mcaList, TYPE_MCA, true);

//Loop through all functional procs
for(auto l_proc : l_procList)
for (auto l_mca : l_mcaList)
{
//Get the functional MCAs for this proc
TargetHandleList l_mcaList;
getChildChiplets(l_mcaList, l_proc, TYPE_MCA, true);

for (auto l_mca : l_mcaList)
{
// fill the Lx data buffer with zeros
memset(lxData, 0x00, VPD_CRP0_LX_DATA_LENGTH);
// fill the Lx data buffer with zeros
memset(lxData, 0x00, VPD_CRP0_LX_HDR_DATA_LENGTH);

#ifdef __HOSTBOOT_MODULE
//@TODO RTC:167294 Need to remove conditional after
// additional implementation
//Read Lx keyword for associated proc and MCA
l_errl = platReadLx(l_proc,
l_mca,
lxData);
//@TODO RTC:167294 Need to remove conditional after
// additional implementation
//Read Lx keyword for associated proc and MCA
l_errl = platReadLx(l_mca,
lxData);
#endif

if (l_errl)
{
// commit the error but keep going
errlCommit(l_errl, HWAS_COMP_ID);
}
if (l_errl)
{
// commit the error but keep going
errlCommit(l_errl, HWAS_COMP_ID);
}

if (lxData[VPD_CRP0_LX_FREQ_INDEP_INDEX
+ VPD_CRP0_LX_PORT_DISABLED] != 0)
{
// Since port is disabled, MCA is not functional, but
// it's present.
enableHwasState(l_mca,
true, // present
false, // not functional
DeconfigGard::DECONFIGURED_BY_DISABLED_PORT
);
HWAS_DBG("MCA %.8X - marked present, not functional",
l_mca->getAttr<ATTR_HUID>());

TargetInfo l_TargetInfo;
l_TargetInfo.affinityPath =
l_mca->getAttr<ATTR_AFFINITY_PATH>();
l_TargetInfo.pThisTarget = l_mca;
l_TargetInfo.type = l_mca->getAttr<ATTR_TYPE>();
l_TargetInfo.reason =
DeconfigGard::DECONFIGURED_BY_DISABLED_PORT;

// Deconfigure child targets for this MCA
deconfigPresentByAssoc(l_TargetInfo);
}
if (lxData[VPD_CRP0_LX_FREQ_INDEP_INDEX
+ VPD_CRP0_LX_PORT_DISABLED] != 0)
{
// Since port is disabled, MCA is not functional, but
// it's present.
enableHwasState(l_mca,
true, // present
false, // not functional
DeconfigGard::DECONFIGURED_BY_DISABLED_PORT);
HWAS_DBG("MCA %.8X - marked present, not functional",
l_mca->getAttr<ATTR_HUID>());

TargetInfo l_TargetInfo;
l_TargetInfo.affinityPath =
l_mca->getAttr<ATTR_AFFINITY_PATH>();
l_TargetInfo.pThisTarget = l_mca;
l_TargetInfo.type = l_mca->getAttr<ATTR_TYPE>();
l_TargetInfo.reason =
DeconfigGard::DECONFIGURED_BY_DISABLED_PORT;

// Deconfigure child targets for this MCA
deconfigPresentByAssoc(l_TargetInfo);
}
}

Expand Down
101 changes: 60 additions & 41 deletions src/usr/hwas/hwasPlat.C
Original file line number Diff line number Diff line change
Expand Up @@ -305,74 +305,93 @@ errlHndl_t platReadPR(const TargetHandle_t &i_target,
//******************************************************************************
// platReadLx function
//******************************************************************************
errlHndl_t platReadLx(const TargetHandle_t &i_proc,
const TargetHandle_t &i_mca,
errlHndl_t platReadLx(const TargetHandle_t &i_mca,
void *o_lxData)
{
errlHndl_t errl = nullptr;
uint8_t l_chip_unit = i_mca->getAttr<TARGETING::ATTR_CHIP_UNIT>();
uint8_t l_x = VPD_CRP0_LX_MIN_X + l_chip_unit;
uint8_t l_chip_unit;
uint8_t l_x;
const TARGETING::Target* l_proc;

HWAS_DBG( "i_proc %.8X, i_mca %.8X, Lx = L%1d",
i_proc->getAttr<ATTR_HUID>(),
i_mca->getAttr<ATTR_HUID>(),
l_x);

//Look for an invalid x value
if( l_x > VPD_CRP0_LX_MAX_X)
if (!(i_mca->tryGetAttr<TARGETING::ATTR_CHIP_UNIT>(l_chip_unit)))
{
HWAS_ERR("Invalid Lx with x=%1d for MCA %.8X on %.8X",
l_x,
i_mca->getAttr<ATTR_HUID>(),
i_proc->getAttr<ATTR_HUID>());
HWAS_ERR("Bad MCA target");
/*@
* @errortype ERRORLOG::ERRL_SEV_UNRECOVERABLE
* @moduleid HWAS::MOD_PLAT_READLX
* @reasoncode HWAS::RC_BAD_LX
* @userdata1[0:31] Target proc HUID
* @userdata1[32:63] Target MCA HUID
* @userdata2 Value of x for Lx keyword
* @devdesc platReadLx> Invalid Lx keyword
* @reasoncode HWAS::RC_BAD_MCA
* @userdata1 0
* @userdata2 0
* @devdesc platReadLx> Bad MCA target
*/
errl = new ERRORLOG::ErrlEntry(
ERRORLOG::ERRL_SEV_UNRECOVERABLE,
errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE,
HWAS::MOD_PLAT_READLX,
HWAS::RC_BAD_LX,
TWO_UINT32_TO_UINT64(
TARGETING::get_huid(i_proc),
TARGETING::get_huid(i_mca)),
l_x);
HWAS::RC_BAD_MCA,
0,
0);

// make code the highest callout
errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
}
else
{
l_x = VPD_CRP0_LX_MIN_X + l_chip_unit;

HWAS_DBG( "i_mca %.8X, Lx = L%1d",
i_mca->getAttr<ATTR_HUID>(),
l_x);

//Check for an invalid x value
if( l_x > VPD_CRP0_LX_MAX_X)
{
HWAS_ERR("Invalid Lx with x=%1d for MCA %.8X",
l_x,
i_mca->getAttr<ATTR_HUID>());
/*@
* @errortype ERRORLOG::ERRL_SEV_UNRECOVERABLE
* @moduleid HWAS::MOD_PLAT_READLX
* @reasoncode HWAS::RC_BAD_LX
* @userdata1 Target MCA HUID
* @userdata2 Value of x for Lx keyword
* @devdesc platReadLx> Invalid Lx keyword
*/
errl = new ERRORLOG::ErrlEntry(ERRORLOG::ERRL_SEV_UNRECOVERABLE,
HWAS::MOD_PLAT_READLX,
HWAS::RC_BAD_LX,
TARGETING::get_huid(i_mca),
l_x);

// make code the highest callout
errl->addProcedureCallout(HWAS::EPUB_PRC_HB_CODE,
HWAS::SRCI_PRIORITY_HIGH);
}
else
{
l_proc = getParentChip( i_mca );

HWAS_DBG( "i_mca %.8X, Lx = L%1d, l_proc %.8X",
i_mca->getAttr<ATTR_HUID>(),
l_x,
l_proc->getAttr<ATTR_HUID>());
}
}

if (errl == nullptr)
{ // no error, so we got a valid chip unit value back
// call deviceRead() to find the Lx record
uint8_t lxRaw[VPD_CRP0_LX_HDR_LENGTH + VPD_CRP0_LX_DATA_LENGTH];
size_t lxSize = sizeof(lxRaw);

errl = deviceRead(i_proc, lxRaw, lxSize,
size_t l_lxLength = VPD_CRP0_LX_HDR_DATA_LENGTH;
errl = deviceRead((TARGETING::Target*)l_proc, o_lxData, l_lxLength,
DEVICE_MVPD_ADDRESS(MVPD::CRP0,
MVPD::L1 + l_chip_unit));

if (errl != nullptr)
{ // trace the error condition
HWAS_INF( "i_proc %.8X, i_mca %.8X - failed L%1d read",
i_proc->getAttr<ATTR_HUID>(),
HWAS_INF( "l_proc %.8X, i_mca %.8X - failed L%1d read",
l_proc->getAttr<ATTR_HUID>(),
i_mca->getAttr<ATTR_HUID>(),
l_x);
}
else
{
// skip past the header
void *lxData = static_cast<void *>(&lxRaw[0]);
HWAS_DBG_BIN("Lx record", lxData, VPD_CRP0_LX_DATA_LENGTH);
// copy the data back into the caller's buffer
memcpy(o_lxData, lxData, VPD_CRP0_LX_DATA_LENGTH);
}
}

return errl;
Expand Down
53 changes: 46 additions & 7 deletions src/usr/hwas/test/hwas1test.H
Original file line number Diff line number Diff line change
Expand Up @@ -1545,6 +1545,8 @@ public:
using namespace HWAS;
using namespace TARGETING;

TS_INFO( "testHWASpervStates entry" );

// find top level target
Target * pSys;
targetService().getTopLevelTarget(pSys);
Expand Down Expand Up @@ -1638,30 +1640,67 @@ public:
TS_INFO( "testHWASplatReadLx entry" );

// call platReadLx with target that isn't in the VPD
errlHndl_t l_errl;
errlHndl_t l_errl = nullptr;

// Get system target
Target* pSys;
targetService().getTopLevelTarget(pSys);

// Get processor targets
TARGETING::TargetHandleList l_procList;
getAllChips(l_procList, TYPE_PROC);

TargetHandleList l_mcaList;
getChildChiplets(l_mcaList, *(l_procList.begin()), TYPE_MCA, true);
// Get children of first processor target
TargetHandleList l_childList;
getChildChiplets(l_childList, *(l_procList.begin()), TYPE_NA, false);

uint8_t lxData[HWAS::VPD_CRP0_LX_DATA_LENGTH];
uint8_t lxData[HWAS::VPD_CRP0_LX_HDR_DATA_LENGTH];

l_errl = HWAS::platReadLx(pSys, *(l_mcaList.begin()), lxData);
// Try using system target which does not have a chip unit attribute
l_errl = HWAS::platReadLx(pSys, lxData);

// Check that an error log is returned
if (l_errl)
{
// error log is expected case, delete it
delete l_errl;
l_errl = nullptr;
}
else
{
TS_FAIL("testHWASplatReadLx>"
"No error from platReadLx(pSys, *(l_mcaList.begin()).");
TS_FAIL("testHWASplatReadLx> No error from platReadLx(pSys).");
}

// Find a target that has a large chip unit and use that target
for( const auto & l_child_target: l_childList )
{
uint8_t l_chip_unit;
if (l_child_target->
tryGetAttr<TARGETING::ATTR_CHIP_UNIT>(l_chip_unit))
{
// Check if chip unit attribute is large enough
if(l_chip_unit >= HWAS::VPD_CRP0_LX_MAX_X)
{
// Try using target which has larger than expected chip unit
l_errl = HWAS::platReadLx(l_child_target, lxData);

// Check that an error log is returned
if (l_errl)
{
// error log is expected case, delete it
delete l_errl;
l_errl = nullptr;
}
else
{
TS_FAIL("testHWASplatReadLx> No error from "
"platReadLx(l_child_target 0x%8X).",
l_child_target->getAttr<ATTR_HUID>());
}

break;
}
}
}

TS_INFO( "testHWASplatReadLx exit" );
Expand Down

0 comments on commit 54896a9

Please sign in to comment.