Skip to content

Commit

Permalink
Verify OCC checkpoint is reached after starting PM complex
Browse files Browse the repository at this point in the history
RTC:165644
Change-Id: Id47ffd1781082d57726507372dd461080bddaecc
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/35061
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Reviewed-by: Martin Gloff <mgloff@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
cvswen authored and dcrowell77 committed Feb 3, 2017
1 parent da3675d commit 0acb956
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 57 deletions.
4 changes: 3 additions & 1 deletion src/include/usr/isteps/istep_reasoncodes.H
Expand Up @@ -5,7 +5,7 @@
/* */
/* OpenPOWER HostBoot Project */
/* */
/* Contributors Listed Below - COPYRIGHT 2015,2016 */
/* Contributors Listed Below - COPYRIGHT 2015,2017 */
/* [+] International Business Machines Corp. */
/* */
/* */
Expand Down Expand Up @@ -53,6 +53,7 @@ namespace ISTEP
MOD_CHECK_SLAVE_SBE_SEEPROM_COMPLETE = 0x11,
MOD_PM_LOAD_HOST_DATA_TO_SRAM = 0x12,
MOD_VOLTAGE_CONFIG = 0x13,
MOD_PM_VERIFY_OCC_CHKPT = 0x14,
};

/**
Expand Down Expand Up @@ -88,6 +89,7 @@ namespace ISTEP
RC_NO_FUNCTIONAL_PROCS = ISTEP_COMP_ID | 0x19,
RC_NO_PRESENT_EQS = ISTEP_COMP_ID | 0x1A,
RC_NOMINAL_FREQ_MISMATCH = ISTEP_COMP_ID | 0x1B,
RC_PM_OCC_CHKPT_TIMEOUT = ISTEP_COMP_ID | 0x1C,
};

};
Expand Down
22 changes: 12 additions & 10 deletions src/include/usr/isteps/pm/pm_common_ext.H
Expand Up @@ -58,27 +58,29 @@ namespace HBPM
uint64_t i_phys_addr);

/**
* @brief Load PM Complex for all proc targets.
* @brief Load and start PM Complex for all proc targets.
*
* @param[in] i_mode Load / Reload
* @param[in] i_mode Load / Reload
* @param[out] o_failTarget Failing proc target
*
* @return errlHndl_t Error log of loadPMAll failed
* @return errlHndl_t Error log if loadAndStartPMAll failed
*/
errlHndl_t loadPMAll(loadPmMode i_mode);
errlHndl_t loadAndStartPMAll( loadPmMode i_mode,
TARGETING::Target* & o_proc_target);

/**
* @brief Start PM Complex for all proc targets.
* @brief Reset PM Complex for all proc targets.
*
* @return errlHndl_t Error log if startPMAll failed
* @return errlHndl_t Error log if resetPMAll failed
*/
errlHndl_t startPMAll();
errlHndl_t resetPMAll();

/**
* @brief Reset PM Complex for all proc targets.
* @brief Verify that all OCCs have reached the checkpoint.
*
* @return errlHndl_t Error log if resetPMAll failed
* @return errlHndl_t Error log if verifyOccChkptAll failed
*/
errlHndl_t resetPMAll();
errlHndl_t verifyOccChkptAll();

/**
* @brief Fetch the ring overrides (if they exist)
Expand Down
2 changes: 1 addition & 1 deletion src/usr/isteps/HBconfig
Expand Up @@ -10,7 +10,7 @@ config SET_NOMINAL_PSTATE
Set the PState to Nominal just before starting the payload.

config START_OCC_DURING_BOOT
default n
default y
help
Activates all the OCCs during IPL

Expand Down
32 changes: 23 additions & 9 deletions src/usr/isteps/istep21/call_host_runtime_setup.C
Expand Up @@ -153,24 +153,38 @@ void* call_host_runtime_setup (void *io_pArgs)

if(l_activatePM)
{
l_err = HBPM::loadPMAll(HBPM::PM_LOAD);
TARGETING::Target* l_failTarget = NULL;
bool pmStartSuccess = true;

l_err = loadAndStartPMAll(HBPM::PM_LOAD, l_failTarget);
if (l_err)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"loadPMAll failed");
"loadAndStartPMAll failed");

// Commit the error and continue with the istep
errlCommit(l_err, ISTEP_COMP_ID);
pmStartSuccess = false;
}
l_err = HBPM::startPMAll();
if (l_err)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"startPMAll failed");

// Commit the error and continue with the istep
errlCommit(l_err, ISTEP_COMP_ID);
#ifdef CONFIG_HTMGT
// Report PM status to HTMGT
HTMGT::processOccStartStatus(pmStartSuccess,l_failTarget);
#else
// Verify all OCCs have reached the checkpoint
if (pmStartSuccess)
{
l_err = HBPM::verifyOccChkptAll();
if (l_err)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"verifyOccCheckpointAll failed");

// Commit the error and continue with the istep
errlCommit(l_err, ISTEP_COMP_ID);
}
}
#endif
}

#if 0 //@TODO-RTC:164022-Support max pstate without OCC
Expand Down
156 changes: 120 additions & 36 deletions src/usr/isteps/pm/pm_common.C
Expand Up @@ -32,6 +32,7 @@

#include <sys/misc.h>
#include <sys/mm.h>
#include <sys/time.h>
// targeting support
#include <targeting/common/commontargeting.H>
#include <targeting/common/utilFilter.H>
Expand Down Expand Up @@ -62,6 +63,7 @@
#include <p9_hcode_image_defines.H>

#include <arch/ppc.H>
#include <occ/occAccess.H>

#ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS
#include <diag/prdf/prdfWriteHomerFirData.H>
Expand Down Expand Up @@ -92,6 +94,10 @@ namespace HBPM
constexpr uint64_t HOMER_INSTANCE_SIZE_IN_MB =
sizeof(Homerlayout_t) / ONE_MB;

constexpr uint32_t OCC_SRAM_RSP_ADDR = 0xFFFBF000;
constexpr uint16_t OCC_CHKPT_COMPLETE = 0x0EFF;


std::shared_ptr<UtilLidMgr> g_pOccLidMgr (nullptr);
std::shared_ptr<UtilLidMgr> g_pHcodeLidMgr (nullptr);

Expand Down Expand Up @@ -667,6 +673,12 @@ namespace HBPM
break;
}

// Zero out the HOMER memory for LOAD only
if(PM_LOAD == i_mode)
{
memset(l_homerVAddr, 0, VMM_HOMER_INSTANCE_SIZE);
}

uint64_t l_occImgPaddr = i_homerPhysAddr
+ HOMER_OFFSET_TO_OCC_IMG;
uint64_t l_occImgVaddr = reinterpret_cast <uint64_t>(l_homerVAddr)
Expand Down Expand Up @@ -844,17 +856,18 @@ namespace HBPM


/**
* @brief Load PM complex for all chips
* @brief Load and start PM complex for all chips
*/
errlHndl_t loadPMAll(loadPmMode i_mode)
errlHndl_t loadAndStartPMAll(loadPmMode i_mode,
TARGETING::Target* & o_failTarget)
{
errlHndl_t l_errl = nullptr;

TargetHandleList l_procChips;
getAllChips(l_procChips, TYPE_PROC, true);

TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"loadPMAll: %s %d proc(s) found",
"loadAndStartPMAll: %s %d proc(s) found",
(PM_LOAD == i_mode) ? "LOAD" : "RELOAD",
l_procChips.size() );

Expand All @@ -875,91 +888,162 @@ namespace HBPM
if( l_errl )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
ERR_MRK"loadPMAll: "
ERR_MRK"loadAndStartPMAll: "
"load PM complex failed!" );
o_failTarget = l_procChip;
break;
}

l_errl = startPMComplex(l_procChip);
if( l_errl )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
ERR_MRK"loadAndStartPMAll: "
"start PM complex failed!" );
o_failTarget = l_procChip;
break;
}
}

return l_errl;
} // loadPMAll
} // loadAndStartPMAll


/**
* @brief Start PM complex for all chips
* @brief Reset PM complex for all chips
*/
errlHndl_t startPMAll()
errlHndl_t resetPMAll()
{
errlHndl_t l_errl = nullptr;

TargetHandleList l_procChips;
getAllChips(l_procChips, TYPE_PROC, true);

TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"startPMAll: %d proc(s) found",
"resetPMAll: %d proc(s) found",
l_procChips.size());

for (const auto & l_procChip: l_procChips)
{
l_errl = startPMComplex(l_procChip);
l_errl = resetPMComplex(l_procChip);
if( l_errl )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
ERR_MRK"startPMAll: "
"start PM complex failed!" );
ERR_MRK"resetPMAll: "
"reset PM complex failed!" );
break;
}

// RTC 165644 Enable this when readSRAM is available
// Add constants for addr/act/exp values
/*
// OCC checkpoint
l_errl = readSRAM(l_procChip,0xfffbf000,l_buffer);
if(((l_buffer.getWord(0)) & 0xFFF) == 0xEFF)
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"startPMALL: OCC checkpoint detected" );
}
else
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"startPMALL: OCC checkpoint not detected" );
}
*/
}

return l_errl;
} // startPMAll
} // resetPMAll


/**
* @brief Reset PM complex for all chips
* @brief Verify that all OCCs have reached the checkpoint
*/
errlHndl_t resetPMAll()
errlHndl_t verifyOccChkptAll()
{
errlHndl_t l_errl = nullptr;

TargetHandleList l_procChips;
getAllChips(l_procChips, TYPE_PROC, true);

TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"resetPMAll: %d proc(s) found",
"verifyOccChkptAll: %d proc(s) found",
l_procChips.size());

// Wait up to 15 seconds per proc for its OCC to reach the checkpoint
// (150 reads * 100ms = 15s; the retry count restarts for each proc)
const size_t NS_BETWEEN_READ = 100 * NS_PER_MSEC;
const size_t READ_RETRY_LIMIT = 150;
const uint16_t l_readLength = 8;

for (const auto & l_procChip: l_procChips)
{
l_errl = resetPMComplex(l_procChip);
uint64_t l_checkpoint = 0x0;
uint8_t retryCount = 0;
bool chkptReached = false;

while (retryCount++ < READ_RETRY_LIMIT)
{
// Read SRAM response buffer to check for OCC checkpoint
l_errl = HBOCC::readSRAM( l_procChip,OCC_SRAM_RSP_ADDR,
&(l_checkpoint),
l_readLength );

if( l_errl )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"verifyOccChkptAll: SRAM read failed "
"HUID 0x%X", get_huid(l_procChip));
break;
}

if( OCC_CHKPT_COMPLETE == (l_checkpoint & 0xFFFF) )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
"verifyOccChkptAll: OCC checkpoint detected "
"HUID 0x%X", get_huid(l_procChip));
chkptReached = true;
break;
}

// Sleep before we check again
nanosleep(0, NS_BETWEEN_READ);
}

if( l_errl )
{
break;
}

if( !chkptReached )
{
TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace,
ERR_MRK"resetPMAll: "
"reset PM complex failed!" );
"verifyOccChkptAll: Timeout waiting for OCC checkpoint "
"HUID 0x%X Checkpoint 0x%08X",
get_huid(l_procChip), l_checkpoint);

/*@
* @errortype
* @reasoncode ISTEP::RC_PM_OCC_CHKPT_TIMEOUT
* @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE
* @moduleid ISTEP::MOD_PM_VERIFY_OCC_CHKPT
* @userdata1 HUID
* @userdata2 Checkpoint value
* @devdesc Timeout waiting for OCC checkpoint
* @custdesc A problem occurred during the IPL
* of the system.
*/
l_errl = new ERRORLOG::ErrlEntry(
ERRORLOG::ERRL_SEV_UNRECOVERABLE,
ISTEP::MOD_PM_VERIFY_OCC_CHKPT,
ISTEP::RC_PM_OCC_CHKPT_TIMEOUT,
get_huid(l_procChip),
l_checkpoint,
true);

TARGETING::TargetHandleList l_Occs;
getChildChiplets(l_Occs, l_procChip, TARGETING::TYPE_OCC);

if( l_Occs[0] != nullptr )
{
l_errl->addHwCallout( l_Occs[0],
HWAS::SRCI_PRIORITY_HIGH,
HWAS::NO_DECONFIG,
HWAS::GARD_NULL );
}

l_errl->collectTrace(FAPI_TRACE_NAME,256);
l_errl->collectTrace(FAPI_IMP_TRACE_NAME,256);
l_errl->collectTrace("ISTEPS_TRACE",256);

break;
}
}

return l_errl;
} // resetPMAll
} // verifyOccChkptAll


/**
Expand Down

0 comments on commit 0acb956

Please sign in to comment.