From 0acb956bb98770c4ffcc65759c32b6032bfeb921 Mon Sep 17 00:00:00 2001 From: Corey Swenson Date: Wed, 4 Jan 2017 11:19:02 -0600 Subject: [PATCH] Verify OCC checkpoint is reached after starting PM complex RTC:165644 Change-Id: Id47ffd1781082d57726507372dd461080bddaecc Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/35061 Tested-by: Jenkins Server Tested-by: FSP CI Jenkins Tested-by: Jenkins OP Build CI Reviewed-by: Martin Gloff Reviewed-by: Daniel M. Crowell --- src/include/usr/isteps/istep_reasoncodes.H | 4 +- src/include/usr/isteps/pm/pm_common_ext.H | 22 +-- src/usr/isteps/HBconfig | 2 +- .../isteps/istep21/call_host_runtime_setup.C | 32 +++- src/usr/isteps/pm/pm_common.C | 156 ++++++++++++++---- 5 files changed, 159 insertions(+), 57 deletions(-) diff --git a/src/include/usr/isteps/istep_reasoncodes.H b/src/include/usr/isteps/istep_reasoncodes.H index f135f5595d9..6241c94665b 100644 --- a/src/include/usr/isteps/istep_reasoncodes.H +++ b/src/include/usr/isteps/istep_reasoncodes.H @@ -5,7 +5,7 @@ /* */ /* OpenPOWER HostBoot Project */ /* */ -/* Contributors Listed Below - COPYRIGHT 2015,2016 */ +/* Contributors Listed Below - COPYRIGHT 2015,2017 */ /* [+] International Business Machines Corp. */ /* */ /* */ @@ -53,6 +53,7 @@ namespace ISTEP MOD_CHECK_SLAVE_SBE_SEEPROM_COMPLETE = 0x11, MOD_PM_LOAD_HOST_DATA_TO_SRAM = 0x12, MOD_VOLTAGE_CONFIG = 0x13, + MOD_PM_VERIFY_OCC_CHKPT = 0x14, }; /** @@ -88,6 +89,7 @@ namespace ISTEP RC_NO_FUNCTIONAL_PROCS = ISTEP_COMP_ID | 0x19, RC_NO_PRESENT_EQS = ISTEP_COMP_ID | 0x1A, RC_NOMINAL_FREQ_MISMATCH = ISTEP_COMP_ID | 0x1B, + RC_PM_OCC_CHKPT_TIMEOUT = ISTEP_COMP_ID | 0x1C, }; }; diff --git a/src/include/usr/isteps/pm/pm_common_ext.H b/src/include/usr/isteps/pm/pm_common_ext.H index 0d024ac43b1..a8428b73f03 100644 --- a/src/include/usr/isteps/pm/pm_common_ext.H +++ b/src/include/usr/isteps/pm/pm_common_ext.H @@ -58,27 +58,29 @@ namespace HBPM uint64_t i_phys_addr); /** - * @brief Load PM Complex for all proc targets. 
+ * @brief Load and start PM Complex for all proc targets. * - * @param[in] i_mode Load / Reload + * @param[in] i_mode Load / Reload + * @param[out] o_failTarget Failing proc target * - * @return errlHndl_t Error log of loadPMAll failed + * @return errlHndl_t Error log if loadAndStartPMAll failed */ - errlHndl_t loadPMAll(loadPmMode i_mode); + errlHndl_t loadAndStartPMAll( loadPmMode i_mode, + TARGETING::Target* & o_proc_target); /** - * @brief Start PM Complex for all proc targets. + * @brief Reset PM Complex for all proc targets. * - * @return errlHndl_t Error log if startPMAll failed + * @return errlHndl_t Error log if resetPMAll failed */ - errlHndl_t startPMAll(); + errlHndl_t resetPMAll(); /** - * @brief Reset PM Complex for all proc targets. + * @brief Verify all OCCs at checkpoint. * - * @return errlHndl_t Error log if resetPMAll failed + * @return errlHndl_t Error log if verifyOccChkptAll failed */ - errlHndl_t resetPMAll(); + errlHndl_t verifyOccChkptAll(); /** * @brief Fetch the ring overrides (if they exist) diff --git a/src/usr/isteps/HBconfig b/src/usr/isteps/HBconfig index 7f6468287bb..ce6573a2e67 100644 --- a/src/usr/isteps/HBconfig +++ b/src/usr/isteps/HBconfig @@ -10,7 +10,7 @@ config SET_NOMINAL_PSTATE Set the PState to Nominal just before starting the payload. 
config START_OCC_DURING_BOOT - default n + default y help Activates all the OCCs during IPL diff --git a/src/usr/isteps/istep21/call_host_runtime_setup.C b/src/usr/isteps/istep21/call_host_runtime_setup.C index af7699b6c06..f32827fbf91 100644 --- a/src/usr/isteps/istep21/call_host_runtime_setup.C +++ b/src/usr/isteps/istep21/call_host_runtime_setup.C @@ -153,24 +153,38 @@ void* call_host_runtime_setup (void *io_pArgs) if(l_activatePM) { - l_err = HBPM::loadPMAll(HBPM::PM_LOAD); + TARGETING::Target* l_failTarget = NULL; + bool pmStartSuccess = true; + + l_err = loadAndStartPMAll(HBPM::PM_LOAD, l_failTarget); if (l_err) { TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "loadPMAll failed"); + "loadAndStartPMAll failed"); // Commit the error and continue with the istep errlCommit(l_err, ISTEP_COMP_ID); + pmStartSuccess = false; } - l_err = HBPM::startPMAll(); - if (l_err) - { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "startPMAll failed"); - // Commit the error and continue with the istep - errlCommit(l_err, ISTEP_COMP_ID); +#ifdef CONFIG_HTMGT + // Report PM status to HTMGT + HTMGT::processOccStartStatus(pmStartSuccess,l_failTarget); +#else + // Verify all OCCs have reached the checkpoint + if (pmStartSuccess) + { + l_err = HBPM::verifyOccChkptAll(); + if (l_err) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "verifyOccCheckpointAll failed"); + + // Commit the error and continue with the istep + errlCommit(l_err, ISTEP_COMP_ID); + } } +#endif } #if 0 //@TODO-RTC:164022-Support max pstate without OCC diff --git a/src/usr/isteps/pm/pm_common.C b/src/usr/isteps/pm/pm_common.C index 7930432616f..a73d96c3bc7 100644 --- a/src/usr/isteps/pm/pm_common.C +++ b/src/usr/isteps/pm/pm_common.C @@ -32,6 +32,7 @@ #include #include +#include // targeting support #include #include @@ -62,6 +63,7 @@ #include #include +#include #ifdef CONFIG_ENABLE_CHECKSTOP_ANALYSIS #include @@ -92,6 +94,10 @@ namespace HBPM constexpr uint64_t HOMER_INSTANCE_SIZE_IN_MB = 
sizeof(Homerlayout_t) / ONE_MB; + constexpr uint32_t OCC_SRAM_RSP_ADDR = 0xFFFBF000; + constexpr uint16_t OCC_CHKPT_COMPLETE = 0x0EFF; + + std::shared_ptr g_pOccLidMgr (nullptr); std::shared_ptr g_pHcodeLidMgr (nullptr); @@ -667,6 +673,12 @@ namespace HBPM break; } + // Zero out the HOMER memory for LOAD only + if(PM_LOAD == i_mode) + { + memset(l_homerVAddr, 0, VMM_HOMER_INSTANCE_SIZE); + } + uint64_t l_occImgPaddr = i_homerPhysAddr + HOMER_OFFSET_TO_OCC_IMG; uint64_t l_occImgVaddr = reinterpret_cast (l_homerVAddr) @@ -844,9 +856,10 @@ namespace HBPM /** - * @brief Load PM complex for all chips + * @brief Load and start PM complex for all chips */ - errlHndl_t loadPMAll(loadPmMode i_mode) + errlHndl_t loadAndStartPMAll(loadPmMode i_mode, + TARGETING::Target* & o_failTarget) { errlHndl_t l_errl = nullptr; @@ -854,7 +867,7 @@ namespace HBPM getAllChips(l_procChips, TYPE_PROC, true); TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "loadPMAll: %s %d proc(s) found", + "loadAndStartPMAll: %s %d proc(s) found", (PM_LOAD == i_mode) ? "LOAD" : "RELOAD", l_procChips.size() ); @@ -875,20 +888,31 @@ namespace HBPM if( l_errl ) { TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - ERR_MRK"loadPMAll: " + ERR_MRK"loadAndStartPMAll: " "load PM complex failed!" ); + o_failTarget = l_procChip; + break; + } + + l_errl = startPMComplex(l_procChip); + if( l_errl ) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + ERR_MRK"loadAndStartPMAll: " + "start PM complex failed!" 
); + o_failTarget = l_procChip; break; } } return l_errl; - } // loadPMAll + } // loadAndStartPMAll /** - * @brief Start PM complex for all chips + * @brief Reset PM complex for all chips */ - errlHndl_t startPMAll() + errlHndl_t resetPMAll() { errlHndl_t l_errl = nullptr; @@ -896,46 +920,29 @@ namespace HBPM getAllChips(l_procChips, TYPE_PROC, true); TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "startPMAll: %d proc(s) found", + "resetPMAll: %d proc(s) found", l_procChips.size()); for (const auto & l_procChip: l_procChips) { - l_errl = startPMComplex(l_procChip); + l_errl = resetPMComplex(l_procChip); if( l_errl ) { TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - ERR_MRK"startPMAll: " - "start PM complex failed!" ); + ERR_MRK"resetPMAll: " + "reset PM complex failed!" ); break; } - - // RTC 165644 Enable this when readSRAM is available - // Add constants for addr/act/exp values - /* - // OCC checkpoint - l_errl = readSRAM(l_procChip,0xfffbf000,l_buffer); - if(((l_buffer.getWord(0)) & 0xFFF) == 0xEFF) - { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "startPMALL: OCC checkpoint detected" ); - } - else - { - TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "startPMALL: OCC checkpoint not detected" ); - } - */ } return l_errl; - } // startPMAll + } // resetPMAll /** - * @brief Reset PM complex for all chips + * @brief Verify all OCCs at checkpoint */ - errlHndl_t resetPMAll() + errlHndl_t verifyOccChkptAll() { errlHndl_t l_errl = nullptr; @@ -943,23 +950,100 @@ namespace HBPM getAllChips(l_procChips, TYPE_PROC, true); TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - "resetPMAll: %d proc(s) found", + "verifyOccChkptAll: %d proc(s) found", l_procChips.size()); + // Wait up to 15 seconds for all OCCs to be ready (150 * 100ms = 15s) + const size_t NS_BETWEEN_READ = 100 * NS_PER_MSEC; + const size_t READ_RETRY_LIMIT = 150; + const uint16_t l_readLength = 8; + for (const auto & l_procChip: l_procChips) { - l_errl = resetPMComplex(l_procChip); + uint64_t l_checkpoint = 
0x0; + uint8_t retryCount = 0; + bool chkptReached = false; + + while (retryCount++ < READ_RETRY_LIMIT) + { + // Read SRAM response buffer to check for OCC checkpoint + l_errl = HBOCC::readSRAM( l_procChip,OCC_SRAM_RSP_ADDR, + &(l_checkpoint), + l_readLength ); + + if( l_errl ) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "verifyOccChkptAll: SRAM read failed " + "HUID 0x%X", get_huid(l_procChip)); + break; + } + + if( OCC_CHKPT_COMPLETE == (l_checkpoint & 0xFFFF) ) + { + TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, + "verifyOccChkptAll: OCC checkpoint detected " + "HUID 0x%X", get_huid(l_procChip)); + chkptReached = true; + break; + } + + // Sleep before we check again + nanosleep(0, NS_BETWEEN_READ); + } + if( l_errl ) + { + break; + } + + if( !chkptReached ) { TRACFCOMP( ISTEPS_TRACE::g_trac_isteps_trace, - ERR_MRK"resetPMAll: " - "reset PM complex failed!" ); + "verifyOccChkptAll: Timeout waiting for OCC checkpoint " + "HUID 0x%X Checkpoint 0x%08X", + get_huid(l_procChip), l_checkpoint); + + /*@ + * @errortype + * @reasoncode ISTEP::RC_PM_OCC_CHKPT_TIMEOUT + * @severity ERRORLOG::ERRL_SEV_UNRECOVERABLE + * @moduleid ISTEP::MOD_PM_VERIFY_OCC_CHKPT + * @userdata1 HUID + * @userdata2 Checkpoint value + * @devdesc Timeout waiting for OCC checkpoint + * @custdesc A problem occurred during the IPL + * of the system. + */ + l_errl = new ERRORLOG::ErrlEntry( + ERRORLOG::ERRL_SEV_UNRECOVERABLE, + ISTEP::MOD_PM_VERIFY_OCC_CHKPT, + ISTEP::RC_PM_OCC_CHKPT_TIMEOUT, + get_huid(l_procChip), + l_checkpoint, + true); + + TARGETING::TargetHandleList l_Occs; + getChildChiplets(l_Occs, l_procChip, TARGETING::TYPE_OCC); + + if( l_Occs[0] != nullptr ) + { + l_errl->addHwCallout( l_Occs[0], + HWAS::SRCI_PRIORITY_HIGH, + HWAS::NO_DECONFIG, + HWAS::GARD_NULL ); + } + + l_errl->collectTrace(FAPI_TRACE_NAME,256); + l_errl->collectTrace(FAPI_IMP_TRACE_NAME,256); + l_errl->collectTrace("ISTEPS_TRACE",256); + break; } } return l_errl; - } // resetPMAll + } // verifyOccChkptAll /**