Skip to content

Commit

Permalink
Set HB to ignore draminit_training fails
Browse files Browse the repository at this point in the history
Change-Id: I92bd5cdc52adad0a1414fb61ec6d215d3c51165e
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/41484
Tested-by: Jenkins Server <pfd-jenkins+hostboot@us.ibm.com>
Tested-by: Hostboot CI <hostboot-ci+hostboot@us.ibm.com>
Reviewed-by: STEPHEN GLANCY <sglancy@us.ibm.com>
Reviewed-by: Louis Stermole <stermole@us.ibm.com>
Dev-Ready: JACOB L. HARVEY <jlharvey@us.ibm.com>
Reviewed-by: Jennifer A. Stofer <stofer@us.ibm.com>
Reviewed-on: http://ralgit01.raleigh.ibm.com/gerrit1/41492
Reviewed-by: Hostboot Team <hostboot@us.ibm.com>
Tested-by: Jenkins OP Build CI <op-jenkins+hostboot@us.ibm.com>
Tested-by: Jenkins OP HW <op-hw-jenkins+hostboot@us.ibm.com>
Tested-by: FSP CI Jenkins <fsp-CI-jenkins+hostboot@us.ibm.com>
Reviewed-by: Daniel M. Crowell <dcrowell@us.ibm.com>
  • Loading branch information
JacobHarvey authored and dcrowell77 committed Jun 30, 2017
1 parent 0e89cd3 commit d0a8f18
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/import/chips/p9/procedures/hwp/memory/lib/mc/port.H
Expand Up @@ -517,6 +517,7 @@ fapi_try_exit:
/// @param[in] i_target the target
/// @param[in] i_state the state
/// @return FAPI2_RC_SUCCESS if and only if ok
/// @note Disable Port Fail after recurring RCD errors.
///
template< fapi2::TargetType T, typename TT = portTraits<T> >
fapi2::ReturnCode change_port_fail_disable( const fapi2::Target<T>& i_target, states i_state )
Expand Down
Expand Up @@ -35,6 +35,7 @@

#include <fapi2.H>
#include <mss.H>
#include <vector>

#include <p9_mss_draminit_training.H>
#include <lib/utils/count_dimm.H>
Expand All @@ -61,8 +62,10 @@ extern "C"
const uint8_t i_abort_on_error)
{
// Keep track of the last error seen by a port
fapi2::ReturnCode l_port_error = fapi2::FAPI2_RC_SUCCESS;
fapi2::buffer<uint32_t> l_cal_steps_enabled = i_special_training;
fapi2::ReturnCode l_port_error ( fapi2::FAPI2_RC_SUCCESS );
fapi2::buffer<uint32_t> l_cal_steps_enabled( i_special_training );

std::vector<fapi2::ReturnCode> l_fails;

FAPI_INF("Start draminit training");

Expand Down Expand Up @@ -170,44 +173,79 @@ extern "C"
}

// Execute selected cal steps
FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, i_abort_on_error) );
FAPI_TRY( mss::setup_and_execute_cal(p, rp, l_cal_steps_enabled, l_cal_abort_on_error) );

fapi2::ReturnCode l_rc (fapi2::current_err);

// If we're aborting on error we can just FAPI_TRY. If we're not, we don't want to exit if there's
// If we're aborting on error we can just jump to the end.
// If we're not, we don't want to exit if there's
// an error but we want to log the error and keep on keeping on.
if ((fapi2::current_err = mss::process_initial_cal_errors(p)) != fapi2::FAPI2_RC_SUCCESS)
if ((l_rc = mss::process_initial_cal_errors(p)) != fapi2::FAPI2_RC_SUCCESS)
{
fapi2::logError(fapi2::current_err);

if (l_cal_abort_on_error)
{
goto fapi_try_exit;
FAPI_TRY( l_rc );
}

l_fails.push_back(l_rc);

// Keep track of the last cal error we saw.
l_rank_pair_error = fapi2::current_err;
l_rank_pair_error = l_rc;
}
}// rank pairs

// Conducts workarounds after training if needed
FAPI_TRY( mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled ) );
}// rank pairs

// Once we've trained all the rank pairs we can record the bad bits in the attributes if we have an error
// This error is the most recent error seen on a port, too, so we keep track of that.
if (l_rank_pair_error != fapi2::FAPI2_RC_SUCCESS)
{
FAPI_TRY( mss::dp16::record_bad_bits(p) );
l_port_error = l_rank_pair_error;
fapi2::ReturnCode l_rc (fapi2::FAPI2_RC_SUCCESS);
// Conducts workarounds after training if needed
l_rc = mss::workarounds::dp16::post_training_workarounds( p, l_cal_steps_enabled );

if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
{
l_fails.push_back(l_rc);
}

// Going to treat bad_bits errors as similar to training errors
// If we're in hostboot, we update the attribute and keep running
// If we're in cronus, we'll error out
l_rc = mss::dp16::record_bad_bits(p);

if ( l_rc != fapi2::FAPI2_RC_SUCCESS)
{
l_fails.push_back(l_rc);
}
}

// Resetting current_err.
// The error has either already been "logged" or we have exited and returned the error up the call stack.
fapi2::current_err = fapi2::FAPI2_RC_SUCCESS;
}

// So we've calibrated the entire port. If we're here either we didn't have any errors or the last error
// seen on a port is the error for this entire controller.
FAPI_TRY(l_port_error, "Seeing port error, exiting training");
// So we want to record the errors as informational and not mess with current_err
#ifdef __HOSTBOOT_MODULE

for (auto l_iter = l_fails.begin(); l_iter != l_fails.end(); ++l_iter)
{
// fapi2 doesn't have INFO flag, so the RECOVERED flag will do
// Same behavior (no printouts to the customer and no deconfigures/fail-outs)
// We want to have these fail logs for the future, but we'll let memdiags catch the errors
fapi2::logError(*l_iter, fapi2::FAPI2_ERRL_SEV_RECOVERED);
}

// If we're in cronus, we're just going to bomb out. Error logging doesn't work as of 6/17 JLH
// The errors should be printed out as FAPI_ERR's when the ReturnCode was made though
#else
{
if (l_fails.size() != 0)
{
FAPI_TRY(l_fails[0]);
}
}
#endif
// Unmask FIR
FAPI_TRY( mss::unmask::after_draminit_training(i_target) );


fapi_try_exit:
FAPI_INF("End draminit training");
return fapi2::current_err;
Expand Down

0 comments on commit d0a8f18

Please sign in to comment.