Skip to content

Commit

Permalink
Add support to host_exerciser for varying HE LB data bus widths (#3110)
Browse files Browse the repository at this point in the history
New API version exported by HW includes a CSR with the bus width.

Signed-off-by: Michael Adler <michael.adler@intel.com>
  • Loading branch information
michael-adler committed Feb 25, 2024
1 parent dfabc03 commit 67840c5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 25 deletions.
2 changes: 0 additions & 2 deletions samples/host_exerciser/host_exerciser.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ using opae::fpga::types::token;

static const uint64_t HELPBK_TEST_TIMEOUT = 30000;
static const uint64_t HELPBK_TEST_SLEEP_INVL = 100;
static const uint64_t CL = 64;
static const uint64_t KB = 1024;
static const uint64_t MB = KB * 1024;
static const uint64_t LOG2_CL = 6;
static const size_t LPBK1_DSM_SIZE = 2 * KB;
static const size_t LPBK1_BUFFER_SIZE = 64 * KB;
static const size_t LPBK1_BUFFER_ALLOCATION_SIZE = 64 * KB;
Expand Down
57 changes: 34 additions & 23 deletions samples/host_exerciser/host_exerciser_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class host_exerciser_cmd : public test_command
he_lpbk_cfg_.value = 0;
he_lpbk_ctl_.value = 0;
he_lpbk_max_reqlen_ = HOSTEXE_CLS_8;
he_lpbk_bus_bytes_ = 64;
he_lpbk_bus_bytes_log2_ = 6;
he_lpbk_api_ver_ = 0;
he_lpbk_atomics_supported_ = false;
is_ase_sim_ = false;
Expand Down Expand Up @@ -119,13 +121,14 @@ class host_exerciser_cmd : public test_command
}

inline uint64_t cacheline_aligned_addr(uint64_t num) {
return num >> LOG2_CL;
return num >> he_lpbk_bus_bytes_log2_;
}

// Convert number of transactions to bandwidth (GB/s)
double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks)
{
return (double)(num_lines * 64) / ((1000.0 / host_exe_->he_clock_mhz_ * num_ticks));
return (double)(num_lines * he_lpbk_bus_bytes_) /
((1000.0 / host_exe_->he_clock_mhz_ * num_ticks));
}

inline uint64_t dsm_num_ticks(const volatile he_dsm_status *dsm_status)
Expand Down Expand Up @@ -167,7 +170,7 @@ class host_exerciser_cmd : public test_command
else
num_cache_lines = dsm_num_reads(dsm_status) + dsm_num_writes(dsm_status);
} else {
num_cache_lines = (LPBK1_BUFFER_SIZE / (1 * CL));
num_cache_lines = (LPBK1_BUFFER_SIZE / (1 * he_lpbk_bus_bytes_));
}

host_exerciser_status();
Expand Down Expand Up @@ -250,13 +253,13 @@ class host_exerciser_cmd : public test_command
// Compare and swap? If so, seed the source buffers with values will
// match. The hardware tests uses the line index as the test.
if (he_lpbk_cfg_.AtomicFunc == HOSTEXE_ATOMIC_CAS_4) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 3) {
buffer->write<uint32_t>(i, i*CL);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 3) {
buffer->write<uint32_t>(i, i*he_lpbk_bus_bytes_);
}
}
if (he_lpbk_cfg_.AtomicFunc == HOSTEXE_ATOMIC_CAS_8) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 3) {
buffer->write<uint64_t>(i, i*CL);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 3) {
buffer->write<uint64_t>(i, i*he_lpbk_bus_bytes_);
}
}

Expand All @@ -265,9 +268,9 @@ class host_exerciser_cmd : public test_command
// the value at the start of each line to the second position so
// it can be used as a check later.
if (he_lpbk_cfg_.AtomicFunc != HOSTEXE_ATOMIC_OFF) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 1) {
uint64_t v = buffer->read<uint64_t>(i*CL);
buffer->write<uint64_t>(v ^ 0xabababababababab, i*CL + 8);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 1) {
uint64_t v = buffer->read<uint64_t>(i*he_lpbk_bus_bytes_);
buffer->write<uint64_t>(v ^ 0xabababababababab, i*he_lpbk_bus_bytes_ + 8);
}
}
}
Expand All @@ -280,14 +283,14 @@ class host_exerciser_cmd : public test_command
for (uint64_t i = 0; i < 8; i++)
{
std::cout << std::hex
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*7)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*6)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*5)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*4)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*3)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*2)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*1)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*7)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*6)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*5)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*4)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*3)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*2)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*1)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_)
<< std::dec << std::endl;
}

Expand Down Expand Up @@ -319,13 +322,13 @@ class host_exerciser_cmd : public test_command
bool is_cas = (he_lpbk_cfg_.AtomicFunc & 0xc) == 8;

// Ignore the last entry to work around an off by one copy error
for (uint64_t i = 0; i < source_->size()/CL; i += 1) {
for (uint64_t i = 0; i < source_->size()/he_lpbk_bus_bytes_; i += 1) {
// In source_, the first entry in every line is the atomically modified
// value. The second entry holds the original value, hashed with a constant
// so it isn't a simple repetition.
uint64_t src_a = source_->read<uint64_t>(i*CL);
uint64_t src_a = source_->read<uint64_t>(i*he_lpbk_bus_bytes_);
// Read the original value and reverse the hash.
uint64_t src_b = source_->read<uint64_t>(i*CL + 8) ^ 0xabababababababab;
uint64_t src_b = source_->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8) ^ 0xabababababababab;
uint64_t upd_a = 0;

// Compute expected value
Expand Down Expand Up @@ -360,7 +363,7 @@ class host_exerciser_cmd : public test_command

// The destination is comparatively easy. For all functions it is
// simply the original source value.
uint64_t dst_a = destination_->read<uint64_t>(i*CL);
uint64_t dst_a = destination_->read<uint64_t>(i*he_lpbk_bus_bytes_);
if (size_is_4) {
src_b &= 0xffffffff;
dst_a &= 0xffffffff;
Expand Down Expand Up @@ -744,6 +747,12 @@ class host_exerciser_cmd : public test_command
he_lpbk_api_ver_ = (he_info >> 16);
std::cout << "API version: " << uint32_t(he_lpbk_api_ver_) << std::endl;

if (he_lpbk_api_ver_ >= 3) {
he_lpbk_bus_bytes_log2_ = 5 + ((he_info >> 25) & 3);
he_lpbk_bus_bytes_ = 1 << he_lpbk_bus_bytes_log2_;
}
std::cout << "Bus width: " << he_lpbk_bus_bytes_ << " bytes" << std::endl;

// For atomics support, the version must not be zero and bit 24 must be 0.
he_lpbk_atomics_supported_ = (he_lpbk_api_ver_ != 0) &&
(0 == ((he_info >> 24) & 1));
Expand Down Expand Up @@ -832,7 +841,7 @@ class host_exerciser_cmd : public test_command
std::fill_n(dsm_->c_type(), LPBK1_DSM_SIZE, 0x0);

// Number of cache lines
d_afu->write64(HE_NUM_LINES, (LPBK1_BUFFER_SIZE / (1 * CL)) -1);
d_afu->write64(HE_NUM_LINES, (LPBK1_BUFFER_SIZE / (1 * he_lpbk_bus_bytes_)) -1);

int status = 0;
if (host_exe_->he_test_all_)
Expand Down Expand Up @@ -902,6 +911,8 @@ class host_exerciser_cmd : public test_command
token::ptr_t token_;
token::ptr_t token_device_;
hostexe_req_len he_lpbk_max_reqlen_;
uint32_t he_lpbk_bus_bytes_;
uint32_t he_lpbk_bus_bytes_log2_;
uint8_t he_lpbk_api_ver_;
bool he_lpbk_atomics_supported_;
bool is_ase_sim_;
Expand Down

0 comments on commit 67840c5

Please sign in to comment.