Skip to content

Commit

Permalink
Merge branch 'OFS:master' into usrclk_dfhv1
Browse files Browse the repository at this point in the history
  • Loading branch information
pajgaonk committed Feb 28, 2024
2 parents 54949e8 + 67840c5 commit 79aa019
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 97 deletions.
72 changes: 40 additions & 32 deletions libraries/afu-test/afu_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class afu {
, shared_(false)
, timeout_msec_(60000)
, handle_(nullptr)
, handle_device_(nullptr)
, current_command_(nullptr)
{
if (!afu_id_.empty())
Expand All @@ -196,55 +197,65 @@ class afu {
return fpga::properties::get(handle_);
}

int open_handle(const char *afu_id) {
bool enum_fpga_device()
{
auto filter = fpga::properties::get(); // Get an empty properties object

// The following code attempts to get a token+handle for the DEVICE.
// The following code attempts to get a token+handle for the DEVICE.
// This is to allow access to OPAE-API functions that are only supported
// through the xfpga plugin (i.e accessing sysfs entries)
// In contrast, the ACCELERATOR token may be underlied by the vfio plugin.
// Set PCIe segment, bus, and device properties to enumerate FPGA DEVICE (FME)
if (!pci_addr_.empty()) {
auto p = pcie_address::parse(pci_addr_.c_str());
filter->segment = p.fields.domain;
filter->bus = p.fields.bus;
filter->device = p.fields.device;
auto p = pcie_address::parse(pci_addr_.c_str());
filter->segment = p.fields.domain;
filter->bus = p.fields.bus;
filter->device = p.fields.device;
}

filter->type = FPGA_DEVICE;
auto tokens = fpga::token::enumerate({filter});
auto tokens = fpga::token::enumerate({ filter });

// Error out if the # of tokens != 1
if (tokens.size() < 1) {
if (pci_addr_.empty()) {
logger_->error("no DEVICE found");
} else {
logger_->error("no accelerator found at PCIe address {1}",
pci_addr_);
}
return exit_codes::not_found;
}
logger_->info("no FPGA DEVICE found");
return false;
}

if (tokens.size() > 1) {
std::cerr << "more than one DEVICE found matching filter\n";
logger_->info("more than one FPGA DEVICE found ");
return false;
}

int flags = shared_ ? FPGA_OPEN_SHARED : 0;

// Open a handle to the resource
try {
handle_device_ = fpga::handle::open(tokens[0], flags);
} catch (fpga::no_access &err) {
std::cerr << err.what() << "\n";
return exit_codes::no_access;
handle_device_ = fpga::handle::open(tokens[0], flags);
}
catch (fpga::no_access& err) {
std::cerr << err.what() << "\n";
return false;
}
return true;
}

// The following code attempts to get a token + handle for the AFU
// (ACCELERATOR device) matching the given command's afu_id.
// Set PCIe segment, bus, device, and function properties to enumerate FPGA ACCELERATOR
int open_handle(const char *afu_id) {

enum_fpga_device();

auto filter = fpga::properties::get(); // Get an empty properties object

// The following code attempts to get a token+handle for the DEVICE.
// This is to allow access to OPAE-API functions that are only supported
// through the xfpga plugin (i.e accessing sysfs entries)
// In contrast, the ACCELERATOR token may be underlied by the vfio plugin.
// Set PCIe segment, bus, and device properties to enumerate FPGA DEVICE (FME)
if (!pci_addr_.empty()) {
auto p = pcie_address::parse(pci_addr_.c_str());
filter->function = p.fields.function;
auto p = pcie_address::parse(pci_addr_.c_str());
filter->segment = p.fields.domain;
filter->bus = p.fields.bus;
filter->device = p.fields.device;
filter->function = p.fields.function;
}

auto app_afu_id = afu_id ? afu_id : afu_id_.c_str();
Expand All @@ -255,7 +266,7 @@ class afu {
return error;
}

tokens = fpga::token::enumerate({filter});
auto tokens = fpga::token::enumerate({filter});
if (tokens.size() < 1) {
if (pci_addr_.empty()) {
logger_->error("no accelerator found with id: {0}", app_afu_id);
Expand All @@ -266,10 +277,7 @@ class afu {
return exit_codes::not_found;
}

if (tokens.size() > 1) {
std::cerr << "more than one accelerator found matching filter\n";
}

int flags = shared_ ? FPGA_OPEN_SHARED : 0;
try {
handle_ = fpga::handle::open(tokens[0], flags);
} catch (fpga::no_access &err) {
Expand Down
6 changes: 3 additions & 3 deletions samples/host_exerciser/host_exerciser.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ using opae::fpga::types::token;

static const uint64_t HELPBK_TEST_TIMEOUT = 30000;
static const uint64_t HELPBK_TEST_SLEEP_INVL = 100;
static const uint64_t CL = 64;
static const uint64_t KB = 1024;
static const uint64_t MB = KB * 1024;
static const uint64_t LOG2_CL = 6;
static const size_t LPBK1_DSM_SIZE = 2 * KB;
static const size_t LPBK1_BUFFER_SIZE = 64 * KB;
static const size_t LPBK1_BUFFER_ALLOCATION_SIZE = 64 * KB;
Expand Down Expand Up @@ -575,7 +573,9 @@ class host_exerciser : public test_afu {

// Returns the token behind the FPGA DEVICE handle, or nullptr when
// handle_device_ is not set, so the null handle is never dereferenced.
token::ptr_t get_token_device()
{
    if (!handle_device_)
        return nullptr;
    return handle_device_->get_token();
}

bool option_passed(std::string option_str)
Expand Down
145 changes: 83 additions & 62 deletions samples/host_exerciser/host_exerciser_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ class host_exerciser_cmd : public test_command
he_lpbk_cfg_.value = 0;
he_lpbk_ctl_.value = 0;
he_lpbk_max_reqlen_ = HOSTEXE_CLS_8;
he_lpbk_bus_bytes_ = 64;
he_lpbk_bus_bytes_log2_ = 6;
he_lpbk_api_ver_ = 0;
he_lpbk_atomics_supported_ = false;
is_ase_sim_ = false;
Expand Down Expand Up @@ -119,13 +121,14 @@ class host_exerciser_cmd : public test_command
}

// Converts a byte address into a bus-line index by discarding the low
// he_lpbk_bus_bytes_log2_ bits. The shift uses the runtime bus width rather
// than a fixed 64-byte line so that it tracks he_lpbk_bus_bytes_.
inline uint64_t cacheline_aligned_addr(uint64_t num) {
    return num >> he_lpbk_bus_bytes_log2_;
}

// Convert number of transactions to bandwidth (GB/s)
double he_num_xfers_to_bw(uint64_t num_lines, uint64_t num_ticks)
{
    // Bytes moved: one bus line is he_lpbk_bus_bytes_ bytes wide (runtime
    // value, not a fixed 64).
    const double bytes_moved = static_cast<double>(num_lines * he_lpbk_bus_bytes_);

    // Elapsed time in ns: (1000 / clock in MHz) is the tick period in ns.
    // Bytes per ns is numerically GB/s.
    // NOTE(review): num_ticks == 0 yields inf/NaN, as in the original
    // expression -- confirm callers never pass 0.
    const double elapsed_ns = 1000.0 / host_exe_->he_clock_mhz_ * num_ticks;

    return bytes_moved / elapsed_ns;
}

inline uint64_t dsm_num_ticks(const volatile he_dsm_status *dsm_status)
Expand Down Expand Up @@ -167,7 +170,7 @@ class host_exerciser_cmd : public test_command
else
num_cache_lines = dsm_num_reads(dsm_status) + dsm_num_writes(dsm_status);
} else {
num_cache_lines = (LPBK1_BUFFER_SIZE / (1 * CL));
num_cache_lines = (LPBK1_BUFFER_SIZE / (1 * he_lpbk_bus_bytes_));
}

host_exerciser_status();
Expand Down Expand Up @@ -250,13 +253,13 @@ class host_exerciser_cmd : public test_command
// Compare and swap? If so, seed the source buffers with values will
// match. The hardware tests uses the line index as the test.
if (he_lpbk_cfg_.AtomicFunc == HOSTEXE_ATOMIC_CAS_4) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 3) {
buffer->write<uint32_t>(i, i*CL);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 3) {
buffer->write<uint32_t>(i, i*he_lpbk_bus_bytes_);
}
}
if (he_lpbk_cfg_.AtomicFunc == HOSTEXE_ATOMIC_CAS_8) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 3) {
buffer->write<uint64_t>(i, i*CL);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 3) {
buffer->write<uint64_t>(i, i*he_lpbk_bus_bytes_);
}
}

Expand All @@ -265,9 +268,9 @@ class host_exerciser_cmd : public test_command
// the value at the start of each line to the second position so
// it can be used as a check later.
if (he_lpbk_cfg_.AtomicFunc != HOSTEXE_ATOMIC_OFF) {
for (uint32_t i = 0; i < buffer->size()/CL; i += 1) {
uint64_t v = buffer->read<uint64_t>(i*CL);
buffer->write<uint64_t>(v ^ 0xabababababababab, i*CL + 8);
for (uint32_t i = 0; i < buffer->size()/he_lpbk_bus_bytes_; i += 1) {
uint64_t v = buffer->read<uint64_t>(i*he_lpbk_bus_bytes_);
buffer->write<uint64_t>(v ^ 0xabababababababab, i*he_lpbk_bus_bytes_ + 8);
}
}
}
Expand All @@ -280,14 +283,14 @@ class host_exerciser_cmd : public test_command
for (uint64_t i = 0; i < 8; i++)
{
std::cout << std::hex
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*7)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*6)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*5)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*4)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*3)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*2)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL + 8*1)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*CL)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*7)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*6)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*5)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*4)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*3)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*2)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8*1)
<< " " << std::setfill('0') << std::setw(16) << buffer->read<uint64_t>(i*he_lpbk_bus_bytes_)
<< std::dec << std::endl;
}

Expand Down Expand Up @@ -319,13 +322,13 @@ class host_exerciser_cmd : public test_command
bool is_cas = (he_lpbk_cfg_.AtomicFunc & 0xc) == 8;

// Ignore the last entry to work around an off by one copy error
for (uint64_t i = 0; i < source_->size()/CL; i += 1) {
for (uint64_t i = 0; i < source_->size()/he_lpbk_bus_bytes_; i += 1) {
// In source_, the first entry in every line is the atomically modified
// value. The second entry holds the original value, hashed with a constant
// so it isn't a simple repetition.
uint64_t src_a = source_->read<uint64_t>(i*CL);
uint64_t src_a = source_->read<uint64_t>(i*he_lpbk_bus_bytes_);
// Read the original value and reverse the hash.
uint64_t src_b = source_->read<uint64_t>(i*CL + 8) ^ 0xabababababababab;
uint64_t src_b = source_->read<uint64_t>(i*he_lpbk_bus_bytes_ + 8) ^ 0xabababababababab;
uint64_t upd_a = 0;

// Compute expected value
Expand Down Expand Up @@ -360,7 +363,7 @@ class host_exerciser_cmd : public test_command

// The destination is comparatively easy. For all functions it is
// simply the original source value.
uint64_t dst_a = destination_->read<uint64_t>(i*CL);
uint64_t dst_a = destination_->read<uint64_t>(i*he_lpbk_bus_bytes_);
if (size_is_4) {
src_b &= 0xffffffff;
dst_a &= 0xffffffff;
Expand Down Expand Up @@ -737,51 +740,19 @@ class host_exerciser_cmd : public test_command
host_exe_ = dynamic_cast<host_exerciser*>(afu);

token_ = d_afu->get_token();
token_device_ = d_afu->get_token_device();

// Check if memory calibration has failed and error out before proceeding
// with the test. The dfl-emif driver creates sysfs entries to report the
// calibration status for each memory channel. sysobjects are the OPAE-API's
// abstraction for sysfs entries. However, at this time, these are only
// accessible through tokens that use the xfpga plugin and not the vfio
// plugin. Hence our use of the DEVICE token (token_device_). One
// non-ideality of the following implementation is the use of
// MAX_NUM_MEM_CHANNELS. We are essentially doing a brute-force query of
// sysfs entries since we don't know how many mem channels exist on the
// given platform. What about glob wildcards? Why not simply glob for
// "*dfl*/**/inf*_cal_fail" and use the OPAE-API's support for arrays of
// sysobjects? The reason is that, at the time of this writing, the
// xfpga-plugin's sysobject implementation does not support arrays
// specifically when the glob contains a recursive wildcard "/**/". It's a
// strange and perhaps unnecessary limitation. Therefore, future work is to
// fix that and clean up the code below.
for (size_t i = 0; i < MAX_NUM_MEM_CHANNELS; i++) {
std::stringstream mem_cal_glob;
// Construct the glob string to search for the cal_fail sysfs entry
// for the i'th mem channel
mem_cal_glob << "*dfl*/**/inf" << i << "_cal_fail";
// Ask for a sysobject with this glob string
fpga::sysobject::ptr_t testobj = fpga::sysobject::get(
token_device_, mem_cal_glob.str().c_str(), FPGA_OBJECT_GLOB);

// if test obj !=null, the sysfs entry was found.
// Read the calibration status from the sysfs entry.
// A non-zero value (typically '1') means
// calibration has failed --> we error out.
if (testobj && testobj->read64(0)) {
std::cout
<< "This sysfs entry reports that memory calibration has failed:"
<< mem_cal_glob.str().c_str() << std::endl;
return -1;
}
}


fpga_emif_status(afu);
// Read HW details
uint64_t he_info = host_exe_->read64(HE_INFO0);
he_lpbk_api_ver_ = (he_info >> 16);
std::cout << "API version: " << uint32_t(he_lpbk_api_ver_) << std::endl;

if (he_lpbk_api_ver_ >= 3) {
he_lpbk_bus_bytes_log2_ = 5 + ((he_info >> 25) & 3);
he_lpbk_bus_bytes_ = 1 << he_lpbk_bus_bytes_log2_;
}
std::cout << "Bus width: " << he_lpbk_bus_bytes_ << " bytes" << std::endl;

// For atomics support, the version must not be zero and bit 24 must be 0.
he_lpbk_atomics_supported_ = (he_lpbk_api_ver_ != 0) &&
(0 == ((he_info >> 24) & 1));
Expand Down Expand Up @@ -870,7 +841,7 @@ class host_exerciser_cmd : public test_command
std::fill_n(dsm_->c_type(), LPBK1_DSM_SIZE, 0x0);

// Number of cache lines
d_afu->write64(HE_NUM_LINES, (LPBK1_BUFFER_SIZE / (1 * CL)) -1);
d_afu->write64(HE_NUM_LINES, (LPBK1_BUFFER_SIZE / (1 * he_lpbk_bus_bytes_)) -1);

int status = 0;
if (host_exe_->he_test_all_)
Expand All @@ -880,6 +851,54 @@ class host_exerciser_cmd : public test_command

return status;
}

// Reports whether memory calibration has failed on any memory channel.
//
// afu: expected to be a host_exerciser; if the dynamic_cast fails, or no
//      DEVICE token is available, the check is skipped silently.
//
// Side effects: stores the DEVICE token in token_device_ and prints a message
// to std::cout for the first channel whose sysfs entry reads non-zero.
//
// NOTE(review): this function returns void, so a calibration failure is only
// reported; the caller cannot abort the test based on it.
void fpga_emif_status(test_afu* afu)
{
    auto d_afu = dynamic_cast<host_exerciser*>(afu);
    // dynamic_cast yields nullptr when afu is null or not a host_exerciser;
    // bail out instead of dereferencing it.
    if (!d_afu)
        return;

    token_device_ = d_afu->get_token_device();

    if (!token_device_)
        return;

    // Check if memory calibration has failed. The dfl-emif driver creates
    // sysfs entries to report the calibration status for each memory channel.
    // sysobjects are the OPAE-API's abstraction for sysfs entries. However, at
    // this time, these are only accessible through tokens that use the xfpga
    // plugin and not the vfio plugin. Hence our use of the DEVICE token
    // (token_device_). One non-ideality of the following implementation is the
    // use of MAX_NUM_MEM_CHANNELS. We are essentially doing a brute-force
    // query of sysfs entries since we don't know how many mem channels exist
    // on the given platform. What about glob wildcards? Why not simply glob
    // for "*dfl*/**/inf*_cal_fail" and use the OPAE-API's support for arrays
    // of sysobjects? The reason is that, at the time of this writing, the
    // xfpga-plugin's sysobject implementation does not support arrays
    // specifically when the glob contains a recursive wildcard "/**/". It's a
    // strange and perhaps unnecessary limitation. Therefore, future work is to
    // fix that and clean up the code below.

    for (size_t i = 0; i < MAX_NUM_MEM_CHANNELS; i++) {
        std::stringstream mem_cal_glob;
        // Construct the glob string to search for the cal_fail sysfs entry
        // for the i'th mem channel
        mem_cal_glob << "*dfl*/**/inf" << i << "_cal_fail";
        const std::string glob = mem_cal_glob.str();

        // Ask for a sysobject with this glob string
        fpga::sysobject::ptr_t testobj = fpga::sysobject::get(
            token_device_, glob.c_str(), FPGA_OBJECT_GLOB);

        // If testobj is non-null, the sysfs entry was found.
        // Read the calibration status from the sysfs entry.
        // A non-zero value (typically '1') means calibration has failed:
        // report it and stop scanning the remaining channels.
        if (testobj && testobj->read64(0)) {
            std::cout
                << "This sysfs entry reports that memory calibration has failed:"
                << glob << std::endl;
            return;
        }
    }
}

protected:
he_cfg he_lpbk_cfg_;
Expand All @@ -892,6 +911,8 @@ class host_exerciser_cmd : public test_command
token::ptr_t token_;
token::ptr_t token_device_;
hostexe_req_len he_lpbk_max_reqlen_;
uint32_t he_lpbk_bus_bytes_;
uint32_t he_lpbk_bus_bytes_log2_;
uint8_t he_lpbk_api_ver_;
bool he_lpbk_atomics_supported_;
bool is_ase_sim_;
Expand Down

0 comments on commit 79aa019

Please sign in to comment.