Skip to content

Commit

Permalink
Merge pull request #2983 from robert-purcaru/master
Browse files Browse the repository at this point in the history
multi-channel mem_tg test
  • Loading branch information
kroberso-intc committed Jul 12, 2023
2 parents 4aca768 + 337a11a commit 3b20a50
Show file tree
Hide file tree
Showing 2 changed files with 197 additions and 103 deletions.
31 changes: 27 additions & 4 deletions samples/mem_tg/mem_tg.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright(c) 2022, Intel Corporation
// Copyright(c) 2022-2023, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
Expand All @@ -25,6 +25,9 @@
// POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <opae/cxx/core/token.h>
#include <iostream>
#include <vector>
#include <string>

#include "afu_test.h"

Expand Down Expand Up @@ -64,7 +67,6 @@ const std::map<std::string, uint32_t> tg_pattern = {
{ "prbs31", TG_DATA_PRBS31},
{ "rot1", TG_DATA_PRBS31},
};


enum {
AFU_DFH = 0x0000,
Expand Down Expand Up @@ -208,7 +210,7 @@ class mem_tg : public test_afu {

public:
uint32_t count_;
uint32_t mem_ch_;
std::vector<std::string> mem_ch_;
uint32_t loop_;
uint32_t wcnt_;
uint32_t rcnt_;
Expand All @@ -234,6 +236,27 @@ class mem_tg : public test_afu {
return handle_->get_token();
}

// Copy this instance's test parameters and connection state into
// *duplicate_obj.
// commands_, current_command_ and app_ are omitted since they are not
// relevant to closed instances of mem_tg that don't interact with commands.
// mem_ch_ is not copied either; the destination keeps whatever it had.
// duplicate_obj is dereferenced without a null check.
void duplicate(mem_tg *duplicate_obj) const {
  mem_tg &copy = *duplicate_obj;

  // Traffic-generator test parameters.
  copy.count_ = count_;
  copy.loop_ = loop_;
  copy.wcnt_ = wcnt_;
  copy.rcnt_ = rcnt_;
  copy.bcnt_ = bcnt_;
  copy.stride_ = stride_;
  copy.pattern_ = pattern_;
  copy.mem_speed_ = mem_speed_;

  // Identification and general settings.
  copy.name_ = name_;
  copy.afu_id_ = afu_id_;
  copy.pci_addr_ = pci_addr_;
  copy.log_level_ = log_level_;
  copy.shared_ = shared_;
  copy.timeout_msec_ = timeout_msec_;

  // Accelerator handle and logger are assigned as-is.
  // NOTE(review): presumably these are shared-ownership handles, so the copy
  // refers to the same underlying objects -- confirm against test_afu.
  copy.handle_ = handle_;
  copy.logger_ = logger_;
}

};
} // end of namespace mem_tg

269 changes: 170 additions & 99 deletions samples/mem_tg/tg_test.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright(c) 2022, Intel Corporation
// Copyright(c) 2022-2023, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -26,6 +26,11 @@
#pragma once

#include <unistd.h>

#include <future>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include "afu_test.h"
#include "mem_tg.h"
Expand All @@ -39,17 +44,18 @@ class tg_test : public test_command
{
public:
// Construct with a zero register offset and no mem_tg attached; run()
// assigns tg_exe_ from the AFU it is given.
tg_test()
  : tg_offset_(0x0)
  , tg_exe_(nullptr)
{}

virtual ~tg_test() {}

virtual const char *name() const override
virtual const char *name() const override
{
return "tg_test";
}

virtual const char *description() const override
virtual const char *description() const override
{
return "configure & run mem traffic generator test";
}
Expand All @@ -59,134 +65,199 @@ class tg_test : public test_command
return AFU_ID;
}


// Convert a transferred byte count and a memory-clock tick count into
// bandwidth in GB/s.
// mem_speed_ is reported elsewhere in this class in MHz, so
// 1000.0 / mem_speed_ is the tick period in nanoseconds, and bytes per
// nanosecond is GB/s. No guard is applied for num_ticks == 0.
double bw_calc(uint64_t xfer_bytes, uint64_t num_ticks)
{
  const double ns_per_tick = 1000.0 / static_cast<double>(tg_exe_->mem_speed_);
  const double elapsed_ns = ns_per_tick * static_cast<double>(num_ticks);
  return static_cast<double>(xfer_bytes) / elapsed_ns;
}

// Print the memory clock cycle count and the derived write/read bandwidth
// for the channel named by tg_exe_->mem_ch_[0].
//
// tg_exe_: mem_tg instance whose first mem_ch_ entry is the decimal channel
//          number; it is dereferenced without a null check and mem_ch_ must
//          not be empty.
void tg_perf (mem_tg *tg_exe_) {
  // The per-channel clock counter is read at MEM_TG_CLOCKS + 8 * channel.
  const uint32_t mem_ch_offset = (std::stoi(tg_exe_->mem_ch_[0])) << 0x3;
  const uint64_t num_ticks = tg_exe_->read64(MEM_TG_CLOCKS + mem_ch_offset);
  std::cout << "Mem Clock Cycles: " << std::dec << num_ticks << std::endl;

  // Start the product from a 64-bit operand so every multiplication is done
  // in 64 bits; multiplying the 32-bit counters first wraps for large tests.
  // NOTE(review): 64 is presumably the bytes moved per burst beat -- confirm.
  const uint64_t write_bytes = 64ULL * tg_exe_->loop_ * tg_exe_->wcnt_ * tg_exe_->bcnt_;
  const uint64_t read_bytes = 64ULL * tg_exe_->loop_ * tg_exe_->rcnt_ * tg_exe_->bcnt_;
  std::cout << "Write BW: " << bw_calc(write_bytes,num_ticks) << " GB/s" << std::endl;
  std::cout << "Read BW: " << bw_calc(read_bytes,num_ticks) << " GB/s\n" << std::endl;
}

bool tg_wait_test_completion ()
bool tg_wait_test_completion (mem_tg *tg_exe_)
{
/* Wait for test completion */
uint32_t timeout = MEM_TG_TEST_TIMEOUT;

// poll while active bit is set (channel status = {pass,fail,timeout,active})
uint32_t tg_status = 0x1;
tg_status = 0xF&(tg_exe_->read64(MEM_TG_STAT) >> (0x4*(tg_exe_->mem_ch_)));
while ( tg_status == TG_STATUS_ACTIVE ) {
tg_status = 0xF&(tg_exe_->read64(MEM_TG_STAT) >> (0x4*(tg_exe_->mem_ch_)));
usleep(TEST_SLEEP_INVL);
if (--timeout == 0) {
std::cout << "TG TEST TIME OUT" << std::endl;
return false;
}
uint32_t timeout = MEM_TG_TEST_TIMEOUT;
// poll while active bit is set (channel status = {pass,fail,timeout,active})
uint32_t tg_status = 0x1;
tg_status = 0xF & (tg_exe_->read64(MEM_TG_STAT) >> (0x4*(std::stoi(tg_exe_->mem_ch_[0]))));
while (tg_status == TG_STATUS_ACTIVE) {
tg_status = 0xF & (tg_exe_->read64(MEM_TG_STAT) >> (0x4*(std::stoi(tg_exe_->mem_ch_[0]))));
std::this_thread::yield();
if (--timeout == 0) {
std::cout << "TG TEST TIME OUT" << std::endl;
return false;
}
}
std::cout << "Channel " << std::stoi(tg_exe_->mem_ch_[0]) << ":" << std::endl;
if (tg_status == TG_STATUS_TIMEOUT) {
std::cout << "TG TIMEOUT" << std::endl;
return false;
}
uint32_t tg_fail_exp = 0;
uint32_t tg_fail_act = 0;
uint64_t tg_fail_addr = 0;

if (tg_status == TG_STATUS_ERROR) {
std::cout << "TG ERROR" << std::endl;
tg_fail_addr = tg_exe_->read64(tg_offset_ + TG_FIRST_FAIL_ADDR_L);
tg_fail_exp = tg_exe_->read64(tg_offset_ + TG_FAIL_EXPECTED_DATA);
tg_fail_act = tg_exe_->read64(tg_offset_ + TG_FAIL_READ_DATA);
std::cout << "Failed at address 0x" << std::hex << tg_fail_addr << " exp=0x" << tg_fail_exp << " act=0x" << tg_fail_act << std::endl;
return false;
}
if(tg_status == TG_STATUS_TIMEOUT) {
std::cout << "TG TIMEOUT" << std::endl;
return false;
}
uint32_t tg_fail_exp = 0;
uint32_t tg_fail_act = 0;
uint64_t tg_fail_addr = 0;

if (tg_status == TG_STATUS_ERROR) {
std::cout << "TG ERROR" << std::endl;
tg_fail_addr = tg_exe_->read64(tg_offset_ + TG_FIRST_FAIL_ADDR_L);
tg_fail_exp = tg_exe_->read64(tg_offset_ + TG_FAIL_EXPECTED_DATA);
tg_fail_act = tg_exe_->read64(tg_offset_ + TG_FAIL_READ_DATA);
std::cout << "Failed at address 0x" << std::hex << tg_fail_addr << " exp=0x" << tg_fail_exp << " act=0x" << tg_fail_act << std::endl;
return false;
}
std::cout << "TG PASS" << std::endl;
std::cout << "TG PASS" << std::endl;
return true;
}


int config_input_options()
int config_input_options(mem_tg *tg_exe_)
{
if (!tg_exe_)
return -1;

return -1;
uint64_t mem_capability = tg_exe_->read64(MEM_TG_CTRL);
if((mem_capability & (0x1 << tg_exe_->mem_ch_)) == 0) {
std::cerr << "No traffic generator for mem[" << tg_exe_->mem_ch_ << "]" << std::endl;
return -1;
} else {
tg_offset_ = AFU_DFH + (MEM_TG_CFG_OFFSET * (1+tg_exe_->mem_ch_));
}

tg_exe_->write32(tg_offset_+TG_LOOP_COUNT, tg_exe_->loop_);
tg_exe_->write32(tg_offset_+TG_WRITE_COUNT, tg_exe_->wcnt_);
tg_exe_->write32(tg_offset_+TG_READ_COUNT, tg_exe_->rcnt_);
tg_exe_->write32(tg_offset_+TG_BURST_LENGTH, tg_exe_->bcnt_);
tg_exe_->write32(tg_offset_+TG_SEQ_ADDR_INCR, tg_exe_->stride_);
tg_exe_->write32(tg_offset_+TG_PPPG_SEL, tg_exe_->pattern_);
if ((mem_capability & (0x1 << std::stoi(tg_exe_->mem_ch_[0]))) == 0) {
std::cerr << "No traffic generator for mem[" << std::stoi(tg_exe_->mem_ch_[0]) << "]" << std::endl;
return -1;
}

tg_offset_ = AFU_DFH + (MEM_TG_CFG_OFFSET * (1+std::stoi(tg_exe_->mem_ch_[0])));

// address increment mode
tg_exe_->write32(tg_offset_+TG_ADDR_MODE_WR, TG_ADDR_SEQ);
tg_exe_->write32(tg_offset_+TG_ADDR_MODE_RD, TG_ADDR_SEQ);
tg_exe_->write32(tg_offset_+TG_LOOP_COUNT, tg_exe_->loop_);
tg_exe_->write32(tg_offset_+TG_WRITE_COUNT, tg_exe_->wcnt_);
tg_exe_->write32(tg_offset_+TG_READ_COUNT, tg_exe_->rcnt_);
tg_exe_->write32(tg_offset_+TG_BURST_LENGTH, tg_exe_->bcnt_);
tg_exe_->write32(tg_offset_+TG_SEQ_ADDR_INCR, tg_exe_->stride_);
tg_exe_->write32(tg_offset_+TG_PPPG_SEL, tg_exe_->pattern_);

// address increment mode
tg_exe_->write32(tg_offset_+TG_ADDR_MODE_WR, TG_ADDR_SEQ);
tg_exe_->write32(tg_offset_+TG_ADDR_MODE_RD, TG_ADDR_SEQ);
return 0;
}

// The test state has been configured. Run one test instance.
int run_mem_test()
int run_mem_test(mem_tg *tg_exe_)
{
int status = 0;
int status = 0;

tg_exe_->logger_->debug("Start Test");
tg_exe_->logger_->debug("Start Test");

tg_exe_->write32(tg_offset_+TG_START,0x1);
tg_exe_->write32(tg_offset_ + TG_START, 0x1);

if(!tg_wait_test_completion())
status = -1;
if (!tg_wait_test_completion(tg_exe_))
status = -1;

tg_perf();
return status;
tg_perf(tg_exe_);

return status;
}

// Configure and then run the test for the single channel described by
// tg_exe_. Returns the configuration error code if configuration fails,
// otherwise the result of run_mem_test().
int run_thread_single_channel(mem_tg *tg_exe_) {
  const int cfg_status = config_input_options(tg_exe_);
  if (cfg_status == 0)
    return run_mem_test(tg_exe_);

  std::cerr << "Failed to configure TG input options" << std::endl;
  return cfg_status;
}

// Entry point of the command: run the traffic-generator test on every
// selected memory channel, one worker per channel, and report each result.
//
// afu: must be a mem_tg instance; its mem_ch_ holds either decimal channel
//      numbers or, as first entry, a string starting with "all".
// app: unused.
// Returns -1 when afu is not a mem_tg, 1 when a channel argument is not a
// valid non-negative integer, otherwise 0 when every channel succeeded or
// the first non-zero channel status. Calls exit(1) when no channel argument
// was provided.
virtual int run(test_afu *afu, CLI::App *app) override
{
  (void)app;

  auto d_afu = dynamic_cast<mem_tg *>(afu);
  if (!d_afu) {
    std::cerr << "tg_test requires a mem_tg AFU instance" << std::endl;
    return -1;
  }
  tg_exe_ = d_afu;
  token_ = d_afu->get_token();

  // Read HW details
  if (0 == tg_exe_->mem_speed_) {
    tg_exe_->mem_speed_ = 300;
    std::cout << "Memory channel clock frequency unknown. Assuming "
              << tg_exe_->mem_speed_ << " MHz." << std::endl;
  } else {
    std::cout << "Memory clock from command line: "
              << tg_exe_->mem_speed_ << " MHz" << std::endl;
  }

  if (tg_exe_->mem_ch_.empty()) {
    std::cout << "Insufficient arguments provided" << std::endl;
    exit(1);
  }

  // Parse mem_ch_ into the list of selected channels. A vector replaces the
  // sentinel-terminated array, which was written one element past its end
  // when every scanned capability bit was set.
  std::vector<int> channels;
  if (tg_exe_->mem_ch_[0].find("all") == 0) {
    const uint64_t mem_capability = tg_exe_->read64(MEM_TG_CTRL);
    // NOTE(review): only the low sizeof(uint64_t) (= 8) capability bits are
    // scanned, as before; confirm whether all 64 bits were intended.
    for (uint32_t bit = 0; bit < sizeof(uint64_t); ++bit) {
      if ((mem_capability & (1ULL << bit)) != 0)
        channels.push_back(static_cast<int>(bit));
    }
  } else {
    try {
      for (const auto &arg : tg_exe_->mem_ch_)
        channels.push_back(std::stoi(arg));
    } catch (const std::logic_error &) {
      // std::stoi throws std::invalid_argument or std::out_of_range, both
      // derived from std::logic_error.
      std::cerr << "Error: invalid argument to std::stoi" << std::endl;
      return 1;
    }
    for (const int ch : channels) {
      if (ch < 0) {
        std::cerr << "Error: invalid memory channel " << ch << std::endl;
        return 1;
      }
    }
  }

  // Start one asynchronous worker per channel. Each worker gets its own
  // mem_tg copy whose mem_ch_ holds just that channel. `workers` is declared
  // before `futures` so the copies outlive the tasks that use them.
  std::vector<std::unique_ptr<mem_tg>> workers;
  std::vector<std::future<int>> futures;
  workers.reserve(channels.size());
  futures.reserve(channels.size());
  for (const int ch : channels) {
    std::unique_ptr<mem_tg> worker(new mem_tg);
    tg_exe_->duplicate(worker.get());
    worker->mem_ch_.clear();
    worker->mem_ch_.push_back(std::to_string(ch));

    mem_tg *worker_ptr = worker.get();
    workers.push_back(std::move(worker));
    futures.push_back(std::async(std::launch::async, [this, worker_ptr] {
      return run_thread_single_channel(worker_ptr);
    }));
  }

  // Wait for all workers to finish and collect their exit statuses
  std::vector<int> exit_codes;
  exit_codes.reserve(futures.size());
  for (auto &future : futures) {
    exit_codes.push_back(future.get());
  }

  // Print message showing thread statuses; remember the first failure so it
  // is reported to the caller instead of being discarded.
  int status = 0;
  for (std::size_t i = 0; i < channels.size(); ++i) {
    std::cout << "Thread on channel " << channels[i] << " exited with status " << exit_codes[i] << std::endl;
    if (status == 0 && exit_codes[i] != 0)
      status = exit_codes[i];
  }
  return status;
}

protected:
Expand Down

0 comments on commit 3b20a50

Please sign in to comment.