Skip to content

Commit

Permalink
sample/gpu_direct: add PCIE mem bar bandwidth profiling (#885)
Browse files Browse the repository at this point in the history
Signed-off-by: Frank Du <frank.du@intel.com>
  • Loading branch information
frankdjx committed Jun 3, 2024
1 parent dc0c298 commit 4b2b5b4
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 0 deletions.
46 changes: 46 additions & 0 deletions app/sample/ext_frame/rx_st20p_hdr_split_gpu_direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,51 @@ struct rx_st20p_hg_ctx {
off_t cpu_copy_offset;
};

/*
 * Convert `bytes` transferred `loops` times within `sec` seconds into Mbps.
 * Returns 0 when the elapsed time is not positive (interval shorter than the
 * clock() resolution), so the caller never prints inf/nan.
 */
static float gaddr_mbps(size_t bytes, int loops, float sec) {
  if (sec <= 0.0f) return 0.0f;
  float bits = (float)bytes * 8 * loops;
  return bits / sec / 1000 / 1000;
}

/*
 * Rough read/write bandwidth profiling of the frame memory in ctx->gddr_frame.
 *
 * Reads up to 1 MiB of plane 0 byte by byte (3 passes), then writes the whole
 * frame byte by byte (20 passes), and logs the resulting throughput of each
 * phase. Timing uses clock(), i.e. processor time consumed by this process.
 *
 * The write phase overwrites the frame content with the last bytes sampled
 * during the read phase.
 *
 * Returns 0 on success, -1 if the frame has no address or a zero size.
 */
static int gaddr_profiling(struct rx_st20p_hg_ctx* ctx) {
  struct st_ext_frame* frame = &ctx->gddr_frame;
  uint8_t* addr = (uint8_t*)frame->addr[0];
  size_t frame_sz = frame->size;
  uint8_t buf[256] = {0};
  clock_t start, end;
  float sec;
  int loop_cnt;

  /* cast so the argument always matches PRIu64 whatever the field type is */
  info("%s, start on %p, size %" PRIu64 "\n", __func__, frame->addr[0],
       (uint64_t)frame->size);

  /* nothing to measure, and avoids dereferencing an unmapped frame */
  if (!addr || !frame_sz) {
    info("%s, frame not mapped or empty, skip profiling\n", __func__);
    return -1;
  }

  /* read */
  loop_cnt = 3;
  /* Reads are very slow, so only a 1 MiB window is sampled.
   * NOTE(review): presumably because the region is an uncached PCIe BAR
   * mapping where every load is a non-posted bus transaction - confirm. */
  size_t r_sz = 0x100000;
  if (frame_sz < r_sz) r_sz = frame_sz;
  start = clock();
  for (int loop = 0; loop < loop_cnt; loop++) {
    for (size_t i = 0; i < r_sz; i++) {
      /* keep the value in a small ring so the load has an observable use */
      buf[i & 0xFF] = addr[i];
      dbg("%s, value %u at %d\n", __func__, addr[i], (int)i);
    }
  }
  end = clock();
  sec = (float)(end - start) / CLOCKS_PER_SEC;
  info("%s, read throughput: %f Mbps, time %fs\n", __func__,
       gaddr_mbps(r_sz, loop_cnt, sec), sec);

  /* write */
  loop_cnt = 20;
  start = clock();
  for (int loop = 0; loop < loop_cnt; loop++) {
    for (size_t i = 0; i < frame_sz; i++) {
      addr[i] = buf[i & 0xFF];
    }
  }
  end = clock();
  sec = (float)(end - start) / CLOCKS_PER_SEC;
  info("%s, write throughput: %f Mbps, time %fs\n", __func__,
       gaddr_mbps(frame_sz, loop_cnt, sec), sec);
  return 0;
}

static int gddr_map(struct st_sample_context* ctx, struct st_ext_frame* frame, size_t sz,
int fd) {
off_t off = ctx->gddr_pa + ctx->gddr_offset;
Expand Down Expand Up @@ -255,6 +300,7 @@ int main(int argc, char** argv) {
fb_sz = mtl_size_page_align(fb_sz, app[i]->pg_sz);
ret = gddr_map(&ctx, &app[i]->gddr_frame, fb_sz, dev_mem_fd);
if (ret < 0) goto error;
if (ctx.profiling_gddr) gaddr_profiling(app[i]);

if (!ctx.use_cpu_copy) {
ops_rx.flags |= ST20P_RX_FLAG_HDR_SPLIT;
Expand Down
5 changes: 5 additions & 0 deletions app/sample/sample_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ enum sample_args_cmd {
SAMPLE_ARG_TRANSPORT_FMT,
SAMPLE_ARG_PACKING,
SAMPLE_ARG_GDDR_PA,
SAMPLE_ARG_PROFILING_GDDR,
SAMPLE_ARG_RX_DUMP,
SAMPLE_ARG_USE_CPU_COPY,
SAMPLE_ARG_USER_META,
Expand Down Expand Up @@ -138,6 +139,7 @@ static struct option sample_args_options[] = {
{"udp_tx_bps_g", required_argument, 0, SAMPLE_ARG_UDP_TX_BPS_G},
{"gddr_pa", required_argument, 0, SAMPLE_ARG_GDDR_PA},
{"use_cpu_copy", no_argument, 0, SAMPLE_ARG_USE_CPU_COPY},
{"profiling_gddr", no_argument, 0, SAMPLE_ARG_PROFILING_GDDR},
{"rx_dump", no_argument, 0, SAMPLE_ARG_RX_DUMP},
{"user_meta", no_argument, 0, SAMPLE_ARG_USER_META},
{"perf_frames", required_argument, 0, SAMPLE_ARG_PERF_FRAMES},
Expand Down Expand Up @@ -444,6 +446,9 @@ static int _sample_parse_args(struct st_sample_context* ctx, int argc, char** ar
case SAMPLE_ARG_USE_CPU_COPY:
ctx->use_cpu_copy = true;
break;
case SAMPLE_ARG_PROFILING_GDDR:
ctx->profiling_gddr = true;
break;
case SAMPLE_ARG_PERF_FRAMES:
ctx->perf_frames = atoi(optarg);
break;
Expand Down
1 change: 1 addition & 0 deletions app/sample/sample_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct st_sample_context {
off_t gddr_pa;
off_t gddr_offset;
bool use_cpu_copy;
bool profiling_gddr;

bool has_user_meta; /* if provide user meta data with the st2110-20 frame */

Expand Down

0 comments on commit 4b2b5b4

Please sign in to comment.