Skip to content

Commit 72e4365

Browse files
committed
avcodec/nvdec: avoid needless copy of output frame
Replaces the data pointers with the mapped cuvid ones. Adds buffer_refs to the frame to ensure the needed contexts stay alive and the cuvid idx stays allocated. Adds another buffer_ref whose free callback unmaps the frame once the frame itself is unreferenced.
1 parent 13f2db0 commit 72e4365

File tree

1 file changed

+73
-23
lines changed

1 file changed

+73
-23
lines changed

libavcodec/nvdec.c

Lines changed: 73 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
308308
params.CodecType = cuvid_codec_type;
309309
params.ChromaFormat = cuvid_chroma_format;
310310
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
311-
params.ulNumOutputSurfaces = 1;
311+
params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
312312

313313
ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
314314
if (ret < 0) {
@@ -354,13 +354,40 @@ static void nvdec_fdd_priv_free(void *priv)
354354
av_freep(&priv);
355355
}
356356

357+
/*
 * Buffer free callback that unmaps a mapped cuvid output frame and releases
 * the bookkeeping attached to it.
 *
 * In this file it is passed to av_buffer_create() in nvdec_retrieve_data()
 * as the free function of frame->buf[1].
 *
 * opaque: the mapped device pointer, stored directly in the pointer value
 *         (cast back to CUdeviceptr here; it is not dereferenced).
 * data:   the NVDECFrame allocated by the creator of the buffer; its idx_ref
 *         and decoder_ref are unreferenced and the struct itself is freed.
 */
static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
358+
{
359+
NVDECFrame *unmap_data = (NVDECFrame*)data;
360+
/* decoder_ref is dereferenced unconditionally, so it must be non-NULL. */
NVDECDecoder *decoder = (NVDECDecoder*)unmap_data->decoder_ref->data;
361+
CUdeviceptr devptr = (CUdeviceptr)opaque;
362+
CUresult err;
363+
CUcontext dummy;
364+
365+
/* The unmap call below is made with the decoder's CUDA context pushed. */
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
366+
if (err != CUDA_SUCCESS) {
367+
av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
368+
/* Skip the unmap (and the pop), but still release the references. */
goto finish;
369+
}
370+
371+
err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
372+
/* An unmap failure is only logged; cleanup continues regardless. */
if (err != CUDA_SUCCESS)
373+
av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");
374+
375+
/* The return value of the pop is not checked. */
decoder->cudl->cuCtxPopCurrent(&dummy);
376+
377+
finish:
378+
/* Drop the references only after the decoder has been used above. */
av_buffer_unref(&unmap_data->idx_ref);
379+
av_buffer_unref(&unmap_data->decoder_ref);
380+
av_free(unmap_data);
381+
}
382+
357383
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
358384
{
359385
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
360386
NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
361387
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
362388

363389
CUVIDPROCPARAMS vpp = { .progressive_frame = 1 };
390+
NVDECFrame *unmap_data = NULL;
364391

365392
CUresult err;
366393
CUcontext dummy;
@@ -383,32 +410,39 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
383410
goto finish;
384411
}
385412

386-
for (i = 0; frame->data[i]; i++) {
387-
CUDA_MEMCPY2D cpy = {
388-
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
389-
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
390-
.srcDevice = devptr,
391-
.dstDevice = (CUdeviceptr)frame->data[i],
392-
.srcPitch = pitch,
393-
.dstPitch = frame->linesize[i],
394-
.srcY = offset,
395-
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
396-
.Height = frame->height >> (i ? 1 : 0),
397-
};
398-
399-
err = decoder->cudl->cuMemcpy2D(&cpy);
400-
if (err != CUDA_SUCCESS) {
401-
av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
402-
err);
403-
ret = AVERROR_UNKNOWN;
404-
goto copy_fail;
405-
}
413+
unmap_data = av_mallocz(sizeof(*unmap_data));
414+
if (!unmap_data) {
415+
ret = AVERROR(ENOMEM);
416+
goto copy_fail;
417+
}
406418

407-
offset += cpy.Height;
419+
frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
420+
nvdec_unmap_mapped_frame, (void*)devptr,
421+
AV_BUFFER_FLAG_READONLY);
422+
if (!frame->buf[1]) {
423+
ret = AVERROR(ENOMEM);
424+
goto copy_fail;
408425
}
409426

427+
unmap_data->idx = cf->idx;
428+
unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
429+
unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
430+
431+
for (i = 0; frame->linesize[i]; i++) {
432+
frame->data[i] = (uint8_t*)(devptr + offset);
433+
frame->linesize[i] = pitch;
434+
offset += pitch * (frame->height >> (i ? 1 : 0));
435+
}
436+
437+
goto finish;
438+
410439
copy_fail:
411-
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
440+
if (!frame->buf[1]) {
441+
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
442+
av_freep(&unmap_data);
443+
} else {
444+
av_buffer_unref(&frame->buf[1]);
445+
}
412446

413447
finish:
414448
decoder->cudl->cuCtxPopCurrent(&dummy);
@@ -521,6 +555,16 @@ int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
521555
return 0;
522556
}
523557

558+
/*
 * Free callback for the frames context: uninitializes ctx->pool.
 *
 * In this file it is assigned to frames_ctx->free in ff_nvdec_frame_params(),
 * next to the creation of the pool that uses nvdec_alloc_dummy.
 */
static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
559+
{
560+
av_buffer_pool_uninit(&ctx->pool);
561+
}
562+
563+
/*
 * Pool allocator that returns a buffer with NULL data and size 0.
 *
 * The size argument is ignored. In this file it is passed to
 * av_buffer_pool_init() in ff_nvdec_frame_params().
 * NOTE(review): presumably no real backing memory is needed because
 * nvdec_retrieve_data() points frame->data[] at the mapped cuvid frame;
 * confirm against the callers of the pool.
 *
 * Returns the result of av_buffer_create() unchanged (may be NULL).
 */
static AVBufferRef *nvdec_alloc_dummy(int size)
564+
{
565+
return av_buffer_create(NULL, 0, NULL, NULL, 0);
566+
}
567+
524568
int ff_nvdec_frame_params(AVCodecContext *avctx,
525569
AVBufferRef *hw_frames_ctx,
526570
int dpb_size)
@@ -550,6 +594,12 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
550594
frames_ctx->height = (avctx->coded_height + 1) & ~1;
551595
frames_ctx->initial_pool_size = dpb_size;
552596

597+
frames_ctx->free = nvdec_free_dummy;
598+
frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
599+
600+
if (!frames_ctx->pool)
601+
return AVERROR(ENOMEM);
602+
553603
switch (sw_desc->comp[0].depth) {
554604
case 8:
555605
frames_ctx->sw_format = AV_PIX_FMT_NV12;

0 commit comments

Comments
 (0)