Skip to content

Commit cbdd183

Browse files
committed
[WIP] avcodec/nvdec: avoid needless copy of output frame
Replaces the frame's data pointers with the mapped cuvid ones instead of copying the decoded picture. Adds buffer_refs to the frame to ensure the needed contexts stay alive and the cuvid idx stays allocated. Adds another buffer_ref that unmaps the cuvid frame once the frame itself is unreferenced.
1 parent 0736f32 commit cbdd183

File tree

1 file changed

+49
-25
lines changed

1 file changed

+49
-25
lines changed

libavcodec/nvdec.c

Lines changed: 49 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
308308
params.CodecType = cuvid_codec_type;
309309
params.ChromaFormat = cuvid_chroma_format;
310310
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
311-
params.ulNumOutputSurfaces = 1;
311+
params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
312312

313313
ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
314314
if (ret < 0) {
@@ -354,6 +354,28 @@ static void nvdec_fdd_priv_free(void *priv)
354354
av_freep(&priv);
355355
}
356356

357+
static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data) /* Callback handed to av_buffer_create() in nvdec_retrieve_data: unmaps the cuvid frame whose device pointer is stored in frame->data[0]. `data` is unused. */
358+
{
359+
AVFrame *frame = (AVFrame*)opaque; /* NOTE(review): opaque is the raw AVFrame pointer given to av_buffer_create(); presumably dangling if the AVFrame struct is moved or freed before this callback runs -- confirm. */
360+
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data; /* NOTE(review): assumes frame->private_ref is still set when this callback fires -- confirm. */
361+
NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
362+
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data; /* decoder supplies the CUDA context, the function tables and the cuvid decoder handle used below */
363+
CUresult err;
364+
CUcontext dummy; /* receives the popped context; its value is never read */
365+
366+
err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx); /* push the decoder's CUDA context before calling into cuvid */
367+
if (err != CUDA_SUCCESS) {
368+
av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
369+
return; /* bails out without calling cuvidUnmapVideoFrame; the failure is only logged since the callback returns void */
370+
}
371+
372+
err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, (CUdeviceptr)frame->data[0]); /* data[0] is set to devptr + 0 in nvdec_retrieve_data, i.e. the mapped base pointer */
373+
if (err != CUDA_SUCCESS)
374+
av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n"); /* logged only; execution continues to the context pop */
375+
376+
decoder->cudl->cuCtxPopCurrent(&dummy); /* undo the push above; the return value is ignored */
377+
}
378+
357379
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
358380
{
359381
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
@@ -383,32 +405,31 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
383405
goto finish;
384406
}
385407

386-
for (i = 0; frame->data[i]; i++) {
387-
CUDA_MEMCPY2D cpy = {
388-
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
389-
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
390-
.srcDevice = devptr,
391-
.dstDevice = (CUdeviceptr)frame->data[i],
392-
.srcPitch = pitch,
393-
.dstPitch = frame->linesize[i],
394-
.srcY = offset,
395-
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
396-
.Height = frame->height >> (i ? 1 : 0),
397-
};
398-
399-
err = decoder->cudl->cuMemcpy2D(&cpy);
400-
if (err != CUDA_SUCCESS) {
401-
av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
402-
err);
403-
ret = AVERROR_UNKNOWN;
404-
goto copy_fail;
405-
}
408+
frame->buf[1] = av_buffer_create(NULL, 0, nvdec_unmap_mapped_frame, frame, AV_BUFFER_FLAG_READONLY);
409+
frame->buf[2] = av_buffer_ref(cf->idx_ref);
410+
frame->buf[3] = av_buffer_ref(cf->decoder_ref);
406411

407-
offset += cpy.Height;
412+
if (!frame->buf[1] || !frame->buf[2] || !frame->buf[3]) {
413+
ret = AVERROR(ENOMEM);
414+
goto copy_fail;
415+
}
416+
417+
for (i = 0; frame->data[i]; i++) {
418+
frame->data[i] = (uint8_t*)(devptr + offset);
419+
frame->linesize[i] = pitch;
420+
offset += pitch * (frame->height >> (i ? 1 : 0));
408421
}
409422

423+
goto finish;
424+
410425
copy_fail:
411-
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
426+
if (!frame->buf[1])
427+
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
428+
else
429+
av_buffer_unref(&frame->buf[1]);
430+
av_buffer_unref(&frame->buf[2]);
431+
av_buffer_unref(&frame->buf[3]);
432+
return ret;
412433

413434
finish:
414435
decoder->cudl->cuCtxPopCurrent(&dummy);
@@ -546,9 +567,12 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
546567
}
547568

548569
frames_ctx->format = AV_PIX_FMT_CUDA;
549-
frames_ctx->width = (avctx->coded_width + 1) & ~1;
550-
frames_ctx->height = (avctx->coded_height + 1) & ~1;
551570
frames_ctx->initial_pool_size = dpb_size;
571+
// We are not actually using this hw_frames_ctx to allocate frames
572+
// It only exists because hwaccel infra mandates it to exist. (maybe?)
573+
// avcodec_default_get_buffer2 overrides width/height, so we can do this here:
574+
frames_ctx->width = 0;
575+
frames_ctx->height = 0;
552576

553577
switch (sw_desc->comp[0].depth) {
554578
case 8:

0 commit comments

Comments
 (0)