Skip to content

Commit

Permalink
[WIP] avcodec/nvdec: avoid needless copy of output frame
Browse files Browse the repository at this point in the history
Replaces the data pointers with the mapped cuvid ones.
Adds buffer_refs to the frame to ensure the needed contexts stay alive
and the cuvid idx stays allocated.
Adds another buffer_ref whose free callback unmaps the frame once the frame itself is no longer referenced.
  • Loading branch information
BtbN committed May 7, 2018
1 parent 0736f32 commit cbdd183
Showing 1 changed file with 49 additions and 25 deletions.
74 changes: 49 additions & 25 deletions libavcodec/nvdec.c
Expand Up @@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
params.CodecType = cuvid_codec_type;
params.ChromaFormat = cuvid_chroma_format;
params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
params.ulNumOutputSurfaces = 1;
params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;

ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
if (ret < 0) {
Expand Down Expand Up @@ -354,6 +354,28 @@ static void nvdec_fdd_priv_free(void *priv)
av_freep(&priv);
}

static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
{
AVFrame *frame = (AVFrame*)opaque;
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
CUresult err;
CUcontext dummy;

err = decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx);
if (err != CUDA_SUCCESS) {
av_log(NULL, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
return;
}

err = decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, (CUdeviceptr)frame->data[0]);
if (err != CUDA_SUCCESS)
av_log(NULL, AV_LOG_ERROR, "cuvidUnmapVideoFrame failed\n");

decoder->cudl->cuCtxPopCurrent(&dummy);
}

static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
{
FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
Expand Down Expand Up @@ -383,32 +405,31 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
goto finish;
}

for (i = 0; frame->data[i]; i++) {
CUDA_MEMCPY2D cpy = {
.srcMemoryType = CU_MEMORYTYPE_DEVICE,
.dstMemoryType = CU_MEMORYTYPE_DEVICE,
.srcDevice = devptr,
.dstDevice = (CUdeviceptr)frame->data[i],
.srcPitch = pitch,
.dstPitch = frame->linesize[i],
.srcY = offset,
.WidthInBytes = FFMIN(pitch, frame->linesize[i]),
.Height = frame->height >> (i ? 1 : 0),
};

err = decoder->cudl->cuMemcpy2D(&cpy);
if (err != CUDA_SUCCESS) {
av_log(logctx, AV_LOG_ERROR, "Error copying decoded frame: %d\n",
err);
ret = AVERROR_UNKNOWN;
goto copy_fail;
}
frame->buf[1] = av_buffer_create(NULL, 0, nvdec_unmap_mapped_frame, frame, AV_BUFFER_FLAG_READONLY);
frame->buf[2] = av_buffer_ref(cf->idx_ref);
frame->buf[3] = av_buffer_ref(cf->decoder_ref);

offset += cpy.Height;
if (!frame->buf[1] || !frame->buf[2] || !frame->buf[3]) {
ret = AVERROR(ENOMEM);
goto copy_fail;
}

for (i = 0; frame->data[i]; i++) {
frame->data[i] = (uint8_t*)(devptr + offset);
frame->linesize[i] = pitch;
offset += pitch * (frame->height >> (i ? 1 : 0));
}

goto finish;

copy_fail:
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
if (!frame->buf[1])
decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr);
else
av_buffer_unref(&frame->buf[1]);
av_buffer_unref(&frame->buf[2]);
av_buffer_unref(&frame->buf[3]);
return ret;

finish:
decoder->cudl->cuCtxPopCurrent(&dummy);
Expand Down Expand Up @@ -546,9 +567,12 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
}

frames_ctx->format = AV_PIX_FMT_CUDA;
frames_ctx->width = (avctx->coded_width + 1) & ~1;
frames_ctx->height = (avctx->coded_height + 1) & ~1;
frames_ctx->initial_pool_size = dpb_size;
// We are not actually using this hw_frames_ctx to allocate frames
// It only exists because hwaccel infra mandates it to exist. (maybe?)
// avcodec_default_get_buffer2 overrides width/height, so we can do this here:
frames_ctx->width = 0;
frames_ctx->height = 0;

switch (sw_desc->comp[0].depth) {
case 8:
Expand Down

0 comments on commit cbdd183

Please sign in to comment.