Skip to content
Permalink
Browse files

avcodec: implement vp9 nvdec hwaccel

  • Loading branch information
BtbN committed Nov 11, 2017
1 parent a1630cf commit f01a3efa00660db31dd888ca9dbd3e694f89eb02
Showing with 234 additions and 1 deletion.
  1. +2 −0 configure
  2. +1 −0 libavcodec/Makefile
  3. +1 −0 libavcodec/allcodecs.c
  4. +8 −0 libavcodec/cuviddec.c
  5. +212 −0 libavcodec/nvdec_vp9.c
  6. +10 −1 libavcodec/vp9.c
@@ -2753,6 +2753,8 @@ vp9_d3d11va2_hwaccel_select="vp9_decoder"
vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
vp9_dxva2_hwaccel_select="vp9_decoder"
vp9_mediacodec_hwaccel_deps="mediacodec"
vp9_nvdec_hwaccel_deps="cuda nvdec"
vp9_nvdec_hwaccel_select="vp9_decoder"
vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
vp9_vaapi_hwaccel_select="vp9_decoder"
wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
@@ -870,6 +870,7 @@ OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o
OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o
OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL) += dxva2_vp9.o
OBJS-$(CONFIG_VP9_DXVA2_HWACCEL) += dxva2_vp9.o
OBJS-$(CONFIG_VP9_NVDEC_HWACCEL) += nvdec_vp9.o
OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec_other.o

@@ -123,6 +123,7 @@ static void register_all(void)
REGISTER_HWACCEL(VP9_D3D11VA2, vp9_d3d11va2);
REGISTER_HWACCEL(VP9_DXVA2, vp9_dxva2);
REGISTER_HWACCEL(VP9_MEDIACODEC, vp9_mediacodec);
REGISTER_HWACCEL(VP9_NVDEC, vp9_nvdec);
REGISTER_HWACCEL(VP9_VAAPI, vp9_vaapi);
REGISTER_HWACCEL(WMV3_D3D11VA, wmv3_d3d11va);
REGISTER_HWACCEL(WMV3_D3D11VA2, wmv3_d3d11va2);
@@ -339,6 +339,14 @@ static int CUDAAPI cuvid_handle_picture_decode(void *opaque, CUVIDPICPARAMS* pic

av_log(avctx, AV_LOG_TRACE, "pfnDecodePicture\n");

av_log(avctx, AV_LOG_WARNING, "PicWidthInMbs: %d\n", picparams->PicWidthInMbs);
av_log(avctx, AV_LOG_WARNING, "FrameHeightInMbs Field 2: %d\n", picparams->FrameHeightInMbs);
av_log(avctx, AV_LOG_WARNING, "field_pic_flag Field 3: %d\n", picparams->field_pic_flag);
av_log(avctx, AV_LOG_WARNING, "bottom_field_flag Field 4: %d\n", picparams->bottom_field_flag);
av_log(avctx, AV_LOG_WARNING, "second_field Field 5: %d\n", picparams->second_field);
av_log(avctx, AV_LOG_WARNING, "ref_pic_flag Field 6: %d\n", picparams->ref_pic_flag);
av_log(avctx, AV_LOG_WARNING, "intra_pic_flag Field 7: %d\n", picparams->intra_pic_flag);

ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidDecodePicture(ctx->cudecoder, picparams));
if (ctx->internal_error < 0)
return 0;
@@ -0,0 +1,212 @@
/*
* VP9 HW decode acceleration through NVDEC
*
* Copyright (c) 2016 Timo Rothenpieler
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "libavutil/pixdesc.h"

#include "avcodec.h"
#include "nvdec.h"
#include "decode.h"
#include "internal.h"
#include "vp9shared.h"

static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
{
VP9SharedContext *h = avctx->priv_data;
const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);

NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
CUVIDPICPARAMS *pp = &ctx->pic_params;
CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
FrameDecodeData *fdd;
NVDECFrame *cf;
AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;

int ret, i;

ret = ff_nvdec_start_frame(avctx, cur_frame);
if (ret < 0)
return ret;

fdd = (FrameDecodeData*)cur_frame->private_ref->data;
cf = (NVDECFrame*)fdd->hwaccel_priv;

*pp = (CUVIDPICPARAMS) {
.PicWidthInMbs = cur_frame->width / 16,
.FrameHeightInMbs = cur_frame->height / 16,
.CurrPicIdx = cf->idx,

.CodecSpecific.vp9 = {
.width = cur_frame->width,
.height = cur_frame->height,

.LastRefIdx = h->h.refidx[0],
.GoldenRefIdx = h->h.refidx[1],
.AltRefIdx = h->h.refidx[2],

.profile = h->h.profile,
.frameContextIdx = h->h.framectxid,
.frameType = !h->h.keyframe,
.showFrame = !h->h.invisible,
.errorResilient = h->h.errorres,
.frameParallelDecoding = h->h.parallelmode,
.subSamplingX = pixdesc->log2_chroma_w,
.subSamplingY = pixdesc->log2_chroma_h,
.intraOnly = h->h.intraonly,
.allow_high_precision_mv = h->h.keyframe ? 0 : h->h.highprecisionmvs,
.refreshEntropyProbs = 0, //TODO

.refFrameSignBias[0] = h->h.signbias[0],
.refFrameSignBias[1] = h->h.signbias[1],
.refFrameSignBias[2] = h->h.signbias[2],
.refFrameSignBias[3] = 0, //TODO: ???

.bitDepthMinus8Luma = pixdesc->comp[0].depth - 8,
.bitDepthMinus8Chroma = pixdesc->comp[1].depth - 8,

.loopFilterLevel = h->h.filter.level,
.loopFilterSharpness = h->h.filter.sharpness,
.modeRefLfEnabled = h->h.lf_delta.enabled,

.log2_tile_columns = h->h.tiling.log2_tile_cols,
.log2_tile_rows = h->h.tiling.log2_tile_rows,

.segmentEnabled = h->h.segmentation.enabled,
.segmentMapUpdate = h->h.segmentation.update_map,
.segmentMapTemporalUpdate = h->h.segmentation.temporal,
.segmentFeatureMode = h->h.segmentation.absolute_vals, //TODO: verify

// TODO: verify if needs processing
.qpYAc = h->h.yac_qi,
.qpYDc = h->h.ydc_qdelta,
.qpChDc = h->h.uvdc_qdelta,
.qpChAc = h->h.uvac_qdelta,

// TODO: ???
.activeRefIdx[0] = 0,
.activeRefIdx[1] = 0,
.activeRefIdx[2] = 0,

.resetFrameContext = h->h.resetctx,
.mcomp_filter_type = h->h.filtermode ^ (h->h.filtermode <= 1),
.frameTagSize = 0, //TODO
.offsetToDctParts = 0, //TODO
}
};

for (i = 0; i < 2; i++)
ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];

for (i = 0; i < 4; i++)
ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];

for (i = 0; i < 7; i++)
ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];

for (i = 0; i < 3; i++)
ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];

for (i = 0; i < 8; i++) {
ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;

ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
ppc->segmentFeatureData[i][3] = 0;
}

switch (avctx->colorspace) {
default:
case AVCOL_SPC_UNSPECIFIED:
ppc->colorSpace = 0;
break;
case AVCOL_SPC_BT470BG:
ppc->colorSpace = 1;
break;
case AVCOL_SPC_BT709:
ppc->colorSpace = 2;
break;
case AVCOL_SPC_SMPTE170M:
ppc->colorSpace = 3;
break;
case AVCOL_SPC_SMPTE240M:
ppc->colorSpace = 4;
break;
case AVCOL_SPC_BT2020_NCL:
ppc->colorSpace = 5;
break;
case AVCOL_SPC_RESERVED:
ppc->colorSpace = 6;
break;
case AVCOL_SPC_RGB:
ppc->colorSpace = 7;
break;
}

return 0;
}

/**
 * Append one chunk of frame data to the context's bitstream buffer and
 * record the offset at which it starts.
 *
 * @param avctx  codec context whose hwaccel private data is the NVDECContext
 * @param buffer data to append
 * @param size   number of bytes at buffer
 * @return 0 on success, AVERROR(ENOMEM) if either buffer cannot be grown
 */
static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
{
    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
    void *grown;

    /* Grow the bitstream buffer so the new chunk fits behind the data
     * already collected. */
    grown = av_fast_realloc(ctx->bitstream, &ctx->bitstream_allocated,
                            ctx->bitstream_len + size);
    if (grown == NULL)
        return AVERROR(ENOMEM);
    ctx->bitstream = grown;

    /* Grow the offset table by one entry. */
    grown = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
                            (ctx->nb_slices + 1) * sizeof(ctx->slice_offsets[0]));
    if (grown == NULL)
        return AVERROR(ENOMEM);
    ctx->slice_offsets = grown;

    /* The new chunk starts where the previously stored data ends. */
    ctx->slice_offsets[ctx->nb_slices] = ctx->bitstream_len;
    memcpy(ctx->bitstream + ctx->bitstream_len, buffer, size);

    ctx->nb_slices    += 1;
    ctx->bitstream_len += size;

    return 0;
}

/**
 * Initialise the NVDEC hwaccel for VP9 by delegating to
 * ff_nvdec_decode_init().
 *
 * @param avctx codec context
 * @return whatever ff_nvdec_decode_init() returns
 */
static int nvdec_vp9_decode_init(AVCodecContext *avctx)
{
    /* VP9 uses a fixed size pool of 8 possible reference frames; that
     * count is handed on as the second argument. */
    enum { VP9_REF_POOL_SIZE = 8 };

    return ff_nvdec_decode_init(avctx, VP9_REF_POOL_SIZE);
}

/*
 * Hardware accelerator descriptor for VP9 through NVDEC: ties the VP9
 * codec ID and the CUDA pixel format to the callbacks in this file and
 * to the shared ff_nvdec_* end-frame and uninit routines.
 */
AVHWAccel ff_vp9_nvdec_hwaccel = {
    /* identification */
    .name           = "vp9_nvdec",
    .id             = AV_CODEC_ID_VP9,
    .type           = AVMEDIA_TYPE_VIDEO,
    .pix_fmt        = AV_PIX_FMT_CUDA,

    /* lifecycle */
    .init           = nvdec_vp9_decode_init,
    .uninit         = ff_nvdec_decode_uninit,
    .priv_data_size = sizeof(NVDECContext),

    /* per-frame callbacks */
    .start_frame    = nvdec_vp9_start_frame,
    .decode_slice   = nvdec_vp9_decode_slice,
    .end_frame      = ff_nvdec_end_frame,
};
@@ -169,7 +169,10 @@ static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)

static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
CONFIG_VP9_NVDEC_HWACCEL + \
CONFIG_VP9_VAAPI_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
VP9Context *s = avctx->priv_data;
uint8_t *p;
@@ -191,12 +194,18 @@ static int update_size(AVCodecContext *avctx, int w, int h)
*fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
*fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
*fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
*fmtp++ = AV_PIX_FMT_VAAPI;
#endif
break;
case AV_PIX_FMT_YUV420P10:
case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
*fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
*fmtp++ = AV_PIX_FMT_VAAPI;
#endif

0 comments on commit f01a3ef

Please sign in to comment.