Skip to content

Commit 8de3458

Browse files
Ben Chang authored and BtbN committed
avcodec/nvenc: surface allocation reduction
This patch aims to reduce the number of input/output surfaces NVENC allocates per session. Previously, the default number of allocated surfaces was 32 (unless a user-specified parameter or lookahead was involved). Having a large number of surfaces consumes extra video memory (especially for higher-resolution encoding). The patch changes the surface calculation for the default, B-frame, and lookahead scenarios respectively. The other change involves surface selection. Previously, if a session allocated x surfaces, only x-1 surfaces were used (due to the combination of output delay and lock-toggle logic). To prevent unused surfaces, surface rotation is changed to use a predefined FIFO. Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
1 parent 78a5fc4 commit 8de3458

File tree

4 files changed

+43
-22
lines changed

4 files changed

+43
-22
lines changed

libavcodec/nvenc.c

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
644644
static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx)
645645
{
646646
NvencContext *ctx = avctx->priv_data;
647-
int nb_surfaces = 0;
647+
// default minimum of 4 surfaces
648+
// multiply by 2 for number of NVENCs on gpu (hardcode to 2)
649+
// another multiply by 2 to avoid blocking next PBB group
650+
int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2);
648651

652+
// lookahead enabled
649653
if (ctx->rc_lookahead > 0) {
650-
nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4;
651-
if (ctx->nb_surfaces < nb_surfaces) {
654+
// +1 is to account for lkd_bound calculation later
655+
// +4 is to allow sufficient pipelining with lookahead
656+
nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4));
657+
if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0)
658+
{
652659
av_log(avctx, AV_LOG_WARNING,
653660
"Defined rc_lookahead requires more surfaces, "
654661
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
655-
ctx->nb_surfaces = nb_surfaces;
656662
}
663+
ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces);
664+
} else {
665+
if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0)
666+
{
667+
av_log(avctx, AV_LOG_WARNING,
668+
"Defined b-frame requires more surfaces, "
669+
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces);
670+
ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces);
671+
}
672+
else if (ctx->nb_surfaces <= 0)
673+
ctx->nb_surfaces = nb_surfaces;
674+
// otherwise use user specified value
657675
}
658676

659677
ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces));
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
10861104
NvencContext *ctx = avctx->priv_data;
10871105
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
10881106
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
1107+
NvencSurface* tmp_surface = &ctx->surfaces[idx];
10891108

10901109
NVENCSTATUS nv_status;
10911110
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
11211140
ctx->surfaces[idx].height = allocSurf.height;
11221141
}
11231142

1124-
ctx->surfaces[idx].lockCount = 0;
1125-
11261143
/* 1MB is large enough to hold most output frames.
11271144
* NVENC increases this automaticaly if it is not enough. */
11281145
allocOut.size = 1024 * 1024;
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
11411158
ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer;
11421159
ctx->surfaces[idx].size = allocOut.size;
11431160

1161+
av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL);
1162+
11441163
return 0;
11451164
}
11461165

@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
11561175
ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t));
11571176
if (!ctx->timestamp_list)
11581177
return AVERROR(ENOMEM);
1178+
1179+
ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
1180+
if (!ctx->unused_surface_queue)
1181+
return AVERROR(ENOMEM);
1182+
11591183
ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*));
11601184
if (!ctx->output_surface_queue)
11611185
return AVERROR(ENOMEM);
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
12221246
av_fifo_freep(&ctx->timestamp_list);
12231247
av_fifo_freep(&ctx->output_surface_ready_queue);
12241248
av_fifo_freep(&ctx->output_surface_queue);
1249+
av_fifo_freep(&ctx->unused_surface_queue);
12251250

12261251
if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) {
12271252
for (i = 0; i < ctx->nb_surfaces; ++i) {
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
13051330

13061331
static NvencSurface *get_free_frame(NvencContext *ctx)
13071332
{
1308-
int i;
1333+
NvencSurface *tmp_surf;
13091334

1310-
for (i = 0; i < ctx->nb_surfaces; i++) {
1311-
if (!ctx->surfaces[i].lockCount) {
1312-
ctx->surfaces[i].lockCount = 1;
1313-
return &ctx->surfaces[i];
1314-
}
1315-
}
1335+
if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
1336+
// queue empty
1337+
return NULL;
13161338

1317-
return NULL;
1339+
av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
1340+
return tmp_surf;
13181341
}
13191342

13201343
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface,
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
17121735
}
17131736

17141737
if (res) {
1715-
inSurf->lockCount = 0;
17161738
return res;
17171739
}
17181740

@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
17901812
if (res)
17911813
return res;
17921814

1793-
av_assert0(tmpoutsurf->lockCount);
1794-
tmpoutsurf->lockCount--;
1815+
av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
17951816

17961817
*got_packet = 1;
17971818
} else {

libavcodec/nvenc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ typedef struct NvencSurface
4444
NV_ENC_OUTPUT_PTR output_surface;
4545
NV_ENC_BUFFER_FORMAT format;
4646
int size;
47-
int lockCount;
4847
} NvencSurface;
4948

5049
typedef struct NvencDynLoadFunctions
@@ -110,6 +109,7 @@ typedef struct NvencContext
110109
int nb_surfaces;
111110
NvencSurface *surfaces;
112111

112+
AVFifoBuffer *unused_surface_queue;
113113
AVFifoBuffer *output_surface_queue;
114114
AVFifoBuffer *output_surface_ready_queue;
115115
AVFifoBuffer *timestamp_list;

libavcodec/nvenc_h264.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ static const AVOption options[] = {
7979
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
8080
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
8181
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
82-
OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
83-
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
82+
OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
83+
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
8484
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
8585
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
8686
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",

libavcodec/nvenc_hevc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ static const AVOption options[] = {
7878
0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
7979
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
8080
{ "rc-lookahead", "Number of frames to look ahead for rate-control",
81-
OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
82-
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, MAX_REGISTERED_FRAMES, VE },
81+
OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
82+
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_REGISTERED_FRAMES, VE },
8383
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
8484
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
8585
{ "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.",

0 commit comments

Comments
 (0)