@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
644644static av_cold int nvenc_recalc_surfaces (AVCodecContext * avctx )
645645{
646646 NvencContext * ctx = avctx -> priv_data ;
647- int nb_surfaces = 0 ;
647+ // default minimum of 4 surfaces
648+ // multiply by 2 for number of NVENCs on gpu (hardcode to 2)
649+ // another multiply by 2 to avoid blocking next PBB group
650+ int nb_surfaces = FFMAX (4 , ctx -> encode_config .frameIntervalP * 2 * 2 );
648651
652+ // lookahead enabled
649653 if (ctx -> rc_lookahead > 0 ) {
650- nb_surfaces = ctx -> rc_lookahead + ((ctx -> encode_config .frameIntervalP > 0 ) ? ctx -> encode_config .frameIntervalP : 0 ) + 1 + 4 ;
651- if (ctx -> nb_surfaces < nb_surfaces ) {
654+ // +1 is to account for lkd_bound calculation later
655+ // +4 is to allow sufficient pipelining with lookahead
656+ nb_surfaces = FFMAX (1 , FFMAX (nb_surfaces , ctx -> rc_lookahead + ctx -> encode_config .frameIntervalP + 1 + 4 ));
657+ if (nb_surfaces > ctx -> nb_surfaces && ctx -> nb_surfaces > 0 )
658+ {
652659 av_log (avctx , AV_LOG_WARNING ,
653660 "Defined rc_lookahead requires more surfaces, "
654661 "increasing used surfaces %d -> %d\n" , ctx -> nb_surfaces , nb_surfaces );
655- ctx -> nb_surfaces = nb_surfaces ;
656662 }
663+ ctx -> nb_surfaces = FFMAX (nb_surfaces , ctx -> nb_surfaces );
664+ } else {
665+ if (ctx -> encode_config .frameIntervalP > 1 && ctx -> nb_surfaces < nb_surfaces && ctx -> nb_surfaces > 0 )
666+ {
667+ av_log (avctx , AV_LOG_WARNING ,
668+ "Defined b-frame requires more surfaces, "
669+ "increasing used surfaces %d -> %d\n" , ctx -> nb_surfaces , nb_surfaces );
670+ ctx -> nb_surfaces = FFMAX (ctx -> nb_surfaces , nb_surfaces );
671+ }
672+ else if (ctx -> nb_surfaces <= 0 )
673+ ctx -> nb_surfaces = nb_surfaces ;
674+ // otherwise use user specified value
657675 }
658676
659677 ctx -> nb_surfaces = FFMAX (1 , FFMIN (MAX_REGISTERED_FRAMES , ctx -> nb_surfaces ));
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
10861104 NvencContext * ctx = avctx -> priv_data ;
10871105 NvencDynLoadFunctions * dl_fn = & ctx -> nvenc_dload_funcs ;
10881106 NV_ENCODE_API_FUNCTION_LIST * p_nvenc = & dl_fn -> nvenc_funcs ;
1107+ NvencSurface * tmp_surface = & ctx -> surfaces [idx ];
10891108
10901109 NVENCSTATUS nv_status ;
10911110 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
11211140 ctx -> surfaces [idx ].height = allocSurf .height ;
11221141 }
11231142
1124- ctx -> surfaces [idx ].lockCount = 0 ;
1125-
11261143 /* 1MB is large enough to hold most output frames.
11271144 * NVENC increases this automatically if it is not enough. */
11281145 allocOut .size = 1024 * 1024 ;
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
11411158 ctx -> surfaces [idx ].output_surface = allocOut .bitstreamBuffer ;
11421159 ctx -> surfaces [idx ].size = allocOut .size ;
11431160
1161+ av_fifo_generic_write (ctx -> unused_surface_queue , & tmp_surface , sizeof (tmp_surface ), NULL );
1162+
11441163 return 0 ;
11451164}
11461165
@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
11561175 ctx -> timestamp_list = av_fifo_alloc (ctx -> nb_surfaces * sizeof (int64_t ));
11571176 if (!ctx -> timestamp_list )
11581177 return AVERROR (ENOMEM );
1178+
1179+ ctx -> unused_surface_queue = av_fifo_alloc (ctx -> nb_surfaces * sizeof (NvencSurface * ));
1180+ if (!ctx -> unused_surface_queue )
1181+ return AVERROR (ENOMEM );
1182+
11591183 ctx -> output_surface_queue = av_fifo_alloc (ctx -> nb_surfaces * sizeof (NvencSurface * ));
11601184 if (!ctx -> output_surface_queue )
11611185 return AVERROR (ENOMEM );
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
12221246 av_fifo_freep (& ctx -> timestamp_list );
12231247 av_fifo_freep (& ctx -> output_surface_ready_queue );
12241248 av_fifo_freep (& ctx -> output_surface_queue );
1249+ av_fifo_freep (& ctx -> unused_surface_queue );
12251250
12261251 if (ctx -> surfaces && avctx -> pix_fmt == AV_PIX_FMT_CUDA ) {
12271252 for (i = 0 ; i < ctx -> nb_surfaces ; ++ i ) {
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
13051330
13061331static NvencSurface * get_free_frame (NvencContext * ctx )
13071332{
1308- int i ;
1333+ NvencSurface * tmp_surf ;
13091334
1310- for (i = 0 ; i < ctx -> nb_surfaces ; i ++ ) {
1311- if (!ctx -> surfaces [i ].lockCount ) {
1312- ctx -> surfaces [i ].lockCount = 1 ;
1313- return & ctx -> surfaces [i ];
1314- }
1315- }
1335+ if (!(av_fifo_size (ctx -> unused_surface_queue ) > 0 ))
1336+ // queue empty
1337+ return NULL ;
13161338
1317- return NULL ;
1339+ av_fifo_generic_read (ctx -> unused_surface_queue , & tmp_surf , sizeof (tmp_surf ), NULL );
1340+ return tmp_surf ;
13181341}
13191342
13201343static int nvenc_copy_frame (AVCodecContext * avctx , NvencSurface * nv_surface ,
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
17121735 }
17131736
17141737 if (res ) {
1715- inSurf -> lockCount = 0 ;
17161738 return res ;
17171739 }
17181740
@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
17901812 if (res )
17911813 return res ;
17921814
1793- av_assert0 (tmpoutsurf -> lockCount );
1794- tmpoutsurf -> lockCount -- ;
1815+ av_fifo_generic_write (ctx -> unused_surface_queue , & tmpoutsurf , sizeof (tmpoutsurf ), NULL );
17951816
17961817 * got_packet = 1 ;
17971818 } else {
0 commit comments