@@ -308,7 +308,7 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
308308 params .CodecType = cuvid_codec_type ;
309309 params .ChromaFormat = cuvid_chroma_format ;
310310 params .ulNumDecodeSurfaces = frames_ctx -> initial_pool_size ;
311- params .ulNumOutputSurfaces = 1 ;
311+ params .ulNumOutputSurfaces = frames_ctx -> initial_pool_size ;
312312
313313 ret = nvdec_decoder_create (& ctx -> decoder_ref , frames_ctx -> device_ref , & params , avctx );
314314 if (ret < 0 ) {
@@ -354,6 +354,28 @@ static void nvdec_fdd_priv_free(void *priv)
354354 av_freep (& priv );
355355}
356356
357+ static void nvdec_unmap_mapped_frame (void * opaque , uint8_t * data )
358+ {
359+ AVFrame * frame = (AVFrame * )opaque ;
360+ FrameDecodeData * fdd = (FrameDecodeData * )frame -> private_ref -> data ;
361+ NVDECFrame * cf = (NVDECFrame * )fdd -> hwaccel_priv ;
362+ NVDECDecoder * decoder = (NVDECDecoder * )cf -> decoder_ref -> data ;
363+ CUresult err ;
364+ CUcontext dummy ;
365+
366+ err = decoder -> cudl -> cuCtxPushCurrent (decoder -> cuda_ctx );
367+ if (err != CUDA_SUCCESS ) {
368+ av_log (NULL , AV_LOG_ERROR , "cuCtxPushCurrent failed\n" );
369+ return ;
370+ }
371+
372+ err = decoder -> cvdl -> cuvidUnmapVideoFrame (decoder -> decoder , (CUdeviceptr )frame -> data [0 ]);
373+ if (err != CUDA_SUCCESS )
374+ av_log (NULL , AV_LOG_ERROR , "cuvidUnmapVideoFrame failed\n" );
375+
376+ decoder -> cudl -> cuCtxPopCurrent (& dummy );
377+ }
378+
357379static int nvdec_retrieve_data (void * logctx , AVFrame * frame )
358380{
359381 FrameDecodeData * fdd = (FrameDecodeData * )frame -> private_ref -> data ;
@@ -383,32 +405,31 @@ static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
383405 goto finish ;
384406 }
385407
386- for (i = 0 ; frame -> data [i ]; i ++ ) {
387- CUDA_MEMCPY2D cpy = {
388- .srcMemoryType = CU_MEMORYTYPE_DEVICE ,
389- .dstMemoryType = CU_MEMORYTYPE_DEVICE ,
390- .srcDevice = devptr ,
391- .dstDevice = (CUdeviceptr )frame -> data [i ],
392- .srcPitch = pitch ,
393- .dstPitch = frame -> linesize [i ],
394- .srcY = offset ,
395- .WidthInBytes = FFMIN (pitch , frame -> linesize [i ]),
396- .Height = frame -> height >> (i ? 1 : 0 ),
397- };
398-
399- err = decoder -> cudl -> cuMemcpy2D (& cpy );
400- if (err != CUDA_SUCCESS ) {
401- av_log (logctx , AV_LOG_ERROR , "Error copying decoded frame: %d\n" ,
402- err );
403- ret = AVERROR_UNKNOWN ;
404- goto copy_fail ;
405- }
408+ frame -> buf [1 ] = av_buffer_create (NULL , 0 , nvdec_unmap_mapped_frame , frame , AV_BUFFER_FLAG_READONLY );
409+ frame -> buf [2 ] = av_buffer_ref (cf -> idx_ref );
410+ frame -> buf [3 ] = av_buffer_ref (cf -> decoder_ref );
406411
407- offset += cpy .Height ;
412+ if (!frame -> buf [1 ] || !frame -> buf [2 ] || !frame -> buf [3 ]) {
413+ ret = AVERROR (ENOMEM );
414+ goto copy_fail ;
415+ }
416+
417+ for (i = 0 ; frame -> data [i ]; i ++ ) {
418+ frame -> data [i ] = (uint8_t * )(devptr + offset );
419+ frame -> linesize [i ] = pitch ;
420+ offset += pitch * (frame -> height >> (i ? 1 : 0 ));
408421 }
409422
423+ goto finish ;
424+
410425copy_fail :
411- decoder -> cvdl -> cuvidUnmapVideoFrame (decoder -> decoder , devptr );
426+ if (!frame -> buf [1 ])
427+ decoder -> cvdl -> cuvidUnmapVideoFrame (decoder -> decoder , devptr );
428+ else
429+ av_buffer_unref (& frame -> buf [1 ]);
430+ av_buffer_unref (& frame -> buf [2 ]);
431+ av_buffer_unref (& frame -> buf [3 ]);
432+ return ret ;
412433
413434finish :
414435 decoder -> cudl -> cuCtxPopCurrent (& dummy );
@@ -546,9 +567,12 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
546567 }
547568
548569 frames_ctx -> format = AV_PIX_FMT_CUDA ;
549- frames_ctx -> width = (avctx -> coded_width + 1 ) & ~1 ;
550- frames_ctx -> height = (avctx -> coded_height + 1 ) & ~1 ;
551570 frames_ctx -> initial_pool_size = dpb_size ;
571+ // We are not actually using this hw_frames_ctx to allocate frames
572+ // It only exists because hwaccel infra mandates it to exist. (maybe?)
573+ // avcodec_default_get_buffer2 overrides width/height, so we can do this here:
574+ frames_ctx -> width = 0 ;
575+ frames_ctx -> height = 0 ;
552576
553577 switch (sw_desc -> comp [0 ].depth ) {
554578 case 8 :
0 commit comments