Skip to content

Commit

Permalink
NVDEC: Fix blank screen when exiting playback for CUDA/NVDEC
Browse files Browse the repository at this point in the history
- the hardware context was being re-created when GetFormat was called
and the interop was not being deleted - so don't re-create without
reason.
- create our own CUDA context whose lifetime we can manage and hence
allow us to release CUDA resources when the interop goes out of scope.
  • Loading branch information
mark-kendall committed Apr 23, 2019
1 parent 421c0a7 commit 0b8a2f6
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 81 deletions.
44 changes: 34 additions & 10 deletions mythtv/libs/libmythtv/mythnvdeccontext.cpp
Expand Up @@ -37,7 +37,7 @@ MythCodecID MythNVDECContext::GetSupportedCodec(AVCodecContext*,
QString name = QString((*Codec)->name) + "_cuvid";
if (name == "mpeg2video_cuvid")
name = "mpeg2_cuvid";
AVCodec *codec = avcodec_find_decoder_by_name (name.toLocal8Bit());
AVCodec *codec = avcodec_find_decoder_by_name(name.toLocal8Bit());
if (codec)
{
LOG(VB_PLAYBACK, LOG_INFO, LOC + QString("HW device type '%1' supports decoding '%2'")
Expand Down Expand Up @@ -73,25 +73,39 @@ int MythNVDECContext::InitialiseDecoder(AVCodecContext *Context)
if (type == MythOpenGLInterop::Unsupported)
return -1;

// Allocate the device context
AVBufferRef* hwdeviceref = nullptr;
if (av_hwdevice_ctx_create(&hwdeviceref, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 0) < 0)
// Create interop (and CUDA context)
MythNVDECInterop *interop = MythNVDECInterop::Create(render);
if (!interop)
return -1;
if (!interop->IsValid())
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to create device context");
interop->DecrRef();
return -1;
}

// Set release method and interop
MythNVDECInterop *interop = MythNVDECInterop::Create(render);
if (!interop)
// Allocate the device context
AVBufferRef* hwdeviceref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
if (!hwdeviceref)
{
av_buffer_unref(&hwdeviceref);
interop->DecrRef();
return -1;
}

// Set release method, interop and CUDA context
AVHWDeviceContext* hwdevicecontext = reinterpret_cast<AVHWDeviceContext*>(hwdeviceref->data);
hwdevicecontext->free = &MythHWContext::DeviceContextFinished;
hwdevicecontext->user_opaque = interop;
AVCUDADeviceContext *devicehwctx = reinterpret_cast<AVCUDADeviceContext*>(hwdevicecontext->hwctx);
devicehwctx->cuda_ctx = interop->GetCUDAContext();
devicehwctx->stream = nullptr;

if (av_hwdevice_ctx_init(hwdeviceref) < 0)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to init CUDA hw device");
av_buffer_unref(&hwdeviceref);
return -1;
}

Context->hw_device_ctx = hwdeviceref;
LOG(VB_PLAYBACK, LOG_INFO, LOC + "Created CUDA device context");
return 0;
Expand All @@ -108,9 +122,19 @@ enum AVPixelFormat MythNVDECContext::GetFormat(AVCodecContext* Context, const AV
while (*PixFmt != AV_PIX_FMT_NONE)
{
if (*PixFmt == AV_PIX_FMT_CUDA)
if (HwDecoderInit(Context) >= 0)
{
if (!Context->hw_device_ctx)
{
if (HwDecoderInit(Context) >= 0)
return *PixFmt;
}
else
{
LOG(VB_PLAYBACK, LOG_INFO, LOC + "Re-using CUDA context");
return *PixFmt;
}
PixFmt++;
}
}
return AV_PIX_FMT_NONE;
}
Expand Down
181 changes: 111 additions & 70 deletions mythtv/libs/libmythtv/mythnvdecinterop.cpp
Expand Up @@ -4,56 +4,32 @@
#include "videocolourspace.h"
#include "mythnvdecinterop.h"

// FFmpeg
extern "C" {
#include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/hwcontext_cuda.h"
}

#define LOC QString("NVDECInterop: ")

#define CUDA_CHECK(CUDA_CALL) { CUresult res = CUDA_CALL; if (res != CUDA_SUCCESS) { \
LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error: %1").arg(res)); }}

/*! \brief A simple wrapper around resources allocated by the given CUDA context.
*
* \note This does not currently release m_resource as the CUDA context is released
* before AVHWDeviceContext::free is called. This may be the reason why OpenGL
* rendering is broken after using NVDEC direct rendering - but seems unlikely.
*/
class NVDECData
{
public:
NVDECData(CUarray Array, CUgraphicsResource Resource, AVCUDADeviceContext* Context)
: m_array(Array),
m_resource(Resource),
m_context(Context)
{
}

~NVDECData()
{
if (m_context && m_context->cuda_ctx && m_context->internal && m_context->internal->cuda_dl)
{
CUcontext dummy;
CUDA_CHECK(m_context->internal->cuda_dl->cuCtxPushCurrent(m_context->cuda_ctx));
CUDA_CHECK(m_context->internal->cuda_dl->cuGraphicsUnregisterResource(m_resource));
CUDA_CHECK(m_context->internal->cuda_dl->cuCtxPopCurrent(&dummy));
}
}

CUarray m_array;
CUgraphicsResource m_resource;
AVCUDADeviceContext* m_context;
};
/* Evaluate a CUDA driver API call exactly once and log an error if it does not
 * return CUDA_SUCCESS. The result is only logged - it is not returned to the
 * caller. Expands to a single statement, so it must be followed by ';' and is
 * safe to use as the body of an unbraced if/else. A valid m_cudaFuncs must be
 * in scope at the point of use, as it is needed to look up the error string. */
#define CUDA_CHECK(CUDA_CALL) \
do { \
    const CUresult cuda_check_res_ = (CUDA_CALL); \
    if (cuda_check_res_ != CUDA_SUCCESS) { \
        const char *cuda_check_desc_ = nullptr; /* stays null if the lookup fails */ \
        m_cudaFuncs->cuGetErrorString(cuda_check_res_, &cuda_check_desc_); \
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error %1 (%2)").arg(cuda_check_res_).arg(cuda_check_desc_)); \
    } \
} while (0)

MythNVDECInterop::MythNVDECInterop(MythRenderOpenGL *Context)
: MythOpenGLInterop(Context, NVDEC)
: MythOpenGLInterop(Context, NVDEC),
m_cudaContext(),
m_cudaFuncs(nullptr)
{
InitialiseCuda();
}

MythNVDECInterop::~MythNVDECInterop()
{
CUcontext dummy;
if (m_cudaContext && m_cudaFuncs)
CUDA_CHECK(m_cudaFuncs->cuCtxPushCurrent(m_cudaContext));

if (!m_openglTextures.isEmpty())
{
LOG(VB_PLAYBACK, LOG_INFO, LOC + "Deleting CUDA resources");
Expand All @@ -64,14 +40,36 @@ MythNVDECInterop::~MythNVDECInterop()
vector<MythVideoTexture*>::iterator it2 = textures.begin();
for ( ; it2 != textures.end(); ++it2)
{
// see comment in NVDECData
NVDECData *data = reinterpret_cast<NVDECData*>((*it2)->m_data);
if (data)
delete data;
QPair<CUarray,CUgraphicsResource> *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>((*it2)->m_data);
// Don't error check here - for some reason the context is deemed destroyed but pop/destroy below
// work fine
if (m_cudaFuncs && data && data->second)
m_cudaFuncs->cuGraphicsUnregisterResource(&(data->second));
delete data;
(*it2)->m_data = nullptr;
}
}
}

if (m_cudaFuncs)
{
if (m_cudaContext)
{
CUDA_CHECK(m_cudaFuncs->cuCtxPopCurrent(&dummy));
CUDA_CHECK(m_cudaFuncs->cuCtxDestroy(m_cudaContext));
}
cuda_free_functions(&m_cudaFuncs);
}
}

/// Report whether this interop is usable: true only when both the CUDA
/// function table (m_cudaFuncs) and the CUDA context (m_cudaContext) are set.
bool MythNVDECInterop::IsValid(void)
{
    if (!m_cudaFuncs)
        return false;
    return m_cudaContext != nullptr;
}

/// Return the CUDA context held by this interop. The value is whatever
/// m_cudaContext currently holds, so callers should check IsValid() first.
CUcontext MythNVDECInterop::GetCUDAContext(void)
{
    return this->m_cudaContext;
}

MythNVDECInterop* MythNVDECInterop::Create(MythRenderOpenGL *Context)
Expand All @@ -85,7 +83,6 @@ MythOpenGLInterop::Type MythNVDECInterop::GetInteropType(MythCodecID CodecId, My
{
if (!codec_is_nvdec(CodecId) || !gCoreContext->IsUIThread())
return Unsupported;

if (!Context)
Context = MythRenderOpenGL::GetOpenGLRender();
if (!Context)
Expand All @@ -97,7 +94,7 @@ MythOpenGLInterop::Type MythNVDECInterop::GetInteropType(MythCodecID CodecId, My
*
* \note This is not zero copy - although the copy will be extremely fast.
* It may be marginally quicker to implement a custom FFmpeg buffer pool that allocates
* textures and maps the texture storage to a CUdeviceptr (if that is possible). Alternativel
* textures and maps the texture storage to a CUdeviceptr (if that is possible). Alternatively
* EGL interoperability may also be useful.
*/
vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,
Expand All @@ -106,7 +103,7 @@ vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,
FrameScanType)
{
vector<MythVideoTexture*> result;
if (!Frame)
if (!Frame || !m_cudaContext || !m_cudaFuncs)
return result;

if (Context && (Context != m_context))
Expand Down Expand Up @@ -139,22 +136,13 @@ vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,
return result;
}

AVBufferRef* buffer = reinterpret_cast<AVBufferRef*>(Frame->priv[1]);
if (!buffer || (buffer && !buffer->data))
return result;
AVHWDeviceContext* context = reinterpret_cast<AVHWDeviceContext*>(buffer->data);
AVCUDADeviceContext *devicecontext = reinterpret_cast<AVCUDADeviceContext*>(context->hwctx);
if (!devicecontext)
return result;

CudaFunctions *cu = devicecontext->internal->cuda_dl;
CUdeviceptr cudabuffer = reinterpret_cast<CUdeviceptr>(Frame->buf);
if (!cu || !cudabuffer)
if (!cudabuffer)
return result;

// make the CUDA context current
CUcontext dummy;
CUDA_CHECK(cu->cuCtxPushCurrent(devicecontext->cuda_ctx));
CUDA_CHECK(m_cudaFuncs->cuCtxPushCurrent(m_cudaContext));

// create and map textures for a new buffer
VideoFrameType type = (Frame->sw_pix_fmt == AV_PIX_FMT_NONE) ? FMT_NV12 :
Expand All @@ -169,7 +157,7 @@ vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,
m_context, FMT_NVDEC, type, sizes, QOpenGLTexture::Target2D);
if (textures.empty())
{
CUDA_CHECK(cu->cuCtxPopCurrent(&dummy));
CUDA_CHECK(m_cudaFuncs->cuCtxPopCurrent(&dummy));
return result;
}

Expand All @@ -194,20 +182,20 @@ vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,

CUarray array;
CUgraphicsResource graphicsResource = nullptr;
CUDA_CHECK(cu->cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId,
CUDA_CHECK(m_cudaFuncs->cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId,
QOpenGLTexture::Target2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
CUDA_CHECK(cu->cuGraphicsMapResources(1, &graphicsResource, nullptr));
CUDA_CHECK(cu->cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0));
CUDA_CHECK(cu->cuGraphicsUnmapResources(1, &graphicsResource, nullptr));
tex->m_data = reinterpret_cast<unsigned char*>(new NVDECData(array, graphicsResource, devicecontext));
CUDA_CHECK(m_cudaFuncs->cuGraphicsMapResources(1, &graphicsResource, nullptr));
CUDA_CHECK(m_cudaFuncs->cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0));
CUDA_CHECK(m_cudaFuncs->cuGraphicsUnmapResources(1, &graphicsResource, nullptr));
tex->m_data = reinterpret_cast<unsigned char*>(new QPair<CUarray,CUgraphicsResource>(array, graphicsResource));
}
MythVideoTexture::SetTextureFilters(m_context, textures, QOpenGLTexture::Linear, QOpenGLTexture::ClampToEdge);
m_openglTextures.insert(cudabuffer, textures);
}

if (!m_openglTextures.contains(cudabuffer))
{
CUDA_CHECK(cu->cuCtxPopCurrent(&dummy));
CUDA_CHECK(m_cudaFuncs->cuCtxPopCurrent(&dummy));
return result;
}

Expand All @@ -217,20 +205,73 @@ vector<MythVideoTexture*> MythNVDECInterop::Acquire(MythRenderOpenGL *Context,
result = m_openglTextures[cudabuffer];
for (uint i = 0; i < result.size(); ++i)
{
NVDECData *data = reinterpret_cast<NVDECData*>(result[i]->m_data);
QPair<CUarray,CUgraphicsResource> *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(result[i]->m_data);
CUDA_MEMCPY2D cpy;
memset(&cpy, 0, sizeof(cpy));
cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
cpy.srcDevice = cudabuffer + static_cast<CUdeviceptr>(Frame->offsets[i]);
cpy.srcPitch = static_cast<size_t>(Frame->pitches[i]);
cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY;
cpy.dstArray = data->m_array;
cpy.dstArray = data->first;
cpy.WidthInBytes = static_cast<size_t>(result[i]->m_size.width()) * (hdr ? 2 : 1);
cpy.Height = static_cast<size_t>(result[i]->m_size.height());
CUDA_CHECK(cu->cuMemcpy2D(&cpy));
CUDA_CHECK(m_cudaFuncs->cuMemcpy2D(&cpy));
}

CUDA_CHECK(cu->cuCtxPopCurrent(&dummy));
CUDA_CHECK(m_cudaFuncs->cuCtxPopCurrent(&dummy));
return result;
}

/*! \brief Initialise a CUDA context
* \note We do not use the FFmpeg internal context creation as the created context
* is deleted before we have a chance to cleanup our own CUDA resources.
*/
bool MythNVDECInterop::InitialiseCuda(void)
{
if (!m_context)
return false;

OpenGLLocker locker(m_context);

// retrieve CUDA entry points
if (cuda_load_functions(&m_cudaFuncs, nullptr) != 0)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to load functions");
return false;
}

// create a CUDA context for the current device
CUdevice cudevice;
CUcontext dummy;
CUresult res = m_cudaFuncs->cuInit(0);
if (res != CUDA_SUCCESS)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to initialise CUDA API");
return false;
}

unsigned int devicecount;
res = m_cudaFuncs->cuGLGetDevices(&devicecount, &cudevice, 1, CU_GL_DEVICE_LIST_ALL);
if (res != CUDA_SUCCESS)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to get CUDA device");
return false;
}

if (devicecount < 1)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "No CUDA devices");
return false;
}

res = m_cudaFuncs->cuCtxCreate(&m_cudaContext, CU_CTX_SCHED_BLOCKING_SYNC, cudevice);
if (res != CUDA_SUCCESS)
{
LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to create CUDA context");
return false;
}

m_cudaFuncs->cuCtxPopCurrent(&dummy);
LOG(VB_GENERAL, LOG_INFO, LOC + "Created CUDA context");
return true;
}
18 changes: 17 additions & 1 deletion mythtv/libs/libmythtv/mythnvdecinterop.h
Expand Up @@ -4,17 +4,33 @@
// MythTV
#include "mythopenglinterop.h"

// FFmpeg
extern "C" {
#include "compat/cuda/dynlink_loader.h"
#include "libavutil/hwcontext_cuda.h"
}

class MythNVDECInterop : public MythOpenGLInterop
{
public:
static MythNVDECInterop* Create(MythRenderOpenGL *Context);
static Type GetInteropType(MythCodecID CodecId, MythRenderOpenGL *Context = nullptr);

MythNVDECInterop(MythRenderOpenGL *Context);
~MythNVDECInterop() override;

bool IsValid(void);
CUcontext GetCUDAContext(void);
vector<MythVideoTexture*> Acquire(MythRenderOpenGL *Context, VideoColourSpace *ColourSpace,
VideoFrame *Frame, FrameScanType Scan) override;

protected:
MythNVDECInterop(MythRenderOpenGL *Context);

private:
bool InitialiseCuda(void);

CUcontext m_cudaContext;
CudaFunctions *m_cudaFuncs;
};

#endif // MYTHNVDECINTEROP_H

0 comments on commit 0b8a2f6

Please sign in to comment.