Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
mythtv/mythtv/libs/libmythtv/opengl/mythnvdecinterop.cpp
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
426 lines (370 sloc)
14.5 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// MythTV | |
#include "libmythbase/mythconfig.h" | |
#include "libmythbase/mythcorecontext.h" | |
#include "mythvideocolourspace.h" | |
#include "opengl/mythnvdecinterop.h" | |
// Std | |
#include <chrono> | |
#include <thread> | |
#define LOC QString("NVDECInterop: ") | |
// Invoke a CUDA driver entry point through the supplied function table and log
// a description of the failure if the call does not return CUDA_SUCCESS. The
// result code is not propagated to the caller.
// The expansion is a plain braced block: call sites in this file invoke the
// macro without a trailing semicolon, including as the body of an 'if'.
// 'desc' is initialised so that it is never read uninitialised should
// cuGetErrorString fail to write to it.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define CUDA_CHECK(CUDA_FUNCS, CUDA_CALL) \
{ \
    CUresult res = (CUDA_FUNCS)->CUDA_CALL; \
    if (res != CUDA_SUCCESS) { \
        const char * desc = nullptr; \
        (CUDA_FUNCS)->cuGetErrorString(res, &desc); \
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("CUDA error %1 (%2)").arg(res).arg(desc)); \
    } \
}
/*! \brief Construct an NVDEC interop for the given player and OpenGL context.
 *
 * CUDA initialisation is attempted immediately. The outcome is not reported
 * from the constructor; IsValid() reflects whether it succeeded.
 */
MythNVDECInterop::MythNVDECInterop(MythPlayerUI* Player, MythRenderOpenGL* Context)
  : MythOpenGLInterop(Context, GL_NVDEC, Player)
{
    // Return value deliberately unused - see IsValid()
    InitialiseCuda();
}
/*! \brief Release reference frames, CUDA/OpenGL textures and finally the CUDA context.
 *
 * The order matters: textures are unregistered while the CUDA context still
 * exists, and the context is destroyed last.
 */
MythNVDECInterop::~MythNVDECInterop()
{
    // Forget any frames retained for deinterlacing
    m_referenceFrames.clear();

    // Explicitly qualified so that this class's implementation is the one that runs
    this->MythNVDECInterop::DeleteTextures();

    // Tear down the CUDA context and function table
    CleanupContext(m_openglContext, m_cudaFuncs, m_cudaContext);
}
void MythNVDECInterop::DeleteTextures() | |
{ | |
if (!(m_cudaContext && m_cudaFuncs)) | |
return; | |
OpenGLLocker locker(m_openglContext); | |
CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext)) | |
if (!m_openglTextures.isEmpty()) | |
{ | |
LOG(VB_PLAYBACK, LOG_INFO, LOC + "Deleting CUDA resources"); | |
for (auto it = m_openglTextures.constBegin(); it != m_openglTextures.constEnd(); ++it) | |
{ | |
std::vector<MythVideoTextureOpenGL*> textures = it.value(); | |
for (auto & texture : textures) | |
{ | |
auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data); | |
if (data && data->second) | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second)) | |
delete data; | |
texture->m_data = nullptr; | |
} | |
} | |
} | |
CUcontext dummy = nullptr; | |
CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy)) | |
MythOpenGLInterop::DeleteTextures(); | |
} | |
bool MythNVDECInterop::IsValid() | |
{ | |
return m_cudaFuncs && m_cudaContext; | |
} | |
/*! \brief Return the CUDA context held by this interop (null if none was created).
 */
CUcontext MythNVDECInterop::GetCUDAContext()
{
    CUcontext context = m_cudaContext;
    return context;
}
/*! \brief Create an NVDEC interop if GL_NVDEC is listed for FMT_NVDEC.
 *
 * \param Player  Owning player; must not be null.
 * \param Context OpenGL render context; must not be null.
 * \return A newly allocated interop, or nullptr if either argument is null or
 *         GL_NVDEC is not among the types reported by GetNVDECTypes.
 */
MythNVDECInterop* MythNVDECInterop::CreateNVDEC(MythPlayerUI* Player, MythRenderOpenGL* Context)
{
    if (!Player || !Context)
        return nullptr;

    MythInteropGPU::InteropMap supported;
    GetNVDECTypes(Context, supported);

    const auto entry = supported.find(FMT_NVDEC);
    if (entry == supported.end())
        return nullptr;

    // Look for GL_NVDEC amongst the interop types registered for FMT_NVDEC
    for (auto type : entry->second)
    {
        if (type == GL_NVDEC)
            return new MythNVDECInterop(Player, Context);
    }
    return nullptr;
}
/*! \brief Register GL_NVDEC as the interop type for FMT_NVDEC.
 *
 * \param Render OpenGL render context; Types is left untouched when this is null.
 * \param Types  Map to update.
 */
void MythNVDECInterop::GetNVDECTypes(MythRenderOpenGL* Render, MythInteropGPU::InteropMap& Types)
{
    if (!Render)
        return;

    Types[FMT_NVDEC] = { GL_NVDEC };
}
/*! \brief Map CUDA video memory to OpenGL textures. | |
* | |
* \note This is not zero copy - although the copy will be extremely fast. | |
* It may be marginally quicker to implement a custom FFmpeg buffer pool that allocates | |
* textures and maps the texture storage to a CUdeviceptr (if that is possible). Alternatively | |
* EGL interopability may also be useful. | |
*/ | |
std::vector<MythVideoTextureOpenGL*> | |
MythNVDECInterop::Acquire(MythRenderOpenGL* Context, | |
MythVideoColourSpace* ColourSpace, | |
MythVideoFrame* Frame, | |
FrameScanType Scan) | |
{ | |
std::vector<MythVideoTextureOpenGL*> result; | |
if (!Frame || !m_cudaContext || !m_cudaFuncs) | |
return result; | |
if (Context && (Context != m_openglContext)) | |
LOG(VB_GENERAL, LOG_WARNING, LOC + "Mismatched OpenGL contexts"); | |
// Check size | |
QSize surfacesize(Frame->m_width, Frame->m_height); | |
if (m_textureSize != surfacesize) | |
{ | |
if (!m_textureSize.isEmpty()) | |
{ | |
LOG(VB_GENERAL, LOG_WARNING, LOC + QString("Video texture size changed! %1x%2->%3x%4") | |
.arg(m_textureSize.width()).arg(m_textureSize.height()) | |
.arg(Frame->m_width).arg(Frame->m_height)); | |
} | |
DeleteTextures(); | |
m_textureSize = surfacesize; | |
} | |
// Lock | |
OpenGLLocker locker(m_openglContext); | |
// Update colourspace and initialise on first frame | |
if (ColourSpace) | |
{ | |
if (m_openglTextures.isEmpty()) | |
ColourSpace->SetSupportedAttributes(ALL_PICTURE_ATTRIBUTES); | |
ColourSpace->UpdateColourSpace(Frame); | |
} | |
// Retrieve hardware frames context and AVCUDADeviceContext | |
if ((Frame->m_pixFmt != AV_PIX_FMT_CUDA) || (Frame->m_type != FMT_NVDEC) || | |
!Frame->m_buffer || !Frame->m_priv[0] || !Frame->m_priv[1]) | |
{ | |
return result; | |
} | |
auto cudabuffer = reinterpret_cast<CUdeviceptr>(Frame->m_buffer); | |
if (!cudabuffer) | |
return result; | |
// make the CUDA context current | |
CUcontext dummy = nullptr; | |
CUDA_CHECK(m_cudaFuncs, cuCtxPushCurrent(m_cudaContext)) | |
// create and map textures for a new buffer | |
VideoFrameType type = (Frame->m_swPixFmt == AV_PIX_FMT_NONE) ? FMT_NV12 : | |
MythAVUtil::PixelFormatToFrameType(static_cast<AVPixelFormat>(Frame->m_swPixFmt)); | |
bool p010 = MythVideoFrame::ColorDepth(type) > 8; | |
if (!m_openglTextures.contains(cudabuffer)) | |
{ | |
std::vector<QSize> sizes; | |
sizes.emplace_back(Frame->m_width, Frame->m_height); | |
sizes.emplace_back(Frame->m_width, Frame->m_height >> 1); | |
std::vector<MythVideoTextureOpenGL*> textures = | |
MythVideoTextureOpenGL::CreateTextures(m_openglContext, FMT_NVDEC, type, sizes); | |
if (textures.empty()) | |
{ | |
CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy)) | |
return result; | |
} | |
bool success = true; | |
for (uint plane = 0; plane < textures.size(); ++plane) | |
{ | |
// N.B. I think the texture formats for P010 are not strictly compliant | |
// with OpenGL ES 3.X but the Nvidia driver does not complain. | |
MythVideoTextureOpenGL *tex = textures[plane]; | |
tex->m_allowGLSLDeint = true; | |
m_openglContext->glBindTexture(tex->m_target, tex->m_textureId); | |
QOpenGLTexture::PixelFormat format = QOpenGLTexture::Red; | |
QOpenGLTexture::PixelType pixtype = p010 ? QOpenGLTexture::UInt16 : QOpenGLTexture::UInt8; | |
QOpenGLTexture::TextureFormat internal = p010 ? QOpenGLTexture::R16_UNorm : QOpenGLTexture::R8_UNorm; | |
int width = tex->m_size.width(); | |
if (plane) | |
{ | |
internal = p010 ? QOpenGLTexture::RG16_UNorm : QOpenGLTexture::RG8_UNorm; | |
format = QOpenGLTexture::RG; | |
width /= 2; | |
} | |
m_openglContext->glTexImage2D(tex->m_target, 0, internal, width, tex->m_size.height(), | |
0, format, pixtype, nullptr); | |
CUarray array = nullptr; | |
CUgraphicsResource graphicsResource = nullptr; | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsGLRegisterImage(&graphicsResource, tex->m_textureId, | |
QOpenGLTexture::Target2D, CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)) | |
if (graphicsResource) | |
{ | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsMapResources(1, &graphicsResource, nullptr)) | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsSubResourceGetMappedArray(&array, graphicsResource, 0, 0)) | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsUnmapResources(1, &graphicsResource, nullptr)) | |
tex->m_data = reinterpret_cast<unsigned char*>(new QPair<CUarray,CUgraphicsResource>(array, graphicsResource)); | |
} | |
else | |
{ | |
success = false; | |
break; | |
} | |
} | |
if (success) | |
{ | |
m_openglTextures.insert(cudabuffer, textures); | |
} | |
else | |
{ | |
for (auto & texture : textures) | |
{ | |
auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(texture->m_data); | |
if (data && data->second) | |
CUDA_CHECK(m_cudaFuncs, cuGraphicsUnregisterResource(data->second)) | |
delete data; | |
texture->m_data = nullptr; | |
if (texture->m_textureId) | |
m_openglContext->glDeleteTextures(1, &texture->m_textureId); | |
MythVideoTextureOpenGL::DeleteTexture(m_openglContext, texture); | |
} | |
} | |
} | |
if (!m_openglTextures.contains(cudabuffer)) | |
{ | |
CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy)) | |
return result; | |
} | |
// Copy device data to array data (i.e. texture) - surely this can be avoided? | |
// In theory, asynchronous copies should not be required but we use async | |
// followed by stream synchronisation to ensure CUDA and OpenGL are in sync | |
// which avoids presenting old/stale frames when the GPU is under load. | |
result = m_openglTextures[cudabuffer]; | |
for (uint i = 0; i < result.size(); ++i) | |
{ | |
auto *data = reinterpret_cast<QPair<CUarray,CUgraphicsResource>*>(result[i]->m_data); | |
CUDA_MEMCPY2D cpy; | |
memset(&cpy, 0, sizeof(cpy)); | |
cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE; | |
cpy.srcDevice = cudabuffer + static_cast<CUdeviceptr>(Frame->m_offsets[i]); | |
cpy.srcPitch = static_cast<size_t>(Frame->m_pitches[i]); | |
cpy.dstMemoryType = CU_MEMORYTYPE_ARRAY; | |
cpy.dstArray = data->first; | |
cpy.WidthInBytes = static_cast<size_t>(result[i]->m_size.width()) * (p010 ? 2 : 1); | |
cpy.Height = static_cast<size_t>(result[i]->m_size.height()); | |
CUDA_CHECK(m_cudaFuncs, cuMemcpy2DAsync(&cpy, nullptr)) | |
} | |
CUDA_CHECK(m_cudaFuncs, cuStreamSynchronize(nullptr)) | |
CUDA_CHECK(m_cudaFuncs, cuCtxPopCurrent(&dummy)) | |
// GLSL deinterlacing. The decoder will pick up any CPU or driver preference | |
// and return a stream of deinterlaced frames. Just check for GLSL here. | |
bool needreferences = false; | |
if (is_interlaced(Scan) && !Frame->m_alreadyDeinterlaced) | |
{ | |
MythDeintType shader = Frame->GetDoubleRateOption(DEINT_SHADER); | |
if (shader) | |
needreferences = shader == DEINT_HIGH; | |
else | |
needreferences = Frame->GetSingleRateOption(DEINT_SHADER) == DEINT_HIGH; | |
} | |
if (needreferences) | |
{ | |
if (qAbs(Frame->m_frameCounter - m_discontinuityCounter) > 1) | |
m_referenceFrames.clear(); | |
RotateReferenceFrames(cudabuffer); | |
int size = m_referenceFrames.size(); | |
CUdeviceptr next = m_referenceFrames[0]; | |
CUdeviceptr current = m_referenceFrames[size > 1 ? 1 : 0]; | |
CUdeviceptr last = m_referenceFrames[size > 2 ? 2 : 0]; | |
if (!m_openglTextures.contains(next) || !m_openglTextures.contains(current) || | |
!m_openglTextures.contains(last)) | |
{ | |
LOG(VB_GENERAL, LOG_ERR, LOC + "Reference frame error"); | |
return result; | |
} | |
result = m_openglTextures[last]; | |
std::copy(m_openglTextures[current].cbegin(), m_openglTextures[current].cend(), std::back_inserter(result)); | |
std::copy(m_openglTextures[next].cbegin(), m_openglTextures[next].cend(), std::back_inserter(result)); | |
return result; | |
} | |
m_referenceFrames.clear(); | |
m_discontinuityCounter = Frame->m_frameCounter; | |
return result; | |
} | |
/*! \brief Initialise a CUDA context | |
* \note We do not use the FFmpeg internal context creation as the created context | |
* is deleted before we have a chance to cleanup our own CUDA resources. | |
*/ | |
bool MythNVDECInterop::InitialiseCuda() | |
{ | |
return CreateCUDAContext(m_openglContext, m_cudaFuncs, m_cudaContext); | |
} | |
/*! \brief Make a single attempt at loading the CUDA functions and creating a context.
 *
 * \param GLContext   OpenGL context, made current for the duration of the call.
 * \param CudaFuncs   Receives the loaded CUDA function table.
 * \param CudaContext Receives the created CUDA context.
 * \param Retry       Set to true only when context creation itself failed;
 *                    false for every other outcome.
 * \return true if a CUDA context was created.
 *
 * \note Nothing is released here on failure; the caller is expected to clean up.
 */
bool MythNVDECInterop::CreateCUDAPriv(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
                                      CUcontext& CudaContext, bool& Retry)
{
    Retry = false;
    if (GLContext == nullptr)
        return false;

    // Make OpenGL context current
    OpenGLLocker locker(GLContext);

    // Retrieve CUDA entry points
    if (cuda_load_functions(&CudaFuncs, nullptr) != 0)
    {
        LOG(VB_PLAYBACK, LOG_ERR, LOC + "Failed to load functions");
        return false;
    }

    if (CudaFuncs->cuInit(0) != CUDA_SUCCESS)
    {
        LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to initialise CUDA API");
        return false;
    }

    // Ask for (at most) one CUDA device associated with the current OpenGL context
    CUdevice device = 0;
    unsigned int found = 0;
    if (CudaFuncs->cuGLGetDevices(&found, &device, 1, CU_GL_DEVICE_LIST_ALL) != CUDA_SUCCESS)
    {
        LOG(VB_GENERAL, LOG_ERR, LOC + "Failed to get CUDA device");
        return false;
    }

    if (found < 1)
    {
        LOG(VB_GENERAL, LOG_ERR, LOC + "No CUDA devices");
        return false;
    }

    // Create a CUDA context for that device
    const CUresult created = CudaFuncs->cuCtxCreate(&CudaContext, CU_CTX_SCHED_BLOCKING_SYNC, device);
    if (created != CUDA_SUCCESS)
    {
        LOG(VB_GENERAL, LOG_ERR, LOC + QString("Failed to create CUDA context (Err: %1)")
            .arg(created));
        // Only this failure is flagged as worth retrying
        Retry = true;
        return false;
    }

    // Pop the context again; it is pushed on demand by the code that uses it
    CUcontext popped = nullptr;
    CudaFuncs->cuCtxPopCurrent(&popped);
    LOG(VB_PLAYBACK, LOG_INFO, LOC + "Created CUDA context");
    return true;
}
/*! \brief Create a CUDA context, retrying a limited number of times.
 *
 * Must be called from the UI thread. Up to five attempts are made; a further
 * attempt is only made when the previous one flagged the failure as retryable,
 * with a 50ms pause between attempts. Partially created state is cleaned up
 * after every failed attempt.
 *
 * \param GLContext   OpenGL context to associate with CUDA.
 * \param CudaFuncs   Receives the CUDA function table on success.
 * \param CudaContext Receives the CUDA context on success.
 * \return true if a context was created.
 */
bool MythNVDECInterop::CreateCUDAContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
                                         CUcontext& CudaContext)
{
    if (!gCoreContext->IsUIThread())
    {
        LOG(VB_GENERAL, LOG_ERR, LOC + "Must create CUDA context from main thread");
        return false;
    }

    constexpr int kMaxAttempts = 5;
    for (int attempt = 1; attempt <= kMaxAttempts; ++attempt)
    {
        bool retry = false;
        if (CreateCUDAPriv(GLContext, CudaFuncs, CudaContext, retry))
            return true;

        CleanupContext(GLContext, CudaFuncs, CudaContext);

        // Do not announce (or wait for) a retry that will never happen
        if (!retry || (attempt == kMaxAttempts))
            break;

        LOG(VB_GENERAL, LOG_WARNING, LOC + "Will retry in 50ms");
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
    }
    return false;
}
/*! \brief Destroy the CUDA context (if any) and release the CUDA function table.
 *
 * \param GLContext   OpenGL context, made current for the duration of the call.
 *                    Nothing is released when this is null.
 * \param CudaFuncs   CUDA function table; released via cuda_free_functions.
 * \param CudaContext CUDA context to destroy. Reset to null once destroyed so
 *                    that the caller is not left holding a stale handle.
 */
void MythNVDECInterop::CleanupContext(MythRenderOpenGL* GLContext, CudaFunctions*& CudaFuncs,
                                      CUcontext& CudaContext)
{
    if (!GLContext)
        return;

    OpenGLLocker locker(GLContext);
    if (!CudaFuncs)
        return;

    if (CudaContext)
    {
        CUDA_CHECK(CudaFuncs, cuCtxDestroy(CudaContext))
        // The handle is no longer usable, whatever cuCtxDestroy reported
        CudaContext = nullptr;
    }
    cuda_free_functions(&CudaFuncs);
}
/*! \brief Record a buffer as the newest reference frame, keeping at most three.
 *
 * \param Buffer CUDA device pointer of the frame just acquired. Ignored when
 *               null or when it is already the newest entry.
 */
void MythNVDECInterop::RotateReferenceFrames(CUdeviceptr Buffer)
{
    if (!Buffer)
        return;

    // Double rate deinterlacing presents the same frame twice - retain it once only
    const bool alreadyNewest = !m_referenceFrames.empty() && (m_referenceFrames[0] == Buffer);
    if (alreadyNewest)
        return;

    m_referenceFrames.push_front(Buffer);

    // Discard the oldest entries beyond the three most recent
    while (m_referenceFrames.size() > 3)
    {
        m_referenceFrames.pop_back();
    }
}