Skip to content

Commit

Permalink
New tile group (OpenVisualCloud#348)
Browse files Browse the repository at this point in the history
* Enable tile group for better tile parallelism

Signed-off-by: Jing Li <jing.b.li@intel.com>

* set reference info

Signed-off-by: Jing Li <jing.b.li@intel.com>
  • Loading branch information
lijing0010 authored and 1480c1 committed Oct 4, 2019
1 parent 4b2972e commit cea889e
Show file tree
Hide file tree
Showing 15 changed files with 338 additions and 238 deletions.
10 changes: 5 additions & 5 deletions Source/Lib/Codec/EbCodingLoop.c
Original file line number Diff line number Diff line change
Expand Up @@ -1882,8 +1882,8 @@ static void EncodePassMvPrediction(
contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == BI_PRED)
{
FillAMVPCandidates(
pictureControlSetPtr->epMvNeighborArray[contextPtr->tileIndex],
pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->tileIndex],
pictureControlSetPtr->epMvNeighborArray[contextPtr->encDecTileIndex],
pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->encDecTileIndex],
contextPtr->cuOriginX,
contextPtr->cuOriginY,
contextPtr->cuStats->size,
Expand Down Expand Up @@ -1929,8 +1929,8 @@ static void EncodePassMvPrediction(
contextPtr->cuPtr->predictionUnitArray->interPredDirectionIndex == BI_PRED)
{
FillAMVPCandidates(
pictureControlSetPtr->epMvNeighborArray[contextPtr->tileIndex],
pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->tileIndex],
pictureControlSetPtr->epMvNeighborArray[contextPtr->encDecTileIndex],
pictureControlSetPtr->epModeTypeNeighborArray[contextPtr->encDecTileIndex],
contextPtr->cuOriginX,
contextPtr->cuOriginY,
contextPtr->cuStats->size,
Expand Down Expand Up @@ -2988,7 +2988,7 @@ EB_EXTERN void EncodePass(
EB_COLOR_FORMAT colorFormat = contextPtr->colorFormat;
const EB_U16 subWidthCMinus1 = (colorFormat == EB_YUV444 ? 1 : 2) - 1;

EB_U32 tileIdx = contextPtr->tileIndex;
EB_U32 tileIdx = contextPtr->encDecTileIndex;
EbPictureBufferDesc_t *reconBuffer = is16bit ? pictureControlSetPtr->reconPicture16bitPtr : pictureControlSetPtr->reconPicturePtr;
EbPictureBufferDesc_t *coeffBufferTB = lcuPtr->quantizedCoeff;

Expand Down
1 change: 1 addition & 0 deletions Source/Lib/Codec/EbDefinitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ extern "C" {

//#define BENCHMARK 0
#define LATENCY_PROFILE 0
//#define DEBUG_LIFE_CYCLE 0
// Internal Macros
#define NON_AVX512_SUPPORT

Expand Down
267 changes: 161 additions & 106 deletions Source/Lib/Codec/EbEncDecProcess.c

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions Source/Lib/Codec/EbEncDecProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ typedef struct EncDecContext_s
EB_PM_MODE pmMode; // agressive vs. conservative
EB_BOOL pmMethod; // 1-stgae vs. 2-stage

EB_U16 tileRowIndex;
EB_U16 tileIndex;
EB_U16 encDecTileIndex;
////
} EncDecContext_t;

Expand Down
2 changes: 1 addition & 1 deletion Source/Lib/Codec/EbEncDecTasks.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ typedef struct EncDecTasks_s
EB_U32 inputType;
EB_S16 encDecSegmentRow;

EB_U32 tileRowIndex;
EB_U32 tileGroupIndex;
} EncDecTasks_t;

typedef struct EncDecTasksInitData_s
Expand Down
31 changes: 31 additions & 0 deletions Source/Lib/Codec/EbEncHandle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1158,13 +1158,21 @@ EB_API EB_ERRORTYPE EbInitEncoder(EB_COMPONENTTYPE *h265EncComponent)

inputData.encDecSegmentCol = 0;
inputData.encDecSegmentRow = 0;
inputData.tileGroupCol = 0;
inputData.tileGroupRow = 0;
for(i=0; i <= encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->staticConfig.hierarchicalLevels; ++i) {
inputData.encDecSegmentCol = encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->encDecSegmentColCountArray[i] > inputData.encDecSegmentCol ?
(EB_U16) encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->encDecSegmentColCountArray[i] :
inputData.encDecSegmentCol;
inputData.encDecSegmentRow = encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->encDecSegmentRowCountArray[i] > inputData.encDecSegmentRow ?
(EB_U16) encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->encDecSegmentRowCountArray[i] :
inputData.encDecSegmentRow;
inputData.tileGroupCol= encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->tileGroupColCountArray[i] > inputData.tileGroupCol ?
(EB_U16) encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->tileGroupColCountArray[i] :
inputData.tileGroupCol;
inputData.tileGroupRow = encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->tileGroupRowCountArray[i] > inputData.tileGroupRow ?
(EB_U16) encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->tileGroupRowCountArray[i] :
inputData.tileGroupRow;
}

inputData.pictureWidth = encHandlePtr->sequenceControlSetInstanceArray[instanceIndex]->sequenceControlSetPtr->maxInputLumaWidth;
Expand Down Expand Up @@ -2090,6 +2098,9 @@ void LoadDefaultBufferConfigurationSettings(
EB_U32 meSegH = (((sequenceControlSetPtr->maxInputLumaHeight + 32) / MAX_LCU_SIZE) < 6) ? 1 : 6;
EB_U32 meSegW = (((sequenceControlSetPtr->maxInputLumaWidth + 32) / MAX_LCU_SIZE) < 10) ? 1 : 10;

EB_U16 tileColCount = sequenceControlSetPtr->staticConfig.tileColumnCount;
EB_U16 tileRowCount = sequenceControlSetPtr->staticConfig.tileRowCount;

EB_U32 inputPic = SetParentPcs(&sequenceControlSetPtr->staticConfig);

unsigned int lpCount = GetNumProcessors();
Expand Down Expand Up @@ -2148,6 +2159,26 @@ void LoadDefaultBufferConfigurationSettings(
sequenceControlSetPtr->encDecSegmentColCountArray[4] = encDecSegW;
sequenceControlSetPtr->encDecSegmentColCountArray[5] = encDecSegW;

// Jing: TODO:
// Tune it later, different layer may have different Tile Group
EB_U16 tileGroupColCount = 1;//1 col will have better perf for segments
EB_U16 tileGroupRowCount = tileRowCount;// > 1 ? (tileRowCount / 2) : 1;

// Tile group
sequenceControlSetPtr->tileGroupColCountArray[0] = tileGroupColCount;
sequenceControlSetPtr->tileGroupColCountArray[1] = tileGroupColCount;
sequenceControlSetPtr->tileGroupColCountArray[2] = tileGroupColCount;
sequenceControlSetPtr->tileGroupColCountArray[3] = tileGroupColCount;
sequenceControlSetPtr->tileGroupColCountArray[4] = tileGroupColCount;
sequenceControlSetPtr->tileGroupColCountArray[5] = tileGroupColCount;

sequenceControlSetPtr->tileGroupRowCountArray[0] = tileGroupRowCount;
sequenceControlSetPtr->tileGroupRowCountArray[1] = tileGroupRowCount;
sequenceControlSetPtr->tileGroupRowCountArray[2] = tileGroupRowCount;
sequenceControlSetPtr->tileGroupRowCountArray[3] = tileGroupRowCount;
sequenceControlSetPtr->tileGroupRowCountArray[4] = tileGroupRowCount;
sequenceControlSetPtr->tileGroupRowCountArray[5] = tileGroupRowCount;

//#====================== Data Structures and Picture Buffers ======================
sequenceControlSetPtr->pictureControlSetPoolInitCount = inputPic;
sequenceControlSetPtr->pictureControlSetPoolInitCountChild = MAX(4, coreCount / 6);
Expand Down
17 changes: 0 additions & 17 deletions Source/Lib/Codec/EbEntropyCodingProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,6 @@ void* EntropyCodingKernel(void *inputPtr)
// If the picture is complete, terminate the slice
if (pictureControlSetPtr->entropyCodingInfo[tileIdx]->entropyCodingCurrentRow == pictureControlSetPtr->entropyCodingInfo[tileIdx]->entropyCodingRowCount)
{
EB_U32 refIdx;
EB_BOOL pic_ready = EB_TRUE;

//assert(lastLcuFlagInTile == EB_TRUE);
Expand Down Expand Up @@ -507,22 +506,6 @@ void* EntropyCodingKernel(void *inputPtr)
EbReleaseMutex(pictureControlSetPtr->entropyCodingPicMutex);

if (pic_ready) {
// Release the List 0 Reference Pictures
for (refIdx = 0; refIdx < pictureControlSetPtr->ParentPcsPtr->refList0Count; ++refIdx) {
if (pictureControlSetPtr->refPicPtrArray[0] != EB_NULL) {

EbReleaseObject(pictureControlSetPtr->refPicPtrArray[0]);
}
}

// Release the List 1 Reference Pictures
for (refIdx = 0; refIdx < pictureControlSetPtr->ParentPcsPtr->refList1Count; ++refIdx) {
if (pictureControlSetPtr->refPicPtrArray[1] != EB_NULL) {

EbReleaseObject(pictureControlSetPtr->refPicPtrArray[1]);
}
}

// Get Empty Entropy Coding Results
EbGetEmptyObject(
contextPtr->entropyCodingOutputFifoPtr,
Expand Down
33 changes: 18 additions & 15 deletions Source/Lib/Codec/EbModeDecisionConfigurationProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -2081,22 +2081,25 @@ void* ModeDecisionConfigurationKernel(void *inputPtr)
SVT_LOG("POC %lld MDC OUT \n", pictureControlSetPtr->pictureNumber);
#endif
// Post the results to the MD processes

//printf("MDC, post POC %d, decoder order %d\n",
// pictureControlSetPtr->pictureNumber, pictureControlSetPtr->ParentPcsPtr->decodeOrder);
for (unsigned tileRowIdx = 0; tileRowIdx < pictureControlSetPtr->ParentPcsPtr->tileRowCount; tileRowIdx++) {
// TODO: Too many objects may drain the FIFO and downgrade the perf
EbGetEmptyObject(
contextPtr->modeDecisionConfigurationOutputFifoPtr,
&encDecTasksWrapperPtr);
encDecTasksPtr = (EncDecTasks_t*) encDecTasksWrapperPtr->objectPtr;
encDecTasksPtr->pictureControlSetWrapperPtr = rateControlResultsPtr->pictureControlSetWrapperPtr;
encDecTasksPtr->inputType = ENCDEC_TASKS_MDC_INPUT;
encDecTasksPtr->tileRowIndex = tileRowIdx;

// Post the Full Results Object
EbPostFullObject(encDecTasksWrapperPtr);
EB_U16 tileGroupRowCnt = sequenceControlSetPtr->tileGroupRowCountArray[pictureControlSetPtr->temporalLayerIndex];
EB_U16 tileGroupColCnt = sequenceControlSetPtr->tileGroupColCountArray[pictureControlSetPtr->temporalLayerIndex];

for (EB_U16 r = 0; r < tileGroupRowCnt; r++) {
for (EB_U16 c = 0; c < tileGroupColCnt; c++) {
unsigned tileGroupIdx = c + r * tileGroupColCnt;
EbGetEmptyObject(
contextPtr->modeDecisionConfigurationOutputFifoPtr,
&encDecTasksWrapperPtr);
encDecTasksPtr = (EncDecTasks_t*) encDecTasksWrapperPtr->objectPtr;
encDecTasksPtr->pictureControlSetWrapperPtr = rateControlResultsPtr->pictureControlSetWrapperPtr;
encDecTasksPtr->inputType = ENCDEC_TASKS_MDC_INPUT;
encDecTasksPtr->tileGroupIndex = tileGroupIdx;

// Post the Full Results Object
EbPostFullObject(encDecTasksWrapperPtr);
}
}

#if LATENCY_PROFILE
double latency = 0.0;
EB_U64 finishTimeSeconds = 0;
Expand Down
50 changes: 1 addition & 49 deletions Source/Lib/Codec/EbModeDecisionProcess.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,32 +176,6 @@ EB_ERRORTYPE ModeDecisionContextCtor(
return EB_ErrorNone;
}

/*
 * Reset the mode-decision neighbor arrays of a single tile.
 *
 * For every depth slot in [0, NEIGHBOR_ARRAY_TOTAL_COUNT) this calls
 * NeighborArrayUnitReset() on the intra-luma-mode, MV, skip-flag, mode-type
 * and leaf-depth neighbor arrays stored at [depth][tileIdx].
 *
 * pictureControlSetPtr - picture control set owning the neighbor arrays
 * tileIdx              - index of the tile whose arrays are reset
 */
static void ResetModeDecisionNeighborArrays(PictureControlSet_t *pictureControlSetPtr, EB_U32 tileIdx)
{
    for (EB_U8 depthIdx = 0; depthIdx < NEIGHBOR_ARRAY_TOTAL_COUNT; ++depthIdx) {
        NeighborArrayUnitReset(pictureControlSetPtr->mdIntraLumaModeNeighborArray[depthIdx][tileIdx]);
        NeighborArrayUnitReset(pictureControlSetPtr->mdMvNeighborArray[depthIdx][tileIdx]);
        NeighborArrayUnitReset(pictureControlSetPtr->mdSkipFlagNeighborArray[depthIdx][tileIdx]);
        NeighborArrayUnitReset(pictureControlSetPtr->mdModeTypeNeighborArray[depthIdx][tileIdx]);
        NeighborArrayUnitReset(pictureControlSetPtr->mdLeafDepthNeighborArray[depthIdx][tileIdx]);
    }
}


/*
 * Reset the MD-refinement neighbor arrays of a single tile.
 *
 * Calls NeighborArrayUnitReset() on the refinement intra-luma-mode,
 * mode-type and luma-recon neighbor arrays stored at [tileIdx].
 *
 * pictureControlSetPtr - picture control set owning the neighbor arrays
 * tileIdx              - index of the tile whose arrays are reset
 */
static void ResetMdRefinmentNeighborArrays(PictureControlSet_t *pictureControlSetPtr, EB_U32 tileIdx)
{
    NeighborArrayUnitReset(pictureControlSetPtr->mdRefinementIntraLumaModeNeighborArray[tileIdx]);
    NeighborArrayUnitReset(pictureControlSetPtr->mdRefinementModeTypeNeighborArray[tileIdx]);
    NeighborArrayUnitReset(pictureControlSetPtr->mdRefinementLumaReconNeighborArray[tileIdx]);
}


extern void lambdaAssignLowDelay(
Expand Down Expand Up @@ -311,12 +285,9 @@ const EB_LAMBDA_ASSIGN_FUNC lambdaAssignmentFunctionTable[4] = {
void ProductResetModeDecision(
ModeDecisionContext_t *contextPtr,
PictureControlSet_t *pictureControlSetPtr,
SequenceControlSet_t *sequenceControlSetPtr,
EB_U32 tileRowIndex,
EB_U32 segmentIndex)
SequenceControlSet_t *sequenceControlSetPtr)
{
EB_PICTURE sliceType;
EB_U32 lcuRowIndex;
MdRateEstimationContext_t *mdRateEstimationArray;

// SAO
Expand Down Expand Up @@ -404,25 +375,6 @@ void ProductResetModeDecision(
// Reset CABAC Contexts
contextPtr->coeffEstEntropyCoderPtr = pictureControlSetPtr->coeffEstEntropyCoderPtr;

// Reset Neighbor Arrays at start of new Segment / Picture
// Jing: Current segments will cross tiles
if (segmentIndex == 0) {
for (unsigned int tileIdx = tileRowIndex * pictureControlSetPtr->ParentPcsPtr->tileColumnCount;
tileIdx < (tileRowIndex + 1) * pictureControlSetPtr->ParentPcsPtr->tileColumnCount;
tileIdx++) {
ResetModeDecisionNeighborArrays(pictureControlSetPtr, tileIdx);
ResetMdRefinmentNeighborArrays(pictureControlSetPtr, tileIdx);

//Jing: TODO Change to tile
// Used in DLF, need to double check if need tile level parameters
for(lcuRowIndex = 0; lcuRowIndex< ((sequenceControlSetPtr->lumaHeight + MAX_LCU_SIZE - 1)/MAX_LCU_SIZE); lcuRowIndex++){
pictureControlSetPtr->encPrevCodedQp[tileIdx][lcuRowIndex] = (EB_U8)pictureControlSetPtr->pictureQp;
pictureControlSetPtr->encPrevQuantGroupCodedQp[tileIdx][lcuRowIndex] = (EB_U8)pictureControlSetPtr->pictureQp;
}
}

}

return;
}

Expand Down
5 changes: 2 additions & 3 deletions Source/Lib/Codec/EbModeDecisionProcess.h
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,8 @@ extern const EB_LAMBDA_ASSIGN_FUNC lambdaAssignmentFunctionTable[4];
extern void ProductResetModeDecision(
ModeDecisionContext_t *contextPtr,
PictureControlSet_t *pictureControlSetPtr,
SequenceControlSet_t *sequenceControlSetPtr,
EB_U32 tileRowIndex,
EB_U32 segmentIndex);
SequenceControlSet_t *sequenceControlSetPtr);

extern void ModeDecisionConfigureLcu(
ModeDecisionContext_t *contextPtr,
LargestCodingUnit_t *lcuPtr,
Expand Down
19 changes: 9 additions & 10 deletions Source/Lib/Codec/EbPictureControlSet.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ EB_ERRORTYPE PictureControlSetCtor(
EB_U32 encDecSegCol = initDataPtr->encDecSegmentCol;
EB_U16 pictureWidthInLcu = (EB_U16)((initDataPtr->pictureWidth + initDataPtr->lcuSize - 1) / initDataPtr->lcuSize);
EB_U16 pictureHeightInLcu = (EB_U16)((initDataPtr->pictureHeight + initDataPtr->lcuSize - 1) / initDataPtr->lcuSize);
EB_U32 tileGroupCnt = initDataPtr->tileGroupRow * initDataPtr->tileGroupCol;

EB_U16 tileIdx;
EB_U16 r, c;
Expand Down Expand Up @@ -626,17 +627,12 @@ EB_ERRORTYPE PictureControlSetCtor(
//Jing:
//Alloc segment per tile group
// Segments
EB_MALLOC(EncDecSegments_t**, objectPtr->encDecSegmentCtrl, sizeof(EncDecSegments_t*) * initDataPtr->tileRowCount, EB_N_PTR);

for (tileIdx = 0; tileIdx < initDataPtr->tileRowCount; tileIdx++) {
if (totalTileCount > 1) {
//Jing: Tuning segments number, put tile info to pps
encDecSegRow = pictureHeightInLcu / initDataPtr->tileRowCount;
encDecSegCol = pictureWidthInLcu;
}
EB_MALLOC(EncDecSegments_t**, objectPtr->encDecSegmentCtrl, sizeof(EncDecSegments_t*) * tileGroupCnt, EB_N_PTR);

for (EB_U32 tileGroupIdx = 0; tileGroupIdx < tileGroupCnt; tileGroupIdx++) {
//Can reduce encDecSegCol and encDecSegRow a bit to save memory
return_error = EncDecSegmentsCtor(
&(objectPtr->encDecSegmentCtrl[tileIdx]),
&(objectPtr->encDecSegmentCtrl[tileGroupIdx]),
encDecSegCol,
encDecSegRow);
if (return_error == EB_ErrorInsufficientResources){
Expand Down Expand Up @@ -666,8 +662,9 @@ EB_ERRORTYPE PictureControlSetCtor(
EB_CREATEMUTEX(EB_HANDLE, objectPtr->intraMutex, sizeof(EB_HANDLE), EB_MUTEX);

objectPtr->encDecCodedLcuCount = 0;
return EB_ErrorNone;
objectPtr->resetDone = EB_FALSE;

return EB_ErrorNone;
}


Expand All @@ -691,6 +688,8 @@ EB_ERRORTYPE PictureParentControlSetCtor(
// Jing: Tiles
EB_U32 totalTileCount = initDataPtr->tileRowCount * initDataPtr->tileColumnCount;
EB_MALLOC(TileInfo_t*, objectPtr->tileInfoArray, sizeof(TileInfo_t) * totalTileCount, EB_N_PTR);
EB_MALLOC(TileGroupInfo_t*, objectPtr->tileGroupInfoArray, sizeof(TileGroupInfo_t) * totalTileCount, EB_N_PTR);

objectPtr->pictureWidthInLcu = (EB_U16)((initDataPtr->pictureWidth + initDataPtr->lcuSize - 1) / initDataPtr->lcuSize);
objectPtr->pictureHeightInLcu = (EB_U16)((initDataPtr->pictureHeight + initDataPtr->lcuSize - 1) / initDataPtr->lcuSize);
objectPtr->tileRowCount = initDataPtr->tileRowCount;
Expand Down
16 changes: 16 additions & 0 deletions Source/Lib/Codec/EbPictureControlSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ typedef struct PictureControlSet_s

EB_HANDLE intraMutex;
EB_U32 intraCodedArea;
EB_BOOL resetDone;
EB_U32 encDecCodedLcuCount;

// Mode Decision Config
Expand Down Expand Up @@ -331,6 +332,18 @@ typedef struct LcuStat_s {
EB_U8 lowDistLogo;

} LcuStat_t;

// Jing:
// Logical tile group, added for parallelism in the EncDec stage.
// NOTE(review): the field names suggest all values are expressed in LCU
// units and describe the rectangle covered by one tile group; whether the
// "End" coordinates are inclusive or exclusive is not visible here - confirm
// against the code that fills this structure.
typedef struct TileGroupInfo_s {
EB_U16 tileGroupLcuOriginX;   // presumably the group's first LCU column - TODO confirm
EB_U16 tileGroupLcuOriginY;   // presumably the group's first LCU row - TODO confirm
EB_U16 tileGroupLcuEndX;      // presumably the group's end LCU column - TODO confirm
EB_U16 tileGroupLcuEndY;      // presumably the group's end LCU row - TODO confirm
EB_U16 tileGroupWidthInLcu;   // presumably the group's width, counted in LCUs - TODO confirm
EB_U16 tileGroupHeightInLcu;  // presumably the group's height, counted in LCUs - TODO confirm
} TileGroupInfo_t;


//CHKN
// Add the concept of PictureParentControlSet which is a subset of the old PictureControlSet.
Expand Down Expand Up @@ -386,6 +399,7 @@ typedef struct PictureParentControlSet_s
EB_U16 pictureHeightInLcu;

TileInfo_t *tileInfoArray; //Tile info in raster scan order
TileGroupInfo_t *tileGroupInfoArray;
LcuEdgeInfo_t *lcuEdgeInfoArray; //LCU tile/picture edge info


Expand Down Expand Up @@ -607,6 +621,8 @@ typedef struct PictureControlSetInitData_s
EB_U32 compressedTenBitFormat;
EB_U16 encDecSegmentCol;
EB_U16 encDecSegmentRow;
EB_U16 tileGroupCol;
EB_U16 tileGroupRow;

EB_ENC_MODE encMode;

Expand Down
Loading

0 comments on commit cea889e

Please sign in to comment.