Skip to content
Permalink
Browse files

reduce memory for SAO

  • Loading branch information...
Fabrice Bellard authored and mraulet committed Jan 12, 2015
1 parent 868ec1e commit 5d9f79edef2c11b915bdac3a025b59a32082f409
Showing with 225 additions and 18 deletions.
  1. +39 −4 libavcodec/hevc.c
  2. +11 −3 libavcodec/hevc.h
  3. +175 −11 libavcodec/hevc_filter.c
@@ -131,7 +131,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)

s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
s->tab_ipm = av_mallocz(min_pu_size);
s->is_pcm = av_malloc(min_pu_size);
s->is_pcm = av_mallocz(min_pu_size);

if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
goto fail;
@@ -392,9 +392,34 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
ff_videodsp_init (&s->vdsp, sps->bit_depth);

if (sps->sao_enabled) {
#ifdef USE_SAO_SMALL_BUFFER
{
int ctb_size = 1 << sps->log2_ctb_size;
int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
int c_idx, i;

for (i = 0; i < s->threads_number ; i++) {
HEVCLocalContext *lc = s->HEVClcList[i];
lc->sao_pixel_buffer =
av_malloc(((ctb_size + 2) * (ctb_size + 2)) <<
sps->pixel_shift);
}
for(c_idx = 0; c_idx < c_count; c_idx++) {
int w = sps->width >> sps->hshift[c_idx];
int h = sps->height >> sps->vshift[c_idx];
s->sao_pixel_buffer_h[c_idx] =
av_malloc((w * 2 * sps->ctb_height) <<
sps->pixel_shift);
s->sao_pixel_buffer_v[c_idx] =
av_malloc((h * 2 * sps->ctb_width) <<
sps->pixel_shift);
}
}
#else
av_frame_unref(s->tmp_frame);
ret = get_buffer_sao(s, s->tmp_frame, sps);
s->sao_frame = s->tmp_frame;
#endif
}

s->sps = sps;
@@ -2893,7 +2918,6 @@ static int hevc_frame_start(HEVCContext *s)
int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
((s->sps->height >> s->sps->log2_min_cb_size) + 1);
int ret = 0;
AVFrame *cur_frame;
av_log(s->avctx, AV_LOG_DEBUG, "frame start %d\n", s->decoder_id);


@@ -2947,8 +2971,7 @@ static int hevc_frame_start(HEVCContext *s)
if (ret < 0)
goto fail;

cur_frame = s->sps->sao_enabled ? s->sao_frame : s->frame;
cur_frame->pict_type = 3 - s->sh.slice_type;
s->frame->pict_type = 3 - s->sh.slice_type;

if (!IS_IRAP(s))
ff_hevc_bump_frame(s);
@@ -3594,7 +3617,17 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)

av_freep(&s->cabac_state);

#ifdef USE_SAO_SMALL_BUFFER
for (i = 0; i < s->threads_number; i++) {
av_freep(&s->HEVClcList[i]->sao_pixel_buffer);
}
for (i = 0; i < 3; i++) {
av_freep(&s->sao_pixel_buffer_h[i]);
av_freep(&s->sao_pixel_buffer_v[i]);
}
#else
av_frame_free(&s->tmp_frame);
#endif
av_frame_free(&s->output_frame);

for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
@@ -3658,9 +3691,11 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
if (!s->cabac_state)
goto fail;

#ifndef USE_SAO_SMALL_BUFFER
s->tmp_frame = av_frame_alloc();
if (!s->tmp_frame)
goto fail;
#endif

s->output_frame = av_frame_alloc();
if (!s->output_frame)
@@ -38,6 +38,7 @@
#include "hevc_defs.h"

#define TEST_MV_POC
//#define USE_SAO_SMALL_BUFFER /* reduce the memory used by SAO */

#define HM_MV 1
#define MAX_DPB_SIZE 16 // A.4.1
@@ -948,7 +949,9 @@ typedef struct HEVCLocalContext {
CodingUnit cu;
PredictionUnit pu;
NeighbourAvailable na;

#ifdef USE_SAO_SMALL_BUFFER
uint8_t *sao_pixel_buffer;
#endif
uint8_t cabac_state[HEVC_CONTEXTS];

uint8_t stat_coeff[4];
@@ -996,9 +999,14 @@ typedef struct HEVCContext {
uint8_t *cabac_state;

AVFrame *frame;
AVFrame *sao_frame;
AVFrame *tmp_frame;
AVFrame *output_frame;
#ifdef USE_SAO_SMALL_BUFFER
uint8_t *sao_pixel_buffer_h[3];
uint8_t *sao_pixel_buffer_v[3];
#else
AVFrame *tmp_frame;
AVFrame *sao_frame;
#endif

const HEVCVPS *vps;
const HEVCSPS *sps;
@@ -125,6 +125,9 @@ static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
else
qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];

av_assert2(qPy_a >= -s->sps->qp_bd_offset && qPy_a < 52);
av_assert2(qPy_b >= -s->sps->qp_bd_offset && qPy_b < 52);

return (qPy_a + qPy_b + 1) >> 1;
}

@@ -148,7 +151,7 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
return s->qp_y_tab[x + y * s->sps->min_cb_width];
}

static void copy_CTB(uint8_t *dst, uint8_t *src,
static void copy_CTB(uint8_t *dst, const uint8_t *src,
int width, int height, int stride_dst, int stride_src)
{
int i;
@@ -160,27 +163,79 @@ static void copy_CTB(uint8_t *dst, uint8_t *src,
}
}

static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int height, int c_idx)
#if defined(USE_SAO_SMALL_BUFFER)
/**
 * Copy a single sample from src to dst.
 *
 * @param dst         destination address of the sample
 * @param src         source address of the sample
 * @param pixel_shift 0 copies one byte; any non-zero value copies one
 *                    16-bit sample (two bytes)
 */
static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
{
    if (pixel_shift) {
        /* A fixed-size memcpy keeps src const-qualified and does not depend
         * on the byte pointers being suitably aligned for uint16_t; compilers
         * lower it to a single 16-bit load/store. */
        memcpy(dst, src, sizeof(uint16_t));
    } else {
        *dst = *src;
    }
}

/**
 * Copy `height` samples, one per step, advancing dst and src by their own
 * byte strides after every sample.
 *
 * @param dst         destination address of the first sample
 * @param src         source address of the first sample
 * @param pixel_shift 0 copies one byte per sample; any non-zero value copies
 *                    one 16-bit sample (two bytes)
 * @param height      number of samples to copy
 * @param stride_dst  byte distance between consecutive destination samples
 * @param stride_src  byte distance between consecutive source samples
 */
static void copy_vert(uint8_t *dst, const uint8_t *src,
                      int pixel_shift, int height,
                      int stride_dst, int stride_src)
{
    int i;

    if (pixel_shift == 0) {
        for (i = 0; i < height; i++) {
            *dst = *src;
            dst += stride_dst;
            src += stride_src;
        }
    } else {
        for (i = 0; i < height; i++) {
            /* Fixed-size memcpy instead of a uint16_t pointer cast: src stays
             * const-qualified and no alignment assumption is made on the
             * byte pointers. */
            memcpy(dst, src, sizeof(uint16_t));
            dst += stride_dst;
            src += stride_src;
        }
    }
}

/*
 * Save the four border lines of one CTB block of plane c_idx into the
 * per-plane SAO edge buffers.
 *
 * The first and last rows of the block go to sao_pixel_buffer_h[c_idx], at
 * row slots 2 * y_ctb and 2 * y_ctb + 1 (each slot is one plane width wide),
 * starting at column x.  The first and last columns go to
 * sao_pixel_buffer_v[c_idx], at column slots 2 * x_ctb and 2 * x_ctb + 1
 * (each slot is one plane height long), starting at row y.
 *
 * src points at the top-left sample of the block, stride_src is its line
 * stride in bytes, and width/height are the block dimensions in samples.
 */
static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
                           int stride_src, int x, int y, int width, int height,
                           int c_idx, int x_ctb, int y_ctb)
{
    const int shift   = s->sps->pixel_shift;
    const int plane_w = s->sps->width  >> s->sps->hshift[c_idx];
    const int plane_h = s->sps->height >> s->sps->vshift[c_idx];
    uint8_t *row_store = s->sao_pixel_buffer_h[c_idx];
    uint8_t *col_store = s->sao_pixel_buffer_v[c_idx];
    const uint8_t *bottom_row = src + stride_src * (height - 1);
    const uint8_t *right_col  = src + ((width - 1) << shift);

    /* top and bottom rows of the block */
    memcpy(row_store + (((2 * y_ctb) * plane_w + x) << shift),
           src, width << shift);
    memcpy(row_store + (((2 * y_ctb + 1) * plane_w + x) << shift),
           bottom_row, width << shift);

    /* left and right columns of the block, stored contiguously */
    copy_vert(col_store + (((2 * x_ctb) * plane_h + y) << shift),
              src, shift, height, 1 << shift, stride_src);
    copy_vert(col_store + (((2 * x_ctb + 1) * plane_h + y) << shift),
              right_col, shift, height, 1 << shift, stride_src);
}
#endif

static void restore_tqb_pixels(HEVCContext *s,
uint8_t *src1, const uint8_t *dst1,
ptrdiff_t stride_src, ptrdiff_t stride_dst,
int x0, int y0, int width, int height, int c_idx)
{
if ( s->pps->transquant_bypass_enable_flag ||
(s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
int x, y;
ptrdiff_t stride_dst = s->sao_frame->linesize[c_idx];
ptrdiff_t stride_src = s->frame->linesize[c_idx];
int min_pu_size = 1 << s->sps->log2_min_pu_size;
int hshift = s->sps->hshift[c_idx];
int vshift = s->sps->vshift[c_idx];
int x_min = ((x0 ) >> s->sps->log2_min_pu_size);
int y_min = ((y0 ) >> s->sps->log2_min_pu_size);
int x_max = ((x0 + width ) >> s->sps->log2_min_pu_size);
int y_max = ((y0 + height) >> s->sps->log2_min_pu_size);
int len = min_pu_size >> hshift;
int len = (min_pu_size >> hshift) << s->sps->pixel_shift;
for (y = y_min; y < y_max; y++) {
for (x = x_min; x < x_max; x++) {
if (s->is_pcm[y * s->sps->min_pu_width + x]) {
int n;
uint8_t *src = &s->frame->data[c_idx][ ((y << s->sps->log2_min_pu_size) >> vshift) * stride_src + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride_dst + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
uint8_t *src = src1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
const uint8_t *dst = dst1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
for (n = 0; n < (min_pu_size >> vshift); n++) {
memcpy(src, dst, len);
src += stride_src;
@@ -256,28 +311,133 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
int x0 = x >> s->sps->hshift[c_idx];
int y0 = y >> s->sps->vshift[c_idx];
int stride_src = s->frame->linesize[c_idx];
int stride_dst = s->sao_frame->linesize[c_idx];
int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
int width = FFMIN(ctb_size_h, (s->sps->width >> s->sps->hshift[c_idx]) - x0);
int height = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
#if defined(USE_SAO_SMALL_BUFFER)
int stride_dst = ((1 << (s->sps->log2_ctb_size)) + 2) << s->sps->pixel_shift;
uint8_t *dst = lc->sao_pixel_buffer + (1 * stride_dst) + (1 << s->sps->pixel_shift);
#else
int stride_dst = s->sao_frame->linesize[c_idx];
uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
#endif

switch (sao->type_idx[c_idx]) {
case SAO_BAND:
copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
#if defined(USE_SAO_SMALL_BUFFER)
copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
x_ctb, y_ctb);
#endif
s->hevcdsp.sao_band_filter(src, dst,
stride_src, stride_dst,
sao,
edges, width,
height, c_idx);
restore_tqb_pixels(s, x, y, width, height, c_idx);
restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
case SAO_EDGE:
{
uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
#if defined(USE_SAO_SMALL_BUFFER)
int w = s->sps->width >> s->sps->hshift[c_idx];
int h = s->sps->height >> s->sps->vshift[c_idx];
int left_edge = edges[0];
int top_edge = edges[1];
int right_edge = edges[2];
int bottom_edge = edges[3];
int sh = s->sps->pixel_shift;
int left_pixels, right_pixels;

if (!top_edge) {
int left = 1 - left_edge;
int right = 1 - right_edge;
const uint8_t *src1[2];
uint8_t *dst1;
int src_idx, pos;

dst1 = dst - stride_dst - (left << sh);
src1[0] = src - stride_src - (left << sh);
src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
pos = 0;
if (left) {
src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1, src1[src_idx], sh);
pos += (1 << sh);
}
src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
if (right) {
pos += width << sh;
src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
}
}
if (!bottom_edge) {
int left = 1 - left_edge;
int right = 1 - right_edge;
const uint8_t *src1[2];
uint8_t *dst1;
int src_idx, pos;

dst1 = dst + height * stride_dst - (left << sh);
src1[0] = src + height * stride_src - (left << sh);
src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
pos = 0;
if (left) {
src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1, src1[src_idx], sh);
pos += (1 << sh);
}
src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
if (right) {
pos += width << sh;
src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
SAO_APPLIED);
copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
}
}
left_pixels = 0;
if (!left_edge) {
if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
copy_vert(dst - (1 << sh),
s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
sh, height, stride_dst, 1 << sh);
} else {
left_pixels = 1;
}
}
right_pixels = 0;
if (!right_edge) {
if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
copy_vert(dst + (width << sh),
s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
sh, height, stride_dst, 1 << sh);
} else {
right_pixels = 1;
}
}

copy_CTB(dst - (left_pixels << sh),
src - (left_pixels << sh),
(width + left_pixels + right_pixels) << sh,
height, stride_dst, stride_src);

copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
x_ctb, y_ctb);
#else
uint8_t left_pixels;
/* get the CTB edge pixels from the SAO pixel buffer */
left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED);
if (!edges[1]) {
uint8_t top_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
@@ -305,6 +465,9 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
copy_CTB(dst - (left_pixels << s->sps->pixel_shift),
src - (left_pixels << s->sps->pixel_shift),
(width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
#endif
/* XXX: could handle the restoration here to simplify the
DSP functions */
s->hevcdsp.sao_edge_filter(src, dst, stride_src, stride_dst, sao, width, height, c_idx);
s->hevcdsp.sao_edge_restore[restore](src, dst,
stride_src, stride_dst,
@@ -314,7 +477,8 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
vert_edge,
horiz_edge,
diag_edge);
restore_tqb_pixels(s, x, y, width, height, c_idx);
restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
x, y, width, height, c_idx);
sao->type_idx[c_idx] = SAO_APPLIED;
break;
}

0 comments on commit 5d9f79e

Please sign in to comment.
You can’t perform that action at this time.