Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
swscale: add support for P010LE/BE output
  • Loading branch information
BtbN committed Aug 31, 2016
1 parent 2625b95 commit 99882d0
Show file tree
Hide file tree
Showing 13 changed files with 119 additions and 5 deletions.
98 changes: 97 additions & 1 deletion libswscale/output.c
Expand Up @@ -311,6 +311,98 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
}
}


/*
 * Store one sample at pos as a 16-bit word.
 *
 * val is shifted right by the caller's local `shift`, passed through
 * av_clip_uintp2(..., 10) (libavutil's clip to an unsigned 10-bit range),
 * then shifted left by 6 before being written. The byte order is chosen by
 * the caller's local `big_endian`: AV_WB16 when non-zero, AV_WL16 otherwise.
 *
 * Both `shift` and `big_endian` are picked up by name from the enclosing
 * scope, so every user of this macro must declare them.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe under an unbraced if/else), and the arguments are
 * parenthesized so that low-precedence expressions are shifted as a whole.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16((pos), av_clip_uintp2((val) >> shift, 10) << 6); \
        } else { \
            AV_WL16((pos), av_clip_uintp2((val) >> shift, 10) << 6); \
        } \
    } while (0)

/*
 * Convert dstW samples from src into 16-bit words at dest without any
 * vertical filtering.
 *
 * Each sample gets a rounding term of half of (1 << shift) added and is then
 * handed to output_pixel(), which applies the right shift, the 10-bit clip,
 * the left shift by 6 and the endian-aware store.
 *
 * src        input samples, one per output pixel
 * dest       output words
 * dstW       number of samples to write
 * big_endian non-zero to store big-endian, zero for little-endian
 */
static void yuv2p010l1_c(const int16_t *src,
                         uint16_t *dest, int dstW,
                         int big_endian)
{
    /* `shift` and `big_endian` are read by name inside output_pixel(). */
    const int shift = 5;
    const int bias  = 1 << (shift - 1);
    int x = 0;

    while (x < dstW) {
        const int rounded = src[x] + bias;

        output_pixel(&dest[x], rounded);
        x++;
    }
}

/*
 * Vertically filter filterSize input lines into one line of 16-bit output
 * words.
 *
 * For every output position the accumulator starts at half of (1 << shift)
 * for rounding, and src[tap][x] * filter[tap] is summed over all taps.
 * output_pixel() then applies the right shift, the 10-bit clip, the left
 * shift by 6 and the endian-aware store.
 *
 * filter     filter coefficients, one per input line
 * filterSize number of taps / input lines
 * src        array of filterSize input line pointers
 * dest       output words
 * dstW       number of samples to write
 * big_endian non-zero to store big-endian, zero for little-endian
 */
static void yuv2p010lX_c(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian)
{
    /* `shift` and `big_endian` are read by name inside output_pixel(). */
    const int shift = 17;
    const int bias  = 1 << (shift - 1);
    int x;

    for (x = 0; x < dstW; x++) {
        int acc = bias;
        int tap = 0;

        while (tap < filterSize) {
            acc += src[tap][x] * filter[tap];
            tap++;
        }

        output_pixel(&dest[x], acc);
    }
}

/*
 * Vertically filter the U and V input lines and write them interleaved
 * (U first, then V) as 16-bit words into dest8.
 *
 * Both accumulators start at half of (1 << shift) for rounding and sum
 * sample * chrFilter[tap] over all taps. output_pixel() then applies the
 * right shift, the 10-bit clip, the left shift by 6 and the store.
 * Big-endian output is selected when c->dstFormat is AV_PIX_FMT_P010BE;
 * any other format is written little-endian.
 *
 * c             scaler context, only dstFormat is read here
 * chrFilter     filter coefficients, one per input line
 * chrFilterSize number of taps / input lines
 * chrUSrc       array of chrFilterSize U line pointers
 * chrVSrc       array of chrFilterSize V line pointers
 * dest8         output buffer, accessed as uint16_t words
 * chrDstW       number of U/V pairs to write
 */
static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
{
    /* `shift` and `big_endian` are read by name inside output_pixel(). */
    const int shift      = 17;
    const int bias       = 1 << (shift - 1);
    const int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
    uint16_t *pair       = (uint16_t *)dest8;
    int x, tap;

    for (x = 0; x < chrDstW; x++, pair += 2) {
        int u_acc = bias;
        int v_acc = bias;

        for (tap = 0; tap < chrFilterSize; tap++) {
            const int coeff = chrFilter[tap];

            u_acc += chrUSrc[tap][x] * coeff;
            v_acc += chrVSrc[tap][x] * coeff;
        }

        output_pixel(&pair[0], u_acc);
        output_pixel(&pair[1], v_acc);
    }
}

/*
 * Little-endian entry point for the unfiltered writer: forwards to
 * yuv2p010l1_c() with big_endian = 0.
 * dither and offset are accepted but not used.
 */
static void yuv2p010l1_LE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t *)dest;

    yuv2p010l1_c(src, out16, dstW, 0 /* big_endian */);
}

/*
 * Big-endian entry point for the unfiltered writer: forwards to
 * yuv2p010l1_c() with big_endian = 1.
 * dither and offset are accepted but not used.
 */
static void yuv2p010l1_BE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t *)dest;

    yuv2p010l1_c(src, out16, dstW, 1 /* big_endian */);
}

/*
 * Little-endian entry point for the vertically filtered writer: forwards to
 * yuv2p010lX_c() with big_endian = 0.
 * dither and offset are accepted but not used.
 */
static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t *)dest;

    yuv2p010lX_c(filter, filterSize, src, out16, dstW, 0 /* big_endian */);
}

/*
 * Big-endian entry point for the vertically filtered writer: forwards to
 * yuv2p010lX_c() with big_endian = 1.
 * dither and offset are accepted but not used.
 */
static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t *)dest;

    yuv2p010lX_c(filter, filterSize, src, out16, dstW, 1 /* big_endian */);
}

#undef output_pixel


/*
 * Shift acc left by one bit and set the new low bit when val >= 234.
 *
 * Wrapped in do { } while (0) so that both steps form a single statement;
 * otherwise an unbraced `if (x) accumulate_bit(a, v);` would make only the
 * shift conditional and always execute the OR.
 */
#define accumulate_bit(acc, val) \
    do { \
        (acc) <<= 1; \
        (acc) |= (val) >= 234; \
    } while (0)
Expand Down Expand Up @@ -2085,7 +2177,11 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
enum AVPixelFormat dstFormat = c->dstFormat;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);

if (is16BPS(dstFormat)) {
if (dstFormat == AV_PIX_FMT_P010LE || dstFormat == AV_PIX_FMT_P010BE) {
*yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
*yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
*yuv2nv12cX = yuv2p010cX_c;
} else if (is16BPS(dstFormat)) {
*yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
*yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
} else if (is9_OR_10BPS(dstFormat)) {
Expand Down
4 changes: 2 additions & 2 deletions libswscale/utils.c
Expand Up @@ -246,8 +246,8 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
[AV_PIX_FMT_XYZ12BE] = { 1, 1, 1 },
[AV_PIX_FMT_XYZ12LE] = { 1, 1, 1 },
[AV_PIX_FMT_AYUV64LE] = { 1, 1},
[AV_PIX_FMT_P010LE] = { 1, 0 },
[AV_PIX_FMT_P010BE] = { 1, 0 },
[AV_PIX_FMT_P010LE] = { 1, 1 },
[AV_PIX_FMT_P010BE] = { 1, 1 },
};

int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
Expand Down
4 changes: 2 additions & 2 deletions libswscale/x86/swscale.c
Expand Up @@ -429,14 +429,14 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
switch(c->dstBpc){ \
case 16: do_16_case; break; \
case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
case 8: if ((condition_8bit) && !c->use_mmx_vfilter) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
}
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
switch(c->dstBpc){ \
case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
case 10: if (!isBE(c->dstFormat) && c->dstFormat != AV_PIX_FMT_P010LE && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
default: av_assert0(c->dstBpc>8); \
Expand Down
1 change: 1 addition & 0 deletions tests/ref/fate/filter-pixdesc-p010be
@@ -0,0 +1 @@
pixdesc-p010be 784a49bf554861da9d0809a615bcf813
1 change: 1 addition & 0 deletions tests/ref/fate/filter-pixdesc-p010le
@@ -0,0 +1 @@
pixdesc-p010le 0268fd44f63022e21ada69704534fc85
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-copy
Expand Up @@ -35,6 +35,8 @@ monob 8b04f859fee6a0be856be184acd7a0b5
monow 54d16d2c01abfd72ecdb5e51e283937c
nv12 8e24feb2c544dc26a20047a71e4c27aa
nv21 335d85c9af6110f26ae9e187a82ed2cf
p010be 7f9842d6015026136bad60d03c035cc3
p010le 1929db89609c4b8c6d9c9030a9e7843d
pal8 ff5929f5b42075793b2c34cb441bede5
rgb0 0de71e5a1f97f81fb51397a0435bfa72
rgb24 f4438057d046e6d98ade4e45294b21be
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-crop
Expand Up @@ -33,6 +33,8 @@ gray16be 38f599da990224de86e3dc7a543121a9
gray16le 9ff7c866bd98def4e6c91542c1c45f80
nv12 92cda427f794374731ec0321ee00caac
nv21 1bcfc197f4fb95de85ba58182d8d2f69
p010be 8b2de2eb6b099bbf355bfc55a0694ddc
p010le a1e4f713e145dfc465bfe0cc77096a03
pal8 1f2cdc8e718f95c875dbc1034a688bfb
rgb0 736646b70dd9a0be22b8da8041e35035
rgb24 c5fbbf816bb2000f4d2914e335698ef5
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-field
Expand Up @@ -35,6 +35,8 @@ monob 2129cc72a484d7e10a44de9117aa9f80
monow 03d783611d265cae78293f88ea126ea1
nv12 16f7a46708ef25ebd0b72e47920cc11e
nv21 7294574037cc7f9373ef5695d8ebe809
p010be a0311a09bba7383553267d2b3b9c075e
p010le f1cc90d292046109a626db2da9f0f9b6
pal8 0658c18dcd8d052d59dfbe23f5b368d9
rgb0 ca3fa6e865b91b3511c7f2bf62830059
rgb24 25ab271e26a5785be169578d99da5dd0
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-hflip
Expand Up @@ -33,6 +33,8 @@ gray16be cf7294d9aa23e1b838692ec01ade587b
gray16le d91ce41e304419bcf32ac792f01bd64f
nv12 801e58f1be5fd0b5bc4bf007c604b0b4
nv21 9f10dfff8963dc327d3395af21f0554f
p010be 744b13e44d39e1ff7588983fa03e0101
p010le aeb31f50c66f376b0530c7bb6287212b
pal8 5b7c77d99817b4f52339742a47de7797
rgb0 0092452f37d73da20193265ace0b7d57
rgb24 21571104e6091a689feabb7867e513dd
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-il
Expand Up @@ -35,6 +35,8 @@ monob faba75df28033ba7ce3d82ff2a99ee68
monow 6e9cfb8d3a344c5f0c3e1d5e1297e580
nv12 3c3ba9b1b4c4dfff09c26f71b51dd146
nv21 ab586d8781246b5a32d8760a61db9797
p010be 3df51286ef66b53e3e283dbbab582263
p010le 38945445b360fa737e9e37257393e823
rgb0 cfaf68671e43248267d8cd50cae8c13f
rgb24 88894f608cf33ba310f21996748d77a7
rgb444be 99d36d814988fb388aacdef575dacfcf
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-null
Expand Up @@ -35,6 +35,8 @@ monob 8b04f859fee6a0be856be184acd7a0b5
monow 54d16d2c01abfd72ecdb5e51e283937c
nv12 8e24feb2c544dc26a20047a71e4c27aa
nv21 335d85c9af6110f26ae9e187a82ed2cf
p010be 7f9842d6015026136bad60d03c035cc3
p010le 1929db89609c4b8c6d9c9030a9e7843d
pal8 ff5929f5b42075793b2c34cb441bede5
rgb0 0de71e5a1f97f81fb51397a0435bfa72
rgb24 f4438057d046e6d98ade4e45294b21be
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-scale
Expand Up @@ -35,6 +35,8 @@ monob f01cb0b623357387827902d9d0963435
monow 35c68b86c226d6990b2dcb573a05ff6b
nv12 b118d24a3653fe66e5d9e079033aef79
nv21 c74bb1c10dbbdee8a1f682b194486c4d
p010be 1d6726d94bf1385996a9a9840dd0e878
p010le 5d436e6b35292a0e356d81f37f989b66
pal8 29e10892009b2cfe431815ec3052ed3b
rgb0 fbd27e98154efb7535826afed41e9bb0
rgb24 e022e741451e81f2ecce1c7240b93e87
Expand Down
2 changes: 2 additions & 0 deletions tests/ref/fate/filter-pixfmts-vflip
Expand Up @@ -35,6 +35,8 @@ monob 7810c4857822ccfc844d78f5e803269a
monow 90a947bfcd5f2261e83b577f48ec57b1
nv12 261ebe585ae2aa4e70d39a10c1679294
nv21 2909feacd27bebb080c8e0fa41795269
p010be 06e9354b6e0e38ba41736352cedc0bd5
p010le cdf6a3c38d9d4e3f079fa369e1dda662
pal8 450b0155d0f2d5628bf95a442db5f817
rgb0 56a7ea69541bcd27bef6a5615784722b
rgb24 195e6dae1c3a488b9d3ceb7560d25d85
Expand Down

2 comments on commit 99882d0

@ggnull35
Copy link
Contributor

@ggnull35 ggnull35 commented on 99882d0 Sep 1, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Nvidia Pascal Titan X GPU cannot reach 50 fps when encoding Main 10 P010LE HEVC:

ffmpeg -loglevel verbose -i /media/usb1/4k_sampels/Samsung_SUHD_Picture_Quality\ Demo_Nano_Crystal\ Display_UK-Version.mp4 -c:v:0 nvenc_hevc -preset hp -cbr 1 -2pass 0 -r 50 -vb 28000k -minrate 28000k -maxrate 28000k -bufsize 28000k -muxrate 30000k -c:a:0 aac -b:a:0 192k -pix_fmt p010le 'udp://233.33.33.1:5001'

The frame rate fluctuates around 41-43 fps. With the same command and YUV420P, it reaches 120-130 fps.

GPU NVENC Load:

 nvidia-smi dmon -i 0
# gpu   pwr  temp    sm   mem   enc   dec  mclk  pclk
# Idx     W     C     %     %     %     %   MHz   MHz
    0    81    67     9     2    41     0  4513  1809
    0    80    66     9     2    41     0  4513  1809
    0    81    67     9     2    42     0  4513  1809
    0    80    67    10     2    41     0  4513  1809
    0    81    67     9     2    44     0  4513  1809

I think the bottleneck is not on the GPU side; the pixel format conversion may need a speed improvement.

@ggnull35
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the codec is changed to rawvideo to test pixel format conversion performance, the frame rate again fluctuates around 39-40 fps.

Please sign in to comment.