From 0eda6929db5b8eba1dd0cd019dc3978cf3bce096 Mon Sep 17 00:00:00 2001 From: Krzysztof Lecki Date: Fri, 6 Oct 2023 12:04:25 +0200 Subject: [PATCH] Update dependecies: OpenCV, libtiff (#89) Revert the webp patch as it is no longer needed Signed-off-by: Krzysztof Lecki --- README.rst | 8 +- build_scripts/build_opencv.sh | 1 - patches/opencv-libwebp-CVE-2023-5129.patch | 13905 ------------------- third_party/libtiff | 2 +- third_party/opencv | 2 +- 5 files changed, 6 insertions(+), 13912 deletions(-) delete mode 100644 patches/opencv-libwebp-CVE-2023-5129.patch diff --git a/README.rst b/README.rst index f286eb3..9d67f03 100644 --- a/README.rst +++ b/README.rst @@ -96,8 +96,8 @@ The repository consists mostly of externally hosted subrepositories: .. _opencv: https://github.com/opencv/opencv/ .. |opencvlic| replace:: Apache License 2.0 .. _opencvlic: https://github.com/opencv/opencv/blob/master/LICENSE -.. |opencvver| replace:: 4.8.0 -.. _opencvver: https://github.com/opencv/opencv/releases/tag/4.8.0 +.. |opencvver| replace:: 4.8.1 +.. _opencvver: https://github.com/opencv/opencv/releases/tag/4.8.1 .. _openjpeg: https://github.com/uclouvain/openjpeg .. |openjpeglic| replace:: BSD-2 license @@ -108,8 +108,8 @@ The repository consists mostly of externally hosted subrepositories: .. _libtiff: https://gitlab.com/libtiff/libtiff .. |libtifflic| replace:: BSD-2 license .. _libtifflic: https://gitlab.com/libtiff/libtiff/-/blob/master/README.md -.. |libtiffver| replace:: 4.5.1 (+ Build System Patch) -.. _libtiffver: https://gitlab.com/libtiff/libtiff/-/tree/v4.5.1 +.. |libtiffver| replace:: 4.6.0 (+ Build System Patch) +.. _libtiffver: https://gitlab.com/libtiff/libtiff/-/tree/v4.6.0 .. _zstd: https://github.com/facebook/zstd .. 
|zstdlic| replace:: BSD-3 license diff --git a/build_scripts/build_opencv.sh b/build_scripts/build_opencv.sh index a853f3b..508e76d 100755 --- a/build_scripts/build_opencv.sh +++ b/build_scripts/build_opencv.sh @@ -16,7 +16,6 @@ # OpenCV pushd third_party/opencv -patch -p1 < ${ROOT_DIR}/patches/opencv-libwebp-CVE-2023-5129.patch mkdir -p build cd build cmake -DCMAKE_BUILD_TYPE=RELEASE \ diff --git a/patches/opencv-libwebp-CVE-2023-5129.patch b/patches/opencv-libwebp-CVE-2023-5129.patch deleted file mode 100644 index 057ae37..0000000 --- a/patches/opencv-libwebp-CVE-2023-5129.patch +++ /dev/null @@ -1,13905 +0,0 @@ -From 74a671d5394e3b2e6e8dbd1f32e152260c60e847 Mon Sep 17 00:00:00 2001 -From: Vincent Rabaud -Date: Thu, 14 Sep 2023 09:49:29 +0200 -Subject: [PATCH 1/2] Bump libwebp to 1.3.2 - ---- - 3rdparty/libwebp/CMakeLists.txt | 6 - - .../patches/20190910-msa-asm-patch.diff | 22 - - 3rdparty/libwebp/sharpyuv/sharpyuv.c | 565 ++++++++++++++ - 3rdparty/libwebp/sharpyuv/sharpyuv.h | 174 +++++ - 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c | 14 + - 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h | 22 + - 3rdparty/libwebp/sharpyuv/sharpyuv_csp.c | 110 +++ - 3rdparty/libwebp/sharpyuv/sharpyuv_csp.h | 60 ++ - 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c | 104 +++ - 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h | 28 + - 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c | 419 +++++++++++ - 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h | 38 + - 3rdparty/libwebp/sharpyuv/sharpyuv_neon.c | 181 +++++ - 3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c | 201 +++++ - 3rdparty/libwebp/src/dec/alpha_dec.c | 37 +- - 3rdparty/libwebp/src/dec/buffer_dec.c | 12 +- - 3rdparty/libwebp/src/dec/frame_dec.c | 2 +- - 3rdparty/libwebp/src/dec/io_dec.c | 98 ++- - 3rdparty/libwebp/src/dec/tree_dec.c | 3 +- - 3rdparty/libwebp/src/dec/vp8_dec.c | 6 +- - 3rdparty/libwebp/src/dec/vp8i_dec.h | 4 +- - 3rdparty/libwebp/src/dec/vp8l_dec.c | 244 +++--- - 3rdparty/libwebp/src/dec/vp8li_dec.h | 15 +- - 3rdparty/libwebp/src/dec/webp_dec.c | 
52 +- - 3rdparty/libwebp/src/dec/webpi_dec.h | 4 + - 3rdparty/libwebp/src/demux/anim_decode.c | 52 +- - 3rdparty/libwebp/src/demux/demux.c | 23 +- - 3rdparty/libwebp/src/dsp/alpha_processing.c | 66 +- - .../libwebp/src/dsp/alpha_processing_neon.c | 27 +- - .../libwebp/src/dsp/alpha_processing_sse2.c | 73 +- - .../libwebp/src/dsp/alpha_processing_sse41.c | 8 +- - 3rdparty/libwebp/src/dsp/cost.c | 5 +- - 3rdparty/libwebp/src/dsp/cost_neon.c | 4 +- - 3rdparty/libwebp/src/dsp/cpu.c | 21 +- - 3rdparty/libwebp/src/dsp/cpu.h | 266 +++++++ - 3rdparty/libwebp/src/dsp/dec.c | 7 +- - 3rdparty/libwebp/src/dsp/dec_neon.c | 4 +- - 3rdparty/libwebp/src/dsp/dec_sse2.c | 93 +-- - 3rdparty/libwebp/src/dsp/dec_sse41.c | 2 +- - 3rdparty/libwebp/src/dsp/dsp.h | 264 ++----- - 3rdparty/libwebp/src/dsp/enc.c | 7 +- - 3rdparty/libwebp/src/dsp/enc_neon.c | 13 +- - 3rdparty/libwebp/src/dsp/enc_sse2.c | 293 ++++++-- - 3rdparty/libwebp/src/dsp/filters.c | 14 +- - 3rdparty/libwebp/src/dsp/filters_sse2.c | 5 + - 3rdparty/libwebp/src/dsp/lossless.c | 81 +- - 3rdparty/libwebp/src/dsp/lossless.h | 53 +- - 3rdparty/libwebp/src/dsp/lossless_common.h | 8 +- - 3rdparty/libwebp/src/dsp/lossless_enc.c | 73 +- - .../libwebp/src/dsp/lossless_enc_mips32.c | 22 +- - 3rdparty/libwebp/src/dsp/lossless_enc_neon.c | 2 +- - 3rdparty/libwebp/src/dsp/lossless_enc_sse2.c | 121 ++- - 3rdparty/libwebp/src/dsp/lossless_enc_sse41.c | 175 +++-- - .../libwebp/src/dsp/lossless_mips_dsp_r2.c | 37 +- - 3rdparty/libwebp/src/dsp/lossless_neon.c | 22 +- - 3rdparty/libwebp/src/dsp/lossless_sse2.c | 130 ++-- - 3rdparty/libwebp/src/dsp/lossless_sse41.c | 133 ++++ - 3rdparty/libwebp/src/dsp/msa_macro.h | 37 +- - 3rdparty/libwebp/src/dsp/neon.h | 11 +- - 3rdparty/libwebp/src/dsp/quant.h | 16 +- - 3rdparty/libwebp/src/dsp/rescaler.c | 12 +- - 3rdparty/libwebp/src/dsp/rescaler_sse2.c | 6 +- - 3rdparty/libwebp/src/dsp/ssim.c | 3 +- - 3rdparty/libwebp/src/dsp/upsampling.c | 11 +- - 3rdparty/libwebp/src/dsp/upsampling_neon.c | 2 +- - 
3rdparty/libwebp/src/dsp/upsampling_sse2.c | 2 +- - 3rdparty/libwebp/src/dsp/yuv.c | 85 +-- - 3rdparty/libwebp/src/dsp/yuv.h | 2 +- - 3rdparty/libwebp/src/dsp/yuv_neon.c | 108 --- - 3rdparty/libwebp/src/dsp/yuv_sse2.c | 132 +--- - 3rdparty/libwebp/src/dsp/yuv_sse41.c | 6 +- - 3rdparty/libwebp/src/enc/alpha_enc.c | 32 +- - 3rdparty/libwebp/src/enc/analysis_enc.c | 12 +- - .../src/enc/backward_references_cost_enc.c | 75 +- - .../libwebp/src/enc/backward_references_enc.c | 83 ++- - .../libwebp/src/enc/backward_references_enc.h | 12 +- - 3rdparty/libwebp/src/enc/frame_enc.c | 21 +- - 3rdparty/libwebp/src/enc/histogram_enc.c | 252 +++---- - 3rdparty/libwebp/src/enc/histogram_enc.h | 30 +- - 3rdparty/libwebp/src/enc/picture_csp_enc.c | 502 ++----------- - 3rdparty/libwebp/src/enc/picture_enc.c | 46 +- - .../libwebp/src/enc/picture_rescale_enc.c | 119 ++- - 3rdparty/libwebp/src/enc/picture_tools_enc.c | 45 +- - 3rdparty/libwebp/src/enc/predictor_enc.c | 52 +- - 3rdparty/libwebp/src/enc/quant_enc.c | 120 +-- - 3rdparty/libwebp/src/enc/syntax_enc.c | 8 +- - 3rdparty/libwebp/src/enc/vp8i_enc.h | 29 +- - 3rdparty/libwebp/src/enc/vp8l_enc.c | 694 +++++++++--------- - 3rdparty/libwebp/src/enc/vp8li_enc.h | 28 +- - 3rdparty/libwebp/src/enc/webp_enc.c | 14 +- - 3rdparty/libwebp/src/mux/anim_encode.c | 25 +- - 3rdparty/libwebp/src/mux/muxedit.c | 6 +- - 3rdparty/libwebp/src/mux/muxi.h | 4 +- - 3rdparty/libwebp/src/mux/muxinternal.c | 9 +- - 3rdparty/libwebp/src/mux/muxread.c | 11 +- - .../libwebp/src/utils/bit_reader_inl_utils.h | 13 +- - 3rdparty/libwebp/src/utils/bit_reader_utils.c | 3 +- - 3rdparty/libwebp/src/utils/bit_reader_utils.h | 3 +- - 3rdparty/libwebp/src/utils/bit_writer_utils.c | 4 +- - .../libwebp/src/utils/color_cache_utils.c | 22 +- - .../libwebp/src/utils/huffman_encode_utils.c | 5 +- - .../libwebp/src/utils/huffman_encode_utils.h | 2 +- - 3rdparty/libwebp/src/utils/huffman_utils.c | 99 ++- - 3rdparty/libwebp/src/utils/huffman_utils.h | 27 +- - 
3rdparty/libwebp/src/utils/palette.c | 402 ++++++++++ - 3rdparty/libwebp/src/utils/palette.h | 60 ++ - .../src/utils/quant_levels_dec_utils.c | 2 +- - 3rdparty/libwebp/src/utils/rescaler_utils.c | 114 +-- - 3rdparty/libwebp/src/utils/rescaler_utils.h | 13 +- - 3rdparty/libwebp/src/utils/utils.c | 77 +- - 3rdparty/libwebp/src/utils/utils.h | 43 +- - 3rdparty/libwebp/src/webp/decode.h | 44 +- - 3rdparty/libwebp/src/webp/encode.h | 6 +- - 3rdparty/libwebp/src/webp/format_constants.h | 2 +- - 3rdparty/libwebp/src/webp/types.h | 6 +- - 115 files changed, 5352 insertions(+), 2782 deletions(-) - delete mode 100644 3rdparty/libwebp/patches/20190910-msa-asm-patch.diff - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv.h - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_csp.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_csp.h - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_neon.c - create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c - create mode 100644 3rdparty/libwebp/src/dsp/cpu.h - create mode 100644 3rdparty/libwebp/src/dsp/lossless_sse41.c - create mode 100644 3rdparty/libwebp/src/utils/palette.c - create mode 100644 3rdparty/libwebp/src/utils/palette.h - -diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt -index 723575c8db3d..532c03026568 100644 ---- a/3rdparty/libwebp/CMakeLists.txt -+++ b/3rdparty/libwebp/CMakeLists.txt -@@ -21,12 +21,6 @@ if(ANDROID AND ARMEABI_V7A AND NOT NEON) - endforeach() - endif() - --# FIX for quant.h - requires C99 for() loops 
--ocv_check_flag_support(C "-std=c99" _varname "${CMAKE_C_FLAGS}") --if(${_varname}) -- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") --endif() -- - - # ---------------------------------------------------------------------------------- - # Define the library target: -diff --git a/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff b/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff -deleted file mode 100644 -index 1be213520312..000000000000 ---- a/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff -+++ /dev/null -@@ -1,22 +0,0 @@ --diff --git a/3rdparty/libwebp/src/dsp/msa_macro.h b/3rdparty/libwebp/src/dsp/msa_macro.h --index de026a1d9e..a16c0bb300 100644 ----- a/3rdparty/libwebp/src/dsp/msa_macro.h --+++ b/3rdparty/libwebp/src/dsp/msa_macro.h --@@ -73,7 +73,7 @@ -- static inline TYPE FUNC_NAME(const void* const psrc) { \ -- const uint8_t* const psrc_m = (const uint8_t*)psrc; \ -- TYPE val_m; \ --- asm volatile ( \ --+ __asm__ volatile ( \ -- "" #INSTR " %[val_m], %[psrc_m] \n\t" \ -- : [val_m] "=r" (val_m) \ -- : [psrc_m] "m" (*psrc_m)); \ --@@ -86,7 +86,7 @@ -- static inline void FUNC_NAME(TYPE val, void* const pdst) { \ -- uint8_t* const pdst_m = (uint8_t*)pdst; \ -- TYPE val_m = val; \ --- asm volatile ( \ --+ __asm__ volatile ( \ -- " " #INSTR " %[val_m], %[pdst_m] \n\t" \ -- : [pdst_m] "=m" (*pdst_m) \ -- : [val_m] "r" (val_m)); \ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv.c b/3rdparty/libwebp/sharpyuv/sharpyuv.c -new file mode 100644 -index 000000000000..b94885a6c320 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv.c -@@ -0,0 +1,565 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. 
-+// ----------------------------------------------------------------------------- -+// -+// Sharp RGB to YUV conversion. -+// -+// Author: Skal (pascal.massimino@gmail.com) -+ -+#include "sharpyuv/sharpyuv.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "src/webp/types.h" -+#include "sharpyuv/sharpyuv_cpu.h" -+#include "sharpyuv/sharpyuv_dsp.h" -+#include "sharpyuv/sharpyuv_gamma.h" -+ -+//------------------------------------------------------------------------------ -+ -+int SharpYuvGetVersion(void) { -+ return SHARPYUV_VERSION; -+} -+ -+//------------------------------------------------------------------------------ -+// Sharp RGB->YUV conversion -+ -+static const int kNumIterations = 4; -+ -+#define YUV_FIX 16 // fixed-point precision for RGB->YUV -+static const int kYuvHalf = 1 << (YUV_FIX - 1); -+ -+// Max bit depth so that intermediate calculations fit in 16 bits. -+static const int kMaxBitDepth = 14; -+ -+// Returns the precision shift to use based on the input rgb_bit_depth. -+static int GetPrecisionShift(int rgb_bit_depth) { -+ // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove -+ // bits if needed. -+ return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2 -+ : (kMaxBitDepth - rgb_bit_depth); -+} -+ -+typedef int16_t fixed_t; // signed type with extra precision for UV -+typedef uint16_t fixed_y_t; // unsigned type with extra precision for W -+ -+//------------------------------------------------------------------------------ -+ -+static uint8_t clip_8b(fixed_t v) { -+ return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; -+} -+ -+static uint16_t clip(fixed_t v, int max) { -+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; -+} -+ -+static fixed_y_t clip_bit_depth(int y, int bit_depth) { -+ const int max = (1 << bit_depth) - 1; -+ return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 
0 : max; -+} -+ -+//------------------------------------------------------------------------------ -+ -+static int RGBToGray(int64_t r, int64_t g, int64_t b) { -+ const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; -+ return (int)(luma >> YUV_FIX); -+} -+ -+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d, -+ int rgb_bit_depth, -+ SharpYuvTransferFunctionType transfer_type) { -+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); -+ const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type); -+ const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type); -+ const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type); -+ const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type); -+ return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth, -+ transfer_type); -+} -+ -+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, -+ int rgb_bit_depth, -+ SharpYuvTransferFunctionType transfer_type) { -+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); -+ int i; -+ for (i = 0; i < w; ++i) { -+ const uint32_t R = -+ SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type); -+ const uint32_t G = -+ SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type); -+ const uint32_t B = -+ SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type); -+ const uint32_t Y = RGBToGray(R, G, B); -+ dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type); -+ } -+} -+ -+static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, -+ fixed_t* dst, int uv_w, int rgb_bit_depth, -+ SharpYuvTransferFunctionType transfer_type) { -+ int i; -+ for (i = 0; i < uv_w; ++i) { -+ const int r = -+ ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0], -+ src2[0 * uv_w + 1], rgb_bit_depth, transfer_type); -+ const int g = -+ ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0], -+ src2[2 
* uv_w + 1], rgb_bit_depth, transfer_type); -+ const int b = -+ ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0], -+ src2[4 * uv_w + 1], rgb_bit_depth, transfer_type); -+ const int W = RGBToGray(r, g, b); -+ dst[0 * uv_w] = (fixed_t)(r - W); -+ dst[1 * uv_w] = (fixed_t)(g - W); -+ dst[2 * uv_w] = (fixed_t)(b - W); -+ dst += 1; -+ src1 += 2; -+ src2 += 2; -+ } -+} -+ -+static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { -+ int i; -+ assert(w > 0); -+ for (i = 0; i < w; ++i) { -+ y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); -+ } -+} -+ -+//------------------------------------------------------------------------------ -+ -+static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { -+ const int v0 = (A * 3 + B + 2) >> 2; -+ return clip_bit_depth(v0 + W0, bit_depth); -+} -+ -+//------------------------------------------------------------------------------ -+ -+static WEBP_INLINE int Shift(int v, int shift) { -+ return (shift >= 0) ? (v << shift) : (v >> -shift); -+} -+ -+static void ImportOneRow(const uint8_t* const r_ptr, -+ const uint8_t* const g_ptr, -+ const uint8_t* const b_ptr, -+ int rgb_step, -+ int rgb_bit_depth, -+ int pic_width, -+ fixed_y_t* const dst) { -+ // Convert the rgb_step from a number of bytes to a number of uint8_t or -+ // uint16_t values depending the bit depth. -+ const int step = (rgb_bit_depth > 8) ? 
rgb_step / 2 : rgb_step; -+ int i; -+ const int w = (pic_width + 1) & ~1; -+ for (i = 0; i < pic_width; ++i) { -+ const int off = i * step; -+ const int shift = GetPrecisionShift(rgb_bit_depth); -+ if (rgb_bit_depth == 8) { -+ dst[i + 0 * w] = Shift(r_ptr[off], shift); -+ dst[i + 1 * w] = Shift(g_ptr[off], shift); -+ dst[i + 2 * w] = Shift(b_ptr[off], shift); -+ } else { -+ dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift); -+ dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift); -+ dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift); -+ } -+ } -+ if (pic_width & 1) { // replicate rightmost pixel -+ dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; -+ dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; -+ dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; -+ } -+} -+ -+static void InterpolateTwoRows(const fixed_y_t* const best_y, -+ const fixed_t* prev_uv, -+ const fixed_t* cur_uv, -+ const fixed_t* next_uv, -+ int w, -+ fixed_y_t* out1, -+ fixed_y_t* out2, -+ int rgb_bit_depth) { -+ const int uv_w = w >> 1; -+ const int len = (w - 1) >> 1; // length to filter -+ int k = 3; -+ const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); -+ while (k-- > 0) { // process each R/G/B segments in turn -+ // special boundary case for i==0 -+ out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth); -+ out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth); -+ -+ SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, -+ bit_depth); -+ SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, -+ bit_depth); -+ -+ // special boundary case for i == w - 1 when w is even -+ if (!(w & 1)) { -+ out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], -+ best_y[w - 1 + 0], bit_depth); -+ out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], -+ best_y[w - 1 + w], bit_depth); -+ } -+ out1 += w; -+ out2 += w; -+ prev_uv += uv_w; -+ cur_uv += uv_w; -+ next_uv += uv_w; -+ } -+} -+ -+static WEBP_INLINE int 
RGBToYUVComponent(int r, int g, int b, -+ const int coeffs[4], int sfix) { -+ const int srounder = 1 << (YUV_FIX + sfix - 1); -+ const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + -+ coeffs[3] + srounder; -+ return (luma >> (YUV_FIX + sfix)); -+} -+ -+static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, -+ uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, -+ int u_stride, uint8_t* v_ptr, int v_stride, -+ int rgb_bit_depth, -+ int yuv_bit_depth, int width, int height, -+ const SharpYuvConversionMatrix* yuv_matrix) { -+ int i, j; -+ const fixed_t* const best_uv_base = best_uv; -+ const int w = (width + 1) & ~1; -+ const int h = (height + 1) & ~1; -+ const int uv_w = w >> 1; -+ const int uv_h = h >> 1; -+ const int sfix = GetPrecisionShift(rgb_bit_depth); -+ const int yuv_max = (1 << yuv_bit_depth) - 1; -+ -+ for (best_uv = best_uv_base, j = 0; j < height; ++j) { -+ for (i = 0; i < width; ++i) { -+ const int off = (i >> 1); -+ const int W = best_y[i]; -+ const int r = best_uv[off + 0 * uv_w] + W; -+ const int g = best_uv[off + 1 * uv_w] + W; -+ const int b = best_uv[off + 2 * uv_w] + W; -+ const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix); -+ if (yuv_bit_depth <= 8) { -+ y_ptr[i] = clip_8b(y); -+ } else { -+ ((uint16_t*)y_ptr)[i] = clip(y, yuv_max); -+ } -+ } -+ best_y += w; -+ best_uv += (j & 1) * 3 * uv_w; -+ y_ptr += y_stride; -+ } -+ for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { -+ for (i = 0; i < uv_w; ++i) { -+ const int off = i; -+ // Note r, g and b values here are off by W, but a constant offset on all -+ // 3 components doesn't change the value of u and v with a YCbCr matrix. 
-+ const int r = best_uv[off + 0 * uv_w]; -+ const int g = best_uv[off + 1 * uv_w]; -+ const int b = best_uv[off + 2 * uv_w]; -+ const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix); -+ const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix); -+ if (yuv_bit_depth <= 8) { -+ u_ptr[i] = clip_8b(u); -+ v_ptr[i] = clip_8b(v); -+ } else { -+ ((uint16_t*)u_ptr)[i] = clip(u, yuv_max); -+ ((uint16_t*)v_ptr)[i] = clip(v, yuv_max); -+ } -+ } -+ best_uv += 3 * uv_w; -+ u_ptr += u_stride; -+ v_ptr += v_stride; -+ } -+ return 1; -+} -+ -+//------------------------------------------------------------------------------ -+// Main function -+ -+static void* SafeMalloc(uint64_t nmemb, size_t size) { -+ const uint64_t total_size = nmemb * (uint64_t)size; -+ if (total_size != (size_t)total_size) return NULL; -+ return malloc((size_t)total_size); -+} -+ -+#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T))) -+ -+static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, -+ const uint8_t* b_ptr, int rgb_step, int rgb_stride, -+ int rgb_bit_depth, uint8_t* y_ptr, int y_stride, -+ uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, -+ int v_stride, int yuv_bit_depth, int width, -+ int height, -+ const SharpYuvConversionMatrix* yuv_matrix, -+ SharpYuvTransferFunctionType transfer_type) { -+ // we expand the right/bottom border if needed -+ const int w = (width + 1) & ~1; -+ const int h = (height + 1) & ~1; -+ const int uv_w = w >> 1; -+ const int uv_h = h >> 1; -+ uint64_t prev_diff_y_sum = ~0; -+ int j, iter; -+ -+ // TODO(skal): allocate one big memory chunk. But for now, it's easier -+ // for valgrind debugging to have several chunks. 
-+ fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch -+ fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); -+ fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); -+ fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); -+ fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); -+ fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); -+ fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); -+ fixed_y_t* best_y = best_y_base; -+ fixed_y_t* target_y = target_y_base; -+ fixed_t* best_uv = best_uv_base; -+ fixed_t* target_uv = target_uv_base; -+ const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); -+ int ok; -+ assert(w > 0); -+ assert(h > 0); -+ -+ if (best_y_base == NULL || best_uv_base == NULL || -+ target_y_base == NULL || target_uv_base == NULL || -+ best_rgb_y == NULL || best_rgb_uv == NULL || -+ tmp_buffer == NULL) { -+ ok = 0; -+ goto End; -+ } -+ -+ // Import RGB samples to W/RGB representation. 
-+ for (j = 0; j < height; j += 2) { -+ const int is_last_row = (j == height - 1); -+ fixed_y_t* const src1 = tmp_buffer + 0 * w; -+ fixed_y_t* const src2 = tmp_buffer + 3 * w; -+ -+ // prepare two rows of input -+ ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, -+ src1); -+ if (!is_last_row) { -+ ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, -+ rgb_step, rgb_bit_depth, width, src2); -+ } else { -+ memcpy(src2, src1, 3 * w * sizeof(*src2)); -+ } -+ StoreGray(src1, best_y + 0, w); -+ StoreGray(src2, best_y + w, w); -+ -+ UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type); -+ UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type); -+ UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type); -+ memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); -+ best_y += 2 * w; -+ best_uv += 3 * uv_w; -+ target_y += 2 * w; -+ target_uv += 3 * uv_w; -+ r_ptr += 2 * rgb_stride; -+ g_ptr += 2 * rgb_stride; -+ b_ptr += 2 * rgb_stride; -+ } -+ -+ // Iterate and resolve clipping conflicts. -+ for (iter = 0; iter < kNumIterations; ++iter) { -+ const fixed_t* cur_uv = best_uv_base; -+ const fixed_t* prev_uv = best_uv_base; -+ uint64_t diff_y_sum = 0; -+ -+ best_y = best_y_base; -+ best_uv = best_uv_base; -+ target_y = target_y_base; -+ target_uv = target_uv_base; -+ for (j = 0; j < h; j += 2) { -+ fixed_y_t* const src1 = tmp_buffer + 0 * w; -+ fixed_y_t* const src2 = tmp_buffer + 3 * w; -+ { -+ const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 
3 * uv_w : 0); -+ InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, -+ src1, src2, rgb_bit_depth); -+ prev_uv = cur_uv; -+ cur_uv = next_uv; -+ } -+ -+ UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type); -+ UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type); -+ UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type); -+ -+ // update two rows of Y and one row of RGB -+ diff_y_sum += -+ SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, -+ rgb_bit_depth + GetPrecisionShift(rgb_bit_depth)); -+ SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); -+ -+ best_y += 2 * w; -+ best_uv += 3 * uv_w; -+ target_y += 2 * w; -+ target_uv += 3 * uv_w; -+ } -+ // test exit condition -+ if (iter > 0) { -+ if (diff_y_sum < diff_y_threshold) break; -+ if (diff_y_sum > prev_diff_y_sum) break; -+ } -+ prev_diff_y_sum = diff_y_sum; -+ } -+ -+ // final reconstruction -+ ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr, -+ u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, -+ width, height, yuv_matrix); -+ -+ End: -+ free(best_y_base); -+ free(best_uv_base); -+ free(target_y_base); -+ free(target_uv_base); -+ free(best_rgb_y); -+ free(best_rgb_uv); -+ free(tmp_buffer); -+ return ok; -+} -+#undef SAFE_ALLOC -+ -+#if defined(WEBP_USE_THREAD) && !defined(_WIN32) -+#include // NOLINT -+ -+#define LOCK_ACCESS \ -+ static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \ -+ if (pthread_mutex_lock(&sharpyuv_lock)) return -+#define UNLOCK_ACCESS_AND_RETURN \ -+ do { \ -+ (void)pthread_mutex_unlock(&sharpyuv_lock); \ -+ return; \ -+ } while (0) -+#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) -+#define LOCK_ACCESS do {} while (0) -+#define UNLOCK_ACCESS_AND_RETURN return -+#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) -+ -+// Hidden exported init function. -+// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. 
If needed, -+// users can declare it as extern and call it with an alternate VP8CPUInfo -+// function. -+extern VP8CPUInfo SharpYuvGetCPUInfo; -+SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); -+void SharpYuvInit(VP8CPUInfo cpu_info_func) { -+ static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = -+ (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; -+ LOCK_ACCESS; -+ // Only update SharpYuvGetCPUInfo when called from external code to avoid a -+ // race on reading the value in SharpYuvConvert(). -+ if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { -+ SharpYuvGetCPUInfo = cpu_info_func; -+ } -+ if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { -+ UNLOCK_ACCESS_AND_RETURN; -+ } -+ -+ SharpYuvInitDsp(); -+ SharpYuvInitGammaTables(); -+ -+ sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo; -+ UNLOCK_ACCESS_AND_RETURN; -+} -+ -+int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, -+ int rgb_step, int rgb_stride, int rgb_bit_depth, -+ void* y_ptr, int y_stride, void* u_ptr, int u_stride, -+ void* v_ptr, int v_stride, int yuv_bit_depth, int width, -+ int height, const SharpYuvConversionMatrix* yuv_matrix) { -+ SharpYuvOptions options; -+ options.yuv_matrix = yuv_matrix; -+ options.transfer_type = kSharpYuvTransferFunctionSrgb; -+ return SharpYuvConvertWithOptions( -+ r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride, -+ u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options); -+} -+ -+int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix, -+ SharpYuvOptions* options, int version) { -+ const int major = (version >> 24); -+ const int minor = (version >> 16) & 0xff; -+ if (options == NULL || yuv_matrix == NULL || -+ (major == SHARPYUV_VERSION_MAJOR && major == 0 && -+ minor != SHARPYUV_VERSION_MINOR) || -+ (major != SHARPYUV_VERSION_MAJOR)) { -+ return 0; -+ } -+ options->yuv_matrix = yuv_matrix; -+ options->transfer_type = kSharpYuvTransferFunctionSrgb; -+ return 1; -+} -+ 
-+int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr, -+ const void* b_ptr, int rgb_step, int rgb_stride, -+ int rgb_bit_depth, void* y_ptr, int y_stride, -+ void* u_ptr, int u_stride, void* v_ptr, -+ int v_stride, int yuv_bit_depth, int width, -+ int height, const SharpYuvOptions* options) { -+ const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix; -+ SharpYuvTransferFunctionType transfer_type = options->transfer_type; -+ SharpYuvConversionMatrix scaled_matrix; -+ const int rgb_max = (1 << rgb_bit_depth) - 1; -+ const int rgb_round = 1 << (rgb_bit_depth - 1); -+ const int yuv_max = (1 << yuv_bit_depth) - 1; -+ const int sfix = GetPrecisionShift(rgb_bit_depth); -+ -+ if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX || -+ r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || -+ u_ptr == NULL || v_ptr == NULL) { -+ return 0; -+ } -+ if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && -+ rgb_bit_depth != 16) { -+ return 0; -+ } -+ if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { -+ return 0; -+ } -+ if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) { -+ // Step/stride should be even for uint16_t buffers. -+ return 0; -+ } -+ if (yuv_bit_depth > 8 && -+ (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { -+ // Stride should be even for uint16_t buffers. -+ return 0; -+ } -+ // The address of the function pointer is used to avoid a read race. -+ SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo); -+ -+ // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the -+ // rgb->yuv conversion matrix. 
-+ if (rgb_bit_depth == yuv_bit_depth) { -+ memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix)); -+ } else { -+ int i; -+ for (i = 0; i < 3; ++i) { -+ scaled_matrix.rgb_to_y[i] = -+ (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; -+ scaled_matrix.rgb_to_u[i] = -+ (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; -+ scaled_matrix.rgb_to_v[i] = -+ (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; -+ } -+ } -+ // Also incorporate precision change scaling. -+ scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix); -+ scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix); -+ scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix); -+ -+ return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, -+ rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride, -+ v_ptr, v_stride, yuv_bit_depth, width, height, -+ &scaled_matrix, transfer_type); -+} -+ -+//------------------------------------------------------------------------------ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv.h b/3rdparty/libwebp/sharpyuv/sharpyuv.h -new file mode 100644 -index 000000000000..23a69ce39c3e ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv.h -@@ -0,0 +1,174 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Sharp RGB to YUV conversion. 
-+ -+#ifndef WEBP_SHARPYUV_SHARPYUV_H_ -+#define WEBP_SHARPYUV_SHARPYUV_H_ -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#ifndef SHARPYUV_EXTERN -+#ifdef WEBP_EXTERN -+#define SHARPYUV_EXTERN WEBP_EXTERN -+#else -+// This explicitly marks library functions and allows for changing the -+// signature for e.g., Windows DLL builds. -+#if defined(__GNUC__) && __GNUC__ >= 4 -+#define SHARPYUV_EXTERN extern __attribute__((visibility("default"))) -+#else -+#if defined(_MSC_VER) && defined(WEBP_DLL) -+#define SHARPYUV_EXTERN __declspec(dllexport) -+#else -+#define SHARPYUV_EXTERN extern -+#endif /* _MSC_VER && WEBP_DLL */ -+#endif /* __GNUC__ >= 4 */ -+#endif /* WEBP_EXTERN */ -+#endif /* SHARPYUV_EXTERN */ -+ -+#ifndef SHARPYUV_INLINE -+#ifdef WEBP_INLINE -+#define SHARPYUV_INLINE WEBP_INLINE -+#else -+#ifndef _MSC_VER -+#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \ -+ (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) -+#define SHARPYUV_INLINE inline -+#else -+#define SHARPYUV_INLINE -+#endif -+#else -+#define SHARPYUV_INLINE __forceinline -+#endif /* _MSC_VER */ -+#endif /* WEBP_INLINE */ -+#endif /* SHARPYUV_INLINE */ -+ -+// SharpYUV API version following the convention from semver.org -+#define SHARPYUV_VERSION_MAJOR 0 -+#define SHARPYUV_VERSION_MINOR 4 -+#define SHARPYUV_VERSION_PATCH 0 -+// Version as a uint32_t. The major number is the high 8 bits. -+// The minor number is the middle 8 bits. The patch number is the low 16 bits. -+#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \ -+ (((MAJOR) << 24) | ((MINOR) << 16) | (PATCH)) -+#define SHARPYUV_VERSION \ -+ SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \ -+ SHARPYUV_VERSION_PATCH) -+ -+// Returns the library's version number, packed in hexadecimal. See -+// SHARPYUV_VERSION. -+SHARPYUV_EXTERN int SharpYuvGetVersion(void); -+ -+// RGB to YUV conversion matrix, in 16 bit fixed point. 
-+// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3] -+// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3] -+// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3] -+// Then y, u and v values are divided by 1<<16 and rounded. -+typedef struct { -+ int rgb_to_y[4]; -+ int rgb_to_u[4]; -+ int rgb_to_v[4]; -+} SharpYuvConversionMatrix; -+ -+typedef struct SharpYuvOptions SharpYuvOptions; -+ -+// Enums for transfer functions, as defined in H.273, -+// https://www.itu.int/rec/T-REC-H.273-202107-I/en -+typedef enum SharpYuvTransferFunctionType { -+ // 0 is reserved -+ kSharpYuvTransferFunctionBt709 = 1, -+ // 2 is unspecified -+ // 3 is reserved -+ kSharpYuvTransferFunctionBt470M = 4, -+ kSharpYuvTransferFunctionBt470Bg = 5, -+ kSharpYuvTransferFunctionBt601 = 6, -+ kSharpYuvTransferFunctionSmpte240 = 7, -+ kSharpYuvTransferFunctionLinear = 8, -+ kSharpYuvTransferFunctionLog100 = 9, -+ kSharpYuvTransferFunctionLog100_Sqrt10 = 10, -+ kSharpYuvTransferFunctionIec61966 = 11, -+ kSharpYuvTransferFunctionBt1361 = 12, -+ kSharpYuvTransferFunctionSrgb = 13, -+ kSharpYuvTransferFunctionBt2020_10Bit = 14, -+ kSharpYuvTransferFunctionBt2020_12Bit = 15, -+ kSharpYuvTransferFunctionSmpte2084 = 16, // PQ -+ kSharpYuvTransferFunctionSmpte428 = 17, -+ kSharpYuvTransferFunctionHlg = 18, -+ kSharpYuvTransferFunctionNum -+} SharpYuvTransferFunctionType; -+ -+// Converts RGB to YUV420 using a downsampling algorithm that minimizes -+// artefacts caused by chroma subsampling. -+// This is slower than standard downsampling (averaging of 4 UV values). -+// Assumes that the image will be upsampled using a bilinear filter. If nearest -+// neighbor is used instead, the upsampled image might look worse than with -+// standard downsampling. -+// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point -+// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise. 
-+// rgb_step: distance in bytes between two horizontally adjacent pixels on the -+// r, g and b channels. If rgb_bit_depth is > 8, it should be a -+// multiple of 2. -+// rgb_stride: distance in bytes between two vertically adjacent pixels on the -+// r, g, and b channels. If rgb_bit_depth is > 8, it should be a -+// multiple of 2. -+// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16. -+// Note: 16 bit input is truncated to 14 bits before conversion to yuv. -+// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12. -+// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should -+// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers -+// otherwise. -+// y_stride, u_stride, v_stride: distance in bytes between two vertically -+// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they -+// should be multiples of 2. -+// width, height: width and height of the image in pixels -+// This function calls SharpYuvConvertWithOptions with a default transfer -+// function of kSharpYuvTransferFunctionSrgb. -+SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr, -+ const void* b_ptr, int rgb_step, -+ int rgb_stride, int rgb_bit_depth, -+ void* y_ptr, int y_stride, void* u_ptr, -+ int u_stride, void* v_ptr, int v_stride, -+ int yuv_bit_depth, int width, int height, -+ const SharpYuvConversionMatrix* yuv_matrix); -+ -+struct SharpYuvOptions { -+ // This matrix cannot be NULL and can be initialized by -+ // SharpYuvComputeConversionMatrix. -+ const SharpYuvConversionMatrix* yuv_matrix; -+ SharpYuvTransferFunctionType transfer_type; -+}; -+ -+// Internal, version-checked, entry point -+SHARPYUV_EXTERN int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix*, -+ SharpYuvOptions*, int); -+ -+// Should always be called, to initialize a fresh SharpYuvOptions -+// structure before modification. 
SharpYuvOptionsInit() must have succeeded -+// before using the 'options' object. -+static SHARPYUV_INLINE int SharpYuvOptionsInit( -+ const SharpYuvConversionMatrix* yuv_matrix, SharpYuvOptions* options) { -+ return SharpYuvOptionsInitInternal(yuv_matrix, options, SHARPYUV_VERSION); -+} -+ -+SHARPYUV_EXTERN int SharpYuvConvertWithOptions( -+ const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step, -+ int rgb_stride, int rgb_bit_depth, void* y_ptr, int y_stride, void* u_ptr, -+ int u_stride, void* v_ptr, int v_stride, int yuv_bit_depth, int width, -+ int height, const SharpYuvOptions* options); -+ -+// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422 -+// support (it's rarely used in practice, especially for images). -+ -+#ifdef __cplusplus -+} // extern "C" -+#endif -+ -+#endif // WEBP_SHARPYUV_SHARPYUV_H_ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c -new file mode 100644 -index 000000000000..29425a0c4918 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c -@@ -0,0 +1,14 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+#include "sharpyuv/sharpyuv_cpu.h" -+ -+// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The -+// function pointer is renamed in sharpyuv_cpu.h. 
-+#include "src/dsp/cpu.c" -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h -new file mode 100644 -index 000000000000..176ca3eb1682 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h -@@ -0,0 +1,22 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_ -+#define WEBP_SHARPYUV_SHARPYUV_CPU_H_ -+ -+#include "sharpyuv/sharpyuv.h" -+ -+// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds. -+// SharpYuvInit() replaces the use of the function pointer. -+#undef WEBP_EXTERN -+#define WEBP_EXTERN extern -+#define VP8GetCPUInfo SharpYuvGetCPUInfo -+#include "src/dsp/cpu.h" -+ -+#endif // WEBP_SHARPYUV_SHARPYUV_CPU_H_ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c -new file mode 100644 -index 000000000000..0ad22be9458c ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c -@@ -0,0 +1,110 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Colorspace utilities. 
-+ -+#include "sharpyuv/sharpyuv_csp.h" -+ -+#include -+#include -+#include -+ -+static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); } -+ -+void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, -+ SharpYuvConversionMatrix* matrix) { -+ const float kr = yuv_color_space->kr; -+ const float kb = yuv_color_space->kb; -+ const float kg = 1.0f - kr - kb; -+ const float cr = 0.5f / (1.0f - kb); -+ const float cb = 0.5f / (1.0f - kr); -+ -+ const int shift = yuv_color_space->bit_depth - 8; -+ -+ const float denom = (float)((1 << yuv_color_space->bit_depth) - 1); -+ float scale_y = 1.0f; -+ float add_y = 0.0f; -+ float scale_u = cr; -+ float scale_v = cb; -+ float add_uv = (float)(128 << shift); -+ assert(yuv_color_space->bit_depth >= 8); -+ -+ if (yuv_color_space->range == kSharpYuvRangeLimited) { -+ scale_y *= (219 << shift) / denom; -+ scale_u *= (224 << shift) / denom; -+ scale_v *= (224 << shift) / denom; -+ add_y = (float)(16 << shift); -+ } -+ -+ matrix->rgb_to_y[0] = ToFixed16(kr * scale_y); -+ matrix->rgb_to_y[1] = ToFixed16(kg * scale_y); -+ matrix->rgb_to_y[2] = ToFixed16(kb * scale_y); -+ matrix->rgb_to_y[3] = ToFixed16(add_y); -+ -+ matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u); -+ matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u); -+ matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u); -+ matrix->rgb_to_u[3] = ToFixed16(add_uv); -+ -+ matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v); -+ matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v); -+ matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v); -+ matrix->rgb_to_v[3] = ToFixed16(add_uv); -+} -+ -+// Matrices are in YUV_FIX fixed point precision. -+// WebP's matrix, similar but not identical to kRec601LimitedMatrix. 
-+static const SharpYuvConversionMatrix kWebpMatrix = { -+ {16839, 33059, 6420, 16 << 16}, -+ {-9719, -19081, 28800, 128 << 16}, -+ {28800, -24116, -4684, 128 << 16}, -+}; -+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited -+static const SharpYuvConversionMatrix kRec601LimitedMatrix = { -+ {16829, 33039, 6416, 16 << 16}, -+ {-9714, -19071, 28784, 128 << 16}, -+ {28784, -24103, -4681, 128 << 16}, -+}; -+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull -+static const SharpYuvConversionMatrix kRec601FullMatrix = { -+ {19595, 38470, 7471, 0}, -+ {-11058, -21710, 32768, 128 << 16}, -+ {32768, -27439, -5329, 128 << 16}, -+}; -+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited -+static const SharpYuvConversionMatrix kRec709LimitedMatrix = { -+ {11966, 40254, 4064, 16 << 16}, -+ {-6596, -22189, 28784, 128 << 16}, -+ {28784, -26145, -2639, 128 << 16}, -+}; -+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull -+static const SharpYuvConversionMatrix kRec709FullMatrix = { -+ {13933, 46871, 4732, 0}, -+ {-7509, -25259, 32768, 128 << 16}, -+ {32768, -29763, -3005, 128 << 16}, -+}; -+ -+const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( -+ SharpYuvMatrixType matrix_type) { -+ switch (matrix_type) { -+ case kSharpYuvMatrixWebp: -+ return &kWebpMatrix; -+ case kSharpYuvMatrixRec601Limited: -+ return &kRec601LimitedMatrix; -+ case kSharpYuvMatrixRec601Full: -+ return &kRec601FullMatrix; -+ case kSharpYuvMatrixRec709Limited: -+ return &kRec709LimitedMatrix; -+ case kSharpYuvMatrixRec709Full: -+ return &kRec709FullMatrix; -+ case kSharpYuvMatrixNum: -+ return NULL; -+ } -+ return NULL; -+} -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h -new file mode 100644 -index 000000000000..3214e3ac6075 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h -@@ -0,0 +1,60 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. 
-+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Colorspace utilities. -+ -+#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_ -+#define WEBP_SHARPYUV_SHARPYUV_CSP_H_ -+ -+#include "sharpyuv/sharpyuv.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+// Range of YUV values. -+typedef enum { -+ kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit) -+ kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit) -+} SharpYuvRange; -+ -+// Constants that define a YUV color space. -+typedef struct { -+ // Kr and Kb are defined such that: -+ // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb. -+ float kr; -+ float kb; -+ int bit_depth; // 8, 10 or 12 -+ SharpYuvRange range; -+} SharpYuvColorSpace; -+ -+// Fills in 'matrix' for the given YUVColorSpace. -+SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix( -+ const SharpYuvColorSpace* yuv_color_space, -+ SharpYuvConversionMatrix* matrix); -+ -+// Enums for precomputed conversion matrices. -+typedef enum { -+ kSharpYuvMatrixWebp = 0, -+ kSharpYuvMatrixRec601Limited, -+ kSharpYuvMatrixRec601Full, -+ kSharpYuvMatrixRec709Limited, -+ kSharpYuvMatrixRec709Full, -+ kSharpYuvMatrixNum -+} SharpYuvMatrixType; -+ -+// Returns a pointer to a matrix for one of the predefined colorspaces. 
-+SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( -+ SharpYuvMatrixType matrix_type); -+ -+#ifdef __cplusplus -+} // extern "C" -+#endif -+ -+#endif // WEBP_SHARPYUV_SHARPYUV_CSP_H_ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c -new file mode 100644 -index 000000000000..0da3efc0b813 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c -@@ -0,0 +1,104 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Speed-critical functions for Sharp YUV. -+// -+// Author: Skal (pascal.massimino@gmail.com) -+ -+#include "sharpyuv/sharpyuv_dsp.h" -+ -+#include -+#include -+ -+#include "sharpyuv/sharpyuv_cpu.h" -+ -+//----------------------------------------------------------------------------- -+ -+#if !WEBP_NEON_OMIT_C_CODE -+static uint16_t clip(int v, int max) { -+ return (v < 0) ? 0 : (v > max) ? 
max : (uint16_t)v; -+} -+ -+static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src, -+ uint16_t* dst, int len, int bit_depth) { -+ uint64_t diff = 0; -+ int i; -+ const int max_y = (1 << bit_depth) - 1; -+ for (i = 0; i < len; ++i) { -+ const int diff_y = ref[i] - src[i]; -+ const int new_y = (int)dst[i] + diff_y; -+ dst[i] = clip(new_y, max_y); -+ diff += (uint64_t)abs(diff_y); -+ } -+ return diff; -+} -+ -+static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src, -+ int16_t* dst, int len) { -+ int i; -+ for (i = 0; i < len; ++i) { -+ const int diff_uv = ref[i] - src[i]; -+ dst[i] += diff_uv; -+ } -+} -+ -+static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len, -+ const uint16_t* best_y, uint16_t* out, -+ int bit_depth) { -+ int i; -+ const int max_y = (1 << bit_depth) - 1; -+ for (i = 0; i < len; ++i, ++A, ++B) { -+ const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; -+ const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; -+ out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y); -+ out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y); -+ } -+} -+#endif // !WEBP_NEON_OMIT_C_CODE -+ -+//----------------------------------------------------------------------------- -+ -+uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, -+ uint16_t* dst, int len, int bit_depth); -+void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst, -+ int len); -+void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, -+ const uint16_t* best_y, uint16_t* out, -+ int bit_depth); -+ -+extern VP8CPUInfo SharpYuvGetCPUInfo; -+extern void InitSharpYuvSSE2(void); -+extern void InitSharpYuvNEON(void); -+ -+void SharpYuvInitDsp(void) { -+#if !WEBP_NEON_OMIT_C_CODE -+ SharpYuvUpdateY = SharpYuvUpdateY_C; -+ SharpYuvUpdateRGB = SharpYuvUpdateRGB_C; -+ SharpYuvFilterRow = SharpYuvFilterRow_C; -+#endif -+ -+ if (SharpYuvGetCPUInfo != NULL) { -+#if defined(WEBP_HAVE_SSE2) -+ 
if (SharpYuvGetCPUInfo(kSSE2)) { -+ InitSharpYuvSSE2(); -+ } -+#endif // WEBP_HAVE_SSE2 -+ } -+ -+#if defined(WEBP_HAVE_NEON) -+ if (WEBP_NEON_OMIT_C_CODE || -+ (SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) { -+ InitSharpYuvNEON(); -+ } -+#endif // WEBP_HAVE_NEON -+ -+ assert(SharpYuvUpdateY != NULL); -+ assert(SharpYuvUpdateRGB != NULL); -+ assert(SharpYuvFilterRow != NULL); -+} -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h -new file mode 100644 -index 000000000000..805fbadbf657 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h -@@ -0,0 +1,28 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Speed-critical functions for Sharp YUV. 
-+ -+#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_ -+#define WEBP_SHARPYUV_SHARPYUV_DSP_H_ -+ -+#include "sharpyuv/sharpyuv_cpu.h" -+#include "src/webp/types.h" -+ -+extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, -+ uint16_t* dst, int len, int bit_depth); -+extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, -+ int16_t* dst, int len); -+extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, -+ const uint16_t* best_y, uint16_t* out, -+ int bit_depth); -+ -+void SharpYuvInitDsp(void); -+ -+#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c -new file mode 100644 -index 000000000000..fecadc64805d ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c -@@ -0,0 +1,419 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Gamma correction utilities. -+ -+#include "sharpyuv/sharpyuv_gamma.h" -+ -+#include -+#include -+#include -+ -+#include "src/webp/types.h" -+ -+// Gamma correction compensates loss of resolution during chroma subsampling. -+// Size of pre-computed table for converting from gamma to linear. 
-+#define GAMMA_TO_LINEAR_TAB_BITS 10 -+#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS) -+static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2]; -+#define LINEAR_TO_GAMMA_TAB_BITS 9 -+#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS) -+static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2]; -+ -+static const double kGammaF = 1. / 0.45; -+#define GAMMA_TO_LINEAR_BITS 16 -+ -+static volatile int kGammaTablesSOk = 0; -+void SharpYuvInitGammaTables(void) { -+ assert(GAMMA_TO_LINEAR_BITS <= 16); -+ if (!kGammaTablesSOk) { -+ int v; -+ const double a = 0.09929682680944; -+ const double thresh = 0.018053968510807; -+ const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; -+ // Precompute gamma to linear table. -+ { -+ const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE; -+ const double a_rec = 1. / (1. + a); -+ for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) { -+ const double g = norm * v; -+ double value; -+ if (g <= thresh * 4.5) { -+ value = g / 4.5; -+ } else { -+ value = pow(a_rec * (g + a), kGammaF); -+ } -+ kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); -+ } -+ // to prevent small rounding errors to cause read-overflow: -+ kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] = -+ kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE]; -+ } -+ // Precompute linear to gamma table. -+ { -+ const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE; -+ for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) { -+ const double g = scale * v; -+ double value; -+ if (g <= thresh) { -+ value = 4.5 * g; -+ } else { -+ value = (1. + a) * pow(g, 1. / kGammaF) - a; -+ } -+ kLinearToGammaTabS[v] = -+ (uint32_t)(final_scale * value + 0.5); -+ } -+ // to prevent small rounding errors to cause read-overflow: -+ kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] = -+ kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE]; -+ } -+ kGammaTablesSOk = 1; -+ } -+} -+ -+static WEBP_INLINE int Shift(int v, int shift) { -+ return (shift >= 0) ? 
(v << shift) : (v >> -shift); -+} -+ -+static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab, -+ int tab_pos_shift_right, -+ int tab_value_shift) { -+ const uint32_t tab_pos = Shift(v, -tab_pos_shift_right); -+ // fractional part, in 'tab_pos_shift' fixed-point precision -+ const uint32_t x = v - (tab_pos << tab_pos_shift_right); // fractional part -+ // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1]) -+ const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift); -+ const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift); -+ // Final interpolation. -+ const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. -+ const int half = -+ (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0; -+ const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right); -+ return result; -+} -+ -+static uint32_t ToLinearSrgb(uint16_t v, int bit_depth) { -+ const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth; -+ if (shift > 0) { -+ return kGammaToLinearTabS[v << shift]; -+ } -+ return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0); -+} -+ -+static uint16_t FromLinearSrgb(uint32_t value, int bit_depth) { -+ return FixedPointInterpolation( -+ value, kLinearToGammaTabS, -+ (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS), -+ bit_depth - GAMMA_TO_LINEAR_BITS); -+} -+ -+//////////////////////////////////////////////////////////////////////////////// -+ -+#define CLAMP(x, low, high) \ -+ (((x) < (low)) ? (low) : (((high) < (x)) ? (high) : (x))) -+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) -+#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) -+ -+static WEBP_INLINE float Roundf(float x) { -+ if (x < 0) -+ return (float)ceil((double)(x - 0.5f)); -+ else -+ return (float)floor((double)(x + 0.5f)); -+} -+ -+static WEBP_INLINE float Powf(float base, float exp) { -+ return (float)pow((double)base, (double)exp); -+} -+ -+static WEBP_INLINE float Log10f(float x) { return (float)log10((double)x); } -+ -+static float ToLinear709(float gamma) { -+ if (gamma < 0.f) { -+ return 0.f; -+ } else if (gamma < 4.5f * 0.018053968510807f) { -+ return gamma / 4.5f; -+ } else if (gamma < 1.f) { -+ return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f); -+ } -+ return 1.f; -+} -+ -+static float FromLinear709(float linear) { -+ if (linear < 0.f) { -+ return 0.f; -+ } else if (linear < 0.018053968510807f) { -+ return linear * 4.5f; -+ } else if (linear < 1.f) { -+ return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f; -+ } -+ return 1.f; -+} -+ -+static float ToLinear470M(float gamma) { -+ return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.2f); -+} -+ -+static float FromLinear470M(float linear) { -+ return Powf(CLAMP(linear, 0.f, 1.f), 2.2f); -+} -+ -+static float ToLinear470Bg(float gamma) { -+ return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.8f); -+} -+ -+static float FromLinear470Bg(float linear) { -+ return Powf(CLAMP(linear, 0.f, 1.f), 2.8f); -+} -+ -+static float ToLinearSmpte240(float gamma) { -+ if (gamma < 0.f) { -+ return 0.f; -+ } else if (gamma < 4.f * 0.022821585529445f) { -+ return gamma / 4.f; -+ } else if (gamma < 1.f) { -+ return Powf((gamma + 0.111572195921731f) / 1.111572195921731f, 1.f / 0.45f); -+ } -+ return 1.f; -+} -+ -+static float FromLinearSmpte240(float linear) { -+ if (linear < 0.f) { -+ return 0.f; -+ } else if (linear < 0.022821585529445f) { -+ return linear * 4.f; -+ } else if (linear < 1.f) { -+ return 1.111572195921731f * Powf(linear, 0.45f) - 0.111572195921731f; -+ } -+ return 1.f; -+} -+ -+static float ToLinearLog100(float gamma) { -+ return (gamma < 0.01f) ? 
0.0f : 1.0f + Log10f(MIN(gamma, 1.f)) / 2.0f; -+} -+ -+static float FromLinearLog100(float linear) { -+ // The function is non-bijective so choose the middle of [0, 0.01]. -+ const float mid_interval = 0.01f / 2.f; -+ return (linear <= 0.0f) ? mid_interval -+ : Powf(10.0f, 2.f * (MIN(linear, 1.f) - 1.0f)); -+} -+ -+static float ToLinearLog100Sqrt10(float gamma) { -+ return (gamma < 0.00316227766f) ? 0.0f -+ : 1.0f + Log10f(MIN(gamma, 1.f)) / 2.5f; -+} -+ -+static float FromLinearLog100Sqrt10(float linear) { -+ // The function is non-bijective so choose the middle of [0, 0.00316227766f[. -+ const float mid_interval = 0.00316227766f / 2.f; -+ return (linear < 0.0f) ? mid_interval -+ : Powf(10.0f, 2.5f * (MIN(linear, 1.f) - 1.0f)); -+} -+ -+static float ToLinearIec61966(float gamma) { -+ if (gamma <= -4.5f * 0.018053968510807f) { -+ return Powf((-gamma + 0.09929682680944f) / -1.09929682680944f, 1.f / 0.45f); -+ } else if (gamma < 4.5f * 0.018053968510807f) { -+ return gamma / 4.5f; -+ } -+ return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f); -+} -+ -+static float FromLinearIec61966(float linear) { -+ if (linear <= -0.018053968510807f) { -+ return -1.09929682680944f * Powf(-linear, 0.45f) + 0.09929682680944f; -+ } else if (linear < 0.018053968510807f) { -+ return linear * 4.5f; -+ } -+ return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f; -+} -+ -+static float ToLinearBt1361(float gamma) { -+ if (gamma < -0.25f) { -+ return -0.25f; -+ } else if (gamma < 0.f) { -+ return Powf((gamma - 0.02482420670236f) / -0.27482420670236f, 1.f / 0.45f) / -+ -4.f; -+ } else if (gamma < 4.5f * 0.018053968510807f) { -+ return gamma / 4.5f; -+ } else if (gamma < 1.f) { -+ return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f); -+ } -+ return 1.f; -+} -+ -+static float FromLinearBt1361(float linear) { -+ if (linear < -0.25f) { -+ return -0.25f; -+ } else if (linear < 0.f) { -+ return -0.27482420670236f * Powf(-4.f * linear, 0.45f) + 
0.02482420670236f; -+ } else if (linear < 0.018053968510807f) { -+ return linear * 4.5f; -+ } else if (linear < 1.f) { -+ return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f; -+ } -+ return 1.f; -+} -+ -+static float ToLinearPq(float gamma) { -+ if (gamma > 0.f) { -+ const float pow_gamma = Powf(gamma, 32.f / 2523.f); -+ const float num = MAX(pow_gamma - 107.f / 128.f, 0.0f); -+ const float den = MAX(2413.f / 128.f - 2392.f / 128.f * pow_gamma, FLT_MIN); -+ return Powf(num / den, 4096.f / 653.f); -+ } -+ return 0.f; -+} -+ -+static float FromLinearPq(float linear) { -+ if (linear > 0.f) { -+ const float pow_linear = Powf(linear, 653.f / 4096.f); -+ const float num = 107.f / 128.f + 2413.f / 128.f * pow_linear; -+ const float den = 1.0f + 2392.f / 128.f * pow_linear; -+ return Powf(num / den, 2523.f / 32.f); -+ } -+ return 0.f; -+} -+ -+static float ToLinearSmpte428(float gamma) { -+ return Powf(0.91655527974030934f * MAX(gamma, 0.f), 1.f / 2.6f); -+} -+ -+static float FromLinearSmpte428(float linear) { -+ return Powf(MAX(linear, 0.f), 2.6f) / 0.91655527974030934f; -+} -+ -+// Conversion in BT.2100 requires RGB info. Simplify to gamma correction here. 
-+static float ToLinearHlg(float gamma) { -+ if (gamma < 0.f) { -+ return 0.f; -+ } else if (gamma <= 0.5f) { -+ return Powf((gamma * gamma) * (1.f / 3.f), 1.2f); -+ } -+ return Powf((expf((gamma - 0.55991073f) / 0.17883277f) + 0.28466892f) / 12.0f, -+ 1.2f); -+} -+ -+static float FromLinearHlg(float linear) { -+ linear = Powf(linear, 1.f / 1.2f); -+ if (linear < 0.f) { -+ return 0.f; -+ } else if (linear <= (1.f / 12.f)) { -+ return sqrtf(3.f * linear); -+ } -+ return 0.17883277f * logf(12.f * linear - 0.28466892f) + 0.55991073f; -+} -+ -+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth, -+ SharpYuvTransferFunctionType transfer_type) { -+ float v_float, linear; -+ if (transfer_type == kSharpYuvTransferFunctionSrgb) { -+ return ToLinearSrgb(v, bit_depth); -+ } -+ v_float = (float)v / ((1 << bit_depth) - 1); -+ switch (transfer_type) { -+ case kSharpYuvTransferFunctionBt709: -+ case kSharpYuvTransferFunctionBt601: -+ case kSharpYuvTransferFunctionBt2020_10Bit: -+ case kSharpYuvTransferFunctionBt2020_12Bit: -+ linear = ToLinear709(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt470M: -+ linear = ToLinear470M(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt470Bg: -+ linear = ToLinear470Bg(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte240: -+ linear = ToLinearSmpte240(v_float); -+ break; -+ case kSharpYuvTransferFunctionLinear: -+ return v; -+ case kSharpYuvTransferFunctionLog100: -+ linear = ToLinearLog100(v_float); -+ break; -+ case kSharpYuvTransferFunctionLog100_Sqrt10: -+ linear = ToLinearLog100Sqrt10(v_float); -+ break; -+ case kSharpYuvTransferFunctionIec61966: -+ linear = ToLinearIec61966(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt1361: -+ linear = ToLinearBt1361(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte2084: -+ linear = ToLinearPq(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte428: -+ linear = ToLinearSmpte428(v_float); -+ break; -+ case kSharpYuvTransferFunctionHlg: -+ linear = 
ToLinearHlg(v_float); -+ break; -+ default: -+ assert(0); -+ linear = 0; -+ break; -+ } -+ return (uint32_t)Roundf(linear * ((1 << 16) - 1)); -+} -+ -+uint16_t SharpYuvLinearToGamma(uint32_t v, int bit_depth, -+ SharpYuvTransferFunctionType transfer_type) { -+ float v_float, linear; -+ if (transfer_type == kSharpYuvTransferFunctionSrgb) { -+ return FromLinearSrgb(v, bit_depth); -+ } -+ v_float = (float)v / ((1 << 16) - 1); -+ switch (transfer_type) { -+ case kSharpYuvTransferFunctionBt709: -+ case kSharpYuvTransferFunctionBt601: -+ case kSharpYuvTransferFunctionBt2020_10Bit: -+ case kSharpYuvTransferFunctionBt2020_12Bit: -+ linear = FromLinear709(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt470M: -+ linear = FromLinear470M(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt470Bg: -+ linear = FromLinear470Bg(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte240: -+ linear = FromLinearSmpte240(v_float); -+ break; -+ case kSharpYuvTransferFunctionLinear: -+ return v; -+ case kSharpYuvTransferFunctionLog100: -+ linear = FromLinearLog100(v_float); -+ break; -+ case kSharpYuvTransferFunctionLog100_Sqrt10: -+ linear = FromLinearLog100Sqrt10(v_float); -+ break; -+ case kSharpYuvTransferFunctionIec61966: -+ linear = FromLinearIec61966(v_float); -+ break; -+ case kSharpYuvTransferFunctionBt1361: -+ linear = FromLinearBt1361(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte2084: -+ linear = FromLinearPq(v_float); -+ break; -+ case kSharpYuvTransferFunctionSmpte428: -+ linear = FromLinearSmpte428(v_float); -+ break; -+ case kSharpYuvTransferFunctionHlg: -+ linear = FromLinearHlg(v_float); -+ break; -+ default: -+ assert(0); -+ linear = 0; -+ break; -+ } -+ return (uint16_t)Roundf(linear * ((1 << bit_depth) - 1)); -+} -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h -new file mode 100644 -index 000000000000..b8ba7e98705e ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h -@@ -0,0 
+1,38 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Gamma correction utilities. -+ -+#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ -+#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ -+ -+#include "sharpyuv/sharpyuv.h" -+#include "src/webp/types.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+// Initializes precomputed tables. Must be called once before calling -+// SharpYuvGammaToLinear or SharpYuvLinearToGamma. -+void SharpYuvInitGammaTables(void); -+ -+// Converts a 'bit_depth'-bit gamma color value to a 16-bit linear value. -+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth, -+ SharpYuvTransferFunctionType transfer_type); -+ -+// Converts a 16-bit linear color value to a 'bit_depth'-bit gamma value. -+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth, -+ SharpYuvTransferFunctionType transfer_type); -+ -+#ifdef __cplusplus -+} // extern "C" -+#endif -+ -+#endif // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c b/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c -new file mode 100644 -index 000000000000..5840914865e0 ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c -@@ -0,0 +1,181 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. 
-+// ----------------------------------------------------------------------------- -+// -+// Speed-critical functions for Sharp YUV. -+// -+// Author: Skal (pascal.massimino@gmail.com) -+ -+#include "sharpyuv/sharpyuv_dsp.h" -+ -+#if defined(WEBP_USE_NEON) -+#include -+#include -+#include -+ -+static uint16_t clip_NEON(int v, int max) { -+ return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; -+} -+ -+static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src, -+ uint16_t* dst, int len, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ int i; -+ const int16x8_t zero = vdupq_n_s16(0); -+ const int16x8_t max = vdupq_n_s16(max_y); -+ uint64x2_t sum = vdupq_n_u64(0); -+ uint64_t diff; -+ -+ for (i = 0; i + 8 <= len; i += 8) { -+ const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); -+ const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); -+ const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); -+ const int16x8_t D = vsubq_s16(A, B); // diff_y -+ const int16x8_t F = vaddq_s16(C, D); // new_y -+ const uint16x8_t H = -+ vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); -+ const int16x8_t I = vabsq_s16(D); // abs(diff_y) -+ vst1q_u16(dst + i, H); -+ sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); -+ } -+ diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); -+ for (; i < len; ++i) { -+ const int diff_y = ref[i] - src[i]; -+ const int new_y = (int)(dst[i]) + diff_y; -+ dst[i] = clip_NEON(new_y, max_y); -+ diff += (uint64_t)(abs(diff_y)); -+ } -+ return diff; -+} -+ -+static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src, -+ int16_t* dst, int len) { -+ int i; -+ for (i = 0; i + 8 <= len; i += 8) { -+ const int16x8_t A = vld1q_s16(ref + i); -+ const int16x8_t B = vld1q_s16(src + i); -+ const int16x8_t C = vld1q_s16(dst + i); -+ const int16x8_t D = vsubq_s16(A, B); // diff_uv -+ const int16x8_t E = vaddq_s16(C, D); // new_uv -+ vst1q_s16(dst + i, E); -+ } -+ for (; i < len; 
++i) { -+ const int diff_uv = ref[i] - src[i]; -+ dst[i] += diff_uv; -+ } -+} -+ -+static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B, -+ int len, const uint16_t* best_y, -+ uint16_t* out, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ int i; -+ const int16x8_t max = vdupq_n_s16(max_y); -+ const int16x8_t zero = vdupq_n_s16(0); -+ for (i = 0; i + 8 <= len; i += 8) { -+ const int16x8_t a0 = vld1q_s16(A + i + 0); -+ const int16x8_t a1 = vld1q_s16(A + i + 1); -+ const int16x8_t b0 = vld1q_s16(B + i + 0); -+ const int16x8_t b1 = vld1q_s16(B + i + 1); -+ const int16x8_t a0b1 = vaddq_s16(a0, b1); -+ const int16x8_t a1b0 = vaddq_s16(a1, b0); -+ const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1 -+ const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1) -+ const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0) -+ const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); -+ const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); -+ const int16x8_t e0 = vrhaddq_s16(c1, a0); -+ const int16x8_t e1 = vrhaddq_s16(c0, a1); -+ const int16x8x2_t f = vzipq_s16(e0, e1); -+ const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); -+ const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); -+ const int16x8_t h0 = vaddq_s16(g0, f.val[0]); -+ const int16x8_t h1 = vaddq_s16(g1, f.val[1]); -+ const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); -+ const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); -+ vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); -+ vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); -+ } -+ for (; i < len; ++i) { -+ const int a0b1 = A[i + 0] + B[i + 1]; -+ const int a1b0 = A[i + 1] + B[i + 0]; -+ const int a0a1b0b1 = a0b1 + a1b0 + 8; -+ const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -+ const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -+ out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y); -+ out[2 * i + 1] = 
clip_NEON(best_y[2 * i + 1] + v1, max_y); -+ } -+} -+ -+static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B, -+ int len, const uint16_t* best_y, -+ uint16_t* out, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ int i; -+ const uint16x8_t max = vdupq_n_u16(max_y); -+ for (i = 0; i + 4 <= len; i += 4) { -+ const int16x4_t a0 = vld1_s16(A + i + 0); -+ const int16x4_t a1 = vld1_s16(A + i + 1); -+ const int16x4_t b0 = vld1_s16(B + i + 0); -+ const int16x4_t b1 = vld1_s16(B + i + 1); -+ const int32x4_t a0b1 = vaddl_s16(a0, b1); -+ const int32x4_t a1b0 = vaddl_s16(a1, b0); -+ const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0); // A0+A1+B0+B1 -+ const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1); // 2*(A0+B1) -+ const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0); // 2*(A1+B0) -+ const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3); -+ const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3); -+ const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0)); -+ const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1)); -+ const int32x4x2_t f = vzipq_s32(e0, e1); -+ -+ const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i)); -+ const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g)); -+ const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g)); -+ const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1)); -+ const uint16x8_t i_clamped = vminq_u16(i_16, max); -+ vst1q_u16(out + 2 * i + 0, i_clamped); -+ } -+ for (; i < len; ++i) { -+ const int a0b1 = A[i + 0] + B[i + 1]; -+ const int a1b0 = A[i + 1] + B[i + 0]; -+ const int a0a1b0b1 = a0b1 + a1b0 + 8; -+ const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -+ const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -+ out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y); -+ out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y); -+ } -+} -+ -+static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, -+ const uint16_t* best_y, 
uint16_t* out, -+ int bit_depth) { -+ if (bit_depth <= 10) { -+ SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth); -+ } else { -+ SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth); -+ } -+} -+ -+//------------------------------------------------------------------------------ -+ -+extern void InitSharpYuvNEON(void); -+ -+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) { -+ SharpYuvUpdateY = SharpYuvUpdateY_NEON; -+ SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON; -+ SharpYuvFilterRow = SharpYuvFilterRow_NEON; -+} -+ -+#else // !WEBP_USE_NEON -+ -+extern void InitSharpYuvNEON(void); -+ -+void InitSharpYuvNEON(void) {} -+ -+#endif // WEBP_USE_NEON -diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c b/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c -new file mode 100644 -index 000000000000..9744d1bb6cfe ---- /dev/null -+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c -@@ -0,0 +1,201 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Speed-critical functions for Sharp YUV. -+// -+// Author: Skal (pascal.massimino@gmail.com) -+ -+#include "sharpyuv/sharpyuv_dsp.h" -+ -+#if defined(WEBP_USE_SSE2) -+#include -+#include -+ -+static uint16_t clip_SSE2(int v, int max) { -+ return (v < 0) ? 0 : (v > max) ? 
max : (uint16_t)v; -+} -+ -+static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, -+ uint16_t* dst, int len, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ uint64_t diff = 0; -+ uint32_t tmp[4]; -+ int i; -+ const __m128i zero = _mm_setzero_si128(); -+ const __m128i max = _mm_set1_epi16(max_y); -+ const __m128i one = _mm_set1_epi16(1); -+ __m128i sum = zero; -+ -+ for (i = 0; i + 8 <= len; i += 8) { -+ const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); -+ const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); -+ const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); -+ const __m128i D = _mm_sub_epi16(A, B); // diff_y -+ const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) -+ const __m128i F = _mm_add_epi16(C, D); // new_y -+ const __m128i G = _mm_or_si128(E, one); // -1 or 1 -+ const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); -+ const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) -+ _mm_storeu_si128((__m128i*)(dst + i), H); -+ sum = _mm_add_epi32(sum, I); -+ } -+ _mm_storeu_si128((__m128i*)tmp, sum); -+ diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; -+ for (; i < len; ++i) { -+ const int diff_y = ref[i] - src[i]; -+ const int new_y = (int)dst[i] + diff_y; -+ dst[i] = clip_SSE2(new_y, max_y); -+ diff += (uint64_t)abs(diff_y); -+ } -+ return diff; -+} -+ -+static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, -+ int16_t* dst, int len) { -+ int i = 0; -+ for (i = 0; i + 8 <= len; i += 8) { -+ const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); -+ const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); -+ const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); -+ const __m128i D = _mm_sub_epi16(A, B); // diff_uv -+ const __m128i E = _mm_add_epi16(C, D); // new_uv -+ _mm_storeu_si128((__m128i*)(dst + i), E); -+ } -+ for (; i < len; ++i) { -+ const int diff_uv = ref[i] - src[i]; -+ dst[i] += diff_uv; -+ } -+} -+ -+static void 
SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B, -+ int len, const uint16_t* best_y, -+ uint16_t* out, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ int i; -+ const __m128i kCst8 = _mm_set1_epi16(8); -+ const __m128i max = _mm_set1_epi16(max_y); -+ const __m128i zero = _mm_setzero_si128(); -+ for (i = 0; i + 8 <= len; i += 8) { -+ const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); -+ const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); -+ const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); -+ const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); -+ const __m128i a0b1 = _mm_add_epi16(a0, b1); -+ const __m128i a1b0 = _mm_add_epi16(a1, b0); -+ const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 -+ const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); -+ const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) -+ const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) -+ const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); -+ const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); -+ const __m128i d0 = _mm_add_epi16(c1, a0); -+ const __m128i d1 = _mm_add_epi16(c0, a1); -+ const __m128i e0 = _mm_srai_epi16(d0, 1); -+ const __m128i e1 = _mm_srai_epi16(d1, 1); -+ const __m128i f0 = _mm_unpacklo_epi16(e0, e1); -+ const __m128i f1 = _mm_unpackhi_epi16(e0, e1); -+ const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); -+ const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); -+ const __m128i h0 = _mm_add_epi16(g0, f0); -+ const __m128i h1 = _mm_add_epi16(g1, f1); -+ const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); -+ const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); -+ _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); -+ _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); -+ } -+ for (; i < len; ++i) { -+ // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = -+ // = (8 * 
A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 -+ // We reuse the common sub-expressions. -+ const int a0b1 = A[i + 0] + B[i + 1]; -+ const int a1b0 = A[i + 1] + B[i + 0]; -+ const int a0a1b0b1 = a0b1 + a1b0 + 8; -+ const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -+ const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -+ out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y); -+ out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y); -+ } -+} -+ -+static WEBP_INLINE __m128i s16_to_s32(__m128i in) { -+ return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16); -+} -+ -+static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B, -+ int len, const uint16_t* best_y, -+ uint16_t* out, int bit_depth) { -+ const int max_y = (1 << bit_depth) - 1; -+ int i; -+ const __m128i kCst8 = _mm_set1_epi32(8); -+ const __m128i max = _mm_set1_epi16(max_y); -+ const __m128i zero = _mm_setzero_si128(); -+ for (i = 0; i + 4 <= len; i += 4) { -+ const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0))); -+ const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1))); -+ const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0))); -+ const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1))); -+ const __m128i a0b1 = _mm_add_epi32(a0, b1); -+ const __m128i a1b0 = _mm_add_epi32(a1, b0); -+ const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0); // A0+A1+B0+B1 -+ const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8); -+ const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1); // 2*(A0+B1) -+ const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0); // 2*(A1+B0) -+ const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3); -+ const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3); -+ const __m128i d0 = _mm_add_epi32(c1, a0); -+ const __m128i d1 = _mm_add_epi32(c0, a1); -+ const __m128i e0 = _mm_srai_epi32(d0, 1); -+ const __m128i e1 = _mm_srai_epi32(d1, 1); -+ const __m128i f0 = 
_mm_unpacklo_epi32(e0, e1); -+ const __m128i f1 = _mm_unpackhi_epi32(e0, e1); -+ const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); -+ const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1)); -+ const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero); -+ _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final); -+ } -+ for (; i < len; ++i) { -+ // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = -+ // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 -+ // We reuse the common sub-expressions. -+ const int a0b1 = A[i + 0] + B[i + 1]; -+ const int a1b0 = A[i + 1] + B[i + 0]; -+ const int a0a1b0b1 = a0b1 + a1b0 + 8; -+ const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -+ const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -+ out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y); -+ out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y); -+ } -+} -+ -+static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, -+ const uint16_t* best_y, uint16_t* out, -+ int bit_depth) { -+ if (bit_depth <= 10) { -+ SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth); -+ } else { -+ SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth); -+ } -+} -+ -+//------------------------------------------------------------------------------ -+ -+extern void InitSharpYuvSSE2(void); -+ -+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) { -+ SharpYuvUpdateY = SharpYuvUpdateY_SSE2; -+ SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2; -+ SharpYuvFilterRow = SharpYuvFilterRow_SSE2; -+} -+#else // !WEBP_USE_SSE2 -+ -+extern void InitSharpYuvSSE2(void); -+ -+void InitSharpYuvSSE2(void) {} -+ -+#endif // WEBP_USE_SSE2 -diff --git a/3rdparty/libwebp/src/dec/alpha_dec.c b/3rdparty/libwebp/src/dec/alpha_dec.c -index bce735bfc248..663255c42fdc 100644 ---- a/3rdparty/libwebp/src/dec/alpha_dec.c -+++ b/3rdparty/libwebp/src/dec/alpha_dec.c -@@ -117,21 +117,12 @@ static int ALPHDecode(VP8Decoder* const dec, int row, int 
num_rows) { - const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width; - uint8_t* dst = dec->alpha_plane_ + row * width; - assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]); -- if (alph_dec->filter_ != WEBP_FILTER_NONE) { -- assert(WebPUnfilters[alph_dec->filter_] != NULL); -- for (y = 0; y < num_rows; ++y) { -- WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width); -- prev_line = dst; -- dst += width; -- deltas += width; -- } -- } else { -- for (y = 0; y < num_rows; ++y) { -- memcpy(dst, deltas, width * sizeof(*dst)); -- prev_line = dst; -- dst += width; -- deltas += width; -- } -+ assert(WebPUnfilters[alph_dec->filter_] != NULL); -+ for (y = 0; y < num_rows; ++y) { -+ WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width); -+ prev_line = dst; -+ dst += width; -+ deltas += width; - } - dec->alpha_prev_line_ = prev_line; - } else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION -@@ -155,7 +146,8 @@ static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) { - dec->alpha_plane_mem_ = - (uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_)); - if (dec->alpha_plane_mem_ == NULL) { -- return 0; -+ return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, -+ "Alpha decoder initialization failed."); - } - dec->alpha_plane_ = dec->alpha_plane_mem_; - dec->alpha_prev_line_ = NULL; -@@ -183,16 +175,25 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec, - assert(dec != NULL && io != NULL); - - if (row < 0 || num_rows <= 0 || row + num_rows > height) { -- return NULL; // sanity check. -+ return NULL; - } - - if (!dec->is_alpha_decoded_) { - if (dec->alph_dec_ == NULL) { // Initialize decoder. 
- dec->alph_dec_ = ALPHNew(); -- if (dec->alph_dec_ == NULL) return NULL; -+ if (dec->alph_dec_ == NULL) { -+ VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY, -+ "Alpha decoder initialization failed."); -+ return NULL; -+ } - if (!AllocateAlphaPlane(dec, io)) goto Error; - if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_, - io, dec->alpha_plane_)) { -+ VP8LDecoder* const vp8l_dec = dec->alph_dec_->vp8l_dec_; -+ VP8SetError(dec, -+ (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY -+ : vp8l_dec->status_, -+ "Alpha decoder initialization failed."); - goto Error; - } - // if we allowed use of alpha dithering, check whether it's needed at all -diff --git a/3rdparty/libwebp/src/dec/buffer_dec.c b/3rdparty/libwebp/src/dec/buffer_dec.c -index 3cd94eb4d930..11ce76f19e2b 100644 ---- a/3rdparty/libwebp/src/dec/buffer_dec.c -+++ b/3rdparty/libwebp/src/dec/buffer_dec.c -@@ -75,7 +75,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { - const WebPRGBABuffer* const buf = &buffer->u.RGBA; - const int stride = abs(buf->stride); - const uint64_t size = -- MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride); -+ MIN_BUFFER_SIZE((uint64_t)width * kModeBpp[mode], height, stride); - ok &= (size <= buf->size); - ok &= (stride >= width * kModeBpp[mode]); - ok &= (buf->rgba != NULL); -@@ -102,7 +102,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { - int stride; - uint64_t size; - -- if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) { -+ if ((uint64_t)w * kModeBpp[mode] >= (1ull << 31)) { - return VP8_STATUS_INVALID_PARAM; - } - stride = w * kModeBpp[mode]; -@@ -117,7 +117,6 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { - } - total_size = size + 2 * uv_size + a_size; - -- // Security/sanity checks - output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output)); - if (output == NULL) { - return VP8_STATUS_OUT_OF_MEMORY; -@@ -156,11 +155,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) { - 
} - if (WebPIsRGBMode(buffer->colorspace)) { - WebPRGBABuffer* const buf = &buffer->u.RGBA; -- buf->rgba += (buffer->height - 1) * buf->stride; -+ buf->rgba += (int64_t)(buffer->height - 1) * buf->stride; - buf->stride = -buf->stride; - } else { - WebPYUVABuffer* const buf = &buffer->u.YUVA; -- const int H = buffer->height; -+ const int64_t H = buffer->height; - buf->y += (H - 1) * buf->y_stride; - buf->y_stride = -buf->y_stride; - buf->u += ((H - 1) >> 1) * buf->u_stride; -@@ -188,8 +187,7 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height, - const int ch = options->crop_height; - const int x = options->crop_left & ~1; - const int y = options->crop_top & ~1; -- if (x < 0 || y < 0 || cw <= 0 || ch <= 0 || -- x + cw > width || y + ch > height) { -+ if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) { - return VP8_STATUS_INVALID_PARAM; // out of frame boundary. - } - width = cw; -diff --git a/3rdparty/libwebp/src/dec/frame_dec.c b/3rdparty/libwebp/src/dec/frame_dec.c -index 04609a8e56be..91ca1f8609a9 100644 ---- a/3rdparty/libwebp/src/dec/frame_dec.c -+++ b/3rdparty/libwebp/src/dec/frame_dec.c -@@ -705,7 +705,7 @@ static int AllocateMemory(VP8Decoder* const dec) { - + cache_size + alpha_size + WEBP_ALIGN_CST; - uint8_t* mem; - -- if (needed != (size_t)needed) return 0; // check for overflow -+ if (!CheckSizeOverflow(needed)) return 0; // check for overflow - if (needed > dec->mem_size_) { - WebPSafeFree(dec->mem_); - dec->mem_size_ = 0; -diff --git a/3rdparty/libwebp/src/dec/io_dec.c b/3rdparty/libwebp/src/dec/io_dec.c -index 29dc6345dfd1..5ef6298886eb 100644 ---- a/3rdparty/libwebp/src/dec/io_dec.c -+++ b/3rdparty/libwebp/src/dec/io_dec.c -@@ -298,46 +298,57 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int uv_out_height = (out_height + 1) >> 1; - const int uv_in_width = (io->mb_w + 1) >> 1; - const int uv_in_height = (io->mb_h + 1) >> 1; -- const size_t work_size = 2 * out_width; // scratch memory for luma 
rescaler -+ // scratch memory for luma rescaler -+ const size_t work_size = 2 * (size_t)out_width; - const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones -- size_t tmp_size, rescaler_size; -+ uint64_t total_size; -+ size_t rescaler_size; - rescaler_t* work; - WebPRescaler* scalers; - const int num_rescalers = has_alpha ? 4 : 3; - -- tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work); -+ total_size = ((uint64_t)work_size + 2 * uv_work_size) * sizeof(*work); - if (has_alpha) { -- tmp_size += work_size * sizeof(*work); -+ total_size += (uint64_t)work_size * sizeof(*work); - } - rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; -+ total_size += rescaler_size; -+ if (!CheckSizeOverflow(total_size)) { -+ return 0; -+ } - -- p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size); -+ p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); - if (p->memory == NULL) { - return 0; // memory error - } - work = (rescaler_t*)p->memory; - -- scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size); -+ scalers = (WebPRescaler*)WEBP_ALIGN( -+ (const uint8_t*)work + total_size - rescaler_size); - p->scaler_y = &scalers[0]; - p->scaler_u = &scalers[1]; - p->scaler_v = &scalers[2]; - p->scaler_a = has_alpha ? 
&scalers[3] : NULL; - -- WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, -- buf->y, out_width, out_height, buf->y_stride, 1, -- work); -- WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, -- buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, -- work + work_size); -- WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, -- buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, -- work + work_size + uv_work_size); -+ if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, -+ buf->y, out_width, out_height, buf->y_stride, 1, -+ work) || -+ !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, -+ buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, -+ work + work_size) || -+ !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, -+ buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, -+ work + work_size + uv_work_size)) { -+ return 0; -+ } - p->emit = EmitRescaledYUV; - - if (has_alpha) { -- WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, -- buf->a, out_width, out_height, buf->a_stride, 1, -- work + work_size + 2 * uv_work_size); -+ if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, -+ buf->a, out_width, out_height, buf->a_stride, 1, -+ work + work_size + 2 * uv_work_size)) { -+ return 0; -+ } - p->emit_alpha = EmitRescaledAlphaYUV; - WebPInitAlphaProcessing(); - } -@@ -480,51 +491,58 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { - const int out_height = io->scaled_height; - const int uv_in_width = (io->mb_w + 1) >> 1; - const int uv_in_height = (io->mb_h + 1) >> 1; -- const size_t work_size = 2 * out_width; // scratch memory for one rescaler -+ // scratch memory for one rescaler -+ const size_t work_size = 2 * (size_t)out_width; - rescaler_t* work; // rescalers work area - uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion -- size_t tmp_size1, tmp_size2, total_size, rescaler_size; -+ uint64_t tmp_size1, tmp_size2, total_size; -+ size_t rescaler_size; - WebPRescaler* scalers; - 
const int num_rescalers = has_alpha ? 4 : 3; - -- tmp_size1 = 3 * work_size; -- tmp_size2 = 3 * out_width; -- if (has_alpha) { -- tmp_size1 += work_size; -- tmp_size2 += out_width; -- } -+ tmp_size1 = (uint64_t)num_rescalers * work_size; -+ tmp_size2 = (uint64_t)num_rescalers * out_width; - total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); - rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST; -+ total_size += rescaler_size; -+ if (!CheckSizeOverflow(total_size)) { -+ return 0; -+ } - -- p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size); -+ p->memory = WebPSafeMalloc(1ULL, (size_t)total_size); - if (p->memory == NULL) { - return 0; // memory error - } - work = (rescaler_t*)p->memory; - tmp = (uint8_t*)(work + tmp_size1); - -- scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size); -+ scalers = (WebPRescaler*)WEBP_ALIGN( -+ (const uint8_t*)work + total_size - rescaler_size); - p->scaler_y = &scalers[0]; - p->scaler_u = &scalers[1]; - p->scaler_v = &scalers[2]; - p->scaler_a = has_alpha ? 
&scalers[3] : NULL; - -- WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, -- tmp + 0 * out_width, out_width, out_height, 0, 1, -- work + 0 * work_size); -- WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, -- tmp + 1 * out_width, out_width, out_height, 0, 1, -- work + 1 * work_size); -- WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, -- tmp + 2 * out_width, out_width, out_height, 0, 1, -- work + 2 * work_size); -+ if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h, -+ tmp + 0 * out_width, out_width, out_height, 0, 1, -+ work + 0 * work_size) || -+ !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height, -+ tmp + 1 * out_width, out_width, out_height, 0, 1, -+ work + 1 * work_size) || -+ !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height, -+ tmp + 2 * out_width, out_width, out_height, 0, 1, -+ work + 2 * work_size)) { -+ return 0; -+ } - p->emit = EmitRescaledRGB; - WebPInitYUV444Converters(); - - if (has_alpha) { -- WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, -- tmp + 3 * out_width, out_width, out_height, 0, 1, -- work + 3 * work_size); -+ if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h, -+ tmp + 3 * out_width, out_width, out_height, 0, 1, -+ work + 3 * work_size)) { -+ return 0; -+ } - p->emit_alpha = EmitRescaledAlphaRGB; - if (p->output->colorspace == MODE_RGBA_4444 || - p->output->colorspace == MODE_rgbA_4444) { -diff --git a/3rdparty/libwebp/src/dec/tree_dec.c b/3rdparty/libwebp/src/dec/tree_dec.c -index 1c6fdea27cc6..243460595329 100644 ---- a/3rdparty/libwebp/src/dec/tree_dec.c -+++ b/3rdparty/libwebp/src/dec/tree_dec.c -@@ -12,10 +12,11 @@ - // Author: Skal (pascal.massimino@gmail.com) - - #include "src/dec/vp8i_dec.h" -+#include "src/dsp/cpu.h" - #include "src/utils/bit_reader_inl_utils.h" - - #if !defined(USE_GENERIC_TREE) --#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) -+#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 - // using a table is ~1-2% slower on ARM. 
Prefer the coded-tree approach then. - #define USE_GENERIC_TREE 1 // ALTERNATE_CODE - #else -diff --git a/3rdparty/libwebp/src/dec/vp8_dec.c b/3rdparty/libwebp/src/dec/vp8_dec.c -index 8f736974784e..20b92e84c4fc 100644 ---- a/3rdparty/libwebp/src/dec/vp8_dec.c -+++ b/3rdparty/libwebp/src/dec/vp8_dec.c -@@ -335,7 +335,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) { - io->scaled_width = io->width; - io->scaled_height = io->height; - -- io->mb_w = io->width; // sanity check -+ io->mb_w = io->width; // for soundness - io->mb_h = io->height; // ditto - - VP8ResetProba(&dec->proba_); -@@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = { - 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 - }; - --// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 -+// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 - static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { - int v; - if (!VP8GetBit(br, p[3], "coeffs")) { -@@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br, - return 16; - } - -+extern VP8CPUInfo VP8GetCPUInfo; -+ - WEBP_DSP_INIT_FUNC(InitGetCoeffs) { - if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { - GetCoeffs = GetCoeffsAlt; -diff --git a/3rdparty/libwebp/src/dec/vp8i_dec.h b/3rdparty/libwebp/src/dec/vp8i_dec.h -index a0c0af15799e..1ae4ff62f2a4 100644 ---- a/3rdparty/libwebp/src/dec/vp8i_dec.h -+++ b/3rdparty/libwebp/src/dec/vp8i_dec.h -@@ -31,8 +31,8 @@ extern "C" { - - // version numbers - #define DEC_MAJ_VERSION 1 --#define DEC_MIN_VERSION 2 --#define DEC_REV_VERSION 0 -+#define DEC_MIN_VERSION 3 -+#define DEC_REV_VERSION 1 - - // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). 
- // Constraints are: We need to store one 16x16 block of luma samples (y), -diff --git a/3rdparty/libwebp/src/dec/vp8l_dec.c b/3rdparty/libwebp/src/dec/vp8l_dec.c -index 2d603b437974..11c00ea964a9 100644 ---- a/3rdparty/libwebp/src/dec/vp8l_dec.c -+++ b/3rdparty/libwebp/src/dec/vp8l_dec.c -@@ -12,6 +12,7 @@ - // Authors: Vikas Arora (vikaas.arora@gmail.com) - // Jyrki Alakuijala (jyrki@google.com) - -+#include - #include - - #include "src/dec/alphai_dec.h" -@@ -84,7 +85,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { - // to 256 (green component values) + 24 (length prefix values) - // + color_cache_size (between 0 and 2048). - // All values computed for 8-bit first level lookup with Mark Adler's tool: --// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c -+// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c - #define FIXED_TABLE_SIZE (630 * 3 + 410) - static const uint16_t kTableSize[12] = { - FIXED_TABLE_SIZE + 654, -@@ -101,6 +102,14 @@ static const uint16_t kTableSize[12] = { - FIXED_TABLE_SIZE + 2704 - }; - -+static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) { -+ // The oldest error reported takes precedence over the new one. -+ if (dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED) { -+ dec->status_ = error; -+ } -+ return 0; -+} -+ - static int DecodeImageStream(int xsize, int ysize, - int is_level0, - VP8LDecoder* const dec, -@@ -178,7 +187,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { - - //------------------------------------------------------------------------------ - // Decodes the next Huffman code from bit-stream. --// FillBitWindow(br) needs to be called at minimum every second call -+// VP8LFillBitWindow(br) needs to be called at minimum every second call - // to ReadSymbol, in order to pre-fetch enough bits. 
- static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, - VP8LBitReader* const br) { -@@ -253,11 +262,11 @@ static int ReadHuffmanCodeLengths( - int symbol; - int max_symbol; - int prev_code_len = DEFAULT_CODE_LENGTH; -- HuffmanCode table[1 << LENGTHS_TABLE_BITS]; -+ HuffmanTables tables; - -- if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, -- code_length_code_lengths, -- NUM_CODE_LENGTH_CODES)) { -+ if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) || -+ !VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS, -+ code_length_code_lengths, NUM_CODE_LENGTH_CODES)) { - goto End; - } - -@@ -277,7 +286,7 @@ static int ReadHuffmanCodeLengths( - int code_len; - if (max_symbol-- == 0) break; - VP8LFillBitWindow(br); -- p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; -+ p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; - VP8LSetBitPos(br, br->bit_pos_ + p->bits); - code_len = p->value; - if (code_len < kCodeLengthLiterals) { -@@ -300,14 +309,16 @@ static int ReadHuffmanCodeLengths( - ok = 1; - - End: -- if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -+ VP8LHuffmanTablesDeallocate(&tables); -+ if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR); - return ok; - } - - // 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman - // tree. - static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, -- int* const code_lengths, HuffmanCode* const table) { -+ int* const code_lengths, -+ HuffmanTables* const table) { - int ok = 0; - int size = 0; - VP8LBitReader* const br = &dec->br_; -@@ -321,7 +332,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - // The first code is either 1 bit or 8 bit code. - int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); - code_lengths[symbol] = 1; -- // The second code (if present), is always 8 bit long. -+ // The second code (if present), is always 8 bits long. 
- if (num_symbols == 2) { - symbol = VP8LReadBits(br, 8); - code_lengths[symbol] = 1; -@@ -331,10 +342,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - int i; - int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 }; - const int num_codes = VP8LReadBits(br, 4) + 4; -- if (num_codes > NUM_CODE_LENGTH_CODES) { -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -- return 0; -- } -+ assert(num_codes <= NUM_CODE_LENGTH_CODES); - - for (i = 0; i < num_codes; ++i) { - code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3); -@@ -349,36 +357,35 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - code_lengths, alphabet_size); - } - if (!ok || size == 0) { -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -- return 0; -+ return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR); - } - return size; - } - - static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - int color_cache_bits, int allow_recursion) { -- int i, j; -+ int i; - VP8LBitReader* const br = &dec->br_; - VP8LMetadata* const hdr = &dec->hdr_; - uint32_t* huffman_image = NULL; - HTreeGroup* htree_groups = NULL; -- HuffmanCode* huffman_tables = NULL; -- HuffmanCode* huffman_table = NULL; -+ HuffmanTables* huffman_tables = &hdr->huffman_tables_; - int num_htree_groups = 1; - int num_htree_groups_max = 1; -- int max_alphabet_size = 0; -- int* code_lengths = NULL; -- const int table_size = kTableSize[color_cache_bits]; - int* mapping = NULL; - int ok = 0; - -+ // Check the table has been 0 initialized (through InitMetadata). -+ assert(huffman_tables->root.start == NULL); -+ assert(huffman_tables->curr_segment == NULL); -+ - if (allow_recursion && VP8LReadBits(br, 1)) { - // use meta Huffman codes. 
- const int huffman_precision = VP8LReadBits(br, 3) + 2; - const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision); - const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision); - const int huffman_pixs = huffman_xsize * huffman_ysize; -- if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec, -+ if (!DecodeImageStream(huffman_xsize, huffman_ysize, /*is_level0=*/0, dec, - &huffman_image)) { - goto Error; - } -@@ -402,7 +409,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - // values [0, num_htree_groups) - mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping)); - if (mapping == NULL) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -+ VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - goto Error; - } - // -1 means a value is unmapped, and therefore unused in the Huffman -@@ -421,29 +428,55 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - - if (br->eos_) goto Error; - -- // Find maximum alphabet size for the htree group. -- for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { -- int alphabet_size = kAlphabetSize[j]; -- if (j == 0 && color_cache_bits > 0) { -- alphabet_size += 1 << color_cache_bits; -- } -- if (max_alphabet_size < alphabet_size) { -- max_alphabet_size = alphabet_size; -- } -+ if (!ReadHuffmanCodesHelper(color_cache_bits, num_htree_groups, -+ num_htree_groups_max, mapping, dec, -+ huffman_tables, &htree_groups)) { -+ goto Error; - } -+ ok = 1; - -- code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, -- sizeof(*code_lengths)); -- huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, -- sizeof(*huffman_tables)); -- htree_groups = VP8LHtreeGroupsNew(num_htree_groups); -+ // All OK. Finalize pointers. 
-+ hdr->huffman_image_ = huffman_image; -+ hdr->num_htree_groups_ = num_htree_groups; -+ hdr->htree_groups_ = htree_groups; -+ -+ Error: -+ WebPSafeFree(mapping); -+ if (!ok) { -+ WebPSafeFree(huffman_image); -+ VP8LHuffmanTablesDeallocate(huffman_tables); -+ VP8LHtreeGroupsFree(htree_groups); -+ } -+ return ok; -+} - -- if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -+int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups, -+ int num_htree_groups_max, const int* const mapping, -+ VP8LDecoder* const dec, -+ HuffmanTables* const huffman_tables, -+ HTreeGroup** const htree_groups) { -+ int i, j, ok = 0; -+ const int max_alphabet_size = -+ kAlphabetSize[0] + ((color_cache_bits > 0) ? 1 << color_cache_bits : 0); -+ const int table_size = kTableSize[color_cache_bits]; -+ int* code_lengths = NULL; -+ -+ if ((mapping == NULL && num_htree_groups != num_htree_groups_max) || -+ num_htree_groups > num_htree_groups_max) { -+ goto Error; -+ } -+ -+ code_lengths = -+ (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths)); -+ *htree_groups = VP8LHtreeGroupsNew(num_htree_groups); -+ -+ if (*htree_groups == NULL || code_lengths == NULL || -+ !VP8LHuffmanTablesAllocate(num_htree_groups * table_size, -+ huffman_tables)) { -+ VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - goto Error; - } - -- huffman_table = huffman_tables; - for (i = 0; i < num_htree_groups_max; ++i) { - // If the index "i" is unused in the Huffman image, just make sure the - // coefficients are valid but do not store them. -@@ -460,7 +493,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - } - } else { - HTreeGroup* const htree_group = -- &htree_groups[(mapping == NULL) ? i : mapping[i]]; -+ &(*htree_groups)[(mapping == NULL) ? 
i : mapping[i]]; - HuffmanCode** const htrees = htree_group->htrees; - int size; - int total_size = 0; -@@ -468,19 +501,20 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - int max_bits = 0; - for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { - int alphabet_size = kAlphabetSize[j]; -- htrees[j] = huffman_table; - if (j == 0 && color_cache_bits > 0) { - alphabet_size += (1 << color_cache_bits); - } -- size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table); -+ size = -+ ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables); -+ htrees[j] = huffman_tables->curr_segment->curr_table; - if (size == 0) { - goto Error; - } - if (is_trivial_literal && kLiteralMap[j] == 1) { -- is_trivial_literal = (huffman_table->bits == 0); -+ is_trivial_literal = (htrees[j]->bits == 0); - } -- total_size += huffman_table->bits; -- huffman_table += size; -+ total_size += htrees[j]->bits; -+ huffman_tables->curr_segment->curr_table += size; - if (j <= ALPHA) { - int local_max_bits = code_lengths[0]; - int k; -@@ -511,19 +545,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, - } - ok = 1; - -- // All OK. Finalize pointers. 
-- hdr->huffman_image_ = huffman_image; -- hdr->num_htree_groups_ = num_htree_groups; -- hdr->htree_groups_ = htree_groups; -- hdr->huffman_tables_ = huffman_tables; -- - Error: - WebPSafeFree(code_lengths); -- WebPSafeFree(mapping); - if (!ok) { -- WebPSafeFree(huffman_image); -- WebPSafeFree(huffman_tables); -- VP8LHtreeGroupsFree(htree_groups); -+ VP8LHuffmanTablesDeallocate(huffman_tables); -+ VP8LHtreeGroupsFree(*htree_groups); -+ *htree_groups = NULL; - } - return ok; - } -@@ -547,8 +574,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { - scaled_data_size * sizeof(*scaled_data); - uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory)); - if (memory == NULL) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -- return 0; -+ return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - } - assert(dec->rescaler_memory == NULL); - dec->rescaler_memory = memory; -@@ -559,8 +585,11 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { - memory += work_size * sizeof(*work); - scaled_data = (uint32_t*)memory; - -- WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, -- out_width, out_height, 0, num_channels, work); -+ if (!WebPRescalerInit(dec->rescaler, in_width, in_height, -+ (uint8_t*)scaled_data, out_width, out_height, -+ 0, num_channels, work)) { -+ return 0; -+ } - return 1; - } - #endif // WEBP_REDUCE_SIZE -@@ -574,13 +603,14 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { - static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, - int rgba_stride, uint8_t* const rgba) { - uint32_t* const src = (uint32_t*)rescaler->dst; -+ uint8_t* dst = rgba; - const int dst_width = rescaler->dst_width; - int num_lines_out = 0; - while (WebPRescalerHasPendingOutput(rescaler)) { -- uint8_t* const dst = rgba + num_lines_out * rgba_stride; - WebPRescalerExportRow(rescaler); - WebPMultARGBRow(src, dst_width, 1); - VP8LConvertFromBGRA(src, 
dst_width, colorspace, dst); -+ dst += rgba_stride; - ++num_lines_out; - } - return num_lines_out; -@@ -594,8 +624,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec, - int num_lines_in = 0; - int num_lines_out = 0; - while (num_lines_in < mb_h) { -- uint8_t* const row_in = in + num_lines_in * in_stride; -- uint8_t* const row_out = out + num_lines_out * out_stride; -+ uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride; -+ uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride; - const int lines_left = mb_h - num_lines_in; - const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left); - int lines_imported; -@@ -796,7 +826,8 @@ static void ProcessRows(VP8LDecoder* const dec, int row) { - const WebPDecBuffer* const output = dec->output_; - if (WebPIsRGBMode(output->colorspace)) { // convert to RGBA - const WebPRGBABuffer* const buf = &output->u.RGBA; -- uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride; -+ uint8_t* const rgba = -+ buf->rgba + (int64_t)dec->last_out_row_ * buf->stride; - const int num_rows_out = - #if !defined(WEBP_REDUCE_SIZE) - io->use_scaling ? -@@ -1077,12 +1108,10 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data, - End: - br->eos_ = VP8LIsEndOfStream(br); - if (!ok || (br->eos_ && pos < end)) { -- ok = 0; -- dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED -- : VP8_STATUS_BITSTREAM_ERROR; -- } else { -- dec->last_pixel_ = pos; -+ return VP8LSetError( -+ dec, br->eos_ ? VP8_STATUS_SUSPENDED : VP8_STATUS_BITSTREAM_ERROR); - } -+ dec->last_pixel_ = pos; - return ok; - } - -@@ -1232,9 +1261,20 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, - } - - br->eos_ = VP8LIsEndOfStream(br); -- if (dec->incremental_ && br->eos_ && src < src_end) { -+ // In incremental decoding: -+ // br->eos_ && src < src_last: if 'br' reached the end of the buffer and -+ // 'src_last' has not been reached yet, there is not enough data. 
'dec' has to -+ // be reset until there is more data. -+ // !br->eos_ && src < src_last: this cannot happen as either the buffer is -+ // fully read, either enough has been read to reach 'src_last'. -+ // src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go -+ // beyond 'src_last' in case the image is cropped and an LZ77 goes further. -+ // The buffer might have been enough or there is some left. 'br->eos_' does -+ // not matter. -+ assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last); -+ if (dec->incremental_ && br->eos_ && src < src_last) { - RestoreState(dec); -- } else if (!br->eos_) { -+ } else if ((dec->incremental_ && src >= src_last) || !br->eos_) { - // Process the remaining rows corresponding to last row-block. - if (process_func != NULL) { - process_func(dec, row > last_row ? last_row : row); -@@ -1249,8 +1289,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data, - return 1; - - Error: -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -- return 0; -+ return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR); - } - - // ----------------------------------------------------------------------------- -@@ -1276,7 +1315,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { - uint8_t* const new_data = (uint8_t*)new_color_map; - new_color_map[0] = transform->data_[0]; - for (i = 4; i < 4 * num_colors; ++i) { -- // Equivalent to AddPixelEq(), on a byte-basis. -+ // Equivalent to VP8LAddPixels(), on a byte-basis. 
- new_data[i] = (data[i] + new_data[i - 4]) & 0xff; - } - for (; i < 4 * final_num_colors; ++i) { -@@ -1317,7 +1356,7 @@ static int ReadTransform(int* const xsize, int const* ysize, - transform->bits_), - VP8LSubSampleSize(transform->ysize_, - transform->bits_), -- 0, dec, &transform->data_); -+ /*is_level0=*/0, dec, &transform->data_); - break; - case COLOR_INDEXING_TRANSFORM: { - const int num_colors = VP8LReadBits(br, 8) + 1; -@@ -1327,11 +1366,14 @@ static int ReadTransform(int* const xsize, int const* ysize, - : 3; - *xsize = VP8LSubSampleSize(transform->xsize_, bits); - transform->bits_ = bits; -- ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_); -- ok = ok && ExpandColorMap(num_colors, transform); -+ ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec, -+ &transform->data_); -+ if (ok && !ExpandColorMap(num_colors, transform)) { -+ return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); -+ } - break; - } -- case SUBTRACT_GREEN: -+ case SUBTRACT_GREEN_TRANSFORM: - break; - default: - assert(0); // can't happen -@@ -1353,7 +1395,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) { - assert(hdr != NULL); - - WebPSafeFree(hdr->huffman_image_); -- WebPSafeFree(hdr->huffman_tables_); -+ VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_); - VP8LHtreeGroupsFree(hdr->htree_groups_); - VP8LColorCacheClear(&hdr->color_cache_); - VP8LColorCacheClear(&hdr->saved_color_cache_); -@@ -1434,7 +1476,7 @@ static int DecodeImageStream(int xsize, int ysize, - color_cache_bits = VP8LReadBits(br, 4); - ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS); - if (!ok) { -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -+ VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR); - goto End; - } - } -@@ -1443,7 +1485,7 @@ static int DecodeImageStream(int xsize, int ysize, - ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize, - color_cache_bits, is_level0); - if (!ok) { -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -+ VP8LSetError(dec, 
VP8_STATUS_BITSTREAM_ERROR); - goto End; - } - -@@ -1451,8 +1493,7 @@ static int DecodeImageStream(int xsize, int ysize, - if (color_cache_bits > 0) { - hdr->color_cache_size_ = 1 << color_cache_bits; - if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -- ok = 0; -+ ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - goto End; - } - } else { -@@ -1469,8 +1510,7 @@ static int DecodeImageStream(int xsize, int ysize, - const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize; - data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data)); - if (data == NULL) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -- ok = 0; -+ ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - goto End; - } - } -@@ -1514,9 +1554,8 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { - assert(dec->width_ <= final_width); - dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t)); - if (dec->pixels_ == NULL) { -- dec->argb_cache_ = NULL; // for sanity check -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -- return 0; -+ dec->argb_cache_ = NULL; // for soundness -+ return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - } - dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels; - return 1; -@@ -1524,11 +1563,10 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) { - - static int AllocateInternalBuffers8b(VP8LDecoder* const dec) { - const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_; -- dec->argb_cache_ = NULL; // for sanity check -+ dec->argb_cache_ = NULL; // for soundness - dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t)); - if (dec->pixels_ == NULL) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -- return 0; -+ return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - } - return 1; - } -@@ -1583,7 +1621,8 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec, - dec->status_ = VP8_STATUS_OK; - 
VP8LInitBitReader(&dec->br_, data, data_size); - -- if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) { -+ if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, /*is_level0=*/1, -+ dec, /*decoded_data=*/NULL)) { - goto Err; - } - -@@ -1638,22 +1677,24 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) { - - if (dec == NULL) return 0; - if (io == NULL) { -- dec->status_ = VP8_STATUS_INVALID_PARAM; -- return 0; -+ return VP8LSetError(dec, VP8_STATUS_INVALID_PARAM); - } - - dec->io_ = io; - dec->status_ = VP8_STATUS_OK; - VP8LInitBitReader(&dec->br_, io->data, io->data_size); - if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) { -- dec->status_ = VP8_STATUS_BITSTREAM_ERROR; -+ VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR); - goto Error; - } - dec->state_ = READ_DIM; - io->width = width; - io->height = height; - -- if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error; -+ if (!DecodeImageStream(width, height, /*is_level0=*/1, dec, -+ /*decoded_data=*/NULL)) { -+ goto Error; -+ } - return 1; - - Error: -@@ -1666,10 +1707,9 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { - VP8Io* io = NULL; - WebPDecParams* params = NULL; - -- // Sanity checks. 
- if (dec == NULL) return 0; - -- assert(dec->hdr_.huffman_tables_ != NULL); -+ assert(dec->hdr_.huffman_tables_.root.start != NULL); - assert(dec->hdr_.htree_groups_ != NULL); - assert(dec->hdr_.num_htree_groups_ > 0); - -@@ -1684,7 +1724,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { - assert(dec->output_ != NULL); - - if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) { -- dec->status_ = VP8_STATUS_INVALID_PARAM; -+ VP8LSetError(dec, VP8_STATUS_INVALID_PARAM); - goto Err; - } - -@@ -1694,7 +1734,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { - if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err; - #else - if (io->use_scaling) { -- dec->status_ = VP8_STATUS_INVALID_PARAM; -+ VP8LSetError(dec, VP8_STATUS_INVALID_PARAM); - goto Err; - } - #endif -@@ -1712,7 +1752,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { - dec->hdr_.saved_color_cache_.colors_ == NULL) { - if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_, - dec->hdr_.color_cache_.hash_bits_)) { -- dec->status_ = VP8_STATUS_OUT_OF_MEMORY; -+ VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY); - goto Err; - } - } -diff --git a/3rdparty/libwebp/src/dec/vp8li_dec.h b/3rdparty/libwebp/src/dec/vp8li_dec.h -index 72b2e8612084..b057573f6c75 100644 ---- a/3rdparty/libwebp/src/dec/vp8li_dec.h -+++ b/3rdparty/libwebp/src/dec/vp8li_dec.h -@@ -51,7 +51,7 @@ typedef struct { - uint32_t* huffman_image_; - int num_htree_groups_; - HTreeGroup* htree_groups_; -- HuffmanCode* huffman_tables_; -+ HuffmanTables huffman_tables_; - } VP8LMetadata; - - typedef struct VP8LDecoder VP8LDecoder; -@@ -126,6 +126,19 @@ void VP8LClear(VP8LDecoder* const dec); - // Clears and deallocate a lossless decoder instance. - void VP8LDelete(VP8LDecoder* const dec); - -+// Helper function for reading the different Huffman codes and storing them in -+// 'huffman_tables' and 'htree_groups'. -+// If mapping is NULL 'num_htree_groups_max' must equal 'num_htree_groups'. 
-+// If it is not NULL, it maps 'num_htree_groups_max' indices to the -+// 'num_htree_groups' groups. If 'num_htree_groups_max' > 'num_htree_groups', -+// some of those indices map to -1. This is used for non-balanced codes to -+// limit memory usage. -+int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups, -+ int num_htree_groups_max, const int* const mapping, -+ VP8LDecoder* const dec, -+ HuffmanTables* const huffman_tables, -+ HTreeGroup** const htree_groups); -+ - //------------------------------------------------------------------------------ - - #ifdef __cplusplus -diff --git a/3rdparty/libwebp/src/dec/webp_dec.c b/3rdparty/libwebp/src/dec/webp_dec.c -index 42d098874d07..f557868b9985 100644 ---- a/3rdparty/libwebp/src/dec/webp_dec.c -+++ b/3rdparty/libwebp/src/dec/webp_dec.c -@@ -179,7 +179,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, - return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. - } - // For odd-sized chunk-payload, there's one byte padding at the end. -- disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1; -+ disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u; - total_size += disk_chunk_size; - - // Check that total bytes skipped so far does not exceed riff_size. -@@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, - uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, - int* width, int* height, uint8_t** u, uint8_t** v, - int* stride, int* uv_stride) { -- WebPDecBuffer output; // only to preserve the side-infos -- uint8_t* const out = Decode(MODE_YUV, data, data_size, -- width, height, &output); -- -- if (out != NULL) { -- const WebPYUVABuffer* const buf = &output.u.YUVA; -- *u = buf->u; -- *v = buf->v; -- *stride = buf->y_stride; -- *uv_stride = buf->u_stride; -- assert(buf->u_stride == buf->v_stride); -- } -- return out; -+ // data, width and height are checked by Decode(). 
-+ if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) { -+ return NULL; -+ } -+ -+ { -+ WebPDecBuffer output; // only to preserve the side-infos -+ uint8_t* const out = Decode(MODE_YUV, data, data_size, -+ width, height, &output); -+ -+ if (out != NULL) { -+ const WebPYUVABuffer* const buf = &output.u.YUVA; -+ *u = buf->u; -+ *v = buf->v; -+ *stride = buf->y_stride; -+ *uv_stride = buf->u_stride; -+ assert(buf->u_stride == buf->v_stride); -+ } -+ return out; -+ } - } - - static void DefaultFeatures(WebPBitstreamFeatures* const features) { -@@ -785,6 +792,13 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size, - //------------------------------------------------------------------------------ - // Cropping and rescaling. - -+int WebPCheckCropDimensions(int image_width, int image_height, -+ int x, int y, int w, int h) { -+ return !(x < 0 || y < 0 || w <= 0 || h <= 0 || -+ x >= image_width || w > image_width || w > image_width - x || -+ y >= image_height || h > image_height || h > image_height - y); -+} -+ - int WebPIoInitFromOptions(const WebPDecoderOptions* const options, - VP8Io* const io, WEBP_CSP_MODE src_colorspace) { - const int W = io->width; -@@ -792,7 +806,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, - int x = 0, y = 0, w = W, h = H; - - // Cropping -- io->use_cropping = (options != NULL) && (options->use_cropping > 0); -+ io->use_cropping = (options != NULL) && options->use_cropping; - if (io->use_cropping) { - w = options->crop_width; - h = options->crop_height; -@@ -802,7 +816,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, - x &= ~1; - y &= ~1; - } -- if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) { -+ if (!WebPCheckCropDimensions(W, H, x, y, w, h)) { - return 0; // out of frame boundary error - } - } -@@ -814,7 +828,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, - io->mb_h = h; - - // Scaling -- io->use_scaling = (options != NULL) 
&& (options->use_scaling > 0); -+ io->use_scaling = (options != NULL) && options->use_scaling; - if (io->use_scaling) { - int scaled_width = options->scaled_width; - int scaled_height = options->scaled_height; -@@ -835,8 +849,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options, - - if (io->use_scaling) { - // disable filter (only for large downscaling ratio). -- io->bypass_filtering = (io->scaled_width < W * 3 / 4) && -- (io->scaled_height < H * 3 / 4); -+ io->bypass_filtering |= (io->scaled_width < W * 3 / 4) && -+ (io->scaled_height < H * 3 / 4); - io->fancy_upsampling = 0; - } - return 1; -diff --git a/3rdparty/libwebp/src/dec/webpi_dec.h b/3rdparty/libwebp/src/dec/webpi_dec.h -index 24baff5d27a8..3b97388c71c1 100644 ---- a/3rdparty/libwebp/src/dec/webpi_dec.h -+++ b/3rdparty/libwebp/src/dec/webpi_dec.h -@@ -77,6 +77,10 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers); - //------------------------------------------------------------------------------ - // Misc utils - -+// Returns true if crop dimensions are within image bounds. -+int WebPCheckCropDimensions(int image_width, int image_height, -+ int x, int y, int w, int h); -+ - // Initializes VP8Io with custom setup, io and teardown functions. The default - // hooks will use the supplied 'params' as io->opaque handle. - void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io); -diff --git a/3rdparty/libwebp/src/demux/anim_decode.c b/3rdparty/libwebp/src/demux/anim_decode.c -index 3dcacc35d675..e077ffb53640 100644 ---- a/3rdparty/libwebp/src/demux/anim_decode.c -+++ b/3rdparty/libwebp/src/demux/anim_decode.c -@@ -23,6 +23,14 @@ - - #define NUM_CHANNELS 4 - -+// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA -+// buffer. 
-+#ifdef WORDS_BIGENDIAN -+#define CHANNEL_SHIFT(i) (24 - (i) * 8) -+#else -+#define CHANNEL_SHIFT(i) ((i) * 8) -+#endif -+ - typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int); - static void BlendPixelRowNonPremult(uint32_t* const src, - const uint32_t* const dst, int num_pixels); -@@ -87,11 +95,19 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal( - int abi_version) { - WebPAnimDecoderOptions options; - WebPAnimDecoder* dec = NULL; -+ WebPBitstreamFeatures features; - if (webp_data == NULL || - WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) { - return NULL; - } - -+ // Validate the bitstream before doing expensive allocations. The demuxer may -+ // be more tolerant than the decoder. -+ if (WebPGetFeatures(webp_data->bytes, webp_data->size, &features) != -+ VP8_STATUS_OK) { -+ return NULL; -+ } -+ - // Note: calloc() so that the pointer members are initialized to NULL. - dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec)); - if (dec == NULL) goto Error; -@@ -145,7 +161,7 @@ static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width, - uint32_t canvas_height) { - const uint64_t size = - (uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf); -- if (size != (size_t)size) return 0; -+ if (!CheckSizeOverflow(size)) return 0; - memset(buf, 0, (size_t)size); - return 1; - } -@@ -166,7 +182,7 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset, - static int CopyCanvas(const uint8_t* src, uint8_t* dst, - uint32_t width, uint32_t height) { - const uint64_t size = (uint64_t)width * height * NUM_CHANNELS; -- if (size != (size_t)size) return 0; -+ if (!CheckSizeOverflow(size)) return 0; - assert(src != NULL && dst != NULL); - memcpy(dst, src, (size_t)size); - return 1; -@@ -201,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, - const uint8_t dst_channel = (dst >> shift) & 0xff; - const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a; - 
assert(blend_unscaled < (1ULL << 32) / scale); -- return (blend_unscaled * scale) >> 24; -+ return (blend_unscaled * scale) >> CHANNEL_SHIFT(3); - } - - // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha. - static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) { -- const uint8_t src_a = (src >> 24) & 0xff; -+ const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; - - if (src_a == 0) { - return dst; - } else { -- const uint8_t dst_a = (dst >> 24) & 0xff; -+ const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff; - // This is the approximate integer arithmetic for the actual formula: - // dst_factor_a = (dst_a * (255 - src_a)) / 255. - const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8; - const uint8_t blend_a = src_a + dst_factor_a; - const uint32_t scale = (1UL << 24) / blend_a; - -- const uint8_t blend_r = -- BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0); -- const uint8_t blend_g = -- BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8); -- const uint8_t blend_b = -- BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16); -+ const uint8_t blend_r = BlendChannelNonPremult( -+ src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0)); -+ const uint8_t blend_g = BlendChannelNonPremult( -+ src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1)); -+ const uint8_t blend_b = BlendChannelNonPremult( -+ src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2)); - assert(src_a + dst_factor_a < 256); - -- return (blend_r << 0) | -- (blend_g << 8) | -- (blend_b << 16) | -- ((uint32_t)blend_a << 24); -+ return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) | -+ ((uint32_t)blend_g << CHANNEL_SHIFT(1)) | -+ ((uint32_t)blend_b << CHANNEL_SHIFT(2)) | -+ ((uint32_t)blend_a << CHANNEL_SHIFT(3)); - } - } - -@@ -239,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src, - const uint32_t* const dst, int num_pixels) { - int i; - for (i = 0; i < num_pixels; ++i) { -- const uint8_t src_alpha = (src[i] >> 24) 
& 0xff; -+ const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; - if (src_alpha != 0xff) { - src[i] = BlendPixelNonPremult(src[i], dst[i]); - } -@@ -256,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) { - - // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha. - static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) { -- const uint8_t src_a = (src >> 24) & 0xff; -+ const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; - return src + ChannelwiseMultiply(dst, 256 - src_a); - } - -@@ -266,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst, - int num_pixels) { - int i; - for (i = 0; i < num_pixels; ++i) { -- const uint8_t src_alpha = (src[i] >> 24) & 0xff; -+ const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; - if (src_alpha != 0xff) { - src[i] = BlendPixelPremult(src[i], dst[i]); - } -diff --git a/3rdparty/libwebp/src/demux/demux.c b/3rdparty/libwebp/src/demux/demux.c -index 860e2ce7615e..fd45a2500e4b 100644 ---- a/3rdparty/libwebp/src/demux/demux.c -+++ b/3rdparty/libwebp/src/demux/demux.c -@@ -24,8 +24,8 @@ - #include "src/webp/format_constants.h" - - #define DMUX_MAJ_VERSION 1 --#define DMUX_MIN_VERSION 2 --#define DMUX_REV_VERSION 0 -+#define DMUX_MIN_VERSION 3 -+#define DMUX_REV_VERSION 1 - - typedef struct { - size_t start_; // start location of the data -@@ -221,12 +221,16 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size, - const size_t chunk_start_offset = mem->start_; - const uint32_t fourcc = ReadLE32(mem); - const uint32_t payload_size = ReadLE32(mem); -- const uint32_t payload_size_padded = payload_size + (payload_size & 1); -- const size_t payload_available = (payload_size_padded > MemDataSize(mem)) -- ? 
MemDataSize(mem) : payload_size_padded; -- const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available; -+ uint32_t payload_size_padded; -+ size_t payload_available; -+ size_t chunk_size; - - if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; -+ -+ payload_size_padded = payload_size + (payload_size & 1); -+ payload_available = (payload_size_padded > MemDataSize(mem)) -+ ? MemDataSize(mem) : payload_size_padded; -+ chunk_size = CHUNK_HEADER_SIZE + payload_available; - if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR; - if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA; - -@@ -451,9 +455,11 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) { - const size_t chunk_start_offset = mem->start_; - const uint32_t fourcc = ReadLE32(mem); - const uint32_t chunk_size = ReadLE32(mem); -- const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1); -+ uint32_t chunk_size_padded; - - if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR; -+ -+ chunk_size_padded = chunk_size + (chunk_size & 1); - if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR; - - switch (fourcc) { -@@ -608,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { - - while (f != NULL) { - const int cur_frame_set = f->frame_num_; -- int frame_count = 0; - - // Check frame properties. 
- for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) { -@@ -643,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { - dmux->canvas_width_, dmux->canvas_height_)) { - return 0; - } -- -- ++frame_count; - } - } - return 1; -diff --git a/3rdparty/libwebp/src/dsp/alpha_processing.c b/3rdparty/libwebp/src/dsp/alpha_processing.c -index 3a27990ddc57..1d152f24dada 100644 ---- a/3rdparty/libwebp/src/dsp/alpha_processing.c -+++ b/3rdparty/libwebp/src/dsp/alpha_processing.c -@@ -157,7 +157,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) { - } - } - --void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, -+void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, -+ const uint8_t* WEBP_RESTRICT const alpha, - int width, int inverse) { - int x; - for (x = 0; x < width; ++x) { -@@ -178,7 +179,8 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, - #undef MFIX - - void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse); --void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, -+void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, -+ const uint8_t* WEBP_RESTRICT const alpha, - int width, int inverse); - - //------------------------------------------------------------------------------ -@@ -193,8 +195,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, - } - } - --void WebPMultRows(uint8_t* ptr, int stride, -- const uint8_t* alpha, int alpha_stride, -+void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, -+ const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, - int width, int num_rows, int inverse) { - int n; - for (n = 0; n < num_rows; ++n) { -@@ -290,9 +292,9 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444, - } - - #if !WEBP_NEON_OMIT_C_CODE --static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, -+static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, - int width, int height, -- 
uint8_t* dst, int dst_stride) { -+ uint8_t* WEBP_RESTRICT dst, int dst_stride) { - uint32_t alpha_mask = 0xff; - int i, j; - -@@ -309,9 +311,10 @@ static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride, - return (alpha_mask != 0xff); - } - --static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint32_t* dst, int dst_stride) { -+static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint32_t* WEBP_RESTRICT dst, -+ int dst_stride) { - int i, j; - for (j = 0; j < height; ++j) { - for (i = 0; i < width; ++i) { -@@ -322,9 +325,9 @@ static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride, - } - } - --static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, -+static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride, - int width, int height, -- uint8_t* alpha, int alpha_stride) { -+ uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { - uint8_t alpha_mask = 0xff; - int i, j; - -@@ -340,7 +343,8 @@ static int ExtractAlpha_C(const uint8_t* argb, int argb_stride, - return (alpha_mask == 0xff); - } - --static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) { -+static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb, -+ uint8_t* WEBP_RESTRICT alpha, int size) { - int i; - for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8; - } -@@ -372,8 +376,11 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) { - } - - #ifdef WORDS_BIGENDIAN --static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, -- const uint8_t* b, int len, uint32_t* out) { -+static void PackARGB_C(const uint8_t* WEBP_RESTRICT a, -+ const uint8_t* WEBP_RESTRICT r, -+ const uint8_t* WEBP_RESTRICT g, -+ const uint8_t* WEBP_RESTRICT b, -+ int len, uint32_t* WEBP_RESTRICT out) { - int i; - for (i = 0; i < len; ++i) { - out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]); -@@ -381,8 
+388,10 @@ static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g, - } - #endif - --static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, -- int len, int step, uint32_t* out) { -+static void PackRGB_C(const uint8_t* WEBP_RESTRICT r, -+ const uint8_t* WEBP_RESTRICT g, -+ const uint8_t* WEBP_RESTRICT b, -+ int len, int step, uint32_t* WEBP_RESTRICT out) { - int i, offset = 0; - for (i = 0; i < len; ++i) { - out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]); -@@ -392,16 +401,22 @@ static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b, - - void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int); - void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int); --int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int); --void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int); --int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int); --void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); -+int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, -+ uint8_t* WEBP_RESTRICT, int); -+void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT, int, int, int, -+ uint32_t* WEBP_RESTRICT, int); -+int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int, -+ uint8_t* WEBP_RESTRICT, int); -+void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, -+ uint8_t* WEBP_RESTRICT alpha, int size); - #ifdef WORDS_BIGENDIAN - void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g, - const uint8_t* b, int, uint32_t*); - #endif --void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, -- int len, int step, uint32_t* out); -+void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, -+ const uint8_t* WEBP_RESTRICT g, -+ const uint8_t* WEBP_RESTRICT b, -+ int len, int step, uint32_t* WEBP_RESTRICT out); - - int (*WebPHasAlpha8b)(const uint8_t* src, int length); - int 
(*WebPHasAlpha32b)(const uint8_t* src, int length); -@@ -410,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); - //------------------------------------------------------------------------------ - // Init function - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void WebPInitAlphaProcessingMIPSdspR2(void); - extern void WebPInitAlphaProcessingSSE2(void); - extern void WebPInitAlphaProcessingSSE41(void); -@@ -438,10 +454,10 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPInitAlphaProcessingSSE2(); --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - WebPInitAlphaProcessingSSE41(); - } -@@ -455,7 +471,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - WebPInitAlphaProcessingNEON(); -diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_neon.c b/3rdparty/libwebp/src/dsp/alpha_processing_neon.c -index 9d55421704cc..6716fb77f0d8 100644 ---- a/3rdparty/libwebp/src/dsp/alpha_processing_neon.c -+++ b/3rdparty/libwebp/src/dsp/alpha_processing_neon.c -@@ -80,10 +80,10 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, - - //------------------------------------------------------------------------------ - --static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint8_t* dst, int dst_stride) { -- uint32_t alpha_mask = 0xffffffffu; -+static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint8_t* WEBP_RESTRICT dst, int dst_stride) { -+ uint32_t alpha_mask = 0xffu; - uint8x8_t mask8 = vdup_n_u8(0xff); - uint32_t tmp[2]; - int i, 
j; -@@ -107,14 +107,16 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride, - dst += dst_stride; - } - vst1_u8((uint8_t*)tmp, mask8); -+ alpha_mask *= 0x01010101; - alpha_mask &= tmp[0]; - alpha_mask &= tmp[1]; - return (alpha_mask != 0xffffffffu); - } - --static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint32_t* dst, int dst_stride) { -+static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint32_t* WEBP_RESTRICT dst, -+ int dst_stride) { - int i, j; - uint8x8x4_t greens; // leave A/R/B channels zero'd. - greens.val[0] = vdup_n_u8(0); -@@ -131,10 +133,10 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride, - } - } - --static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, -+static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride, - int width, int height, -- uint8_t* alpha, int alpha_stride) { -- uint32_t alpha_mask = 0xffffffffu; -+ uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { -+ uint32_t alpha_mask = 0xffu; - uint8x8_t mask8 = vdup_n_u8(0xff); - uint32_t tmp[2]; - int i, j; -@@ -156,13 +158,14 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride, - alpha += alpha_stride; - } - vst1_u8((uint8_t*)tmp, mask8); -+ alpha_mask *= 0x01010101; - alpha_mask &= tmp[0]; - alpha_mask &= tmp[1]; - return (alpha_mask == 0xffffffffu); - } - --static void ExtractGreen_NEON(const uint32_t* argb, -- uint8_t* alpha, int size) { -+static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb, -+ uint8_t* WEBP_RESTRICT alpha, int size) { - int i; - for (i = 0; i + 16 <= size; i += 16) { - const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i)); -diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c -index f6c6e0fb1a6d..aa0cc2848ae9 100644 ---- 
a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c -+++ b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c -@@ -18,16 +18,16 @@ - - //------------------------------------------------------------------------------ - --static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint8_t* dst, int dst_stride) { -+static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint8_t* WEBP_RESTRICT dst, int dst_stride) { - // alpha_and stores an 'and' operation of all the alpha[] values. The final - // value is not 0xff if any of the alpha[] is not equal to 0xff. - uint32_t alpha_and = 0xff; - int i, j; - const __m128i zero = _mm_setzero_si128(); -- const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB -- const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); -+ const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB -+ const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0); - __m128i all_alphas = all_0xff; - - // We must be able to access 3 extra bytes after the last written byte -@@ -72,9 +72,10 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride, - return (alpha_and != 0xff); - } - --static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint32_t* dst, int dst_stride) { -+static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint32_t* WEBP_RESTRICT dst, -+ int dst_stride) { - int i, j; - const __m128i zero = _mm_setzero_si128(); - const int limit = width & ~15; -@@ -98,15 +99,15 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride, - } - } - --static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride, -+static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride, - int width, int height, -- uint8_t* alpha, int alpha_stride) { -+ uint8_t* WEBP_RESTRICT 
alpha, int alpha_stride) { - // alpha_and stores an 'and' operation of all the alpha[] values. The final - // value is not 0xff if any of the alpha[] is not equal to 0xff. - uint32_t alpha_and = 0xff; - int i, j; -- const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha -- const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); -+ const __m128i a_mask = _mm_set1_epi32(0xff); // to preserve alpha -+ const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0); - __m128i all_alphas = all_0xff; - - // We must be able to access 3 extra bytes after the last written byte -@@ -143,6 +144,46 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride, - return (alpha_and == 0xff); - } - -+static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb, -+ uint8_t* WEBP_RESTRICT alpha, int size) { -+ int i; -+ const __m128i mask = _mm_set1_epi32(0xff); -+ const __m128i* src = (const __m128i*)argb; -+ -+ for (i = 0; i + 16 <= size; i += 16, src += 4) { -+ const __m128i a0 = _mm_loadu_si128(src + 0); -+ const __m128i a1 = _mm_loadu_si128(src + 1); -+ const __m128i a2 = _mm_loadu_si128(src + 2); -+ const __m128i a3 = _mm_loadu_si128(src + 3); -+ const __m128i b0 = _mm_srli_epi32(a0, 8); -+ const __m128i b1 = _mm_srli_epi32(a1, 8); -+ const __m128i b2 = _mm_srli_epi32(a2, 8); -+ const __m128i b3 = _mm_srli_epi32(a3, 8); -+ const __m128i c0 = _mm_and_si128(b0, mask); -+ const __m128i c1 = _mm_and_si128(b1, mask); -+ const __m128i c2 = _mm_and_si128(b2, mask); -+ const __m128i c3 = _mm_and_si128(b3, mask); -+ const __m128i d0 = _mm_packs_epi32(c0, c1); -+ const __m128i d1 = _mm_packs_epi32(c2, c3); -+ const __m128i e = _mm_packus_epi16(d0, d1); -+ // store -+ _mm_storeu_si128((__m128i*)&alpha[i], e); -+ } -+ if (i + 8 <= size) { -+ const __m128i a0 = _mm_loadu_si128(src + 0); -+ const __m128i a1 = _mm_loadu_si128(src + 1); -+ const __m128i b0 = _mm_srli_epi32(a0, 8); -+ const __m128i b1 = _mm_srli_epi32(a1, 8); -+ const __m128i c0 = _mm_and_si128(b0, mask); -+ const 
__m128i c1 = _mm_and_si128(b1, mask); -+ const __m128i d = _mm_packs_epi32(c0, c1); -+ const __m128i e = _mm_packus_epi16(d, d); -+ _mm_storel_epi64((__m128i*)&alpha[i], e); -+ i += 8; -+ } -+ for (; i < size; ++i) alpha[i] = argb[i] >> 8; -+} -+ - //------------------------------------------------------------------------------ - // Non-dither premultiplied modes - -@@ -177,7 +218,7 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride, - static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, - int w, int h, int stride) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i kMult = _mm_set1_epi16(0x8081u); -+ const __m128i kMult = _mm_set1_epi16((short)0x8081); - const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0); - const int kSpan = 4; - while (h-- > 0) { -@@ -266,7 +307,7 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) { - } - - static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) { -- const __m128i m_color = _mm_set1_epi32(color); -+ const __m128i m_color = _mm_set1_epi32((int)color); - const __m128i zero = _mm_setzero_si128(); - int i = 0; - for (; i + 8 <= length; i += 8) { -@@ -317,7 +358,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) { - if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse); - } - --static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha, -+static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr, -+ const uint8_t* WEBP_RESTRICT const alpha, - int width, int inverse) { - int x = 0; - if (!inverse) { -@@ -352,6 +394,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) { - WebPDispatchAlpha = DispatchAlpha_SSE2; - WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2; - WebPExtractAlpha = ExtractAlpha_SSE2; -+ WebPExtractGreen = ExtractGreen_SSE2; - - WebPHasAlpha8b = HasAlpha8b_SSE2; - WebPHasAlpha32b = HasAlpha32b_SSE2; -diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c 
b/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c -index 56040f9c8801..1156ac3417b2 100644 ---- a/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c -+++ b/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c -@@ -19,14 +19,14 @@ - - //------------------------------------------------------------------------------ - --static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride, -- int width, int height, -- uint8_t* alpha, int alpha_stride) { -+static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb, -+ int argb_stride, int width, int height, -+ uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { - // alpha_and stores an 'and' operation of all the alpha[] values. The final - // value is not 0xff if any of the alpha[] is not equal to 0xff. - uint32_t alpha_and = 0xff; - int i, j; -- const __m128i all_0xff = _mm_set1_epi32(~0u); -+ const __m128i all_0xff = _mm_set1_epi32(~0); - __m128i all_alphas = all_0xff; - - // We must be able to access 3 extra bytes after the last written byte -diff --git a/3rdparty/libwebp/src/dsp/cost.c b/3rdparty/libwebp/src/dsp/cost.c -index cc681cdd4bf1..73d2140177cb 100644 ---- a/3rdparty/libwebp/src/dsp/cost.c -+++ b/3rdparty/libwebp/src/dsp/cost.c -@@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs, - VP8GetResidualCostFunc VP8GetResidualCost; - VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8EncDspCostInitMIPS32(void); - extern void VP8EncDspCostInitMIPSdspR2(void); - extern void VP8EncDspCostInitSSE2(void); -@@ -395,12 +396,12 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) { - VP8EncDspCostInitMIPSdspR2(); - } - #endif --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8EncDspCostInitSSE2(); - } - #endif --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (VP8GetCPUInfo(kNEON)) { - VP8EncDspCostInitNEON(); - } -diff --git a/3rdparty/libwebp/src/dsp/cost_neon.c b/3rdparty/libwebp/src/dsp/cost_neon.c 
-index 8cc8ce58aa14..6582669cb3f9 100644 ---- a/3rdparty/libwebp/src/dsp/cost_neon.c -+++ b/3rdparty/libwebp/src/dsp/cost_neon.c -@@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, - const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1)); - const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position)); - --#ifdef __aarch64__ -+#if WEBP_AARCH64 - res->last = vmaxvq_u8(masked) - 1; - #else - const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked)); -@@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, - - vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0); - --res->last; --#endif // __aarch64__ -+#endif // WEBP_AARCH64 - - res->coeffs = coeffs; - } -diff --git a/3rdparty/libwebp/src/dsp/cpu.c b/3rdparty/libwebp/src/dsp/cpu.c -index 4ca90d88bf8c..2234c77b3568 100644 ---- a/3rdparty/libwebp/src/dsp/cpu.c -+++ b/3rdparty/libwebp/src/dsp/cpu.c -@@ -11,7 +11,7 @@ - // - // Author: Christian Duvivier (cduvivier@google.com) - --#include "src/dsp/dsp.h" -+#include "src/dsp/cpu.h" - - #if defined(WEBP_HAVE_NEON_RTCD) - #include -@@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) { - } - return 0; - } -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; - #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. - static int AndroidCPUInfo(CPUFeature feature) { -@@ -184,22 +185,23 @@ static int AndroidCPUInfo(CPUFeature feature) { - } - return 0; - } -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; - #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test - // Use compile flags as an indicator of SIMD support instead of a runtime check. 
- static int wasmCPUInfo(CPUFeature feature) { - switch (feature) { --#ifdef WEBP_USE_SSE2 -+#ifdef WEBP_HAVE_SSE2 - case kSSE2: - return 1; - #endif --#ifdef WEBP_USE_SSE41 -+#ifdef WEBP_HAVE_SSE41 - case kSSE3: - case kSlowSSSE3: - case kSSE4_1: - return 1; - #endif --#ifdef WEBP_USE_NEON -+#ifdef WEBP_HAVE_NEON - case kNEON: - return 1; - #endif -@@ -208,10 +210,12 @@ static int wasmCPUInfo(CPUFeature feature) { - } - return 0; - } -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; --#elif defined(WEBP_USE_NEON) --// define a dummy function to enable turning off NEON at runtime by setting --// VP8DecGetCPUInfo = NULL -+#elif defined(WEBP_HAVE_NEON) -+// In most cases this function doesn't check for NEON support (it's assumed by -+// the configuration), but enables turning off NEON at runtime, for testing -+// purposes, by setting VP8GetCPUInfo = NULL. - static int armCPUInfo(CPUFeature feature) { - if (feature != kNEON) return 0; - #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) -@@ -235,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) { - return 1; - #endif - } -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = armCPUInfo; - #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ - defined(WEBP_USE_MSA) -@@ -246,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) { - } - - } -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; - #else -+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - VP8CPUInfo VP8GetCPUInfo = NULL; - #endif -diff --git a/3rdparty/libwebp/src/dsp/cpu.h b/3rdparty/libwebp/src/dsp/cpu.h -new file mode 100644 -index 000000000000..c86540f28013 ---- /dev/null -+++ b/3rdparty/libwebp/src/dsp/cpu.h -@@ -0,0 +1,266 @@ -+// Copyright 2022 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. 
An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// CPU detection functions and macros. -+// -+// Author: Skal (pascal.massimino@gmail.com) -+ -+#ifndef WEBP_DSP_CPU_H_ -+#define WEBP_DSP_CPU_H_ -+ -+#include -+ -+#ifdef HAVE_CONFIG_H -+#include "src/webp/config.h" -+#endif -+ -+#include "src/webp/types.h" -+ -+#if defined(__GNUC__) -+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) -+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) -+#else -+#define LOCAL_GCC_VERSION 0 -+#define LOCAL_GCC_PREREQ(maj, min) 0 -+#endif -+ -+#if defined(__clang__) -+#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) -+#define LOCAL_CLANG_PREREQ(maj, min) \ -+ (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) -+#else -+#define LOCAL_CLANG_VERSION 0 -+#define LOCAL_CLANG_PREREQ(maj, min) 0 -+#endif -+ -+#ifndef __has_builtin -+#define __has_builtin(x) 0 -+#endif -+ -+//------------------------------------------------------------------------------ -+// x86 defines. -+ -+#if !defined(HAVE_CONFIG_H) -+#if defined(_MSC_VER) && _MSC_VER > 1310 && \ -+ (defined(_M_X64) || defined(_M_IX86)) -+#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets -+#endif -+ -+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ -+ (defined(_M_X64) || defined(_M_IX86)) -+#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets -+#endif -+#endif -+ -+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp -+// files without intrinsics, allowing the corresponding Init() to be called. -+// Files containing intrinsics will need to be built targeting the instruction -+// set so should succeed on one of the earlier tests. 
-+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ -+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) -+#define WEBP_USE_SSE2 -+#endif -+ -+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) -+#define WEBP_HAVE_SSE2 -+#endif -+ -+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ -+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) -+#define WEBP_USE_SSE41 -+#endif -+ -+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) -+#define WEBP_HAVE_SSE41 -+#endif -+ -+#undef WEBP_MSC_SSE41 -+#undef WEBP_MSC_SSE2 -+ -+//------------------------------------------------------------------------------ -+// Arm defines. -+ -+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the -+// inline assembly would need to be modified for use with Native Client. -+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ -+ (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ -+ !defined(__native_client__) -+#define WEBP_USE_NEON -+#endif -+ -+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ -+ defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) -+#define WEBP_ANDROID_NEON // Android targets that may have NEON -+#define WEBP_USE_NEON -+#endif -+ -+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct -+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in -+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with -+// vtbl4_u8(); a fix was made in 16.6. 
-+#if defined(_MSC_VER) && \ -+ ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ -+ (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) -+#define WEBP_USE_NEON -+#define WEBP_USE_INTRINSICS -+#endif -+ -+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) -+#define WEBP_AARCH64 1 -+#else -+#define WEBP_AARCH64 0 -+#endif -+ -+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) -+#define WEBP_HAVE_NEON -+#endif -+ -+//------------------------------------------------------------------------------ -+// MIPS defines. -+ -+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ -+ (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) -+#define WEBP_USE_MIPS32 -+#if (__mips_isa_rev >= 2) -+#define WEBP_USE_MIPS32_R2 -+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) -+#define WEBP_USE_MIPS_DSP_R2 -+#endif -+#endif -+#endif -+ -+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) -+#define WEBP_USE_MSA -+#endif -+ -+//------------------------------------------------------------------------------ -+ -+#ifndef WEBP_DSP_OMIT_C_CODE -+#define WEBP_DSP_OMIT_C_CODE 1 -+#endif -+ -+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE -+#define WEBP_NEON_OMIT_C_CODE 1 -+#else -+#define WEBP_NEON_OMIT_C_CODE 0 -+#endif -+ -+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) -+#define WEBP_NEON_WORK_AROUND_GCC 1 -+#else -+#define WEBP_NEON_WORK_AROUND_GCC 0 -+#endif -+ -+//------------------------------------------------------------------------------ -+ -+// This macro prevents thread_sanitizer from reporting known concurrent writes. 
-+#define WEBP_TSAN_IGNORE_FUNCTION -+#if defined(__has_feature) -+#if __has_feature(thread_sanitizer) -+#undef WEBP_TSAN_IGNORE_FUNCTION -+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) -+#endif -+#endif -+ -+#if defined(__has_feature) -+#if __has_feature(memory_sanitizer) -+#define WEBP_MSAN -+#endif -+#endif -+ -+#if defined(WEBP_USE_THREAD) && !defined(_WIN32) -+#include // NOLINT -+ -+#define WEBP_DSP_INIT(func) \ -+ do { \ -+ static volatile VP8CPUInfo func##_last_cpuinfo_used = \ -+ (VP8CPUInfo)&func##_last_cpuinfo_used; \ -+ static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \ -+ if (pthread_mutex_lock(&func##_lock)) break; \ -+ if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \ -+ func##_last_cpuinfo_used = VP8GetCPUInfo; \ -+ (void)pthread_mutex_unlock(&func##_lock); \ -+ } while (0) -+#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) -+#define WEBP_DSP_INIT(func) \ -+ do { \ -+ static volatile VP8CPUInfo func##_last_cpuinfo_used = \ -+ (VP8CPUInfo)&func##_last_cpuinfo_used; \ -+ if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \ -+ func(); \ -+ func##_last_cpuinfo_used = VP8GetCPUInfo; \ -+ } while (0) -+#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) -+ -+// Defines an Init + helper function that control multiple initialization of -+// function pointers / tables. -+/* Usage: -+ WEBP_DSP_INIT_FUNC(InitFunc) { -+ ...function body -+ } -+*/ -+#define WEBP_DSP_INIT_FUNC(name) \ -+ static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \ -+ WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \ -+ static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void) -+ -+#define WEBP_UBSAN_IGNORE_UNDEF -+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -+#if defined(__clang__) && defined(__has_attribute) -+#if __has_attribute(no_sanitize) -+// This macro prevents the undefined behavior sanitizer from reporting -+// failures. 
This is only meant to silence unaligned loads on platforms that -+// are known to support them. -+#undef WEBP_UBSAN_IGNORE_UNDEF -+#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined"))) -+ -+// This macro prevents the undefined behavior sanitizer from reporting -+// failures related to unsigned integer overflows. This is only meant to -+// silence cases where this well defined behavior is expected. -+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ -+ __attribute__((no_sanitize("unsigned-integer-overflow"))) -+#endif -+#endif -+ -+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. -+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. -+#if !defined(WEBP_OFFSET_PTR) -+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) -+#endif -+ -+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) -+#if !defined(WEBP_SWAP_16BIT_CSP) -+#define WEBP_SWAP_16BIT_CSP 0 -+#endif -+ -+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -+#if !defined(WORDS_BIGENDIAN) && \ -+ (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ -+ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -+#define WORDS_BIGENDIAN -+#endif -+ -+typedef enum { -+ kSSE2, -+ kSSE3, -+ kSlowSSSE3, // special feature for slow SSSE3 architectures -+ kSSE4_1, -+ kAVX, -+ kAVX2, -+ kNEON, -+ kMIPS32, -+ kMIPSdspR2, -+ kMSA -+} CPUFeature; -+ -+// returns true if the CPU supports the feature. 
-+typedef int (*VP8CPUInfo)(CPUFeature feature); -+ -+#endif // WEBP_DSP_CPU_H_ -diff --git a/3rdparty/libwebp/src/dsp/dec.c b/3rdparty/libwebp/src/dsp/dec.c -index 1119842dd3de..33d8df8a621f 100644 ---- a/3rdparty/libwebp/src/dsp/dec.c -+++ b/3rdparty/libwebp/src/dsp/dec.c -@@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i; - void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, - int dst_stride); - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8DspInitSSE2(void); - extern void VP8DspInitSSE41(void); - extern void VP8DspInitNEON(void); -@@ -807,10 +808,10 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8DspInitSSE2(); --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - VP8DspInitSSE41(); - } -@@ -834,7 +835,7 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - VP8DspInitNEON(); -diff --git a/3rdparty/libwebp/src/dsp/dec_neon.c b/3rdparty/libwebp/src/dsp/dec_neon.c -index fa851707e265..22784cf15ae9 100644 ---- a/3rdparty/libwebp/src/dsp/dec_neon.c -+++ b/3rdparty/libwebp/src/dsp/dec_neon.c -@@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { - - if (do_top) { - const uint8x8_t A = vld1_u8(dst - BPS); // top row --#if defined(__aarch64__) -+#if WEBP_AARCH64 - const uint16_t p2 = vaddlv_u8(A); - sum_top = vdupq_n_u16(p2); - #else -@@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { - - if (do_top) { - const uint8x16_t A = vld1q_u8(dst - BPS); // top row --#if defined(__aarch64__) -+#if WEBP_AARCH64 - const uint16_t p3 = vaddlvq_u8(A); - sum_top = vdupq_n_u16(p3); - #else -diff 
--git a/3rdparty/libwebp/src/dsp/dec_sse2.c b/3rdparty/libwebp/src/dsp/dec_sse2.c -index 873aa59e8a97..01e6bcb636f0 100644 ---- a/3rdparty/libwebp/src/dsp/dec_sse2.c -+++ b/3rdparty/libwebp/src/dsp/dec_sse2.c -@@ -158,10 +158,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) { - dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS)); - } else { - // Load four bytes/pixels per line. -- dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS)); -- dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS)); -- dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS)); -- dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS)); -+ dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS)); -+ dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS)); -+ dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS)); -+ dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS)); - } - // Convert to 16b. - dst0 = _mm_unpacklo_epi8(dst0, zero); -@@ -187,10 +187,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) { - _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3); - } else { - // Store four bytes/pixels per line. -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); - } - } - } -@@ -213,10 +213,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { - const __m128i m3 = _mm_subs_epi16(B, d4); - const __m128i zero = _mm_setzero_si128(); - // Load the source pixels. 
-- __m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS)); -- __m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS)); -- __m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS)); -- __m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS)); -+ __m128i dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS)); -+ __m128i dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS)); -+ __m128i dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS)); -+ __m128i dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS)); - // Convert to 16b. - dst0 = _mm_unpacklo_epi8(dst0, zero); - dst1 = _mm_unpacklo_epi8(dst1, zero); -@@ -233,10 +233,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { - dst2 = _mm_packus_epi16(dst2, dst2); - dst3 = _mm_packus_epi16(dst3, dst3); - // Store the results. -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); - } - #undef MUL - #endif // USE_TRANSFORM_AC3 -@@ -477,11 +477,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride, - // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00 - // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10 - const __m128i A0 = _mm_set_epi32( -- WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]), -- WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride])); -+ WebPMemToInt32(&b[6 * stride]), WebPMemToInt32(&b[2 * stride]), -+ WebPMemToInt32(&b[4 * stride]), WebPMemToInt32(&b[0 * stride])); - const __m128i A1 = _mm_set_epi32( -- WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]), -- 
WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride])); -+ WebPMemToInt32(&b[7 * stride]), WebPMemToInt32(&b[3 * stride]), -+ WebPMemToInt32(&b[5 * stride]), WebPMemToInt32(&b[1 * stride])); - - // B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 - // B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 -@@ -540,7 +540,7 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x, - uint8_t* dst, int stride) { - int i; - for (i = 0; i < 4; ++i, dst += stride) { -- WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x)); -+ WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x)); - *x = _mm_srli_si128(*x, 4); - } - } -@@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) { // vertical - const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); - const __m128i b = _mm_subs_epu8(a, lsb); - const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); -- const uint32_t vals = _mm_cvtsi128_si32(avg); -+ const int vals = _mm_cvtsi128_si32(avg); - int i; - for (i = 0; i < 4; ++i) { -- WebPUint32ToMem(dst + i * BPS, vals); -+ WebPInt32ToMem(dst + i * BPS, vals); - } - } - -@@ -925,10 +925,10 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left - const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); - } - - static void VR4_SSE2(uint8_t* dst) { // 
Vertical-Right -@@ -946,10 +946,10 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right - const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i efgh = _mm_avg_epu8(avg2, XABCD); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); - - // these two are hard to implement in SSE2, so we keep the C-version: - DST(0, 2) = AVG3(J, I, X); -@@ -970,11 +970,12 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left - const __m128i abbc = _mm_or_si128(ab, bc); - const __m128i lsb2 = _mm_and_si128(abbc, lsb1); - const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); -- const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); -+ const uint32_t extra_out = -+ (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); - - // these two are hard to get and irregular - DST(3, 2) = (extra_out >> 0) & 0xff; -@@ -990,7 +991,7 @@ static void 
RD4_SSE2(uint8_t* dst) { // Down-right - const uint32_t K = dst[-1 + 2 * BPS]; - const uint32_t L = dst[-1 + 3 * BPS]; - const __m128i LKJI_____ = -- _mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24)); -+ _mm_cvtsi32_si128((int)(L | (K << 8) | (J << 16) | (I << 24))); - const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD); - const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1); - const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2); -@@ -998,10 +999,10 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right - const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); - } - - #undef DST -@@ -1015,13 +1016,13 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) { - const __m128i zero = _mm_setzero_si128(); - int y; - if (size == 4) { -- const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top)); -+ const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top)); - const __m128i top_base = _mm_unpacklo_epi8(top_values, zero); - for (y = 0; y < 4; ++y, dst += BPS) { - const int val = dst[-1] - top[-1]; - const __m128i base = _mm_set1_epi16(val); - const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero); -- WebPUint32ToMem(dst, _mm_cvtsi128_si32(out)); -+ WebPInt32ToMem(dst, 
_mm_cvtsi128_si32(out)); - } - } else if (size == 8) { - const __m128i top_values = _mm_loadl_epi64((const __m128i*)top); -@@ -1062,7 +1063,7 @@ static void VE16_SSE2(uint8_t* dst) { - static void HE16_SSE2(uint8_t* dst) { // horizontal - int j; - for (j = 16; j > 0; --j) { -- const __m128i values = _mm_set1_epi8(dst[-1]); -+ const __m128i values = _mm_set1_epi8((char)dst[-1]); - _mm_storeu_si128((__m128i*)dst, values); - dst += BPS; - } -@@ -1070,7 +1071,7 @@ static void HE16_SSE2(uint8_t* dst) { // horizontal - - static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) { - int j; -- const __m128i values = _mm_set1_epi8(v); -+ const __m128i values = _mm_set1_epi8((char)v); - for (j = 0; j < 16; ++j) { - _mm_storeu_si128((__m128i*)(dst + j * BPS), values); - } -@@ -1130,7 +1131,7 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical - // helper for chroma-DC predictions - static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { - int j; -- const __m128i values = _mm_set1_epi8(v); -+ const __m128i values = _mm_set1_epi8((char)v); - for (j = 0; j < 8; ++j) { - _mm_storel_epi64((__m128i*)(dst + j * BPS), values); - } -diff --git a/3rdparty/libwebp/src/dsp/dec_sse41.c b/3rdparty/libwebp/src/dsp/dec_sse41.c -index 8f18506d54bf..08a363027226 100644 ---- a/3rdparty/libwebp/src/dsp/dec_sse41.c -+++ b/3rdparty/libwebp/src/dsp/dec_sse41.c -@@ -23,7 +23,7 @@ static void HE16_SSE41(uint8_t* dst) { // horizontal - int j; - const __m128i kShuffle3 = _mm_set1_epi8(3); - for (j = 16; j > 0; --j) { -- const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4)); -+ const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4)); - const __m128i values = _mm_shuffle_epi8(in, kShuffle3); - _mm_storeu_si128((__m128i*)dst, values); - dst += BPS; -diff --git a/3rdparty/libwebp/src/dsp/dsp.h b/3rdparty/libwebp/src/dsp/dsp.h -index 298c721ae2d1..d2000b8efcba 100644 ---- a/3rdparty/libwebp/src/dsp/dsp.h -+++ b/3rdparty/libwebp/src/dsp/dsp.h -@@ -18,6 +18,7 @@ - #include 
"src/webp/config.h" - #endif - -+#include "src/dsp/cpu.h" - #include "src/webp/types.h" - - #ifdef __cplusplus -@@ -27,205 +28,22 @@ extern "C" { - #define BPS 32 // this is the common stride for enc/dec - - //------------------------------------------------------------------------------ --// CPU detection -- -+// WEBP_RESTRICT -+ -+// Declares a pointer with the restrict type qualifier if available. -+// This allows code to hint to the compiler that only this pointer references a -+// particular object or memory region within the scope of the block in which it -+// is declared. This may allow for improved optimizations due to the lack of -+// pointer aliasing. See also: -+// https://en.cppreference.com/w/c/language/restrict - #if defined(__GNUC__) --# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) --# define LOCAL_GCC_PREREQ(maj, min) \ -- (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) --#else --# define LOCAL_GCC_VERSION 0 --# define LOCAL_GCC_PREREQ(maj, min) 0 --#endif -- --#if defined(__clang__) --# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) --# define LOCAL_CLANG_PREREQ(maj, min) \ -- (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) -+#define WEBP_RESTRICT __restrict__ -+#elif defined(_MSC_VER) -+#define WEBP_RESTRICT __restrict - #else --# define LOCAL_CLANG_VERSION 0 --# define LOCAL_CLANG_PREREQ(maj, min) 0 --#endif -- --#ifndef __has_builtin --# define __has_builtin(x) 0 --#endif -- --#if !defined(HAVE_CONFIG_H) --#if defined(_MSC_VER) && _MSC_VER > 1310 && \ -- (defined(_M_X64) || defined(_M_IX86)) --#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets --#endif -- --#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ -- (defined(_M_X64) || defined(_M_IX86)) --#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets --#endif --#endif -- --// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp --// files without intrinsics, allowing the corresponding Init() to be called. 
--// Files containing intrinsics will need to be built targeting the instruction --// set so should succeed on one of the earlier tests. --#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2) --#define WEBP_USE_SSE2 -+#define WEBP_RESTRICT - #endif - --#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41) --#define WEBP_USE_SSE41 --#endif -- --#undef WEBP_MSC_SSE41 --#undef WEBP_MSC_SSE2 -- --// The intrinsics currently cause compiler errors with arm-nacl-gcc and the --// inline assembly would need to be modified for use with Native Client. --#if (defined(__ARM_NEON__) || \ -- defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \ -- !defined(__native_client__) --#define WEBP_USE_NEON --#endif -- --#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ -- defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) --#define WEBP_ANDROID_NEON // Android targets that may have NEON --#define WEBP_USE_NEON --#endif -- --#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) --#define WEBP_USE_NEON --#define WEBP_USE_INTRINSICS --#endif -- --#if defined(__mips__) && !defined(__mips64) && \ -- defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) --#define WEBP_USE_MIPS32 --#if (__mips_isa_rev >= 2) --#define WEBP_USE_MIPS32_R2 --#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) --#define WEBP_USE_MIPS_DSP_R2 --#endif --#endif --#endif -- --#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) --#define WEBP_USE_MSA --#endif -- --#ifndef WEBP_DSP_OMIT_C_CODE --#define WEBP_DSP_OMIT_C_CODE 1 --#endif -- --#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE --#define WEBP_NEON_OMIT_C_CODE 1 --#else --#define WEBP_NEON_OMIT_C_CODE 0 --#endif -- --#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) --#define WEBP_NEON_WORK_AROUND_GCC 1 --#else --#define WEBP_NEON_WORK_AROUND_GCC 0 --#endif -- --// This 
macro prevents thread_sanitizer from reporting known concurrent writes. --#define WEBP_TSAN_IGNORE_FUNCTION --#if defined(__has_feature) --#if __has_feature(thread_sanitizer) --#undef WEBP_TSAN_IGNORE_FUNCTION --#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) --#endif --#endif -- --#if defined(WEBP_USE_THREAD) && !defined(_WIN32) --#include // NOLINT -- --#define WEBP_DSP_INIT(func) do { \ -- static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ -- (VP8CPUInfo)&func ## _last_cpuinfo_used; \ -- static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \ -- if (pthread_mutex_lock(&func ## _lock)) break; \ -- if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \ -- func ## _last_cpuinfo_used = VP8GetCPUInfo; \ -- (void)pthread_mutex_unlock(&func ## _lock); \ --} while (0) --#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) --#define WEBP_DSP_INIT(func) do { \ -- static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ -- (VP8CPUInfo)&func ## _last_cpuinfo_used; \ -- if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \ -- func(); \ -- func ## _last_cpuinfo_used = VP8GetCPUInfo; \ --} while (0) --#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) -- --// Defines an Init + helper function that control multiple initialization of --// function pointers / tables. --/* Usage: -- WEBP_DSP_INIT_FUNC(InitFunc) { -- ...function body -- } --*/ --#define WEBP_DSP_INIT_FUNC(name) \ -- static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \ -- WEBP_TSAN_IGNORE_FUNCTION void name(void) { \ -- WEBP_DSP_INIT(name ## _body); \ -- } \ -- static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void) -- --#define WEBP_UBSAN_IGNORE_UNDEF --#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW --#if defined(__clang__) && defined(__has_attribute) --#if __has_attribute(no_sanitize) --// This macro prevents the undefined behavior sanitizer from reporting --// failures. 
This is only meant to silence unaligned loads on platforms that --// are known to support them. --#undef WEBP_UBSAN_IGNORE_UNDEF --#define WEBP_UBSAN_IGNORE_UNDEF \ -- __attribute__((no_sanitize("undefined"))) -- --// This macro prevents the undefined behavior sanitizer from reporting --// failures related to unsigned integer overflows. This is only meant to --// silence cases where this well defined behavior is expected. --#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW --#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ -- __attribute__((no_sanitize("unsigned-integer-overflow"))) --#endif --#endif -- --// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. --// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. --#if !defined(WEBP_OFFSET_PTR) --#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) --#endif -- --// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) --#if !defined(WEBP_SWAP_16BIT_CSP) --#define WEBP_SWAP_16BIT_CSP 0 --#endif -- --// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) --#if !defined(WORDS_BIGENDIAN) && \ -- (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ -- (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) --#define WORDS_BIGENDIAN --#endif -- --typedef enum { -- kSSE2, -- kSSE3, -- kSlowSSSE3, // special feature for slow SSSE3 architectures -- kSSE4_1, -- kAVX, -- kAVX2, -- kNEON, -- kMIPS32, -- kMIPSdspR2, -- kMSA --} CPUFeature; --// returns true if the CPU supports the feature. 
--typedef int (*VP8CPUInfo)(CPUFeature feature); --WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - - //------------------------------------------------------------------------------ - // Init stub generator -@@ -514,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, - extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, - uint8_t* u, uint8_t* v, int width); - --// utilities for accurate RGB->YUV conversion --extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref, -- uint16_t* dst, int len); --extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref, -- int16_t* dst, int len); --extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, -- int len, -- const uint16_t* best_y, uint16_t* out); -- - // Must be called before using the above. - void WebPInitConvertARGBToYUV(void); - -@@ -578,26 +387,29 @@ extern void (*WebPApplyAlphaMultiply4444)( - - // Dispatch the values from alpha[] plane to the ARGB destination 'dst'. - // Returns true if alpha[] plane has non-trivial values different from 0xff. --extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint8_t* dst, int dst_stride); -+extern int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint8_t* WEBP_RESTRICT dst, int dst_stride); - - // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the - // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units. 
--extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride, -- int width, int height, -- uint32_t* dst, int dst_stride); -+extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride, int width, int height, -+ uint32_t* WEBP_RESTRICT dst, -+ int dst_stride); - - // Extract the alpha values from 32b values in argb[] and pack them into alpha[] - // (this is the opposite of WebPDispatchAlpha). - // Returns true if there's only trivial 0xff alpha values. --extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride, -- int width, int height, -- uint8_t* alpha, int alpha_stride); -+extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb, -+ int argb_stride, int width, int height, -+ uint8_t* WEBP_RESTRICT alpha, -+ int alpha_stride); - - // Extract the green values from 32b values in argb[] and pack them into alpha[] - // (this is the opposite of WebPDispatchAlphaToGreen). --extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size); -+extern void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb, -+ uint8_t* WEBP_RESTRICT alpha, int size); - - // Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B). - // Un-Multiply operation transforms x into x * 255 / A. -@@ -610,29 +422,35 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows, - int inverse); - - // Same for a row of single values, with side alpha values. --extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha, -+extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr, -+ const uint8_t* WEBP_RESTRICT const alpha, - int width, int inverse); - - // Same a WebPMultRow(), but for several 'num_rows' rows. 
--void WebPMultRows(uint8_t* ptr, int stride, -- const uint8_t* alpha, int alpha_stride, -+void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride, -+ const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, - int width, int num_rows, int inverse); - - // Plain-C versions, used as fallback by some implementations. --void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha, -+void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr, -+ const uint8_t* WEBP_RESTRICT const alpha, - int width, int inverse); - void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse); - - #ifdef WORDS_BIGENDIAN - // ARGB packing function: a/r/g/b input is rgba or bgra order. --extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, -- const uint8_t* g, const uint8_t* b, int len, -- uint32_t* out); -+extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a, -+ const uint8_t* WEBP_RESTRICT r, -+ const uint8_t* WEBP_RESTRICT g, -+ const uint8_t* WEBP_RESTRICT b, -+ int len, uint32_t* WEBP_RESTRICT out); - #endif - - // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order. --extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b, -- int len, int step, uint32_t* out); -+extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r, -+ const uint8_t* WEBP_RESTRICT g, -+ const uint8_t* WEBP_RESTRICT b, -+ int len, int step, uint32_t* WEBP_RESTRICT out); - - // This function returns true if src[i] contains a value different from 0xff. 
- extern int (*WebPHasAlpha8b)(const uint8_t* src, int length); -diff --git a/3rdparty/libwebp/src/dsp/enc.c b/3rdparty/libwebp/src/dsp/enc.c -index 2fddbc4c5247..2ba97ba8d661 100644 ---- a/3rdparty/libwebp/src/dsp/enc.c -+++ b/3rdparty/libwebp/src/dsp/enc.c -@@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; - VP8BlockCopy VP8Copy4x4; - VP8BlockCopy VP8Copy16x8; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8EncDspInitSSE2(void); - extern void VP8EncDspInitSSE41(void); - extern void VP8EncDspInitNEON(void); -@@ -773,10 +774,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8EncDspInitSSE2(); --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - VP8EncDspInitSSE41(); - } -@@ -800,7 +801,7 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - VP8EncDspInitNEON(); -diff --git a/3rdparty/libwebp/src/dsp/enc_neon.c b/3rdparty/libwebp/src/dsp/enc_neon.c -index 43bf1245c536..714800367ba7 100644 ---- a/3rdparty/libwebp/src/dsp/enc_neon.c -+++ b/3rdparty/libwebp/src/dsp/enc_neon.c -@@ -9,7 +9,7 @@ - // - // ARM NEON version of speed-critical encoding functions. - // --// adapted from libvpx (http://www.webmproject.org/code/) -+// adapted from libvpx (https://www.webmproject.org/code/) - - #include "src/dsp/dsp.h" - -@@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a, - - // Horizontal sum of all four uint32_t values in 'sum'. 
- static int SumToInt_NEON(uint32x4_t sum) { -+#if WEBP_AARCH64 -+ return (int)vaddvq_u32(sum); -+#else - const uint64x2_t sum2 = vpaddlq_u32(sum); -- const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1); -- return (int)sum3; -+ const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)), -+ vreinterpret_u32_u64(vget_high_u64(sum2))); -+ return (int)vget_lane_u32(sum3, 0); -+#endif - } - - static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) { -@@ -860,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16], - uint8x8x4_t shuffles; - // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use - // non-standard versions there. --#if defined(__APPLE__) && defined(__aarch64__) && \ -+#if defined(__APPLE__) && WEBP_AARCH64 && \ - defined(__apple_build_version__) && (__apple_build_version__< 6020037) - uint8x16x2_t all_out; - INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1)); -diff --git a/3rdparty/libwebp/src/dsp/enc_sse2.c b/3rdparty/libwebp/src/dsp/enc_sse2.c -index b2e78ed9411f..010624a2f712 100644 ---- a/3rdparty/libwebp/src/dsp/enc_sse2.c -+++ b/3rdparty/libwebp/src/dsp/enc_sse2.c -@@ -25,9 +25,160 @@ - //------------------------------------------------------------------------------ - // Transforms (Paragraph 14.4) - --// Does one or two inverse transforms. --static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, -- int do_two) { -+// Does one inverse transform. 
-+static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in, -+ uint8_t* dst) { -+ // This implementation makes use of 16-bit fixed point versions of two -+ // multiply constants: -+ // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 -+ // K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16 -+ // -+ // To be able to use signed 16-bit integers, we use the following trick to -+ // have constants within range: -+ // - Associated constants are obtained by subtracting the 16-bit fixed point -+ // version of one: -+ // k = K - (1 << 16) => K = k + (1 << 16) -+ // K1 = 85267 => k1 = 20091 -+ // K2 = 35468 => k2 = -30068 -+ // - The multiplication of a variable by a constant become the sum of the -+ // variable and the multiplication of that variable by the associated -+ // constant: -+ // (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x -+ const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068, -+ 20091, 20091, 20091, 20091); -+ const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091, -+ -30068, -30068, -30068, -30068); -+ const __m128i zero = _mm_setzero_si128(); -+ const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4); -+ __m128i T01, T23; -+ -+ // Load and concatenate the transform coefficients. -+ const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]); -+ const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]); -+ // a00 a10 a20 a30 a01 a11 a21 a31 -+ // a02 a12 a22 a32 a03 a13 a23 a33 -+ -+ // Vertical pass and subsequent transpose. -+ { -+ const __m128i in1 = _mm_unpackhi_epi64(in01, in01); -+ const __m128i in3 = _mm_unpackhi_epi64(in23, in23); -+ -+ // First pass, c and d calculations are longer because of the "trick" -+ // multiplications. 
-+ // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3 -+ // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3 -+ const __m128i a_d3 = _mm_add_epi16(in01, in23); -+ const __m128i b_c3 = _mm_sub_epi16(in01, in23); -+ const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1); -+ const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2); -+ const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); -+ const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); -+ const __m128i c = _mm_add_epi16(c3, c4); -+ const __m128i d4u = _mm_add_epi16(c1d1, c2d2); -+ const __m128i du = _mm_add_epi16(a_d3, d4u); -+ const __m128i d = _mm_unpackhi_epi64(du, du); -+ -+ // Second pass. -+ const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); -+ const __m128i comb_dc = _mm_unpacklo_epi64(d, c); -+ -+ const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); -+ const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); -+ const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); -+ -+ const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23); -+ const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23); -+ // a00 a20 a01 a21 a02 a22 a03 a23 -+ // a10 a30 a11 a31 a12 a32 a13 a33 -+ -+ T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); -+ T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); -+ // a00 a10 a20 a30 a01 a11 a21 a31 -+ // a02 a12 a22 a32 a03 a13 a23 a33 -+ } -+ -+ // Horizontal pass and subsequent transpose. -+ { -+ const __m128i T1 = _mm_unpackhi_epi64(T01, T01); -+ const __m128i T3 = _mm_unpackhi_epi64(T23, T23); -+ -+ // First pass, c and d calculations are longer because of the "trick" -+ // multiplications. 
-+ const __m128i dc = _mm_add_epi16(T01, zero_four); -+ -+ // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3 -+ // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3 -+ const __m128i a_d3 = _mm_add_epi16(dc, T23); -+ const __m128i b_c3 = _mm_sub_epi16(dc, T23); -+ const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1); -+ const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2); -+ const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); -+ const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); -+ const __m128i c = _mm_add_epi16(c3, c4); -+ const __m128i d4u = _mm_add_epi16(c1d1, c2d2); -+ const __m128i du = _mm_add_epi16(a_d3, d4u); -+ const __m128i d = _mm_unpackhi_epi64(du, du); -+ -+ // Second pass. -+ const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); -+ const __m128i comb_dc = _mm_unpacklo_epi64(d, c); -+ -+ const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); -+ const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); -+ const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); -+ -+ const __m128i shifted01 = _mm_srai_epi16(tmp01, 3); -+ const __m128i shifted23 = _mm_srai_epi16(tmp23, 3); -+ // a00 a01 a02 a03 a10 a11 a12 a13 -+ // a20 a21 a22 a23 a30 a31 a32 a33 -+ -+ const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23); -+ const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23); -+ // a00 a20 a01 a21 a02 a22 a03 a23 -+ // a10 a30 a11 a31 a12 a32 a13 a33 -+ -+ T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); -+ T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); -+ // a00 a10 a20 a30 a01 a11 a21 a31 -+ // a02 a12 a22 a32 a03 a13 a23 a33 -+ } -+ -+ // Add inverse transform to 'ref' and store. -+ { -+ // Load the reference(s). -+ __m128i ref01, ref23, ref0123; -+ int32_t buf[4]; -+ -+ // Load four bytes/pixels per line. 
-+ const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); -+ const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); -+ const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); -+ const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); -+ ref01 = _mm_unpacklo_epi32(ref0, ref1); -+ ref23 = _mm_unpacklo_epi32(ref2, ref3); -+ -+ // Convert to 16b. -+ ref01 = _mm_unpacklo_epi8(ref01, zero); -+ ref23 = _mm_unpacklo_epi8(ref23, zero); -+ // Add the inverse transform(s). -+ ref01 = _mm_add_epi16(ref01, T01); -+ ref23 = _mm_add_epi16(ref23, T23); -+ // Unsigned saturate to 8b. -+ ref0123 = _mm_packus_epi16(ref01, ref23); -+ -+ _mm_storeu_si128((__m128i *)buf, ref0123); -+ -+ // Store four bytes/pixels per line. -+ WebPInt32ToMem(&dst[0 * BPS], buf[0]); -+ WebPInt32ToMem(&dst[1 * BPS], buf[1]); -+ WebPInt32ToMem(&dst[2 * BPS], buf[2]); -+ WebPInt32ToMem(&dst[3 * BPS], buf[3]); -+ } -+} -+ -+// Does two inverse transforms. -+static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in, -+ uint8_t* dst) { - // This implementation makes use of 16-bit fixed point versions of two - // multiply constants: - // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 -@@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, - __m128i T0, T1, T2, T3; - - // Load and concatenate the transform coefficients (we'll do two inverse -- // transforms in parallel). In the case of only one inverse transform, the -- // second half of the vectors will just contain random value we'll never -- // use nor store. -+ // transforms in parallel). 
- __m128i in0, in1, in2, in3; - { -- in0 = _mm_loadl_epi64((const __m128i*)&in[0]); -- in1 = _mm_loadl_epi64((const __m128i*)&in[4]); -- in2 = _mm_loadl_epi64((const __m128i*)&in[8]); -- in3 = _mm_loadl_epi64((const __m128i*)&in[12]); -- // a00 a10 a20 a30 x x x x -- // a01 a11 a21 a31 x x x x -- // a02 a12 a22 a32 x x x x -- // a03 a13 a23 a33 x x x x -- if (do_two) { -- const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]); -- const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]); -- const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]); -- const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]); -- in0 = _mm_unpacklo_epi64(in0, inB0); -- in1 = _mm_unpacklo_epi64(in1, inB1); -- in2 = _mm_unpacklo_epi64(in2, inB2); -- in3 = _mm_unpacklo_epi64(in3, inB3); -- // a00 a10 a20 a30 b00 b10 b20 b30 -- // a01 a11 a21 a31 b01 b11 b21 b31 -- // a02 a12 a22 a32 b02 b12 b22 b32 -- // a03 a13 a23 a33 b03 b13 b23 b33 -- } -+ const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]); -+ const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]); -+ const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]); -+ const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]); -+ in0 = _mm_unpacklo_epi64(tmp0, tmp2); -+ in1 = _mm_unpackhi_epi64(tmp0, tmp2); -+ in2 = _mm_unpacklo_epi64(tmp1, tmp3); -+ in3 = _mm_unpackhi_epi64(tmp1, tmp3); -+ // a00 a10 a20 a30 b00 b10 b20 b30 -+ // a01 a11 a21 a31 b01 b11 b21 b31 -+ // a02 a12 a22 a32 b02 b12 b22 b32 -+ // a03 a13 a23 a33 b03 b13 b23 b33 - } - - // Vertical pass and subsequent transpose. -@@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, - const __m128i zero = _mm_setzero_si128(); - // Load the reference(s). - __m128i ref0, ref1, ref2, ref3; -- if (do_two) { -- // Load eight bytes/pixels per line. 
-- ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); -- ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); -- ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); -- ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); -- } else { -- // Load four bytes/pixels per line. -- ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS])); -- ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS])); -- ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS])); -- ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS])); -- } -+ // Load eight bytes/pixels per line. -+ ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); -+ ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); -+ ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); -+ ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); - // Convert to 16b. - ref0 = _mm_unpacklo_epi8(ref0, zero); - ref1 = _mm_unpacklo_epi8(ref1, zero); -@@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, - ref1 = _mm_packus_epi16(ref1, ref1); - ref2 = _mm_packus_epi16(ref2, ref2); - ref3 = _mm_packus_epi16(ref3, ref3); -- // Store the results. -- if (do_two) { -- // Store eight bytes/pixels per line. -- _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); -- _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); -- _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); -- _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); -- } else { -- // Store four bytes/pixels per line. -- WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0)); -- WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1)); -- WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2)); -- WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3)); -- } -+ // Store eight bytes/pixels per line. 
-+ _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); -+ _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); -+ _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); -+ _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); -+ } -+} -+ -+// Does one or two inverse transforms. -+static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, -+ int do_two) { -+ if (do_two) { -+ ITransform_Two_SSE2(ref, in, dst); -+ } else { -+ ITransform_One_SSE2(ref, in, dst); - } - } - -@@ -481,7 +613,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred, - // helper for chroma-DC predictions - static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { - int j; -- const __m128i values = _mm_set1_epi8(v); -+ const __m128i values = _mm_set1_epi8((char)v); - for (j = 0; j < 8; ++j) { - _mm_storel_epi64((__m128i*)(dst + j * BPS), values); - } -@@ -489,7 +621,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { - - static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) { - int j; -- const __m128i values = _mm_set1_epi8(v); -+ const __m128i values = _mm_set1_epi8((char)v); - for (j = 0; j < 16; ++j) { - _mm_store_si128((__m128i*)(dst + j * BPS), values); - } -@@ -540,7 +672,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst, - static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) { - int j; - for (j = 0; j < 8; ++j) { -- const __m128i values = _mm_set1_epi8(left[j]); -+ const __m128i values = _mm_set1_epi8((char)left[j]); - _mm_storel_epi64((__m128i*)dst, values); - dst += BPS; - } -@@ -549,7 +681,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) { - static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) { - int j; - for (j = 0; j < 16; ++j) { -- const __m128i values = _mm_set1_epi8(left[j]); -+ const __m128i values = _mm_set1_epi8((char)left[j]); - _mm_store_si128((__m128i*)dst, values); - dst += BPS; - } -@@ -722,10 +854,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* 
dst, - const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); - const __m128i b = _mm_subs_epu8(a, lsb); - const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); -- const uint32_t vals = _mm_cvtsi128_si32(avg); -+ const int vals = _mm_cvtsi128_si32(avg); - int i; - for (i = 0; i < 4; ++i) { -- WebPUint32ToMem(dst + i * BPS, vals); -+ WebPInt32ToMem(dst + i * BPS, vals); - } - } - -@@ -760,10 +892,10 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst, - const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); - } - - static WEBP_INLINE void VR4_SSE2(uint8_t* dst, -@@ -782,10 +914,10 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst, - const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i efgh = _mm_avg_epu8(avg2, XABCD); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); -+ WebPInt32ToMem(dst + 2 * BPS, 
_mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); - - // these two are hard to implement in SSE2, so we keep the C-version: - DST(0, 2) = AVG3(J, I, X); -@@ -807,11 +939,12 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst, - const __m128i abbc = _mm_or_si128(ab, bc); - const __m128i lsb2 = _mm_and_si128(abbc, lsb1); - const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); -- const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); -+ const uint32_t extra_out = -+ (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); -+ WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); - - // these two are hard to get and irregular - DST(3, 2) = (extra_out >> 0) & 0xff; -@@ -829,10 +962,10 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst, - const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one); - const __m128i avg2 = _mm_subs_epu8(avg1, lsb); - const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_); -- WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); -- WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -- WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -- WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); -+ WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); -+ WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); -+ WebPInt32ToMem(dst + 1 * BPS, 
_mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); -+ WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); - } - - static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) { -@@ -875,14 +1008,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) { - - static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top)); -+ const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top)); - const __m128i top_base = _mm_unpacklo_epi8(top_values, zero); - int y; - for (y = 0; y < 4; ++y, dst += BPS) { - const int val = top[-2 - y] - top[-1]; - const __m128i base = _mm_set1_epi16(val); - const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero); -- WebPUint32ToMem(dst, _mm_cvtsi128_si32(out)); -+ WebPInt32ToMem(dst, _mm_cvtsi128_si32(out)); - } - } - -diff --git a/3rdparty/libwebp/src/dsp/filters.c b/3rdparty/libwebp/src/dsp/filters.c -index 9e910d99c92a..c1350d5c9d2a 100644 ---- a/3rdparty/libwebp/src/dsp/filters.c -+++ b/3rdparty/libwebp/src/dsp/filters.c -@@ -189,6 +189,12 @@ static void GradientFilter_C(const uint8_t* data, int width, int height, - - //------------------------------------------------------------------------------ - -+static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in, -+ uint8_t* out, int width) { -+ (void)prev; -+ if (out != in) memcpy(out, in, width * sizeof(*out)); -+} -+ - static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in, - uint8_t* out, int width) { - uint8_t pred = (prev == NULL) ? 
0 : prev[0]; -@@ -233,13 +239,14 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in, - WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; - WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8FiltersInitMIPSdspR2(void); - extern void VP8FiltersInitMSA(void); - extern void VP8FiltersInitNEON(void); - extern void VP8FiltersInitSSE2(void); - - WEBP_DSP_INIT_FUNC(VP8FiltersInit) { -- WebPUnfilters[WEBP_FILTER_NONE] = NULL; -+ WebPUnfilters[WEBP_FILTER_NONE] = NoneUnfilter_C; - #if !WEBP_NEON_OMIT_C_CODE - WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C; - WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C; -@@ -254,7 +261,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { - #endif - - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8FiltersInitSSE2(); - } -@@ -271,13 +278,14 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - VP8FiltersInitNEON(); - } - #endif - -+ assert(WebPUnfilters[WEBP_FILTER_NONE] != NULL); - assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL); - assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL); - assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL); -diff --git a/3rdparty/libwebp/src/dsp/filters_sse2.c b/3rdparty/libwebp/src/dsp/filters_sse2.c -index 4b3f2d020f40..5c33ec15e219 100644 ---- a/3rdparty/libwebp/src/dsp/filters_sse2.c -+++ b/3rdparty/libwebp/src/dsp/filters_sse2.c -@@ -320,7 +320,12 @@ extern void VP8FiltersInitSSE2(void); - - WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) { - WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2; -+#if defined(CHROMIUM) -+ // TODO(crbug.com/654974) -+ (void)VerticalUnfilter_SSE2; -+#else - WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2; -+#endif - 
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2; - - WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2; -diff --git a/3rdparty/libwebp/src/dsp/lossless.c b/3rdparty/libwebp/src/dsp/lossless.c -index 46b220e2edc9..9f8120945397 100644 ---- a/3rdparty/libwebp/src/dsp/lossless.c -+++ b/3rdparty/libwebp/src/dsp/lossless.c -@@ -49,7 +49,7 @@ static WEBP_INLINE uint32_t Clip255(uint32_t a) { - } - - static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { -- return Clip255(a + b - c); -+ return Clip255((uint32_t)(a + b - c)); - } - - static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, -@@ -66,7 +66,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, - } - - static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { -- return Clip255(a + (a - b) / 2); -+ return Clip255((uint32_t)(a + (a - b) / 2)); - } - - static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, -@@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { - //------------------------------------------------------------------------------ - // Predictors - --uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor0_C(const uint32_t* const left, -+ const uint32_t* const top) { - (void)top; - (void)left; - return ARGB_BLACK; - } --uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor1_C(const uint32_t* const left, -+ const uint32_t* const top) { - (void)top; -- return left; -+ return *left; - } --uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor2_C(const uint32_t* const left, -+ const uint32_t* const top) { - (void)left; - return top[0]; - } --uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor3_C(const uint32_t* const left, -+ const uint32_t* const top) { - (void)left; - return top[1]; - } 
--uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor4_C(const uint32_t* const left, -+ const uint32_t* const top) { - (void)left; - return top[-1]; - } --uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average3(left, top[0], top[1]); -+uint32_t VP8LPredictor5_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average3(*left, top[0], top[1]); - return pred; - } --uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average2(left, top[-1]); -+uint32_t VP8LPredictor6_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average2(*left, top[-1]); - return pred; - } --uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average2(left, top[0]); -+uint32_t VP8LPredictor7_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average2(*left, top[0]); - return pred; - } --uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor8_C(const uint32_t* const left, -+ const uint32_t* const top) { - const uint32_t pred = Average2(top[-1], top[0]); - (void)left; - return pred; - } --uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) { -+uint32_t VP8LPredictor9_C(const uint32_t* const left, -+ const uint32_t* const top) { - const uint32_t pred = Average2(top[0], top[1]); - (void)left; - return pred; - } --uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average4(left, top[-1], top[0], top[1]); -+uint32_t VP8LPredictor10_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average4(*left, top[-1], top[0], top[1]); - return pred; - } --uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Select(top[0], left, top[-1]); -+uint32_t 
VP8LPredictor11_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Select(top[0], *left, top[-1]); - return pred; - } --uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); -+uint32_t VP8LPredictor12_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]); - return pred; - } --uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); -+uint32_t VP8LPredictor13_C(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]); - return pred; - } - -@@ -279,10 +293,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, - const uint32_t red = argb >> 16; - int new_red = red & 0xff; - int new_blue = argb & 0xff; -- new_red += ColorTransformDelta(m->green_to_red_, green); -+ new_red += ColorTransformDelta((int8_t)m->green_to_red_, green); - new_red &= 0xff; -- new_blue += ColorTransformDelta(m->green_to_blue_, green); -- new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red); -+ new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green); -+ new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red); - new_blue &= 0xff; - dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); - } -@@ -381,7 +395,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform, - assert(row_start < row_end); - assert(row_end <= transform->ysize_); - switch (transform->type_) { -- case SUBTRACT_GREEN: -+ case SUBTRACT_GREEN_TRANSFORM: - VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out); - break; - case PREDICTOR_TRANSFORM: -@@ -574,7 +588,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR; - VP8LMapARGBFunc VP8LMapColor32b; - VP8LMapAlphaFunc VP8LMapColor8b; - -+extern 
VP8CPUInfo VP8GetCPUInfo; - extern void VP8LDspInitSSE2(void); -+extern void VP8LDspInitSSE41(void); - extern void VP8LDspInitNEON(void); - extern void VP8LDspInitMIPSdspR2(void); - extern void VP8LDspInitMSA(void); -@@ -621,9 +637,14 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8LDspInitSSE2(); -+#if defined(WEBP_HAVE_SSE41) -+ if (VP8GetCPUInfo(kSSE4_1)) { -+ VP8LDspInitSSE41(); -+ } -+#endif - } - #endif - #if defined(WEBP_USE_MIPS_DSP_R2) -@@ -638,7 +659,7 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - VP8LDspInitNEON(); -diff --git a/3rdparty/libwebp/src/dsp/lossless.h b/3rdparty/libwebp/src/dsp/lossless.h -index ebd316d1ed7b..0bf10a1a3dab 100644 ---- a/3rdparty/libwebp/src/dsp/lossless.h -+++ b/3rdparty/libwebp/src/dsp/lossless.h -@@ -28,23 +28,38 @@ extern "C" { - //------------------------------------------------------------------------------ - // Decoding - --typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); -+typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left, -+ const uint32_t* const top); - extern VP8LPredictorFunc VP8LPredictors[16]; - --uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor7_C(uint32_t left, 
const uint32_t* const top); --uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top); --uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top); -+uint32_t VP8LPredictor0_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor1_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor2_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor3_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor4_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor5_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor6_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor7_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor8_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor9_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor10_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor11_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor12_C(const uint32_t* const left, -+ const uint32_t* const top); -+uint32_t VP8LPredictor13_C(const uint32_t* const left, -+ const uint32_t* const top); - - // These Add/Sub function expects upper[-1] and out[-1] to be readable. 
- typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, -@@ -167,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; - // ----------------------------------------------------------------------------- - // Huffman-cost related functions. - --typedef double (*VP8LCostFunc)(const uint32_t* population, int length); --typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, -- int length); -+typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length); -+typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, -+ int length); - typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256], - const int Y[256]); - -@@ -183,7 +198,7 @@ typedef struct { // small struct to hold counters - } VP8LStreaks; - - typedef struct { // small struct to hold bit entropy results -- double entropy; // entropy -+ float entropy; // entropy - uint32_t sum; // sum of the population - int nonzeros; // number of non-zero elements in the population - uint32_t max_val; // maximum value in the population -diff --git a/3rdparty/libwebp/src/dsp/lossless_common.h b/3rdparty/libwebp/src/dsp/lossless_common.h -index 96a106f9eebc..d6139b2b577d 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_common.h -+++ b/3rdparty/libwebp/src/dsp/lossless_common.h -@@ -16,9 +16,9 @@ - #ifndef WEBP_DSP_LOSSLESS_COMMON_H_ - #define WEBP_DSP_LOSSLESS_COMMON_H_ - --#include "src/webp/types.h" -- -+#include "src/dsp/cpu.h" - #include "src/utils/utils.h" -+#include "src/webp/types.h" - - #ifdef __cplusplus - extern "C" { -@@ -166,7 +166,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) { - } - - //------------------------------------------------------------------------------ --// Transform-related functions use din both encoding and decoding. -+// Transform-related functions used in both encoding and decoding. - - // Macros used to create a batch predictor that iteratively uses a - // one-pixel predictor. 
-@@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ - int x; \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ -- const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ -+ const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \ - out[x] = VP8LAddPixels(in[x], pred); \ - } \ - } -diff --git a/3rdparty/libwebp/src/dsp/lossless_enc.c b/3rdparty/libwebp/src/dsp/lossless_enc.c -index a0c7ab911798..997d56c2ad30 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_enc.c -+++ b/3rdparty/libwebp/src/dsp/lossless_enc.c -@@ -329,6 +329,15 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = { - static float FastSLog2Slow_C(uint32_t v) { - assert(v >= LOG_LOOKUP_IDX_MAX); - if (v < APPROX_LOG_WITH_CORRECTION_MAX) { -+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) -+ // use clz if available -+ const int log_cnt = BitsLog2Floor(v) - 7; -+ const uint32_t y = 1 << log_cnt; -+ int correction = 0; -+ const float v_f = (float)v; -+ const uint32_t orig_v = v; -+ v >>= log_cnt; -+#else - int log_cnt = 0; - uint32_t y = 1; - int correction = 0; -@@ -339,6 +348,7 @@ static float FastSLog2Slow_C(uint32_t v) { - v = v >> 1; - y = y << 1; - } while (v >= LOG_LOOKUP_IDX_MAX); -+#endif - // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256 - // Xf = floor(Xf) * (1 + (v % y) / v) - // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) -@@ -355,6 +365,14 @@ static float FastSLog2Slow_C(uint32_t v) { - static float FastLog2Slow_C(uint32_t v) { - assert(v >= LOG_LOOKUP_IDX_MAX); - if (v < APPROX_LOG_WITH_CORRECTION_MAX) { -+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ) -+ // use clz if available -+ const int log_cnt = BitsLog2Floor(v) - 7; -+ const uint32_t y = 1 << log_cnt; -+ const uint32_t orig_v = v; -+ double log_2; -+ v >>= log_cnt; -+#else - int log_cnt = 0; - uint32_t y = 1; - const uint32_t orig_v = v; -@@ -364,6 +382,7 @@ static float FastLog2Slow_C(uint32_t v) { - v = v >> 1; - y = y << 1; - } while (v >= 
LOG_LOOKUP_IDX_MAX); -+#endif - log_2 = kLog2Table[v] + log_cnt; - if (orig_v >= APPROX_LOG_MAX) { - // Since the division is still expensive, add this correction factor only -@@ -383,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) { - // Compute the combined Shanon's entropy for distribution {X} and {X+Y} - static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { - int i; -- double retval = 0.; -+ float retval = 0.f; - int sumX = 0, sumXY = 0; - for (i = 0; i < 256; ++i) { - const int x = X[i]; -@@ -399,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { - } - } - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); -- return (float)retval; -+ return retval; - } - - void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { -@@ -503,11 +522,11 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[], - void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { - int i; - for (i = 0; i < num_pixels; ++i) { -- const int argb = argb_data[i]; -+ const int argb = (int)argb_data[i]; - const int green = (argb >> 8) & 0xff; - const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; - const uint32_t new_b = (((argb >> 0) & 0xff) - green) & 0xff; -- argb_data[i] = (argb & 0xff00ff00u) | (new_r << 16) | new_b; -+ argb_data[i] = ((uint32_t)argb & 0xff00ff00u) | (new_r << 16) | new_b; - } - } - -@@ -528,10 +547,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, - const int8_t red = U32ToS8(argb >> 16); - int new_red = red & 0xff; - int new_blue = argb & 0xff; -- new_red -= ColorTransformDelta(m->green_to_red_, green); -+ new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green); - new_red &= 0xff; -- new_blue -= ColorTransformDelta(m->green_to_blue_, green); -- new_blue -= ColorTransformDelta(m->red_to_blue_, red); -+ new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green); -+ new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red); - 
new_blue &= 0xff; - data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); - } -@@ -541,7 +560,7 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, - uint32_t argb) { - const int8_t green = U32ToS8(argb >> 8); - int new_red = argb >> 16; -- new_red -= ColorTransformDelta(green_to_red, green); -+ new_red -= ColorTransformDelta((int8_t)green_to_red, green); - return (new_red & 0xff); - } - -@@ -550,9 +569,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, - uint32_t argb) { - const int8_t green = U32ToS8(argb >> 8); - const int8_t red = U32ToS8(argb >> 16); -- uint8_t new_blue = argb & 0xff; -- new_blue -= ColorTransformDelta(green_to_blue, green); -- new_blue -= ColorTransformDelta(red_to_blue, red); -+ int new_blue = argb & 0xff; -+ new_blue -= ColorTransformDelta((int8_t)green_to_blue, green); -+ new_blue -= ColorTransformDelta((int8_t)red_to_blue, red); - return (new_blue & 0xff); - } - -@@ -617,20 +636,25 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits, - - //------------------------------------------------------------------------------ - --static double ExtraCost_C(const uint32_t* population, int length) { -+static uint32_t ExtraCost_C(const uint32_t* population, int length) { - int i; -- double cost = 0.; -- for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; -+ uint32_t cost = population[4] + population[5]; -+ assert(length % 2 == 0); -+ for (i = 2; i < length / 2 - 1; ++i) { -+ cost += i * (population[2 * i + 2] + population[2 * i + 3]); -+ } - return cost; - } - --static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, -- int length) { -+static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, -+ int length) { - int i; -- double cost = 0.; -- for (i = 2; i < length - 2; ++i) { -- const int xy = X[i + 2] + Y[i + 2]; -- cost += (i >> 1) * xy; -+ uint32_t cost = X[4] + Y[4] + X[5] + Y[5]; -+ assert(length % 2 == 0); -+ for (i = 2; i 
< length / 2 - 1; ++i) { -+ const int xy0 = X[2 * i + 2] + Y[2 * i + 2]; -+ const int xy1 = X[2 * i + 3] + Y[2 * i + 3]; -+ cost += i * (xy0 + xy1); - } - return cost; - } -@@ -726,7 +750,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ - assert(upper != NULL); \ - for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = \ -- VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \ -+ VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \ - out[x] = VP8LSubPixels(in[x], pred); \ - } \ - } -@@ -772,6 +796,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap; - VP8LPredictorAddSubFunc VP8LPredictorsSub[16]; - VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8LEncDspInitSSE2(void); - extern void VP8LEncDspInitSSE41(void); - extern void VP8LEncDspInitNEON(void); -@@ -843,10 +868,10 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8LEncDspInitSSE2(); --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - VP8LEncDspInitSSE41(); - } -@@ -870,7 +895,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - VP8LEncDspInitNEON(); -diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c b/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c -index 0412a093cf9a..e10f12da9d58 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c -+++ b/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c -@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) { - // cost += i * *(pop + 1); - // pop += 2; - // } --// return (double)cost; --static double ExtraCost_MIPS32(const uint32_t* const population, int length) { -+// return cost; 
-+static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) { - int i, temp0, temp1; - const uint32_t* pop = &population[4]; - const uint32_t* const LoopEnd = &population[length]; -@@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { - : "memory", "hi", "lo" - ); - -- return (double)((int64_t)temp0 << 32 | temp1); -+ return ((int64_t)temp0 << 32 | temp1); - } - - // C version of this function: -@@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { - // pX += 2; - // pY += 2; - // } --// return (double)cost; --static double ExtraCostCombined_MIPS32(const uint32_t* const X, -- const uint32_t* const Y, int length) { -+// return cost; -+static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X, -+ const uint32_t* const Y, int length) { - int i, temp0, temp1, temp2, temp3; - const uint32_t* pX = &X[4]; - const uint32_t* pY = &Y[4]; -@@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X, - : "memory", "hi", "lo" - ); - -- return (double)((int64_t)temp0 << 32 | temp1); -+ return ((int64_t)temp0 << 32 | temp1); - } - - #define HUFFMAN_COST_PASS \ -@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], - static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb, - uint32_t* pout, int size) { - uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; -- const uint32_t end = ((size) / 4) * 4; -+ const int end = ((size) / 4) * 4; - const uint32_t* const LoopEnd = pa + end; - int i; - ASM_START - ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout) - ASM_END_0 -- for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i]; -+ for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i]; - } - - static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) { - uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; -- const uint32_t end = ((size) / 4) * 4; -+ const int end = 
((size) / 4) * 4; - const uint32_t* const LoopEnd = pa + end; - int i; - ASM_START - ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout) - ASM_END_1 -- for (i = end; i < size; ++i) pout[i] += pa[i]; -+ for (i = 0; i < size - end; ++i) pout[i] += pa[i]; - } - - #undef ASM_END_1 -diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_neon.c b/3rdparty/libwebp/src/dsp/lossless_enc_neon.c -index 7c7b73f8b692..e32c7961a239 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_enc_neon.c -+++ b/3rdparty/libwebp/src/dsp/lossless_enc_neon.c -@@ -25,7 +25,7 @@ - - // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use - // non-standard versions there. --#if defined(__APPLE__) && defined(__aarch64__) && \ -+#if defined(__APPLE__) && WEBP_AARCH64 && \ - defined(__apple_build_version__) && (__apple_build_version__< 6020037) - #define USE_VTBLQ - #endif -diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c -index 90c263735f58..66cbaab7720a 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c -+++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c -@@ -54,8 +54,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m, - const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_), - CST_5b(m->green_to_blue_)); - const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0); -- const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks -- const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks -+ const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks -+ const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks - int i; - for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb -@@ -232,79 +232,55 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { - //------------------------------------------------------------------------------ - // Entropy - --// Checks whether the X or Y 
contribution is worth computing and adding. --// Used in loop unrolling. --#define ANALYZE_X_OR_Y(x_or_y, j) \ -- do { \ -- if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \ -- } while (0) -- --// Checks whether the X + Y contribution is worth computing and adding. --// Used in loop unrolling. --#define ANALYZE_XY(j) \ -- do { \ -- if (tmp[j] != 0) { \ -- retval -= VP8LFastSLog2(tmp[j]); \ -- ANALYZE_X_OR_Y(X, j); \ -- } \ -- } while (0) -- --#if !(defined(__i386__) || defined(_M_IX86)) -+// TODO(https://crbug.com/webp/499): this function produces different results -+// from the C code due to use of double/float resulting in output differences -+// when compared to -noasm. -+#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86)) -+ - static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { - int i; -- double retval = 0.; -- int sumX, sumXY; -- int32_t tmp[4]; -- __m128i zero = _mm_setzero_si128(); -- // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY). -- __m128i sumXY_128 = zero; -- __m128i sumX_128 = zero; -- -- for (i = 0; i < 256; i += 4) { -- const __m128i x = _mm_loadu_si128((const __m128i*)(X + i)); -- const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i)); -- -- // Check if any X is non-zero: this actually provides a speedup as X is -- // usually sparse. -- if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) { -- const __m128i xy_128 = _mm_add_epi32(x, y); -- sumXY_128 = _mm_add_epi32(sumXY_128, xy_128); -- -- sumX_128 = _mm_add_epi32(sumX_128, x); -- -- // Analyze the different X + Y. -- _mm_storeu_si128((__m128i*)tmp, xy_128); -- -- ANALYZE_XY(0); -- ANALYZE_XY(1); -- ANALYZE_XY(2); -- ANALYZE_XY(3); -- } else { -- // X is fully 0, so only deal with Y. 
-- sumXY_128 = _mm_add_epi32(sumXY_128, y); -- -- ANALYZE_X_OR_Y(Y, 0); -- ANALYZE_X_OR_Y(Y, 1); -- ANALYZE_X_OR_Y(Y, 2); -- ANALYZE_X_OR_Y(Y, 3); -+ float retval = 0.f; -+ int sumX = 0, sumXY = 0; -+ const __m128i zero = _mm_setzero_si128(); -+ -+ for (i = 0; i < 256; i += 16) { -+ const __m128i x0 = _mm_loadu_si128((const __m128i*)(X + i + 0)); -+ const __m128i y0 = _mm_loadu_si128((const __m128i*)(Y + i + 0)); -+ const __m128i x1 = _mm_loadu_si128((const __m128i*)(X + i + 4)); -+ const __m128i y1 = _mm_loadu_si128((const __m128i*)(Y + i + 4)); -+ const __m128i x2 = _mm_loadu_si128((const __m128i*)(X + i + 8)); -+ const __m128i y2 = _mm_loadu_si128((const __m128i*)(Y + i + 8)); -+ const __m128i x3 = _mm_loadu_si128((const __m128i*)(X + i + 12)); -+ const __m128i y3 = _mm_loadu_si128((const __m128i*)(Y + i + 12)); -+ const __m128i x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1), -+ _mm_packs_epi32(x2, x3)); -+ const __m128i y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1), -+ _mm_packs_epi32(y2, y3)); -+ const int32_t mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero)); -+ int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx; -+ while (my) { -+ const int32_t j = BitsCtz(my); -+ int xy; -+ if ((mx >> j) & 1) { -+ const int x = X[i + j]; -+ sumXY += x; -+ retval -= VP8LFastSLog2(x); -+ } -+ xy = X[i + j] + Y[i + j]; -+ sumX += xy; -+ retval -= VP8LFastSLog2(xy); -+ my &= my - 1; - } - } -- -- // Sum up sumX_128 to get sumX. -- _mm_storeu_si128((__m128i*)tmp, sumX_128); -- sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0]; -- -- // Sum up sumXY_128 to get sumXY. 
-- _mm_storeu_si128((__m128i*)tmp, sumXY_128); -- sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0]; -- - retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); -- return (float)retval; -+ return retval; - } --#endif // !(defined(__i386__) || defined(_M_IX86)) - --#undef ANALYZE_X_OR_Y --#undef ANALYZE_XY -+#else -+ -+#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC // won't be faster -+ -+#endif - - //------------------------------------------------------------------------------ - -@@ -400,7 +376,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits, - break; - } - case 2: { -- const __m128i mask_or = _mm_set1_epi32(0xff000000); -+ const __m128i mask_or = _mm_set1_epi32((int)0xff000000); - const __m128i mul_cst = _mm_set1_epi16(0x0104); - const __m128i mask_mul = _mm_set1_epi16(0x0f00); - for (x = 0; x + 16 <= width; x += 16, dst += 4) { -@@ -451,7 +427,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0, - static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; -- const __m128i black = _mm_set1_epi32(ARGB_BLACK); -+ const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); - for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); - const __m128i res = _mm_sub_epi8(src, black); -@@ -662,10 +638,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) { - VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2; - VP8LAddVector = AddVector_SSE2; - VP8LAddVectorEq = AddVectorEq_SSE2; -- // TODO(https://crbug.com/webp/499): this function produces different results -- // from the C code due to use of double/float resulting in output differences -- // when compared to -noasm. 
--#if !(defined(__i386__) || defined(_M_IX86)) -+#if !defined(DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC) - VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2; - #endif - VP8LVectorMismatch = VectorMismatch_SSE2; -diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c -index 719d8ed25e15..7ab83c2604b4 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c -+++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c -@@ -18,8 +18,53 @@ - #include - #include "src/dsp/lossless.h" - --// For sign-extended multiplying constants, pre-shifted by 5: --#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5) -+//------------------------------------------------------------------------------ -+// Cost operations. -+ -+static WEBP_INLINE uint32_t HorizontalSum_SSE41(__m128i cost) { -+ cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 8)); -+ cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 4)); -+ return _mm_cvtsi128_si32(cost); -+} -+ -+static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) { -+ int i; -+ __m128i cost = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]); -+ assert(length % 8 == 0); -+ -+ for (i = 8; i + 8 <= length; i += 8) { -+ const int j = (i - 2) >> 1; -+ const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]); -+ const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]); -+ const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j); -+ const __m128i a2 = _mm_hadd_epi32(a0, a1); -+ const __m128i mul = _mm_mullo_epi32(a2, w); -+ cost = _mm_add_epi32(mul, cost); -+ } -+ return HorizontalSum_SSE41(cost); -+} -+ -+static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a, -+ const uint32_t* const b, int length) { -+ int i; -+ __m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]), -+ _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4])); -+ assert(length % 8 == 0); -+ -+ for (i = 8; i + 8 <= length; i += 8) { -+ const int j = (i - 2) >> 1; -+ const __m128i a0 = 
_mm_loadu_si128((const __m128i*)&a[i]); -+ const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]); -+ const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]); -+ const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]); -+ const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j); -+ const __m128i a2 = _mm_hadd_epi32(a0, a1); -+ const __m128i b2 = _mm_hadd_epi32(b0, b1); -+ const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w); -+ cost = _mm_add_epi32(mul, cost); -+ } -+ return HorizontalSum_SSE41(cost); -+} - - //------------------------------------------------------------------------------ - // Subtract-Green Transform -@@ -44,46 +89,50 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data, - //------------------------------------------------------------------------------ - // Color Transform - --#define SPAN 8 -+// For sign-extended multiplying constants, pre-shifted by 5: -+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5) -+ -+#define MK_CST_16(HI, LO) \ -+ _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff))) -+ - static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, - int tile_width, int tile_height, - int green_to_blue, int red_to_blue, - int histo[]) { -- const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue)); -- const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue)); -- const __m128i mask_g = _mm_set1_epi16((short)0xff00); // green mask -- const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask -- const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask -- const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1, -- -1, -1, -1, -1, -1, -1, -1); -- const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -- 2, -1, 6, -1, 10, -1, 14); -- int y; -- for (y = 0; y < tile_height; ++y) { -- const uint32_t* const src = argb + y * stride; -- int i, x; -- for (x = 0; x + SPAN <= tile_width; x += SPAN) { -- uint16_t 
values[SPAN]; -- const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); -- const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); -- const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo); -- const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi); -- const __m128i r = _mm_or_si128(r0, r1); // r 0 -- const __m128i gb0 = _mm_and_si128(in0, mask_gb); -- const __m128i gb1 = _mm_and_si128(in1, mask_gb); -- const __m128i gb = _mm_packus_epi32(gb0, gb1); // g b -- const __m128i g = _mm_and_si128(gb, mask_g); // g 0 -- const __m128i A = _mm_mulhi_epi16(r, mults_r); // x dbr -- const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dbg -- const __m128i C = _mm_sub_epi8(gb, B); // x b' -- const __m128i D = _mm_sub_epi8(C, A); // x b'' -- const __m128i E = _mm_and_si128(D, mask_b); // 0 b'' -- _mm_storeu_si128((__m128i*)values, E); -- for (i = 0; i < SPAN; ++i) ++histo[values[i]]; -+ const __m128i mult = -+ MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue)); -+ const __m128i perm = -+ _mm_setr_epi8(-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14); -+ if (tile_width >= 4) { -+ int y; -+ for (y = 0; y < tile_height; ++y) { -+ const uint32_t* const src = argb + y * stride; -+ const __m128i A1 = _mm_loadu_si128((const __m128i*)src); -+ const __m128i B1 = _mm_shuffle_epi8(A1, perm); -+ const __m128i C1 = _mm_mulhi_epi16(B1, mult); -+ const __m128i D1 = _mm_sub_epi16(A1, C1); -+ __m128i E = _mm_add_epi16(_mm_srli_epi32(D1, 16), D1); -+ int x; -+ for (x = 4; x + 4 <= tile_width; x += 4) { -+ const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); -+ __m128i B2, C2, D2; -+ ++histo[_mm_extract_epi8(E, 0)]; -+ B2 = _mm_shuffle_epi8(A2, perm); -+ ++histo[_mm_extract_epi8(E, 4)]; -+ C2 = _mm_mulhi_epi16(B2, mult); -+ ++histo[_mm_extract_epi8(E, 8)]; -+ D2 = _mm_sub_epi16(A2, C2); -+ ++histo[_mm_extract_epi8(E, 12)]; -+ E = _mm_add_epi16(_mm_srli_epi32(D2, 16), D2); -+ } -+ ++histo[_mm_extract_epi8(E, 0)]; -+ ++histo[_mm_extract_epi8(E, 4)]; -+ 
++histo[_mm_extract_epi8(E, 8)]; -+ ++histo[_mm_extract_epi8(E, 12)]; - } - } - { -- const int left_over = tile_width & (SPAN - 1); -+ const int left_over = tile_width & 3; - if (left_over > 0) { - VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride, - left_over, tile_height, -@@ -95,33 +144,37 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride, - static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, - int tile_width, int tile_height, - int green_to_red, int histo[]) { -- const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red)); -- const __m128i mask_g = _mm_set1_epi32(0x00ff00); // green mask -- const __m128i mask = _mm_set1_epi16(0xff); -- -- int y; -- for (y = 0; y < tile_height; ++y) { -- const uint32_t* const src = argb + y * stride; -- int i, x; -- for (x = 0; x + SPAN <= tile_width; x += SPAN) { -- uint16_t values[SPAN]; -- const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]); -- const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]); -- const __m128i g0 = _mm_and_si128(in0, mask_g); // 0 0 | g 0 -- const __m128i g1 = _mm_and_si128(in1, mask_g); -- const __m128i g = _mm_packus_epi32(g0, g1); // g 0 -- const __m128i A0 = _mm_srli_epi32(in0, 16); // 0 0 | x r -- const __m128i A1 = _mm_srli_epi32(in1, 16); -- const __m128i A = _mm_packus_epi32(A0, A1); // x r -- const __m128i B = _mm_mulhi_epi16(g, mults_g); // x dr -- const __m128i C = _mm_sub_epi8(A, B); // x r' -- const __m128i D = _mm_and_si128(C, mask); // 0 r' -- _mm_storeu_si128((__m128i*)values, D); -- for (i = 0; i < SPAN; ++i) ++histo[values[i]]; -+ -+ const __m128i mult = MK_CST_16(0, CST_5b(green_to_red)); -+ const __m128i mask_g = _mm_set1_epi32(0x0000ff00); -+ if (tile_width >= 4) { -+ int y; -+ for (y = 0; y < tile_height; ++y) { -+ const uint32_t* const src = argb + y * stride; -+ const __m128i A1 = _mm_loadu_si128((const __m128i*)src); -+ const __m128i B1 = _mm_and_si128(A1, mask_g); -+ const __m128i 
C1 = _mm_madd_epi16(B1, mult); -+ __m128i D = _mm_sub_epi16(A1, C1); -+ int x; -+ for (x = 4; x + 4 <= tile_width; x += 4) { -+ const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x)); -+ __m128i B2, C2; -+ ++histo[_mm_extract_epi8(D, 2)]; -+ B2 = _mm_and_si128(A2, mask_g); -+ ++histo[_mm_extract_epi8(D, 6)]; -+ C2 = _mm_madd_epi16(B2, mult); -+ ++histo[_mm_extract_epi8(D, 10)]; -+ ++histo[_mm_extract_epi8(D, 14)]; -+ D = _mm_sub_epi16(A2, C2); -+ } -+ ++histo[_mm_extract_epi8(D, 2)]; -+ ++histo[_mm_extract_epi8(D, 6)]; -+ ++histo[_mm_extract_epi8(D, 10)]; -+ ++histo[_mm_extract_epi8(D, 14)]; - } - } - { -- const int left_over = tile_width & (SPAN - 1); -+ const int left_over = tile_width & 3; - if (left_over > 0) { - VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride, - left_over, tile_height, green_to_red, -@@ -130,12 +183,16 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride, - } - } - -+#undef MK_CST_16 -+ - //------------------------------------------------------------------------------ - // Entry point - - extern void VP8LEncDspInitSSE41(void); - - WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) { -+ VP8LExtraCost = ExtraCost_SSE41; -+ VP8LExtraCostCombined = ExtraCostCombined_SSE41; - VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41; - VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41; - VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41; -diff --git a/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c b/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c -index 9888854d5719..bfe5ea6b3865 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c -+++ b/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c -@@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, - return Average2(Average2(a0, a1), Average2(a2, a3)); - } - --static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) { -- return Average3(left, 
top[0], top[1]); -+static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average3(*left, top[0], top[1]); - } - --static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) { -- return Average2(left, top[-1]); -+static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average2(*left, top[-1]); - } - --static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) { -- return Average2(left, top[0]); -+static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average2(*left, top[0]); - } - --static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) { -+static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left, -+ const uint32_t* const top) { - (void)left; - return Average2(top[-1], top[0]); - } - --static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) { -+static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left, -+ const uint32_t* const top) { - (void)left; - return Average2(top[0], top[1]); - } - --static uint32_t Predictor10_MIPSdspR2(uint32_t left, -+static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left, - const uint32_t* const top) { -- return Average4(left, top[-1], top[0], top[1]); -+ return Average4(*left, top[-1], top[0], top[1]); - } - --static uint32_t Predictor11_MIPSdspR2(uint32_t left, -+static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left, - const uint32_t* const top) { -- return Select(top[0], left, top[-1]); -+ return Select(top[0], *left, top[-1]); - } - --static uint32_t Predictor12_MIPSdspR2(uint32_t left, -+static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left, - const uint32_t* const top) { -- return ClampedAddSubtractFull(left, top[0], top[-1]); -+ return ClampedAddSubtractFull(*left, top[0], top[-1]); - } - --static uint32_t Predictor13_MIPSdspR2(uint32_t left, 
-+static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left, - const uint32_t* const top) { -- return ClampedAddSubtractHalf(left, top[0], top[-1]); -+ return ClampedAddSubtractHalf(*left, top[0], top[-1]); - } - - // Add green to blue and red channels (i.e. perform the inverse transform of -diff --git a/3rdparty/libwebp/src/dsp/lossless_neon.c b/3rdparty/libwebp/src/dsp/lossless_neon.c -index 76a1b6f8732c..ddc9b61711e3 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_neon.c -+++ b/3rdparty/libwebp/src/dsp/lossless_neon.c -@@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1, - return avg; - } - --static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) { -- return Average3_NEON(left, top[0], top[1]); -+static uint32_t Predictor5_NEON(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average3_NEON(*left, top[0], top[1]); - } --static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) { -- return Average2_NEON(left, top[-1]); -+static uint32_t Predictor6_NEON(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average2_NEON(*left, top[-1]); - } --static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) { -- return Average2_NEON(left, top[0]); -+static uint32_t Predictor7_NEON(const uint32_t* const left, -+ const uint32_t* const top) { -+ return Average2_NEON(*left, top[0]); - } --static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) { -- return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]); -+static uint32_t Predictor13_NEON(const uint32_t* const left, -+ const uint32_t* const top) { -+ return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]); - } - - // Batch versions of those functions. -@@ -494,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper, - - // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use - // non-standard versions there. 
--#if defined(__APPLE__) && defined(__aarch64__) && \ -+#if defined(__APPLE__) && WEBP_AARCH64 && \ - defined(__apple_build_version__) && (__apple_build_version__< 6020037) - #define USE_VTBLQ - #endif -diff --git a/3rdparty/libwebp/src/dsp/lossless_sse2.c b/3rdparty/libwebp/src/dsp/lossless_sse2.c -index aef0cee1b370..4b6a532c239c 100644 ---- a/3rdparty/libwebp/src/dsp/lossless_sse2.c -+++ b/3rdparty/libwebp/src/dsp/lossless_sse2.c -@@ -18,7 +18,6 @@ - #include "src/dsp/common_sse2.h" - #include "src/dsp/lossless.h" - #include "src/dsp/lossless_common.h" --#include - #include - - //------------------------------------------------------------------------------ -@@ -28,23 +27,22 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0, - uint32_t c1, - uint32_t c2) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); -- const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); -- const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); -+ const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero); -+ const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero); -+ const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero); - const __m128i V1 = _mm_add_epi16(C0, C1); - const __m128i V2 = _mm_sub_epi16(V1, C2); - const __m128i b = _mm_packus_epi16(V2, V2); -- const uint32_t output = _mm_cvtsi128_si32(b); -- return output; -+ return (uint32_t)_mm_cvtsi128_si32(b); - } - - static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0, - uint32_t c1, - uint32_t c2) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); -- const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); -- const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); -+ const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero); -+ const __m128i C1 = 
_mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero); -+ const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero); - const __m128i avg = _mm_add_epi16(C1, C0); - const __m128i A0 = _mm_srli_epi16(avg, 1); - const __m128i A1 = _mm_sub_epi16(A0, B0); -@@ -53,16 +51,15 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0, - const __m128i A3 = _mm_srai_epi16(A2, 1); - const __m128i A4 = _mm_add_epi16(A0, A3); - const __m128i A5 = _mm_packus_epi16(A4, A4); -- const uint32_t output = _mm_cvtsi128_si32(A5); -- return output; -+ return (uint32_t)_mm_cvtsi128_si32(A5); - } - - static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) { - int pa_minus_pb; - const __m128i zero = _mm_setzero_si128(); -- const __m128i A0 = _mm_cvtsi32_si128(a); -- const __m128i B0 = _mm_cvtsi32_si128(b); -- const __m128i C0 = _mm_cvtsi32_si128(c); -+ const __m128i A0 = _mm_cvtsi32_si128((int)a); -+ const __m128i B0 = _mm_cvtsi32_si128((int)b); -+ const __m128i C0 = _mm_cvtsi32_si128((int)c); - const __m128i AC0 = _mm_subs_epu8(A0, C0); - const __m128i CA0 = _mm_subs_epu8(C0, A0); - const __m128i BC0 = _mm_subs_epu8(B0, C0); -@@ -95,8 +92,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0, - __m128i* const avg) { - // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1) - const __m128i ones = _mm_set1_epi8(1); -- const __m128i A0 = _mm_cvtsi32_si128(a0); -- const __m128i A1 = _mm_cvtsi32_si128(a1); -+ const __m128i A0 = _mm_cvtsi32_si128((int)a0); -+ const __m128i A1 = _mm_cvtsi32_si128((int)a1); - const __m128i avg1 = _mm_avg_epu8(A0, A1); - const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones); - *avg = _mm_sub_epi8(avg1, one); -@@ -104,8 +101,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0, - - static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero); -- const __m128i 
A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); -+ const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a0), zero); -+ const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero); - const __m128i sum = _mm_add_epi16(A1, A0); - return _mm_srli_epi16(sum, 1); - } -@@ -113,19 +110,18 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) { - static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) { - __m128i output; - Average2_uint32_SSE2(a0, a1, &output); -- return _mm_cvtsi128_si32(output); -+ return (uint32_t)_mm_cvtsi128_si32(output); - } - - static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1, - uint32_t a2) { - const __m128i zero = _mm_setzero_si128(); - const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2); -- const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); -+ const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero); - const __m128i sum = _mm_add_epi16(avg1, A1); - const __m128i avg2 = _mm_srli_epi16(sum, 1); - const __m128i A2 = _mm_packus_epi16(avg2, avg2); -- const uint32_t output = _mm_cvtsi128_si32(A2); -- return output; -+ return (uint32_t)_mm_cvtsi128_si32(A2); - } - - static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, -@@ -135,46 +131,54 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, - const __m128i sum = _mm_add_epi16(avg2, avg1); - const __m128i avg3 = _mm_srli_epi16(sum, 1); - const __m128i A0 = _mm_packus_epi16(avg3, avg3); -- const uint32_t output = _mm_cvtsi128_si32(A0); -- return output; -+ return (uint32_t)_mm_cvtsi128_si32(A0); - } - --static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average3_SSE2(left, top[0], top[1]); -+static uint32_t Predictor5_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average3_SSE2(*left, top[0], top[1]); - return pred; - } --static uint32_t Predictor6_SSE2(uint32_t left, const 
uint32_t* const top) { -- const uint32_t pred = Average2_SSE2(left, top[-1]); -+static uint32_t Predictor6_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average2_SSE2(*left, top[-1]); - return pred; - } --static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average2_SSE2(left, top[0]); -+static uint32_t Predictor7_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average2_SSE2(*left, top[0]); - return pred; - } --static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) { -+static uint32_t Predictor8_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(top[-1], top[0]); - (void)left; - return pred; - } --static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) { -+static uint32_t Predictor9_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(top[0], top[1]); - (void)left; - return pred; - } --static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]); -+static uint32_t Predictor10_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]); - return pred; - } --static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = Select_SSE2(top[0], left, top[-1]); -+static uint32_t Predictor11_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = Select_SSE2(top[0], *left, top[-1]); - return pred; - } --static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]); -+static uint32_t Predictor12_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = 
ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]); - return pred; - } --static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { -- const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]); -+static uint32_t Predictor13_SSE2(const uint32_t* const left, -+ const uint32_t* const top) { -+ const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]); - return pred; - } - -@@ -184,7 +188,7 @@ static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { - static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; -- const __m128i black = _mm_set1_epi32(ARGB_BLACK); -+ const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); - for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); - const __m128i res = _mm_add_epi8(src, black); -@@ -200,7 +204,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, - static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; -- __m128i prev = _mm_set1_epi32(out[-1]); -+ __m128i prev = _mm_set1_epi32((int)out[-1]); - for (i = 0; i + 4 <= num_pixels; i += 4) { - // a | b | c | d - const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); -@@ -277,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) - #undef GENERATE_PREDICTOR_2 - - // Predictor10: average of (average of (L,TL), average of (T, TR)). 
--#define DO_PRED10(OUT) do { \ -- __m128i avgLTL, avg; \ -- Average2_m128i(&L, &TL, &avgLTL); \ -- Average2_m128i(&avgTTR, &avgLTL, &avg); \ -- L = _mm_add_epi8(avg, src); \ -- out[i + (OUT)] = _mm_cvtsi128_si32(L); \ -+#define DO_PRED10(OUT) do { \ -+ __m128i avgLTL, avg; \ -+ Average2_m128i(&L, &TL, &avgLTL); \ -+ Average2_m128i(&avgTTR, &avgLTL, &avg); \ -+ L = _mm_add_epi8(avg, src); \ -+ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \ - } while (0) - - #define DO_PRED10_SHIFT do { \ -@@ -295,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) - static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; -- __m128i L = _mm_cvtsi32_si128(out[-1]); -+ __m128i L = _mm_cvtsi32_si128((int)out[-1]); - for (i = 0; i + 4 <= num_pixels; i += 4) { - __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); - __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); -@@ -328,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, - const __m128i B = _mm_andnot_si128(mask, T); \ - const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \ - L = _mm_add_epi8(src, pred); \ -- out[i + (OUT)] = _mm_cvtsi128_si32(L); \ -+ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \ - } while (0) - - #define DO_PRED11_SHIFT do { \ -@@ -343,7 +347,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; - __m128i pa; -- __m128i L = _mm_cvtsi32_si128(out[-1]); -+ __m128i L = _mm_cvtsi32_si128((int)out[-1]); - for (i = 0; i + 4 <= num_pixels; i += 4) { - __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); - __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); -@@ -376,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, - #undef DO_PRED11_SHIFT - - // Predictor12: ClampedAddSubtractFull. 
--#define DO_PRED12(DIFF, LANE, OUT) do { \ -- const __m128i all = _mm_add_epi16(L, (DIFF)); \ -- const __m128i alls = _mm_packus_epi16(all, all); \ -- const __m128i res = _mm_add_epi8(src, alls); \ -- out[i + (OUT)] = _mm_cvtsi128_si32(res); \ -- L = _mm_unpacklo_epi8(res, zero); \ -+#define DO_PRED12(DIFF, LANE, OUT) do { \ -+ const __m128i all = _mm_add_epi16(L, (DIFF)); \ -+ const __m128i alls = _mm_packus_epi16(all, all); \ -+ const __m128i res = _mm_add_epi8(src, alls); \ -+ out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \ -+ L = _mm_unpacklo_epi8(res, zero); \ - } while (0) - - #define DO_PRED12_SHIFT(DIFF, LANE) do { \ -@@ -394,7 +398,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper, - int num_pixels, uint32_t* out) { - int i; - const __m128i zero = _mm_setzero_si128(); -- const __m128i L8 = _mm_cvtsi32_si128(out[-1]); -+ const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]); - __m128i L = _mm_unpacklo_epi8(L8, zero); - for (i = 0; i + 4 <= num_pixels; i += 4) { - // Load 4 pixels at a time. 
-@@ -460,7 +464,7 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m, - const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0); - #undef MK_CST_16 - #undef CST -- const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks -+ const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks - int i; - for (i = 0; i + 4 <= num_pixels; i += 4) { - const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb -@@ -524,7 +528,7 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels, - - static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { -- const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu); -+ const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff); - const __m128i* in = (const __m128i*)src; - __m128i* out = (__m128i*)dst; - while (num_pixels >= 8) { -@@ -553,7 +557,7 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, - static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { - const __m128i mask_0x0f = _mm_set1_epi8(0x0f); -- const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); -+ const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); - const __m128i* in = (const __m128i*)src; - __m128i* out = (__m128i*)dst; - while (num_pixels >= 8) { -@@ -588,8 +592,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, - - static void ConvertBGRAToRGB565_SSE2(const uint32_t* src, - int num_pixels, uint8_t* dst) { -- const __m128i mask_0xe0 = _mm_set1_epi8(0xe0); -- const __m128i mask_0xf8 = _mm_set1_epi8(0xf8); -+ const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0); -+ const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8); - const __m128i mask_0x07 = _mm_set1_epi8(0x07); - const __m128i* in = (const __m128i*)src; - __m128i* out = (__m128i*)dst; -diff --git a/3rdparty/libwebp/src/dsp/lossless_sse41.c b/3rdparty/libwebp/src/dsp/lossless_sse41.c -new file mode 100644 -index 000000000000..bb7ce7611fa9 ---- /dev/null -+++ 
b/3rdparty/libwebp/src/dsp/lossless_sse41.c -@@ -0,0 +1,133 @@ -+// Copyright 2021 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// SSE41 variant of methods for lossless decoder -+ -+#include "src/dsp/dsp.h" -+ -+#if defined(WEBP_USE_SSE41) -+ -+#include "src/dsp/common_sse41.h" -+#include "src/dsp/lossless.h" -+#include "src/dsp/lossless_common.h" -+ -+//------------------------------------------------------------------------------ -+// Color-space conversion functions -+ -+static void TransformColorInverse_SSE41(const VP8LMultipliers* const m, -+ const uint32_t* const src, -+ int num_pixels, uint32_t* dst) { -+// sign-extended multiplying constants, pre-shifted by 5. 
-+#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend -+ const __m128i mults_rb = -+ _mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 | -+ (CST(green_to_blue_) & 0xffff))); -+ const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_)); -+#undef CST -+ const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); -+ const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5, -+ -1, 9, -1, 9, -1, 13, -1, 13); -+ const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1, -+ -1, 10, -1, -1, -1, 14, -1, -1); -+ int i; -+ for (i = 0; i + 4 <= num_pixels; i += 4) { -+ const __m128i A = _mm_loadu_si128((const __m128i*)(src + i)); -+ const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0 -+ const __m128i C = _mm_mulhi_epi16(B, mults_rb); -+ const __m128i D = _mm_add_epi8(A, C); -+ const __m128i E = _mm_shuffle_epi8(D, perm2); -+ const __m128i F = _mm_mulhi_epi16(E, mults_b2); -+ const __m128i G = _mm_add_epi8(D, F); -+ const __m128i out = _mm_blendv_epi8(G, A, mask_ag); -+ _mm_storeu_si128((__m128i*)&dst[i], out); -+ } -+ // Fall-back to C-version for left-overs. 
-+ if (i != num_pixels) { -+ VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i); -+ } -+} -+ -+//------------------------------------------------------------------------------ -+ -+#define ARGB_TO_RGB_SSE41 do { \ -+ while (num_pixels >= 16) { \ -+ const __m128i in0 = _mm_loadu_si128(in + 0); \ -+ const __m128i in1 = _mm_loadu_si128(in + 1); \ -+ const __m128i in2 = _mm_loadu_si128(in + 2); \ -+ const __m128i in3 = _mm_loadu_si128(in + 3); \ -+ const __m128i a0 = _mm_shuffle_epi8(in0, perm0); \ -+ const __m128i a1 = _mm_shuffle_epi8(in1, perm1); \ -+ const __m128i a2 = _mm_shuffle_epi8(in2, perm2); \ -+ const __m128i a3 = _mm_shuffle_epi8(in3, perm3); \ -+ const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); \ -+ const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); \ -+ const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); \ -+ _mm_storeu_si128(out + 0, b0); \ -+ _mm_storeu_si128(out + 1, b1); \ -+ _mm_storeu_si128(out + 2, b2); \ -+ in += 4; \ -+ out += 3; \ -+ num_pixels -= 16; \ -+ } \ -+} while (0) -+ -+static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels, -+ uint8_t* dst) { -+ const __m128i* in = (const __m128i*)src; -+ __m128i* out = (__m128i*)dst; -+ const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, -+ 8, 14, 13, 12, -1, -1, -1, -1); -+ const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); -+ const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e); -+ const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93); -+ -+ ARGB_TO_RGB_SSE41; -+ -+ // left-overs -+ if (num_pixels > 0) { -+ VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out); -+ } -+} -+ -+static void ConvertBGRAToBGR_SSE41(const uint32_t* src, -+ int num_pixels, uint8_t* dst) { -+ const __m128i* in = (const __m128i*)src; -+ __m128i* out = (__m128i*)dst; -+ const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10, -+ 12, 13, 14, -1, -1, -1, -1); -+ const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39); -+ const __m128i perm2 = _mm_shuffle_epi32(perm0, 
0x4e); -+ const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93); -+ -+ ARGB_TO_RGB_SSE41; -+ -+ // left-overs -+ if (num_pixels > 0) { -+ VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out); -+ } -+} -+ -+#undef ARGB_TO_RGB_SSE41 -+ -+//------------------------------------------------------------------------------ -+// Entry point -+ -+extern void VP8LDspInitSSE41(void); -+ -+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) { -+ VP8LTransformColorInverse = TransformColorInverse_SSE41; -+ VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41; -+ VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41; -+} -+ -+#else // !WEBP_USE_SSE41 -+ -+WEBP_DSP_INIT_STUB(VP8LDspInitSSE41) -+ -+#endif // WEBP_USE_SSE41 -diff --git a/3rdparty/libwebp/src/dsp/msa_macro.h b/3rdparty/libwebp/src/dsp/msa_macro.h -index a16c0bb3009b..90adbbc3197e 100644 ---- a/3rdparty/libwebp/src/dsp/msa_macro.h -+++ b/3rdparty/libwebp/src/dsp/msa_macro.h -@@ -14,6 +14,10 @@ - #ifndef WEBP_DSP_MSA_MACRO_H_ - #define WEBP_DSP_MSA_MACRO_H_ - -+#include "src/dsp/dsp.h" -+ -+#if defined(WEBP_USE_MSA) -+ - #include - #include - -@@ -69,27 +73,25 @@ - #define ST_UW(...) ST_W(v4u32, __VA_ARGS__) - #define ST_SW(...) 
ST_W(v4i32, __VA_ARGS__) - --#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME) \ -- static inline TYPE FUNC_NAME(const void* const psrc) { \ -- const uint8_t* const psrc_m = (const uint8_t*)psrc; \ -- TYPE val_m; \ -- __asm__ volatile ( \ -- "" #INSTR " %[val_m], %[psrc_m] \n\t" \ -- : [val_m] "=r" (val_m) \ -- : [psrc_m] "m" (*psrc_m)); \ -- return val_m; \ -+#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME) \ -+ static inline TYPE FUNC_NAME(const void* const psrc) { \ -+ const uint8_t* const psrc_m = (const uint8_t*)psrc; \ -+ TYPE val_m; \ -+ __asm__ volatile("" #INSTR " %[val_m], %[psrc_m] \n\t" \ -+ : [val_m] "=r"(val_m) \ -+ : [psrc_m] "m"(*psrc_m)); \ -+ return val_m; \ - } - - #define MSA_LOAD(psrc, FUNC_NAME) FUNC_NAME(psrc) - --#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME) \ -- static inline void FUNC_NAME(TYPE val, void* const pdst) { \ -- uint8_t* const pdst_m = (uint8_t*)pdst; \ -- TYPE val_m = val; \ -- __asm__ volatile ( \ -- " " #INSTR " %[val_m], %[pdst_m] \n\t" \ -- : [pdst_m] "=m" (*pdst_m) \ -- : [val_m] "r" (val_m)); \ -+#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME) \ -+ static inline void FUNC_NAME(TYPE val, void* const pdst) { \ -+ uint8_t* const pdst_m = (uint8_t*)pdst; \ -+ TYPE val_m = val; \ -+ __asm__ volatile(" " #INSTR " %[val_m], %[pdst_m] \n\t" \ -+ : [pdst_m] "=m"(*pdst_m) \ -+ : [val_m] "r"(val_m)); \ - } - - #define MSA_STORE(val, pdst, FUNC_NAME) FUNC_NAME(val, pdst) -@@ -1389,4 +1391,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) { - } while (0) - #define AVER_UB2_UB(...) 
AVER_UB2(v16u8, __VA_ARGS__) - -+#endif // WEBP_USE_MSA - #endif // WEBP_DSP_MSA_MACRO_H_ -diff --git a/3rdparty/libwebp/src/dsp/neon.h b/3rdparty/libwebp/src/dsp/neon.h -index aa1dea130106..14acb4044ba6 100644 ---- a/3rdparty/libwebp/src/dsp/neon.h -+++ b/3rdparty/libwebp/src/dsp/neon.h -@@ -12,14 +12,16 @@ - #ifndef WEBP_DSP_NEON_H_ - #define WEBP_DSP_NEON_H_ - --#include -- - #include "src/dsp/dsp.h" - -+#if defined(WEBP_USE_NEON) -+ -+#include -+ - // Right now, some intrinsics functions seem slower, so we disable them - // everywhere except newer clang/gcc or aarch64 where the inline assembly is - // incompatible. --#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__) -+#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64 - #define WEBP_USE_INTRINSICS // use intrinsics when possible - #endif - -@@ -44,7 +46,7 @@ - // if using intrinsics, this flag avoids some functions that make gcc-4.6.3 - // crash ("internal compiler error: in immed_double_const, at emit-rtl."). 
- // (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183) --#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) -+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) - #define WORK_AROUND_GCC - #endif - -@@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) { - } while (0) - #endif - -+#endif // WEBP_USE_NEON - #endif // WEBP_DSP_NEON_H_ -diff --git a/3rdparty/libwebp/src/dsp/quant.h b/3rdparty/libwebp/src/dsp/quant.h -index 5e8dba8d19e8..dcbc11c77c59 100644 ---- a/3rdparty/libwebp/src/dsp/quant.h -+++ b/3rdparty/libwebp/src/dsp/quant.h -@@ -21,18 +21,24 @@ - - #define IsFlat IsFlat_NEON - --static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { -+static uint32_t horizontal_add_uint32x4(const uint32x4_t a) { -+#if WEBP_AARCH64 -+ return vaddvq_u32(a); -+#else - const uint64x2_t b = vpaddlq_u32(a); -- return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), -- vreinterpret_u32_u64(vget_high_u64(b))); -+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), -+ vreinterpret_u32_u64(vget_high_u64(b))); -+ return vget_lane_u32(c, 0); -+#endif - } - - static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks, - int thresh) { - const int16x8_t tst_ones = vdupq_n_s16(-1); - uint32x4_t sum = vdupq_n_u32(0); -+ int i; - -- for (int i = 0; i < num_blocks; ++i) { -+ for (i = 0; i < num_blocks; ++i) { - // Set DC to zero. 
- const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0); - const int16x8_t a_1 = vld1q_s16(levels + 8); -@@ -45,7 +51,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks, - - levels += 16; - } -- return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0); -+ return thresh >= (int)horizontal_add_uint32x4(sum); - } - - #else -diff --git a/3rdparty/libwebp/src/dsp/rescaler.c b/3rdparty/libwebp/src/dsp/rescaler.c -index c5a01e82df5c..325d8be1808b 100644 ---- a/3rdparty/libwebp/src/dsp/rescaler.c -+++ b/3rdparty/libwebp/src/dsp/rescaler.c -@@ -38,8 +38,9 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, - int x_out = channel; - // simple bilinear interpolation - int accum = wrk->x_add; -- int left = src[x_in]; -- int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left; -+ rescaler_t left = (rescaler_t)src[x_in]; -+ rescaler_t right = -+ (wrk->src_width > 1) ? (rescaler_t)src[x_in + x_stride] : left; - x_in += x_stride; - while (1) { - wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; -@@ -50,7 +51,7 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk, - left = right; - x_in += x_stride; - assert(x_in < wrk->src_width * x_stride); -- right = src[x_in]; -+ right = (rescaler_t)src[x_in]; - accum += wrk->x_add; - } - } -@@ -196,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink; - WebPRescalerExportRowFunc WebPRescalerExportRowExpand; - WebPRescalerExportRowFunc WebPRescalerExportRowShrink; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void WebPRescalerDspInitSSE2(void); - extern void WebPRescalerDspInitMIPS32(void); - extern void WebPRescalerDspInitMIPSdspR2(void); -@@ -213,7 +215,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { - WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C; - - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPRescalerDspInitSSE2(); - } -@@ -235,7 +237,7 @@ 
WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - WebPRescalerDspInitNEON(); -diff --git a/3rdparty/libwebp/src/dsp/rescaler_sse2.c b/3rdparty/libwebp/src/dsp/rescaler_sse2.c -index d7effea16ea2..3f18e94e9359 100644 ---- a/3rdparty/libwebp/src/dsp/rescaler_sse2.c -+++ b/3rdparty/libwebp/src/dsp/rescaler_sse2.c -@@ -85,7 +85,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, - const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum); - const __m128i out = _mm_madd_epi16(cur_pixels, mult); - assert(sizeof(*frow) == sizeof(uint32_t)); -- WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out)); -+ WebPInt32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out)); - frow += 1; - if (frow >= frow_end) break; - accum -= wrk->x_sub; -@@ -132,7 +132,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk, - __m128i base = zero; - accum += wrk->x_add; - while (accum > 0) { -- const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src)); -+ const __m128i A = _mm_cvtsi32_si128(WebPMemToInt32(src)); - src += 4; - base = _mm_unpacklo_epi8(A, zero); - // To avoid overflow, we need: base * x_add / x_sub < 32768 -@@ -198,7 +198,7 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0, - const __m128i* const mult, - uint8_t* const dst) { - const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER); -- const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0); -+ const __m128i mask = _mm_set_epi32(~0, 0, ~0, 0); - const __m128i B0 = _mm_mul_epu32(*A0, *mult); - const __m128i B1 = _mm_mul_epu32(*A1, *mult); - const __m128i B2 = _mm_mul_epu32(*A2, *mult); -diff --git a/3rdparty/libwebp/src/dsp/ssim.c b/3rdparty/libwebp/src/dsp/ssim.c -index 989ce8254c9f..9a1341ed9585 100644 ---- a/3rdparty/libwebp/src/dsp/ssim.c -+++ b/3rdparty/libwebp/src/dsp/ssim.c -@@ -137,6 +137,7 @@ 
VP8SSIMGetClippedFunc VP8SSIMGetClipped; - VP8AccumulateSSEFunc VP8AccumulateSSE; - #endif - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void VP8SSIMDspInitSSE2(void); - - WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { -@@ -150,7 +151,7 @@ WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { - #endif - - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - VP8SSIMDspInitSSE2(); - } -diff --git a/3rdparty/libwebp/src/dsp/upsampling.c b/3rdparty/libwebp/src/dsp/upsampling.c -index 9b60da5bbb2a..983b9c42d36c 100644 ---- a/3rdparty/libwebp/src/dsp/upsampling.c -+++ b/3rdparty/libwebp/src/dsp/upsampling.c -@@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y, - - WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void WebPInitYUV444ConvertersMIPSdspR2(void); - extern void WebPInitYUV444ConvertersSSE2(void); - extern void WebPInitYUV444ConvertersSSE41(void); -@@ -233,12 +234,12 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) { - WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C; - - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPInitYUV444ConvertersSSE2(); - } - #endif --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - WebPInitYUV444ConvertersSSE41(); - } -@@ -278,12 +279,12 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. 
- if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPInitUpsamplersSSE2(); - } - #endif --#if defined(WEBP_USE_SSE41) -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - WebPInitUpsamplersSSE41(); - } -@@ -300,7 +301,7 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) { - #endif - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - WebPInitUpsamplersNEON(); -diff --git a/3rdparty/libwebp/src/dsp/upsampling_neon.c b/3rdparty/libwebp/src/dsp/upsampling_neon.c -index 6ba71a7de537..bbc000ca2d38 100644 ---- a/3rdparty/libwebp/src/dsp/upsampling_neon.c -+++ b/3rdparty/libwebp/src/dsp/upsampling_neon.c -@@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 }; - vst4_u8(out, v255_r_g_b); \ - } while (0) - --#if !defined(WEBP_SWAP_16BIT_CSP) -+#if (WEBP_SWAP_16BIT_CSP == 0) - #define ZIP_U8(lo, hi) vzip_u8((lo), (hi)) - #else - #define ZIP_U8(lo, hi) vzip_u8((hi), (lo)) -diff --git a/3rdparty/libwebp/src/dsp/upsampling_sse2.c b/3rdparty/libwebp/src/dsp/upsampling_sse2.c -index 340f1e2ac238..08b6d0b1cfb8 100644 ---- a/3rdparty/libwebp/src/dsp/upsampling_sse2.c -+++ b/3rdparty/libwebp/src/dsp/upsampling_sse2.c -@@ -121,7 +121,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ - int uv_pos, pos; \ - /* 16byte-aligned array to cache reconstructed u and v */ \ - uint8_t uv_buf[14 * 32 + 15] = { 0 }; \ -- uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ -+ uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \ - uint8_t* const r_v = r_u + 32; \ - \ - assert(top_y != NULL); \ -diff --git a/3rdparty/libwebp/src/dsp/yuv.c b/3rdparty/libwebp/src/dsp/yuv.c -index 14e67fc28ef8..8a04b85d82dd 100644 ---- a/3rdparty/libwebp/src/dsp/yuv.c -+++ b/3rdparty/libwebp/src/dsp/yuv.c -@@ -70,6 +70,7 @@ void 
WebPSamplerProcessPlane(const uint8_t* y, int y_stride, - - WebPSamplerRowFunc WebPSamplers[MODE_LAST]; - -+extern VP8CPUInfo VP8GetCPUInfo; - extern void WebPInitSamplersSSE2(void); - extern void WebPInitSamplersSSE41(void); - extern void WebPInitSamplersMIPS32(void); -@@ -90,16 +91,16 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) { - - // If defined, use CPUInfo() to overwrite some pointers with faster versions. - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPInitSamplersSSE2(); - } --#endif // WEBP_USE_SSE2 --#if defined(WEBP_USE_SSE41) -+#endif // WEBP_HAVE_SSE2 -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - WebPInitSamplersSSE41(); - } --#endif // WEBP_USE_SSE41 -+#endif // WEBP_HAVE_SSE41 - #if defined(WEBP_USE_MIPS32) - if (VP8GetCPUInfo(kMIPS32)) { - WebPInitSamplersMIPS32(); -@@ -194,50 +195,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, - - //----------------------------------------------------------------------------- - --#if !WEBP_NEON_OMIT_C_CODE --#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic --static uint16_t clip_y(int v) { -- return (v < 0) ? 0 : (v > MAX_Y) ? 
MAX_Y : (uint16_t)v; --} -- --static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, -- uint16_t* dst, int len) { -- uint64_t diff = 0; -- int i; -- for (i = 0; i < len; ++i) { -- const int diff_y = ref[i] - src[i]; -- const int new_y = (int)dst[i] + diff_y; -- dst[i] = clip_y(new_y); -- diff += (uint64_t)abs(diff_y); -- } -- return diff; --} -- --static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, -- int16_t* dst, int len) { -- int i; -- for (i = 0; i < len; ++i) { -- const int diff_uv = ref[i] - src[i]; -- dst[i] += diff_uv; -- } --} -- --static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, -- const uint16_t* best_y, uint16_t* out) { -- int i; -- for (i = 0; i < len; ++i, ++A, ++B) { -- const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; -- const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; -- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); -- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); -- } --} --#endif // !WEBP_NEON_OMIT_C_CODE -- --#undef MAX_Y -- --//----------------------------------------------------------------------------- -- - void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); - void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); - void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, -@@ -247,18 +204,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); - void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, - int src_width, int do_store); - --uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, -- uint16_t* dst, int len); --void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, -- int16_t* dst, int len); --void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, -- const uint16_t* best_y, uint16_t* out); -- - extern void WebPInitConvertARGBToYUVSSE2(void); - extern void WebPInitConvertARGBToYUVSSE41(void); - extern void 
WebPInitConvertARGBToYUVNEON(void); --extern void WebPInitSharpYUVSSE2(void); --extern void WebPInitSharpYUVNEON(void); - - WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { - WebPConvertARGBToY = ConvertARGBToY_C; -@@ -269,40 +217,29 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { - - WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; - --#if !WEBP_NEON_OMIT_C_CODE -- WebPSharpYUVUpdateY = SharpYUVUpdateY_C; -- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; -- WebPSharpYUVFilterRow = SharpYUVFilterRow_C; --#endif -- - if (VP8GetCPUInfo != NULL) { --#if defined(WEBP_USE_SSE2) -+#if defined(WEBP_HAVE_SSE2) - if (VP8GetCPUInfo(kSSE2)) { - WebPInitConvertARGBToYUVSSE2(); -- WebPInitSharpYUVSSE2(); - } --#endif // WEBP_USE_SSE2 --#if defined(WEBP_USE_SSE41) -+#endif // WEBP_HAVE_SSE2 -+#if defined(WEBP_HAVE_SSE41) - if (VP8GetCPUInfo(kSSE4_1)) { - WebPInitConvertARGBToYUVSSE41(); - } --#endif // WEBP_USE_SSE41 -+#endif // WEBP_HAVE_SSE41 - } - --#if defined(WEBP_USE_NEON) -+#if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || - (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { - WebPInitConvertARGBToYUVNEON(); -- WebPInitSharpYUVNEON(); - } --#endif // WEBP_USE_NEON -+#endif // WEBP_HAVE_NEON - - assert(WebPConvertARGBToY != NULL); - assert(WebPConvertARGBToUV != NULL); - assert(WebPConvertRGB24ToY != NULL); - assert(WebPConvertBGR24ToY != NULL); - assert(WebPConvertRGBA32ToUV != NULL); -- assert(WebPSharpYUVUpdateY != NULL); -- assert(WebPSharpYUVUpdateRGB != NULL); -- assert(WebPSharpYUVFilterRow != NULL); - } -diff --git a/3rdparty/libwebp/src/dsp/yuv.h b/3rdparty/libwebp/src/dsp/yuv.h -index c12be1d094b6..66a397d117b4 100644 ---- a/3rdparty/libwebp/src/dsp/yuv.h -+++ b/3rdparty/libwebp/src/dsp/yuv.h -@@ -10,7 +10,7 @@ - // inline YUV<->RGB conversion function - // - // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. 
--// More information at: http://en.wikipedia.org/wiki/YCbCr -+// More information at: https://en.wikipedia.org/wiki/YCbCr - // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 - // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 - // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 -diff --git a/3rdparty/libwebp/src/dsp/yuv_neon.c b/3rdparty/libwebp/src/dsp/yuv_neon.c -index a34d60248f6a..ff77b009801d 100644 ---- a/3rdparty/libwebp/src/dsp/yuv_neon.c -+++ b/3rdparty/libwebp/src/dsp/yuv_neon.c -@@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) { - WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON; - } - --//------------------------------------------------------------------------------ -- --#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic --static uint16_t clip_y_NEON(int v) { -- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; --} -- --static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src, -- uint16_t* dst, int len) { -- int i; -- const int16x8_t zero = vdupq_n_s16(0); -- const int16x8_t max = vdupq_n_s16(MAX_Y); -- uint64x2_t sum = vdupq_n_u64(0); -- uint64_t diff; -- -- for (i = 0; i + 8 <= len; i += 8) { -- const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); -- const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); -- const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); -- const int16x8_t D = vsubq_s16(A, B); // diff_y -- const int16x8_t F = vaddq_s16(C, D); // new_y -- const uint16x8_t H = -- vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); -- const int16x8_t I = vabsq_s16(D); // abs(diff_y) -- vst1q_u16(dst + i, H); -- sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); -- } -- diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); -- for (; i < len; ++i) { -- const int diff_y = ref[i] - src[i]; -- const int new_y = (int)(dst[i]) + diff_y; -- dst[i] = clip_y_NEON(new_y); -- diff += (uint64_t)(abs(diff_y)); -- } -- return diff; --} 
-- --static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src, -- int16_t* dst, int len) { -- int i; -- for (i = 0; i + 8 <= len; i += 8) { -- const int16x8_t A = vld1q_s16(ref + i); -- const int16x8_t B = vld1q_s16(src + i); -- const int16x8_t C = vld1q_s16(dst + i); -- const int16x8_t D = vsubq_s16(A, B); // diff_uv -- const int16x8_t E = vaddq_s16(C, D); // new_uv -- vst1q_s16(dst + i, E); -- } -- for (; i < len; ++i) { -- const int diff_uv = ref[i] - src[i]; -- dst[i] += diff_uv; -- } --} -- --static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len, -- const uint16_t* best_y, uint16_t* out) { -- int i; -- const int16x8_t max = vdupq_n_s16(MAX_Y); -- const int16x8_t zero = vdupq_n_s16(0); -- for (i = 0; i + 8 <= len; i += 8) { -- const int16x8_t a0 = vld1q_s16(A + i + 0); -- const int16x8_t a1 = vld1q_s16(A + i + 1); -- const int16x8_t b0 = vld1q_s16(B + i + 0); -- const int16x8_t b1 = vld1q_s16(B + i + 1); -- const int16x8_t a0b1 = vaddq_s16(a0, b1); -- const int16x8_t a1b0 = vaddq_s16(a1, b0); -- const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1 -- const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1) -- const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0) -- const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); -- const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); -- const int16x8_t d0 = vaddq_s16(c1, a0); -- const int16x8_t d1 = vaddq_s16(c0, a1); -- const int16x8_t e0 = vrshrq_n_s16(d0, 1); -- const int16x8_t e1 = vrshrq_n_s16(d1, 1); -- const int16x8x2_t f = vzipq_s16(e0, e1); -- const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); -- const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); -- const int16x8_t h0 = vaddq_s16(g0, f.val[0]); -- const int16x8_t h1 = vaddq_s16(g1, f.val[1]); -- const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); -- const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); -- 
vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); -- vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); -- } -- for (; i < len; ++i) { -- const int a0b1 = A[i + 0] + B[i + 1]; -- const int a1b0 = A[i + 1] + B[i + 0]; -- const int a0a1b0b1 = a0b1 + a1b0 + 8; -- const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -- const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -- out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0); -- out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1); -- } --} --#undef MAX_Y -- --//------------------------------------------------------------------------------ -- --extern void WebPInitSharpYUVNEON(void); -- --WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) { -- WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON; -- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON; -- WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON; --} -- - #else // !WEBP_USE_NEON - - WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON) --WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON) - - #endif // WEBP_USE_NEON -diff --git a/3rdparty/libwebp/src/dsp/yuv_sse2.c b/3rdparty/libwebp/src/dsp/yuv_sse2.c -index baa48d537175..01a48f9af2c6 100644 ---- a/3rdparty/libwebp/src/dsp/yuv_sse2.c -+++ b/3rdparty/libwebp/src/dsp/yuv_sse2.c -@@ -15,10 +15,12 @@ - - #if defined(WEBP_USE_SSE2) - --#include "src/dsp/common_sse2.h" - #include - #include - -+#include "src/dsp/common_sse2.h" -+#include "src/utils/utils.h" -+ - //----------------------------------------------------------------------------- - // Convert spans of 32 pixels to various RGB formats for the fancy upsampler. 
- -@@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) { - // Load and replicate the U/V samples - static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); -+ const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src)); - const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); - return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples - } -@@ -130,7 +132,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R, - const __m128i rg0 = _mm_packus_epi16(*B, *A); - const __m128i ba0 = _mm_packus_epi16(*R, *G); - #endif -- const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); -+ const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); - const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb... - const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga... - const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0); -@@ -147,9 +149,10 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R, - const __m128i r0 = _mm_packus_epi16(*R, *R); - const __m128i g0 = _mm_packus_epi16(*G, *G); - const __m128i b0 = _mm_packus_epi16(*B, *B); -- const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8)); -+ const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8)); - const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f)); -- const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5); -+ const __m128i g1 = -+ _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5); - const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3); - const __m128i rg = _mm_or_si128(r1, g1); - const __m128i gb = _mm_or_si128(g2, b1); -@@ -747,128 +750,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) { - WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2; - } - --//------------------------------------------------------------------------------ -- 
--#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic --static uint16_t clip_y(int v) { -- return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; --} -- --static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, -- uint16_t* dst, int len) { -- uint64_t diff = 0; -- uint32_t tmp[4]; -- int i; -- const __m128i zero = _mm_setzero_si128(); -- const __m128i max = _mm_set1_epi16(MAX_Y); -- const __m128i one = _mm_set1_epi16(1); -- __m128i sum = zero; -- -- for (i = 0; i + 8 <= len; i += 8) { -- const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); -- const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); -- const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); -- const __m128i D = _mm_sub_epi16(A, B); // diff_y -- const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) -- const __m128i F = _mm_add_epi16(C, D); // new_y -- const __m128i G = _mm_or_si128(E, one); // -1 or 1 -- const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); -- const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) -- _mm_storeu_si128((__m128i*)(dst + i), H); -- sum = _mm_add_epi32(sum, I); -- } -- _mm_storeu_si128((__m128i*)tmp, sum); -- diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; -- for (; i < len; ++i) { -- const int diff_y = ref[i] - src[i]; -- const int new_y = (int)dst[i] + diff_y; -- dst[i] = clip_y(new_y); -- diff += (uint64_t)abs(diff_y); -- } -- return diff; --} -- --static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, -- int16_t* dst, int len) { -- int i = 0; -- for (i = 0; i + 8 <= len; i += 8) { -- const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); -- const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); -- const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); -- const __m128i D = _mm_sub_epi16(A, B); // diff_uv -- const __m128i E = _mm_add_epi16(C, D); // new_uv -- _mm_storeu_si128((__m128i*)(dst + i), E); -- } -- for (; i < len; ++i) { -- const int diff_uv = ref[i] 
- src[i]; -- dst[i] += diff_uv; -- } --} -- --static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, -- const uint16_t* best_y, uint16_t* out) { -- int i; -- const __m128i kCst8 = _mm_set1_epi16(8); -- const __m128i max = _mm_set1_epi16(MAX_Y); -- const __m128i zero = _mm_setzero_si128(); -- for (i = 0; i + 8 <= len; i += 8) { -- const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); -- const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); -- const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); -- const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); -- const __m128i a0b1 = _mm_add_epi16(a0, b1); -- const __m128i a1b0 = _mm_add_epi16(a1, b0); -- const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 -- const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); -- const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) -- const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) -- const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); -- const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); -- const __m128i d0 = _mm_add_epi16(c1, a0); -- const __m128i d1 = _mm_add_epi16(c0, a1); -- const __m128i e0 = _mm_srai_epi16(d0, 1); -- const __m128i e1 = _mm_srai_epi16(d1, 1); -- const __m128i f0 = _mm_unpacklo_epi16(e0, e1); -- const __m128i f1 = _mm_unpackhi_epi16(e0, e1); -- const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); -- const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); -- const __m128i h0 = _mm_add_epi16(g0, f0); -- const __m128i h1 = _mm_add_epi16(g1, f1); -- const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); -- const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); -- _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); -- _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); -- } -- for (; i < len; ++i) { -- // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = -- // = (8 * A0 + 
2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 -- // We reuse the common sub-expressions. -- const int a0b1 = A[i + 0] + B[i + 1]; -- const int a1b0 = A[i + 1] + B[i + 0]; -- const int a0a1b0b1 = a0b1 + a1b0 + 8; -- const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; -- const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; -- out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); -- out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); -- } --} -- --#undef MAX_Y -- --//------------------------------------------------------------------------------ -- --extern void WebPInitSharpYUVSSE2(void); -- --WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) { -- WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2; -- WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2; -- WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2; --} -- - #else // !WEBP_USE_SSE2 - - WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2) - WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2) --WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2) - - #endif // WEBP_USE_SSE2 -diff --git a/3rdparty/libwebp/src/dsp/yuv_sse41.c b/3rdparty/libwebp/src/dsp/yuv_sse41.c -index 579d1f7402c2..f79b802e4712 100644 ---- a/3rdparty/libwebp/src/dsp/yuv_sse41.c -+++ b/3rdparty/libwebp/src/dsp/yuv_sse41.c -@@ -15,10 +15,12 @@ - - #if defined(WEBP_USE_SSE41) - --#include "src/dsp/common_sse41.h" - #include - #include - -+#include "src/dsp/common_sse41.h" -+#include "src/utils/utils.h" -+ - //----------------------------------------------------------------------------- - // Convert spans of 32 pixels to various RGB formats for the fancy upsampler. 
- -@@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) { - // Load and replicate the U/V samples - static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) { - const __m128i zero = _mm_setzero_si128(); -- const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); -+ const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src)); - const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); - return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples - } -diff --git a/3rdparty/libwebp/src/enc/alpha_enc.c b/3rdparty/libwebp/src/enc/alpha_enc.c -index dce9ca957d3a..4a599f88a98d 100644 ---- a/3rdparty/libwebp/src/enc/alpha_enc.c -+++ b/3rdparty/libwebp/src/enc/alpha_enc.c -@@ -13,6 +13,7 @@ - - #include - #include -+#include - - #include "src/enc/vp8i_enc.h" - #include "src/dsp/dsp.h" -@@ -54,7 +55,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, - WebPConfig config; - WebPPicture picture; - -- WebPPictureInit(&picture); -+ if (!WebPPictureInit(&picture)) return 0; - picture.width = width; - picture.height = height; - picture.use_argb = 1; -@@ -86,7 +87,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, - // a decoder bug related to alpha with color cache. - // See: https://code.google.com/p/webp/issues/detail?id=239 - // Need to re-enable this later. 
-- ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); -+ ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0); - WebPPictureFree(&picture); - ok = ok && !bw->error_; - if (!ok) { -@@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, - !reduce_levels, &tmp_bw, &result->stats); - if (ok) { - output = VP8LBitWriterFinish(&tmp_bw); -+ if (tmp_bw.error_) { -+ VP8LBitWriterWipeOut(&tmp_bw); -+ memset(&result->bw, 0, sizeof(result->bw)); -+ return 0; -+ } - output_size = VP8LBitWriterNumBytes(&tmp_bw); - if (output_size > data_size) { - // compressed size is larger than source! Revert to uncompressed mode. -@@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, - } - } else { - VP8LBitWriterWipeOut(&tmp_bw); -+ memset(&result->bw, 0, sizeof(result->bw)); - return 0; - } - } -@@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, - header = method | (filter << 2); - if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - -- VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); -+ if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0; - ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); - ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); - -@@ -303,7 +310,7 @@ static int EncodeAlpha(VP8Encoder* const enc, - int ok = 1; - const int reduce_levels = (quality < 100); - -- // quick sanity checks -+ // quick correctness checks - assert((uint64_t)data_size == (uint64_t)width * height); // as per spec - assert(enc != NULL && pic != NULL && pic->a != NULL); - assert(output != NULL && output_size != NULL); -@@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc, - assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST); - - if (quality < 0 || quality > 100) { -- return 0; -+ return WebPEncodingSetError(pic, 
VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - - if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) { -- return 0; -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - - if (method == ALPHA_NO_COMPRESSION) { -@@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc, - - quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); - if (quant_alpha == NULL) { -- return 0; -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - - // Extract alpha data (width x height) from raw_data (stride x height). -@@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc, - ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, - filter, reduce_levels, effort_level, output, - output_size, pic->stats); -+ if (!ok) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise -+ } - #if !defined(WEBP_DISABLE_STATS) - if (pic->stats != NULL) { // need stats? - pic->stats->coded_size += (int)(*output_size); -@@ -361,7 +371,7 @@ static int EncodeAlpha(VP8Encoder* const enc, - //------------------------------------------------------------------------------ - // Main calls - --static int CompressAlphaJob(void* arg1, void* dummy) { -+static int CompressAlphaJob(void* arg1, void* unused) { - VP8Encoder* const enc = (VP8Encoder*)arg1; - const WebPConfig* config = enc->config_; - uint8_t* alpha_data = NULL; -@@ -375,13 +385,13 @@ static int CompressAlphaJob(void* arg1, void* dummy) { - filter, effort_level, &alpha_data, &alpha_size)) { - return 0; - } -- if (alpha_size != (uint32_t)alpha_size) { // Sanity check. -+ if (alpha_size != (uint32_t)alpha_size) { // Soundness check. 
- WebPSafeFree(alpha_data); - return 0; - } - enc->alpha_data_size_ = (uint32_t)alpha_size; - enc->alpha_data_ = alpha_data; -- (void)dummy; -+ (void)unused; - return 1; - } - -@@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) { - WebPWorker* const worker = &enc->alpha_worker_; - // Makes sure worker is good to go. - if (!WebPGetWorkerInterface()->Reset(worker)) { -- return 0; -+ return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - WebPGetWorkerInterface()->Launch(worker); - return 1; -diff --git a/3rdparty/libwebp/src/enc/analysis_enc.c b/3rdparty/libwebp/src/enc/analysis_enc.c -index ebb784261c63..962eaa998f87 100644 ---- a/3rdparty/libwebp/src/enc/analysis_enc.c -+++ b/3rdparty/libwebp/src/enc/analysis_enc.c -@@ -391,12 +391,14 @@ static int DoSegmentsJob(void* arg1, void* arg2) { - return ok; - } - -+#ifdef WEBP_USE_THREAD - static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { - int i; - for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i]; - dst->alpha += src->alpha; - dst->uv_alpha += src->uv_alpha; - } -+#endif - - // initialize the job struct with some tasks to perform - static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, -@@ -425,10 +427,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) { - (enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled. - if (do_segments) { - const int last_row = enc->mb_h_; -- // We give a little more than a half work to the main thread. -- const int split_row = (9 * last_row + 15) >> 4; - const int total_mb = last_row * enc->mb_w_; - #ifdef WEBP_USE_THREAD -+ // We give a little more than a half work to the main thread. 
-+ const int split_row = (9 * last_row + 15) >> 4; - const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it - const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow); - #else -@@ -438,6 +440,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) { - WebPGetWorkerInterface(); - SegmentJob main_job; - if (do_mt) { -+#ifdef WEBP_USE_THREAD - SegmentJob side_job; - // Note the use of '&' instead of '&&' because we must call the functions - // no matter what. -@@ -455,6 +458,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) { - } - worker_interface->End(&side_job.worker); - if (ok) MergeJobs(&side_job, &main_job); // merge results together -+#endif // WEBP_USE_THREAD - } else { - // Even for single-thread case, we use the generic Worker tools. - InitSegmentJob(enc, &main_job, 0, last_row); -@@ -470,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) { - } else { // Use only one default segment. - ResetAllMBInfo(enc); - } -+ if (!ok) { -+ return WebPEncodingSetError(enc->pic_, -+ VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise -+ } - return ok; - } - -diff --git a/3rdparty/libwebp/src/enc/backward_references_cost_enc.c b/3rdparty/libwebp/src/enc/backward_references_cost_enc.c -index 516abd73eb45..6968ef3c9f3e 100644 ---- a/3rdparty/libwebp/src/enc/backward_references_cost_enc.c -+++ b/3rdparty/libwebp/src/enc/backward_references_cost_enc.c -@@ -15,10 +15,11 @@ - // - - #include -+#include - -+#include "src/dsp/lossless_common.h" - #include "src/enc/backward_references_enc.h" - #include "src/enc/histogram_enc.h" --#include "src/dsp/lossless_common.h" - #include "src/utils/color_cache_utils.h" - #include "src/utils/utils.h" - -@@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, - const PixOrCopy v); - - typedef struct { -- double alpha_[VALUES_IN_BYTE]; -- double red_[VALUES_IN_BYTE]; -- double blue_[VALUES_IN_BYTE]; -- double distance_[NUM_DISTANCE_CODES]; -- double* literal_; -+ float alpha_[VALUES_IN_BYTE]; -+ 
float red_[VALUES_IN_BYTE]; -+ float blue_[VALUES_IN_BYTE]; -+ float distance_[NUM_DISTANCE_CODES]; -+ float* literal_; - } CostModel; - - static void ConvertPopulationCountTableToBitEstimates( -- int num_symbols, const uint32_t population_counts[], double output[]) { -+ int num_symbols, const uint32_t population_counts[], float output[]) { - uint32_t sum = 0; - int nonzeros = 0; - int i; -@@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates( - if (nonzeros <= 1) { - memset(output, 0, num_symbols * sizeof(*output)); - } else { -- const double logsum = VP8LFastLog2(sum); -+ const float logsum = VP8LFastLog2(sum); - for (i = 0; i < num_symbols; ++i) { - output[i] = logsum - VP8LFastLog2(population_counts[i]); - } -@@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, - } - - ConvertPopulationCountTableToBitEstimates( -- VP8LHistogramNumCodes(histo->palette_code_bits_), -- histo->literal_, m->literal_); -+ VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_, -+ m->literal_); - ConvertPopulationCountTableToBitEstimates( - VALUES_IN_BYTE, histo->red_, m->red_); - ConvertPopulationCountTableToBitEstimates( -@@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, - return ok; - } - --static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { -+static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) { - return m->alpha_[v >> 24] + - m->red_[(v >> 16) & 0xff] + - m->literal_[(v >> 8) & 0xff] + - m->blue_[v & 0xff]; - } - --static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { -+static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) { - const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; - return m->literal_[literal_idx]; - } - --static WEBP_INLINE double GetLengthCost(const CostModel* const m, -- uint32_t length) { -+static WEBP_INLINE float GetLengthCost(const 
CostModel* const m, -+ uint32_t length) { - int code, extra_bits; - VP8LPrefixEncodeBits(length, &code, &extra_bits); - return m->literal_[VALUES_IN_BYTE + code] + extra_bits; - } - --static WEBP_INLINE double GetDistanceCost(const CostModel* const m, -- uint32_t distance) { -+static WEBP_INLINE float GetDistanceCost(const CostModel* const m, -+ uint32_t distance) { - int code, extra_bits; - VP8LPrefixEncodeBits(distance, &code, &extra_bits); - return m->distance_[code] + extra_bits; -@@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel( - const uint32_t* const argb, VP8LColorCache* const hashers, - const CostModel* const cost_model, int idx, int use_color_cache, - float prev_cost, float* const cost, uint16_t* const dist_array) { -- double cost_val = prev_cost; -+ float cost_val = prev_cost; - const uint32_t color = argb[idx]; - const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; - if (ix >= 0) { - // use_color_cache is true and hashers contains color -- const double mul0 = 0.68; -+ const float mul0 = 0.68f; - cost_val += GetCacheCost(cost_model, ix) * mul0; - } else { -- const double mul1 = 0.82; -+ const float mul1 = 0.82f; - if (use_color_cache) VP8LColorCacheInsert(hashers, color); - cost_val += GetLiteralCost(cost_model, color) * mul1; - } - if (cost[idx] > cost_val) { -- cost[idx] = (float)cost_val; -+ cost[idx] = cost_val; - dist_array[idx] = 1; // only one is inserted. - } - } -@@ -172,7 +173,7 @@ struct CostInterval { - - // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval. - typedef struct { -- double cost_; -+ float cost_; - int start_; - int end_; // Exclusive. - } CostCacheInterval; -@@ -187,7 +188,7 @@ typedef struct { - int count_; // The number of stored intervals. - CostCacheInterval* cache_intervals_; - size_t cache_intervals_size_; -- double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). 
-+ float cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). - float* costs_; - uint16_t* dist_array_; - // Most of the time, we only need few intervals -> use a free-list, to avoid -@@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager, - CostManagerInitFreeList(manager); - - // Fill in the cost_cache_. -+ // Has to be done in two passes due to a GCC bug on i686 -+ // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 -+ for (i = 0; i < cost_cache_size; ++i) { -+ manager->cost_cache_[i] = GetLengthCost(cost_model, i); -+ } - manager->cache_intervals_size_ = 1; -- manager->cost_cache_[0] = GetLengthCost(cost_model, 0); - for (i = 1; i < cost_cache_size; ++i) { -- manager->cost_cache_[i] = GetLengthCost(cost_model, i); - // Get the number of bound intervals. - if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) { - ++manager->cache_intervals_size_; -@@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager, - cur->end_ = 1; - cur->cost_ = manager->cost_cache_[0]; - for (i = 1; i < cost_cache_size; ++i) { -- const double cost_val = manager->cost_cache_[i]; -+ const float cost_val = manager->cost_cache_[i]; - if (cost_val != cur->cost_) { - ++cur; - // Initialize an interval. -@@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager, - } - cur->end_ = i + 1; - } -+ assert((size_t)(cur - manager->cache_intervals_) + 1 == -+ manager->cache_intervals_size_); - } - - manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); -@@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager, - return 0; - } - // Set the initial costs_ high for every pixel as we will keep the minimum. 
-- for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; -+ for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX; - - return 1; - } -@@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager, - // If handling the interval or one of its subintervals becomes to heavy, its - // contribution is added to the costs right away. - static WEBP_INLINE void PushInterval(CostManager* const manager, -- double distance_cost, int position, -+ float distance_cost, int position, - int len) { - size_t i; - CostInterval* interval = manager->head_; -@@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, - const int k = j - position; - float cost_tmp; - assert(k >= 0 && k < MAX_LENGTH); -- cost_tmp = (float)(distance_cost + manager->cost_cache_[k]); -+ cost_tmp = distance_cost + manager->cost_cache_[k]; - - if (manager->costs_[j] > cost_tmp) { - manager->costs_[j] = cost_tmp; -@@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, - const int end = position + (cost_cache_intervals[i].end_ > len - ? len - : cost_cache_intervals[i].end_); -- const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_); -+ const float cost = distance_cost + cost_cache_intervals[i].cost_; - - for (; interval != NULL && interval->start_ < end; - interval = interval_next) { -@@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly( - const int pix_count = xsize * ysize; - const int use_color_cache = (cache_bits > 0); - const size_t literal_array_size = -- sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + -- ((cache_bits > 0) ? 
(1 << cache_bits) : 0)); -+ sizeof(float) * (VP8LHistogramNumCodes(cache_bits)); - const size_t cost_model_size = sizeof(CostModel) + literal_array_size; - CostModel* const cost_model = - (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); - VP8LColorCache hashers; - CostManager* cost_manager = -- (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); -+ (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager)); - int offset_prev = -1, len_prev = -1; -- double offset_cost = -1; -+ float offset_cost = -1.f; - int first_offset_is_constant = -1; // initialized with 'impossible' value - int reach = 0; - - if (cost_model == NULL || cost_manager == NULL) goto Error; - -- cost_model->literal_ = (double*)(cost_model + 1); -+ cost_model->literal_ = (float*)(cost_model + 1); - if (use_color_cache) { - cc_init = VP8LColorCacheInit(&hashers, cache_bits); - if (!cc_init) goto Error; -@@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly( - } - - ok = !refs->error_; --Error: -+ Error: - if (cc_init) VP8LColorCacheClear(&hashers); - CostManagerClear(cost_manager); - WebPSafeFree(cost_model); -diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.c b/3rdparty/libwebp/src/enc/backward_references_enc.c -index 519b36a09153..dc98bf171943 100644 ---- a/3rdparty/libwebp/src/enc/backward_references_enc.c -+++ b/3rdparty/libwebp/src/enc/backward_references_enc.c -@@ -10,6 +10,8 @@ - // Author: Jyrki Alakuijala (jyrki@google.com) - // - -+#include "src/enc/backward_references_enc.h" -+ - #include - #include - #include -@@ -17,10 +19,11 @@ - #include "src/dsp/dsp.h" - #include "src/dsp/lossless.h" - #include "src/dsp/lossless_common.h" --#include "src/enc/backward_references_enc.h" - #include "src/enc/histogram_enc.h" -+#include "src/enc/vp8i_enc.h" - #include "src/utils/color_cache_utils.h" - #include "src/utils/utils.h" -+#include "src/webp/encode.h" - - #define MIN_BLOCK_SIZE 256 // minimum block size for backward references - -@@ -255,10 +258,13 @@ static 
WEBP_INLINE int MaxFindCopyLength(int len) { - - int VP8LHashChainFill(VP8LHashChain* const p, int quality, - const uint32_t* const argb, int xsize, int ysize, -- int low_effort) { -+ int low_effort, const WebPPicture* const pic, -+ int percent_range, int* const percent) { - const int size = xsize * ysize; - const int iter_max = GetMaxItersForQuality(quality); - const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); -+ int remaining_percent = percent_range; -+ int percent_start = *percent; - int pos; - int argb_comp; - uint32_t base_position; -@@ -276,7 +282,12 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, - - hash_to_first_index = - (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); -- if (hash_to_first_index == NULL) return 0; -+ if (hash_to_first_index == NULL) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } -+ -+ percent_range = remaining_percent / 2; -+ remaining_percent -= percent_range; - - // Set the int32_t array to -1. - memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); -@@ -323,12 +334,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, - hash_to_first_index[hash_code] = pos++; - argb_comp = argb_comp_next; - } -+ -+ if (!WebPReportProgress( -+ pic, percent_start + percent_range * pos / (size - 2), percent)) { -+ WebPSafeFree(hash_to_first_index); -+ return 0; -+ } - } - // Process the penultimate pixel. - chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)]; - - WebPSafeFree(hash_to_first_index); - -+ percent_start += percent_range; -+ if (!WebPReportProgress(pic, percent_start, percent)) return 0; -+ percent_range = remaining_percent; -+ - // Find the best match interval at each pixel, defined by an offset to the - // pixel and a length. 
The right-most pixel cannot match anything to the right - // (hence a best length of 0) and the left-most pixel nothing to the left -@@ -417,8 +438,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, - max_base_position = base_position; - } - } -+ -+ if (!WebPReportProgress(pic, -+ percent_start + percent_range * -+ (size - 2 - base_position) / -+ (size - 2), -+ percent)) { -+ return 0; -+ } - } -- return 1; -+ -+ return WebPReportProgress(pic, percent_start + percent_range, percent); - } - - static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, -@@ -728,7 +758,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, - int* const best_cache_bits) { - int i; - const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits; -- double entropy_min = MAX_ENTROPY; -+ float entropy_min = MAX_ENTROPY; - int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; - VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; - VP8LRefsCursor c = VP8LRefsCursorInit(refs); -@@ -813,14 +843,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, - } - - for (i = 0; i <= cache_bits_max; ++i) { -- const double entropy = VP8LHistogramEstimateBits(histos[i]); -+ const float entropy = VP8LHistogramEstimateBits(histos[i]); - if (i == 0 || entropy < entropy_min) { - entropy_min = entropy; - *best_cache_bits = i; - } - } - ok = 1; --Error: -+ Error: - for (i = 0; i <= cache_bits_max; ++i) { - if (cc_init[i]) VP8LColorCacheClear(&hashers[i]); - VP8LFreeHistogram(histos[i]); -@@ -890,7 +920,7 @@ static int GetBackwardReferences(int width, int height, - int i, lz77_type; - // Index 0 is for a color cache, index 1 for no cache (if needed). - int lz77_types_best[2] = {0, 0}; -- double bit_costs_best[2] = {DBL_MAX, DBL_MAX}; -+ float bit_costs_best[2] = {FLT_MAX, FLT_MAX}; - VP8LHashChain hash_chain_box; - VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 
2 : 1]; - int status = 0; -@@ -902,7 +932,7 @@ static int GetBackwardReferences(int width, int height, - for (lz77_type = 1; lz77_types_to_try; - lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { - int res = 0; -- double bit_cost = 0.; -+ float bit_cost = 0.f; - if ((lz77_types_to_try & lz77_type) == 0) continue; - switch (lz77_type) { - case kLZ77RLE: -@@ -976,15 +1006,16 @@ static int GetBackwardReferences(int width, int height, - const VP8LHashChain* const hash_chain_tmp = - (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; - const int cache_bits = (i == 1) ? 0 : *cache_bits_best; -- if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, -- hash_chain_tmp, &refs[i], -- refs_tmp)) { -- double bit_cost_trace; -- VP8LHistogramCreate(histo, refs_tmp, cache_bits); -- bit_cost_trace = VP8LHistogramEstimateBits(histo); -- if (bit_cost_trace < bit_costs_best[i]) { -- BackwardRefsSwap(refs_tmp, &refs[i]); -- } -+ float bit_cost_trace; -+ if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, -+ hash_chain_tmp, &refs[i], -+ refs_tmp)) { -+ goto Error; -+ } -+ VP8LHistogramCreate(histo, refs_tmp, cache_bits); -+ bit_cost_trace = VP8LHistogramEstimateBits(histo); -+ if (bit_cost_trace < bit_costs_best[i]) { -+ BackwardRefsSwap(refs_tmp, &refs[i]); - } - } - -@@ -1000,31 +1031,35 @@ static int GetBackwardReferences(int width, int height, - } - status = 1; - --Error: -+ Error: - VP8LHashChainClear(&hash_chain_box); - VP8LFreeHistogram(histo); - return status; - } - --WebPEncodingError VP8LGetBackwardReferences( -+int VP8LGetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, -- int* const cache_bits_best) { -+ int* const cache_bits_best, const WebPPicture* const pic, int percent_range, -+ int* const percent) { - if (low_effort) { - 
VP8LBackwardRefs* refs_best; - *cache_bits_best = cache_bits_max; - refs_best = GetBackwardReferencesLowEffort( - width, height, argb, cache_bits_best, hash_chain, refs); -- if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; -+ if (refs_best == NULL) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } - // Set it in first position. - BackwardRefsSwap(refs_best, &refs[0]); - } else { - if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, - cache_bits_max, do_no_cache, hash_chain, refs, - cache_bits_best)) { -- return VP8_ENC_ERROR_OUT_OF_MEMORY; -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - } -- return VP8_ENC_OK; -+ -+ return WebPReportProgress(pic, *percent + percent_range, percent); - } -diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.h b/3rdparty/libwebp/src/enc/backward_references_enc.h -index 4c0267b41e90..4dff1c27b57c 100644 ---- a/3rdparty/libwebp/src/enc/backward_references_enc.h -+++ b/3rdparty/libwebp/src/enc/backward_references_enc.h -@@ -134,10 +134,11 @@ struct VP8LHashChain { - - // Must be called first, to set size. - int VP8LHashChainInit(VP8LHashChain* const p, int size); --// Pre-compute the best matches for argb. -+// Pre-compute the best matches for argb. pic and percent are for progress. - int VP8LHashChainFill(VP8LHashChain* const p, int quality, - const uint32_t* const argb, int xsize, int ysize, -- int low_effort); -+ int low_effort, const WebPPicture* const pic, -+ int percent_range, int* const percent); - void VP8LHashChainClear(VP8LHashChain* const p); // release memory - - static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p, -@@ -227,11 +228,14 @@ enum VP8LLZ77Type { - // VP8LBackwardRefs is put in the first element, the best value with no-cache in - // the second element. - // In both cases, the last element is used as temporary internally. 
--WebPEncodingError VP8LGetBackwardReferences( -+// pic and percent are for progress. -+// Returns false in case of error (stored in pic->error_code). -+int VP8LGetBackwardReferences( - int width, int height, const uint32_t* const argb, int quality, - int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, - const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, -- int* const cache_bits_best); -+ int* const cache_bits_best, const WebPPicture* const pic, int percent_range, -+ int* const percent); - - #ifdef __cplusplus - } -diff --git a/3rdparty/libwebp/src/enc/frame_enc.c b/3rdparty/libwebp/src/enc/frame_enc.c -index af538d83bacd..01860ca757e6 100644 ---- a/3rdparty/libwebp/src/enc/frame_enc.c -+++ b/3rdparty/libwebp/src/enc/frame_enc.c -@@ -578,7 +578,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt, - uint64_t size = 0; - uint64_t size_p0 = 0; - uint64_t distortion = 0; -- const uint64_t pixel_count = nb_mbs * 384; -+ const uint64_t pixel_count = (uint64_t)nb_mbs * 384; - - VP8IteratorInit(enc, &it); - SetLoopParams(enc, s->q); -@@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) { - } - if (!ok) { - VP8EncFreeBitWriters(enc); // malloc error occurred -- WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - return ok; - } -@@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) { - } else { - // Something bad happened -> need to do some memory cleanup. - VP8EncFreeBitWriters(enc); -+ return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - return ok; - } -@@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) { - // *then* decide how to code the skip decision if there's one. - if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { - CodeResiduals(it.bw_, &it, &info); -+ if (it.bw_->error_) { -+ // enc->pic_->error_code is set in PostLoopFinalize(). 
-+ ok = 0; -+ break; -+ } - } else { // reset predictors after a skip - ResetAfterSkip(&it); - } -@@ -778,11 +784,12 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { - // Roughly refresh the proba eight times per pass - int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; - int num_pass_left = enc->config_->pass; -+ int remaining_progress = 40; // percents - const int do_search = enc->do_search_; - VP8EncIterator it; - VP8EncProba* const proba = &enc->proba_; - const VP8RDLevel rd_opt = enc->rd_opt_level_; -- const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384; -+ const uint64_t pixel_count = (uint64_t)enc->mb_w_ * enc->mb_h_ * 384; - PassStats stats; - int ok; - -@@ -805,6 +812,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { - uint64_t size_p0 = 0; - uint64_t distortion = 0; - int cnt = max_count; -+ // The final number of passes is not trivial to know in advance. -+ const int pass_progress = remaining_progress / (2 + num_pass_left); -+ remaining_progress -= pass_progress; - VP8IteratorInit(enc, &it); - SetLoopParams(enc, stats.q); - if (is_last_pass) { -@@ -832,7 +842,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { - StoreSideInfo(&it); - VP8StoreFilterStats(&it); - VP8IteratorExport(&it); -- ok = VP8IteratorProgress(&it, 20); -+ ok = VP8IteratorProgress(&it, pass_progress); - } - VP8IteratorSaveBoundary(&it); - } while (ok && VP8IteratorNext(&it)); -@@ -878,7 +888,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { - ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, - (const uint8_t*)proba->coeffs_, 1); - } -- ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); -+ ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress, -+ &enc->percent_); - return PostLoopFinalize(&it, ok); - } - -diff --git a/3rdparty/libwebp/src/enc/histogram_enc.c b/3rdparty/libwebp/src/enc/histogram_enc.c -index edc6e4faa43b..3ca67b3ad09b 100644 ---- a/3rdparty/libwebp/src/enc/histogram_enc.c -+++ b/3rdparty/libwebp/src/enc/histogram_enc.c 
-@@ -13,15 +13,17 @@ - #include "src/webp/config.h" - #endif - -+#include - #include - --#include "src/enc/backward_references_enc.h" --#include "src/enc/histogram_enc.h" - #include "src/dsp/lossless.h" - #include "src/dsp/lossless_common.h" -+#include "src/enc/backward_references_enc.h" -+#include "src/enc/histogram_enc.h" -+#include "src/enc/vp8i_enc.h" - #include "src/utils/utils.h" - --#define MAX_COST 1.e38 -+#define MAX_BIT_COST FLT_MAX - - // Number of partitions for the three dominant (literal, red and blue) symbol - // costs. -@@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, - // ----------------------------------------------------------------------------- - // Entropy-related functions. - --static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { -- double mix; -+static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) { -+ float mix; - if (entropy->nonzeros < 5) { - if (entropy->nonzeros <= 1) { - return 0; -@@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { - // Let's mix in a bit of entropy to favor good clustering when - // distributions of these are combined. - if (entropy->nonzeros == 2) { -- return 0.99 * entropy->sum + 0.01 * entropy->entropy; -+ return 0.99f * entropy->sum + 0.01f * entropy->entropy; - } - // No matter what the entropy says, we cannot be better than min_limit - // with Huffman coding. I am mixing a bit of entropy into the - // min_limit since it produces much better (~0.5 %) compression results - // perhaps because of better entropy clustering. - if (entropy->nonzeros == 3) { -- mix = 0.95; -+ mix = 0.95f; - } else { -- mix = 0.7; // nonzeros == 4. -+ mix = 0.7f; // nonzeros == 4. 
- } - } else { -- mix = 0.627; -+ mix = 0.627f; - } - - { -- double min_limit = 2 * entropy->sum - entropy->max_val; -- min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; -+ float min_limit = 2.f * entropy->sum - entropy->max_val; -+ min_limit = mix * min_limit + (1.f - mix) * entropy->entropy; - return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; - } - } - --double VP8LBitsEntropy(const uint32_t* const array, int n) { -+float VP8LBitsEntropy(const uint32_t* const array, int n) { - VP8LBitEntropy entropy; - VP8LBitsEntropyUnrefined(array, n, &entropy); - - return BitsEntropyRefine(&entropy); - } - --static double InitialHuffmanCost(void) { -+static float InitialHuffmanCost(void) { - // Small bias because Huffman code length is typically not stored in - // full length. - static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; -- static const double kSmallBias = 9.1; -+ static const float kSmallBias = 9.1f; - return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; - } - - // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) --static double FinalHuffmanCost(const VP8LStreaks* const stats) { -+static float FinalHuffmanCost(const VP8LStreaks* const stats) { - // The constants in this function are experimental and got rounded from - // their original values in 1/8 when switched to 1/1024. -- double retval = InitialHuffmanCost(); -+ float retval = InitialHuffmanCost(); - // Second coefficient: Many zeros in the histogram are covered efficiently - // by a run-length encode. Originally 2/8. -- retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; -+ retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1]; - // Second coefficient: Constant values are encoded less efficiently, but still - // RLE'ed. Originally 6/8. 
-- retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; -+ retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1]; - // 0s are usually encoded more efficiently than non-0s. - // Originally 15/8. -- retval += 1.796875 * stats->streaks[0][0]; -+ retval += 1.796875f * stats->streaks[0][0]; - // Originally 26/8. -- retval += 3.28125 * stats->streaks[1][0]; -+ retval += 3.28125f * stats->streaks[1][0]; - return retval; - } - - // Get the symbol entropy for the distribution 'population'. - // Set 'trivial_sym', if there's only one symbol present in the distribution. --static double PopulationCost(const uint32_t* const population, int length, -- uint32_t* const trivial_sym, -- uint8_t* const is_used) { -+static float PopulationCost(const uint32_t* const population, int length, -+ uint32_t* const trivial_sym, -+ uint8_t* const is_used) { - VP8LBitEntropy bit_entropy; - VP8LStreaks stats; - VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); -@@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length, - - // trivial_at_end is 1 if the two histograms only have one element that is - // non-zero: both the zero-th one, or both the last one. --static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, -- const uint32_t* const Y, -- int length, int is_X_used, -- int is_Y_used, -- int trivial_at_end) { -+static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X, -+ const uint32_t* const Y, int length, -+ int is_X_used, int is_Y_used, -+ int trivial_at_end) { - VP8LStreaks stats; - if (trivial_at_end) { - // This configuration is due to palettization that transforms an indexed -@@ -356,16 +357,18 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, - } - - // Estimates the Entropy + Huffman + other block overhead size cost. 
--double VP8LHistogramEstimateBits(VP8LHistogram* const p) { -- return -- PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), -- NULL, &p->is_used_[0]) -- + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1]) -- + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2]) -- + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) -- + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4]) -- + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES) -- + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); -+float VP8LHistogramEstimateBits(VP8LHistogram* const p) { -+ return PopulationCost(p->literal_, -+ VP8LHistogramNumCodes(p->palette_code_bits_), NULL, -+ &p->is_used_[0]) + -+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1]) + -+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2]) + -+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) + -+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, -+ &p->is_used_[4]) + -+ (float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, -+ NUM_LENGTH_CODES) + -+ (float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES); - } - - // ----------------------------------------------------------------------------- -@@ -373,17 +376,16 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) { - - static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, - const VP8LHistogram* const b, -- double cost_threshold, -- double* cost) { -+ float cost_threshold, float* cost) { - const int palette_code_bits = a->palette_code_bits_; - int trivial_at_end = 0; - assert(a->palette_code_bits_ == b->palette_code_bits_); - *cost += GetCombinedEntropy(a->literal_, b->literal_, - VP8LHistogramNumCodes(palette_code_bits), - a->is_used_[0], b->is_used_[0], 0); -- *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, -- b->literal_ + NUM_LITERAL_CODES, -- NUM_LENGTH_CODES); -+ *cost += 
(float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES, -+ b->literal_ + NUM_LITERAL_CODES, -+ NUM_LENGTH_CODES); - if (*cost > cost_threshold) return 0; - - if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM && -@@ -417,8 +419,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, - *cost += - GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, - a->is_used_[4], b->is_used_[4], 0); -- *cost += -- VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES); -+ *cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_, -+ NUM_DISTANCE_CODES); - if (*cost > cost_threshold) return 0; - - return 1; -@@ -439,12 +441,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a, - // Since the previous score passed is 'cost_threshold', we only need to compare - // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out - // early. --static double HistogramAddEval(const VP8LHistogram* const a, -- const VP8LHistogram* const b, -- VP8LHistogram* const out, -- double cost_threshold) { -- double cost = 0; -- const double sum_cost = a->bit_cost_ + b->bit_cost_; -+static float HistogramAddEval(const VP8LHistogram* const a, -+ const VP8LHistogram* const b, -+ VP8LHistogram* const out, float cost_threshold) { -+ float cost = 0; -+ const float sum_cost = a->bit_cost_ + b->bit_cost_; - cost_threshold += sum_cost; - - if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { -@@ -459,10 +460,10 @@ static double HistogramAddEval(const VP8LHistogram* const a, - // Same as HistogramAddEval(), except that the resulting histogram - // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit - // the term C(b) which is constant over all the evaluations. 
--static double HistogramAddThresh(const VP8LHistogram* const a, -- const VP8LHistogram* const b, -- double cost_threshold) { -- double cost; -+static float HistogramAddThresh(const VP8LHistogram* const a, -+ const VP8LHistogram* const b, -+ float cost_threshold) { -+ float cost; - assert(a != NULL && b != NULL); - cost = -a->bit_cost_; - GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); -@@ -473,24 +474,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a, - - // The structure to keep track of cost range for the three dominant entropy - // symbols. --// TODO(skal): Evaluate if float can be used here instead of double for --// representing the entropy costs. - typedef struct { -- double literal_max_; -- double literal_min_; -- double red_max_; -- double red_min_; -- double blue_max_; -- double blue_min_; -+ float literal_max_; -+ float literal_min_; -+ float red_max_; -+ float red_min_; -+ float blue_max_; -+ float blue_min_; - } DominantCostRange; - - static void DominantCostRangeInit(DominantCostRange* const c) { - c->literal_max_ = 0.; -- c->literal_min_ = MAX_COST; -+ c->literal_min_ = MAX_BIT_COST; - c->red_max_ = 0.; -- c->red_min_ = MAX_COST; -+ c->red_min_ = MAX_BIT_COST; - c->blue_max_ = 0.; -- c->blue_min_ = MAX_COST; -+ c->blue_min_ = MAX_BIT_COST; - } - - static void UpdateDominantCostRange( -@@ -505,16 +504,15 @@ static void UpdateDominantCostRange( - - static void UpdateHistogramCost(VP8LHistogram* const h) { - uint32_t alpha_sym, red_sym, blue_sym; -- const double alpha_cost = -- PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, -- &h->is_used_[3]); -- const double distance_cost = -+ const float alpha_cost = -+ PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]); -+ const float distance_cost = - PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) + -- VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); -+ (float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); - const int num_codes 
= VP8LHistogramNumCodes(h->palette_code_bits_); - h->literal_cost_ = - PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) + -- VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); -+ (float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES); - h->red_cost_ = - PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]); - h->blue_cost_ = -@@ -529,10 +527,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { - } - } - --static int GetBinIdForEntropy(double min, double max, double val) { -- const double range = max - min; -+static int GetBinIdForEntropy(float min, float max, float val) { -+ const float range = max - min; - if (range > 0.) { -- const double delta = val - min; -+ const float delta = val - min; - return (int)((NUM_PARTITIONS - 1e-6) * delta / range); - } else { - return 0; -@@ -641,15 +639,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, - - // Merges some histograms with same bin_id together if it's advantageous. - // Sets the remaining histograms to NULL. 
--static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, -- int* num_used, -- const uint16_t* const clusters, -- uint16_t* const cluster_mappings, -- VP8LHistogram* cur_combo, -- const uint16_t* const bin_map, -- int num_bins, -- double combine_cost_factor, -- int low_effort) { -+static void HistogramCombineEntropyBin( -+ VP8LHistogramSet* const image_histo, int* num_used, -+ const uint16_t* const clusters, uint16_t* const cluster_mappings, -+ VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins, -+ float combine_cost_factor, int low_effort) { - VP8LHistogram** const histograms = image_histo->histograms; - int idx; - struct { -@@ -679,11 +673,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, - cluster_mappings[clusters[idx]] = clusters[first]; - } else { - // try to merge #idx into #first (both share the same bin_id) -- const double bit_cost = histograms[idx]->bit_cost_; -- const double bit_cost_thresh = -bit_cost * combine_cost_factor; -- const double curr_cost_diff = -- HistogramAddEval(histograms[first], histograms[idx], -- cur_combo, bit_cost_thresh); -+ const float bit_cost = histograms[idx]->bit_cost_; -+ const float bit_cost_thresh = -bit_cost * combine_cost_factor; -+ const float curr_cost_diff = HistogramAddEval( -+ histograms[first], histograms[idx], cur_combo, bit_cost_thresh); - if (curr_cost_diff < bit_cost_thresh) { - // Try to merge two histograms only if the combo is a trivial one or - // the two candidate histograms are already non-trivial. -@@ -731,8 +724,8 @@ static uint32_t MyRand(uint32_t* const seed) { - typedef struct { - int idx1; - int idx2; -- double cost_diff; -- double cost_combo; -+ float cost_diff; -+ float cost_combo; - } HistogramPair; - - typedef struct { -@@ -787,10 +780,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue, - // Update the cost diff and combo of a pair of histograms. 
This needs to be - // called when the the histograms have been merged with a third one. - static void HistoQueueUpdatePair(const VP8LHistogram* const h1, -- const VP8LHistogram* const h2, -- double threshold, -+ const VP8LHistogram* const h2, float threshold, - HistogramPair* const pair) { -- const double sum_cost = h1->bit_cost_ + h2->bit_cost_; -+ const float sum_cost = h1->bit_cost_ + h2->bit_cost_; - pair->cost_combo = 0.; - GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo); - pair->cost_diff = pair->cost_combo - sum_cost; -@@ -799,9 +791,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1, - // Create a pair from indices "idx1" and "idx2" provided its cost - // is inferior to "threshold", a negative entropy. - // It returns the cost of the pair, or 0. if it superior to threshold. --static double HistoQueuePush(HistoQueue* const histo_queue, -- VP8LHistogram** const histograms, int idx1, -- int idx2, double threshold) { -+static float HistoQueuePush(HistoQueue* const histo_queue, -+ VP8LHistogram** const histograms, int idx1, -+ int idx2, float threshold) { - const VP8LHistogram* h1; - const VP8LHistogram* h2; - HistogramPair pair; -@@ -945,8 +937,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, - ++tries_with_no_success < num_tries_no_success; - ++iter) { - int* mapping_index; -- double best_cost = -- (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff; -+ float best_cost = -+ (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff; - int best_idx1 = -1, best_idx2 = 1; - const uint32_t rand_range = (*num_used - 1) * (*num_used); - // (*num_used) / 2 was chosen empirically. Less means faster but worse -@@ -955,7 +947,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, - - // Pick random samples. 
- for (j = 0; *num_used >= 2 && j < num_tries; ++j) { -- double curr_cost; -+ float curr_cost; - // Choose two different histograms at random and try to combine them. - const uint32_t tmp = MyRand(&seed) % rand_range; - uint32_t idx1 = tmp / (*num_used - 1); -@@ -1034,7 +1026,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, - *do_greedy = (*num_used <= min_cluster_size); - ok = 1; - --End: -+ End: - HistoQueueClear(&histo_queue); - WebPSafeFree(mappings); - return ok; -@@ -1057,7 +1049,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, - if (out_size > 1) { - for (i = 0; i < in_size; ++i) { - int best_out = 0; -- double best_bits = MAX_COST; -+ float best_bits = MAX_BIT_COST; - int k; - if (in_histo[i] == NULL) { - // Arbitrarily set to the previous value if unused to help future LZ77. -@@ -1065,7 +1057,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, - continue; - } - for (k = 0; k < out_size; ++k) { -- double cur_bits; -+ float cur_bits; - cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits); - if (k == 0 || cur_bits < best_bits) { - best_bits = cur_bits; -@@ -1093,13 +1085,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in, - } - } - --static double GetCombineCostFactor(int histo_size, int quality) { -- double combine_cost_factor = 0.16; -+static float GetCombineCostFactor(int histo_size, int quality) { -+ float combine_cost_factor = 0.16f; - if (quality < 90) { -- if (histo_size > 256) combine_cost_factor /= 2.; -- if (histo_size > 512) combine_cost_factor /= 2.; -- if (histo_size > 1024) combine_cost_factor /= 2.; -- if (quality <= 50) combine_cost_factor /= 2.; -+ if (histo_size > 256) combine_cost_factor /= 2.f; -+ if (histo_size > 512) combine_cost_factor /= 2.f; -+ if (histo_size > 1024) combine_cost_factor /= 2.f; -+ if (quality <= 50) combine_cost_factor /= 2.f; - } - return combine_cost_factor; - } -@@ -1169,15 +1161,17 @@ static void 
RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) { - } - - int VP8LGetHistoImageSymbols(int xsize, int ysize, -- const VP8LBackwardRefs* const refs, -- int quality, int low_effort, -- int histo_bits, int cache_bits, -+ const VP8LBackwardRefs* const refs, int quality, -+ int low_effort, int histogram_bits, int cache_bits, - VP8LHistogramSet* const image_histo, - VP8LHistogram* const tmp_histo, -- uint16_t* const histogram_symbols) { -- int ok = 0; -- const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1; -- const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1; -+ uint16_t* const histogram_symbols, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent) { -+ const int histo_xsize = -+ histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1; -+ const int histo_ysize = -+ histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1; - const int image_histo_raw_size = histo_xsize * histo_ysize; - VP8LHistogramSet* const orig_histo = - VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits); -@@ -1187,13 +1181,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, - const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE; - int entropy_combine; - uint16_t* const map_tmp = -- WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp)); -+ WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp)); - uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size; - int num_used = image_histo_raw_size; -- if (orig_histo == NULL || map_tmp == NULL) goto Error; -+ if (orig_histo == NULL || map_tmp == NULL) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } - - // Construct the histograms from backward references. -- HistogramBuild(xsize, histo_bits, refs, orig_histo); -+ HistogramBuild(xsize, histogram_bits, refs, orig_histo); - // Copies the histograms and computes its bit_cost. 
- // histogram_symbols is optimized - HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used, -@@ -1204,16 +1201,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, - - if (entropy_combine) { - uint16_t* const bin_map = map_tmp; -- const double combine_cost_factor = -+ const float combine_cost_factor = - GetCombineCostFactor(image_histo_raw_size, quality); - const uint32_t num_clusters = num_used; - - HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort); - // Collapse histograms with similar entropy. -- HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols, -- cluster_mappings, tmp_histo, bin_map, -- entropy_combine_num_bins, combine_cost_factor, -- low_effort); -+ HistogramCombineEntropyBin( -+ image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo, -+ bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort); - OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters, - map_tmp, histogram_symbols); - } -@@ -1227,11 +1223,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, - int do_greedy; - if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size, - &do_greedy)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - if (do_greedy) { - RemoveEmptyHistograms(image_histo); - if (!HistogramCombineGreedy(image_histo, &num_used)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - } -@@ -1241,10 +1239,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, - RemoveEmptyHistograms(image_histo); - HistogramRemap(orig_histo, image_histo, histogram_symbols); - -- ok = 1; -+ if (!WebPReportProgress(pic, *percent + percent_range, percent)) { -+ goto Error; -+ } - - Error: - VP8LFreeHistogramSet(orig_histo); - WebPSafeFree(map_tmp); -- return ok; -+ return (pic->error_code == VP8_ENC_OK); - } -diff --git a/3rdparty/libwebp/src/enc/histogram_enc.h b/3rdparty/libwebp/src/enc/histogram_enc.h -index 54c2d2178393..4c0bb97464de 100644 ---- 
a/3rdparty/libwebp/src/enc/histogram_enc.h -+++ b/3rdparty/libwebp/src/enc/histogram_enc.h -@@ -40,10 +40,10 @@ typedef struct { - int palette_code_bits_; - uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha - // literal symbols are single valued. -- double bit_cost_; // cached value of bit cost. -- double literal_cost_; // Cached values of dominant entropy costs: -- double red_cost_; // literal, red & blue. -- double blue_cost_; -+ float bit_cost_; // cached value of bit cost. -+ float literal_cost_; // Cached values of dominant entropy costs: -+ float red_cost_; // literal, red & blue. -+ float blue_cost_; - uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance - } VP8LHistogram; - -@@ -64,8 +64,8 @@ void VP8LHistogramCreate(VP8LHistogram* const p, - const VP8LBackwardRefs* const refs, - int palette_code_bits); - --// Return the size of the histogram for a given palette_code_bits. --int VP8LGetHistogramSize(int palette_code_bits); -+// Return the size of the histogram for a given cache_bits. -+int VP8LGetHistogramSize(int cache_bits); - - // Set the palette_code_bits and reset the stats. - // If init_arrays is true, the arrays are also filled with 0's. -@@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { - ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); - } - --// Builds the histogram image. -+// Builds the histogram image. pic and percent are for progress. -+// Returns false in case of error (stored in pic->error_code). 
- int VP8LGetHistoImageSymbols(int xsize, int ysize, -- const VP8LBackwardRefs* const refs, -- int quality, int low_effort, -- int histogram_bits, int cache_bits, -- VP8LHistogramSet* const image_in, -+ const VP8LBackwardRefs* const refs, int quality, -+ int low_effort, int histogram_bits, int cache_bits, -+ VP8LHistogramSet* const image_histo, - VP8LHistogram* const tmp_histo, -- uint16_t* const histogram_symbols); -+ uint16_t* const histogram_symbols, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent); - - // Returns the entropy for the symbols in the input array. --double VP8LBitsEntropy(const uint32_t* const array, int n); -+float VP8LBitsEntropy(const uint32_t* const array, int n); - - // Estimate how many bits the combined entropy of literals and distance - // approximately maps to. --double VP8LHistogramEstimateBits(VP8LHistogram* const p); -+float VP8LHistogramEstimateBits(VP8LHistogram* const p); - - #ifdef __cplusplus - } -diff --git a/3rdparty/libwebp/src/enc/picture_csp_enc.c b/3rdparty/libwebp/src/enc/picture_csp_enc.c -index 35eede96355b..a9280e6c3050 100644 ---- a/3rdparty/libwebp/src/enc/picture_csp_enc.c -+++ b/3rdparty/libwebp/src/enc/picture_csp_enc.c -@@ -15,12 +15,19 @@ - #include - #include - -+#include "sharpyuv/sharpyuv.h" -+#include "sharpyuv/sharpyuv_csp.h" - #include "src/enc/vp8i_enc.h" - #include "src/utils/random_utils.h" - #include "src/utils/utils.h" - #include "src/dsp/dsp.h" - #include "src/dsp/lossless.h" - #include "src/dsp/yuv.h" -+#include "src/dsp/cpu.h" -+ -+#if defined(WEBP_USE_THREAD) && !defined(_WIN32) -+#include -+#endif - - // Uncomment to disable gamma-compression during RGB->U/V averaging - #define USE_GAMMA_COMPRESSION -@@ -62,10 +69,12 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height, - int WebPPictureHasTransparency(const WebPPicture* picture) { - if (picture == NULL) return 0; - if (picture->use_argb) { -- const int alpha_offset = ALPHA_OFFSET; -- return 
CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, -- picture->width, picture->height, -- 4, picture->argb_stride * sizeof(*picture->argb)); -+ if (picture->argb != NULL) { -+ return CheckNonOpaque((const uint8_t*)picture->argb + ALPHA_OFFSET, -+ picture->width, picture->height, -+ 4, picture->argb_stride * sizeof(*picture->argb)); -+ } -+ return 0; - } - return CheckNonOpaque(picture->a, picture->width, picture->height, - 1, picture->a_stride); -@@ -76,30 +85,31 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { - - #if defined(USE_GAMMA_COMPRESSION) - --// gamma-compensates loss of resolution during chroma subsampling --#define kGamma 0.80 // for now we use a different gamma value than kGammaF --#define kGammaFix 12 // fixed-point precision for linear values --#define kGammaScale ((1 << kGammaFix) - 1) --#define kGammaTabFix 7 // fixed-point fractional bits precision --#define kGammaTabScale (1 << kGammaTabFix) --#define kGammaTabRounder (kGammaTabScale >> 1) --#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) -+// Gamma correction compensates loss of resolution during chroma subsampling. 
-+#define GAMMA_FIX 12 // fixed-point precision for linear values -+#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision -+#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX)) -+static const double kGamma = 0.80; -+static const int kGammaScale = ((1 << GAMMA_FIX) - 1); -+static const int kGammaTabScale = (1 << GAMMA_TAB_FIX); -+static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1); - --static int kLinearToGammaTab[kGammaTabSize + 1]; -+static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; - static uint16_t kGammaToLinearTab[256]; - static volatile int kGammaTablesOk = 0; - static void InitGammaTables(void); -+extern VP8CPUInfo VP8GetCPUInfo; - - WEBP_DSP_INIT_FUNC(InitGammaTables) { - if (!kGammaTablesOk) { - int v; -- const double scale = (double)(1 << kGammaTabFix) / kGammaScale; -+ const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale; - const double norm = 1. / 255.; - for (v = 0; v <= 255; ++v) { - kGammaToLinearTab[v] = - (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); - } -- for (v = 0; v <= kGammaTabSize; ++v) { -+ for (v = 0; v <= GAMMA_TAB_SIZE; ++v) { - kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); - } - kGammaTablesOk = 1; -@@ -111,12 +121,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { - } - - static WEBP_INLINE int Interpolate(int v) { -- const int tab_pos = v >> (kGammaTabFix + 2); // integer part -+ const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part - const int x = v & ((kGammaTabScale << 2) - 1); // fractional part - const int v0 = kLinearToGammaTab[tab_pos]; - const int v1 = kLinearToGammaTab[tab_pos + 1]; - const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate -- assert(tab_pos + 1 < kGammaTabSize + 1); -+ assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1); - return y; - } - -@@ -124,7 +134,7 @@ static WEBP_INLINE int Interpolate(int v) { - // U/V value, suitable for RGBToU/V calls. 
- static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { - const int y = Interpolate(base_value << shift); // final uplifted value -- return (y + kGammaTabRounder) >> kGammaTabFix; // descale -+ return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale - } - - #else -@@ -158,415 +168,26 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { - //------------------------------------------------------------------------------ - // Sharp RGB->YUV conversion - --static const int kNumIterations = 4; - static const int kMinDimensionIterativeConversion = 4; - --// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some --// banding sometimes. Better use extra precision. --#define SFIX 2 // fixed-point precision of RGB and Y/W --typedef int16_t fixed_t; // signed type with extra SFIX precision for UV --typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W -- --#define SHALF (1 << SFIX >> 1) --#define MAX_Y_T ((256 << SFIX) - 1) --#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) -- --#if defined(USE_GAMMA_COMPRESSION) -- --// We use tables of different size and precision for the Rec709 / BT2020 --// transfer function. --#define kGammaF (1./0.45) --static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; --#define GAMMA_TO_LINEAR_BITS 14 --static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX --static volatile int kGammaTablesSOk = 0; --static void InitGammaTablesS(void); -- --WEBP_DSP_INIT_FUNC(InitGammaTablesS) { -- assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values -- if (!kGammaTablesSOk) { -- int v; -- const double norm = 1. / MAX_Y_T; -- const double scale = 1. 
/ kGammaTabSize; -- const double a = 0.09929682680944; -- const double thresh = 0.018053968510807; -- const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; -- for (v = 0; v <= MAX_Y_T; ++v) { -- const double g = norm * v; -- double value; -- if (g <= thresh * 4.5) { -- value = g / 4.5; -- } else { -- const double a_rec = 1. / (1. + a); -- value = pow(a_rec * (g + a), kGammaF); -- } -- kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); -- } -- for (v = 0; v <= kGammaTabSize; ++v) { -- const double g = scale * v; -- double value; -- if (g <= thresh) { -- value = 4.5 * g; -- } else { -- value = (1. + a) * pow(g, 1. / kGammaF) - a; -- } -- // we already incorporate the 1/2 rounding constant here -- kLinearToGammaTabS[v] = -- (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); -- } -- // to prevent small rounding errors to cause read-overflow: -- kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; -- kGammaTablesSOk = 1; -- } --} -- --// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS --static WEBP_INLINE uint32_t GammaToLinearS(int v) { -- return kGammaToLinearTabS[v]; --} -- --static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { -- // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision -- const uint32_t v = value * kGammaTabSize; -- const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; -- // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision -- const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part -- // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) -- const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; -- const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; -- // Final interpolation. Note that rounding is already included. -- const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. 
-- const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); -- return result; --} -- --#else -- --static void InitGammaTablesS(void) {} --static WEBP_INLINE uint32_t GammaToLinearS(int v) { -- return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; --} --static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { -- return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; --} -- --#endif // USE_GAMMA_COMPRESSION -- --//------------------------------------------------------------------------------ -- --static uint8_t clip_8b(fixed_t v) { -- return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; --} -- --static fixed_y_t clip_y(int y) { -- return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; --} -- --//------------------------------------------------------------------------------ -- --static int RGBToGray(int r, int g, int b) { -- const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF; -- return (luma >> YUV_FIX); --} -- --static uint32_t ScaleDown(int a, int b, int c, int d) { -- const uint32_t A = GammaToLinearS(a); -- const uint32_t B = GammaToLinearS(b); -- const uint32_t C = GammaToLinearS(c); -- const uint32_t D = GammaToLinearS(d); -- return LinearToGammaS((A + B + C + D + 2) >> 2); --} -- --static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { -- int i; -- for (i = 0; i < w; ++i) { -- const uint32_t R = GammaToLinearS(src[0 * w + i]); -- const uint32_t G = GammaToLinearS(src[1 * w + i]); -- const uint32_t B = GammaToLinearS(src[2 * w + i]); -- const uint32_t Y = RGBToGray(R, G, B); -- dst[i] = (fixed_y_t)LinearToGammaS(Y); -- } --} -- --static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, -- fixed_t* dst, int uv_w) { -- int i; -- for (i = 0; i < uv_w; ++i) { -- const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], -- src2[0 * uv_w + 0], src2[0 * uv_w + 1]); -- const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], -- src2[2 * uv_w + 0], src2[2 * uv_w + 1]); -- const int b = ScaleDown(src1[4 
* uv_w + 0], src1[4 * uv_w + 1], -- src2[4 * uv_w + 0], src2[4 * uv_w + 1]); -- const int W = RGBToGray(r, g, b); -- dst[0 * uv_w] = (fixed_t)(r - W); -- dst[1 * uv_w] = (fixed_t)(g - W); -- dst[2 * uv_w] = (fixed_t)(b - W); -- dst += 1; -- src1 += 2; -- src2 += 2; -- } --} -- --static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { -- int i; -- for (i = 0; i < w; ++i) { -- y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); -- } --} -- --//------------------------------------------------------------------------------ -- --static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { -- const int v0 = (A * 3 + B + 2) >> 2; -- return clip_y(v0 + W0); --} -- --//------------------------------------------------------------------------------ -- --static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX -- return ((fixed_y_t)a << SFIX) | SHALF; --} -- --static void ImportOneRow(const uint8_t* const r_ptr, -- const uint8_t* const g_ptr, -- const uint8_t* const b_ptr, -- int step, -- int pic_width, -- fixed_y_t* const dst) { -- int i; -- const int w = (pic_width + 1) & ~1; -- for (i = 0; i < pic_width; ++i) { -- const int off = i * step; -- dst[i + 0 * w] = UpLift(r_ptr[off]); -- dst[i + 1 * w] = UpLift(g_ptr[off]); -- dst[i + 2 * w] = UpLift(b_ptr[off]); -- } -- if (pic_width & 1) { // replicate rightmost pixel -- dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; -- dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; -- dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; -- } --} -- --static void InterpolateTwoRows(const fixed_y_t* const best_y, -- const fixed_t* prev_uv, -- const fixed_t* cur_uv, -- const fixed_t* next_uv, -- int w, -- fixed_y_t* out1, -- fixed_y_t* out2) { -- const int uv_w = w >> 1; -- const int len = (w - 1) >> 1; // length to filter -- int k = 3; -- while (k-- > 0) { // process each R/G/B segments in turn -- // special boundary case for i==0 -- out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); -- out2[0] = 
Filter2(cur_uv[0], next_uv[0], best_y[w]); -- -- WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); -- WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); -- -- // special boundary case for i == w - 1 when w is even -- if (!(w & 1)) { -- out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], -- best_y[w - 1 + 0]); -- out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], -- best_y[w - 1 + w]); -- } -- out1 += w; -- out2 += w; -- prev_uv += uv_w; -- cur_uv += uv_w; -- next_uv += uv_w; -- } --} -- --static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { -- const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; -- return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); --} -- --static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { -- const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; -- return clip_8b(128 + (u >> (YUV_FIX + SFIX))); --} -- --static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { -- const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; -- return clip_8b(128 + (v >> (YUV_FIX + SFIX))); --} -- --static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, -- WebPPicture* const picture) { -- int i, j; -- uint8_t* dst_y = picture->y; -- uint8_t* dst_u = picture->u; -- uint8_t* dst_v = picture->v; -- const fixed_t* const best_uv_base = best_uv; -- const int w = (picture->width + 1) & ~1; -- const int h = (picture->height + 1) & ~1; -- const int uv_w = w >> 1; -- const int uv_h = h >> 1; -- for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) { -- for (i = 0; i < picture->width; ++i) { -- const int off = (i >> 1); -- const int W = best_y[i]; -- const int r = best_uv[off + 0 * uv_w] + W; -- const int g = best_uv[off + 1 * uv_w] + W; -- const int b = best_uv[off + 2 * uv_w] + W; -- dst_y[i] = ConvertRGBToY(r, g, b); -- } -- best_y += w; -- best_uv += (j & 1) * 3 * uv_w; -- dst_y += picture->y_stride; -- } -- for (best_uv = best_uv_base, j = 0; j < 
uv_h; ++j) { -- for (i = 0; i < uv_w; ++i) { -- const int off = i; -- const int r = best_uv[off + 0 * uv_w]; -- const int g = best_uv[off + 1 * uv_w]; -- const int b = best_uv[off + 2 * uv_w]; -- dst_u[i] = ConvertRGBToU(r, g, b); -- dst_v[i] = ConvertRGBToV(r, g, b); -- } -- best_uv += 3 * uv_w; -- dst_u += picture->uv_stride; -- dst_v += picture->uv_stride; -- } -- return 1; --} -- - //------------------------------------------------------------------------------ - // Main function - --#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) -- - static int PreprocessARGB(const uint8_t* r_ptr, - const uint8_t* g_ptr, - const uint8_t* b_ptr, - int step, int rgb_stride, - WebPPicture* const picture) { -- // we expand the right/bottom border if needed -- const int w = (picture->width + 1) & ~1; -- const int h = (picture->height + 1) & ~1; -- const int uv_w = w >> 1; -- const int uv_h = h >> 1; -- uint64_t prev_diff_y_sum = ~0; -- int j, iter; -- -- // TODO(skal): allocate one big memory chunk. But for now, it's easier -- // for valgrind debugging to have several chunks. 
-- fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch -- fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); -- fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); -- fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); -- fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); -- fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); -- fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); -- fixed_y_t* best_y = best_y_base; -- fixed_y_t* target_y = target_y_base; -- fixed_t* best_uv = best_uv_base; -- fixed_t* target_uv = target_uv_base; -- const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); -- int ok; -- -- if (best_y_base == NULL || best_uv_base == NULL || -- target_y_base == NULL || target_uv_base == NULL || -- best_rgb_y == NULL || best_rgb_uv == NULL || -- tmp_buffer == NULL) { -- ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); -- goto End; -- } -- assert(picture->width >= kMinDimensionIterativeConversion); -- assert(picture->height >= kMinDimensionIterativeConversion); -- -- WebPInitConvertARGBToYUV(); -- -- // Import RGB samples to W/RGB representation. 
-- for (j = 0; j < picture->height; j += 2) { -- const int is_last_row = (j == picture->height - 1); -- fixed_y_t* const src1 = tmp_buffer + 0 * w; -- fixed_y_t* const src2 = tmp_buffer + 3 * w; -- -- // prepare two rows of input -- ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1); -- if (!is_last_row) { -- ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, -- step, picture->width, src2); -- } else { -- memcpy(src2, src1, 3 * w * sizeof(*src2)); -- } -- StoreGray(src1, best_y + 0, w); -- StoreGray(src2, best_y + w, w); -- -- UpdateW(src1, target_y, w); -- UpdateW(src2, target_y + w, w); -- UpdateChroma(src1, src2, target_uv, uv_w); -- memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); -- best_y += 2 * w; -- best_uv += 3 * uv_w; -- target_y += 2 * w; -- target_uv += 3 * uv_w; -- r_ptr += 2 * rgb_stride; -- g_ptr += 2 * rgb_stride; -- b_ptr += 2 * rgb_stride; -- } -- -- // Iterate and resolve clipping conflicts. -- for (iter = 0; iter < kNumIterations; ++iter) { -- const fixed_t* cur_uv = best_uv_base; -- const fixed_t* prev_uv = best_uv_base; -- uint64_t diff_y_sum = 0; -- -- best_y = best_y_base; -- best_uv = best_uv_base; -- target_y = target_y_base; -- target_uv = target_uv_base; -- for (j = 0; j < h; j += 2) { -- fixed_y_t* const src1 = tmp_buffer + 0 * w; -- fixed_y_t* const src2 = tmp_buffer + 3 * w; -- { -- const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 
3 * uv_w : 0); -- InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); -- prev_uv = cur_uv; -- cur_uv = next_uv; -- } -- -- UpdateW(src1, best_rgb_y + 0 * w, w); -- UpdateW(src2, best_rgb_y + 1 * w, w); -- UpdateChroma(src1, src2, best_rgb_uv, uv_w); -- -- // update two rows of Y and one row of RGB -- diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); -- WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); -- -- best_y += 2 * w; -- best_uv += 3 * uv_w; -- target_y += 2 * w; -- target_uv += 3 * uv_w; -- } -- // test exit condition -- if (iter > 0) { -- if (diff_y_sum < diff_y_threshold) break; -- if (diff_y_sum > prev_diff_y_sum) break; -- } -- prev_diff_y_sum = diff_y_sum; -+ const int ok = SharpYuvConvert( -+ r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8, -+ picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v, -+ picture->uv_stride, /*yuv_bit_depth=*/8, picture->width, -+ picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp)); -+ if (!ok) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } -- // final reconstruction -- ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); -- -- End: -- WebPSafeFree(best_y_base); -- WebPSafeFree(best_uv_base); -- WebPSafeFree(target_y_base); -- WebPSafeFree(target_uv_base); -- WebPSafeFree(best_rgb_y); -- WebPSafeFree(best_rgb_uv); -- WebPSafeFree(tmp_buffer); - return ok; - } --#undef SAFE_ALLOC - - //------------------------------------------------------------------------------ - // "Fast" regular RGB->YUV -@@ -591,8 +212,8 @@ static const int kAlphaFix = 19; - // and constant are adjusted very tightly to fit 32b arithmetic. 
- // In particular, they use the fact that the operands for 'v / a' are actually - // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 --// with ai in [0..255] and pi in [0..1<> 1); ++y) { -@@ -1044,7 +678,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - // Allocate a new argb buffer (discarding the previous one). -- if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0; -+ if (!WebPPictureAllocARGB(picture)) return 0; - picture->use_argb = 1; - - // Convert -@@ -1106,6 +740,8 @@ static int Import(WebPPicture* const picture, - const int width = picture->width; - const int height = picture->height; - -+ if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0; -+ - if (!picture->use_argb) { - const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL; - return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, -@@ -1163,24 +799,24 @@ static int Import(WebPPicture* const picture, - #if !defined(WEBP_REDUCE_CSP) - - int WebPPictureImportBGR(WebPPicture* picture, -- const uint8_t* rgb, int rgb_stride) { -- return (picture != NULL && rgb != NULL) -- ? Import(picture, rgb, rgb_stride, 3, 1, 0) -+ const uint8_t* bgr, int bgr_stride) { -+ return (picture != NULL && bgr != NULL) -+ ? Import(picture, bgr, bgr_stride, 3, 1, 0) - : 0; - } - - int WebPPictureImportBGRA(WebPPicture* picture, -- const uint8_t* rgba, int rgba_stride) { -- return (picture != NULL && rgba != NULL) -- ? Import(picture, rgba, rgba_stride, 4, 1, 1) -+ const uint8_t* bgra, int bgra_stride) { -+ return (picture != NULL && bgra != NULL) -+ ? Import(picture, bgra, bgra_stride, 4, 1, 1) - : 0; - } - - - int WebPPictureImportBGRX(WebPPicture* picture, -- const uint8_t* rgba, int rgba_stride) { -- return (picture != NULL && rgba != NULL) -- ? 
Import(picture, rgba, rgba_stride, 4, 1, 0) -+ const uint8_t* bgrx, int bgrx_stride) { -+ return (picture != NULL && bgrx != NULL) -+ ? Import(picture, bgrx, bgrx_stride, 4, 1, 0) - : 0; - } - -@@ -1201,9 +837,9 @@ int WebPPictureImportRGBA(WebPPicture* picture, - } - - int WebPPictureImportRGBX(WebPPicture* picture, -- const uint8_t* rgba, int rgba_stride) { -- return (picture != NULL && rgba != NULL) -- ? Import(picture, rgba, rgba_stride, 4, 0, 0) -+ const uint8_t* rgbx, int rgbx_stride) { -+ return (picture != NULL && rgbx != NULL) -+ ? Import(picture, rgbx, rgbx_stride, 4, 0, 0) - : 0; - } - -diff --git a/3rdparty/libwebp/src/enc/picture_enc.c b/3rdparty/libwebp/src/enc/picture_enc.c -index c691622d03cd..5a2703541f2d 100644 ---- a/3rdparty/libwebp/src/enc/picture_enc.c -+++ b/3rdparty/libwebp/src/enc/picture_enc.c -@@ -12,10 +12,10 @@ - // Author: Skal (pascal.massimino@gmail.com) - - #include <assert.h> -+#include <limits.h> - #include <stdlib.h> - - #include "src/enc/vp8i_enc.h" --#include "src/dsp/dsp.h" - #include "src/utils/utils.h" - - //------------------------------------------------------------------------------ -@@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) { - - //------------------------------------------------------------------------------ - -+int WebPValidatePicture(const WebPPicture* const picture) { -+ if (picture == NULL) return 0; -+ if (picture->width <= 0 || picture->height <= 0) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -+ } -+ if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 || -+ picture->height <= 0 || picture->height / 4 > INT_MAX / 4) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -+ } -+ if (picture->colorspace != WEBP_YUV420 && -+ picture->colorspace != WEBP_YUV420A) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); -+ } -+ return 1; -+} -+ - static void WebPPictureResetBufferARGB(WebPPicture* const picture) { - picture->memory_argb_ =
NULL; - picture->argb = NULL; -@@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) { - WebPPictureResetBufferYUVA(picture); - } - --int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { -+int WebPPictureAllocARGB(WebPPicture* const picture) { - void* memory; -+ const int width = picture->width; -+ const int height = picture->height; - const uint64_t argb_size = (uint64_t)width * height; - -- assert(picture != NULL); -+ if (!WebPValidatePicture(picture)) return 0; - - WebPSafeFree(picture->memory_argb_); - WebPPictureResetBufferARGB(picture); - -- if (width <= 0 || height <= 0) { -- return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -- } - // allocate a new buffer. - memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb)); - if (memory == NULL) { -@@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { - return 1; - } - --int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { -- const WebPEncCSP uv_csp = -- (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK); -+int WebPPictureAllocYUVA(WebPPicture* const picture) { - const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT; -+ const int width = picture->width; -+ const int height = picture->height; - const int y_stride = width; - const int uv_width = (int)(((int64_t)width + 1) >> 1); - const int uv_height = (int)(((int64_t)height + 1) >> 1); -@@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { - uint64_t y_size, uv_size, a_size, total_size; - uint8_t* mem; - -- assert(picture != NULL); -+ if (!WebPValidatePicture(picture)) return 0; - - WebPSafeFree(picture->memory_); - WebPPictureResetBufferYUVA(picture); - -- if (uv_csp != WEBP_YUV420) { -- return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); -- } -- - // alpha - a_width = has_alpha ? 
width : 0; - a_stride = a_width; -@@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { - - int WebPPictureAlloc(WebPPicture* picture) { - if (picture != NULL) { -- const int width = picture->width; -- const int height = picture->height; -- - WebPPictureFree(picture); // erase previous buffer - - if (!picture->use_argb) { -- return WebPPictureAllocYUVA(picture, width, height); -+ return WebPPictureAllocYUVA(picture); - } else { -- return WebPPictureAllocARGB(picture, width, height); -+ return WebPPictureAllocARGB(picture); - } - } - return 1; -diff --git a/3rdparty/libwebp/src/enc/picture_rescale_enc.c b/3rdparty/libwebp/src/enc/picture_rescale_enc.c -index 58a6ae7b9de8..ea90d825484e 100644 ---- a/3rdparty/libwebp/src/enc/picture_rescale_enc.c -+++ b/3rdparty/libwebp/src/enc/picture_rescale_enc.c -@@ -13,14 +13,15 @@ - - #include "src/webp/encode.h" - --#if !defined(WEBP_REDUCE_SIZE) -- - #include <assert.h> - #include <stdlib.h> - - #include "src/enc/vp8i_enc.h" -+ -+#if !defined(WEBP_REDUCE_SIZE) - #include "src/utils/rescaler_utils.h" - #include "src/utils/utils.h" -+#endif // !defined(WEBP_REDUCE_SIZE) - - #define HALVE(x) (((x) + 1) >> 1) - -@@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic, - return 1; - } - -+#if !defined(WEBP_REDUCE_SIZE) - int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { - if (src == NULL || dst == NULL) return 0; - if (src == dst) return 1; -@@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { - } - return 1; - } -+#endif // !defined(WEBP_REDUCE_SIZE) - - int WebPPictureIsView(const WebPPicture* picture) { - if (picture == NULL) return 0; -@@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src, - return 1; - } - -+#if !defined(WEBP_REDUCE_SIZE) - //------------------------------------------------------------------------------ - // Picture cropping - -@@ -133,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic, - PictureGrabSpecs(pic,
&tmp); - tmp.width = width; - tmp.height = height; -- if (!WebPPictureAlloc(&tmp)) return 0; -+ if (!WebPPictureAlloc(&tmp)) { -+ return WebPEncodingSetError(pic, tmp.error_code); -+ } - - if (!pic->use_argb) { - const int y_offset = top * pic->y_stride + left; -@@ -164,22 +170,25 @@ int WebPPictureCrop(WebPPicture* pic, - //------------------------------------------------------------------------------ - // Simple picture rescaler - --static void RescalePlane(const uint8_t* src, -- int src_width, int src_height, int src_stride, -- uint8_t* dst, -- int dst_width, int dst_height, int dst_stride, -- rescaler_t* const work, -- int num_channels) { -+static int RescalePlane(const uint8_t* src, -+ int src_width, int src_height, int src_stride, -+ uint8_t* dst, -+ int dst_width, int dst_height, int dst_stride, -+ rescaler_t* const work, -+ int num_channels) { - WebPRescaler rescaler; - int y = 0; -- WebPRescalerInit(&rescaler, src_width, src_height, -- dst, dst_width, dst_height, dst_stride, -- num_channels, work); -+ if (!WebPRescalerInit(&rescaler, src_width, src_height, -+ dst, dst_width, dst_height, dst_stride, -+ num_channels, work)) { -+ return 0; -+ } - while (y < src_height) { - y += WebPRescalerImport(&rescaler, src_height - y, - src + y * src_stride, src_stride); - WebPRescalerExport(&rescaler); - } -+ return 1; - } - - static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) { -@@ -195,73 +204,76 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { - } - } - --int WebPPictureRescale(WebPPicture* pic, int width, int height) { -+int WebPPictureRescale(WebPPicture* picture, int width, int height) { - WebPPicture tmp; - int prev_width, prev_height; - rescaler_t* work; - -- if (pic == NULL) return 0; -- prev_width = pic->width; -- prev_height = pic->height; -+ if (picture == NULL) return 0; -+ prev_width = picture->width; -+ prev_height = picture->height; - if (!WebPRescalerGetScaledDimensions( - prev_width, prev_height, &width, &height)) { 
-- return 0; -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); - } - -- PictureGrabSpecs(pic, &tmp); -+ PictureGrabSpecs(picture, &tmp); - tmp.width = width; - tmp.height = height; -- if (!WebPPictureAlloc(&tmp)) return 0; -+ if (!WebPPictureAlloc(&tmp)) { -+ return WebPEncodingSetError(picture, tmp.error_code); -+ } - -- if (!pic->use_argb) { -+ if (!picture->use_argb) { - work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); -- return 0; -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - // If present, we need to rescale alpha first (for AlphaMultiplyY). -- if (pic->a != NULL) { -+ if (picture->a != NULL) { - WebPInitAlphaProcessing(); -- RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, -- tmp.a, width, height, tmp.a_stride, work, 1); -+ if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride, -+ tmp.a, width, height, tmp.a_stride, work, 1)) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -+ } - } - - // We take transparency into account on the luma plane only. That's not - // totally exact blending, but still is a good approximation. 
-- AlphaMultiplyY(pic, 0); -- RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, -- tmp.y, width, height, tmp.y_stride, work, 1); -+ AlphaMultiplyY(picture, 0); -+ if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride, -+ tmp.y, width, height, tmp.y_stride, work, 1) || -+ !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height), -+ picture->uv_stride, tmp.u, HALVE(width), HALVE(height), -+ tmp.uv_stride, work, 1) || -+ !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height), -+ picture->uv_stride, tmp.v, HALVE(width), HALVE(height), -+ tmp.uv_stride, work, 1)) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -+ } - AlphaMultiplyY(&tmp, 1); -- -- RescalePlane(pic->u, -- HALVE(prev_width), HALVE(prev_height), pic->uv_stride, -- tmp.u, -- HALVE(width), HALVE(height), tmp.uv_stride, work, 1); -- RescalePlane(pic->v, -- HALVE(prev_width), HALVE(prev_height), pic->uv_stride, -- tmp.v, -- HALVE(width), HALVE(height), tmp.uv_stride, work, 1); - } else { - work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); - if (work == NULL) { - WebPPictureFree(&tmp); -- return 0; -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - // In order to correctly interpolate colors, we need to apply the alpha - // weighting first (black-matting), scale the RGB values, and remove - // the premultiplication afterward (while preserving the alpha channel). 
- WebPInitAlphaProcessing(); -- AlphaMultiplyARGB(pic, 0); -- RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, -- pic->argb_stride * 4, -- (uint8_t*)tmp.argb, width, height, -- tmp.argb_stride * 4, -- work, 4); -+ AlphaMultiplyARGB(picture, 0); -+ if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height, -+ picture->argb_stride * 4, (uint8_t*)tmp.argb, width, -+ height, tmp.argb_stride * 4, work, 4)) { -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); -+ } - AlphaMultiplyARGB(&tmp, 1); - } -- WebPPictureFree(pic); -+ WebPPictureFree(picture); - WebPSafeFree(work); -- *pic = tmp; -+ *picture = tmp; - return 1; - } - -@@ -273,23 +285,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { - return 0; - } - --int WebPPictureIsView(const WebPPicture* picture) { -- (void)picture; -- return 0; --} -- --int WebPPictureView(const WebPPicture* src, -- int left, int top, int width, int height, -- WebPPicture* dst) { -- (void)src; -- (void)left; -- (void)top; -- (void)width; -- (void)height; -- (void)dst; -- return 0; --} -- - int WebPPictureCrop(WebPPicture* pic, - int left, int top, int width, int height) { - (void)pic; -diff --git a/3rdparty/libwebp/src/enc/picture_tools_enc.c b/3rdparty/libwebp/src/enc/picture_tools_enc.c -index 38cb01534a3f..147cc18608c4 100644 ---- a/3rdparty/libwebp/src/enc/picture_tools_enc.c -+++ b/3rdparty/libwebp/src/enc/picture_tools_enc.c -@@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { - return (0xff000000u | (r << 16) | (g << 8) | b); - } - --void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { -+void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) { - const int red = (background_rgb >> 16) & 0xff; - const int green = (background_rgb >> 8) & 0xff; - const int blue = (background_rgb >> 0) & 0xff; - int x, y; -- if (pic == NULL) return; -- if (!pic->use_argb) { -- const int uv_width = (pic->width >> 1); // omit last pixel 
during u/v loop -+ if (picture == NULL) return; -+ if (!picture->use_argb) { -+ // omit last pixel during u/v loop -+ const int uv_width = (picture->width >> 1); - const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); - // VP8RGBToU/V expects the u/v values summed over four pixels - const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); - const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); -- const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; -- uint8_t* y_ptr = pic->y; -- uint8_t* u_ptr = pic->u; -- uint8_t* v_ptr = pic->v; -- uint8_t* a_ptr = pic->a; -+ const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; -+ uint8_t* y_ptr = picture->y; -+ uint8_t* u_ptr = picture->u; -+ uint8_t* v_ptr = picture->v; -+ uint8_t* a_ptr = picture->a; - if (!has_alpha || a_ptr == NULL) return; // nothing to do -- for (y = 0; y < pic->height; ++y) { -+ for (y = 0; y < picture->height; ++y) { - // Luma blending -- for (x = 0; x < pic->width; ++x) { -+ for (x = 0; x < picture->width; ++x) { - const uint8_t alpha = a_ptr[x]; - if (alpha < 0xff) { - y_ptr[x] = BLEND(Y0, y_ptr[x], alpha); -@@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { - // Chroma blending every even line - if ((y & 1) == 0) { - uint8_t* const a_ptr2 = -- (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; -+ (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride; - for (x = 0; x < uv_width; ++x) { - // Average four alpha values into a single blending weight. - // TODO(skal): might lead to visible contouring. Can we do better? 
-@@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { - u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); - v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); - } -- if (pic->width & 1) { // rightmost pixel -+ if (picture->width & 1) { // rightmost pixel - const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); - u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); - v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); - } - } else { -- u_ptr += pic->uv_stride; -- v_ptr += pic->uv_stride; -+ u_ptr += picture->uv_stride; -+ v_ptr += picture->uv_stride; - } -- memset(a_ptr, 0xff, pic->width); // reset alpha value to opaque -- a_ptr += pic->a_stride; -- y_ptr += pic->y_stride; -+ memset(a_ptr, 0xff, picture->width); // reset alpha value to opaque -+ a_ptr += picture->a_stride; -+ y_ptr += picture->y_stride; - } - } else { -- uint32_t* argb = pic->argb; -+ uint32_t* argb = picture->argb; - const uint32_t background = MakeARGB32(red, green, blue); -- for (y = 0; y < pic->height; ++y) { -- for (x = 0; x < pic->width; ++x) { -+ for (y = 0; y < picture->height; ++y) { -+ for (x = 0; x < picture->width; ++x) { - const int alpha = (argb[x] >> 24) & 0xff; - if (alpha != 0xff) { - if (alpha > 0) { -@@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { - } - } - } -- argb += pic->argb_stride; -+ argb += picture->argb_stride; - } - } - } -diff --git a/3rdparty/libwebp/src/enc/predictor_enc.c b/3rdparty/libwebp/src/enc/predictor_enc.c -index 2e6762ea0dd2..b3d44b59d506 100644 ---- a/3rdparty/libwebp/src/enc/predictor_enc.c -+++ b/3rdparty/libwebp/src/enc/predictor_enc.c -@@ -16,6 +16,7 @@ - - #include "src/dsp/lossless.h" - #include "src/dsp/lossless_common.h" -+#include "src/enc/vp8i_enc.h" - #include "src/enc/vp8li_enc.h" - - #define MAX_DIFF_COST (1e30f) -@@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } - // Methods to calculate Entropy (Shannon). 
- - static float PredictionCostSpatial(const int counts[256], int weight_0, -- double exp_val) { -+ float exp_val) { - const int significant_symbols = 256 >> 4; -- const double exp_decay_factor = 0.6; -- double bits = weight_0 * counts[0]; -+ const float exp_decay_factor = 0.6f; -+ float bits = (float)weight_0 * counts[0]; - int i; - for (i = 1; i < significant_symbols; ++i) { - bits += exp_val * (counts[i] + counts[256 - i]); -@@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0, - static float PredictionCostSpatialHistogram(const int accumulated[4][256], - const int tile[4][256]) { - int i; -- double retval = 0; -+ float retval = 0.f; - for (i = 0; i < 4; ++i) { -- const double kExpValue = 0.94; -+ const float kExpValue = 0.94f; - retval += PredictionCostSpatial(tile[i], 1, kExpValue); - retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); - } -@@ -249,7 +250,7 @@ static WEBP_INLINE void GetResidual( - } else if (x == 0) { - predict = upper_row[x]; // Top. - } else { -- predict = pred_func(current_row[x - 1], upper_row + x); -+ predict = pred_func(&current_row[x - 1], upper_row + x); - } - #if (WEBP_NEAR_LOSSLESS == 1) - if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || -@@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height, - // with respect to predictions. If near_lossless_quality < 100, applies - // near lossless processing, shaving off more bits of residuals for lower - // qualities.
--void VP8LResidualImage(int width, int height, int bits, int low_effort, -- uint32_t* const argb, uint32_t* const argb_scratch, -- uint32_t* const image, int near_lossless_quality, -- int exact, int used_subtract_green) { -+int VP8LResidualImage(int width, int height, int bits, int low_effort, -+ uint32_t* const argb, uint32_t* const argb_scratch, -+ uint32_t* const image, int near_lossless_quality, -+ int exact, int used_subtract_green, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent) { - const int tiles_per_row = VP8LSubSampleSize(width, bits); - const int tiles_per_col = VP8LSubSampleSize(height, bits); -+ int percent_start = *percent; - int tile_y; - int histo[4][256]; - const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality); -@@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort, - for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { - int tile_x; - for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { -- const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, -- bits, histo, argb_scratch, argb, max_quantization, exact, -- used_subtract_green, image); -+ const int pred = GetBestPredictorForTile( -+ width, height, tile_x, tile_y, bits, histo, argb_scratch, argb, -+ max_quantization, exact, used_subtract_green, image); - image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8); - } -+ -+ if (!WebPReportProgress( -+ pic, percent_start + percent_range * tile_y / tiles_per_col, -+ percent)) { -+ return 0; -+ } - } - } - - CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, - low_effort, max_quantization, exact, - used_subtract_green); -+ return WebPReportProgress(pic, percent_start + percent_range, percent); - } - - //------------------------------------------------------------------------------ -@@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256], - const int counts[256]) { - // Favor low entropy, 
locally and globally. - // Favor small absolute values for PredictionCostSpatial -- static const double kExpValue = 2.4; -+ static const float kExpValue = 2.4f; - return VP8LCombinedShannonEntropy(counts, accumulated) + - PredictionCostSpatial(counts, 3, kExpValue); - } -@@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize, - } - } - --void VP8LColorSpaceTransform(int width, int height, int bits, int quality, -- uint32_t* const argb, uint32_t* image) { -+int VP8LColorSpaceTransform(int width, int height, int bits, int quality, -+ uint32_t* const argb, uint32_t* image, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent) { - const int max_tile_size = 1 << bits; - const int tile_xsize = VP8LSubSampleSize(width, bits); - const int tile_ysize = VP8LSubSampleSize(height, bits); -+ int percent_start = *percent; - int accumulated_red_histo[256] = { 0 }; - int accumulated_blue_histo[256] = { 0 }; - int tile_x, tile_y; -@@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality, - } - } - } -+ if (!WebPReportProgress( -+ pic, percent_start + percent_range * tile_y / tile_ysize, -+ percent)) { -+ return 0; -+ } - } -+ return 1; - } -diff --git a/3rdparty/libwebp/src/enc/quant_enc.c b/3rdparty/libwebp/src/enc/quant_enc.c -index 01eb565c7f9c..6d8202d27714 100644 ---- a/3rdparty/libwebp/src/enc/quant_enc.c -+++ b/3rdparty/libwebp/src/enc/quant_enc.c -@@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) { - rd->score = MAX_COST; - } - --static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { -+static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst, -+ const VP8ModeScore* WEBP_RESTRICT const src) { - dst->D = src->D; - dst->SD = src->SD; - dst->R = src->R; -@@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { - dst->score = src->score; - } - --static void AddScore(VP8ModeScore* const dst, const 
VP8ModeScore* const src) { -+static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst, -+ const VP8ModeScore* WEBP_RESTRICT const src) { - dst->D += src->D; - dst->SD += src->SD; - dst->R += src->R; -@@ -585,15 +587,18 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, - return rate * lambda + RD_DISTO_MULT * distortion; - } - --static int TrellisQuantizeBlock(const VP8Encoder* const enc, -+// Coefficient type. -+enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 }; -+ -+static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc, - int16_t in[16], int16_t out[16], - int ctx0, int coeff_type, -- const VP8Matrix* const mtx, -+ const VP8Matrix* WEBP_RESTRICT const mtx, - int lambda) { - const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; - CostArrayPtr const costs = - (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; -- const int first = (coeff_type == 0) ? 1 : 0; -+ const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0; - Node nodes[16][NUM_NODES]; - ScoreState score_states[2][NUM_NODES]; - ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); -@@ -657,16 +662,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, - // test all alternate level values around level0. - for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { - Node* const cur = &NODE(n, m); -- int level = level0 + m; -+ const int level = level0 + m; - const int ctx = (level > 2) ? 2 : level; - const int band = VP8EncBands[n + 1]; - score_t base_score; -- score_t best_cur_score = MAX_COST; -- int best_prev = 0; // default, in case -+ score_t best_cur_score; -+ int best_prev; -+ score_t cost, score; - -- ss_cur[m].score = MAX_COST; - ss_cur[m].costs = costs[n + 1][ctx]; - if (level < 0 || level > thresh_level) { -+ ss_cur[m].score = MAX_COST; - // Node is dead. - continue; - } -@@ -682,18 +688,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, - } - - // Inspect all possible non-dead predecessors. 
Retain only the best one. -- for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { -+ // The base_score is added to all scores so it is only added for the final -+ // value after the loop. -+ cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level); -+ best_cur_score = -+ ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0); -+ best_prev = -MIN_DELTA; -+ for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) { - // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically - // eliminated since their score can't be better than the current best. -- const score_t cost = VP8LevelCost(ss_prev[p].costs, level); -+ cost = VP8LevelCost(ss_prev[p].costs, level); - // Examine node assuming it's a non-terminal one. -- const score_t score = -- base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); -+ score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); - if (score < best_cur_score) { - best_cur_score = score; - best_prev = p; - } - } -+ best_cur_score += base_score; - // Store best finding in current node. - cur->sign = sign; - cur->level = level; -@@ -701,11 +713,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, - ss_cur[m].score = best_cur_score; - - // Now, record best terminal node (and thus best entry in the graph). -- if (level != 0) { -+ if (level != 0 && best_cur_score < best_score) { - const score_t last_pos_cost = - (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; - const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); -- const score_t score = best_cur_score + last_pos_score; -+ score = best_cur_score + last_pos_score; - if (score < best_score) { - best_score = score; - best_path[0] = n; // best eob position -@@ -717,10 +729,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, - } - - // Fresh start -- memset(in + first, 0, (16 - first) * sizeof(*in)); -- memset(out + first, 0, (16 - first) * sizeof(*out)); -+ // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case. 
-+ if (coeff_type == TYPE_I16_AC) { -+ memset(in + 1, 0, 15 * sizeof(*in)); -+ memset(out + 1, 0, 15 * sizeof(*out)); -+ } else { -+ memset(in, 0, 16 * sizeof(*in)); -+ memset(out, 0, 16 * sizeof(*out)); -+ } - if (best_path[0] == -1) { -- return 0; // skip! -+ return 0; // skip! - } - - { -@@ -751,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, - // all at once. Output is the reconstructed block in *yuv_out, and the - // quantized levels in *levels. - --static int ReconstructIntra16(VP8EncIterator* const it, -- VP8ModeScore* const rd, -- uint8_t* const yuv_out, -+static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd, -+ uint8_t* WEBP_RESTRICT const yuv_out, - int mode) { - const VP8Encoder* const enc = it->enc_; - const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; -@@ -775,9 +793,9 @@ static int ReconstructIntra16(VP8EncIterator* const it, - for (y = 0, n = 0; y < 4; ++y) { - for (x = 0; x < 4; ++x, ++n) { - const int ctx = it->top_nz_[x] + it->left_nz_[y]; -- const int non_zero = -- TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, -- &dqm->y1_, dqm->lambda_trellis_i16_); -+ const int non_zero = TrellisQuantizeBlock( -+ enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_, -+ dqm->lambda_trellis_i16_); - it->top_nz_[x] = it->left_nz_[y] = non_zero; - rd->y_ac_levels[n][0] = 0; - nz |= non_zero << n; -@@ -803,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it, - return nz; - } - --static int ReconstructIntra4(VP8EncIterator* const it, -+static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it, - int16_t levels[16], -- const uint8_t* const src, -- uint8_t* const yuv_out, -+ const uint8_t* WEBP_RESTRICT const src, -+ uint8_t* WEBP_RESTRICT const yuv_out, - int mode) { - const VP8Encoder* const enc = it->enc_; - const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; -@@ -818,7 +836,7 @@ static int 
ReconstructIntra4(VP8EncIterator* const it, - if (DO_TRELLIS_I4 && it->do_trellis_) { - const int x = it->i4_ & 3, y = it->i4_ >> 2; - const int ctx = it->top_nz_[x] + it->left_nz_[y]; -- nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, -+ nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_, - dqm->lambda_trellis_i4_); - } else { - nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); -@@ -839,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it, - - // Quantize as usual, but also compute and return the quantization error. - // Error is already divided by DSHIFT. --static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { -+static int QuantizeSingle(int16_t* WEBP_RESTRICT const v, -+ const VP8Matrix* WEBP_RESTRICT const mtx) { - int V = *v; - const int sign = (V < 0); - if (sign) V = -V; -@@ -853,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { - return (sign ? -V : V) >> DSCALE; - } - --static void CorrectDCValues(const VP8EncIterator* const it, -- const VP8Matrix* const mtx, -- int16_t tmp[][16], VP8ModeScore* const rd) { -+static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it, -+ const VP8Matrix* WEBP_RESTRICT const mtx, -+ int16_t tmp[][16], -+ VP8ModeScore* WEBP_RESTRICT const rd) { - // | top[0] | top[1] - // --------+--------+--------- - // left[0] | tmp[0] tmp[1] <-> err0 err1 -@@ -886,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it, - } - } - --static void StoreDiffusionErrors(VP8EncIterator* const it, -- const VP8ModeScore* const rd) { -+static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it, -+ const VP8ModeScore* WEBP_RESTRICT const rd) { - int ch; - for (ch = 0; ch <= 1; ++ch) { - int8_t* const top = it->top_derr_[it->x_][ch]; -@@ -906,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it, - - //------------------------------------------------------------------------------ - --static 
int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, -- uint8_t* const yuv_out, int mode) { -+static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd, -+ uint8_t* WEBP_RESTRICT const yuv_out, int mode) { - const VP8Encoder* const enc = it->enc_; - const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; - const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; -@@ -927,9 +948,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, - for (y = 0; y < 2; ++y) { - for (x = 0; x < 2; ++x, ++n) { - const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; -- const int non_zero = -- TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, -- &dqm->uv_, dqm->lambda_trellis_uv_); -+ const int non_zero = TrellisQuantizeBlock( -+ enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_, -+ dqm->lambda_trellis_uv_); - it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; - nz |= non_zero << n; - } -@@ -978,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) { - SwapPtr(&it->yuv_out_, &it->yuv_out2_); - } - --static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { -+static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT rd) { - const int kNumBlocks = 16; - VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; - const int lambda = dqm->lambda_i16_; -@@ -1038,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { - //------------------------------------------------------------------------------ - - // return the cost array corresponding to the surrounding prediction modes. 
--static const uint16_t* GetCostModeI4(VP8EncIterator* const it, -+static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it, - const uint8_t modes[16]) { - const int preds_w = it->enc_->preds_w_; - const int x = (it->i4_ & 3), y = it->i4_ >> 2; -@@ -1047,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it, - return VP8FixedCostsI4[top][left]; - } - --static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { -+static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd) { - const VP8Encoder* const enc = it->enc_; - const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; - const int lambda = dqm->lambda_i4_; -@@ -1143,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { - - //------------------------------------------------------------------------------ - --static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { -+static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd) { - const int kNumBlocks = 8; - const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; - const int lambda = dqm->lambda_uv_; -@@ -1195,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { - //------------------------------------------------------------------------------ - // Final reconstruction and quantization. - --static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { -+static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd) { - const VP8Encoder* const enc = it->enc_; - const int is_i16 = (it->mb_->type_ == 1); - int nz = 0; -@@ -1220,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { - } - - // Refine intra16/intra4 sub-modes based on distortion only (not rate). 
--static void RefineUsingDistortion(VP8EncIterator* const it, -+static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it, - int try_both_modes, int refine_uv_mode, -- VP8ModeScore* const rd) { -+ VP8ModeScore* WEBP_RESTRICT const rd) { - score_t best_score = MAX_COST; - int nz = 0; - int mode; -@@ -1336,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it, - //------------------------------------------------------------------------------ - // Entry point - --int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, -+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd, - VP8RDLevel rd_opt) { - int is_skipped; - const int method = it->enc_->method_; -diff --git a/3rdparty/libwebp/src/enc/syntax_enc.c b/3rdparty/libwebp/src/enc/syntax_enc.c -index a9e5a6cf0fec..9b8f524d6981 100644 ---- a/3rdparty/libwebp/src/enc/syntax_enc.c -+++ b/3rdparty/libwebp/src/enc/syntax_enc.c -@@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc, - buf[3 * p + 1] = (part_size >> 8) & 0xff; - buf[3 * p + 2] = (part_size >> 16) & 0xff; - } -- return p ? pic->writer(buf, 3 * p, pic) : 1; -+ if (p && !pic->writer(buf, 3 * p, pic)) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); -+ } -+ return 1; - } - - //------------------------------------------------------------------------------ -@@ -349,7 +352,7 @@ int VP8EncWrite(VP8Encoder* const enc) { - (enc->alpha_data_size_ & 1); - riff_size += CHUNK_HEADER_SIZE + padded_alpha_size; - } -- // Sanity check. -+ // RIFF size should fit in 32-bits. 
- if (riff_size > 0xfffffffeU) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG); - } -@@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) { - - enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size); - ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_); -+ if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - return ok; - } - -diff --git a/3rdparty/libwebp/src/enc/vp8i_enc.h b/3rdparty/libwebp/src/enc/vp8i_enc.h -index 0e35562a8c9a..19d9a6edb77d 100644 ---- a/3rdparty/libwebp/src/enc/vp8i_enc.h -+++ b/3rdparty/libwebp/src/enc/vp8i_enc.h -@@ -31,8 +31,8 @@ extern "C" { - - // version numbers - #define ENC_MAJ_VERSION 1 --#define ENC_MIN_VERSION 2 --#define ENC_REV_VERSION 0 -+#define ENC_MIN_VERSION 3 -+#define ENC_REV_VERSION 1 - - enum { MAX_LF_LEVELS = 64, // Maximum loop filter level - MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost -@@ -286,8 +286,7 @@ int VP8IteratorNext(VP8EncIterator* const it); - // save the yuv_out_ boundary values to top_/left_ arrays for next iterations. - void VP8IteratorSaveBoundary(VP8EncIterator* const it); - // Report progression based on macroblock rows. Return 0 for user-abort request. --int VP8IteratorProgress(const VP8EncIterator* const it, -- int final_delta_percent); -+int VP8IteratorProgress(const VP8EncIterator* const it, int delta); - // Intra4x4 iterations - void VP8IteratorStartI4(VP8EncIterator* const it); - // returns true if not done. -@@ -471,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc); - // Sets up segment's quantization values, base_quant_ and filter strengths. - void VP8SetSegmentParams(VP8Encoder* const enc, float quality); - // Pick best modes and fills the levels. Returns true if skipped. 
--int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, -+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, -+ VP8ModeScore* WEBP_RESTRICT const rd, - VP8RDLevel rd_opt); - - // in alpha.c -@@ -491,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta); - - // misc utils for picture_*.c: - -+// Returns true if 'picture' is non-NULL and dimensions/colorspace are within -+// their valid ranges. If returning false, the 'error_code' in 'picture' is -+// updated. -+int WebPValidatePicture(const WebPPicture* const picture); -+ - // Remove reference to the ARGB/YUVA buffer (doesn't free anything). - void WebPPictureResetBuffers(WebPPicture* const picture); - --// Allocates ARGB buffer of given dimension (previous one is always free'd). --// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, --// out-of-memory). --int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); -+// Allocates ARGB buffer according to set width/height (previous one is -+// always free'd). Preserves the YUV(A) buffer. Returns false in case of error -+// (invalid param, out-of-memory). -+int WebPPictureAllocARGB(WebPPicture* const picture); - --// Allocates YUVA buffer of given dimension (previous one is always free'd). --// Uses picture->csp to determine whether an alpha buffer is needed. -+// Allocates YUVA buffer according to set width/height (previous one is always -+// free'd). Uses picture->csp to determine whether an alpha buffer is needed. - // Preserves the ARGB buffer. - // Returns false in case of error (invalid param, out-of-memory). --int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); -+int WebPPictureAllocYUVA(WebPPicture* const picture); - - // Replace samples that are fully transparent by 'color' to help compressibility - // (no guarantee, though). Assumes pic->use_argb is true. 
-diff --git a/3rdparty/libwebp/src/enc/vp8l_enc.c b/3rdparty/libwebp/src/enc/vp8l_enc.c -index 0b44ebe46ec5..c43d990d17e6 100644 ---- a/3rdparty/libwebp/src/enc/vp8l_enc.c -+++ b/3rdparty/libwebp/src/enc/vp8l_enc.c -@@ -15,128 +15,25 @@ - #include - #include - -+#include "src/dsp/lossless.h" -+#include "src/dsp/lossless_common.h" - #include "src/enc/backward_references_enc.h" - #include "src/enc/histogram_enc.h" - #include "src/enc/vp8i_enc.h" - #include "src/enc/vp8li_enc.h" --#include "src/dsp/lossless.h" --#include "src/dsp/lossless_common.h" - #include "src/utils/bit_writer_utils.h" - #include "src/utils/huffman_encode_utils.h" -+#include "src/utils/palette.h" - #include "src/utils/utils.h" -+#include "src/webp/encode.h" - #include "src/webp/format_constants.h" - - // Maximum number of histogram images (sub-blocks). - #define MAX_HUFF_IMAGE_SIZE 2600 - --// Palette reordering for smaller sum of deltas (and for smaller storage). -- --static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { -- const uint32_t a = WebPMemToUint32((uint8_t*)p1); -- const uint32_t b = WebPMemToUint32((uint8_t*)p2); -- assert(a != b); -- return (a < b) ? -1 : 1; --} -- --static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) { -- return (v <= 128) ? v : (256 - v); --} -- --// Computes a value that is related to the entropy created by the --// palette entry diff. --// --// Note that the last & 0xff is a no-operation in the next statement, but --// removed by most compilers and is here only for regularity of the code. 
--static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) { -- const uint32_t diff = VP8LSubPixels(col1, col2); -- const int kMoreWeightForRGBThanForAlpha = 9; -- uint32_t score; -- score = PaletteComponentDistance((diff >> 0) & 0xff); -- score += PaletteComponentDistance((diff >> 8) & 0xff); -- score += PaletteComponentDistance((diff >> 16) & 0xff); -- score *= kMoreWeightForRGBThanForAlpha; -- score += PaletteComponentDistance((diff >> 24) & 0xff); -- return score; --} -- --static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { -- const uint32_t tmp = *col1; -- *col1 = *col2; -- *col2 = tmp; --} -- --static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) { -- // Find greedily always the closest color of the predicted color to minimize -- // deltas in the palette. This reduces storage needs since the -- // palette is stored with delta encoding. -- uint32_t predict = 0x00000000; -- int i, k; -- for (i = 0; i < num_colors; ++i) { -- int best_ix = i; -- uint32_t best_score = ~0U; -- for (k = i; k < num_colors; ++k) { -- const uint32_t cur_score = PaletteColorDistance(palette[k], predict); -- if (best_score > cur_score) { -- best_score = cur_score; -- best_ix = k; -- } -- } -- SwapColor(&palette[best_ix], &palette[i]); -- predict = palette[i]; -- } --} -- --// The palette has been sorted by alpha. This function checks if the other --// components of the palette have a monotonic development with regards to --// position in the palette. If all have monotonic development, there is --// no benefit to re-organize them greedily. A monotonic development --// would be spotted in green-only situations (like lossy alpha) or gray-scale --// images. 
--static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) { -- uint32_t predict = 0x000000; -- int i; -- uint8_t sign_found = 0x00; -- for (i = 0; i < num_colors; ++i) { -- const uint32_t diff = VP8LSubPixels(palette[i], predict); -- const uint8_t rd = (diff >> 16) & 0xff; -- const uint8_t gd = (diff >> 8) & 0xff; -- const uint8_t bd = (diff >> 0) & 0xff; -- if (rd != 0x00) { -- sign_found |= (rd < 0x80) ? 1 : 2; -- } -- if (gd != 0x00) { -- sign_found |= (gd < 0x80) ? 8 : 16; -- } -- if (bd != 0x00) { -- sign_found |= (bd < 0x80) ? 64 : 128; -- } -- predict = palette[i]; -- } -- return (sign_found & (sign_found << 1)) != 0; // two consequent signs. --} -- - // ----------------------------------------------------------------------------- - // Palette - --// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE, --// creates a palette and returns true, else returns false. --static int AnalyzeAndCreatePalette(const WebPPicture* const pic, -- int low_effort, -- uint32_t palette[MAX_PALETTE_SIZE], -- int* const palette_size) { -- const int num_colors = WebPGetColorPalette(pic, palette); -- if (num_colors > MAX_PALETTE_SIZE) { -- *palette_size = 0; -- return 0; -- } -- *palette_size = num_colors; -- qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); -- if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) { -- GreedyMinimizeDeltas(palette, num_colors); -- } -- return 1; --} -- - // These five modes are evaluated and their respective entropy is computed. - typedef enum { - kDirect = 0, -@@ -165,10 +62,11 @@ typedef enum { - kHistoTotal // Must be last. - } HistoIx; - --static void AddSingleSubGreen(int p, uint32_t* const r, uint32_t* const b) { -- const int green = p >> 8; // The upper bits are masked away later. 
-- ++r[((p >> 16) - green) & 0xff]; -- ++b[((p >> 0) - green) & 0xff]; -+static void AddSingleSubGreen(uint32_t p, -+ uint32_t* const r, uint32_t* const b) { -+ const int green = (int)p >> 8; // The upper bits are masked away later. -+ ++r[(((int)p >> 16) - green) & 0xff]; -+ ++b[(((int)p >> 0) - green) & 0xff]; - } - - static void AddSingle(uint32_t p, -@@ -242,8 +140,8 @@ static int AnalyzeEntropy(const uint32_t* argb, - curr_row += argb_stride; - } - { -- double entropy_comp[kHistoTotal]; -- double entropy[kNumEntropyIx]; -+ float entropy_comp[kHistoTotal]; -+ float entropy[kNumEntropyIx]; - int k; - int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen; - int j; -@@ -362,11 +260,14 @@ typedef struct { - } CrunchSubConfig; - typedef struct { - int entropy_idx_; -+ PaletteSorting palette_sorting_type_; - CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX]; - int sub_configs_size_; - } CrunchConfig; - --#define CRUNCH_CONFIGS_MAX kNumEntropyIx -+// +2 because we add a palette sorting configuration for kPalette and -+// kPaletteAndSpatial. -+#define CRUNCH_CONFIGS_MAX (kNumEntropyIx + 2 * kPaletteSortingNum) - - static int EncoderAnalyze(VP8LEncoder* const enc, - CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX], -@@ -386,9 +287,12 @@ static int EncoderAnalyze(VP8LEncoder* const enc, - int do_no_cache = 0; - assert(pic != NULL && pic->argb != NULL); - -- use_palette = -- AnalyzeAndCreatePalette(pic, low_effort, -- enc->palette_, &enc->palette_size_); -+ // Check whether a palette is possible. -+ enc->palette_size_ = GetColorPalette(pic, enc->palette_sorted_); -+ use_palette = (enc->palette_size_ <= MAX_PALETTE_SIZE); -+ if (!use_palette) { -+ enc->palette_size_ = 0; -+ } - - // Empirical bit sizes. - enc->histo_bits_ = GetHistoBits(method, use_palette, -@@ -398,6 +302,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc, - if (low_effort) { - // AnalyzeEntropy is somewhat slow. - crunch_configs[0].entropy_idx_ = use_palette ? 
kPalette : kSpatialSubGreen; -+ crunch_configs[0].palette_sorting_type_ = -+ use_palette ? kSortedDefault : kUnusedPalette; - n_lz77s = 1; - *crunch_configs_size = 1; - } else { -@@ -418,13 +324,37 @@ static int EncoderAnalyze(VP8LEncoder* const enc, - // a palette. - if ((i != kPalette && i != kPaletteAndSpatial) || use_palette) { - assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX); -- crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i; -+ if (use_palette && (i == kPalette || i == kPaletteAndSpatial)) { -+ int sorting_method; -+ for (sorting_method = 0; sorting_method < kPaletteSortingNum; -+ ++sorting_method) { -+ const PaletteSorting typed_sorting_method = -+ (PaletteSorting)sorting_method; -+ // TODO(vrabaud) kSortedDefault should be tested. It is omitted -+ // for now for backward compatibility. -+ if (typed_sorting_method == kUnusedPalette || -+ typed_sorting_method == kSortedDefault) { -+ continue; -+ } -+ crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; -+ crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = -+ typed_sorting_method; -+ ++*crunch_configs_size; -+ } -+ } else { -+ crunch_configs[(*crunch_configs_size)].entropy_idx_ = i; -+ crunch_configs[(*crunch_configs_size)].palette_sorting_type_ = -+ kUnusedPalette; -+ ++*crunch_configs_size; -+ } - } - } - } else { - // Only choose the guessed best transform. - *crunch_configs_size = 1; - crunch_configs[0].entropy_idx_ = min_entropy_ix; -+ crunch_configs[0].palette_sorting_type_ = -+ use_palette ? kMinimizeDelta : kUnusedPalette; - if (config->quality >= 75 && method == 5) { - // Test with and without color cache. 
- do_no_cache = 1; -@@ -432,6 +362,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc, - if (min_entropy_ix == kPalette) { - *crunch_configs_size = 2; - crunch_configs[1].entropy_idx_ = kPaletteAndSpatial; -+ crunch_configs[1].palette_sorting_type_ = kMinimizeDelta; - } - } - } -@@ -730,11 +661,11 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( - VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); - } - --static WebPEncodingError StoreImageToBitMask( -+static int StoreImageToBitMask( - VP8LBitWriter* const bw, int width, int histo_bits, - const VP8LBackwardRefs* const refs, - const uint16_t* histogram_symbols, -- const HuffmanTreeCode* const huffman_codes) { -+ const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) { - const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; - const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); - // x and y trace the position in the image. -@@ -787,44 +718,52 @@ static WebPEncodingError StoreImageToBitMask( - } - VP8LRefsCursorNext(&c); - } -- return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; -+ if (bw->error_) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } -+ return 1; - } - --// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 --static WebPEncodingError EncodeImageNoHuffman( -- VP8LBitWriter* const bw, const uint32_t* const argb, -- VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array, -- int width, int height, int quality, int low_effort) { -+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31. -+// pic and percent are for progress. 
-+static int EncodeImageNoHuffman(VP8LBitWriter* const bw, -+ const uint32_t* const argb, -+ VP8LHashChain* const hash_chain, -+ VP8LBackwardRefs* const refs_array, int width, -+ int height, int quality, int low_effort, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent) { - int i; - int max_tokens = 0; -- WebPEncodingError err = VP8_ENC_OK; - VP8LBackwardRefs* refs; - HuffmanTreeToken* tokens = NULL; -- HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; -- const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol -+ HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}}; -+ const uint16_t histogram_symbols[1] = {0}; // only one tree, one symbol - int cache_bits = 0; - VP8LHistogramSet* histogram_image = NULL; - HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( -- 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); -+ 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); - if (huff_tree == NULL) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - - // Calculate backward references from ARGB image. 
-- if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, -- low_effort)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, low_effort, -+ pic, percent_range / 2, percent)) { -+ goto Error; -+ } -+ if (!VP8LGetBackwardReferences(width, height, argb, quality, /*low_effort=*/0, -+ kLZ77Standard | kLZ77RLE, cache_bits, -+ /*do_no_cache=*/0, hash_chain, refs_array, -+ &cache_bits, pic, -+ percent_range - percent_range / 2, percent)) { - goto Error; - } -- err = VP8LGetBackwardReferences( -- width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE, -- cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits); -- if (err != VP8_ENC_OK) goto Error; - refs = &refs_array[0]; - histogram_image = VP8LAllocateHistogramSet(1, cache_bits); - if (histogram_image == NULL) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - VP8LHistogramSetClear(histogram_image); -@@ -835,7 +774,7 @@ static WebPEncodingError EncodeImageNoHuffman( - // Create Huffman bit lengths and codes for each histogram image. - assert(histogram_image->size == 1); - if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - -@@ -852,7 +791,7 @@ static WebPEncodingError EncodeImageNoHuffman( - - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - -@@ -864,27 +803,32 @@ static WebPEncodingError EncodeImageNoHuffman( - } - - // Store actual literals. 
-- err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, -- huffman_codes); -+ if (!StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, huffman_codes, -+ pic)) { -+ goto Error; -+ } - - Error: - WebPSafeFree(tokens); - WebPSafeFree(huff_tree); - VP8LFreeHistogramSet(histogram_image); - WebPSafeFree(huffman_codes[0].codes); -- return err; -+ return (pic->error_code == VP8_ENC_OK); - } - --static WebPEncodingError EncodeImageInternal( -+// pic and percent are for progress. -+static int EncodeImageInternal( - VP8LBitWriter* const bw, const uint32_t* const argb, - VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width, - int height, int quality, int low_effort, int use_cache, - const CrunchConfig* const config, int* cache_bits, int histogram_bits, -- size_t init_byte_position, int* const hdr_size, int* const data_size) { -- WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ size_t init_byte_position, int* const hdr_size, int* const data_size, -+ const WebPPicture* const pic, int percent_range, int* const percent) { - const uint32_t histogram_image_xysize = - VP8LSubSampleSize(width, histogram_bits) * - VP8LSubSampleSize(height, histogram_bits); -+ int remaining_percent = percent_range; -+ int percent_start = *percent; - VP8LHistogramSet* histogram_image = NULL; - VP8LHistogram* tmp_histo = NULL; - int histogram_image_size = 0; -@@ -893,9 +837,8 @@ static WebPEncodingError EncodeImageInternal( - 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); - HuffmanTreeToken* tokens = NULL; - HuffmanTreeCode* huffman_codes = NULL; -- uint16_t* const histogram_symbols = -- (uint16_t*)WebPSafeMalloc(histogram_image_xysize, -- sizeof(*histogram_symbols)); -+ uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc( -+ histogram_image_xysize, sizeof(*histogram_symbols)); - int sub_configs_idx; - int cache_bits_init, write_histogram_image; - VP8LBitWriter bw_init = *bw, bw_best; -@@ -907,14 +850,27 @@ static WebPEncodingError 
EncodeImageInternal( - assert(hdr_size != NULL); - assert(data_size != NULL); - -- // Make sure we can allocate the different objects. - memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram)); -+ if (!VP8LBitWriterInit(&bw_best, 0)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } -+ -+ // Make sure we can allocate the different objects. - if (huff_tree == NULL || histogram_symbols == NULL || -- !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) || -- !VP8LHashChainFill(hash_chain, quality, argb, width, height, -- low_effort)) { -+ !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } -+ -+ percent_range = remaining_percent / 5; -+ if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, -+ low_effort, pic, percent_range, percent)) { - goto Error; - } -+ percent_start += percent_range; -+ remaining_percent -= percent_range; -+ - if (use_cache) { - // If the value is different from zero, it has been set during the - // palette analysis. -@@ -923,22 +879,27 @@ static WebPEncodingError EncodeImageInternal( - cache_bits_init = 0; - } - // If several iterations will happen, clone into bw_best. 
-- if (!VP8LBitWriterInit(&bw_best, 0) || -- ((config->sub_configs_size_ > 1 || -- config->sub_configs_[0].do_no_cache_) && -- !VP8LBitWriterClone(bw, &bw_best))) { -+ if ((config->sub_configs_size_ > 1 || config->sub_configs_[0].do_no_cache_) && -+ !VP8LBitWriterClone(bw, &bw_best)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } -+ - for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_; - ++sub_configs_idx) { - const CrunchSubConfig* const sub_config = - &config->sub_configs_[sub_configs_idx]; - int cache_bits_best, i_cache; -- err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, -- sub_config->lz77_, cache_bits_init, -- sub_config->do_no_cache_, hash_chain, -- &refs_array[0], &cache_bits_best); -- if (err != VP8_ENC_OK) goto Error; -+ int i_remaining_percent = remaining_percent / config->sub_configs_size_; -+ int i_percent_range = i_remaining_percent / 4; -+ i_remaining_percent -= i_percent_range; -+ -+ if (!VP8LGetBackwardReferences( -+ width, height, argb, quality, low_effort, sub_config->lz77_, -+ cache_bits_init, sub_config->do_no_cache_, hash_chain, -+ &refs_array[0], &cache_bits_best, pic, i_percent_range, percent)) { -+ goto Error; -+ } - - for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) { - const int cache_bits_tmp = (i_cache == 0) ? 
cache_bits_best : 0; -@@ -953,11 +914,17 @@ static WebPEncodingError EncodeImageInternal( - histogram_image = - VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp); - tmp_histo = VP8LAllocateHistogram(cache_bits_tmp); -- if (histogram_image == NULL || tmp_histo == NULL || -- !VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache], -- quality, low_effort, histogram_bits, -- cache_bits_tmp, histogram_image, tmp_histo, -- histogram_symbols)) { -+ if (histogram_image == NULL || tmp_histo == NULL) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } -+ -+ i_percent_range = i_remaining_percent / 3; -+ i_remaining_percent -= i_percent_range; -+ if (!VP8LGetHistoImageSymbols( -+ width, height, &refs_array[i_cache], quality, low_effort, -+ histogram_bits, cache_bits_tmp, histogram_image, tmp_histo, -+ histogram_symbols, pic, i_percent_range, percent)) { - goto Error; - } - // Create Huffman bit lengths and codes for each histogram image. -@@ -970,6 +937,7 @@ static WebPEncodingError EncodeImageInternal( - // GetHuffBitLengthsAndCodes(). - if (huffman_codes == NULL || - !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - // Free combined histograms. 
-@@ -992,12 +960,14 @@ static WebPEncodingError EncodeImageInternal( - write_histogram_image = (histogram_image_size > 1); - VP8LPutBits(bw, write_histogram_image, 1); - if (write_histogram_image) { -- uint32_t* const histogram_argb = -- (uint32_t*)WebPSafeMalloc(histogram_image_xysize, -- sizeof(*histogram_argb)); -+ uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc( -+ histogram_image_xysize, sizeof(*histogram_argb)); - int max_index = 0; - uint32_t i; -- if (histogram_argb == NULL) goto Error; -+ if (histogram_argb == NULL) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } - for (i = 0; i < histogram_image_xysize; ++i) { - const int symbol_index = histogram_symbols[i] & 0xffff; - histogram_argb[i] = (symbol_index << 8); -@@ -1008,12 +978,17 @@ static WebPEncodingError EncodeImageInternal( - histogram_image_size = max_index; - - VP8LPutBits(bw, histogram_bits - 2, 3); -- err = EncodeImageNoHuffman( -- bw, histogram_argb, &hash_chain_histogram, &refs_array[2], -- VP8LSubSampleSize(width, histogram_bits), -- VP8LSubSampleSize(height, histogram_bits), quality, low_effort); -+ i_percent_range = i_remaining_percent / 2; -+ i_remaining_percent -= i_percent_range; -+ if (!EncodeImageNoHuffman( -+ bw, histogram_argb, &hash_chain_histogram, &refs_array[2], -+ VP8LSubSampleSize(width, histogram_bits), -+ VP8LSubSampleSize(height, histogram_bits), quality, low_effort, -+ pic, i_percent_range, percent)) { -+ WebPSafeFree(histogram_argb); -+ goto Error; -+ } - WebPSafeFree(histogram_argb); -- if (err != VP8_ENC_OK) goto Error; - } - - // Store Huffman codes. 
-@@ -1028,7 +1003,10 @@ static WebPEncodingError EncodeImageInternal( - } - } - tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); -- if (tokens == NULL) goto Error; -+ if (tokens == NULL) { -+ WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } - for (i = 0; i < 5 * histogram_image_size; ++i) { - HuffmanTreeCode* const codes = &huffman_codes[i]; - StoreHuffmanCode(bw, huff_tree, tokens, codes); -@@ -1037,9 +1015,10 @@ static WebPEncodingError EncodeImageInternal( - } - // Store actual literals. - hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); -- err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], -- histogram_symbols, huffman_codes); -- if (err != VP8_ENC_OK) goto Error; -+ if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], -+ histogram_symbols, huffman_codes, pic)) { -+ goto Error; -+ } - // Keep track of the smallest image so far. - if (VP8LBitWriterNumBytes(bw) < bw_size_best) { - bw_size_best = VP8LBitWriterNumBytes(bw); -@@ -1059,7 +1038,10 @@ static WebPEncodingError EncodeImageInternal( - } - } - VP8LBitWriterSwap(bw, &bw_best); -- err = VP8_ENC_OK; -+ -+ if (!WebPReportProgress(pic, percent_start + remaining_percent, percent)) { -+ goto Error; -+ } - - Error: - WebPSafeFree(tokens); -@@ -1073,7 +1055,7 @@ static WebPEncodingError EncodeImageInternal( - } - WebPSafeFree(histogram_symbols); - VP8LBitWriterWipeOut(&bw_best); -- return err; -+ return (pic->error_code == VP8_ENC_OK); - } - - // ----------------------------------------------------------------------------- -@@ -1082,26 +1064,27 @@ static WebPEncodingError EncodeImageInternal( - static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, - VP8LBitWriter* const bw) { - VP8LPutBits(bw, TRANSFORM_PRESENT, 1); -- VP8LPutBits(bw, SUBTRACT_GREEN, 2); -+ VP8LPutBits(bw, SUBTRACT_GREEN_TRANSFORM, 2); - VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); - } - 
--static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, -- int width, int height, -- int quality, int low_effort, -- int used_subtract_green, -- VP8LBitWriter* const bw) { -+static int ApplyPredictFilter(const VP8LEncoder* const enc, int width, -+ int height, int quality, int low_effort, -+ int used_subtract_green, VP8LBitWriter* const bw, -+ int percent_range, int* const percent) { - const int pred_bits = enc->transform_bits_; - const int transform_width = VP8LSubSampleSize(width, pred_bits); - const int transform_height = VP8LSubSampleSize(height, pred_bits); - // we disable near-lossless quantization if palette is used. -- const int near_lossless_strength = enc->use_palette_ ? 100 -- : enc->config_->near_lossless; -+ const int near_lossless_strength = -+ enc->use_palette_ ? 100 : enc->config_->near_lossless; - -- VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, -- enc->argb_scratch_, enc->transform_data_, -- near_lossless_strength, enc->config_->exact, -- used_subtract_green); -+ if (!VP8LResidualImage( -+ width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_, -+ enc->transform_data_, near_lossless_strength, enc->config_->exact, -+ used_subtract_green, enc->pic_, percent_range / 2, percent)) { -+ return 0; -+ } - VP8LPutBits(bw, TRANSFORM_PRESENT, 1); - VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); - assert(pred_bits >= 2); -@@ -1109,19 +1092,23 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, - return EncodeImageNoHuffman( - bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, -- quality, low_effort); -+ quality, low_effort, enc->pic_, percent_range - percent_range / 2, -+ percent); - } - --static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, -- int width, int height, -- int quality, int low_effort, -- VP8LBitWriter* const bw) { -+static int ApplyCrossColorFilter(const VP8LEncoder* 
const enc, int width, -+ int height, int quality, int low_effort, -+ VP8LBitWriter* const bw, int percent_range, -+ int* const percent) { - const int ccolor_transform_bits = enc->transform_bits_; - const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); - const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); - -- VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, -- enc->argb_, enc->transform_data_); -+ if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, -+ enc->argb_, enc->transform_data_, enc->pic_, -+ percent_range / 2, percent)) { -+ return 0; -+ } - VP8LPutBits(bw, TRANSFORM_PRESENT, 1); - VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); - assert(ccolor_transform_bits >= 2); -@@ -1129,23 +1116,21 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, - return EncodeImageNoHuffman( - bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, - (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, -- quality, low_effort); -+ quality, low_effort, enc->pic_, percent_range - percent_range / 2, -+ percent); - } - - // ----------------------------------------------------------------------------- - --static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, -- size_t riff_size, size_t vp8l_size) { -+static int WriteRiffHeader(const WebPPicture* const pic, size_t riff_size, -+ size_t vp8l_size) { - uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = { - 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', - 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE, - }; - PutLE32(riff + TAG_SIZE, (uint32_t)riff_size); - PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size); -- if (!pic->writer(riff, sizeof(riff), pic)) { -- return VP8_ENC_ERROR_BAD_WRITE; -- } -- return VP8_ENC_OK; -+ return pic->writer(riff, sizeof(riff), pic); - } - - static int WriteImageSize(const WebPPicture* const pic, -@@ -1165,36 
+1150,32 @@ static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { - return !bw->error_; - } - --static WebPEncodingError WriteImage(const WebPPicture* const pic, -- VP8LBitWriter* const bw, -- size_t* const coded_size) { -- WebPEncodingError err = VP8_ENC_OK; -+static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, -+ size_t* const coded_size) { - const uint8_t* const webpll_data = VP8LBitWriterFinish(bw); - const size_t webpll_size = VP8LBitWriterNumBytes(bw); - const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; - const size_t pad = vp8l_size & 1; - const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad; -+ *coded_size = 0; - -- err = WriteRiffHeader(pic, riff_size, vp8l_size); -- if (err != VP8_ENC_OK) goto Error; -+ if (bw->error_) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } - -- if (!pic->writer(webpll_data, webpll_size, pic)) { -- err = VP8_ENC_ERROR_BAD_WRITE; -- goto Error; -+ if (!WriteRiffHeader(pic, riff_size, vp8l_size) || -+ !pic->writer(webpll_data, webpll_size, pic)) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - } - - if (pad) { - const uint8_t pad_byte[1] = { 0 }; - if (!pic->writer(pad_byte, 1, pic)) { -- err = VP8_ENC_ERROR_BAD_WRITE; -- goto Error; -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - } - } - *coded_size = CHUNK_HEADER_SIZE + riff_size; -- return VP8_ENC_OK; -- -- Error: -- return err; -+ return 1; - } - - // ----------------------------------------------------------------------------- -@@ -1210,36 +1191,32 @@ static void ClearTransformBuffer(VP8LEncoder* const enc) { - // Flags influencing the memory allocated: - // enc->transform_bits_ - // enc->use_predict_, enc->use_cross_color_ --static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, -- int width, int height) { -- WebPEncodingError err = VP8_ENC_OK; -- const uint64_t image_size = width * height; -+static int 
AllocateTransformBuffer(VP8LEncoder* const enc, int width, -+ int height) { -+ const uint64_t image_size = (uint64_t)width * height; - // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra - // pixel in each, plus 2 regular scanlines of bytes. - // TODO(skal): Clean up by using arithmetic in bytes instead of words. - const uint64_t argb_scratch_size = -- enc->use_predict_ -- ? (width + 1) * 2 + -- (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) -- : 0; -+ enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) / -+ sizeof(uint32_t) -+ : 0; - const uint64_t transform_data_size = - (enc->use_predict_ || enc->use_cross_color_) -- ? VP8LSubSampleSize(width, enc->transform_bits_) * -+ ? (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) * - VP8LSubSampleSize(height, enc->transform_bits_) - : 0; - const uint64_t max_alignment_in_words = - (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); -- const uint64_t mem_size = -- image_size + max_alignment_in_words + -- argb_scratch_size + max_alignment_in_words + -- transform_data_size; -+ const uint64_t mem_size = image_size + max_alignment_in_words + -+ argb_scratch_size + max_alignment_in_words + -+ transform_data_size; - uint32_t* mem = enc->transform_mem_; - if (mem == NULL || mem_size > enc->transform_mem_size_) { - ClearTransformBuffer(enc); - mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); - if (mem == NULL) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -- goto Error; -+ return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - } - enc->transform_mem_ = mem; - enc->transform_mem_size_ = (size_t)mem_size; -@@ -1252,19 +1229,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, - enc->transform_data_ = mem; - - enc->current_width_ = width; -- Error: -- return err; -+ return 1; - } - --static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { -- WebPEncodingError err = VP8_ENC_OK; -+static int 
MakeInputImageCopy(VP8LEncoder* const enc) { - const WebPPicture* const picture = enc->pic_; - const int width = picture->width; - const int height = picture->height; - -- err = AllocateTransformBuffer(enc, width, height); -- if (err != VP8_ENC_OK) return err; -- if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK; -+ if (!AllocateTransformBuffer(enc, width, height)) return 0; -+ if (enc->argb_content_ == kEncoderARGB) return 1; - - { - uint32_t* dst = enc->argb_; -@@ -1278,27 +1252,11 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { - } - enc->argb_content_ = kEncoderARGB; - assert(enc->current_width_ == width); -- return VP8_ENC_OK; -+ return 1; - } - - // ----------------------------------------------------------------------------- - --static WEBP_INLINE int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, -- int hi) { -- int low = 0; -- if (sorted[low] == color) return low; // loop invariant: sorted[low] != color -- while (1) { -- const int mid = (low + hi) >> 1; -- if (sorted[mid] == color) { -- return mid; -- } else if (sorted[mid] < color) { -- low = mid; -- } else { -- hi = mid; -- } -- } --} -- - #define APPLY_PALETTE_GREEDY_MAX 4 - - static WEBP_INLINE uint32_t SearchColorGreedy(const uint32_t palette[], -@@ -1333,17 +1291,6 @@ static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { - (32 - PALETTE_INV_SIZE_BITS); - } - --// Sort palette in increasing order and prepare an inverse mapping array. --static void PrepareMapToPalette(const uint32_t palette[], int num_colors, -- uint32_t sorted[], uint32_t idx_map[]) { -- int i; -- memcpy(sorted, palette, num_colors * sizeof(*sorted)); -- qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); -- for (i = 0; i < num_colors; ++i) { -- idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; -- } --} -- - // Use 1 pixel cache for ARGB pixels. 
- #define APPLY_PALETTE_FOR(COLOR_INDEX) do { \ - uint32_t prev_pix = palette[0]; \ -@@ -1367,16 +1314,18 @@ static void PrepareMapToPalette(const uint32_t palette[], int num_colors, - // using 'row' as a temporary buffer of size 'width'. - // We assume that all src[] values have a corresponding entry in the palette. - // Note: src[] can be the same as dst[] --static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, -- uint32_t* dst, uint32_t dst_stride, -- const uint32_t* palette, int palette_size, -- int width, int height, int xbits) { -+static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst, -+ uint32_t dst_stride, const uint32_t* palette, -+ int palette_size, int width, int height, int xbits, -+ const WebPPicture* const pic) { - // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be - // made to work in-place. - uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); - int x, y; - -- if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; -+ if (tmp_row == NULL) { -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } - - if (palette_size < APPLY_PALETTE_GREEDY_MAX) { - APPLY_PALETTE_FOR(SearchColorGreedy(palette, palette_size, pix)); -@@ -1421,7 +1370,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, - } - } - WebPSafeFree(tmp_row); -- return VP8_ENC_OK; -+ return 1; - } - #undef APPLY_PALETTE_FOR - #undef PALETTE_INV_SIZE_BITS -@@ -1429,9 +1378,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, - #undef APPLY_PALETTE_GREEDY_MAX - - // Note: Expects "enc->palette_" to be set properly. 
--static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, -- int in_place) { -- WebPEncodingError err = VP8_ENC_OK; -+static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) { - const WebPPicture* const pic = enc->pic_; - const int width = pic->width; - const int height = pic->height; -@@ -1449,19 +1396,22 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, - xbits = (palette_size <= 16) ? 1 : 0; - } - -- err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); -- if (err != VP8_ENC_OK) return err; -- -- err = ApplyPalette(src, src_stride, -+ if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) { -+ return 0; -+ } -+ if (!ApplyPalette(src, src_stride, - enc->argb_, enc->current_width_, -- palette, palette_size, width, height, xbits); -+ palette, palette_size, width, height, xbits, pic)) { -+ return 0; -+ } - enc->argb_content_ = kEncoderPalette; -- return err; -+ return 1; - } - - // Save palette_[] to bitstream. 
- static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, -- VP8LEncoder* const enc) { -+ VP8LEncoder* const enc, -+ int percent_range, int* const percent) { - int i; - uint32_t tmp_palette[MAX_PALETTE_SIZE]; - const int palette_size = enc->palette_size_; -@@ -1476,7 +1426,7 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, - tmp_palette[0] = palette[0]; - return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, - &enc->refs_[0], palette_size, 1, /*quality=*/20, -- low_effort); -+ low_effort, enc->pic_, percent_range, percent); - } - - // ----------------------------------------------------------------------------- -@@ -1520,7 +1470,6 @@ typedef struct { - CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX]; - int num_crunch_configs_; - int red_and_blue_always_zero_; -- WebPEncodingError err_; - WebPAuxStats* stats_; - } StreamEncodeContext; - -@@ -1537,7 +1486,6 @@ static int EncodeStreamHook(void* input, void* data2) { - #if !defined(WEBP_DISABLE_STATS) - WebPAuxStats* const stats = params->stats_; - #endif -- WebPEncodingError err = VP8_ENC_OK; - const int quality = (int)config->quality; - const int low_effort = (config->method == 0); - #if (WEBP_NEAR_LOSSLESS == 1) -@@ -1545,6 +1493,7 @@ static int EncodeStreamHook(void* input, void* data2) { - #endif - const int height = picture->height; - const size_t byte_position = VP8LBitWriterNumBytes(bw); -+ int percent = 2; // for WebPProgressHook - #if (WEBP_NEAR_LOSSLESS == 1) - int use_near_lossless = 0; - #endif -@@ -1558,12 +1507,13 @@ static int EncodeStreamHook(void* input, void* data2) { - - if (!VP8LBitWriterInit(&bw_best, 0) || - (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - - for (idx = 0; idx < num_crunch_configs; ++idx) { - const int entropy_idx = crunch_configs[idx].entropy_idx_; -+ int remaining_percent = 97 
/ num_crunch_configs, percent_range; - enc->use_palette_ = - (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial); - enc->use_subtract_green_ = -@@ -1571,7 +1521,8 @@ static int EncodeStreamHook(void* input, void* data2) { - enc->use_predict_ = (entropy_idx == kSpatial) || - (entropy_idx == kSpatialSubGreen) || - (entropy_idx == kPaletteAndSpatial); -- if (low_effort) { -+ // When using a palette, R/B==0, hence no need to test for cross-color. -+ if (low_effort || enc->use_palette_) { - enc->use_cross_color_ = 0; - } else { - enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_; -@@ -1586,11 +1537,10 @@ static int EncodeStreamHook(void* input, void* data2) { - use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ && - !enc->use_predict_; - if (use_near_lossless) { -- err = AllocateTransformBuffer(enc, width, height); -- if (err != VP8_ENC_OK) goto Error; -+ if (!AllocateTransformBuffer(enc, width, height)) goto Error; - if ((enc->argb_content_ != kEncoderNearLossless) && - !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - enc->argb_content_ = kEncoderNearLossless; -@@ -1603,10 +1553,18 @@ static int EncodeStreamHook(void* input, void* data2) { - - // Encode palette - if (enc->use_palette_) { -- err = EncodePalette(bw, low_effort, enc); -- if (err != VP8_ENC_OK) goto Error; -- err = MapImageFromPalette(enc, use_delta_palette); -- if (err != VP8_ENC_OK) goto Error; -+ if (!PaletteSort(crunch_configs[idx].palette_sorting_type_, enc->pic_, -+ enc->palette_sorted_, enc->palette_size_, -+ enc->palette_)) { -+ WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ goto Error; -+ } -+ percent_range = remaining_percent / 4; -+ if (!EncodePalette(bw, low_effort, enc, percent_range, &percent)) { -+ goto Error; -+ } -+ remaining_percent -= percent_range; -+ if 
(!MapImageFromPalette(enc, use_delta_palette)) goto Error; - // If using a color cache, do not have it bigger than the number of - // colors. - if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { -@@ -1617,8 +1575,7 @@ static int EncodeStreamHook(void* input, void* data2) { - // In case image is not packed. - if (enc->argb_content_ != kEncoderNearLossless && - enc->argb_content_ != kEncoderPalette) { -- err = MakeInputImageCopy(enc); -- if (err != VP8_ENC_OK) goto Error; -+ if (!MakeInputImageCopy(enc)) goto Error; - } - - // ----------------------------------------------------------------------- -@@ -1629,15 +1586,22 @@ static int EncodeStreamHook(void* input, void* data2) { - } - - if (enc->use_predict_) { -- err = ApplyPredictFilter(enc, enc->current_width_, height, quality, -- low_effort, enc->use_subtract_green_, bw); -- if (err != VP8_ENC_OK) goto Error; -+ percent_range = remaining_percent / 3; -+ if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, -+ low_effort, enc->use_subtract_green_, bw, -+ percent_range, &percent)) { -+ goto Error; -+ } -+ remaining_percent -= percent_range; - } - - if (enc->use_cross_color_) { -- err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, -- low_effort, bw); -- if (err != VP8_ENC_OK) goto Error; -+ percent_range = remaining_percent / 2; -+ if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, -+ low_effort, bw, percent_range, &percent)) { -+ goto Error; -+ } -+ remaining_percent -= percent_range; - } - } - -@@ -1645,12 +1609,13 @@ static int EncodeStreamHook(void* input, void* data2) { - - // ------------------------------------------------------------------------- - // Encode and write the transformed image. 
-- err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, -- enc->current_width_, height, quality, low_effort, -- use_cache, &crunch_configs[idx], -- &enc->cache_bits_, enc->histo_bits_, -- byte_position, &hdr_size, &data_size); -- if (err != VP8_ENC_OK) goto Error; -+ if (!EncodeImageInternal( -+ bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, -+ height, quality, low_effort, use_cache, &crunch_configs[idx], -+ &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size, -+ &data_size, picture, remaining_percent, &percent)) { -+ goto Error; -+ } - - // If we are better than what we already have. - if (VP8LBitWriterNumBytes(bw) < best_size) { -@@ -1680,18 +1645,15 @@ static int EncodeStreamHook(void* input, void* data2) { - } - VP8LBitWriterSwap(&bw_best, bw); - --Error: -+ Error: - VP8LBitWriterWipeOut(&bw_best); -- params->err_ = err; - // The hook should return false in case of error. -- return (err == VP8_ENC_OK); -+ return (params->picture_->error_code == VP8_ENC_OK); - } - --WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, -- const WebPPicture* const picture, -- VP8LBitWriter* const bw_main, -- int use_cache) { -- WebPEncodingError err = VP8_ENC_OK; -+int VP8LEncodeStream(const WebPConfig* const config, -+ const WebPPicture* const picture, -+ VP8LBitWriter* const bw_main, int use_cache) { - VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture); - VP8LEncoder* enc_side = NULL; - CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX]; -@@ -1703,15 +1665,23 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - // The main thread uses picture->stats, the side thread uses stats_side. 
- WebPAuxStats stats_side; - VP8LBitWriter bw_side; -+ WebPPicture picture_side; - const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface(); - int ok_main; - -+ if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) { -+ VP8LEncoderDelete(enc_main); -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); -+ } -+ -+ // Avoid "garbage value" error from Clang's static analysis tool. -+ WebPPictureInit(&picture_side); -+ - // Analyze image (entropy, num_palettes etc) -- if (enc_main == NULL || -- !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, -+ if (!EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, - &red_and_blue_always_zero) || -- !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ !EncoderInit(enc_main)) { -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - -@@ -1740,25 +1710,32 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - StreamEncodeContext* const param = - (idx == 0) ? ¶ms_main : ¶ms_side; - param->config_ = config; -- param->picture_ = picture; - param->use_cache_ = use_cache; - param->red_and_blue_always_zero_ = red_and_blue_always_zero; - if (idx == 0) { -+ param->picture_ = picture; - param->stats_ = picture->stats; - param->bw_ = bw_main; - param->enc_ = enc_main; - } else { -+ // Create a side picture (error_code is not thread-safe). -+ if (!WebPPictureView(picture, /*left=*/0, /*top=*/0, picture->width, -+ picture->height, &picture_side)) { -+ assert(0); -+ } -+ picture_side.progress_hook = NULL; // Progress hook is not thread-safe. -+ param->picture_ = &picture_side; // No need to free a view afterwards. - param->stats_ = (picture->stats == NULL) ? NULL : &stats_side; - // Create a side bit writer. 
- if (!VP8LBitWriterClone(bw_main, &bw_side)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - param->bw_ = &bw_side; - // Create a side encoder. -- enc_side = VP8LEncoderNew(config, picture); -+ enc_side = VP8LEncoderNew(config, &picture_side); - if (enc_side == NULL || !EncoderInit(enc_side)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - // Copy the values that were computed for the main encoder. -@@ -1767,6 +1744,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - enc_side->palette_size_ = enc_main->palette_size_; - memcpy(enc_side->palette_, enc_main->palette_, - sizeof(enc_main->palette_)); -+ memcpy(enc_side->palette_sorted_, enc_main->palette_sorted_, -+ sizeof(enc_main->palette_sorted_)); - param->enc_ = enc_side; - } - // Create the workers. -@@ -1780,7 +1759,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - // Start the second thread if needed. - if (num_crunch_configs_side != 0) { - if (!worker_interface->Reset(&worker_side)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - #if !defined(WEBP_DISABLE_STATS) -@@ -1790,8 +1769,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - memcpy(&stats_side, picture->stats, sizeof(stats_side)); - } - #endif -- // This line is only useful to remove a Clang static analyzer warning. -- params_side.err_ = VP8_ENC_OK; - worker_interface->Launch(&worker_side); - } - // Execute the main thread. -@@ -1803,7 +1780,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const int ok_side = worker_interface->Sync(&worker_side); - worker_interface->End(&worker_side); - if (!ok_main || !ok_side) { -- err = ok_main ? 
params_side.err_ : params_main.err_; -+ if (picture->error_code == VP8_ENC_OK) { -+ assert(picture_side.error_code != VP8_ENC_OK); -+ WebPEncodingSetError(picture, picture_side.error_code); -+ } - goto Error; - } - if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) { -@@ -1814,18 +1794,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - } - #endif - } -- } else { -- if (!ok_main) { -- err = params_main.err_; -- goto Error; -- } - } - --Error: -+ Error: - VP8LBitWriterWipeOut(&bw_side); - VP8LEncoderDelete(enc_main); - VP8LEncoderDelete(enc_side); -- return err; -+ return (picture->error_code == VP8_ENC_OK); - } - - #undef CRUNCH_CONFIGS_MAX -@@ -1838,15 +1813,12 @@ int VP8LEncodeImage(const WebPConfig* const config, - size_t coded_size; - int percent = 0; - int initial_size; -- WebPEncodingError err = VP8_ENC_OK; - VP8LBitWriter bw; - - if (picture == NULL) return 0; - - if (config == NULL || picture->argb == NULL) { -- err = VP8_ENC_ERROR_NULL_PARAMETER; -- WebPEncodingSetError(picture, err); -- return 0; -+ return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - } - - width = picture->width; -@@ -1856,13 +1828,13 @@ int VP8LEncodeImage(const WebPConfig* const config, - initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? - width * height : width * height * 2; - if (!VP8LBitWriterInit(&bw, initial_size)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - - if (!WebPReportProgress(picture, 1, &percent)) { - UserAbort: -- err = VP8_ENC_ERROR_USER_ABORT; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_USER_ABORT); - goto Error; - } - // Reset stats (for pure lossless coding) -@@ -1878,28 +1850,26 @@ int VP8LEncodeImage(const WebPConfig* const config, - - // Write image size. 
- if (!WriteImageSize(picture, &bw)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - - has_alpha = WebPPictureHasTransparency(picture); - // Write the non-trivial Alpha flag and lossless version. - if (!WriteRealAlphaAndVersion(&bw, has_alpha)) { -- err = VP8_ENC_ERROR_OUT_OF_MEMORY; -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto Error; - } - -- if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; -+ if (!WebPReportProgress(picture, 2, &percent)) goto UserAbort; - - // Encode main image stream. -- err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); -- if (err != VP8_ENC_OK) goto Error; -+ if (!VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/)) goto Error; - -- if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort; -+ if (!WebPReportProgress(picture, 99, &percent)) goto UserAbort; - - // Finish the RIFF chunk. -- err = WriteImage(picture, &bw, &coded_size); -- if (err != VP8_ENC_OK) goto Error; -+ if (!WriteImage(picture, &bw, &coded_size)) goto Error; - - if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort; - -@@ -1918,13 +1888,11 @@ int VP8LEncodeImage(const WebPConfig* const config, - } - - Error: -- if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; -- VP8LBitWriterWipeOut(&bw); -- if (err != VP8_ENC_OK) { -- WebPEncodingSetError(picture, err); -- return 0; -+ if (bw.error_) { -+ WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - } -- return 1; -+ VP8LBitWriterWipeOut(&bw); -+ return (picture->error_code == VP8_ENC_OK); - } - - //------------------------------------------------------------------------------ -diff --git a/3rdparty/libwebp/src/enc/vp8li_enc.h b/3rdparty/libwebp/src/enc/vp8li_enc.h -index 94210ce9f3bd..3d35e1612dee 100644 ---- a/3rdparty/libwebp/src/enc/vp8li_enc.h -+++ b/3rdparty/libwebp/src/enc/vp8li_enc.h -@@ -69,6 +69,8 @@ typedef struct { - int use_palette_; - int palette_size_; - uint32_t 
palette_[MAX_PALETTE_SIZE]; -+ // Sorted version of palette_ for cache purposes. -+ uint32_t palette_sorted_[MAX_PALETTE_SIZE]; - - // Some 'scratch' (potentially large) objects. - struct VP8LBackwardRefs refs_[4]; // Backward Refs array for temporaries. -@@ -87,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config, - - // Encodes the main image stream using the supplied bit writer. - // If 'use_cache' is false, disables the use of color cache. --WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, -- const WebPPicture* const picture, -- VP8LBitWriter* const bw, int use_cache); -+// Returns false in case of error (stored in picture->error_code). -+int VP8LEncodeStream(const WebPConfig* const config, -+ const WebPPicture* const picture, VP8LBitWriter* const bw, -+ int use_cache); - - #if (WEBP_NEAR_LOSSLESS == 1) - // in near_lossless.c -@@ -101,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, - //------------------------------------------------------------------------------ - // Image transforms in predictor.c. - --void VP8LResidualImage(int width, int height, int bits, int low_effort, -- uint32_t* const argb, uint32_t* const argb_scratch, -- uint32_t* const image, int near_lossless, int exact, -- int used_subtract_green); -- --void VP8LColorSpaceTransform(int width, int height, int bits, int quality, -- uint32_t* const argb, uint32_t* image); -+// pic and percent are for progress. -+// Returns false in case of error (stored in pic->error_code). 
-+int VP8LResidualImage(int width, int height, int bits, int low_effort, -+ uint32_t* const argb, uint32_t* const argb_scratch, -+ uint32_t* const image, int near_lossless, int exact, -+ int used_subtract_green, const WebPPicture* const pic, -+ int percent_range, int* const percent); -+ -+int VP8LColorSpaceTransform(int width, int height, int bits, int quality, -+ uint32_t* const argb, uint32_t* image, -+ const WebPPicture* const pic, int percent_range, -+ int* const percent); - - //------------------------------------------------------------------------------ - -diff --git a/3rdparty/libwebp/src/enc/webp_enc.c b/3rdparty/libwebp/src/enc/webp_enc.c -index ce2db2e94bcf..583fe6a8bbd6 100644 ---- a/3rdparty/libwebp/src/enc/webp_enc.c -+++ b/3rdparty/libwebp/src/enc/webp_enc.c -@@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic, - WebPEncodingError error) { - assert((int)error < VP8_ENC_ERROR_LAST); - assert((int)error >= VP8_ENC_OK); -- ((WebPPicture*)pic)->error_code = error; -+ // The oldest error reported takes precedence over the new one. 
-+ if (pic->error_code == VP8_ENC_OK) { -+ ((WebPPicture*)pic)->error_code = error; -+ } - return 0; - } - -@@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic, - *percent_store = percent; - if (pic->progress_hook && !pic->progress_hook(percent, pic)) { - // user abort requested -- WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); -- return 0; -+ return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); - } - } - return 1; // ok -@@ -329,16 +331,14 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { - int ok = 0; - if (pic == NULL) return 0; - -- WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far -+ pic->error_code = VP8_ENC_OK; // all ok so far - if (config == NULL) { // bad params - return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); - } - if (!WebPValidateConfig(config)) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } -- if (pic->width <= 0 || pic->height <= 0) { -- return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); -- } -+ if (!WebPValidatePicture(pic)) return 0; - if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); - } -diff --git a/3rdparty/libwebp/src/mux/anim_encode.c b/3rdparty/libwebp/src/mux/anim_encode.c -index 7be99068f687..d1c61a2f1ee5 100644 ---- a/3rdparty/libwebp/src/mux/anim_encode.c -+++ b/3rdparty/libwebp/src/mux/anim_encode.c -@@ -248,9 +248,6 @@ WebPAnimEncoder* WebPAnimEncoderNewInternal( - - enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc)); - if (enc == NULL) return NULL; -- // sanity inits, so we can call WebPAnimEncoderDelete(): -- enc->encoded_frames_ = NULL; -- enc->mux_ = NULL; - MarkNoError(enc); - - // Dimensions and options. -@@ -421,7 +418,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src, - const int max_allowed_diff_lossy = QualityToMaxDiff(quality); - const int max_allowed_diff = is_lossless ? 
0 : max_allowed_diff_lossy; - -- // Sanity checks. -+ // Assumption/correctness checks. - assert(src->width == dst->width && src->height == dst->height); - assert(rect->x_offset_ + rect->width_ <= dst->width); - assert(rect->y_offset_ + rect->height_ <= dst->height); -@@ -596,16 +593,17 @@ int WebPAnimEncoderRefineRect( - int is_lossless, float quality, int* const x_offset, int* const y_offset, - int* const width, int* const height) { - FrameRectangle rect; -- const int right = clip(*x_offset + *width, 0, curr_canvas->width); -- const int left = clip(*x_offset, 0, curr_canvas->width - 1); -- const int bottom = clip(*y_offset + *height, 0, curr_canvas->height); -- const int top = clip(*y_offset, 0, curr_canvas->height - 1); -+ int right, left, bottom, top; - if (prev_canvas == NULL || curr_canvas == NULL || - prev_canvas->width != curr_canvas->width || - prev_canvas->height != curr_canvas->height || - !prev_canvas->use_argb || !curr_canvas->use_argb) { - return 0; - } -+ right = clip(*x_offset + *width, 0, curr_canvas->width); -+ left = clip(*x_offset, 0, curr_canvas->width - 1); -+ bottom = clip(*y_offset + *height, 0, curr_canvas->height); -+ top = clip(*y_offset, 0, curr_canvas->height - 1); - rect.x_offset_ = left; - rect.y_offset_ = top; - rect.width_ = clip(right - left, 0, curr_canvas->width - rect.x_offset_); -@@ -949,7 +947,8 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { - int new_duration; - - assert(enc->count_ >= 1); -- assert(prev_enc_frame->sub_frame_.duration == -+ assert(!prev_enc_frame->is_key_frame_ || -+ prev_enc_frame->sub_frame_.duration == - prev_enc_frame->key_frame_.duration); - assert(prev_enc_frame->sub_frame_.duration == - (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1))); -@@ -966,7 +965,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) { - 0x10, 0x88, 0x88, 0x08 - }; - const WebPData lossless_1x1 = { -- lossless_1x1_bytes, sizeof(lossless_1x1_bytes) -+ 
lossless_1x1_bytes, sizeof(lossless_1x1_bytes) - }; - const uint8_t lossy_1x1_bytes[] = { - 0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50, -@@ -1358,6 +1357,12 @@ int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp, - if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) { - return 0; - } -+ // IncreasePreviousDuration() may add a frame to avoid exceeding -+ // MAX_DURATION which could cause CacheFrame() to over read encoded_frames_ -+ // before the next flush. -+ if (enc->count_ == enc->size_ && !FlushFrames(enc)) { -+ return 0; -+ } - } else { - enc->first_timestamp_ = timestamp; - } -diff --git a/3rdparty/libwebp/src/mux/muxedit.c b/3rdparty/libwebp/src/mux/muxedit.c -index ccf14b2a0c51..aab479cc6c78 100644 ---- a/3rdparty/libwebp/src/mux/muxedit.c -+++ b/3rdparty/libwebp/src/mux/muxedit.c -@@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) { - err = ChunkAssignData(&chunk, data, copy_data, tag); \ - if (err == WEBP_MUX_OK) { \ - err = ChunkSetHead(&chunk, (LIST)); \ -+ if (err != WEBP_MUX_OK) ChunkRelease(&chunk); \ - } \ - return err; \ - } -@@ -235,7 +236,6 @@ WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream, - WebPMuxImage wpi; - WebPMuxError err; - -- // Sanity checks. - if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL || - bitstream->size > MAX_CHUNK_PAYLOAD) { - return WEBP_MUX_INVALID_ARGUMENT; -@@ -267,7 +267,6 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info, - WebPMuxImage wpi; - WebPMuxError err; - -- // Sanity checks. - if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT; - - if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT; -@@ -556,7 +555,8 @@ static WebPMuxError MuxCleanup(WebPMux* const mux) { - if (num_frames == 1) { - WebPMuxImage* frame = NULL; - err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame); -- assert(err == WEBP_MUX_OK); // We know that one frame does exist. 
-+ if (err != WEBP_MUX_OK) return err; -+ // We know that one frame does exist. - assert(frame != NULL); - if (frame->header_ != NULL && - ((mux->canvas_width_ == 0 && mux->canvas_height_ == 0) || -diff --git a/3rdparty/libwebp/src/mux/muxi.h b/3rdparty/libwebp/src/mux/muxi.h -index 2289822e8f88..fc44d6f2feb8 100644 ---- a/3rdparty/libwebp/src/mux/muxi.h -+++ b/3rdparty/libwebp/src/mux/muxi.h -@@ -28,8 +28,8 @@ extern "C" { - // Defines and constants. - - #define MUX_MAJ_VERSION 1 --#define MUX_MIN_VERSION 2 --#define MUX_REV_VERSION 0 -+#define MUX_MIN_VERSION 3 -+#define MUX_REV_VERSION 1 - - // Chunk object. - typedef struct WebPChunk WebPChunk; -diff --git a/3rdparty/libwebp/src/mux/muxinternal.c b/3rdparty/libwebp/src/mux/muxinternal.c -index b9ee6717d3a4..75b6b416b993 100644 ---- a/3rdparty/libwebp/src/mux/muxinternal.c -+++ b/3rdparty/libwebp/src/mux/muxinternal.c -@@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk, - - WebPMuxError ChunkAppend(WebPChunk* const chunk, - WebPChunk*** const chunk_list) { -+ WebPMuxError err; - assert(chunk_list != NULL && *chunk_list != NULL); - - if (**chunk_list == NULL) { -- ChunkSetHead(chunk, *chunk_list); -+ err = ChunkSetHead(chunk, *chunk_list); - } else { - WebPChunk* last_chunk = **chunk_list; - while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_; -- ChunkSetHead(chunk, &last_chunk->next_); -- *chunk_list = &last_chunk->next_; -+ err = ChunkSetHead(chunk, &last_chunk->next_); -+ if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_; - } -- return WEBP_MUX_OK; -+ return err; - } - - //------------------------------------------------------------------------------ -diff --git a/3rdparty/libwebp/src/mux/muxread.c b/3rdparty/libwebp/src/mux/muxread.c -index 0101fde15da0..9862ec68eea6 100644 ---- a/3rdparty/libwebp/src/mux/muxread.c -+++ b/3rdparty/libwebp/src/mux/muxread.c -@@ -56,7 +56,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk, - uint32_t chunk_size; - WebPData 
chunk_data; - -- // Sanity checks. -+ // Correctness checks. - if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA; - chunk_size = GetLE32(data + TAG_SIZE); - if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA; -@@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, - // Each of ANMF chunk contain a header at the beginning. So, its size should - // be at least 'hdr_size'. - if (size < hdr_size) goto Fail; -- ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_); -+ if (ChunkAssignData(&subchunk, &temp, copy_data, -+ chunk->tag_) != WEBP_MUX_OK) { -+ goto Fail; -+ } - } -- ChunkSetHead(&subchunk, &wpi->header_); -+ if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail; - wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks. - - // Rest of the chunks. -@@ -186,7 +189,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data, - WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL }; - ChunkInit(&chunk); - -- // Sanity checks. - if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) { - return NULL; // version mismatch - } -@@ -481,7 +483,6 @@ WebPMuxError WebPMuxGetFrame( - WebPMuxError err; - WebPMuxImage* wpi; - -- // Sanity checks. - if (mux == NULL || frame == NULL) { - return WEBP_MUX_INVALID_ARGUMENT; - } -diff --git a/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h b/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h -index 46b38807062c..24f3af7b5454 100644 ---- a/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h -+++ b/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h -@@ -55,7 +55,7 @@ void VP8LoadFinalBytes(VP8BitReader* const br); - - // makes sure br->value_ has at least BITS bits worth of data - static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE --void VP8LoadNewBytes(VP8BitReader* const br) { -+void VP8LoadNewBytes(VP8BitReader* WEBP_RESTRICT const br) { - assert(br != NULL && br->buf_ != NULL); - // Read 'BITS' bits at a time if possible. 
- if (br->buf_ < br->buf_max_) { -@@ -104,7 +104,7 @@ void VP8LoadNewBytes(VP8BitReader* const br) { - } - - // Read a bit with proba 'prob'. Speed-critical function! --static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, -+static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br, - int prob, const char label[]) { - // Don't move this declaration! It makes a big speed difference to store - // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't -@@ -137,7 +137,8 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, - - // simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) - static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE --int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { -+int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v, -+ const char label[]) { - if (br->bits_ < 0) { - VP8LoadNewBytes(br); - } -@@ -147,15 +148,15 @@ int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) { - const range_t value = (range_t)(br->value_ >> pos); - const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 - br->bits_ -= 1; -- br->range_ += mask; -+ br->range_ += (range_t)mask; - br->range_ |= 1; -- br->value_ -= (bit_t)((split + 1) & mask) << pos; -+ br->value_ -= (bit_t)((split + 1) & (uint32_t)mask) << pos; - BT_TRACK(br); - return (v ^ mask) - mask; - } - } - --static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, -+static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br, - int prob, const char label[]) { - // Don't move this declaration! 
It makes a big speed difference to store - // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't -diff --git a/3rdparty/libwebp/src/utils/bit_reader_utils.c b/3rdparty/libwebp/src/utils/bit_reader_utils.c -index 857cd6098882..a26557aa49f9 100644 ---- a/3rdparty/libwebp/src/utils/bit_reader_utils.c -+++ b/3rdparty/libwebp/src/utils/bit_reader_utils.c -@@ -15,6 +15,7 @@ - #include "src/webp/config.h" - #endif - -+#include "src/dsp/cpu.h" - #include "src/utils/bit_reader_inl_utils.h" - #include "src/utils/utils.h" - -@@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits, - - #define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. - --#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ -+#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \ - defined(__i386__) || defined(_M_IX86) || \ - defined(__x86_64__) || defined(_M_X64) - #define VP8L_USE_FAST_LOAD -diff --git a/3rdparty/libwebp/src/utils/bit_reader_utils.h b/3rdparty/libwebp/src/utils/bit_reader_utils.h -index e64156e31817..25ff31e5d97a 100644 ---- a/3rdparty/libwebp/src/utils/bit_reader_utils.h -+++ b/3rdparty/libwebp/src/utils/bit_reader_utils.h -@@ -19,6 +19,7 @@ - #ifdef _MSC_VER - #include // _byteswap_ulong - #endif -+#include "src/dsp/cpu.h" - #include "src/webp/types.h" - - // Warning! This macro triggers quite some MACRO wizardry around func signature! 
-@@ -64,7 +65,7 @@ extern "C" { - #define BITS 56 - #elif defined(__arm__) || defined(_M_ARM) // ARM - #define BITS 24 --#elif defined(__aarch64__) // ARM 64bit -+#elif WEBP_AARCH64 // ARM 64bit - #define BITS 56 - #elif defined(__mips__) // MIPS - #define BITS 24 -diff --git a/3rdparty/libwebp/src/utils/bit_writer_utils.c b/3rdparty/libwebp/src/utils/bit_writer_utils.c -index bef0e31ca5ea..2f408508f114 100644 ---- a/3rdparty/libwebp/src/utils/bit_writer_utils.c -+++ b/3rdparty/libwebp/src/utils/bit_writer_utils.c -@@ -278,7 +278,7 @@ void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) { - // If needed, make some room by flushing some bits out. - if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { - const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; -- if (extra_size != (size_t)extra_size || -+ if (!CheckSizeOverflow(extra_size) || - !VP8LBitWriterResize(bw, (size_t)extra_size)) { - bw->cur_ = bw->buf_; - bw->error_ = 1; -@@ -314,7 +314,7 @@ void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) { - while (used >= VP8L_WRITER_BITS) { - if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) { - const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE; -- if (extra_size != (size_t)extra_size || -+ if (!CheckSizeOverflow(extra_size) || - !VP8LBitWriterResize(bw, (size_t)extra_size)) { - bw->cur_ = bw->buf_; - bw->error_ = 1; -diff --git a/3rdparty/libwebp/src/utils/color_cache_utils.c b/3rdparty/libwebp/src/utils/color_cache_utils.c -index b09f538e8be6..7b5222b6e554 100644 ---- a/3rdparty/libwebp/src/utils/color_cache_utils.c -+++ b/3rdparty/libwebp/src/utils/color_cache_utils.c -@@ -20,22 +20,22 @@ - //------------------------------------------------------------------------------ - // VP8LColorCache. 
- --int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) { -+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits) { - const int hash_size = 1 << hash_bits; -- assert(cc != NULL); -+ assert(color_cache != NULL); - assert(hash_bits > 0); -- cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size, -- sizeof(*cc->colors_)); -- if (cc->colors_ == NULL) return 0; -- cc->hash_shift_ = 32 - hash_bits; -- cc->hash_bits_ = hash_bits; -+ color_cache->colors_ = (uint32_t*)WebPSafeCalloc( -+ (uint64_t)hash_size, sizeof(*color_cache->colors_)); -+ if (color_cache->colors_ == NULL) return 0; -+ color_cache->hash_shift_ = 32 - hash_bits; -+ color_cache->hash_bits_ = hash_bits; - return 1; - } - --void VP8LColorCacheClear(VP8LColorCache* const cc) { -- if (cc != NULL) { -- WebPSafeFree(cc->colors_); -- cc->colors_ = NULL; -+void VP8LColorCacheClear(VP8LColorCache* const color_cache) { -+ if (color_cache != NULL) { -+ WebPSafeFree(color_cache->colors_); -+ color_cache->colors_ = NULL; - } - } - -diff --git a/3rdparty/libwebp/src/utils/huffman_encode_utils.c b/3rdparty/libwebp/src/utils/huffman_encode_utils.c -index 6f3b1bbe020f..585db9195184 100644 ---- a/3rdparty/libwebp/src/utils/huffman_encode_utils.c -+++ b/3rdparty/libwebp/src/utils/huffman_encode_utils.c -@@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree, - // especially when population counts are longer than 2**tree_limit, but - // we are not planning to use this with extremely long blocks. 
- // --// See http://en.wikipedia.org/wiki/Huffman_coding -+// See https://en.wikipedia.org/wiki/Huffman_coding - static void GenerateOptimalTree(const uint32_t* const histogram, - int histogram_size, - HuffmanTree* tree, int tree_depth_limit, -@@ -404,8 +404,7 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) { - // Main entry point - - void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, -- uint8_t* const buf_rle, -- HuffmanTree* const huff_tree, -+ uint8_t* const buf_rle, HuffmanTree* const huff_tree, - HuffmanTreeCode* const huff_code) { - const int num_symbols = huff_code->num_symbols; - memset(buf_rle, 0, num_symbols * sizeof(*buf_rle)); -diff --git a/3rdparty/libwebp/src/utils/huffman_encode_utils.h b/3rdparty/libwebp/src/utils/huffman_encode_utils.h -index 3e6763ce49db..3f7f1d8074c2 100644 ---- a/3rdparty/libwebp/src/utils/huffman_encode_utils.h -+++ b/3rdparty/libwebp/src/utils/huffman_encode_utils.h -@@ -51,7 +51,7 @@ int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree, - // huffman code tree. 
- void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit, - uint8_t* const buf_rle, HuffmanTree* const huff_tree, -- HuffmanTreeCode* const tree); -+ HuffmanTreeCode* const huff_code); - - #ifdef __cplusplus - } -diff --git a/3rdparty/libwebp/src/utils/huffman_utils.c b/3rdparty/libwebp/src/utils/huffman_utils.c -index 0cba0fbb7d4f..cf73abd437d0 100644 ---- a/3rdparty/libwebp/src/utils/huffman_utils.c -+++ b/3rdparty/libwebp/src/utils/huffman_utils.c -@@ -142,7 +142,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, - - { - int step; // step size to replicate values in current table -- uint32_t low = -1; // low bits for current root entry -+ uint32_t low = 0xffffffffu; // low bits for current root entry - uint32_t mask = total_size - 1; // mask for low bits - uint32_t key = 0; // reversed prefix code - int num_nodes = 1; // number of Huffman tree nodes -@@ -177,21 +177,24 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, - if (num_open < 0) { - return 0; - } -- if (root_table == NULL) continue; - for (; count[len] > 0; --count[len]) { - HuffmanCode code; - if ((key & mask) != low) { -- table += table_size; -+ if (root_table != NULL) table += table_size; - table_bits = NextTableBitSize(count, len, root_bits); - table_size = 1 << table_bits; - total_size += table_size; - low = key & mask; -- root_table[low].bits = (uint8_t)(table_bits + root_bits); -- root_table[low].value = (uint16_t)((table - root_table) - low); -+ if (root_table != NULL) { -+ root_table[low].bits = (uint8_t)(table_bits + root_bits); -+ root_table[low].value = (uint16_t)((table - root_table) - low); -+ } -+ } -+ if (root_table != NULL) { -+ code.bits = (uint8_t)(len - root_bits); -+ code.value = (uint16_t)sorted[symbol++]; -+ ReplicateValue(&table[key >> root_bits], step, table_size, code); - } -- code.bits = (uint8_t)(len - root_bits); -- code.value = (uint16_t)sorted[symbol++]; -- ReplicateValue(&table[key >> root_bits], 
step, table_size, code); - key = GetNextKey(key, len); - } - } -@@ -211,25 +214,83 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, - ((1 << MAX_CACHE_BITS) + NUM_LITERAL_CODES + NUM_LENGTH_CODES) - // Cut-off value for switching between heap and stack allocation. - #define SORTED_SIZE_CUTOFF 512 --int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, -+int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits, - const int code_lengths[], int code_lengths_size) { -- int total_size; -+ const int total_size = -+ BuildHuffmanTable(NULL, root_bits, code_lengths, code_lengths_size, NULL); - assert(code_lengths_size <= MAX_CODE_LENGTHS_SIZE); -- if (root_table == NULL) { -- total_size = BuildHuffmanTable(NULL, root_bits, -- code_lengths, code_lengths_size, NULL); -- } else if (code_lengths_size <= SORTED_SIZE_CUTOFF) { -+ if (total_size == 0 || root_table == NULL) return total_size; -+ -+ if (root_table->curr_segment->curr_table + total_size >= -+ root_table->curr_segment->start + root_table->curr_segment->size) { -+ // If 'root_table' does not have enough memory, allocate a new segment. -+ // The available part of root_table->curr_segment is left unused because we -+ // need a contiguous buffer. -+ const int segment_size = root_table->curr_segment->size; -+ struct HuffmanTablesSegment* next = -+ (HuffmanTablesSegment*)WebPSafeMalloc(1, sizeof(*next)); -+ if (next == NULL) return 0; -+ // Fill the new segment. -+ // We need at least 'total_size' but if that value is small, it is better to -+ // allocate a big chunk to prevent more allocations later. 'segment_size' is -+ // therefore chosen (any other arbitrary value could be chosen). -+ next->size = total_size > segment_size ? 
total_size : segment_size; -+ next->start = -+ (HuffmanCode*)WebPSafeMalloc(next->size, sizeof(*next->start)); -+ if (next->start == NULL) { -+ WebPSafeFree(next); -+ return 0; -+ } -+ next->curr_table = next->start; -+ next->next = NULL; -+ // Point to the new segment. -+ root_table->curr_segment->next = next; -+ root_table->curr_segment = next; -+ } -+ if (code_lengths_size <= SORTED_SIZE_CUTOFF) { - // use local stack-allocated array. - uint16_t sorted[SORTED_SIZE_CUTOFF]; -- total_size = BuildHuffmanTable(root_table, root_bits, -- code_lengths, code_lengths_size, sorted); -- } else { // rare case. Use heap allocation. -+ BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits, -+ code_lengths, code_lengths_size, sorted); -+ } else { // rare case. Use heap allocation. - uint16_t* const sorted = - (uint16_t*)WebPSafeMalloc(code_lengths_size, sizeof(*sorted)); - if (sorted == NULL) return 0; -- total_size = BuildHuffmanTable(root_table, root_bits, -- code_lengths, code_lengths_size, sorted); -+ BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits, -+ code_lengths, code_lengths_size, sorted); - WebPSafeFree(sorted); - } - return total_size; - } -+ -+int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables) { -+ // Have 'segment' point to the first segment for now, 'root'. -+ HuffmanTablesSegment* const root = &huffman_tables->root; -+ huffman_tables->curr_segment = root; -+ // Allocate root. -+ root->start = (HuffmanCode*)WebPSafeMalloc(size, sizeof(*root->start)); -+ if (root->start == NULL) return 0; -+ root->curr_table = root->start; -+ root->next = NULL; -+ root->size = size; -+ return 1; -+} -+ -+void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables) { -+ HuffmanTablesSegment *current, *next; -+ if (huffman_tables == NULL) return; -+ // Free the root node. 
-+ current = &huffman_tables->root; -+ next = current->next; -+ WebPSafeFree(current->start); -+ current->start = NULL; -+ current->next = NULL; -+ current = next; -+ // Free the following nodes. -+ while (current != NULL) { -+ next = current->next; -+ WebPSafeFree(current->start); -+ WebPSafeFree(current); -+ current = next; -+ } -+} -diff --git a/3rdparty/libwebp/src/utils/huffman_utils.h b/3rdparty/libwebp/src/utils/huffman_utils.h -index 13b7ad1ac40c..98415c532895 100644 ---- a/3rdparty/libwebp/src/utils/huffman_utils.h -+++ b/3rdparty/libwebp/src/utils/huffman_utils.h -@@ -43,6 +43,29 @@ typedef struct { - // or non-literal symbol otherwise - } HuffmanCode32; - -+// Contiguous memory segment of HuffmanCodes. -+typedef struct HuffmanTablesSegment { -+ HuffmanCode* start; -+ // Pointer to where we are writing into the segment. Starts at 'start' and -+ // cannot go beyond 'start' + 'size'. -+ HuffmanCode* curr_table; -+ // Pointer to the next segment in the chain. -+ struct HuffmanTablesSegment* next; -+ int size; -+} HuffmanTablesSegment; -+ -+// Chained memory segments of HuffmanCodes. -+typedef struct HuffmanTables { -+ HuffmanTablesSegment root; -+ // Currently processed segment. At first, this is 'root'. -+ HuffmanTablesSegment* curr_segment; -+} HuffmanTables; -+ -+// Allocates a HuffmanTables with 'size' contiguous HuffmanCodes. Returns 0 on -+// memory allocation error, 1 otherwise. -+int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables); -+void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables); -+ - #define HUFFMAN_PACKED_BITS 6 - #define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS) - -@@ -78,9 +101,7 @@ void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups); - // the huffman table. - // Returns built table size or 0 in case of error (invalid tree or - // memory error). --// If root_table is NULL, it returns 0 if a lookup cannot be built, something --// > 0 otherwise (but not the table size). 
--int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, -+int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits, - const int code_lengths[], int code_lengths_size); - - #ifdef __cplusplus -diff --git a/3rdparty/libwebp/src/utils/palette.c b/3rdparty/libwebp/src/utils/palette.c -new file mode 100644 -index 000000000000..515da2101950 ---- /dev/null -+++ b/3rdparty/libwebp/src/utils/palette.c -@@ -0,0 +1,402 @@ -+// Copyright 2023 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Utilities for palette analysis. -+// -+// Author: Vincent Rabaud (vrabaud@google.com) -+ -+#include "src/utils/palette.h" -+ -+#include -+#include -+ -+#include "src/dsp/lossless_common.h" -+#include "src/utils/color_cache_utils.h" -+#include "src/utils/utils.h" -+#include "src/webp/encode.h" -+#include "src/webp/format_constants.h" -+ -+// ----------------------------------------------------------------------------- -+ -+// Palette reordering for smaller sum of deltas (and for smaller storage). -+ -+static int PaletteCompareColorsForQsort(const void* p1, const void* p2) { -+ const uint32_t a = WebPMemToUint32((uint8_t*)p1); -+ const uint32_t b = WebPMemToUint32((uint8_t*)p2); -+ assert(a != b); -+ return (a < b) ? -1 : 1; -+} -+ -+static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) { -+ return (v <= 128) ? v : (256 - v); -+} -+ -+// Computes a value that is related to the entropy created by the -+// palette entry diff. 
-+// -+// Note that the last & 0xff is a no-operation in the next statement, but -+// removed by most compilers and is here only for regularity of the code. -+static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) { -+ const uint32_t diff = VP8LSubPixels(col1, col2); -+ const int kMoreWeightForRGBThanForAlpha = 9; -+ uint32_t score; -+ score = PaletteComponentDistance((diff >> 0) & 0xff); -+ score += PaletteComponentDistance((diff >> 8) & 0xff); -+ score += PaletteComponentDistance((diff >> 16) & 0xff); -+ score *= kMoreWeightForRGBThanForAlpha; -+ score += PaletteComponentDistance((diff >> 24) & 0xff); -+ return score; -+} -+ -+static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) { -+ const uint32_t tmp = *col1; -+ *col1 = *col2; -+ *col2 = tmp; -+} -+ -+int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, int num_colors) { -+ int low = 0, hi = num_colors; -+ if (sorted[low] == color) return low; // loop invariant: sorted[low] != color -+ while (1) { -+ const int mid = (low + hi) >> 1; -+ if (sorted[mid] == color) { -+ return mid; -+ } else if (sorted[mid] < color) { -+ low = mid; -+ } else { -+ hi = mid; -+ } -+ } -+ assert(0); -+ return 0; -+} -+ -+void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors, -+ uint32_t sorted[], uint32_t idx_map[]) { -+ uint32_t i; -+ memcpy(sorted, palette, num_colors * sizeof(*sorted)); -+ qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort); -+ for (i = 0; i < num_colors; ++i) { -+ idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i; -+ } -+} -+ -+//------------------------------------------------------------------------------ -+ -+#define COLOR_HASH_SIZE (MAX_PALETTE_SIZE * 4) -+#define COLOR_HASH_RIGHT_SHIFT 22 // 32 - log2(COLOR_HASH_SIZE). 
-+ -+int GetColorPalette(const WebPPicture* const pic, uint32_t* const palette) { -+ int i; -+ int x, y; -+ int num_colors = 0; -+ uint8_t in_use[COLOR_HASH_SIZE] = {0}; -+ uint32_t colors[COLOR_HASH_SIZE] = {0}; -+ const uint32_t* argb = pic->argb; -+ const int width = pic->width; -+ const int height = pic->height; -+ uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0] -+ assert(pic != NULL); -+ assert(pic->use_argb); -+ -+ for (y = 0; y < height; ++y) { -+ for (x = 0; x < width; ++x) { -+ int key; -+ if (argb[x] == last_pix) { -+ continue; -+ } -+ last_pix = argb[x]; -+ key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT); -+ while (1) { -+ if (!in_use[key]) { -+ colors[key] = last_pix; -+ in_use[key] = 1; -+ ++num_colors; -+ if (num_colors > MAX_PALETTE_SIZE) { -+ return MAX_PALETTE_SIZE + 1; // Exact count not needed. -+ } -+ break; -+ } else if (colors[key] == last_pix) { -+ break; // The color is already there. -+ } else { -+ // Some other color sits here, so do linear conflict resolution. -+ ++key; -+ key &= (COLOR_HASH_SIZE - 1); // Key mask. -+ } -+ } -+ } -+ argb += pic->argb_stride; -+ } -+ -+ if (palette != NULL) { // Fill the colors into palette. -+ num_colors = 0; -+ for (i = 0; i < COLOR_HASH_SIZE; ++i) { -+ if (in_use[i]) { -+ palette[num_colors] = colors[i]; -+ ++num_colors; -+ } -+ } -+ qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort); -+ } -+ return num_colors; -+} -+ -+#undef COLOR_HASH_SIZE -+#undef COLOR_HASH_RIGHT_SHIFT -+ -+// ----------------------------------------------------------------------------- -+ -+// The palette has been sorted by alpha. This function checks if the other -+// components of the palette have a monotonic development with regards to -+// position in the palette. If all have monotonic development, there is -+// no benefit to re-organize them greedily. A monotonic development -+// would be spotted in green-only situations (like lossy alpha) or gray-scale -+// images. 
-+static int PaletteHasNonMonotonousDeltas(const uint32_t* const palette, -+ int num_colors) { -+ uint32_t predict = 0x000000; -+ int i; -+ uint8_t sign_found = 0x00; -+ for (i = 0; i < num_colors; ++i) { -+ const uint32_t diff = VP8LSubPixels(palette[i], predict); -+ const uint8_t rd = (diff >> 16) & 0xff; -+ const uint8_t gd = (diff >> 8) & 0xff; -+ const uint8_t bd = (diff >> 0) & 0xff; -+ if (rd != 0x00) { -+ sign_found |= (rd < 0x80) ? 1 : 2; -+ } -+ if (gd != 0x00) { -+ sign_found |= (gd < 0x80) ? 8 : 16; -+ } -+ if (bd != 0x00) { -+ sign_found |= (bd < 0x80) ? 64 : 128; -+ } -+ predict = palette[i]; -+ } -+ return (sign_found & (sign_found << 1)) != 0; // two consequent signs. -+} -+ -+static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted, -+ int num_colors, uint32_t* const palette) { -+ uint32_t predict = 0x00000000; -+ int i, k; -+ memcpy(palette, palette_sorted, num_colors * sizeof(*palette)); -+ if (!PaletteHasNonMonotonousDeltas(palette_sorted, num_colors)) return; -+ // Find greedily always the closest color of the predicted color to minimize -+ // deltas in the palette. This reduces storage needs since the -+ // palette is stored with delta encoding. -+ for (i = 0; i < num_colors; ++i) { -+ int best_ix = i; -+ uint32_t best_score = ~0U; -+ for (k = i; k < num_colors; ++k) { -+ const uint32_t cur_score = PaletteColorDistance(palette[k], predict); -+ if (best_score > cur_score) { -+ best_score = cur_score; -+ best_ix = k; -+ } -+ } -+ SwapColor(&palette[best_ix], &palette[i]); -+ predict = palette[i]; -+ } -+} -+ -+// ----------------------------------------------------------------------------- -+// Modified Zeng method from "A Survey on Palette Reordering -+// Methods for Improving the Compression of Color-Indexed Images" by Armando J. -+// Pinho and Antonio J. R. Neves. -+ -+// Finds the biggest cooccurrence in the matrix. 
-+static void CoOccurrenceFindMax(const uint32_t* const cooccurrence, -+ uint32_t num_colors, uint8_t* const c1, -+ uint8_t* const c2) { -+ // Find the index that is most frequently located adjacent to other -+ // (different) indexes. -+ uint32_t best_sum = 0u; -+ uint32_t i, j, best_cooccurrence; -+ *c1 = 0u; -+ for (i = 0; i < num_colors; ++i) { -+ uint32_t sum = 0; -+ for (j = 0; j < num_colors; ++j) sum += cooccurrence[i * num_colors + j]; -+ if (sum > best_sum) { -+ best_sum = sum; -+ *c1 = i; -+ } -+ } -+ // Find the index that is most frequently found adjacent to *c1. -+ *c2 = 0u; -+ best_cooccurrence = 0u; -+ for (i = 0; i < num_colors; ++i) { -+ if (cooccurrence[*c1 * num_colors + i] > best_cooccurrence) { -+ best_cooccurrence = cooccurrence[*c1 * num_colors + i]; -+ *c2 = i; -+ } -+ } -+ assert(*c1 != *c2); -+} -+ -+// Builds the cooccurrence matrix -+static int CoOccurrenceBuild(const WebPPicture* const pic, -+ const uint32_t* const palette, uint32_t num_colors, -+ uint32_t* cooccurrence) { -+ uint32_t *lines, *line_top, *line_current, *line_tmp; -+ int x, y; -+ const uint32_t* src = pic->argb; -+ uint32_t prev_pix = ~src[0]; -+ uint32_t prev_idx = 0u; -+ uint32_t idx_map[MAX_PALETTE_SIZE] = {0}; -+ uint32_t palette_sorted[MAX_PALETTE_SIZE]; -+ lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); -+ if (lines == NULL) { -+ return 0; -+ } -+ line_top = &lines[0]; -+ line_current = &lines[pic->width]; -+ PrepareMapToPalette(palette, num_colors, palette_sorted, idx_map); -+ for (y = 0; y < pic->height; ++y) { -+ for (x = 0; x < pic->width; ++x) { -+ const uint32_t pix = src[x]; -+ if (pix != prev_pix) { -+ prev_idx = idx_map[SearchColorNoIdx(palette_sorted, pix, num_colors)]; -+ prev_pix = pix; -+ } -+ line_current[x] = prev_idx; -+ // 4-connectivity is what works best as mentioned in "On the relation -+ // between Memon's and the modified Zeng's palette reordering methods". 
-+ if (x > 0 && prev_idx != line_current[x - 1]) { -+ const uint32_t left_idx = line_current[x - 1]; -+ ++cooccurrence[prev_idx * num_colors + left_idx]; -+ ++cooccurrence[left_idx * num_colors + prev_idx]; -+ } -+ if (y > 0 && prev_idx != line_top[x]) { -+ const uint32_t top_idx = line_top[x]; -+ ++cooccurrence[prev_idx * num_colors + top_idx]; -+ ++cooccurrence[top_idx * num_colors + prev_idx]; -+ } -+ } -+ line_tmp = line_top; -+ line_top = line_current; -+ line_current = line_tmp; -+ src += pic->argb_stride; -+ } -+ WebPSafeFree(lines); -+ return 1; -+} -+ -+struct Sum { -+ uint8_t index; -+ uint32_t sum; -+}; -+ -+static int PaletteSortModifiedZeng(const WebPPicture* const pic, -+ const uint32_t* const palette_in, -+ uint32_t num_colors, -+ uint32_t* const palette) { -+ uint32_t i, j, ind; -+ uint8_t remapping[MAX_PALETTE_SIZE]; -+ uint32_t* cooccurrence; -+ struct Sum sums[MAX_PALETTE_SIZE]; -+ uint32_t first, last; -+ uint32_t num_sums; -+ // TODO(vrabaud) check whether one color images should use palette or not. -+ if (num_colors <= 1) return 1; -+ // Build the co-occurrence matrix. -+ cooccurrence = -+ (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); -+ if (cooccurrence == NULL) { -+ return 0; -+ } -+ if (!CoOccurrenceBuild(pic, palette_in, num_colors, cooccurrence)) { -+ WebPSafeFree(cooccurrence); -+ return 0; -+ } -+ -+ // Initialize the mapping list with the two best indices. -+ CoOccurrenceFindMax(cooccurrence, num_colors, &remapping[0], &remapping[1]); -+ -+ // We need to append and prepend to the list of remapping. To this end, we -+ // actually define the next start/end of the list as indices in a vector (with -+ // a wrap around when the end is reached). 
-+ first = 0; -+ last = 1; -+ num_sums = num_colors - 2; // -2 because we know the first two values -+ if (num_sums > 0) { -+ // Initialize the sums with the first two remappings and find the best one -+ struct Sum* best_sum = &sums[0]; -+ best_sum->index = 0u; -+ best_sum->sum = 0u; -+ for (i = 0, j = 0; i < num_colors; ++i) { -+ if (i == remapping[0] || i == remapping[1]) continue; -+ sums[j].index = i; -+ sums[j].sum = cooccurrence[i * num_colors + remapping[0]] + -+ cooccurrence[i * num_colors + remapping[1]]; -+ if (sums[j].sum > best_sum->sum) best_sum = &sums[j]; -+ ++j; -+ } -+ -+ while (num_sums > 0) { -+ const uint8_t best_index = best_sum->index; -+ // Compute delta to know if we need to prepend or append the best index. -+ int32_t delta = 0; -+ const int32_t n = num_colors - num_sums; -+ for (ind = first, j = 0; (ind + j) % num_colors != last + 1; ++j) { -+ const uint16_t l_j = remapping[(ind + j) % num_colors]; -+ delta += (n - 1 - 2 * (int32_t)j) * -+ (int32_t)cooccurrence[best_index * num_colors + l_j]; -+ } -+ if (delta > 0) { -+ first = (first == 0) ? num_colors - 1 : first - 1; -+ remapping[first] = best_index; -+ } else { -+ ++last; -+ remapping[last] = best_index; -+ } -+ // Remove best_sum from sums. -+ *best_sum = sums[num_sums - 1]; -+ --num_sums; -+ // Update all the sums and find the best one. -+ best_sum = &sums[0]; -+ for (i = 0; i < num_sums; ++i) { -+ sums[i].sum += cooccurrence[best_index * num_colors + sums[i].index]; -+ if (sums[i].sum > best_sum->sum) best_sum = &sums[i]; -+ } -+ } -+ } -+ assert((last + 1) % num_colors == first); -+ WebPSafeFree(cooccurrence); -+ -+ // Re-map the palette. 
-+ for (i = 0; i < num_colors; ++i) { -+ palette[i] = palette_in[remapping[(first + i) % num_colors]]; -+ } -+ return 1; -+} -+ -+// ----------------------------------------------------------------------------- -+ -+int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic, -+ const uint32_t* const palette_sorted, uint32_t num_colors, -+ uint32_t* const palette) { -+ switch (method) { -+ case kSortedDefault: -+ // Nothing to do, we have already sorted the palette. -+ memcpy(palette, palette_sorted, num_colors * sizeof(*palette)); -+ return 1; -+ case kMinimizeDelta: -+ PaletteSortMinimizeDeltas(palette_sorted, num_colors, palette); -+ return 1; -+ case kModifiedZeng: -+ return PaletteSortModifiedZeng(pic, palette_sorted, num_colors, palette); -+ case kUnusedPalette: -+ case kPaletteSortingNum: -+ break; -+ } -+ -+ assert(0); -+ return 0; -+} -diff --git a/3rdparty/libwebp/src/utils/palette.h b/3rdparty/libwebp/src/utils/palette.h -new file mode 100644 -index 000000000000..34479e463fe3 ---- /dev/null -+++ b/3rdparty/libwebp/src/utils/palette.h -@@ -0,0 +1,60 @@ -+// Copyright 2023 Google Inc. All Rights Reserved. -+// -+// Use of this source code is governed by a BSD-style license -+// that can be found in the COPYING file in the root of the source -+// tree. An additional intellectual property rights grant can be found -+// in the file PATENTS. All contributing project authors may -+// be found in the AUTHORS file in the root of the source tree. -+// ----------------------------------------------------------------------------- -+// -+// Utilities for palette analysis. -+// -+// Author: Vincent Rabaud (vrabaud@google.com) -+ -+#ifndef WEBP_UTILS_PALETTE_H_ -+#define WEBP_UTILS_PALETTE_H_ -+ -+#include "src/webp/types.h" -+ -+struct WebPPicture; -+ -+// The different ways a palette can be sorted. 
-+typedef enum PaletteSorting { -+ kSortedDefault = 0, -+ // Sorts by minimizing L1 deltas between consecutive colors, giving more -+ // weight to RGB colors. -+ kMinimizeDelta = 1, -+ // Implements the modified Zeng method from "A Survey on Palette Reordering -+ // Methods for Improving the Compression of Color-Indexed Images" by Armando -+ // J. Pinho and Antonio J. R. Neves. -+ kModifiedZeng = 2, -+ kUnusedPalette = 3, -+ kPaletteSortingNum = 4 -+} PaletteSorting; -+ -+// Returns the index of 'color' in the sorted palette 'sorted' of size -+// 'num_colors'. -+int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, int num_colors); -+ -+// Sort palette in increasing order and prepare an inverse mapping array. -+void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors, -+ uint32_t sorted[], uint32_t idx_map[]); -+ -+// Returns count of unique colors in 'pic', assuming pic->use_argb is true. -+// If the unique color count is more than MAX_PALETTE_SIZE, returns -+// MAX_PALETTE_SIZE+1. -+// If 'palette' is not NULL and the number of unique colors is less than or -+// equal to MAX_PALETTE_SIZE, also outputs the actual unique colors into -+// 'palette' in a sorted order. Note: 'palette' is assumed to be an array -+// already allocated with at least MAX_PALETTE_SIZE elements. -+int GetColorPalette(const struct WebPPicture* const pic, -+ uint32_t* const palette); -+ -+// Sorts the palette according to the criterion defined by 'method'. -+// 'palette_sorted' is the input palette sorted lexicographically, as done in -+// PrepareMapToPalette. Returns 0 on memory allocation error. 
-+int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic, -+ const uint32_t* const palette_sorted, uint32_t num_colors, -+ uint32_t* const palette); -+ -+#endif // WEBP_UTILS_PALETTE_H_ -diff --git a/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c b/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c -index f65b6cdbb696..97e78937043e 100644 ---- a/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c -+++ b/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c -@@ -30,7 +30,7 @@ - - #define DFIX 4 // extra precision for ordered dithering - #define DSIZE 4 // dithering size (must be a power of two) --// cf. http://en.wikipedia.org/wiki/Ordered_dithering -+// cf. https://en.wikipedia.org/wiki/Ordered_dithering - static const uint8_t kOrderedDither[DSIZE][DSIZE] = { - { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision - { 12, 4, 14, 6 }, -diff --git a/3rdparty/libwebp/src/utils/rescaler_utils.c b/3rdparty/libwebp/src/utils/rescaler_utils.c -index 4bcae24af54a..a0581a14b1a8 100644 ---- a/3rdparty/libwebp/src/utils/rescaler_utils.c -+++ b/3rdparty/libwebp/src/utils/rescaler_utils.c -@@ -12,66 +12,74 @@ - // Author: Skal (pascal.massimino@gmail.com) - - #include -+#include - #include - #include - #include "src/dsp/dsp.h" - #include "src/utils/rescaler_utils.h" -+#include "src/utils/utils.h" - - //------------------------------------------------------------------------------ - --void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, -- uint8_t* const dst, -- int dst_width, int dst_height, int dst_stride, -- int num_channels, rescaler_t* const work) { -+int WebPRescalerInit(WebPRescaler* const rescaler, -+ int src_width, int src_height, -+ uint8_t* const dst, -+ int dst_width, int dst_height, int dst_stride, -+ int num_channels, rescaler_t* const work) { - const int x_add = src_width, x_sub = dst_width; - const int y_add = src_height, y_sub = dst_height; -- wrk->x_expand = (src_width < dst_width); -- wrk->y_expand 
= (src_height < dst_height); -- wrk->src_width = src_width; -- wrk->src_height = src_height; -- wrk->dst_width = dst_width; -- wrk->dst_height = dst_height; -- wrk->src_y = 0; -- wrk->dst_y = 0; -- wrk->dst = dst; -- wrk->dst_stride = dst_stride; -- wrk->num_channels = num_channels; -+ const uint64_t total_size = 2ull * dst_width * num_channels * sizeof(*work); -+ if (!CheckSizeOverflow(total_size)) return 0; -+ -+ rescaler->x_expand = (src_width < dst_width); -+ rescaler->y_expand = (src_height < dst_height); -+ rescaler->src_width = src_width; -+ rescaler->src_height = src_height; -+ rescaler->dst_width = dst_width; -+ rescaler->dst_height = dst_height; -+ rescaler->src_y = 0; -+ rescaler->dst_y = 0; -+ rescaler->dst = dst; -+ rescaler->dst_stride = dst_stride; -+ rescaler->num_channels = num_channels; - - // for 'x_expand', we use bilinear interpolation -- wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add; -- wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; -- if (!wrk->x_expand) { // fx_scale is not used otherwise -- wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub); -+ rescaler->x_add = rescaler->x_expand ? (x_sub - 1) : x_add; -+ rescaler->x_sub = rescaler->x_expand ? (x_add - 1) : x_sub; -+ if (!rescaler->x_expand) { // fx_scale is not used otherwise -+ rescaler->fx_scale = WEBP_RESCALER_FRAC(1, rescaler->x_sub); - } - // vertical scaling parameters -- wrk->y_add = wrk->y_expand ? y_add - 1 : y_add; -- wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub; -- wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add; -- if (!wrk->y_expand) { -+ rescaler->y_add = rescaler->y_expand ? y_add - 1 : y_add; -+ rescaler->y_sub = rescaler->y_expand ? y_sub - 1 : y_sub; -+ rescaler->y_accum = rescaler->y_expand ? rescaler->y_sub : rescaler->y_add; -+ if (!rescaler->y_expand) { - // This is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast. 
-- // Its value is <= WEBP_RESCALER_ONE, because dst_height <= wrk->y_add, and -- // wrk->x_add >= 1; -- const uint64_t ratio = -- (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add); -+ // Its value is <= WEBP_RESCALER_ONE, because dst_height <= rescaler->y_add -+ // and rescaler->x_add >= 1; -+ const uint64_t num = (uint64_t)dst_height * WEBP_RESCALER_ONE; -+ const uint64_t den = (uint64_t)rescaler->x_add * rescaler->y_add; -+ const uint64_t ratio = num / den; - if (ratio != (uint32_t)ratio) { - // When ratio == WEBP_RESCALER_ONE, we can't represent the ratio with the - // current fixed-point precision. This happens when src_height == -- // wrk->y_add (which == src_height), and wrk->x_add == 1. -+ // rescaler->y_add (which == src_height), and rescaler->x_add == 1. - // => We special-case fxy_scale = 0, in WebPRescalerExportRow(). -- wrk->fxy_scale = 0; -+ rescaler->fxy_scale = 0; - } else { -- wrk->fxy_scale = (uint32_t)ratio; -+ rescaler->fxy_scale = (uint32_t)ratio; - } -- wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub); -+ rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->y_sub); - } else { -- wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add); -- // wrk->fxy_scale is unused here. -+ rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->x_add); -+ // rescaler->fxy_scale is unused here. - } -- wrk->irow = work; -- wrk->frow = work + num_channels * dst_width; -- memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); -+ rescaler->irow = work; -+ rescaler->frow = work + num_channels * dst_width; -+ memset(work, 0, (size_t)total_size); - - WebPRescalerDspInit(); -+ return 1; - } - - int WebPRescalerGetScaledDimensions(int src_width, int src_height, -@@ -82,6 +90,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, - { - int width = *scaled_width; - int height = *scaled_height; -+ const int max_size = INT_MAX / 2; - - // if width is unspecified, scale original proportionally to height ratio. 
- if (width == 0 && src_height > 0) { -@@ -94,7 +103,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, - (int)(((uint64_t)src_height * width + src_width - 1) / src_width); - } - // Check if the overall dimensions still make sense. -- if (width <= 0 || height <= 0) { -+ if (width <= 0 || height <= 0 || width > max_size || height > max_size) { - return 0; - } - -@@ -107,31 +116,34 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height, - //------------------------------------------------------------------------------ - // all-in-one calls - --int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { -- const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub; -+int WebPRescaleNeededLines(const WebPRescaler* const rescaler, -+ int max_num_lines) { -+ const int num_lines = -+ (rescaler->y_accum + rescaler->y_sub - 1) / rescaler->y_sub; - return (num_lines > max_num_lines) ? max_num_lines : num_lines; - } - --int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, -+int WebPRescalerImport(WebPRescaler* const rescaler, int num_lines, - const uint8_t* src, int src_stride) { - int total_imported = 0; -- while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) { -- if (wrk->y_expand) { -- rescaler_t* const tmp = wrk->irow; -- wrk->irow = wrk->frow; -- wrk->frow = tmp; -+ while (total_imported < num_lines && -+ !WebPRescalerHasPendingOutput(rescaler)) { -+ if (rescaler->y_expand) { -+ rescaler_t* const tmp = rescaler->irow; -+ rescaler->irow = rescaler->frow; -+ rescaler->frow = tmp; - } -- WebPRescalerImportRow(wrk, src); -- if (!wrk->y_expand) { // Accumulate the contribution of the new row. -+ WebPRescalerImportRow(rescaler, src); -+ if (!rescaler->y_expand) { // Accumulate the contribution of the new row. 
- int x; -- for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) { -- wrk->irow[x] += wrk->frow[x]; -+ for (x = 0; x < rescaler->num_channels * rescaler->dst_width; ++x) { -+ rescaler->irow[x] += rescaler->frow[x]; - } - } -- ++wrk->src_y; -+ ++rescaler->src_y; - src += src_stride; - ++total_imported; -- wrk->y_accum -= wrk->y_sub; -+ rescaler->y_accum -= rescaler->y_sub; - } - return total_imported; - } -diff --git a/3rdparty/libwebp/src/utils/rescaler_utils.h b/3rdparty/libwebp/src/utils/rescaler_utils.h -index ca41e42c4a53..ef201ef86c19 100644 ---- a/3rdparty/libwebp/src/utils/rescaler_utils.h -+++ b/3rdparty/libwebp/src/utils/rescaler_utils.h -@@ -47,12 +47,13 @@ struct WebPRescaler { - }; - - // Initialize a rescaler given scratch area 'work' and dimensions of src & dst. --void WebPRescalerInit(WebPRescaler* const rescaler, -- int src_width, int src_height, -- uint8_t* const dst, -- int dst_width, int dst_height, int dst_stride, -- int num_channels, -- rescaler_t* const work); -+// Returns false in case of error. 
-+int WebPRescalerInit(WebPRescaler* const rescaler, -+ int src_width, int src_height, -+ uint8_t* const dst, -+ int dst_width, int dst_height, int dst_stride, -+ int num_channels, -+ rescaler_t* const work); - - // If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value - // will be calculated preserving the aspect ratio, otherwise the values are -diff --git a/3rdparty/libwebp/src/utils/utils.c b/3rdparty/libwebp/src/utils/utils.c -index 6080e19e2176..408ce88f67f6 100644 ---- a/3rdparty/libwebp/src/utils/utils.c -+++ b/3rdparty/libwebp/src/utils/utils.c -@@ -11,19 +11,19 @@ - // - // Author: Skal (pascal.massimino@gmail.com) - -+#include "src/utils/utils.h" -+ - #include - #include // for memcpy() --#include "src/webp/decode.h" -+ -+#include "src/utils/palette.h" - #include "src/webp/encode.h" --#include "src/webp/format_constants.h" // for MAX_PALETTE_SIZE --#include "src/utils/color_cache_utils.h" --#include "src/utils/utils.h" - - // If PRINT_MEM_INFO is defined, extra info (like total memory used, number of - // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow, - // and not multi-thread safe!). 
- // An interesting alternative is valgrind's 'massif' tool: --// http://valgrind.org/docs/manual/ms-manual.html -+// https://valgrind.org/docs/manual/ms-manual.html - // Here is an example command line: - /* valgrind --tool=massif --massif-out-file=massif.out \ - --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc -@@ -101,6 +101,9 @@ static void Increment(int* const v) { - #if defined(MALLOC_LIMIT) - { - const char* const malloc_limit_str = getenv("MALLOC_LIMIT"); -+#if MALLOC_LIMIT > 1 -+ mem_limit = (size_t)MALLOC_LIMIT; -+#endif - if (malloc_limit_str != NULL) { - mem_limit = atoi(malloc_limit_str); - } -@@ -169,16 +172,16 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) { - const uint64_t total_size = nmemb * size; - if (nmemb == 0) return 1; - if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0; -- if (total_size != (size_t)total_size) return 0; -+ if (!CheckSizeOverflow(total_size)) return 0; - #if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT) - if (countdown_to_fail > 0 && --countdown_to_fail == 0) { - return 0; // fake fail! - } - #endif --#if defined(MALLOC_LIMIT) -+#if defined(PRINT_MEM_INFO) && defined(MALLOC_LIMIT) - if (mem_limit > 0) { - const uint64_t new_total_mem = (uint64_t)total_mem + total_size; -- if (new_total_mem != (size_t)new_total_mem || -+ if (!CheckSizeOverflow(new_total_mem) || - new_total_mem > mem_limit) { - return 0; // fake fail! - } -@@ -249,66 +252,10 @@ void WebPCopyPixels(const WebPPicture* const src, WebPPicture* const dst) { - - //------------------------------------------------------------------------------ - --#define COLOR_HASH_SIZE (MAX_PALETTE_SIZE * 4) --#define COLOR_HASH_RIGHT_SHIFT 22 // 32 - log2(COLOR_HASH_SIZE). 
-- - int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) { -- int i; -- int x, y; -- int num_colors = 0; -- uint8_t in_use[COLOR_HASH_SIZE] = { 0 }; -- uint32_t colors[COLOR_HASH_SIZE]; -- const uint32_t* argb = pic->argb; -- const int width = pic->width; -- const int height = pic->height; -- uint32_t last_pix = ~argb[0]; // so we're sure that last_pix != argb[0] -- assert(pic != NULL); -- assert(pic->use_argb); -- -- for (y = 0; y < height; ++y) { -- for (x = 0; x < width; ++x) { -- int key; -- if (argb[x] == last_pix) { -- continue; -- } -- last_pix = argb[x]; -- key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT); -- while (1) { -- if (!in_use[key]) { -- colors[key] = last_pix; -- in_use[key] = 1; -- ++num_colors; -- if (num_colors > MAX_PALETTE_SIZE) { -- return MAX_PALETTE_SIZE + 1; // Exact count not needed. -- } -- break; -- } else if (colors[key] == last_pix) { -- break; // The color is already there. -- } else { -- // Some other color sits here, so do linear conflict resolution. -- ++key; -- key &= (COLOR_HASH_SIZE - 1); // Key mask. -- } -- } -- } -- argb += pic->argb_stride; -- } -- -- if (palette != NULL) { // Fill the colors into palette. 
-- num_colors = 0; -- for (i = 0; i < COLOR_HASH_SIZE; ++i) { -- if (in_use[i]) { -- palette[num_colors] = colors[i]; -- ++num_colors; -- } -- } -- } -- return num_colors; -+ return GetColorPalette(pic, palette); - } - --#undef COLOR_HASH_SIZE --#undef COLOR_HASH_RIGHT_SHIFT -- - //------------------------------------------------------------------------------ - - #if defined(WEBP_NEED_LOG_TABLE_8BIT) -diff --git a/3rdparty/libwebp/src/utils/utils.h b/3rdparty/libwebp/src/utils/utils.h -index 2a3ec926784e..b2241fbf9bf7 100644 ---- a/3rdparty/libwebp/src/utils/utils.h -+++ b/3rdparty/libwebp/src/utils/utils.h -@@ -20,9 +20,7 @@ - #endif - - #include --#include - --#include "src/dsp/dsp.h" - #include "src/webp/types.h" - - #ifdef __cplusplus -@@ -42,6 +40,10 @@ extern "C" { - #endif - #endif // WEBP_MAX_ALLOCABLE_MEMORY - -+static WEBP_INLINE int CheckSizeOverflow(uint64_t size) { -+ return size == (size_t)size; -+} -+ - // size-checking safe malloc/calloc: verify that the requested size is not too - // large, or return NULL. You don't need to call these for constructs like - // malloc(sizeof(foo)), but only if there's picture-dependent size involved -@@ -60,7 +62,8 @@ WEBP_EXTERN void WebPSafeFree(void* const ptr); - // Alignment - - #define WEBP_ALIGN_CST 31 --#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST) -+#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & \ -+ ~(uintptr_t)WEBP_ALIGN_CST) - - #include - // memcpy() is the safe way of moving potentially unaligned 32b memory. 
-@@ -69,10 +72,19 @@ static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) { - memcpy(&A, ptr, sizeof(A)); - return A; - } -+ -+static WEBP_INLINE int32_t WebPMemToInt32(const uint8_t* const ptr) { -+ return (int32_t)WebPMemToUint32(ptr); -+} -+ - static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) { - memcpy(ptr, &val, sizeof(val)); - } - -+static WEBP_INLINE void WebPInt32ToMem(uint8_t* const ptr, int val) { -+ WebPUint32ToMem(ptr, (uint32_t)val); -+} -+ - //------------------------------------------------------------------------------ - // Reading/writing data. - -@@ -107,24 +119,33 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) { - PutLE16(data + 2, (int)(val >> 16)); - } - --// Returns (int)floor(log2(n)). n must be > 0. - // use GNU builtins where available. - #if defined(__GNUC__) && \ - ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) -+// Returns (int)floor(log2(n)). n must be > 0. - static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - return 31 ^ __builtin_clz(n); - } -+// counts the number of trailing zero -+static WEBP_INLINE int BitsCtz(uint32_t n) { return __builtin_ctz(n); } - #elif defined(_MSC_VER) && _MSC_VER > 1310 && \ - (defined(_M_X64) || defined(_M_IX86)) - #include - #pragma intrinsic(_BitScanReverse) -+#pragma intrinsic(_BitScanForward) - - static WEBP_INLINE int BitsLog2Floor(uint32_t n) { -- unsigned long first_set_bit; -+ unsigned long first_set_bit; // NOLINT (runtime/int) - _BitScanReverse(&first_set_bit, n); - return first_set_bit; - } --#else // default: use the C-version. -+static WEBP_INLINE int BitsCtz(uint32_t n) { -+ unsigned long first_set_bit; // NOLINT (runtime/int) -+ _BitScanForward(&first_set_bit, n); -+ return first_set_bit; -+} -+#else // default: use the (slow) C-version. -+#define WEBP_HAVE_SLOW_CLZ_CTZ // signal that the Clz/Ctz function are slow - // Returns 31 ^ clz(n) = log2(n). 
This is the default C-implementation, either - // based on table or not. Can be used as fallback if clz() is not available. - #define WEBP_NEED_LOG_TABLE_8BIT -@@ -139,6 +160,15 @@ static WEBP_INLINE int WebPLog2FloorC(uint32_t n) { - } - - static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); } -+ -+static WEBP_INLINE int BitsCtz(uint32_t n) { -+ int i; -+ for (i = 0; i < 32; ++i, n >>= 1) { -+ if (n & 1) return i; -+ } -+ return 32; -+} -+ - #endif - - //------------------------------------------------------------------------------ -@@ -166,6 +196,7 @@ WEBP_EXTERN void WebPCopyPixels(const struct WebPPicture* const src, - // MAX_PALETTE_SIZE, also outputs the actual unique colors into 'palette'. - // Note: 'palette' is assumed to be an array already allocated with at least - // MAX_PALETTE_SIZE elements. -+// TODO(vrabaud) remove whenever we can break the ABI. - WEBP_EXTERN int WebPGetColorPalette(const struct WebPPicture* const pic, - uint32_t* const palette); - -diff --git a/3rdparty/libwebp/src/webp/decode.h b/3rdparty/libwebp/src/webp/decode.h -index 44fcd64a84d4..9d968061d160 100644 ---- a/3rdparty/libwebp/src/webp/decode.h -+++ b/3rdparty/libwebp/src/webp/decode.h -@@ -81,11 +81,12 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, - // returned is the Y samples buffer. Upon return, *u and *v will point to - // the U and V chroma data. These U and V buffers need NOT be passed to - // WebPFree(), unlike the returned Y luma one. The dimension of the U and V --// planes are both (*width + 1) / 2 and (*height + 1)/ 2. -+// planes are both (*width + 1) / 2 and (*height + 1) / 2. - // Upon return, the Y buffer has a stride returned as '*stride', while U and V - // have a common stride returned as '*uv_stride'. --// Return NULL in case of error. --// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr -+// 'width' and 'height' may be NULL, the other pointers must not be. 
-+// Returns NULL in case of error. -+// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr - WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, - int* width, int* height, - uint8_t** u, uint8_t** v, -@@ -250,23 +251,24 @@ typedef enum VP8StatusCode { - // WebPIDecoder object. This object can be left in a SUSPENDED state if the - // picture is only partially decoded, pending additional input. - // Code example: --// --// WebPInitDecBuffer(&output_buffer); --// output_buffer.colorspace = mode; --// ... --// WebPIDecoder* idec = WebPINewDecoder(&output_buffer); --// while (additional_data_is_available) { --// // ... (get additional data in some new_data[] buffer) --// status = WebPIAppend(idec, new_data, new_data_size); --// if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) { --// break; // an error occurred. --// } --// --// // The above call decodes the current available buffer. --// // Part of the image can now be refreshed by calling --// // WebPIDecGetRGB()/WebPIDecGetYUVA() etc. --// } --// WebPIDelete(idec); -+/* -+ WebPInitDecBuffer(&output_buffer); -+ output_buffer.colorspace = mode; -+ ... -+ WebPIDecoder* idec = WebPINewDecoder(&output_buffer); -+ while (additional_data_is_available) { -+ // ... (get additional data in some new_data[] buffer) -+ status = WebPIAppend(idec, new_data, new_data_size); -+ if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) { -+ break; // an error occurred. -+ } -+ -+ // The above call decodes the current available buffer. -+ // Part of the image can now be refreshed by calling -+ // WebPIDecGetRGB()/WebPIDecGetYUVA() etc. -+ } -+ WebPIDelete(idec); -+*/ - - // Creates a new incremental decoder with the supplied buffer parameter. 
- // This output_buffer can be passed NULL, in which case a default output buffer -@@ -388,7 +390,7 @@ WEBP_EXTERN const WebPDecBuffer* WebPIDecodedArea( - CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK); - - // C) Adjust 'config', if needed -- config.no_fancy_upsampling = 1; -+ config.options.no_fancy_upsampling = 1; - config.output.colorspace = MODE_BGRA; - // etc. - -diff --git a/3rdparty/libwebp/src/webp/encode.h b/3rdparty/libwebp/src/webp/encode.h -index b4c599df8765..56b68e2f10e0 100644 ---- a/3rdparty/libwebp/src/webp/encode.h -+++ b/3rdparty/libwebp/src/webp/encode.h -@@ -441,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture, - // the original dimension will be lost). Picture 'dst' need not be initialized - // with WebPPictureInit() if it is different from 'src', since its content will - // be overwritten. --// Returns false in case of memory allocation error or invalid parameters. -+// Returns false in case of invalid parameters. - WEBP_EXTERN int WebPPictureView(const WebPPicture* src, - int left, int top, int width, int height, - WebPPicture* dst); -@@ -455,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture); - // dimension will be calculated preserving the aspect ratio. - // No gamma correction is applied. - // Returns false in case of error (invalid parameter or insufficient memory). --WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height); -+WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height); - - // Colorspace conversion function to import RGB samples. - // Previous buffer will be free'd, if any. -@@ -526,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture); - // Remove the transparency information (if present) by blending the color with - // the background color 'background_rgb' (specified as 24bit RGB triplet). - // After this call, all alpha values are reset to 0xff. 
--WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb); -+WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb); - - //------------------------------------------------------------------------------ - // Main call -diff --git a/3rdparty/libwebp/src/webp/format_constants.h b/3rdparty/libwebp/src/webp/format_constants.h -index eca6981a47d0..999035c5d265 100644 ---- a/3rdparty/libwebp/src/webp/format_constants.h -+++ b/3rdparty/libwebp/src/webp/format_constants.h -@@ -55,7 +55,7 @@ - typedef enum { - PREDICTOR_TRANSFORM = 0, - CROSS_COLOR_TRANSFORM = 1, -- SUBTRACT_GREEN = 2, -+ SUBTRACT_GREEN_TRANSFORM = 2, - COLOR_INDEXING_TRANSFORM = 3 - } VP8LImageTransformType; - -diff --git a/3rdparty/libwebp/src/webp/types.h b/3rdparty/libwebp/src/webp/types.h -index 47f7f2b00706..f255432e413c 100644 ---- a/3rdparty/libwebp/src/webp/types.h -+++ b/3rdparty/libwebp/src/webp/types.h -@@ -42,7 +42,11 @@ typedef long long int int64_t; - # if defined(__GNUC__) && __GNUC__ >= 4 - # define WEBP_EXTERN extern __attribute__ ((visibility ("default"))) - # else --# define WEBP_EXTERN extern -+# if defined(_MSC_VER) && defined(WEBP_DLL) -+# define WEBP_EXTERN __declspec(dllexport) -+# else -+# define WEBP_EXTERN extern -+# endif - # endif /* __GNUC__ >= 4 */ - #endif /* WEBP_EXTERN */ - - -From 9b0cd53f2091071d780b2d3588a8b755bb1fdc68 Mon Sep 17 00:00:00 2001 -From: Vincent Rabaud -Date: Fri, 15 Sep 2023 09:37:50 +0200 -Subject: [PATCH 2/2] Add the sharpyuv folder. 
- ---- - 3rdparty/libwebp/CMakeLists.txt | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt -index 532c03026568..f3b6ebd0d620 100644 ---- a/3rdparty/libwebp/CMakeLists.txt -+++ b/3rdparty/libwebp/CMakeLists.txt -@@ -9,8 +9,8 @@ if(ANDROID) - ocv_include_directories(${CPUFEATURES_INCLUDE_DIRS}) - endif() - --file(GLOB lib_srcs src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c) --file(GLOB lib_hdrs src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h) -+file(GLOB lib_srcs sharpyuv/*.c src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c) -+file(GLOB lib_hdrs sharpyuv/*.h src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h) - - # FIXIT - if(ANDROID AND ARMEABI_V7A AND NOT NEON) diff --git a/third_party/libtiff b/third_party/libtiff index 4f34a37..8b20804 160000 --- a/third_party/libtiff +++ b/third_party/libtiff @@ -1 +1 @@ -Subproject commit 4f34a374b2e2cba3c8055ab77458c5730c073aff +Subproject commit 8b20804fc0ddeaa93667b799b5e1a2a7dc9e3fb2 diff --git a/third_party/opencv b/third_party/opencv index f9a59f2..5199850 160000 --- a/third_party/opencv +++ b/third_party/opencv @@ -1 +1 @@ -Subproject commit f9a59f2592993d3dcc080e495f4f5e02dd8ec7ef +Subproject commit 5199850039ad23f1f0e6cccea5061a9fea5efca6