From 0eda6929db5b8eba1dd0cd019dc3978cf3bce096 Mon Sep 17 00:00:00 2001
From: Krzysztof Lecki <klecki@nvidia.com>
Date: Fri, 6 Oct 2023 12:04:25 +0200
Subject: [PATCH] Update dependencies: OpenCV, libtiff (#89)

Revert the webp patch as it is no longer needed

Signed-off-by: Krzysztof Lecki <klecki@nvidia.com>
---
 README.rst                                 |     8 +-
 build_scripts/build_opencv.sh              |     1 -
 patches/opencv-libwebp-CVE-2023-5129.patch | 13905 -------------------
 third_party/libtiff                        |     2 +-
 third_party/opencv                         |     2 +-
 5 files changed, 6 insertions(+), 13912 deletions(-)
 delete mode 100644 patches/opencv-libwebp-CVE-2023-5129.patch

diff --git a/README.rst b/README.rst
index f286eb3..9d67f03 100644
--- a/README.rst
+++ b/README.rst
@@ -96,8 +96,8 @@ The repository consists mostly of externally hosted subrepositories:
 .. _opencv: https://github.com/opencv/opencv/
 .. |opencvlic| replace:: Apache License 2.0
 .. _opencvlic: https://github.com/opencv/opencv/blob/master/LICENSE
-.. |opencvver| replace:: 4.8.0
-.. _opencvver: https://github.com/opencv/opencv/releases/tag/4.8.0
+.. |opencvver| replace:: 4.8.1
+.. _opencvver: https://github.com/opencv/opencv/releases/tag/4.8.1
 
 .. _openjpeg: https://github.com/uclouvain/openjpeg
 .. |openjpeglic| replace:: BSD-2 license
@@ -108,8 +108,8 @@ The repository consists mostly of externally hosted subrepositories:
 .. _libtiff: https://gitlab.com/libtiff/libtiff
 .. |libtifflic| replace:: BSD-2 license
 .. _libtifflic: https://gitlab.com/libtiff/libtiff/-/blob/master/README.md
-.. |libtiffver| replace:: 4.5.1 (+ Build System Patch)
-.. _libtiffver: https://gitlab.com/libtiff/libtiff/-/tree/v4.5.1
+.. |libtiffver| replace:: 4.6.0 (+ Build System Patch)
+.. _libtiffver: https://gitlab.com/libtiff/libtiff/-/tree/v4.6.0
 
 .. _zstd: https://github.com/facebook/zstd
 .. |zstdlic| replace:: BSD-3 license
diff --git a/build_scripts/build_opencv.sh b/build_scripts/build_opencv.sh
index a853f3b..508e76d 100755
--- a/build_scripts/build_opencv.sh
+++ b/build_scripts/build_opencv.sh
@@ -16,7 +16,6 @@
 
 # OpenCV
 pushd third_party/opencv
-patch -p1 < ${ROOT_DIR}/patches/opencv-libwebp-CVE-2023-5129.patch
 mkdir -p build
 cd build
 cmake -DCMAKE_BUILD_TYPE=RELEASE \
diff --git a/patches/opencv-libwebp-CVE-2023-5129.patch b/patches/opencv-libwebp-CVE-2023-5129.patch
deleted file mode 100644
index 057ae37..0000000
--- a/patches/opencv-libwebp-CVE-2023-5129.patch
+++ /dev/null
@@ -1,13905 +0,0 @@
-From 74a671d5394e3b2e6e8dbd1f32e152260c60e847 Mon Sep 17 00:00:00 2001
-From: Vincent Rabaud <vrabaud@google.com>
-Date: Thu, 14 Sep 2023 09:49:29 +0200
-Subject: [PATCH 1/2] Bump libwebp to 1.3.2
-
----
- 3rdparty/libwebp/CMakeLists.txt               |   6 -
- .../patches/20190910-msa-asm-patch.diff       |  22 -
- 3rdparty/libwebp/sharpyuv/sharpyuv.c          | 565 ++++++++++++++
- 3rdparty/libwebp/sharpyuv/sharpyuv.h          | 174 +++++
- 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c      |  14 +
- 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h      |  22 +
- 3rdparty/libwebp/sharpyuv/sharpyuv_csp.c      | 110 +++
- 3rdparty/libwebp/sharpyuv/sharpyuv_csp.h      |  60 ++
- 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c      | 104 +++
- 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h      |  28 +
- 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c    | 419 +++++++++++
- 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h    |  38 +
- 3rdparty/libwebp/sharpyuv/sharpyuv_neon.c     | 181 +++++
- 3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c     | 201 +++++
- 3rdparty/libwebp/src/dec/alpha_dec.c          |  37 +-
- 3rdparty/libwebp/src/dec/buffer_dec.c         |  12 +-
- 3rdparty/libwebp/src/dec/frame_dec.c          |   2 +-
- 3rdparty/libwebp/src/dec/io_dec.c             |  98 ++-
- 3rdparty/libwebp/src/dec/tree_dec.c           |   3 +-
- 3rdparty/libwebp/src/dec/vp8_dec.c            |   6 +-
- 3rdparty/libwebp/src/dec/vp8i_dec.h           |   4 +-
- 3rdparty/libwebp/src/dec/vp8l_dec.c           | 244 +++---
- 3rdparty/libwebp/src/dec/vp8li_dec.h          |  15 +-
- 3rdparty/libwebp/src/dec/webp_dec.c           |  52 +-
- 3rdparty/libwebp/src/dec/webpi_dec.h          |   4 +
- 3rdparty/libwebp/src/demux/anim_decode.c      |  52 +-
- 3rdparty/libwebp/src/demux/demux.c            |  23 +-
- 3rdparty/libwebp/src/dsp/alpha_processing.c   |  66 +-
- .../libwebp/src/dsp/alpha_processing_neon.c   |  27 +-
- .../libwebp/src/dsp/alpha_processing_sse2.c   |  73 +-
- .../libwebp/src/dsp/alpha_processing_sse41.c  |   8 +-
- 3rdparty/libwebp/src/dsp/cost.c               |   5 +-
- 3rdparty/libwebp/src/dsp/cost_neon.c          |   4 +-
- 3rdparty/libwebp/src/dsp/cpu.c                |  21 +-
- 3rdparty/libwebp/src/dsp/cpu.h                | 266 +++++++
- 3rdparty/libwebp/src/dsp/dec.c                |   7 +-
- 3rdparty/libwebp/src/dsp/dec_neon.c           |   4 +-
- 3rdparty/libwebp/src/dsp/dec_sse2.c           |  93 +--
- 3rdparty/libwebp/src/dsp/dec_sse41.c          |   2 +-
- 3rdparty/libwebp/src/dsp/dsp.h                | 264 ++-----
- 3rdparty/libwebp/src/dsp/enc.c                |   7 +-
- 3rdparty/libwebp/src/dsp/enc_neon.c           |  13 +-
- 3rdparty/libwebp/src/dsp/enc_sse2.c           | 293 ++++++--
- 3rdparty/libwebp/src/dsp/filters.c            |  14 +-
- 3rdparty/libwebp/src/dsp/filters_sse2.c       |   5 +
- 3rdparty/libwebp/src/dsp/lossless.c           |  81 +-
- 3rdparty/libwebp/src/dsp/lossless.h           |  53 +-
- 3rdparty/libwebp/src/dsp/lossless_common.h    |   8 +-
- 3rdparty/libwebp/src/dsp/lossless_enc.c       |  73 +-
- .../libwebp/src/dsp/lossless_enc_mips32.c     |  22 +-
- 3rdparty/libwebp/src/dsp/lossless_enc_neon.c  |   2 +-
- 3rdparty/libwebp/src/dsp/lossless_enc_sse2.c  | 121 ++-
- 3rdparty/libwebp/src/dsp/lossless_enc_sse41.c | 175 +++--
- .../libwebp/src/dsp/lossless_mips_dsp_r2.c    |  37 +-
- 3rdparty/libwebp/src/dsp/lossless_neon.c      |  22 +-
- 3rdparty/libwebp/src/dsp/lossless_sse2.c      | 130 ++--
- 3rdparty/libwebp/src/dsp/lossless_sse41.c     | 133 ++++
- 3rdparty/libwebp/src/dsp/msa_macro.h          |  37 +-
- 3rdparty/libwebp/src/dsp/neon.h               |  11 +-
- 3rdparty/libwebp/src/dsp/quant.h              |  16 +-
- 3rdparty/libwebp/src/dsp/rescaler.c           |  12 +-
- 3rdparty/libwebp/src/dsp/rescaler_sse2.c      |   6 +-
- 3rdparty/libwebp/src/dsp/ssim.c               |   3 +-
- 3rdparty/libwebp/src/dsp/upsampling.c         |  11 +-
- 3rdparty/libwebp/src/dsp/upsampling_neon.c    |   2 +-
- 3rdparty/libwebp/src/dsp/upsampling_sse2.c    |   2 +-
- 3rdparty/libwebp/src/dsp/yuv.c                |  85 +--
- 3rdparty/libwebp/src/dsp/yuv.h                |   2 +-
- 3rdparty/libwebp/src/dsp/yuv_neon.c           | 108 ---
- 3rdparty/libwebp/src/dsp/yuv_sse2.c           | 132 +---
- 3rdparty/libwebp/src/dsp/yuv_sse41.c          |   6 +-
- 3rdparty/libwebp/src/enc/alpha_enc.c          |  32 +-
- 3rdparty/libwebp/src/enc/analysis_enc.c       |  12 +-
- .../src/enc/backward_references_cost_enc.c    |  75 +-
- .../libwebp/src/enc/backward_references_enc.c |  83 ++-
- .../libwebp/src/enc/backward_references_enc.h |  12 +-
- 3rdparty/libwebp/src/enc/frame_enc.c          |  21 +-
- 3rdparty/libwebp/src/enc/histogram_enc.c      | 252 +++----
- 3rdparty/libwebp/src/enc/histogram_enc.h      |  30 +-
- 3rdparty/libwebp/src/enc/picture_csp_enc.c    | 502 ++-----------
- 3rdparty/libwebp/src/enc/picture_enc.c        |  46 +-
- .../libwebp/src/enc/picture_rescale_enc.c     | 119 ++-
- 3rdparty/libwebp/src/enc/picture_tools_enc.c  |  45 +-
- 3rdparty/libwebp/src/enc/predictor_enc.c      |  52 +-
- 3rdparty/libwebp/src/enc/quant_enc.c          | 120 +--
- 3rdparty/libwebp/src/enc/syntax_enc.c         |   8 +-
- 3rdparty/libwebp/src/enc/vp8i_enc.h           |  29 +-
- 3rdparty/libwebp/src/enc/vp8l_enc.c           | 694 +++++++++---------
- 3rdparty/libwebp/src/enc/vp8li_enc.h          |  28 +-
- 3rdparty/libwebp/src/enc/webp_enc.c           |  14 +-
- 3rdparty/libwebp/src/mux/anim_encode.c        |  25 +-
- 3rdparty/libwebp/src/mux/muxedit.c            |   6 +-
- 3rdparty/libwebp/src/mux/muxi.h               |   4 +-
- 3rdparty/libwebp/src/mux/muxinternal.c        |   9 +-
- 3rdparty/libwebp/src/mux/muxread.c            |  11 +-
- .../libwebp/src/utils/bit_reader_inl_utils.h  |  13 +-
- 3rdparty/libwebp/src/utils/bit_reader_utils.c |   3 +-
- 3rdparty/libwebp/src/utils/bit_reader_utils.h |   3 +-
- 3rdparty/libwebp/src/utils/bit_writer_utils.c |   4 +-
- .../libwebp/src/utils/color_cache_utils.c     |  22 +-
- .../libwebp/src/utils/huffman_encode_utils.c  |   5 +-
- .../libwebp/src/utils/huffman_encode_utils.h  |   2 +-
- 3rdparty/libwebp/src/utils/huffman_utils.c    |  99 ++-
- 3rdparty/libwebp/src/utils/huffman_utils.h    |  27 +-
- 3rdparty/libwebp/src/utils/palette.c          | 402 ++++++++++
- 3rdparty/libwebp/src/utils/palette.h          |  60 ++
- .../src/utils/quant_levels_dec_utils.c        |   2 +-
- 3rdparty/libwebp/src/utils/rescaler_utils.c   | 114 +--
- 3rdparty/libwebp/src/utils/rescaler_utils.h   |  13 +-
- 3rdparty/libwebp/src/utils/utils.c            |  77 +-
- 3rdparty/libwebp/src/utils/utils.h            |  43 +-
- 3rdparty/libwebp/src/webp/decode.h            |  44 +-
- 3rdparty/libwebp/src/webp/encode.h            |   6 +-
- 3rdparty/libwebp/src/webp/format_constants.h  |   2 +-
- 3rdparty/libwebp/src/webp/types.h             |   6 +-
- 115 files changed, 5352 insertions(+), 2782 deletions(-)
- delete mode 100644 3rdparty/libwebp/patches/20190910-msa-asm-patch.diff
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv.h
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_csp.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_csp.h
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_neon.c
- create mode 100644 3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c
- create mode 100644 3rdparty/libwebp/src/dsp/cpu.h
- create mode 100644 3rdparty/libwebp/src/dsp/lossless_sse41.c
- create mode 100644 3rdparty/libwebp/src/utils/palette.c
- create mode 100644 3rdparty/libwebp/src/utils/palette.h
-
-diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt
-index 723575c8db3d..532c03026568 100644
---- a/3rdparty/libwebp/CMakeLists.txt
-+++ b/3rdparty/libwebp/CMakeLists.txt
-@@ -21,12 +21,6 @@ if(ANDROID AND ARMEABI_V7A AND NOT NEON)
-   endforeach()
- endif()
- 
--# FIX for quant.h - requires C99 for() loops
--ocv_check_flag_support(C "-std=c99" _varname "${CMAKE_C_FLAGS}")
--if(${_varname})
--  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
--endif()
--
- 
- # ----------------------------------------------------------------------------------
- #         Define the library target:
-diff --git a/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff b/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff
-deleted file mode 100644
-index 1be213520312..000000000000
---- a/3rdparty/libwebp/patches/20190910-msa-asm-patch.diff
-+++ /dev/null
-@@ -1,22 +0,0 @@
--diff --git a/3rdparty/libwebp/src/dsp/msa_macro.h b/3rdparty/libwebp/src/dsp/msa_macro.h
--index de026a1d9e..a16c0bb300 100644
----- a/3rdparty/libwebp/src/dsp/msa_macro.h
--+++ b/3rdparty/libwebp/src/dsp/msa_macro.h
--@@ -73,7 +73,7 @@
--   static inline TYPE FUNC_NAME(const void* const psrc) {  \
--     const uint8_t* const psrc_m = (const uint8_t*)psrc;   \
--     TYPE val_m;                                           \
---    asm volatile (                                        \
--+    __asm__ volatile (                                        \
--       "" #INSTR " %[val_m], %[psrc_m]  \n\t"              \
--       : [val_m] "=r" (val_m)                              \
--       : [psrc_m] "m" (*psrc_m));                          \
--@@ -86,7 +86,7 @@
--   static inline void FUNC_NAME(TYPE val, void* const pdst) { \
--     uint8_t* const pdst_m = (uint8_t*)pdst;                  \
--     TYPE val_m = val;                                        \
---    asm volatile (                                           \
--+    __asm__ volatile (                                           \
--       " " #INSTR "  %[val_m],  %[pdst_m]  \n\t"              \
--       : [pdst_m] "=m" (*pdst_m)                              \
--       : [val_m] "r" (val_m));                                \
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv.c b/3rdparty/libwebp/sharpyuv/sharpyuv.c
-new file mode 100644
-index 000000000000..b94885a6c320
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv.c
-@@ -0,0 +1,565 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Sharp RGB to YUV conversion.
-+//
-+// Author: Skal (pascal.massimino@gmail.com)
-+
-+#include "sharpyuv/sharpyuv.h"
-+
-+#include <assert.h>
-+#include <limits.h>
-+#include <stddef.h>
-+#include <stdlib.h>
-+#include <string.h>
-+
-+#include "src/webp/types.h"
-+#include "sharpyuv/sharpyuv_cpu.h"
-+#include "sharpyuv/sharpyuv_dsp.h"
-+#include "sharpyuv/sharpyuv_gamma.h"
-+
-+//------------------------------------------------------------------------------
-+
-+int SharpYuvGetVersion(void) {
-+  return SHARPYUV_VERSION;
-+}
-+
-+//------------------------------------------------------------------------------
-+// Sharp RGB->YUV conversion
-+
-+static const int kNumIterations = 4;
-+
-+#define YUV_FIX 16  // fixed-point precision for RGB->YUV
-+static const int kYuvHalf = 1 << (YUV_FIX - 1);
-+
-+// Max bit depth so that intermediate calculations fit in 16 bits.
-+static const int kMaxBitDepth = 14;
-+
-+// Returns the precision shift to use based on the input rgb_bit_depth.
-+static int GetPrecisionShift(int rgb_bit_depth) {
-+  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
-+  // bits if needed.
-+  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
-+                                               : (kMaxBitDepth - rgb_bit_depth);
-+}
-+
-+typedef int16_t fixed_t;      // signed type with extra precision for UV
-+typedef uint16_t fixed_y_t;   // unsigned type with extra precision for W
-+
-+//------------------------------------------------------------------------------
-+
-+static uint8_t clip_8b(fixed_t v) {
-+  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
-+}
-+
-+static uint16_t clip(fixed_t v, int max) {
-+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-+}
-+
-+static fixed_y_t clip_bit_depth(int y, int bit_depth) {
-+  const int max = (1 << bit_depth) - 1;
-+  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+static int RGBToGray(int64_t r, int64_t g, int64_t b) {
-+  const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
-+  return (int)(luma >> YUV_FIX);
-+}
-+
-+static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
-+                          int rgb_bit_depth,
-+                          SharpYuvTransferFunctionType transfer_type) {
-+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-+  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth, transfer_type);
-+  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth, transfer_type);
-+  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth, transfer_type);
-+  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth, transfer_type);
-+  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth,
-+                               transfer_type);
-+}
-+
-+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
-+                                int rgb_bit_depth,
-+                                SharpYuvTransferFunctionType transfer_type) {
-+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-+  int i;
-+  for (i = 0; i < w; ++i) {
-+    const uint32_t R =
-+        SharpYuvGammaToLinear(src[0 * w + i], bit_depth, transfer_type);
-+    const uint32_t G =
-+        SharpYuvGammaToLinear(src[1 * w + i], bit_depth, transfer_type);
-+    const uint32_t B =
-+        SharpYuvGammaToLinear(src[2 * w + i], bit_depth, transfer_type);
-+    const uint32_t Y = RGBToGray(R, G, B);
-+    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth, transfer_type);
-+  }
-+}
-+
-+static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
-+                         fixed_t* dst, int uv_w, int rgb_bit_depth,
-+                         SharpYuvTransferFunctionType transfer_type) {
-+  int i;
-+  for (i = 0; i < uv_w; ++i) {
-+    const int r =
-+        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
-+                  src2[0 * uv_w + 1], rgb_bit_depth, transfer_type);
-+    const int g =
-+        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
-+                  src2[2 * uv_w + 1], rgb_bit_depth, transfer_type);
-+    const int b =
-+        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
-+                  src2[4 * uv_w + 1], rgb_bit_depth, transfer_type);
-+    const int W = RGBToGray(r, g, b);
-+    dst[0 * uv_w] = (fixed_t)(r - W);
-+    dst[1 * uv_w] = (fixed_t)(g - W);
-+    dst[2 * uv_w] = (fixed_t)(b - W);
-+    dst  += 1;
-+    src1 += 2;
-+    src2 += 2;
-+  }
-+}
-+
-+static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
-+  int i;
-+  assert(w > 0);
-+  for (i = 0; i < w; ++i) {
-+    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
-+  }
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
-+  const int v0 = (A * 3 + B + 2) >> 2;
-+  return clip_bit_depth(v0 + W0, bit_depth);
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+static WEBP_INLINE int Shift(int v, int shift) {
-+  return (shift >= 0) ? (v << shift) : (v >> -shift);
-+}
-+
-+static void ImportOneRow(const uint8_t* const r_ptr,
-+                         const uint8_t* const g_ptr,
-+                         const uint8_t* const b_ptr,
-+                         int rgb_step,
-+                         int rgb_bit_depth,
-+                         int pic_width,
-+                         fixed_y_t* const dst) {
-+  // Convert the rgb_step from a number of bytes to a number of uint8_t or
-+  // uint16_t values depending the bit depth.
-+  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
-+  int i;
-+  const int w = (pic_width + 1) & ~1;
-+  for (i = 0; i < pic_width; ++i) {
-+    const int off = i * step;
-+    const int shift = GetPrecisionShift(rgb_bit_depth);
-+    if (rgb_bit_depth == 8) {
-+      dst[i + 0 * w] = Shift(r_ptr[off], shift);
-+      dst[i + 1 * w] = Shift(g_ptr[off], shift);
-+      dst[i + 2 * w] = Shift(b_ptr[off], shift);
-+    } else {
-+      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
-+      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
-+      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
-+    }
-+  }
-+  if (pic_width & 1) {  // replicate rightmost pixel
-+    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
-+    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
-+    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
-+  }
-+}
-+
-+static void InterpolateTwoRows(const fixed_y_t* const best_y,
-+                               const fixed_t* prev_uv,
-+                               const fixed_t* cur_uv,
-+                               const fixed_t* next_uv,
-+                               int w,
-+                               fixed_y_t* out1,
-+                               fixed_y_t* out2,
-+                               int rgb_bit_depth) {
-+  const int uv_w = w >> 1;
-+  const int len = (w - 1) >> 1;   // length to filter
-+  int k = 3;
-+  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
-+  while (k-- > 0) {   // process each R/G/B segments in turn
-+    // special boundary case for i==0
-+    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
-+    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);
-+
-+    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
-+                      bit_depth);
-+    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
-+                      bit_depth);
-+
-+    // special boundary case for i == w - 1 when w is even
-+    if (!(w & 1)) {
-+      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
-+                            best_y[w - 1 + 0], bit_depth);
-+      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
-+                            best_y[w - 1 + w], bit_depth);
-+    }
-+    out1 += w;
-+    out2 += w;
-+    prev_uv += uv_w;
-+    cur_uv  += uv_w;
-+    next_uv += uv_w;
-+  }
-+}
-+
-+static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
-+                                         const int coeffs[4], int sfix) {
-+  const int srounder = 1 << (YUV_FIX + sfix - 1);
-+  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
-+                   coeffs[3] + srounder;
-+  return (luma >> (YUV_FIX + sfix));
-+}
-+
-+static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
-+                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
-+                            int u_stride, uint8_t* v_ptr, int v_stride,
-+                            int rgb_bit_depth,
-+                            int yuv_bit_depth, int width, int height,
-+                            const SharpYuvConversionMatrix* yuv_matrix) {
-+  int i, j;
-+  const fixed_t* const best_uv_base = best_uv;
-+  const int w = (width + 1) & ~1;
-+  const int h = (height + 1) & ~1;
-+  const int uv_w = w >> 1;
-+  const int uv_h = h >> 1;
-+  const int sfix = GetPrecisionShift(rgb_bit_depth);
-+  const int yuv_max = (1 << yuv_bit_depth) - 1;
-+
-+  for (best_uv = best_uv_base, j = 0; j < height; ++j) {
-+    for (i = 0; i < width; ++i) {
-+      const int off = (i >> 1);
-+      const int W = best_y[i];
-+      const int r = best_uv[off + 0 * uv_w] + W;
-+      const int g = best_uv[off + 1 * uv_w] + W;
-+      const int b = best_uv[off + 2 * uv_w] + W;
-+      const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
-+      if (yuv_bit_depth <= 8) {
-+        y_ptr[i] = clip_8b(y);
-+      } else {
-+        ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
-+      }
-+    }
-+    best_y += w;
-+    best_uv += (j & 1) * 3 * uv_w;
-+    y_ptr += y_stride;
-+  }
-+  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
-+    for (i = 0; i < uv_w; ++i) {
-+      const int off = i;
-+      // Note r, g and b values here are off by W, but a constant offset on all
-+      // 3 components doesn't change the value of u and v with a YCbCr matrix.
-+      const int r = best_uv[off + 0 * uv_w];
-+      const int g = best_uv[off + 1 * uv_w];
-+      const int b = best_uv[off + 2 * uv_w];
-+      const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
-+      const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
-+      if (yuv_bit_depth <= 8) {
-+        u_ptr[i] = clip_8b(u);
-+        v_ptr[i] = clip_8b(v);
-+      } else {
-+        ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
-+        ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
-+      }
-+    }
-+    best_uv += 3 * uv_w;
-+    u_ptr += u_stride;
-+    v_ptr += v_stride;
-+  }
-+  return 1;
-+}
-+
-+//------------------------------------------------------------------------------
-+// Main function
-+
-+static void* SafeMalloc(uint64_t nmemb, size_t size) {
-+  const uint64_t total_size = nmemb * (uint64_t)size;
-+  if (total_size != (size_t)total_size) return NULL;
-+  return malloc((size_t)total_size);
-+}
-+
-+#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
-+
-+static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
-+                            const uint8_t* b_ptr, int rgb_step, int rgb_stride,
-+                            int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
-+                            uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
-+                            int v_stride, int yuv_bit_depth, int width,
-+                            int height,
-+                            const SharpYuvConversionMatrix* yuv_matrix,
-+                            SharpYuvTransferFunctionType transfer_type) {
-+  // we expand the right/bottom border if needed
-+  const int w = (width + 1) & ~1;
-+  const int h = (height + 1) & ~1;
-+  const int uv_w = w >> 1;
-+  const int uv_h = h >> 1;
-+  uint64_t prev_diff_y_sum = ~0;
-+  int j, iter;
-+
-+  // TODO(skal): allocate one big memory chunk. But for now, it's easier
-+  // for valgrind debugging to have several chunks.
-+  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
-+  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-+  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
-+  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
-+  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-+  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
-+  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
-+  fixed_y_t* best_y = best_y_base;
-+  fixed_y_t* target_y = target_y_base;
-+  fixed_t* best_uv = best_uv_base;
-+  fixed_t* target_uv = target_uv_base;
-+  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
-+  int ok;
-+  assert(w > 0);
-+  assert(h > 0);
-+
-+  if (best_y_base == NULL || best_uv_base == NULL ||
-+      target_y_base == NULL || target_uv_base == NULL ||
-+      best_rgb_y == NULL || best_rgb_uv == NULL ||
-+      tmp_buffer == NULL) {
-+    ok = 0;
-+    goto End;
-+  }
-+
-+  // Import RGB samples to W/RGB representation.
-+  for (j = 0; j < height; j += 2) {
-+    const int is_last_row = (j == height - 1);
-+    fixed_y_t* const src1 = tmp_buffer + 0 * w;
-+    fixed_y_t* const src2 = tmp_buffer + 3 * w;
-+
-+    // prepare two rows of input
-+    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
-+                 src1);
-+    if (!is_last_row) {
-+      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
-+                   rgb_step, rgb_bit_depth, width, src2);
-+    } else {
-+      memcpy(src2, src1, 3 * w * sizeof(*src2));
-+    }
-+    StoreGray(src1, best_y + 0, w);
-+    StoreGray(src2, best_y + w, w);
-+
-+    UpdateW(src1, target_y, w, rgb_bit_depth, transfer_type);
-+    UpdateW(src2, target_y + w, w, rgb_bit_depth, transfer_type);
-+    UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth, transfer_type);
-+    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
-+    best_y += 2 * w;
-+    best_uv += 3 * uv_w;
-+    target_y += 2 * w;
-+    target_uv += 3 * uv_w;
-+    r_ptr += 2 * rgb_stride;
-+    g_ptr += 2 * rgb_stride;
-+    b_ptr += 2 * rgb_stride;
-+  }
-+
-+  // Iterate and resolve clipping conflicts.
-+  for (iter = 0; iter < kNumIterations; ++iter) {
-+    const fixed_t* cur_uv = best_uv_base;
-+    const fixed_t* prev_uv = best_uv_base;
-+    uint64_t diff_y_sum = 0;
-+
-+    best_y = best_y_base;
-+    best_uv = best_uv_base;
-+    target_y = target_y_base;
-+    target_uv = target_uv_base;
-+    for (j = 0; j < h; j += 2) {
-+      fixed_y_t* const src1 = tmp_buffer + 0 * w;
-+      fixed_y_t* const src2 = tmp_buffer + 3 * w;
-+      {
-+        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
-+        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
-+                           src1, src2, rgb_bit_depth);
-+        prev_uv = cur_uv;
-+        cur_uv = next_uv;
-+      }
-+
-+      UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth, transfer_type);
-+      UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth, transfer_type);
-+      UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth, transfer_type);
-+
-+      // update two rows of Y and one row of RGB
-+      diff_y_sum +=
-+          SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
-+                          rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
-+      SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
-+
-+      best_y += 2 * w;
-+      best_uv += 3 * uv_w;
-+      target_y += 2 * w;
-+      target_uv += 3 * uv_w;
-+    }
-+    // test exit condition
-+    if (iter > 0) {
-+      if (diff_y_sum < diff_y_threshold) break;
-+      if (diff_y_sum > prev_diff_y_sum) break;
-+    }
-+    prev_diff_y_sum = diff_y_sum;
-+  }
-+
-+  // final reconstruction
-+  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
-+                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
-+                        width, height, yuv_matrix);
-+
-+ End:
-+  free(best_y_base);
-+  free(best_uv_base);
-+  free(target_y_base);
-+  free(target_uv_base);
-+  free(best_rgb_y);
-+  free(best_rgb_uv);
-+  free(tmp_buffer);
-+  return ok;
-+}
-+#undef SAFE_ALLOC
-+
-+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-+#include <pthread.h>  // NOLINT
-+
-+#define LOCK_ACCESS \
-+    static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
-+    if (pthread_mutex_lock(&sharpyuv_lock)) return
-+#define UNLOCK_ACCESS_AND_RETURN                  \
-+    do {                                          \
-+      (void)pthread_mutex_unlock(&sharpyuv_lock); \
-+      return;                                     \
-+    } while (0)
-+#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-+#define LOCK_ACCESS do {} while (0)
-+#define UNLOCK_ACCESS_AND_RETURN return
-+#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-+
-+// Hidden exported init function.
-+// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
-+// users can declare it as extern and call it with an alternate VP8CPUInfo
-+// function.
-+extern VP8CPUInfo SharpYuvGetCPUInfo;
-+SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
-+void SharpYuvInit(VP8CPUInfo cpu_info_func) {
-+  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
-+      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
-+  LOCK_ACCESS;
-+  // Only update SharpYuvGetCPUInfo when called from external code to avoid a
-+  // race on reading the value in SharpYuvConvert().
-+  if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
-+    SharpYuvGetCPUInfo = cpu_info_func;
-+  }
-+  if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) {
-+    UNLOCK_ACCESS_AND_RETURN;
-+  }
-+
-+  SharpYuvInitDsp();
-+  SharpYuvInitGammaTables();
-+
-+  sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
-+  UNLOCK_ACCESS_AND_RETURN;
-+}
-+
-+int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
-+                    int rgb_step, int rgb_stride, int rgb_bit_depth,
-+                    void* y_ptr, int y_stride, void* u_ptr, int u_stride,
-+                    void* v_ptr, int v_stride, int yuv_bit_depth, int width,
-+                    int height, const SharpYuvConversionMatrix* yuv_matrix) {
-+  SharpYuvOptions options;
-+  options.yuv_matrix = yuv_matrix;
-+  options.transfer_type = kSharpYuvTransferFunctionSrgb;
-+  return SharpYuvConvertWithOptions(
-+      r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, rgb_bit_depth, y_ptr, y_stride,
-+      u_ptr, u_stride, v_ptr, v_stride, yuv_bit_depth, width, height, &options);
-+}
-+
-+int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix* yuv_matrix,
-+                                SharpYuvOptions* options, int version) {
-+  const int major = (version >> 24);
-+  const int minor = (version >> 16) & 0xff;
-+  if (options == NULL || yuv_matrix == NULL ||
-+      (major == SHARPYUV_VERSION_MAJOR && major == 0 &&
-+       minor != SHARPYUV_VERSION_MINOR) ||
-+      (major != SHARPYUV_VERSION_MAJOR)) {
-+    return 0;
-+  }
-+  options->yuv_matrix = yuv_matrix;
-+  options->transfer_type = kSharpYuvTransferFunctionSrgb;
-+  return 1;
-+}
-+
-+int SharpYuvConvertWithOptions(const void* r_ptr, const void* g_ptr,
-+                               const void* b_ptr, int rgb_step, int rgb_stride,
-+                               int rgb_bit_depth, void* y_ptr, int y_stride,
-+                               void* u_ptr, int u_stride, void* v_ptr,
-+                               int v_stride, int yuv_bit_depth, int width,
-+                               int height, const SharpYuvOptions* options) {
-+  const SharpYuvConversionMatrix* yuv_matrix = options->yuv_matrix;
-+  SharpYuvTransferFunctionType transfer_type = options->transfer_type;
-+  SharpYuvConversionMatrix scaled_matrix;
-+  const int rgb_max = (1 << rgb_bit_depth) - 1;
-+  const int rgb_round = 1 << (rgb_bit_depth - 1);
-+  const int yuv_max = (1 << yuv_bit_depth) - 1;
-+  const int sfix = GetPrecisionShift(rgb_bit_depth);
-+
-+  if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
-+      r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
-+      u_ptr == NULL || v_ptr == NULL) {
-+    return 0;
-+  }
-+  if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
-+      rgb_bit_depth != 16) {
-+    return 0;
-+  }
-+  if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
-+    return 0;
-+  }
-+  if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) {
-+    // Step/stride should be even for uint16_t buffers.
-+    return 0;
-+  }
-+  if (yuv_bit_depth > 8 &&
-+      (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
-+    // Stride should be even for uint16_t buffers.
-+    return 0;
-+  }
-+  // The address of the function pointer is used to avoid a read race.
-+  SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
-+
-+  // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
-+  // rgb->yuv conversion matrix.
-+  if (rgb_bit_depth == yuv_bit_depth) {
-+    memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
-+  } else {
-+    int i;
-+    for (i = 0; i < 3; ++i) {
-+      scaled_matrix.rgb_to_y[i] =
-+          (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
-+      scaled_matrix.rgb_to_u[i] =
-+          (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
-+      scaled_matrix.rgb_to_v[i] =
-+          (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
-+    }
-+  }
-+  // Also incorporate precision change scaling.
-+  scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
-+  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
-+  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);
-+
-+  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
-+                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
-+                          v_ptr, v_stride, yuv_bit_depth, width, height,
-+                          &scaled_matrix, transfer_type);
-+}
-+
-+//------------------------------------------------------------------------------
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv.h b/3rdparty/libwebp/sharpyuv/sharpyuv.h
-new file mode 100644
-index 000000000000..23a69ce39c3e
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv.h
-@@ -0,0 +1,174 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Sharp RGB to YUV conversion.
-+
-+#ifndef WEBP_SHARPYUV_SHARPYUV_H_
-+#define WEBP_SHARPYUV_SHARPYUV_H_
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+#ifndef SHARPYUV_EXTERN
-+#ifdef WEBP_EXTERN
-+#define SHARPYUV_EXTERN WEBP_EXTERN
-+#else
-+// This explicitly marks library functions and allows for changing the
-+// signature for e.g., Windows DLL builds.
-+#if defined(__GNUC__) && __GNUC__ >= 4
-+#define SHARPYUV_EXTERN extern __attribute__((visibility("default")))
-+#else
-+#if defined(_MSC_VER) && defined(WEBP_DLL)
-+#define SHARPYUV_EXTERN __declspec(dllexport)
-+#else
-+#define SHARPYUV_EXTERN extern
-+#endif /* _MSC_VER && WEBP_DLL */
-+#endif /* __GNUC__ >= 4 */
-+#endif /* WEBP_EXTERN */
-+#endif /* SHARPYUV_EXTERN */
-+
-+#ifndef SHARPYUV_INLINE
-+#ifdef WEBP_INLINE
-+#define SHARPYUV_INLINE WEBP_INLINE
-+#else
-+#ifndef _MSC_VER
-+#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \
-+    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
-+#define SHARPYUV_INLINE inline
-+#else
-+#define SHARPYUV_INLINE
-+#endif
-+#else
-+#define SHARPYUV_INLINE __forceinline
-+#endif /* _MSC_VER */
-+#endif /* WEBP_INLINE */
-+#endif /* SHARPYUV_INLINE */
-+
-+// SharpYUV API version following the convention from semver.org
-+#define SHARPYUV_VERSION_MAJOR 0
-+#define SHARPYUV_VERSION_MINOR 4
-+#define SHARPYUV_VERSION_PATCH 0
-+// Version as a uint32_t. The major number is the high 8 bits.
-+// The minor number is the middle 8 bits. The patch number is the low 16 bits.
-+#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
-+  (((MAJOR) << 24) | ((MINOR) << 16) | (PATCH))
-+#define SHARPYUV_VERSION                                                \
-+  SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
-+                        SHARPYUV_VERSION_PATCH)
-+
-+// Returns the library's version number, packed in hexadecimal. See
-+// SHARPYUV_VERSION.
-+SHARPYUV_EXTERN int SharpYuvGetVersion(void);
-+
-+// RGB to YUV conversion matrix, in 16 bit fixed point.
-+// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
-+// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
-+// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3]
-+// Then y, u and v values are divided by 1<<16 and rounded.
-+typedef struct {
-+  int rgb_to_y[4];
-+  int rgb_to_u[4];
-+  int rgb_to_v[4];
-+} SharpYuvConversionMatrix;
-+
-+typedef struct SharpYuvOptions SharpYuvOptions;
-+
-+// Enums for transfer functions, as defined in H.273,
-+// https://www.itu.int/rec/T-REC-H.273-202107-I/en
-+typedef enum SharpYuvTransferFunctionType {
-+  // 0 is reserved
-+  kSharpYuvTransferFunctionBt709 = 1,
-+  // 2 is unspecified
-+  // 3 is reserved
-+  kSharpYuvTransferFunctionBt470M = 4,
-+  kSharpYuvTransferFunctionBt470Bg = 5,
-+  kSharpYuvTransferFunctionBt601 = 6,
-+  kSharpYuvTransferFunctionSmpte240 = 7,
-+  kSharpYuvTransferFunctionLinear = 8,
-+  kSharpYuvTransferFunctionLog100 = 9,
-+  kSharpYuvTransferFunctionLog100_Sqrt10 = 10,
-+  kSharpYuvTransferFunctionIec61966 = 11,
-+  kSharpYuvTransferFunctionBt1361 = 12,
-+  kSharpYuvTransferFunctionSrgb = 13,
-+  kSharpYuvTransferFunctionBt2020_10Bit = 14,
-+  kSharpYuvTransferFunctionBt2020_12Bit = 15,
-+  kSharpYuvTransferFunctionSmpte2084 = 16,  // PQ
-+  kSharpYuvTransferFunctionSmpte428 = 17,
-+  kSharpYuvTransferFunctionHlg = 18,
-+  kSharpYuvTransferFunctionNum
-+} SharpYuvTransferFunctionType;
-+
-+// Converts RGB to YUV420 using a downsampling algorithm that minimizes
-+// artefacts caused by chroma subsampling.
-+// This is slower than standard downsampling (averaging of 4 UV values).
-+// Assumes that the image will be upsampled using a bilinear filter. If nearest
-+// neighbor is used instead, the upsampled image might look worse than with
-+// standard downsampling.
-+// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point
-+//     to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise.
-+// rgb_step: distance in bytes between two horizontally adjacent pixels on the
-+//     r, g and b channels. If rgb_bit_depth is > 8, it should be a
-+//     multiple of 2.
-+// rgb_stride: distance in bytes between two vertically adjacent pixels on the
-+//     r, g, and b channels. If rgb_bit_depth is > 8, it should be a
-+//     multiple of 2.
-+// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16.
-+//     Note: 16 bit input is truncated to 14 bits before conversion to yuv.
-+// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12.
-+// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels.  Should
-+//     point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers
-+//     otherwise.
-+// y_stride, u_stride, v_stride: distance in bytes between two vertically
-+//     adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
-+//     should be multiples of 2.
-+// width, height: width and height of the image in pixels
-+// This function calls SharpYuvConvertWithOptions with a default transfer
-+// function of kSharpYuvTransferFunctionSrgb.
-+SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
-+                                    const void* b_ptr, int rgb_step,
-+                                    int rgb_stride, int rgb_bit_depth,
-+                                    void* y_ptr, int y_stride, void* u_ptr,
-+                                    int u_stride, void* v_ptr, int v_stride,
-+                                    int yuv_bit_depth, int width, int height,
-+                                    const SharpYuvConversionMatrix* yuv_matrix);
-+
-+struct SharpYuvOptions {
-+  // This matrix cannot be NULL and can be initialized by
-+  // SharpYuvComputeConversionMatrix.
-+  const SharpYuvConversionMatrix* yuv_matrix;
-+  SharpYuvTransferFunctionType transfer_type;
-+};
-+
-+// Internal, version-checked, entry point
-+SHARPYUV_EXTERN int SharpYuvOptionsInitInternal(const SharpYuvConversionMatrix*,
-+                                                SharpYuvOptions*, int);
-+
-+// Should always be called, to initialize a fresh SharpYuvOptions
-+// structure before modification. SharpYuvOptionsInit() must have succeeded
-+// before using the 'options' object.
-+static SHARPYUV_INLINE int SharpYuvOptionsInit(
-+    const SharpYuvConversionMatrix* yuv_matrix, SharpYuvOptions* options) {
-+  return SharpYuvOptionsInitInternal(yuv_matrix, options, SHARPYUV_VERSION);
-+}
-+
-+SHARPYUV_EXTERN int SharpYuvConvertWithOptions(
-+    const void* r_ptr, const void* g_ptr, const void* b_ptr, int rgb_step,
-+    int rgb_stride, int rgb_bit_depth, void* y_ptr, int y_stride, void* u_ptr,
-+    int u_stride, void* v_ptr, int v_stride, int yuv_bit_depth, int width,
-+    int height, const SharpYuvOptions* options);
-+
-+// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
-+// support (it's rarely used in practice, especially for images).
-+
-+#ifdef __cplusplus
-+}  // extern "C"
-+#endif
-+
-+#endif  // WEBP_SHARPYUV_SHARPYUV_H_
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c
-new file mode 100644
-index 000000000000..29425a0c4918
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c
-@@ -0,0 +1,14 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+#include "sharpyuv/sharpyuv_cpu.h"
-+
-+// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The
-+// function pointer is renamed in sharpyuv_cpu.h.
-+#include "src/dsp/cpu.c"
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h
-new file mode 100644
-index 000000000000..176ca3eb1682
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h
-@@ -0,0 +1,22 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_
-+#define WEBP_SHARPYUV_SHARPYUV_CPU_H_
-+
-+#include "sharpyuv/sharpyuv.h"
-+
-+// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds.
-+// SharpYuvInit() replaces the use of the function pointer.
-+#undef WEBP_EXTERN
-+#define WEBP_EXTERN extern
-+#define VP8GetCPUInfo SharpYuvGetCPUInfo
-+#include "src/dsp/cpu.h"
-+
-+#endif  // WEBP_SHARPYUV_SHARPYUV_CPU_H_
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c
-new file mode 100644
-index 000000000000..0ad22be9458c
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c
-@@ -0,0 +1,110 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Colorspace utilities.
-+
-+#include "sharpyuv/sharpyuv_csp.h"
-+
-+#include <assert.h>
-+#include <math.h>
-+#include <stddef.h>
-+
-+static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }
-+
-+void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
-+                                     SharpYuvConversionMatrix* matrix) {
-+  const float kr = yuv_color_space->kr;
-+  const float kb = yuv_color_space->kb;
-+  const float kg = 1.0f - kr - kb;
-+  const float cr = 0.5f / (1.0f - kb);
-+  const float cb = 0.5f / (1.0f - kr);
-+
-+  const int shift = yuv_color_space->bit_depth - 8;
-+
-+  const float denom = (float)((1 << yuv_color_space->bit_depth) - 1);
-+  float scale_y = 1.0f;
-+  float add_y = 0.0f;
-+  float scale_u = cr;
-+  float scale_v = cb;
-+  float add_uv = (float)(128 << shift);
-+  assert(yuv_color_space->bit_depth >= 8);
-+
-+  if (yuv_color_space->range == kSharpYuvRangeLimited) {
-+    scale_y *= (219 << shift) / denom;
-+    scale_u *= (224 << shift) / denom;
-+    scale_v *= (224 << shift) / denom;
-+    add_y = (float)(16 << shift);
-+  }
-+
-+  matrix->rgb_to_y[0] = ToFixed16(kr * scale_y);
-+  matrix->rgb_to_y[1] = ToFixed16(kg * scale_y);
-+  matrix->rgb_to_y[2] = ToFixed16(kb * scale_y);
-+  matrix->rgb_to_y[3] = ToFixed16(add_y);
-+
-+  matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u);
-+  matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u);
-+  matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u);
-+  matrix->rgb_to_u[3] = ToFixed16(add_uv);
-+
-+  matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v);
-+  matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v);
-+  matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v);
-+  matrix->rgb_to_v[3] = ToFixed16(add_uv);
-+}
-+
-+// Matrices are in YUV_FIX fixed point precision.
-+// WebP's matrix, similar but not identical to kRec601LimitedMatrix.
-+static const SharpYuvConversionMatrix kWebpMatrix = {
-+  {16839, 33059, 6420, 16 << 16},
-+  {-9719, -19081, 28800, 128 << 16},
-+  {28800, -24116, -4684, 128 << 16},
-+};
-+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited
-+static const SharpYuvConversionMatrix kRec601LimitedMatrix = {
-+  {16829, 33039, 6416, 16 << 16},
-+  {-9714, -19071, 28784, 128 << 16},
-+  {28784, -24103, -4681, 128 << 16},
-+};
-+// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull
-+static const SharpYuvConversionMatrix kRec601FullMatrix = {
-+  {19595, 38470, 7471, 0},
-+  {-11058, -21710, 32768, 128 << 16},
-+  {32768, -27439, -5329, 128 << 16},
-+};
-+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited
-+static const SharpYuvConversionMatrix kRec709LimitedMatrix = {
-+  {11966, 40254, 4064, 16 << 16},
-+  {-6596, -22189, 28784, 128 << 16},
-+  {28784, -26145, -2639, 128 << 16},
-+};
-+// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull
-+static const SharpYuvConversionMatrix kRec709FullMatrix = {
-+  {13933, 46871, 4732, 0},
-+  {-7509, -25259, 32768, 128 << 16},
-+  {32768, -29763, -3005, 128 << 16},
-+};
-+
-+const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
-+    SharpYuvMatrixType matrix_type) {
-+  switch (matrix_type) {
-+    case kSharpYuvMatrixWebp:
-+      return &kWebpMatrix;
-+    case kSharpYuvMatrixRec601Limited:
-+      return &kRec601LimitedMatrix;
-+    case kSharpYuvMatrixRec601Full:
-+      return &kRec601FullMatrix;
-+    case kSharpYuvMatrixRec709Limited:
-+      return &kRec709LimitedMatrix;
-+    case kSharpYuvMatrixRec709Full:
-+      return &kRec709FullMatrix;
-+    case kSharpYuvMatrixNum:
-+      return NULL;
-+  }
-+  return NULL;
-+}
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h
-new file mode 100644
-index 000000000000..3214e3ac6075
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h
-@@ -0,0 +1,60 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Colorspace utilities.
-+
-+#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_
-+#define WEBP_SHARPYUV_SHARPYUV_CSP_H_
-+
-+#include "sharpyuv/sharpyuv.h"
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+// Range of YUV values.
-+typedef enum {
-+  kSharpYuvRangeFull,     // YUV values between [0;255] (for 8 bit)
-+  kSharpYuvRangeLimited   // Y in [16;235], YUV in [16;240] (for 8 bit)
-+} SharpYuvRange;
-+
-+// Constants that define a YUV color space.
-+typedef struct {
-+  // Kr and Kb are defined such that:
-+  // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb.
-+  float kr;
-+  float kb;
-+  int bit_depth;  // 8, 10 or 12
-+  SharpYuvRange range;
-+} SharpYuvColorSpace;
-+
-+// Fills in 'matrix' for the given YUVColorSpace.
-+SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
-+    const SharpYuvColorSpace* yuv_color_space,
-+    SharpYuvConversionMatrix* matrix);
-+
-+// Enums for precomputed conversion matrices.
-+typedef enum {
-+  kSharpYuvMatrixWebp = 0,
-+  kSharpYuvMatrixRec601Limited,
-+  kSharpYuvMatrixRec601Full,
-+  kSharpYuvMatrixRec709Limited,
-+  kSharpYuvMatrixRec709Full,
-+  kSharpYuvMatrixNum
-+} SharpYuvMatrixType;
-+
-+// Returns a pointer to a matrix for one of the predefined colorspaces.
-+SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
-+    SharpYuvMatrixType matrix_type);
-+
-+#ifdef __cplusplus
-+}  // extern "C"
-+#endif
-+
-+#endif  // WEBP_SHARPYUV_SHARPYUV_CSP_H_
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c
-new file mode 100644
-index 000000000000..0da3efc0b813
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c
-@@ -0,0 +1,104 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Speed-critical functions for Sharp YUV.
-+//
-+// Author: Skal (pascal.massimino@gmail.com)
-+
-+#include "sharpyuv/sharpyuv_dsp.h"
-+
-+#include <assert.h>
-+#include <stdlib.h>
-+
-+#include "sharpyuv/sharpyuv_cpu.h"
-+
-+//-----------------------------------------------------------------------------
-+
-+#if !WEBP_NEON_OMIT_C_CODE
-+static uint16_t clip(int v, int max) {
-+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-+}
-+
-+static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src,
-+                                  uint16_t* dst, int len, int bit_depth) {
-+  uint64_t diff = 0;
-+  int i;
-+  const int max_y = (1 << bit_depth) - 1;
-+  for (i = 0; i < len; ++i) {
-+    const int diff_y = ref[i] - src[i];
-+    const int new_y = (int)dst[i] + diff_y;
-+    dst[i] = clip(new_y, max_y);
-+    diff += (uint64_t)abs(diff_y);
-+  }
-+  return diff;
-+}
-+
-+static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src,
-+                                int16_t* dst, int len) {
-+  int i;
-+  for (i = 0; i < len; ++i) {
-+    const int diff_uv = ref[i] - src[i];
-+    dst[i] += diff_uv;
-+  }
-+}
-+
-+static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len,
-+                                const uint16_t* best_y, uint16_t* out,
-+                                int bit_depth) {
-+  int i;
-+  const int max_y = (1 << bit_depth) - 1;
-+  for (i = 0; i < len; ++i, ++A, ++B) {
-+    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
-+    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
-+    out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y);
-+    out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y);
-+  }
-+}
-+#endif  // !WEBP_NEON_OMIT_C_CODE
-+
-+//-----------------------------------------------------------------------------
-+
-+uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
-+                            uint16_t* dst, int len, int bit_depth);
-+void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst,
-+                          int len);
-+void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
-+                          const uint16_t* best_y, uint16_t* out,
-+                          int bit_depth);
-+
-+extern VP8CPUInfo SharpYuvGetCPUInfo;
-+extern void InitSharpYuvSSE2(void);
-+extern void InitSharpYuvNEON(void);
-+
-+void SharpYuvInitDsp(void) {
-+#if !WEBP_NEON_OMIT_C_CODE
-+  SharpYuvUpdateY = SharpYuvUpdateY_C;
-+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
-+  SharpYuvFilterRow = SharpYuvFilterRow_C;
-+#endif
-+
-+  if (SharpYuvGetCPUInfo != NULL) {
-+#if defined(WEBP_HAVE_SSE2)
-+    if (SharpYuvGetCPUInfo(kSSE2)) {
-+      InitSharpYuvSSE2();
-+    }
-+#endif  // WEBP_HAVE_SSE2
-+  }
-+
-+#if defined(WEBP_HAVE_NEON)
-+  if (WEBP_NEON_OMIT_C_CODE ||
-+      (SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) {
-+    InitSharpYuvNEON();
-+  }
-+#endif  // WEBP_HAVE_NEON
-+
-+  assert(SharpYuvUpdateY != NULL);
-+  assert(SharpYuvUpdateRGB != NULL);
-+  assert(SharpYuvFilterRow != NULL);
-+}
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h
-new file mode 100644
-index 000000000000..805fbadbf657
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h
-@@ -0,0 +1,28 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Speed-critical functions for Sharp YUV.
-+
-+#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
-+#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
-+
-+#include "sharpyuv/sharpyuv_cpu.h"
-+#include "src/webp/types.h"
-+
-+extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
-+                                   uint16_t* dst, int len, int bit_depth);
-+extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref,
-+                                 int16_t* dst, int len);
-+extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
-+                                 const uint16_t* best_y, uint16_t* out,
-+                                 int bit_depth);
-+
-+void SharpYuvInitDsp(void);
-+
-+#endif  // WEBP_SHARPYUV_SHARPYUV_DSP_H_
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c
-new file mode 100644
-index 000000000000..fecadc64805d
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c
-@@ -0,0 +1,419 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Gamma correction utilities.
-+
-+#include "sharpyuv/sharpyuv_gamma.h"
-+
-+#include <assert.h>
-+#include <float.h>
-+#include <math.h>
-+
-+#include "src/webp/types.h"
-+
-+// Gamma correction compensates loss of resolution during chroma subsampling.
-+// Size of pre-computed table for converting from gamma to linear.
-+#define GAMMA_TO_LINEAR_TAB_BITS 10
-+#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS)
-+static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2];
-+#define LINEAR_TO_GAMMA_TAB_BITS 9
-+#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS)
-+static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2];
-+
-+static const double kGammaF = 1. / 0.45;
-+#define GAMMA_TO_LINEAR_BITS 16
-+
-+static volatile int kGammaTablesSOk = 0;
-+void SharpYuvInitGammaTables(void) {
-+  assert(GAMMA_TO_LINEAR_BITS <= 16);
-+  if (!kGammaTablesSOk) {
-+    int v;
-+    const double a = 0.09929682680944;
-+    const double thresh = 0.018053968510807;
-+    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
-+    // Precompute gamma to linear table.
-+    {
-+      const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE;
-+      const double a_rec = 1. / (1. + a);
-+      for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) {
-+        const double g = norm * v;
-+        double value;
-+        if (g <= thresh * 4.5) {
-+          value = g / 4.5;
-+        } else {
-+          value = pow(a_rec * (g + a), kGammaF);
-+        }
-+        kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
-+      }
-+      // to prevent small rounding errors to cause read-overflow:
-+      kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] =
-+          kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE];
-+    }
-+    // Precompute linear to gamma table.
-+    {
-+      const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE;
-+      for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) {
-+        const double g = scale * v;
-+        double value;
-+        if (g <= thresh) {
-+          value = 4.5 * g;
-+        } else {
-+          value = (1. + a) * pow(g, 1. / kGammaF) - a;
-+        }
-+        kLinearToGammaTabS[v] =
-+            (uint32_t)(final_scale * value + 0.5);
-+      }
-+      // to prevent small rounding errors to cause read-overflow:
-+      kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] =
-+          kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE];
-+    }
-+    kGammaTablesSOk = 1;
-+  }
-+}
-+
-+static WEBP_INLINE int Shift(int v, int shift) {
-+  return (shift >= 0) ? (v << shift) : (v >> -shift);
-+}
-+
-+static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab,
-+                                                    int tab_pos_shift_right,
-+                                                    int tab_value_shift) {
-+  const uint32_t tab_pos = Shift(v, -tab_pos_shift_right);
-+  // fractional part, in 'tab_pos_shift' fixed-point precision
-+  const uint32_t x = v - (tab_pos << tab_pos_shift_right);  // fractional part
-+  // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1])
-+  const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift);
-+  const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift);
-+  // Final interpolation.
-+  const uint32_t v2 = (v1 - v0) * x;  // note: v1 >= v0.
-+  const int half =
-+      (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0;
-+  const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right);
-+  return result;
-+}
-+
-+static uint32_t ToLinearSrgb(uint16_t v, int bit_depth) {
-+  const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth;
-+  if (shift > 0) {
-+    return kGammaToLinearTabS[v << shift];
-+  }
-+  return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0);
-+}
-+
-+static uint16_t FromLinearSrgb(uint32_t value, int bit_depth) {
-+  return FixedPointInterpolation(
-+      value, kLinearToGammaTabS,
-+      (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS),
-+      bit_depth - GAMMA_TO_LINEAR_BITS);
-+}
-+
-+////////////////////////////////////////////////////////////////////////////////
-+
-+#define CLAMP(x, low, high) \
-+  (((x) < (low)) ? (low) : (((high) < (x)) ? (high) : (x)))
-+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-+
-+static WEBP_INLINE float Roundf(float x) {
-+  if (x < 0)
-+    return (float)ceil((double)(x - 0.5f));
-+  else
-+    return (float)floor((double)(x + 0.5f));
-+}
-+
-+static WEBP_INLINE float Powf(float base, float exp) {
-+  return (float)pow((double)base, (double)exp);
-+}
-+
-+static WEBP_INLINE float Log10f(float x) { return (float)log10((double)x); }
-+
-+static float ToLinear709(float gamma) {
-+  if (gamma < 0.f) {
-+    return 0.f;
-+  } else if (gamma < 4.5f * 0.018053968510807f) {
-+    return gamma / 4.5f;
-+  } else if (gamma < 1.f) {
-+    return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
-+  }
-+  return 1.f;
-+}
-+
-+static float FromLinear709(float linear) {
-+  if (linear < 0.f) {
-+    return 0.f;
-+  } else if (linear < 0.018053968510807f) {
-+    return linear * 4.5f;
-+  } else if (linear < 1.f) {
-+    return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
-+  }
-+  return 1.f;
-+}
-+
-+static float ToLinear470M(float gamma) {
-+  return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.2f);
-+}
-+
-+static float FromLinear470M(float linear) {
-+  return Powf(CLAMP(linear, 0.f, 1.f), 2.2f);
-+}
-+
-+static float ToLinear470Bg(float gamma) {
-+  return Powf(CLAMP(gamma, 0.f, 1.f), 1.f / 2.8f);
-+}
-+
-+static float FromLinear470Bg(float linear) {
-+  return Powf(CLAMP(linear, 0.f, 1.f), 2.8f);
-+}
-+
-+static float ToLinearSmpte240(float gamma) {
-+  if (gamma < 0.f) {
-+    return 0.f;
-+  } else if (gamma < 4.f * 0.022821585529445f) {
-+    return gamma / 4.f;
-+  } else if (gamma < 1.f) {
-+    return Powf((gamma + 0.111572195921731f) / 1.111572195921731f, 1.f / 0.45f);
-+  }
-+  return 1.f;
-+}
-+
-+static float FromLinearSmpte240(float linear) {
-+  if (linear < 0.f) {
-+    return 0.f;
-+  } else if (linear < 0.022821585529445f) {
-+    return linear * 4.f;
-+  } else if (linear < 1.f) {
-+    return 1.111572195921731f * Powf(linear, 0.45f) - 0.111572195921731f;
-+  }
-+  return 1.f;
-+}
-+
-+static float ToLinearLog100(float gamma) {
-+  return (gamma < 0.01f) ? 0.0f : 1.0f + Log10f(MIN(gamma, 1.f)) / 2.0f;
-+}
-+
-+static float FromLinearLog100(float linear) {
-+  // The function is non-bijective so choose the middle of [0, 0.01].
-+  const float mid_interval = 0.01f / 2.f;
-+  return (linear <= 0.0f) ? mid_interval
-+                          : Powf(10.0f, 2.f * (MIN(linear, 1.f) - 1.0f));
-+}
-+
-+static float ToLinearLog100Sqrt10(float gamma) {
-+  return (gamma < 0.00316227766f) ? 0.0f
-+                                  : 1.0f + Log10f(MIN(gamma, 1.f)) / 2.5f;
-+}
-+
-+static float FromLinearLog100Sqrt10(float linear) {
-+  // The function is non-bijective so choose the middle of [0, 0.00316227766f[.
-+  const float mid_interval = 0.00316227766f / 2.f;
-+  return (linear < 0.0f) ? mid_interval
-+                         : Powf(10.0f, 2.5f * (MIN(linear, 1.f) - 1.0f));
-+}
-+
-+static float ToLinearIec61966(float gamma) {
-+  if (gamma <= -4.5f * 0.018053968510807f) {
-+    return Powf((-gamma + 0.09929682680944f) / -1.09929682680944f, 1.f / 0.45f);
-+  } else if (gamma < 4.5f * 0.018053968510807f) {
-+    return gamma / 4.5f;
-+  }
-+  return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
-+}
-+
-+static float FromLinearIec61966(float linear) {
-+  if (linear <= -0.018053968510807f) {
-+    return -1.09929682680944f * Powf(-linear, 0.45f) + 0.09929682680944f;
-+  } else if (linear < 0.018053968510807f) {
-+    return linear * 4.5f;
-+  }
-+  return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
-+}
-+
-+static float ToLinearBt1361(float gamma) {
-+  if (gamma < -0.25f) {
-+    return -0.25f;
-+  } else if (gamma < 0.f) {
-+    return Powf((gamma - 0.02482420670236f) / -0.27482420670236f, 1.f / 0.45f) /
-+           -4.f;
-+  } else if (gamma < 4.5f * 0.018053968510807f) {
-+    return gamma / 4.5f;
-+  } else if (gamma < 1.f) {
-+    return Powf((gamma + 0.09929682680944f) / 1.09929682680944f, 1.f / 0.45f);
-+  }
-+  return 1.f;
-+}
-+
-+static float FromLinearBt1361(float linear) {
-+  if (linear < -0.25f) {
-+    return -0.25f;
-+  } else if (linear < 0.f) {
-+    return -0.27482420670236f * Powf(-4.f * linear, 0.45f) + 0.02482420670236f;
-+  } else if (linear < 0.018053968510807f) {
-+    return linear * 4.5f;
-+  } else if (linear < 1.f) {
-+    return 1.09929682680944f * Powf(linear, 0.45f) - 0.09929682680944f;
-+  }
-+  return 1.f;
-+}
-+
-+static float ToLinearPq(float gamma) {
-+  if (gamma > 0.f) {
-+    const float pow_gamma = Powf(gamma, 32.f / 2523.f);
-+    const float num = MAX(pow_gamma - 107.f / 128.f, 0.0f);
-+    const float den = MAX(2413.f / 128.f - 2392.f / 128.f * pow_gamma, FLT_MIN);
-+    return Powf(num / den, 4096.f / 653.f);
-+  }
-+  return 0.f;
-+}
-+
-+static float FromLinearPq(float linear) {
-+  if (linear > 0.f) {
-+    const float pow_linear = Powf(linear, 653.f / 4096.f);
-+    const float num = 107.f / 128.f + 2413.f / 128.f * pow_linear;
-+    const float den = 1.0f + 2392.f / 128.f * pow_linear;
-+    return Powf(num / den, 2523.f / 32.f);
-+  }
-+  return 0.f;
-+}
-+
-+static float ToLinearSmpte428(float gamma) {
-+  return Powf(0.91655527974030934f * MAX(gamma, 0.f), 1.f / 2.6f);
-+}
-+
-+static float FromLinearSmpte428(float linear) {
-+  return Powf(MAX(linear, 0.f), 2.6f) / 0.91655527974030934f;
-+}
-+
-+// Conversion in BT.2100 requires RGB info. Simplify to gamma correction here.
-+static float ToLinearHlg(float gamma) {
-+  if (gamma < 0.f) {
-+    return 0.f;
-+  } else if (gamma <= 0.5f) {
-+    return Powf((gamma * gamma) * (1.f / 3.f), 1.2f);
-+  }
-+  return Powf((expf((gamma - 0.55991073f) / 0.17883277f) + 0.28466892f) / 12.0f,
-+              1.2f);
-+}
-+
-+static float FromLinearHlg(float linear) {
-+  linear = Powf(linear, 1.f / 1.2f);
-+  if (linear < 0.f) {
-+    return 0.f;
-+  } else if (linear <= (1.f / 12.f)) {
-+    return sqrtf(3.f * linear);
-+  }
-+  return 0.17883277f * logf(12.f * linear - 0.28466892f) + 0.55991073f;
-+}
-+
-+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
-+                               SharpYuvTransferFunctionType transfer_type) {
-+  float v_float, linear;
-+  if (transfer_type == kSharpYuvTransferFunctionSrgb) {
-+    return ToLinearSrgb(v, bit_depth);
-+  }
-+  v_float = (float)v / ((1 << bit_depth) - 1);
-+  switch (transfer_type) {
-+    case kSharpYuvTransferFunctionBt709:
-+    case kSharpYuvTransferFunctionBt601:
-+    case kSharpYuvTransferFunctionBt2020_10Bit:
-+    case kSharpYuvTransferFunctionBt2020_12Bit:
-+      linear = ToLinear709(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt470M:
-+      linear = ToLinear470M(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt470Bg:
-+      linear = ToLinear470Bg(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte240:
-+      linear = ToLinearSmpte240(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionLinear:
-+      return v;
-+    case kSharpYuvTransferFunctionLog100:
-+      linear = ToLinearLog100(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionLog100_Sqrt10:
-+      linear = ToLinearLog100Sqrt10(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionIec61966:
-+      linear = ToLinearIec61966(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt1361:
-+      linear = ToLinearBt1361(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte2084:
-+      linear = ToLinearPq(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte428:
-+      linear = ToLinearSmpte428(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionHlg:
-+      linear = ToLinearHlg(v_float);
-+      break;
-+    default:
-+      assert(0);
-+      linear = 0;
-+      break;
-+  }
-+  return (uint32_t)Roundf(linear * ((1 << 16) - 1));
-+}
-+
-+uint16_t SharpYuvLinearToGamma(uint32_t v, int bit_depth,
-+                               SharpYuvTransferFunctionType transfer_type) {
-+  float v_float, linear;
-+  if (transfer_type == kSharpYuvTransferFunctionSrgb) {
-+    return FromLinearSrgb(v, bit_depth);
-+  }
-+  v_float = (float)v / ((1 << 16) - 1);
-+  switch (transfer_type) {
-+    case kSharpYuvTransferFunctionBt709:
-+    case kSharpYuvTransferFunctionBt601:
-+    case kSharpYuvTransferFunctionBt2020_10Bit:
-+    case kSharpYuvTransferFunctionBt2020_12Bit:
-+      linear = FromLinear709(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt470M:
-+      linear = FromLinear470M(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt470Bg:
-+      linear = FromLinear470Bg(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte240:
-+      linear = FromLinearSmpte240(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionLinear:
-+      return v;
-+    case kSharpYuvTransferFunctionLog100:
-+      linear = FromLinearLog100(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionLog100_Sqrt10:
-+      linear = FromLinearLog100Sqrt10(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionIec61966:
-+      linear = FromLinearIec61966(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionBt1361:
-+      linear = FromLinearBt1361(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte2084:
-+      linear = FromLinearPq(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionSmpte428:
-+      linear = FromLinearSmpte428(v_float);
-+      break;
-+    case kSharpYuvTransferFunctionHlg:
-+      linear = FromLinearHlg(v_float);
-+      break;
-+    default:
-+      assert(0);
-+      linear = 0;
-+      break;
-+  }
-+  return (uint16_t)Roundf(linear * ((1 << bit_depth) - 1));
-+}
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h
-new file mode 100644
-index 000000000000..b8ba7e98705e
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h
-@@ -0,0 +1,38 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Gamma correction utilities.
-+
-+#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
-+#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
-+
-+#include "sharpyuv/sharpyuv.h"
-+#include "src/webp/types.h"
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+// Initializes precomputed tables. Must be called once before calling
-+// SharpYuvGammaToLinear or SharpYuvLinearToGamma.
-+void SharpYuvInitGammaTables(void);
-+
-+// Converts a 'bit_depth'-bit gamma color value to a 16-bit linear value.
-+uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth,
-+                               SharpYuvTransferFunctionType transfer_type);
-+
-+// Converts a 16-bit linear color value to a 'bit_depth'-bit gamma value.
-+uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth,
-+                               SharpYuvTransferFunctionType transfer_type);
-+
-+#ifdef __cplusplus
-+}  // extern "C"
-+#endif
-+
-+#endif  // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c b/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c
-new file mode 100644
-index 000000000000..5840914865e0
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c
-@@ -0,0 +1,181 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Speed-critical functions for Sharp YUV.
-+//
-+// Author: Skal (pascal.massimino@gmail.com)
-+
-+#include "sharpyuv/sharpyuv_dsp.h"
-+
-+#if defined(WEBP_USE_NEON)
-+#include <assert.h>
-+#include <stdlib.h>
-+#include <arm_neon.h>
-+
-+static uint16_t clip_NEON(int v, int max) {
-+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-+}
-+
-+static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
-+                                     uint16_t* dst, int len, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  int i;
-+  const int16x8_t zero = vdupq_n_s16(0);
-+  const int16x8_t max = vdupq_n_s16(max_y);
-+  uint64x2_t sum = vdupq_n_u64(0);
-+  uint64_t diff;
-+
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
-+    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
-+    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
-+    const int16x8_t D = vsubq_s16(A, B);       // diff_y
-+    const int16x8_t F = vaddq_s16(C, D);       // new_y
-+    const uint16x8_t H =
-+        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
-+    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
-+    vst1q_u16(dst + i, H);
-+    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
-+  }
-+  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
-+  for (; i < len; ++i) {
-+    const int diff_y = ref[i] - src[i];
-+    const int new_y = (int)(dst[i]) + diff_y;
-+    dst[i] = clip_NEON(new_y, max_y);
-+    diff += (uint64_t)(abs(diff_y));
-+  }
-+  return diff;
-+}
-+
-+static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
-+                                   int16_t* dst, int len) {
-+  int i;
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const int16x8_t A = vld1q_s16(ref + i);
-+    const int16x8_t B = vld1q_s16(src + i);
-+    const int16x8_t C = vld1q_s16(dst + i);
-+    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
-+    const int16x8_t E = vaddq_s16(C, D);   // new_uv
-+    vst1q_s16(dst + i, E);
-+  }
-+  for (; i < len; ++i) {
-+    const int diff_uv = ref[i] - src[i];
-+    dst[i] += diff_uv;
-+  }
-+}
-+
-+static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B,
-+                                     int len, const uint16_t* best_y,
-+                                     uint16_t* out, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  int i;
-+  const int16x8_t max = vdupq_n_s16(max_y);
-+  const int16x8_t zero = vdupq_n_s16(0);
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const int16x8_t a0 = vld1q_s16(A + i + 0);
-+    const int16x8_t a1 = vld1q_s16(A + i + 1);
-+    const int16x8_t b0 = vld1q_s16(B + i + 0);
-+    const int16x8_t b1 = vld1q_s16(B + i + 1);
-+    const int16x8_t a0b1 = vaddq_s16(a0, b1);
-+    const int16x8_t a1b0 = vaddq_s16(a1, b0);
-+    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
-+    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
-+    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
-+    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
-+    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
-+    const int16x8_t e0 = vrhaddq_s16(c1, a0);
-+    const int16x8_t e1 = vrhaddq_s16(c0, a1);
-+    const int16x8x2_t f = vzipq_s16(e0, e1);
-+    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
-+    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
-+    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
-+    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
-+    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
-+    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
-+    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
-+    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
-+  }
-+  for (; i < len; ++i) {
-+    const int a0b1 = A[i + 0] + B[i + 1];
-+    const int a1b0 = A[i + 1] + B[i + 0];
-+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-+    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
-+    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
-+  }
-+}
-+
-+static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B,
-+                                     int len, const uint16_t* best_y,
-+                                     uint16_t* out, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  int i;
-+  const uint16x8_t max = vdupq_n_u16(max_y);
-+  for (i = 0; i + 4 <= len; i += 4) {
-+    const int16x4_t a0 = vld1_s16(A + i + 0);
-+    const int16x4_t a1 = vld1_s16(A + i + 1);
-+    const int16x4_t b0 = vld1_s16(B + i + 0);
-+    const int16x4_t b1 = vld1_s16(B + i + 1);
-+    const int32x4_t a0b1 = vaddl_s16(a0, b1);
-+    const int32x4_t a1b0 = vaddl_s16(a1, b0);
-+    const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0);  // A0+A1+B0+B1
-+    const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1);    // 2*(A0+B1)
-+    const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0);    // 2*(A1+B0)
-+    const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3);
-+    const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3);
-+    const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0));
-+    const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1));
-+    const int32x4x2_t f = vzipq_s32(e0, e1);
-+
-+    const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i));
-+    const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g));
-+    const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g));
-+    const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1));
-+    const uint16x8_t i_clamped = vminq_u16(i_16, max);
-+    vst1q_u16(out + 2 * i + 0, i_clamped);
-+  }
-+  for (; i < len; ++i) {
-+    const int a0b1 = A[i + 0] + B[i + 1];
-+    const int a1b0 = A[i + 1] + B[i + 0];
-+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-+    out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y);
-+    out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y);
-+  }
-+}
-+
-+static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
-+                                   const uint16_t* best_y, uint16_t* out,
-+                                   int bit_depth) {
-+  if (bit_depth <= 10) {
-+    SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth);
-+  } else {
-+    SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth);
-+  }
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+extern void InitSharpYuvNEON(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
-+  SharpYuvUpdateY = SharpYuvUpdateY_NEON;
-+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
-+  SharpYuvFilterRow = SharpYuvFilterRow_NEON;
-+}
-+
-+#else  // !WEBP_USE_NEON
-+
-+extern void InitSharpYuvNEON(void);
-+
-+void InitSharpYuvNEON(void) {}
-+
-+#endif  // WEBP_USE_NEON
-diff --git a/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c b/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c
-new file mode 100644
-index 000000000000..9744d1bb6cfe
---- /dev/null
-+++ b/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c
-@@ -0,0 +1,201 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Speed-critical functions for Sharp YUV.
-+//
-+// Author: Skal (pascal.massimino@gmail.com)
-+
-+#include "sharpyuv/sharpyuv_dsp.h"
-+
-+#if defined(WEBP_USE_SSE2)
-+#include <stdlib.h>
-+#include <emmintrin.h>
-+
-+static uint16_t clip_SSE2(int v, int max) {
-+  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
-+}
-+
-+static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
-+                                     uint16_t* dst, int len, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  uint64_t diff = 0;
-+  uint32_t tmp[4];
-+  int i;
-+  const __m128i zero = _mm_setzero_si128();
-+  const __m128i max = _mm_set1_epi16(max_y);
-+  const __m128i one = _mm_set1_epi16(1);
-+  __m128i sum = zero;
-+
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-+    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
-+    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
-+    const __m128i F = _mm_add_epi16(C, D);       // new_y
-+    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
-+    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
-+    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
-+    _mm_storeu_si128((__m128i*)(dst + i), H);
-+    sum = _mm_add_epi32(sum, I);
-+  }
-+  _mm_storeu_si128((__m128i*)tmp, sum);
-+  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
-+  for (; i < len; ++i) {
-+    const int diff_y = ref[i] - src[i];
-+    const int new_y = (int)dst[i] + diff_y;
-+    dst[i] = clip_SSE2(new_y, max_y);
-+    diff += (uint64_t)abs(diff_y);
-+  }
-+  return diff;
-+}
-+
-+static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
-+                                   int16_t* dst, int len) {
-+  int i = 0;
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
-+    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
-+    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
-+    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
-+    const __m128i E = _mm_add_epi16(C, D);   // new_uv
-+    _mm_storeu_si128((__m128i*)(dst + i), E);
-+  }
-+  for (; i < len; ++i) {
-+    const int diff_uv = ref[i] - src[i];
-+    dst[i] += diff_uv;
-+  }
-+}
-+
-+static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B,
-+                                     int len, const uint16_t* best_y,
-+                                     uint16_t* out, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  int i;
-+  const __m128i kCst8 = _mm_set1_epi16(8);
-+  const __m128i max = _mm_set1_epi16(max_y);
-+  const __m128i zero = _mm_setzero_si128();
-+  for (i = 0; i + 8 <= len; i += 8) {
-+    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
-+    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
-+    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
-+    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
-+    const __m128i a0b1 = _mm_add_epi16(a0, b1);
-+    const __m128i a1b0 = _mm_add_epi16(a1, b0);
-+    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
-+    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
-+    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
-+    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
-+    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
-+    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
-+    const __m128i d0 = _mm_add_epi16(c1, a0);
-+    const __m128i d1 = _mm_add_epi16(c0, a1);
-+    const __m128i e0 = _mm_srai_epi16(d0, 1);
-+    const __m128i e1 = _mm_srai_epi16(d1, 1);
-+    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
-+    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
-+    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-+    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
-+    const __m128i h0 = _mm_add_epi16(g0, f0);
-+    const __m128i h1 = _mm_add_epi16(g1, f1);
-+    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
-+    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
-+    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
-+    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
-+  }
-+  for (; i < len; ++i) {
-+    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-+    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-+    // We reuse the common sub-expressions.
-+    const int a0b1 = A[i + 0] + B[i + 1];
-+    const int a1b0 = A[i + 1] + B[i + 0];
-+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-+    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
-+    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
-+  }
-+}
-+
-+static WEBP_INLINE __m128i s16_to_s32(__m128i in) {
-+  return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16);
-+}
-+
-+static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B,
-+                                     int len, const uint16_t* best_y,
-+                                     uint16_t* out, int bit_depth) {
-+  const int max_y = (1 << bit_depth) - 1;
-+  int i;
-+  const __m128i kCst8 = _mm_set1_epi32(8);
-+  const __m128i max = _mm_set1_epi16(max_y);
-+  const __m128i zero = _mm_setzero_si128();
-+  for (i = 0; i + 4 <= len; i += 4) {
-+    const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0)));
-+    const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1)));
-+    const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0)));
-+    const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1)));
-+    const __m128i a0b1 = _mm_add_epi32(a0, b1);
-+    const __m128i a1b0 = _mm_add_epi32(a1, b0);
-+    const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0);  // A0+A1+B0+B1
-+    const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8);
-+    const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1);  // 2*(A0+B1)
-+    const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0);  // 2*(A1+B0)
-+    const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3);
-+    const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3);
-+    const __m128i d0 = _mm_add_epi32(c1, a0);
-+    const __m128i d1 = _mm_add_epi32(c0, a1);
-+    const __m128i e0 = _mm_srai_epi32(d0, 1);
-+    const __m128i e1 = _mm_srai_epi32(d1, 1);
-+    const __m128i f0 = _mm_unpacklo_epi32(e0, e1);
-+    const __m128i f1 = _mm_unpackhi_epi32(e0, e1);
-+    const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
-+    const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1));
-+    const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero);
-+    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final);
-+  }
-+  for (; i < len; ++i) {
-+    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
-+    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
-+    // We reuse the common sub-expressions.
-+    const int a0b1 = A[i + 0] + B[i + 1];
-+    const int a1b0 = A[i + 1] + B[i + 0];
-+    const int a0a1b0b1 = a0b1 + a1b0 + 8;
-+    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
-+    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
-+    out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y);
-+    out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y);
-+  }
-+}
-+
-+static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
-+                                   const uint16_t* best_y, uint16_t* out,
-+                                   int bit_depth) {
-+  if (bit_depth <= 10) {
-+    SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth);
-+  } else {
-+    SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth);
-+  }
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+extern void InitSharpYuvSSE2(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
-+  SharpYuvUpdateY = SharpYuvUpdateY_SSE2;
-+  SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2;
-+  SharpYuvFilterRow = SharpYuvFilterRow_SSE2;
-+}
-+#else  // !WEBP_USE_SSE2
-+
-+extern void InitSharpYuvSSE2(void);
-+
-+void InitSharpYuvSSE2(void) {}
-+
-+#endif  // WEBP_USE_SSE2
-diff --git a/3rdparty/libwebp/src/dec/alpha_dec.c b/3rdparty/libwebp/src/dec/alpha_dec.c
-index bce735bfc248..663255c42fdc 100644
---- a/3rdparty/libwebp/src/dec/alpha_dec.c
-+++ b/3rdparty/libwebp/src/dec/alpha_dec.c
-@@ -117,21 +117,12 @@ static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
-     const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
-     uint8_t* dst = dec->alpha_plane_ + row * width;
-     assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
--    if (alph_dec->filter_ != WEBP_FILTER_NONE) {
--      assert(WebPUnfilters[alph_dec->filter_] != NULL);
--      for (y = 0; y < num_rows; ++y) {
--        WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
--        prev_line = dst;
--        dst += width;
--        deltas += width;
--      }
--    } else {
--      for (y = 0; y < num_rows; ++y) {
--        memcpy(dst, deltas, width * sizeof(*dst));
--        prev_line = dst;
--        dst += width;
--        deltas += width;
--      }
-+    assert(WebPUnfilters[alph_dec->filter_] != NULL);
-+    for (y = 0; y < num_rows; ++y) {
-+      WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
-+      prev_line = dst;
-+      dst += width;
-+      deltas += width;
-     }
-     dec->alpha_prev_line_ = prev_line;
-   } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
-@@ -155,7 +146,8 @@ static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) {
-   dec->alpha_plane_mem_ =
-       (uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
-   if (dec->alpha_plane_mem_ == NULL) {
--    return 0;
-+    return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
-+                       "Alpha decoder initialization failed.");
-   }
-   dec->alpha_plane_ = dec->alpha_plane_mem_;
-   dec->alpha_prev_line_ = NULL;
-@@ -183,16 +175,25 @@ const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
-   assert(dec != NULL && io != NULL);
- 
-   if (row < 0 || num_rows <= 0 || row + num_rows > height) {
--    return NULL;    // sanity check.
-+    return NULL;
-   }
- 
-   if (!dec->is_alpha_decoded_) {
-     if (dec->alph_dec_ == NULL) {    // Initialize decoder.
-       dec->alph_dec_ = ALPHNew();
--      if (dec->alph_dec_ == NULL) return NULL;
-+      if (dec->alph_dec_ == NULL) {
-+        VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
-+                    "Alpha decoder initialization failed.");
-+        return NULL;
-+      }
-       if (!AllocateAlphaPlane(dec, io)) goto Error;
-       if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
-                     io, dec->alpha_plane_)) {
-+        VP8LDecoder* const vp8l_dec = dec->alph_dec_->vp8l_dec_;
-+        VP8SetError(dec,
-+                    (vp8l_dec == NULL) ? VP8_STATUS_OUT_OF_MEMORY
-+                                       : vp8l_dec->status_,
-+                    "Alpha decoder initialization failed.");
-         goto Error;
-       }
-       // if we allowed use of alpha dithering, check whether it's needed at all
-diff --git a/3rdparty/libwebp/src/dec/buffer_dec.c b/3rdparty/libwebp/src/dec/buffer_dec.c
-index 3cd94eb4d930..11ce76f19e2b 100644
---- a/3rdparty/libwebp/src/dec/buffer_dec.c
-+++ b/3rdparty/libwebp/src/dec/buffer_dec.c
-@@ -75,7 +75,7 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
-     const WebPRGBABuffer* const buf = &buffer->u.RGBA;
-     const int stride = abs(buf->stride);
-     const uint64_t size =
--        MIN_BUFFER_SIZE(width * kModeBpp[mode], height, stride);
-+        MIN_BUFFER_SIZE((uint64_t)width * kModeBpp[mode], height, stride);
-     ok &= (size <= buf->size);
-     ok &= (stride >= width * kModeBpp[mode]);
-     ok &= (buf->rgba != NULL);
-@@ -102,7 +102,7 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
-     int stride;
-     uint64_t size;
- 
--    if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
-+    if ((uint64_t)w * kModeBpp[mode] >= (1ull << 31)) {
-       return VP8_STATUS_INVALID_PARAM;
-     }
-     stride = w * kModeBpp[mode];
-@@ -117,7 +117,6 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
-     }
-     total_size = size + 2 * uv_size + a_size;
- 
--    // Security/sanity checks
-     output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
-     if (output == NULL) {
-       return VP8_STATUS_OUT_OF_MEMORY;
-@@ -156,11 +155,11 @@ VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
-   }
-   if (WebPIsRGBMode(buffer->colorspace)) {
-     WebPRGBABuffer* const buf = &buffer->u.RGBA;
--    buf->rgba += (buffer->height - 1) * buf->stride;
-+    buf->rgba += (int64_t)(buffer->height - 1) * buf->stride;
-     buf->stride = -buf->stride;
-   } else {
-     WebPYUVABuffer* const buf = &buffer->u.YUVA;
--    const int H = buffer->height;
-+    const int64_t H = buffer->height;
-     buf->y += (H - 1) * buf->y_stride;
-     buf->y_stride = -buf->y_stride;
-     buf->u += ((H - 1) >> 1) * buf->u_stride;
-@@ -188,8 +187,7 @@ VP8StatusCode WebPAllocateDecBuffer(int width, int height,
-       const int ch = options->crop_height;
-       const int x = options->crop_left & ~1;
-       const int y = options->crop_top & ~1;
--      if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
--          x + cw > width || y + ch > height) {
-+      if (!WebPCheckCropDimensions(width, height, x, y, cw, ch)) {
-         return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
-       }
-       width = cw;
-diff --git a/3rdparty/libwebp/src/dec/frame_dec.c b/3rdparty/libwebp/src/dec/frame_dec.c
-index 04609a8e56be..91ca1f8609a9 100644
---- a/3rdparty/libwebp/src/dec/frame_dec.c
-+++ b/3rdparty/libwebp/src/dec/frame_dec.c
-@@ -705,7 +705,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
-                         + cache_size + alpha_size + WEBP_ALIGN_CST;
-   uint8_t* mem;
- 
--  if (needed != (size_t)needed) return 0;  // check for overflow
-+  if (!CheckSizeOverflow(needed)) return 0;  // check for overflow
-   if (needed > dec->mem_size_) {
-     WebPSafeFree(dec->mem_);
-     dec->mem_size_ = 0;
-diff --git a/3rdparty/libwebp/src/dec/io_dec.c b/3rdparty/libwebp/src/dec/io_dec.c
-index 29dc6345dfd1..5ef6298886eb 100644
---- a/3rdparty/libwebp/src/dec/io_dec.c
-+++ b/3rdparty/libwebp/src/dec/io_dec.c
-@@ -298,46 +298,57 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
-   const int uv_out_height = (out_height + 1) >> 1;
-   const int uv_in_width  = (io->mb_w + 1) >> 1;
-   const int uv_in_height = (io->mb_h + 1) >> 1;
--  const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
-+  // scratch memory for luma rescaler
-+  const size_t work_size = 2 * (size_t)out_width;
-   const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
--  size_t tmp_size, rescaler_size;
-+  uint64_t total_size;
-+  size_t rescaler_size;
-   rescaler_t* work;
-   WebPRescaler* scalers;
-   const int num_rescalers = has_alpha ? 4 : 3;
- 
--  tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
-+  total_size = ((uint64_t)work_size + 2 * uv_work_size) * sizeof(*work);
-   if (has_alpha) {
--    tmp_size += work_size * sizeof(*work);
-+    total_size += (uint64_t)work_size * sizeof(*work);
-   }
-   rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
-+  total_size += rescaler_size;
-+  if (!CheckSizeOverflow(total_size)) {
-+    return 0;
-+  }
- 
--  p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size);
-+  p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
-   if (p->memory == NULL) {
-     return 0;   // memory error
-   }
-   work = (rescaler_t*)p->memory;
- 
--  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size);
-+  scalers = (WebPRescaler*)WEBP_ALIGN(
-+      (const uint8_t*)work + total_size - rescaler_size);
-   p->scaler_y = &scalers[0];
-   p->scaler_u = &scalers[1];
-   p->scaler_v = &scalers[2];
-   p->scaler_a = has_alpha ? &scalers[3] : NULL;
- 
--  WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
--                   buf->y, out_width, out_height, buf->y_stride, 1,
--                   work);
--  WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
--                   buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
--                   work + work_size);
--  WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
--                   buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
--                   work + work_size + uv_work_size);
-+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
-+                        buf->y, out_width, out_height, buf->y_stride, 1,
-+                        work) ||
-+      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
-+                        buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
-+                        work + work_size) ||
-+      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
-+                        buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
-+                        work + work_size + uv_work_size)) {
-+    return 0;
-+  }
-   p->emit = EmitRescaledYUV;
- 
-   if (has_alpha) {
--    WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
--                     buf->a, out_width, out_height, buf->a_stride, 1,
--                     work + work_size + 2 * uv_work_size);
-+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
-+                          buf->a, out_width, out_height, buf->a_stride, 1,
-+                          work + work_size + 2 * uv_work_size)) {
-+      return 0;
-+    }
-     p->emit_alpha = EmitRescaledAlphaYUV;
-     WebPInitAlphaProcessing();
-   }
-@@ -480,51 +491,58 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
-   const int out_height = io->scaled_height;
-   const int uv_in_width  = (io->mb_w + 1) >> 1;
-   const int uv_in_height = (io->mb_h + 1) >> 1;
--  const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
-+  // scratch memory for one rescaler
-+  const size_t work_size = 2 * (size_t)out_width;
-   rescaler_t* work;  // rescalers work area
-   uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
--  size_t tmp_size1, tmp_size2, total_size, rescaler_size;
-+  uint64_t tmp_size1, tmp_size2, total_size;
-+  size_t rescaler_size;
-   WebPRescaler* scalers;
-   const int num_rescalers = has_alpha ? 4 : 3;
- 
--  tmp_size1 = 3 * work_size;
--  tmp_size2 = 3 * out_width;
--  if (has_alpha) {
--    tmp_size1 += work_size;
--    tmp_size2 += out_width;
--  }
-+  tmp_size1 = (uint64_t)num_rescalers * work_size;
-+  tmp_size2 = (uint64_t)num_rescalers * out_width;
-   total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
-   rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
-+  total_size += rescaler_size;
-+  if (!CheckSizeOverflow(total_size)) {
-+    return 0;
-+  }
- 
--  p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size);
-+  p->memory = WebPSafeMalloc(1ULL, (size_t)total_size);
-   if (p->memory == NULL) {
-     return 0;   // memory error
-   }
-   work = (rescaler_t*)p->memory;
-   tmp = (uint8_t*)(work + tmp_size1);
- 
--  scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size);
-+  scalers = (WebPRescaler*)WEBP_ALIGN(
-+      (const uint8_t*)work + total_size - rescaler_size);
-   p->scaler_y = &scalers[0];
-   p->scaler_u = &scalers[1];
-   p->scaler_v = &scalers[2];
-   p->scaler_a = has_alpha ? &scalers[3] : NULL;
- 
--  WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
--                   tmp + 0 * out_width, out_width, out_height, 0, 1,
--                   work + 0 * work_size);
--  WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
--                   tmp + 1 * out_width, out_width, out_height, 0, 1,
--                   work + 1 * work_size);
--  WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
--                   tmp + 2 * out_width, out_width, out_height, 0, 1,
--                   work + 2 * work_size);
-+  if (!WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
-+                        tmp + 0 * out_width, out_width, out_height, 0, 1,
-+                        work + 0 * work_size) ||
-+      !WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
-+                        tmp + 1 * out_width, out_width, out_height, 0, 1,
-+                        work + 1 * work_size) ||
-+      !WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
-+                        tmp + 2 * out_width, out_width, out_height, 0, 1,
-+                        work + 2 * work_size)) {
-+    return 0;
-+  }
-   p->emit = EmitRescaledRGB;
-   WebPInitYUV444Converters();
- 
-   if (has_alpha) {
--    WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
--                     tmp + 3 * out_width, out_width, out_height, 0, 1,
--                     work + 3 * work_size);
-+    if (!WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
-+                          tmp + 3 * out_width, out_width, out_height, 0, 1,
-+                          work + 3 * work_size)) {
-+      return 0;
-+    }
-     p->emit_alpha = EmitRescaledAlphaRGB;
-     if (p->output->colorspace == MODE_RGBA_4444 ||
-         p->output->colorspace == MODE_rgbA_4444) {
-diff --git a/3rdparty/libwebp/src/dec/tree_dec.c b/3rdparty/libwebp/src/dec/tree_dec.c
-index 1c6fdea27cc6..243460595329 100644
---- a/3rdparty/libwebp/src/dec/tree_dec.c
-+++ b/3rdparty/libwebp/src/dec/tree_dec.c
-@@ -12,10 +12,11 @@
- // Author: Skal (pascal.massimino@gmail.com)
- 
- #include "src/dec/vp8i_dec.h"
-+#include "src/dsp/cpu.h"
- #include "src/utils/bit_reader_inl_utils.h"
- 
- #if !defined(USE_GENERIC_TREE)
--#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
-+#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
- // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
- #define USE_GENERIC_TREE 1   // ALTERNATE_CODE
- #else
-diff --git a/3rdparty/libwebp/src/dec/vp8_dec.c b/3rdparty/libwebp/src/dec/vp8_dec.c
-index 8f736974784e..20b92e84c4fc 100644
---- a/3rdparty/libwebp/src/dec/vp8_dec.c
-+++ b/3rdparty/libwebp/src/dec/vp8_dec.c
-@@ -335,7 +335,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
-     io->scaled_width = io->width;
-     io->scaled_height = io->height;
- 
--    io->mb_w = io->width;   // sanity check
-+    io->mb_w = io->width;   // for soundness
-     io->mb_h = io->height;  // ditto
- 
-     VP8ResetProba(&dec->proba_);
-@@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
-   0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
- };
- 
--// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
-+// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
- static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
-   int v;
-   if (!VP8GetBit(br, p[3], "coeffs")) {
-@@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br,
-   return 16;
- }
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
-+
- WEBP_DSP_INIT_FUNC(InitGetCoeffs) {
-   if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
-     GetCoeffs = GetCoeffsAlt;
-diff --git a/3rdparty/libwebp/src/dec/vp8i_dec.h b/3rdparty/libwebp/src/dec/vp8i_dec.h
-index a0c0af15799e..1ae4ff62f2a4 100644
---- a/3rdparty/libwebp/src/dec/vp8i_dec.h
-+++ b/3rdparty/libwebp/src/dec/vp8i_dec.h
-@@ -31,8 +31,8 @@ extern "C" {
- 
- // version numbers
- #define DEC_MAJ_VERSION 1
--#define DEC_MIN_VERSION 2
--#define DEC_REV_VERSION 0
-+#define DEC_MIN_VERSION 3
-+#define DEC_REV_VERSION 1
- 
- // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
- // Constraints are: We need to store one 16x16 block of luma samples (y),
-diff --git a/3rdparty/libwebp/src/dec/vp8l_dec.c b/3rdparty/libwebp/src/dec/vp8l_dec.c
-index 2d603b437974..11c00ea964a9 100644
---- a/3rdparty/libwebp/src/dec/vp8l_dec.c
-+++ b/3rdparty/libwebp/src/dec/vp8l_dec.c
-@@ -12,6 +12,7 @@
- // Authors: Vikas Arora (vikaas.arora@gmail.com)
- //          Jyrki Alakuijala (jyrki@google.com)
- 
-+#include <assert.h>
- #include <stdlib.h>
- 
- #include "src/dec/alphai_dec.h"
-@@ -84,7 +85,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
- // to 256 (green component values) + 24 (length prefix values)
- // + color_cache_size (between 0 and 2048).
- // All values computed for 8-bit first level lookup with Mark Adler's tool:
--// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
-+// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
- #define FIXED_TABLE_SIZE (630 * 3 + 410)
- static const uint16_t kTableSize[12] = {
-   FIXED_TABLE_SIZE + 654,
-@@ -101,6 +102,14 @@ static const uint16_t kTableSize[12] = {
-   FIXED_TABLE_SIZE + 2704
- };
- 
-+static int VP8LSetError(VP8LDecoder* const dec, VP8StatusCode error) {
-+  // The oldest error reported takes precedence over the new one.
-+  if (dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED) {
-+    dec->status_ = error;
-+  }
-+  return 0;
-+}
-+
- static int DecodeImageStream(int xsize, int ysize,
-                              int is_level0,
-                              VP8LDecoder* const dec,
-@@ -178,7 +187,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
- 
- //------------------------------------------------------------------------------
- // Decodes the next Huffman code from bit-stream.
--// FillBitWindow(br) needs to be called at minimum every second call
-+// VP8LFillBitWindow(br) needs to be called at minimum every second call
- // to ReadSymbol, in order to pre-fetch enough bits.
- static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
-                                   VP8LBitReader* const br) {
-@@ -253,11 +262,11 @@ static int ReadHuffmanCodeLengths(
-   int symbol;
-   int max_symbol;
-   int prev_code_len = DEFAULT_CODE_LENGTH;
--  HuffmanCode table[1 << LENGTHS_TABLE_BITS];
-+  HuffmanTables tables;
- 
--  if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
--                             code_length_code_lengths,
--                             NUM_CODE_LENGTH_CODES)) {
-+  if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) ||
-+      !VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS,
-+                             code_length_code_lengths, NUM_CODE_LENGTH_CODES)) {
-     goto End;
-   }
- 
-@@ -277,7 +286,7 @@ static int ReadHuffmanCodeLengths(
-     int code_len;
-     if (max_symbol-- == 0) break;
-     VP8LFillBitWindow(br);
--    p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
-+    p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
-     VP8LSetBitPos(br, br->bit_pos_ + p->bits);
-     code_len = p->value;
-     if (code_len < kCodeLengthLiterals) {
-@@ -300,14 +309,16 @@ static int ReadHuffmanCodeLengths(
-   ok = 1;
- 
-  End:
--  if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-+  VP8LHuffmanTablesDeallocate(&tables);
-+  if (!ok) return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
-   return ok;
- }
- 
- // 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
- // tree.
- static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
--                           int* const code_lengths, HuffmanCode* const table) {
-+                           int* const code_lengths,
-+                           HuffmanTables* const table) {
-   int ok = 0;
-   int size = 0;
-   VP8LBitReader* const br = &dec->br_;
-@@ -321,7 +332,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
-     // The first code is either 1 bit or 8 bit code.
-     int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
-     code_lengths[symbol] = 1;
--    // The second code (if present), is always 8 bit long.
-+    // The second code (if present), is always 8 bits long.
-     if (num_symbols == 2) {
-       symbol = VP8LReadBits(br, 8);
-       code_lengths[symbol] = 1;
-@@ -331,10 +342,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
-     int i;
-     int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
-     const int num_codes = VP8LReadBits(br, 4) + 4;
--    if (num_codes > NUM_CODE_LENGTH_CODES) {
--      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
--      return 0;
--    }
-+    assert(num_codes <= NUM_CODE_LENGTH_CODES);
- 
-     for (i = 0; i < num_codes; ++i) {
-       code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
-@@ -349,36 +357,35 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
-                                  code_lengths, alphabet_size);
-   }
-   if (!ok || size == 0) {
--    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
--    return 0;
-+    return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
-   }
-   return size;
- }
- 
- static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
-                             int color_cache_bits, int allow_recursion) {
--  int i, j;
-+  int i;
-   VP8LBitReader* const br = &dec->br_;
-   VP8LMetadata* const hdr = &dec->hdr_;
-   uint32_t* huffman_image = NULL;
-   HTreeGroup* htree_groups = NULL;
--  HuffmanCode* huffman_tables = NULL;
--  HuffmanCode* huffman_table = NULL;
-+  HuffmanTables* huffman_tables = &hdr->huffman_tables_;
-   int num_htree_groups = 1;
-   int num_htree_groups_max = 1;
--  int max_alphabet_size = 0;
--  int* code_lengths = NULL;
--  const int table_size = kTableSize[color_cache_bits];
-   int* mapping = NULL;
-   int ok = 0;
- 
-+  // Check the table has been 0 initialized (through InitMetadata).
-+  assert(huffman_tables->root.start == NULL);
-+  assert(huffman_tables->curr_segment == NULL);
-+
-   if (allow_recursion && VP8LReadBits(br, 1)) {
-     // use meta Huffman codes.
-     const int huffman_precision = VP8LReadBits(br, 3) + 2;
-     const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
-     const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
-     const int huffman_pixs = huffman_xsize * huffman_ysize;
--    if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
-+    if (!DecodeImageStream(huffman_xsize, huffman_ysize, /*is_level0=*/0, dec,
-                            &huffman_image)) {
-       goto Error;
-     }
-@@ -402,7 +409,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
-       // values [0, num_htree_groups)
-       mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
-       if (mapping == NULL) {
--        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
-+        VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-         goto Error;
-       }
-       // -1 means a value is unmapped, and therefore unused in the Huffman
-@@ -421,29 +428,55 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
- 
-   if (br->eos_) goto Error;
- 
--  // Find maximum alphabet size for the htree group.
--  for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
--    int alphabet_size = kAlphabetSize[j];
--    if (j == 0 && color_cache_bits > 0) {
--      alphabet_size += 1 << color_cache_bits;
--    }
--    if (max_alphabet_size < alphabet_size) {
--      max_alphabet_size = alphabet_size;
--    }
-+  if (!ReadHuffmanCodesHelper(color_cache_bits, num_htree_groups,
-+                              num_htree_groups_max, mapping, dec,
-+                              huffman_tables, &htree_groups)) {
-+    goto Error;
-   }
-+  ok = 1;
- 
--  code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
--                                      sizeof(*code_lengths));
--  huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
--                                                sizeof(*huffman_tables));
--  htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
-+  // All OK. Finalize pointers.
-+  hdr->huffman_image_ = huffman_image;
-+  hdr->num_htree_groups_ = num_htree_groups;
-+  hdr->htree_groups_ = htree_groups;
-+
-+ Error:
-+  WebPSafeFree(mapping);
-+  if (!ok) {
-+    WebPSafeFree(huffman_image);
-+    VP8LHuffmanTablesDeallocate(huffman_tables);
-+    VP8LHtreeGroupsFree(htree_groups);
-+  }
-+  return ok;
-+}
- 
--  if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
--    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
-+int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
-+                           int num_htree_groups_max, const int* const mapping,
-+                           VP8LDecoder* const dec,
-+                           HuffmanTables* const huffman_tables,
-+                           HTreeGroup** const htree_groups) {
-+  int i, j, ok = 0;
-+  const int max_alphabet_size =
-+      kAlphabetSize[0] + ((color_cache_bits > 0) ? 1 << color_cache_bits : 0);
-+  const int table_size = kTableSize[color_cache_bits];
-+  int* code_lengths = NULL;
-+
-+  if ((mapping == NULL && num_htree_groups != num_htree_groups_max) ||
-+      num_htree_groups > num_htree_groups_max) {
-+    goto Error;
-+  }
-+
-+  code_lengths =
-+      (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths));
-+  *htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
-+
-+  if (*htree_groups == NULL || code_lengths == NULL ||
-+      !VP8LHuffmanTablesAllocate(num_htree_groups * table_size,
-+                                 huffman_tables)) {
-+    VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
--  huffman_table = huffman_tables;
-   for (i = 0; i < num_htree_groups_max; ++i) {
-     // If the index "i" is unused in the Huffman image, just make sure the
-     // coefficients are valid but do not store them.
-@@ -460,7 +493,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
-       }
-     } else {
-       HTreeGroup* const htree_group =
--          &htree_groups[(mapping == NULL) ? i : mapping[i]];
-+          &(*htree_groups)[(mapping == NULL) ? i : mapping[i]];
-       HuffmanCode** const htrees = htree_group->htrees;
-       int size;
-       int total_size = 0;
-@@ -468,19 +501,20 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
-       int max_bits = 0;
-       for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
-         int alphabet_size = kAlphabetSize[j];
--        htrees[j] = huffman_table;
-         if (j == 0 && color_cache_bits > 0) {
-           alphabet_size += (1 << color_cache_bits);
-         }
--        size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table);
-+        size =
-+            ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables);
-+        htrees[j] = huffman_tables->curr_segment->curr_table;
-         if (size == 0) {
-           goto Error;
-         }
-         if (is_trivial_literal && kLiteralMap[j] == 1) {
--          is_trivial_literal = (huffman_table->bits == 0);
-+          is_trivial_literal = (htrees[j]->bits == 0);
-         }
--        total_size += huffman_table->bits;
--        huffman_table += size;
-+        total_size += htrees[j]->bits;
-+        huffman_tables->curr_segment->curr_table += size;
-         if (j <= ALPHA) {
-           int local_max_bits = code_lengths[0];
-           int k;
-@@ -511,19 +545,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
-   }
-   ok = 1;
- 
--  // All OK. Finalize pointers.
--  hdr->huffman_image_ = huffman_image;
--  hdr->num_htree_groups_ = num_htree_groups;
--  hdr->htree_groups_ = htree_groups;
--  hdr->huffman_tables_ = huffman_tables;
--
-  Error:
-   WebPSafeFree(code_lengths);
--  WebPSafeFree(mapping);
-   if (!ok) {
--    WebPSafeFree(huffman_image);
--    WebPSafeFree(huffman_tables);
--    VP8LHtreeGroupsFree(htree_groups);
-+    VP8LHuffmanTablesDeallocate(huffman_tables);
-+    VP8LHtreeGroupsFree(*htree_groups);
-+    *htree_groups = NULL;
-   }
-   return ok;
- }
-@@ -547,8 +574,7 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
-                                scaled_data_size * sizeof(*scaled_data);
-   uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
-   if (memory == NULL) {
--    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
--    return 0;
-+    return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-   }
-   assert(dec->rescaler_memory == NULL);
-   dec->rescaler_memory = memory;
-@@ -559,8 +585,11 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
-   memory += work_size * sizeof(*work);
-   scaled_data = (uint32_t*)memory;
- 
--  WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
--                   out_width, out_height, 0, num_channels, work);
-+  if (!WebPRescalerInit(dec->rescaler, in_width, in_height,
-+                        (uint8_t*)scaled_data, out_width, out_height,
-+                        0, num_channels, work)) {
-+    return 0;
-+  }
-   return 1;
- }
- #endif   // WEBP_REDUCE_SIZE
-@@ -574,13 +603,14 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
- static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
-                   int rgba_stride, uint8_t* const rgba) {
-   uint32_t* const src = (uint32_t*)rescaler->dst;
-+  uint8_t* dst = rgba;
-   const int dst_width = rescaler->dst_width;
-   int num_lines_out = 0;
-   while (WebPRescalerHasPendingOutput(rescaler)) {
--    uint8_t* const dst = rgba + num_lines_out * rgba_stride;
-     WebPRescalerExportRow(rescaler);
-     WebPMultARGBRow(src, dst_width, 1);
-     VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
-+    dst += rgba_stride;
-     ++num_lines_out;
-   }
-   return num_lines_out;
-@@ -594,8 +624,8 @@ static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
-   int num_lines_in = 0;
-   int num_lines_out = 0;
-   while (num_lines_in < mb_h) {
--    uint8_t* const row_in = in + num_lines_in * in_stride;
--    uint8_t* const row_out = out + num_lines_out * out_stride;
-+    uint8_t* const row_in = in + (uint64_t)num_lines_in * in_stride;
-+    uint8_t* const row_out = out + (uint64_t)num_lines_out * out_stride;
-     const int lines_left = mb_h - num_lines_in;
-     const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
-     int lines_imported;
-@@ -796,7 +826,8 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
-       const WebPDecBuffer* const output = dec->output_;
-       if (WebPIsRGBMode(output->colorspace)) {  // convert to RGBA
-         const WebPRGBABuffer* const buf = &output->u.RGBA;
--        uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
-+        uint8_t* const rgba =
-+            buf->rgba + (int64_t)dec->last_out_row_ * buf->stride;
-         const int num_rows_out =
- #if !defined(WEBP_REDUCE_SIZE)
-          io->use_scaling ?
-@@ -1077,12 +1108,10 @@ static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
-  End:
-   br->eos_ = VP8LIsEndOfStream(br);
-   if (!ok || (br->eos_ && pos < end)) {
--    ok = 0;
--    dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
--                            : VP8_STATUS_BITSTREAM_ERROR;
--  } else {
--    dec->last_pixel_ = pos;
-+    return VP8LSetError(
-+        dec, br->eos_ ? VP8_STATUS_SUSPENDED : VP8_STATUS_BITSTREAM_ERROR);
-   }
-+  dec->last_pixel_ = pos;
-   return ok;
- }
- 
-@@ -1232,9 +1261,20 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
-   }
- 
-   br->eos_ = VP8LIsEndOfStream(br);
--  if (dec->incremental_ && br->eos_ && src < src_end) {
-+  // In incremental decoding:
-+  // br->eos_ && src < src_last: if 'br' reached the end of the buffer and
-+  // 'src_last' has not been reached yet, there is not enough data. 'dec' has to
-+  // be reset until there is more data.
-+  // !br->eos_ && src < src_last: this cannot happen as either the buffer is
-+  // fully read, either enough has been read to reach 'src_last'.
-+  // src >= src_last: 'src_last' is reached, all is fine. 'src' can actually go
-+  // beyond 'src_last' in case the image is cropped and an LZ77 goes further.
-+  // The buffer might have been enough or there is some left. 'br->eos_' does
-+  // not matter.
-+  assert(!dec->incremental_ || (br->eos_ && src < src_last) || src >= src_last);
-+  if (dec->incremental_ && br->eos_ && src < src_last) {
-     RestoreState(dec);
--  } else if (!br->eos_) {
-+  } else if ((dec->incremental_ && src >= src_last) || !br->eos_) {
-     // Process the remaining rows corresponding to last row-block.
-     if (process_func != NULL) {
-       process_func(dec, row > last_row ? last_row : row);
-@@ -1249,8 +1289,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
-   return 1;
- 
-  Error:
--  dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
--  return 0;
-+  return VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
- }
- 
- // -----------------------------------------------------------------------------
-@@ -1276,7 +1315,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
-     uint8_t* const new_data = (uint8_t*)new_color_map;
-     new_color_map[0] = transform->data_[0];
-     for (i = 4; i < 4 * num_colors; ++i) {
--      // Equivalent to AddPixelEq(), on a byte-basis.
-+      // Equivalent to VP8LAddPixels(), on a byte-basis.
-       new_data[i] = (data[i] + new_data[i - 4]) & 0xff;
-     }
-     for (; i < 4 * final_num_colors; ++i) {
-@@ -1317,7 +1356,7 @@ static int ReadTransform(int* const xsize, int const* ysize,
-                                                transform->bits_),
-                              VP8LSubSampleSize(transform->ysize_,
-                                                transform->bits_),
--                             0, dec, &transform->data_);
-+                             /*is_level0=*/0, dec, &transform->data_);
-       break;
-     case COLOR_INDEXING_TRANSFORM: {
-        const int num_colors = VP8LReadBits(br, 8) + 1;
-@@ -1327,11 +1366,14 @@ static int ReadTransform(int* const xsize, int const* ysize,
-                       : 3;
-        *xsize = VP8LSubSampleSize(transform->xsize_, bits);
-        transform->bits_ = bits;
--       ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_);
--       ok = ok && ExpandColorMap(num_colors, transform);
-+       ok = DecodeImageStream(num_colors, /*ysize=*/1, /*is_level0=*/0, dec,
-+                              &transform->data_);
-+       if (ok && !ExpandColorMap(num_colors, transform)) {
-+         return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-+       }
-       break;
-     }
--    case SUBTRACT_GREEN:
-+    case SUBTRACT_GREEN_TRANSFORM:
-       break;
-     default:
-       assert(0);    // can't happen
-@@ -1353,7 +1395,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
-   assert(hdr != NULL);
- 
-   WebPSafeFree(hdr->huffman_image_);
--  WebPSafeFree(hdr->huffman_tables_);
-+  VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_);
-   VP8LHtreeGroupsFree(hdr->htree_groups_);
-   VP8LColorCacheClear(&hdr->color_cache_);
-   VP8LColorCacheClear(&hdr->saved_color_cache_);
-@@ -1434,7 +1476,7 @@ static int DecodeImageStream(int xsize, int ysize,
-     color_cache_bits = VP8LReadBits(br, 4);
-     ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
-     if (!ok) {
--      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-+      VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
-       goto End;
-     }
-   }
-@@ -1443,7 +1485,7 @@ static int DecodeImageStream(int xsize, int ysize,
-   ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize,
-                               color_cache_bits, is_level0);
-   if (!ok) {
--    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-+    VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
-     goto End;
-   }
- 
-@@ -1451,8 +1493,7 @@ static int DecodeImageStream(int xsize, int ysize,
-   if (color_cache_bits > 0) {
-     hdr->color_cache_size_ = 1 << color_cache_bits;
-     if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
--      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
--      ok = 0;
-+      ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-       goto End;
-     }
-   } else {
-@@ -1469,8 +1510,7 @@ static int DecodeImageStream(int xsize, int ysize,
-     const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
-     data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
-     if (data == NULL) {
--      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
--      ok = 0;
-+      ok = VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-       goto End;
-     }
-   }
-@@ -1514,9 +1554,8 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
-   assert(dec->width_ <= final_width);
-   dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));
-   if (dec->pixels_ == NULL) {
--    dec->argb_cache_ = NULL;    // for sanity check
--    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
--    return 0;
-+    dec->argb_cache_ = NULL;    // for soundness
-+    return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-   }
-   dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels;
-   return 1;
-@@ -1524,11 +1563,10 @@ static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
- 
- static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {
-   const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_;
--  dec->argb_cache_ = NULL;    // for sanity check
-+  dec->argb_cache_ = NULL;    // for soundness
-   dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));
-   if (dec->pixels_ == NULL) {
--    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
--    return 0;
-+    return VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-   }
-   return 1;
- }
-@@ -1583,7 +1621,8 @@ int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
-   dec->status_ = VP8_STATUS_OK;
-   VP8LInitBitReader(&dec->br_, data, data_size);
- 
--  if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
-+  if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, /*is_level0=*/1,
-+                         dec, /*decoded_data=*/NULL)) {
-     goto Err;
-   }
- 
-@@ -1638,22 +1677,24 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
- 
-   if (dec == NULL) return 0;
-   if (io == NULL) {
--    dec->status_ = VP8_STATUS_INVALID_PARAM;
--    return 0;
-+    return VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
-   }
- 
-   dec->io_ = io;
-   dec->status_ = VP8_STATUS_OK;
-   VP8LInitBitReader(&dec->br_, io->data, io->data_size);
-   if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
--    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-+    VP8LSetError(dec, VP8_STATUS_BITSTREAM_ERROR);
-     goto Error;
-   }
-   dec->state_ = READ_DIM;
-   io->width = width;
-   io->height = height;
- 
--  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
-+  if (!DecodeImageStream(width, height, /*is_level0=*/1, dec,
-+                         /*decoded_data=*/NULL)) {
-+    goto Error;
-+  }
-   return 1;
- 
-  Error:
-@@ -1666,10 +1707,9 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
-   VP8Io* io = NULL;
-   WebPDecParams* params = NULL;
- 
--  // Sanity checks.
-   if (dec == NULL) return 0;
- 
--  assert(dec->hdr_.huffman_tables_ != NULL);
-+  assert(dec->hdr_.huffman_tables_.root.start != NULL);
-   assert(dec->hdr_.htree_groups_ != NULL);
-   assert(dec->hdr_.num_htree_groups_ > 0);
- 
-@@ -1684,7 +1724,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
-     assert(dec->output_ != NULL);
- 
-     if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
--      dec->status_ = VP8_STATUS_INVALID_PARAM;
-+      VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
-       goto Err;
-     }
- 
-@@ -1694,7 +1734,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
-     if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
- #else
-     if (io->use_scaling) {
--      dec->status_ = VP8_STATUS_INVALID_PARAM;
-+      VP8LSetError(dec, VP8_STATUS_INVALID_PARAM);
-       goto Err;
-     }
- #endif
-@@ -1712,7 +1752,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
-           dec->hdr_.saved_color_cache_.colors_ == NULL) {
-         if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_,
-                                 dec->hdr_.color_cache_.hash_bits_)) {
--          dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
-+          VP8LSetError(dec, VP8_STATUS_OUT_OF_MEMORY);
-           goto Err;
-         }
-       }
-diff --git a/3rdparty/libwebp/src/dec/vp8li_dec.h b/3rdparty/libwebp/src/dec/vp8li_dec.h
-index 72b2e8612084..b057573f6c75 100644
---- a/3rdparty/libwebp/src/dec/vp8li_dec.h
-+++ b/3rdparty/libwebp/src/dec/vp8li_dec.h
-@@ -51,7 +51,7 @@ typedef struct {
-   uint32_t*       huffman_image_;
-   int             num_htree_groups_;
-   HTreeGroup*     htree_groups_;
--  HuffmanCode*    huffman_tables_;
-+  HuffmanTables   huffman_tables_;
- } VP8LMetadata;
- 
- typedef struct VP8LDecoder VP8LDecoder;
-@@ -126,6 +126,19 @@ void VP8LClear(VP8LDecoder* const dec);
- // Clears and deallocate a lossless decoder instance.
- void VP8LDelete(VP8LDecoder* const dec);
- 
-+// Helper function for reading the different Huffman codes and storing them in
-+// 'huffman_tables' and 'htree_groups'.
-+// If mapping is NULL 'num_htree_groups_max' must equal 'num_htree_groups'.
-+// If it is not NULL, it maps 'num_htree_groups_max' indices to the
-+// 'num_htree_groups' groups. If 'num_htree_groups_max' > 'num_htree_groups',
-+// some of those indices map to -1. This is used for non-balanced codes to
-+// limit memory usage.
-+int ReadHuffmanCodesHelper(int color_cache_bits, int num_htree_groups,
-+                           int num_htree_groups_max, const int* const mapping,
-+                           VP8LDecoder* const dec,
-+                           HuffmanTables* const huffman_tables,
-+                           HTreeGroup** const htree_groups);
-+
- //------------------------------------------------------------------------------
- 
- #ifdef __cplusplus
-diff --git a/3rdparty/libwebp/src/dec/webp_dec.c b/3rdparty/libwebp/src/dec/webp_dec.c
-index 42d098874d07..f557868b9985 100644
---- a/3rdparty/libwebp/src/dec/webp_dec.c
-+++ b/3rdparty/libwebp/src/dec/webp_dec.c
-@@ -179,7 +179,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
-       return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
-     }
-     // For odd-sized chunk-payload, there's one byte padding at the end.
--    disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
-+    disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u;
-     total_size += disk_chunk_size;
- 
-     // Check that total bytes skipped so far does not exceed riff_size.
-@@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
- uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
-                        int* width, int* height, uint8_t** u, uint8_t** v,
-                        int* stride, int* uv_stride) {
--  WebPDecBuffer output;   // only to preserve the side-infos
--  uint8_t* const out = Decode(MODE_YUV, data, data_size,
--                              width, height, &output);
--
--  if (out != NULL) {
--    const WebPYUVABuffer* const buf = &output.u.YUVA;
--    *u = buf->u;
--    *v = buf->v;
--    *stride = buf->y_stride;
--    *uv_stride = buf->u_stride;
--    assert(buf->u_stride == buf->v_stride);
--  }
--  return out;
-+  // data, width and height are checked by Decode().
-+  if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
-+    return NULL;
-+  }
-+
-+  {
-+    WebPDecBuffer output;   // only to preserve the side-infos
-+    uint8_t* const out = Decode(MODE_YUV, data, data_size,
-+                                width, height, &output);
-+
-+    if (out != NULL) {
-+      const WebPYUVABuffer* const buf = &output.u.YUVA;
-+      *u = buf->u;
-+      *v = buf->v;
-+      *stride = buf->y_stride;
-+      *uv_stride = buf->u_stride;
-+      assert(buf->u_stride == buf->v_stride);
-+    }
-+    return out;
-+  }
- }
- 
- static void DefaultFeatures(WebPBitstreamFeatures* const features) {
-@@ -785,6 +792,13 @@ VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
- //------------------------------------------------------------------------------
- // Cropping and rescaling.
- 
-+int WebPCheckCropDimensions(int image_width, int image_height,
-+                            int x, int y, int w, int h) {
-+  return !(x < 0 || y < 0 || w <= 0 || h <= 0 ||
-+           x >= image_width || w > image_width || w > image_width - x ||
-+           y >= image_height || h > image_height || h > image_height - y);
-+}
-+
- int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
-                           VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
-   const int W = io->width;
-@@ -792,7 +806,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
-   int x = 0, y = 0, w = W, h = H;
- 
-   // Cropping
--  io->use_cropping = (options != NULL) && (options->use_cropping > 0);
-+  io->use_cropping = (options != NULL) && options->use_cropping;
-   if (io->use_cropping) {
-     w = options->crop_width;
-     h = options->crop_height;
-@@ -802,7 +816,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
-       x &= ~1;
-       y &= ~1;
-     }
--    if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
-+    if (!WebPCheckCropDimensions(W, H, x, y, w, h)) {
-       return 0;  // out of frame boundary error
-     }
-   }
-@@ -814,7 +828,7 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
-   io->mb_h = h;
- 
-   // Scaling
--  io->use_scaling = (options != NULL) && (options->use_scaling > 0);
-+  io->use_scaling = (options != NULL) && options->use_scaling;
-   if (io->use_scaling) {
-     int scaled_width = options->scaled_width;
-     int scaled_height = options->scaled_height;
-@@ -835,8 +849,8 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
- 
-   if (io->use_scaling) {
-     // disable filter (only for large downscaling ratio).
--    io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
--                           (io->scaled_height < H * 3 / 4);
-+    io->bypass_filtering |= (io->scaled_width < W * 3 / 4) &&
-+                            (io->scaled_height < H * 3 / 4);
-     io->fancy_upsampling = 0;
-   }
-   return 1;
-diff --git a/3rdparty/libwebp/src/dec/webpi_dec.h b/3rdparty/libwebp/src/dec/webpi_dec.h
-index 24baff5d27a8..3b97388c71c1 100644
---- a/3rdparty/libwebp/src/dec/webpi_dec.h
-+++ b/3rdparty/libwebp/src/dec/webpi_dec.h
-@@ -77,6 +77,10 @@ VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
- //------------------------------------------------------------------------------
- // Misc utils
- 
-+// Returns true if crop dimensions are within image bounds.
-+int WebPCheckCropDimensions(int image_width, int image_height,
-+                            int x, int y, int w, int h);
-+
- // Initializes VP8Io with custom setup, io and teardown functions. The default
- // hooks will use the supplied 'params' as io->opaque handle.
- void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
-diff --git a/3rdparty/libwebp/src/demux/anim_decode.c b/3rdparty/libwebp/src/demux/anim_decode.c
-index 3dcacc35d675..e077ffb53640 100644
---- a/3rdparty/libwebp/src/demux/anim_decode.c
-+++ b/3rdparty/libwebp/src/demux/anim_decode.c
-@@ -23,6 +23,14 @@
- 
- #define NUM_CHANNELS 4
- 
-+// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
-+// buffer.
-+#ifdef WORDS_BIGENDIAN
-+#define CHANNEL_SHIFT(i) (24 - (i) * 8)
-+#else
-+#define CHANNEL_SHIFT(i) ((i) * 8)
-+#endif
-+
- typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
- static void BlendPixelRowNonPremult(uint32_t* const src,
-                                     const uint32_t* const dst, int num_pixels);
-@@ -87,11 +95,19 @@ WebPAnimDecoder* WebPAnimDecoderNewInternal(
-     int abi_version) {
-   WebPAnimDecoderOptions options;
-   WebPAnimDecoder* dec = NULL;
-+  WebPBitstreamFeatures features;
-   if (webp_data == NULL ||
-       WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
-     return NULL;
-   }
- 
-+  // Validate the bitstream before doing expensive allocations. The demuxer may
-+  // be more tolerant than the decoder.
-+  if (WebPGetFeatures(webp_data->bytes, webp_data->size, &features) !=
-+      VP8_STATUS_OK) {
-+    return NULL;
-+  }
-+
-   // Note: calloc() so that the pointer members are initialized to NULL.
-   dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
-   if (dec == NULL) goto Error;
-@@ -145,7 +161,7 @@ static int ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
-                           uint32_t canvas_height) {
-   const uint64_t size =
-       (uint64_t)canvas_width * canvas_height * NUM_CHANNELS * sizeof(*buf);
--  if (size != (size_t)size) return 0;
-+  if (!CheckSizeOverflow(size)) return 0;
-   memset(buf, 0, (size_t)size);
-   return 1;
- }
-@@ -166,7 +182,7 @@ static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
- static int CopyCanvas(const uint8_t* src, uint8_t* dst,
-                       uint32_t width, uint32_t height) {
-   const uint64_t size = (uint64_t)width * height * NUM_CHANNELS;
--  if (size != (size_t)size) return 0;
-+  if (!CheckSizeOverflow(size)) return 0;
-   assert(src != NULL && dst != NULL);
-   memcpy(dst, src, (size_t)size);
-   return 1;
-@@ -201,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
-   const uint8_t dst_channel = (dst >> shift) & 0xff;
-   const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
-   assert(blend_unscaled < (1ULL << 32) / scale);
--  return (blend_unscaled * scale) >> 24;
-+  return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
- }
- 
- // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
- static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
--  const uint8_t src_a = (src >> 24) & 0xff;
-+  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
- 
-   if (src_a == 0) {
-     return dst;
-   } else {
--    const uint8_t dst_a = (dst >> 24) & 0xff;
-+    const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
-     // This is the approximate integer arithmetic for the actual formula:
-     // dst_factor_a = (dst_a * (255 - src_a)) / 255.
-     const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
-     const uint8_t blend_a = src_a + dst_factor_a;
-     const uint32_t scale = (1UL << 24) / blend_a;
- 
--    const uint8_t blend_r =
--        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
--    const uint8_t blend_g =
--        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
--    const uint8_t blend_b =
--        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
-+    const uint8_t blend_r = BlendChannelNonPremult(
-+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
-+    const uint8_t blend_g = BlendChannelNonPremult(
-+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
-+    const uint8_t blend_b = BlendChannelNonPremult(
-+        src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
-     assert(src_a + dst_factor_a < 256);
- 
--    return (blend_r << 0) |
--           (blend_g << 8) |
--           (blend_b << 16) |
--           ((uint32_t)blend_a << 24);
-+    return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
-+           ((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
-+           ((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
-+           ((uint32_t)blend_a << CHANNEL_SHIFT(3));
-   }
- }
- 
-@@ -239,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
-                                     const uint32_t* const dst, int num_pixels) {
-   int i;
-   for (i = 0; i < num_pixels; ++i) {
--    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
-+    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
-     if (src_alpha != 0xff) {
-       src[i] = BlendPixelNonPremult(src[i], dst[i]);
-     }
-@@ -256,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
- 
- // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
- static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
--  const uint8_t src_a = (src >> 24) & 0xff;
-+  const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
-   return src + ChannelwiseMultiply(dst, 256 - src_a);
- }
- 
-@@ -266,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
-                                  int num_pixels) {
-   int i;
-   for (i = 0; i < num_pixels; ++i) {
--    const uint8_t src_alpha = (src[i] >> 24) & 0xff;
-+    const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
-     if (src_alpha != 0xff) {
-       src[i] = BlendPixelPremult(src[i], dst[i]);
-     }
-diff --git a/3rdparty/libwebp/src/demux/demux.c b/3rdparty/libwebp/src/demux/demux.c
-index 860e2ce7615e..fd45a2500e4b 100644
---- a/3rdparty/libwebp/src/demux/demux.c
-+++ b/3rdparty/libwebp/src/demux/demux.c
-@@ -24,8 +24,8 @@
- #include "src/webp/format_constants.h"
- 
- #define DMUX_MAJ_VERSION 1
--#define DMUX_MIN_VERSION 2
--#define DMUX_REV_VERSION 0
-+#define DMUX_MIN_VERSION 3
-+#define DMUX_REV_VERSION 1
- 
- typedef struct {
-   size_t start_;        // start location of the data
-@@ -221,12 +221,16 @@ static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
-     const size_t chunk_start_offset = mem->start_;
-     const uint32_t fourcc = ReadLE32(mem);
-     const uint32_t payload_size = ReadLE32(mem);
--    const uint32_t payload_size_padded = payload_size + (payload_size & 1);
--    const size_t payload_available = (payload_size_padded > MemDataSize(mem))
--                                   ? MemDataSize(mem) : payload_size_padded;
--    const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available;
-+    uint32_t payload_size_padded;
-+    size_t payload_available;
-+    size_t chunk_size;
- 
-     if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
-+
-+    payload_size_padded = payload_size + (payload_size & 1);
-+    payload_available = (payload_size_padded > MemDataSize(mem))
-+                      ? MemDataSize(mem) : payload_size_padded;
-+    chunk_size = CHUNK_HEADER_SIZE + payload_available;
-     if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
-     if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
- 
-@@ -451,9 +455,11 @@ static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
-     const size_t chunk_start_offset = mem->start_;
-     const uint32_t fourcc = ReadLE32(mem);
-     const uint32_t chunk_size = ReadLE32(mem);
--    const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
-+    uint32_t chunk_size_padded;
- 
-     if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
-+
-+    chunk_size_padded = chunk_size + (chunk_size & 1);
-     if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR;
- 
-     switch (fourcc) {
-@@ -608,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
- 
-   while (f != NULL) {
-     const int cur_frame_set = f->frame_num_;
--    int frame_count = 0;
- 
-     // Check frame properties.
-     for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
-@@ -643,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
-                             dmux->canvas_width_, dmux->canvas_height_)) {
-         return 0;
-       }
--
--      ++frame_count;
-     }
-   }
-   return 1;
-diff --git a/3rdparty/libwebp/src/dsp/alpha_processing.c b/3rdparty/libwebp/src/dsp/alpha_processing.c
-index 3a27990ddc57..1d152f24dada 100644
---- a/3rdparty/libwebp/src/dsp/alpha_processing.c
-+++ b/3rdparty/libwebp/src/dsp/alpha_processing.c
-@@ -157,7 +157,8 @@ void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
-   }
- }
- 
--void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
-+void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-+                   const uint8_t* WEBP_RESTRICT const alpha,
-                    int width, int inverse) {
-   int x;
-   for (x = 0; x < width; ++x) {
-@@ -178,7 +179,8 @@ void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
- #undef MFIX
- 
- void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
--void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
-+void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-+                    const uint8_t* WEBP_RESTRICT const alpha,
-                     int width, int inverse);
- 
- //------------------------------------------------------------------------------
-@@ -193,8 +195,8 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
-   }
- }
- 
--void WebPMultRows(uint8_t* ptr, int stride,
--                  const uint8_t* alpha, int alpha_stride,
-+void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
-+                  const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
-                   int width, int num_rows, int inverse) {
-   int n;
-   for (n = 0; n < num_rows; ++n) {
-@@ -290,9 +292,9 @@ static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
- }
- 
- #if !WEBP_NEON_OMIT_C_CODE
--static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
-+static int DispatchAlpha_C(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
-                            int width, int height,
--                           uint8_t* dst, int dst_stride) {
-+                           uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-   uint32_t alpha_mask = 0xff;
-   int i, j;
- 
-@@ -309,9 +311,10 @@ static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
-   return (alpha_mask != 0xff);
- }
- 
--static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride,
--                                   int width, int height,
--                                   uint32_t* dst, int dst_stride) {
-+static void DispatchAlphaToGreen_C(const uint8_t* WEBP_RESTRICT alpha,
-+                                   int alpha_stride, int width, int height,
-+                                   uint32_t* WEBP_RESTRICT dst,
-+                                   int dst_stride) {
-   int i, j;
-   for (j = 0; j < height; ++j) {
-     for (i = 0; i < width; ++i) {
-@@ -322,9 +325,9 @@ static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride,
-   }
- }
- 
--static int ExtractAlpha_C(const uint8_t* argb, int argb_stride,
-+static int ExtractAlpha_C(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-                           int width, int height,
--                          uint8_t* alpha, int alpha_stride) {
-+                          uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-   uint8_t alpha_mask = 0xff;
-   int i, j;
- 
-@@ -340,7 +343,8 @@ static int ExtractAlpha_C(const uint8_t* argb, int argb_stride,
-   return (alpha_mask == 0xff);
- }
- 
--static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
-+static void ExtractGreen_C(const uint32_t* WEBP_RESTRICT argb,
-+                           uint8_t* WEBP_RESTRICT alpha, int size) {
-   int i;
-   for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
- }
-@@ -372,8 +376,11 @@ static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
- }
- 
- #ifdef WORDS_BIGENDIAN
--static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
--                       const uint8_t* b, int len, uint32_t* out) {
-+static void PackARGB_C(const uint8_t* WEBP_RESTRICT a,
-+                       const uint8_t* WEBP_RESTRICT r,
-+                       const uint8_t* WEBP_RESTRICT g,
-+                       const uint8_t* WEBP_RESTRICT b,
-+                       int len, uint32_t* WEBP_RESTRICT out) {
-   int i;
-   for (i = 0; i < len; ++i) {
-     out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
-@@ -381,8 +388,10 @@ static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
- }
- #endif
- 
--static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
--                      int len, int step, uint32_t* out) {
-+static void PackRGB_C(const uint8_t* WEBP_RESTRICT r,
-+                      const uint8_t* WEBP_RESTRICT g,
-+                      const uint8_t* WEBP_RESTRICT b,
-+                      int len, int step, uint32_t* WEBP_RESTRICT out) {
-   int i, offset = 0;
-   for (i = 0; i < len; ++i) {
-     out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
-@@ -392,16 +401,22 @@ static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
- 
- void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
- void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
--int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
--void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
--int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
--void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
-+int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int,
-+                         uint8_t* WEBP_RESTRICT, int);
-+void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT, int, int, int,
-+                                 uint32_t* WEBP_RESTRICT, int);
-+int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT, int, int, int,
-+                        uint8_t* WEBP_RESTRICT, int);
-+void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb,
-+                         uint8_t* WEBP_RESTRICT alpha, int size);
- #ifdef WORDS_BIGENDIAN
- void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
-                      const uint8_t* b, int, uint32_t*);
- #endif
--void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
--                    int len, int step, uint32_t* out);
-+void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
-+                    const uint8_t* WEBP_RESTRICT g,
-+                    const uint8_t* WEBP_RESTRICT b,
-+                    int len, int step, uint32_t* WEBP_RESTRICT out);
- 
- int (*WebPHasAlpha8b)(const uint8_t* src, int length);
- int (*WebPHasAlpha32b)(const uint8_t* src, int length);
-@@ -410,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color);
- //------------------------------------------------------------------------------
- // Init function
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void WebPInitAlphaProcessingMIPSdspR2(void);
- extern void WebPInitAlphaProcessingSSE2(void);
- extern void WebPInitAlphaProcessingSSE41(void);
-@@ -438,10 +454,10 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPInitAlphaProcessingSSE2();
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-       if (VP8GetCPUInfo(kSSE4_1)) {
-         WebPInitAlphaProcessingSSE41();
-       }
-@@ -455,7 +471,7 @@ WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     WebPInitAlphaProcessingNEON();
-diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_neon.c b/3rdparty/libwebp/src/dsp/alpha_processing_neon.c
-index 9d55421704cc..6716fb77f0d8 100644
---- a/3rdparty/libwebp/src/dsp/alpha_processing_neon.c
-+++ b/3rdparty/libwebp/src/dsp/alpha_processing_neon.c
-@@ -80,10 +80,10 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
- 
- //------------------------------------------------------------------------------
- 
--static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
--                              int width, int height,
--                              uint8_t* dst, int dst_stride) {
--  uint32_t alpha_mask = 0xffffffffu;
-+static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
-+                              int alpha_stride, int width, int height,
-+                              uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-+  uint32_t alpha_mask = 0xffu;
-   uint8x8_t mask8 = vdup_n_u8(0xff);
-   uint32_t tmp[2];
-   int i, j;
-@@ -107,14 +107,16 @@ static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
-     dst += dst_stride;
-   }
-   vst1_u8((uint8_t*)tmp, mask8);
-+  alpha_mask *= 0x01010101;
-   alpha_mask &= tmp[0];
-   alpha_mask &= tmp[1];
-   return (alpha_mask != 0xffffffffu);
- }
- 
--static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
--                                      int width, int height,
--                                      uint32_t* dst, int dst_stride) {
-+static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
-+                                      int alpha_stride, int width, int height,
-+                                      uint32_t* WEBP_RESTRICT dst,
-+                                      int dst_stride) {
-   int i, j;
-   uint8x8x4_t greens;   // leave A/R/B channels zero'd.
-   greens.val[0] = vdup_n_u8(0);
-@@ -131,10 +133,10 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
-   }
- }
- 
--static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
-+static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-                              int width, int height,
--                             uint8_t* alpha, int alpha_stride) {
--  uint32_t alpha_mask = 0xffffffffu;
-+                             uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-+  uint32_t alpha_mask = 0xffu;
-   uint8x8_t mask8 = vdup_n_u8(0xff);
-   uint32_t tmp[2];
-   int i, j;
-@@ -156,13 +158,14 @@ static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
-     alpha += alpha_stride;
-   }
-   vst1_u8((uint8_t*)tmp, mask8);
-+  alpha_mask *= 0x01010101;
-   alpha_mask &= tmp[0];
-   alpha_mask &= tmp[1];
-   return (alpha_mask == 0xffffffffu);
- }
- 
--static void ExtractGreen_NEON(const uint32_t* argb,
--                              uint8_t* alpha, int size) {
-+static void ExtractGreen_NEON(const uint32_t* WEBP_RESTRICT argb,
-+                              uint8_t* WEBP_RESTRICT alpha, int size) {
-   int i;
-   for (i = 0; i + 16 <= size; i += 16) {
-     const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
-diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
-index f6c6e0fb1a6d..aa0cc2848ae9 100644
---- a/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c
-@@ -18,16 +18,16 @@
- 
- //------------------------------------------------------------------------------
- 
--static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
--                              int width, int height,
--                              uint8_t* dst, int dst_stride) {
-+static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
-+                              int alpha_stride, int width, int height,
-+                              uint8_t* WEBP_RESTRICT dst, int dst_stride) {
-   // alpha_and stores an 'and' operation of all the alpha[] values. The final
-   // value is not 0xff if any of the alpha[] is not equal to 0xff.
-   uint32_t alpha_and = 0xff;
-   int i, j;
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u);  // to preserve RGB
--  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
-+  const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00);  // to preserve RGB
-+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
-   __m128i all_alphas = all_0xff;
- 
-   // We must be able to access 3 extra bytes after the last written byte
-@@ -72,9 +72,10 @@ static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
-   return (alpha_and != 0xff);
- }
- 
--static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
--                                      int width, int height,
--                                      uint32_t* dst, int dst_stride) {
-+static void DispatchAlphaToGreen_SSE2(const uint8_t* WEBP_RESTRICT alpha,
-+                                      int alpha_stride, int width, int height,
-+                                      uint32_t* WEBP_RESTRICT dst,
-+                                      int dst_stride) {
-   int i, j;
-   const __m128i zero = _mm_setzero_si128();
-   const int limit = width & ~15;
-@@ -98,15 +99,15 @@ static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
-   }
- }
- 
--static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
-+static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
-                              int width, int height,
--                             uint8_t* alpha, int alpha_stride) {
-+                             uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-   // alpha_and stores an 'and' operation of all the alpha[] values. The final
-   // value is not 0xff if any of the alpha[] is not equal to 0xff.
-   uint32_t alpha_and = 0xff;
-   int i, j;
--  const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha
--  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
-+  const __m128i a_mask = _mm_set1_epi32(0xff);  // to preserve alpha
-+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
-   __m128i all_alphas = all_0xff;
- 
-   // We must be able to access 3 extra bytes after the last written byte
-@@ -143,6 +144,46 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
-   return (alpha_and == 0xff);
- }
- 
-+static void ExtractGreen_SSE2(const uint32_t* WEBP_RESTRICT argb,
-+                              uint8_t* WEBP_RESTRICT alpha, int size) {
-+  int i;
-+  const __m128i mask = _mm_set1_epi32(0xff);
-+  const __m128i* src = (const __m128i*)argb;
-+
-+  for (i = 0; i + 16 <= size; i += 16, src += 4) {
-+    const __m128i a0 = _mm_loadu_si128(src + 0);
-+    const __m128i a1 = _mm_loadu_si128(src + 1);
-+    const __m128i a2 = _mm_loadu_si128(src + 2);
-+    const __m128i a3 = _mm_loadu_si128(src + 3);
-+    const __m128i b0 = _mm_srli_epi32(a0, 8);
-+    const __m128i b1 = _mm_srli_epi32(a1, 8);
-+    const __m128i b2 = _mm_srli_epi32(a2, 8);
-+    const __m128i b3 = _mm_srli_epi32(a3, 8);
-+    const __m128i c0 = _mm_and_si128(b0, mask);
-+    const __m128i c1 = _mm_and_si128(b1, mask);
-+    const __m128i c2 = _mm_and_si128(b2, mask);
-+    const __m128i c3 = _mm_and_si128(b3, mask);
-+    const __m128i d0 = _mm_packs_epi32(c0, c1);
-+    const __m128i d1 = _mm_packs_epi32(c2, c3);
-+    const __m128i e = _mm_packus_epi16(d0, d1);
-+    // store
-+    _mm_storeu_si128((__m128i*)&alpha[i], e);
-+  }
-+  if (i + 8 <= size) {
-+    const __m128i a0 = _mm_loadu_si128(src + 0);
-+    const __m128i a1 = _mm_loadu_si128(src + 1);
-+    const __m128i b0 = _mm_srli_epi32(a0, 8);
-+    const __m128i b1 = _mm_srli_epi32(a1, 8);
-+    const __m128i c0 = _mm_and_si128(b0, mask);
-+    const __m128i c1 = _mm_and_si128(b1, mask);
-+    const __m128i d = _mm_packs_epi32(c0, c1);
-+    const __m128i e = _mm_packus_epi16(d, d);
-+    _mm_storel_epi64((__m128i*)&alpha[i], e);
-+    i += 8;
-+  }
-+  for (; i < size; ++i) alpha[i] = argb[i] >> 8;
-+}
-+
- //------------------------------------------------------------------------------
- // Non-dither premultiplied modes
- 
-@@ -177,7 +218,7 @@ static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
- static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
-                                     int w, int h, int stride) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i kMult = _mm_set1_epi16(0x8081u);
-+  const __m128i kMult = _mm_set1_epi16((short)0x8081);
-   const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
-   const int kSpan = 4;
-   while (h-- > 0) {
-@@ -266,7 +307,7 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
- }
- 
- static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
--  const __m128i m_color = _mm_set1_epi32(color);
-+  const __m128i m_color = _mm_set1_epi32((int)color);
-   const __m128i zero = _mm_setzero_si128();
-   int i = 0;
-   for (; i + 8 <= length; i += 8) {
-@@ -317,7 +358,8 @@ static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
-   if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
- }
- 
--static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
-+static void MultRow_SSE2(uint8_t* WEBP_RESTRICT const ptr,
-+                         const uint8_t* WEBP_RESTRICT const alpha,
-                          int width, int inverse) {
-   int x = 0;
-   if (!inverse) {
-@@ -352,6 +394,7 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
-   WebPDispatchAlpha = DispatchAlpha_SSE2;
-   WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
-   WebPExtractAlpha = ExtractAlpha_SSE2;
-+  WebPExtractGreen = ExtractGreen_SSE2;
- 
-   WebPHasAlpha8b = HasAlpha8b_SSE2;
-   WebPHasAlpha32b = HasAlpha32b_SSE2;
-diff --git a/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c b/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c
-index 56040f9c8801..1156ac3417b2 100644
---- a/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c
-+++ b/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c
-@@ -19,14 +19,14 @@
- 
- //------------------------------------------------------------------------------
- 
--static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
--                              int width, int height,
--                              uint8_t* alpha, int alpha_stride) {
-+static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
-+                              int argb_stride, int width, int height,
-+                              uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
-   // alpha_and stores an 'and' operation of all the alpha[] values. The final
-   // value is not 0xff if any of the alpha[] is not equal to 0xff.
-   uint32_t alpha_and = 0xff;
-   int i, j;
--  const __m128i all_0xff = _mm_set1_epi32(~0u);
-+  const __m128i all_0xff = _mm_set1_epi32(~0);
-   __m128i all_alphas = all_0xff;
- 
-   // We must be able to access 3 extra bytes after the last written byte
-diff --git a/3rdparty/libwebp/src/dsp/cost.c b/3rdparty/libwebp/src/dsp/cost.c
-index cc681cdd4bf1..73d2140177cb 100644
---- a/3rdparty/libwebp/src/dsp/cost.c
-+++ b/3rdparty/libwebp/src/dsp/cost.c
-@@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs,
- VP8GetResidualCostFunc VP8GetResidualCost;
- VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8EncDspCostInitMIPS32(void);
- extern void VP8EncDspCostInitMIPSdspR2(void);
- extern void VP8EncDspCostInitSSE2(void);
-@@ -395,12 +396,12 @@ WEBP_DSP_INIT_FUNC(VP8EncDspCostInit) {
-       VP8EncDspCostInitMIPSdspR2();
-     }
- #endif
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8EncDspCostInitSSE2();
-     }
- #endif
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-     if (VP8GetCPUInfo(kNEON)) {
-       VP8EncDspCostInitNEON();
-     }
-diff --git a/3rdparty/libwebp/src/dsp/cost_neon.c b/3rdparty/libwebp/src/dsp/cost_neon.c
-index 8cc8ce58aa14..6582669cb3f9 100644
---- a/3rdparty/libwebp/src/dsp/cost_neon.c
-+++ b/3rdparty/libwebp/src/dsp/cost_neon.c
-@@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
-   const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1));
-   const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position));
- 
--#ifdef __aarch64__
-+#if WEBP_AARCH64
-   res->last = vmaxvq_u8(masked) - 1;
- #else
-   const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked));
-@@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
- 
-   vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0);
-   --res->last;
--#endif  // __aarch64__
-+#endif  // WEBP_AARCH64
- 
-   res->coeffs = coeffs;
- }
-diff --git a/3rdparty/libwebp/src/dsp/cpu.c b/3rdparty/libwebp/src/dsp/cpu.c
-index 4ca90d88bf8c..2234c77b3568 100644
---- a/3rdparty/libwebp/src/dsp/cpu.c
-+++ b/3rdparty/libwebp/src/dsp/cpu.c
-@@ -11,7 +11,7 @@
- //
- // Author: Christian Duvivier (cduvivier@google.com)
- 
--#include "src/dsp/dsp.h"
-+#include "src/dsp/cpu.h"
- 
- #if defined(WEBP_HAVE_NEON_RTCD)
- #include <stdio.h>
-@@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) {
-   }
-   return 0;
- }
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
- #elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
- static int AndroidCPUInfo(CPUFeature feature) {
-@@ -184,22 +185,23 @@ static int AndroidCPUInfo(CPUFeature feature) {
-   }
-   return 0;
- }
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
- #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
- // Use compile flags as an indicator of SIMD support instead of a runtime check.
- static int wasmCPUInfo(CPUFeature feature) {
-   switch (feature) {
--#ifdef WEBP_USE_SSE2
-+#ifdef WEBP_HAVE_SSE2
-     case kSSE2:
-       return 1;
- #endif
--#ifdef WEBP_USE_SSE41
-+#ifdef WEBP_HAVE_SSE41
-     case kSSE3:
-     case kSlowSSSE3:
-     case kSSE4_1:
-       return 1;
- #endif
--#ifdef WEBP_USE_NEON
-+#ifdef WEBP_HAVE_NEON
-     case kNEON:
-       return 1;
- #endif
-@@ -208,10 +210,12 @@ static int wasmCPUInfo(CPUFeature feature) {
-   }
-   return 0;
- }
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
--#elif defined(WEBP_USE_NEON)
--// define a dummy function to enable turning off NEON at runtime by setting
--// VP8DecGetCPUInfo = NULL
-+#elif defined(WEBP_HAVE_NEON)
-+// In most cases this function doesn't check for NEON support (it's assumed by
-+// the configuration), but enables turning off NEON at runtime, for testing
-+// purposes, by setting VP8GetCPUInfo = NULL.
- static int armCPUInfo(CPUFeature feature) {
-   if (feature != kNEON) return 0;
- #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
-@@ -235,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) {
-   return 1;
- #endif
- }
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
- #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
-       defined(WEBP_USE_MSA)
-@@ -246,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) {
-   }
- 
- }
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
- #else
-+WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- VP8CPUInfo VP8GetCPUInfo = NULL;
- #endif
-diff --git a/3rdparty/libwebp/src/dsp/cpu.h b/3rdparty/libwebp/src/dsp/cpu.h
-new file mode 100644
-index 000000000000..c86540f28013
---- /dev/null
-+++ b/3rdparty/libwebp/src/dsp/cpu.h
-@@ -0,0 +1,266 @@
-+// Copyright 2022 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+//   CPU detection functions and macros.
-+//
-+// Author: Skal (pascal.massimino@gmail.com)
-+
-+#ifndef WEBP_DSP_CPU_H_
-+#define WEBP_DSP_CPU_H_
-+
-+#include <stddef.h>
-+
-+#ifdef HAVE_CONFIG_H
-+#include "src/webp/config.h"
-+#endif
-+
-+#include "src/webp/types.h"
-+
-+#if defined(__GNUC__)
-+#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
-+#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
-+#else
-+#define LOCAL_GCC_VERSION 0
-+#define LOCAL_GCC_PREREQ(maj, min) 0
-+#endif
-+
-+#if defined(__clang__)
-+#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-+#define LOCAL_CLANG_PREREQ(maj, min) \
-+  (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-+#else
-+#define LOCAL_CLANG_VERSION 0
-+#define LOCAL_CLANG_PREREQ(maj, min) 0
-+#endif
-+
-+#ifndef __has_builtin
-+#define __has_builtin(x) 0
-+#endif
-+
-+//------------------------------------------------------------------------------
-+// x86 defines.
-+
-+#if !defined(HAVE_CONFIG_H)
-+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
-+    (defined(_M_X64) || defined(_M_IX86))
-+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
-+#endif
-+
-+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
-+    (defined(_M_X64) || defined(_M_IX86))
-+#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
-+#endif
-+#endif
-+
-+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
-+// files without intrinsics, allowing the corresponding Init() to be called.
-+// Files containing intrinsics will need to be built targeting the instruction
-+// set so should succeed on one of the earlier tests.
-+#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
-+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
-+#define WEBP_USE_SSE2
-+#endif
-+
-+#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
-+#define WEBP_HAVE_SSE2
-+#endif
-+
-+#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
-+    (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
-+#define WEBP_USE_SSE41
-+#endif
-+
-+#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
-+#define WEBP_HAVE_SSE41
-+#endif
-+
-+#undef WEBP_MSC_SSE41
-+#undef WEBP_MSC_SSE2
-+
-+//------------------------------------------------------------------------------
-+// Arm defines.
-+
-+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
-+// inline assembly would need to be modified for use with Native Client.
-+#if ((defined(__ARM_NEON__) || defined(__aarch64__)) &&       \
-+     (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
-+    !defined(__native_client__)
-+#define WEBP_USE_NEON
-+#endif
-+
-+#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
-+    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
-+#define WEBP_ANDROID_NEON  // Android targets that may have NEON
-+#define WEBP_USE_NEON
-+#endif
-+
-+// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
-+// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
-+// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
-+// vtbl4_u8(); a fix was made in 16.6.
-+#if defined(_MSC_VER) && \
-+    ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
-+     (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
-+#define WEBP_USE_NEON
-+#define WEBP_USE_INTRINSICS
-+#endif
-+
-+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
-+#define WEBP_AARCH64 1
-+#else
-+#define WEBP_AARCH64 0
-+#endif
-+
-+#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
-+#define WEBP_HAVE_NEON
-+#endif
-+
-+//------------------------------------------------------------------------------
-+// MIPS defines.
-+
-+#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
-+    (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
-+#define WEBP_USE_MIPS32
-+#if (__mips_isa_rev >= 2)
-+#define WEBP_USE_MIPS32_R2
-+#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
-+#define WEBP_USE_MIPS_DSP_R2
-+#endif
-+#endif
-+#endif
-+
-+#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
-+#define WEBP_USE_MSA
-+#endif
-+
-+//------------------------------------------------------------------------------
-+
-+#ifndef WEBP_DSP_OMIT_C_CODE
-+#define WEBP_DSP_OMIT_C_CODE 1
-+#endif
-+
-+#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
-+#define WEBP_NEON_OMIT_C_CODE 1
-+#else
-+#define WEBP_NEON_OMIT_C_CODE 0
-+#endif
-+
-+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
-+#define WEBP_NEON_WORK_AROUND_GCC 1
-+#else
-+#define WEBP_NEON_WORK_AROUND_GCC 0
-+#endif
-+
-+//------------------------------------------------------------------------------
-+
-+// This macro prevents thread_sanitizer from reporting known concurrent writes.
-+#define WEBP_TSAN_IGNORE_FUNCTION
-+#if defined(__has_feature)
-+#if __has_feature(thread_sanitizer)
-+#undef WEBP_TSAN_IGNORE_FUNCTION
-+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
-+#endif
-+#endif
-+
-+#if defined(__has_feature)
-+#if __has_feature(memory_sanitizer)
-+#define WEBP_MSAN
-+#endif
-+#endif
-+
-+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-+#include <pthread.h>  // NOLINT
-+
-+#define WEBP_DSP_INIT(func)                                         \
-+  do {                                                              \
-+    static volatile VP8CPUInfo func##_last_cpuinfo_used =           \
-+        (VP8CPUInfo)&func##_last_cpuinfo_used;                      \
-+    static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
-+    if (pthread_mutex_lock(&func##_lock)) break;                    \
-+    if (func##_last_cpuinfo_used != VP8GetCPUInfo) func();          \
-+    func##_last_cpuinfo_used = VP8GetCPUInfo;                       \
-+    (void)pthread_mutex_unlock(&func##_lock);                       \
-+  } while (0)
-+#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
-+#define WEBP_DSP_INIT(func)                               \
-+  do {                                                    \
-+    static volatile VP8CPUInfo func##_last_cpuinfo_used = \
-+        (VP8CPUInfo)&func##_last_cpuinfo_used;            \
-+    if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
-+    func();                                               \
-+    func##_last_cpuinfo_used = VP8GetCPUInfo;             \
-+  } while (0)
-+#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
-+
-+// Defines an Init + helper function that control multiple initialization of
-+// function pointers / tables.
-+/* Usage:
-+   WEBP_DSP_INIT_FUNC(InitFunc) {
-+     ...function body
-+   }
-+*/
-+#define WEBP_DSP_INIT_FUNC(name)                                            \
-+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void);                  \
-+  WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
-+  static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
-+
-+#define WEBP_UBSAN_IGNORE_UNDEF
-+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-+#if defined(__clang__) && defined(__has_attribute)
-+#if __has_attribute(no_sanitize)
-+// This macro prevents the undefined behavior sanitizer from reporting
-+// failures. This is only meant to silence unaligned loads on platforms that
-+// are known to support them.
-+#undef WEBP_UBSAN_IGNORE_UNDEF
-+#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
-+
-+// This macro prevents the undefined behavior sanitizer from reporting
-+// failures related to unsigned integer overflows. This is only meant to
-+// silence cases where this well defined behavior is expected.
-+#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
-+#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
-+  __attribute__((no_sanitize("unsigned-integer-overflow")))
-+#endif
-+#endif
-+
-+// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
-+// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
-+#if !defined(WEBP_OFFSET_PTR)
-+#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
-+#endif
-+
-+// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
-+#if !defined(WEBP_SWAP_16BIT_CSP)
-+#define WEBP_SWAP_16BIT_CSP 0
-+#endif
-+
-+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
-+#if !defined(WORDS_BIGENDIAN) &&                   \
-+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
-+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
-+#define WORDS_BIGENDIAN
-+#endif
-+
-+typedef enum {
-+  kSSE2,
-+  kSSE3,
-+  kSlowSSSE3,  // special feature for slow SSSE3 architectures
-+  kSSE4_1,
-+  kAVX,
-+  kAVX2,
-+  kNEON,
-+  kMIPS32,
-+  kMIPSdspR2,
-+  kMSA
-+} CPUFeature;
-+
-+// returns true if the CPU supports the feature.
-+typedef int (*VP8CPUInfo)(CPUFeature feature);
-+
-+#endif  // WEBP_DSP_CPU_H_
-diff --git a/3rdparty/libwebp/src/dsp/dec.c b/3rdparty/libwebp/src/dsp/dec.c
-index 1119842dd3de..33d8df8a621f 100644
---- a/3rdparty/libwebp/src/dsp/dec.c
-+++ b/3rdparty/libwebp/src/dsp/dec.c
-@@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i;
- void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
-                             int dst_stride);
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8DspInitSSE2(void);
- extern void VP8DspInitSSE41(void);
- extern void VP8DspInitNEON(void);
-@@ -807,10 +808,10 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8DspInitSSE2();
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-       if (VP8GetCPUInfo(kSSE4_1)) {
-         VP8DspInitSSE41();
-       }
-@@ -834,7 +835,7 @@ WEBP_DSP_INIT_FUNC(VP8DspInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     VP8DspInitNEON();
-diff --git a/3rdparty/libwebp/src/dsp/dec_neon.c b/3rdparty/libwebp/src/dsp/dec_neon.c
-index fa851707e265..22784cf15ae9 100644
---- a/3rdparty/libwebp/src/dsp/dec_neon.c
-+++ b/3rdparty/libwebp/src/dsp/dec_neon.c
-@@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
- 
-   if (do_top) {
-     const uint8x8_t A = vld1_u8(dst - BPS);  // top row
--#if defined(__aarch64__)
-+#if WEBP_AARCH64
-     const uint16_t p2 = vaddlv_u8(A);
-     sum_top = vdupq_n_u16(p2);
- #else
-@@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) {
- 
-   if (do_top) {
-     const uint8x16_t A = vld1q_u8(dst - BPS);  // top row
--#if defined(__aarch64__)
-+#if WEBP_AARCH64
-     const uint16_t p3 = vaddlvq_u8(A);
-     sum_top = vdupq_n_u16(p3);
- #else
-diff --git a/3rdparty/libwebp/src/dsp/dec_sse2.c b/3rdparty/libwebp/src/dsp/dec_sse2.c
-index 873aa59e8a97..01e6bcb636f0 100644
---- a/3rdparty/libwebp/src/dsp/dec_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/dec_sse2.c
-@@ -158,10 +158,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
-       dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
-     } else {
-       // Load four bytes/pixels per line.
--      dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
--      dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
--      dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
--      dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
-+      dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
-+      dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
-+      dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
-+      dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
-     }
-     // Convert to 16b.
-     dst0 = _mm_unpacklo_epi8(dst0, zero);
-@@ -187,10 +187,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
-       _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
-     } else {
-       // Store four bytes/pixels per line.
--      WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
--      WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
--      WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
--      WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
-+      WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
-+      WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
-+      WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
-+      WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
-     }
-   }
- }
-@@ -213,10 +213,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
-   const __m128i m3 = _mm_subs_epi16(B, d4);
-   const __m128i zero = _mm_setzero_si128();
-   // Load the source pixels.
--  __m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
--  __m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
--  __m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
--  __m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
-+  __m128i dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
-+  __m128i dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
-+  __m128i dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
-+  __m128i dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
-   // Convert to 16b.
-   dst0 = _mm_unpacklo_epi8(dst0, zero);
-   dst1 = _mm_unpacklo_epi8(dst1, zero);
-@@ -233,10 +233,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
-   dst2 = _mm_packus_epi16(dst2, dst2);
-   dst3 = _mm_packus_epi16(dst3, dst3);
-   // Store the results.
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
- }
- #undef MUL
- #endif   // USE_TRANSFORM_AC3
-@@ -477,11 +477,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
-   // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
-   // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
-   const __m128i A0 = _mm_set_epi32(
--      WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]),
--      WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride]));
-+      WebPMemToInt32(&b[6 * stride]), WebPMemToInt32(&b[2 * stride]),
-+      WebPMemToInt32(&b[4 * stride]), WebPMemToInt32(&b[0 * stride]));
-   const __m128i A1 = _mm_set_epi32(
--      WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]),
--      WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride]));
-+      WebPMemToInt32(&b[7 * stride]), WebPMemToInt32(&b[3 * stride]),
-+      WebPMemToInt32(&b[5 * stride]), WebPMemToInt32(&b[1 * stride]));
- 
-   // B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
-   // B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
-@@ -540,7 +540,7 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
-                                       uint8_t* dst, int stride) {
-   int i;
-   for (i = 0; i < 4; ++i, dst += stride) {
--    WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
-+    WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
-     *x = _mm_srli_si128(*x, 4);
-   }
- }
-@@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) {    // vertical
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
-   const __m128i b = _mm_subs_epu8(a, lsb);
-   const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
--  const uint32_t vals = _mm_cvtsi128_si32(avg);
-+  const int vals = _mm_cvtsi128_si32(avg);
-   int i;
-   for (i = 0; i < 4; ++i) {
--    WebPUint32ToMem(dst + i * BPS, vals);
-+    WebPInt32ToMem(dst + i * BPS, vals);
-   }
- }
- 
-@@ -925,10 +925,10 @@ static void LD4_SSE2(uint8_t* dst) {   // Down-Left
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
- }
- 
- static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
-@@ -946,10 +946,10 @@ static void VR4_SSE2(uint8_t* dst) {   // Vertical-Right
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
- 
-   // these two are hard to implement in SSE2, so we keep the C-version:
-   DST(0, 2) = AVG3(J, I, X);
-@@ -970,11 +970,12 @@ static void VL4_SSE2(uint8_t* dst) {   // Vertical-Left
-   const __m128i abbc = _mm_or_si128(ab, bc);
-   const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
-   const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
--  const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
-+  const uint32_t extra_out =
-+      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
- 
-   // these two are hard to get and irregular
-   DST(3, 2) = (extra_out >> 0) & 0xff;
-@@ -990,7 +991,7 @@ static void RD4_SSE2(uint8_t* dst) {   // Down-right
-   const uint32_t K = dst[-1 + 2 * BPS];
-   const uint32_t L = dst[-1 + 3 * BPS];
-   const __m128i LKJI_____ =
--      _mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24));
-+      _mm_cvtsi32_si128((int)(L | (K << 8) | (J << 16) | (I << 24)));
-   const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD);
-   const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
-   const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
-@@ -998,10 +999,10 @@ static void RD4_SSE2(uint8_t* dst) {   // Down-right
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
- }
- 
- #undef DST
-@@ -1015,13 +1016,13 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
-   const __m128i zero = _mm_setzero_si128();
-   int y;
-   if (size == 4) {
--    const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
-+    const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
-     const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
-     for (y = 0; y < 4; ++y, dst += BPS) {
-       const int val = dst[-1] - top[-1];
-       const __m128i base = _mm_set1_epi16(val);
-       const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
--      WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
-+      WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
-     }
-   } else if (size == 8) {
-     const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
-@@ -1062,7 +1063,7 @@ static void VE16_SSE2(uint8_t* dst) {
- static void HE16_SSE2(uint8_t* dst) {     // horizontal
-   int j;
-   for (j = 16; j > 0; --j) {
--    const __m128i values = _mm_set1_epi8(dst[-1]);
-+    const __m128i values = _mm_set1_epi8((char)dst[-1]);
-     _mm_storeu_si128((__m128i*)dst, values);
-     dst += BPS;
-   }
-@@ -1070,7 +1071,7 @@ static void HE16_SSE2(uint8_t* dst) {     // horizontal
- 
- static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
-   int j;
--  const __m128i values = _mm_set1_epi8(v);
-+  const __m128i values = _mm_set1_epi8((char)v);
-   for (j = 0; j < 16; ++j) {
-     _mm_storeu_si128((__m128i*)(dst + j * BPS), values);
-   }
-@@ -1130,7 +1131,7 @@ static void VE8uv_SSE2(uint8_t* dst) {    // vertical
- // helper for chroma-DC predictions
- static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
-   int j;
--  const __m128i values = _mm_set1_epi8(v);
-+  const __m128i values = _mm_set1_epi8((char)v);
-   for (j = 0; j < 8; ++j) {
-     _mm_storel_epi64((__m128i*)(dst + j * BPS), values);
-   }
-diff --git a/3rdparty/libwebp/src/dsp/dec_sse41.c b/3rdparty/libwebp/src/dsp/dec_sse41.c
-index 8f18506d54bf..08a363027226 100644
---- a/3rdparty/libwebp/src/dsp/dec_sse41.c
-+++ b/3rdparty/libwebp/src/dsp/dec_sse41.c
-@@ -23,7 +23,7 @@ static void HE16_SSE41(uint8_t* dst) {     // horizontal
-   int j;
-   const __m128i kShuffle3 = _mm_set1_epi8(3);
-   for (j = 16; j > 0; --j) {
--    const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4));
-+    const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4));
-     const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
-     _mm_storeu_si128((__m128i*)dst, values);
-     dst += BPS;
-diff --git a/3rdparty/libwebp/src/dsp/dsp.h b/3rdparty/libwebp/src/dsp/dsp.h
-index 298c721ae2d1..d2000b8efcba 100644
---- a/3rdparty/libwebp/src/dsp/dsp.h
-+++ b/3rdparty/libwebp/src/dsp/dsp.h
-@@ -18,6 +18,7 @@
- #include "src/webp/config.h"
- #endif
- 
-+#include "src/dsp/cpu.h"
- #include "src/webp/types.h"
- 
- #ifdef __cplusplus
-@@ -27,205 +28,22 @@ extern "C" {
- #define BPS 32   // this is the common stride for enc/dec
- 
- //------------------------------------------------------------------------------
--// CPU detection
--
-+// WEBP_RESTRICT
-+
-+// Declares a pointer with the restrict type qualifier if available.
-+// This allows code to hint to the compiler that only this pointer references a
-+// particular object or memory region within the scope of the block in which it
-+// is declared. This may allow for improved optimizations due to the lack of
-+// pointer aliasing. See also:
-+// https://en.cppreference.com/w/c/language/restrict
- #if defined(__GNUC__)
--# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
--# define LOCAL_GCC_PREREQ(maj, min) \
--    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
--#else
--# define LOCAL_GCC_VERSION 0
--# define LOCAL_GCC_PREREQ(maj, min) 0
--#endif
--
--#if defined(__clang__)
--# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
--# define LOCAL_CLANG_PREREQ(maj, min) \
--    (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-+#define WEBP_RESTRICT __restrict__
-+#elif defined(_MSC_VER)
-+#define WEBP_RESTRICT __restrict
- #else
--# define LOCAL_CLANG_VERSION 0
--# define LOCAL_CLANG_PREREQ(maj, min) 0
--#endif
--
--#ifndef __has_builtin
--# define __has_builtin(x) 0
--#endif
--
--#if !defined(HAVE_CONFIG_H)
--#if defined(_MSC_VER) && _MSC_VER > 1310 && \
--    (defined(_M_X64) || defined(_M_IX86))
--#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
--#endif
--
--#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
--    (defined(_M_X64) || defined(_M_IX86))
--#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
--#endif
--#endif
--
--// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
--// files without intrinsics, allowing the corresponding Init() to be called.
--// Files containing intrinsics will need to be built targeting the instruction
--// set so should succeed on one of the earlier tests.
--#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
--#define WEBP_USE_SSE2
-+#define WEBP_RESTRICT
- #endif
- 
--#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
--#define WEBP_USE_SSE41
--#endif
--
--#undef WEBP_MSC_SSE41
--#undef WEBP_MSC_SSE2
--
--// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
--// inline assembly would need to be modified for use with Native Client.
--#if (defined(__ARM_NEON__) || \
--     defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
--    !defined(__native_client__)
--#define WEBP_USE_NEON
--#endif
--
--#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
--    defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
--#define WEBP_ANDROID_NEON  // Android targets that may have NEON
--#define WEBP_USE_NEON
--#endif
--
--#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
--#define WEBP_USE_NEON
--#define WEBP_USE_INTRINSICS
--#endif
--
--#if defined(__mips__) && !defined(__mips64) && \
--    defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
--#define WEBP_USE_MIPS32
--#if (__mips_isa_rev >= 2)
--#define WEBP_USE_MIPS32_R2
--#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
--#define WEBP_USE_MIPS_DSP_R2
--#endif
--#endif
--#endif
--
--#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
--#define WEBP_USE_MSA
--#endif
--
--#ifndef WEBP_DSP_OMIT_C_CODE
--#define WEBP_DSP_OMIT_C_CODE 1
--#endif
--
--#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
--#define WEBP_NEON_OMIT_C_CODE 1
--#else
--#define WEBP_NEON_OMIT_C_CODE 0
--#endif
--
--#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
--#define WEBP_NEON_WORK_AROUND_GCC 1
--#else
--#define WEBP_NEON_WORK_AROUND_GCC 0
--#endif
--
--// This macro prevents thread_sanitizer from reporting known concurrent writes.
--#define WEBP_TSAN_IGNORE_FUNCTION
--#if defined(__has_feature)
--#if __has_feature(thread_sanitizer)
--#undef WEBP_TSAN_IGNORE_FUNCTION
--#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
--#endif
--#endif
--
--#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
--#include <pthread.h>  // NOLINT
--
--#define WEBP_DSP_INIT(func) do {                                    \
--  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
--      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
--  static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
--  if (pthread_mutex_lock(&func ## _lock)) break;                    \
--  if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func();          \
--  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
--  (void)pthread_mutex_unlock(&func ## _lock);                       \
--} while (0)
--#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
--#define WEBP_DSP_INIT(func) do {                                    \
--  static volatile VP8CPUInfo func ## _last_cpuinfo_used =           \
--      (VP8CPUInfo)&func ## _last_cpuinfo_used;                      \
--  if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break;           \
--  func();                                                           \
--  func ## _last_cpuinfo_used = VP8GetCPUInfo;                       \
--} while (0)
--#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)
--
--// Defines an Init + helper function that control multiple initialization of
--// function pointers / tables.
--/* Usage:
--   WEBP_DSP_INIT_FUNC(InitFunc) {
--     ...function body
--   }
--*/
--#define WEBP_DSP_INIT_FUNC(name)                             \
--  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
--  WEBP_TSAN_IGNORE_FUNCTION void name(void) {                \
--    WEBP_DSP_INIT(name ## _body);                            \
--  }                                                          \
--  static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
--
--#define WEBP_UBSAN_IGNORE_UNDEF
--#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
--#if defined(__clang__) && defined(__has_attribute)
--#if __has_attribute(no_sanitize)
--// This macro prevents the undefined behavior sanitizer from reporting
--// failures. This is only meant to silence unaligned loads on platforms that
--// are known to support them.
--#undef WEBP_UBSAN_IGNORE_UNDEF
--#define WEBP_UBSAN_IGNORE_UNDEF \
--  __attribute__((no_sanitize("undefined")))
--
--// This macro prevents the undefined behavior sanitizer from reporting
--// failures related to unsigned integer overflows. This is only meant to
--// silence cases where this well defined behavior is expected.
--#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
--#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
--  __attribute__((no_sanitize("unsigned-integer-overflow")))
--#endif
--#endif
--
--// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
--// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
--#if !defined(WEBP_OFFSET_PTR)
--#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
--#endif
--
--// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
--#if !defined(WEBP_SWAP_16BIT_CSP)
--#define WEBP_SWAP_16BIT_CSP 0
--#endif
--
--// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
--#if !defined(WORDS_BIGENDIAN) && \
--    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
--     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
--#define WORDS_BIGENDIAN
--#endif
--
--typedef enum {
--  kSSE2,
--  kSSE3,
--  kSlowSSSE3,  // special feature for slow SSSE3 architectures
--  kSSE4_1,
--  kAVX,
--  kAVX2,
--  kNEON,
--  kMIPS32,
--  kMIPSdspR2,
--  kMSA
--} CPUFeature;
--// returns true if the CPU supports the feature.
--typedef int (*VP8CPUInfo)(CPUFeature feature);
--WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
- 
- //------------------------------------------------------------------------------
- // Init stub generator
-@@ -514,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
- extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
-                                     uint8_t* u, uint8_t* v, int width);
- 
--// utilities for accurate RGB->YUV conversion
--extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
--                                       uint16_t* dst, int len);
--extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
--                                     int16_t* dst, int len);
--extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
--                                     int len,
--                                     const uint16_t* best_y, uint16_t* out);
--
- // Must be called before using the above.
- void WebPInitConvertARGBToYUV(void);
- 
-@@ -578,26 +387,29 @@ extern void (*WebPApplyAlphaMultiply4444)(
- 
- // Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
- // Returns true if alpha[] plane has non-trivial values different from 0xff.
--extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
--                                int width, int height,
--                                uint8_t* dst, int dst_stride);
-+extern int (*WebPDispatchAlpha)(const uint8_t* WEBP_RESTRICT alpha,
-+                                int alpha_stride, int width, int height,
-+                                uint8_t* WEBP_RESTRICT dst, int dst_stride);
- 
- // Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
- // A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
--extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
--                                        int width, int height,
--                                        uint32_t* dst, int dst_stride);
-+extern void (*WebPDispatchAlphaToGreen)(const uint8_t* WEBP_RESTRICT alpha,
-+                                        int alpha_stride, int width, int height,
-+                                        uint32_t* WEBP_RESTRICT dst,
-+                                        int dst_stride);
- 
- // Extract the alpha values from 32b values in argb[] and pack them into alpha[]
- // (this is the opposite of WebPDispatchAlpha).
- // Returns true if there's only trivial 0xff alpha values.
--extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
--                               int width, int height,
--                               uint8_t* alpha, int alpha_stride);
-+extern int (*WebPExtractAlpha)(const uint8_t* WEBP_RESTRICT argb,
-+                               int argb_stride, int width, int height,
-+                               uint8_t* WEBP_RESTRICT alpha,
-+                               int alpha_stride);
- 
- // Extract the green values from 32b values in argb[] and pack them into alpha[]
- // (this is the opposite of WebPDispatchAlphaToGreen).
--extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
-+extern void (*WebPExtractGreen)(const uint32_t* WEBP_RESTRICT argb,
-+                                uint8_t* WEBP_RESTRICT alpha, int size);
- 
- // Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B).
- // Un-Multiply operation transforms x into x * 255 / A.
-@@ -610,29 +422,35 @@ void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
-                       int inverse);
- 
- // Same for a row of single values, with side alpha values.
--extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
-+extern void (*WebPMultRow)(uint8_t* WEBP_RESTRICT const ptr,
-+                           const uint8_t* WEBP_RESTRICT const alpha,
-                            int width, int inverse);
- 
- // Same a WebPMultRow(), but for several 'num_rows' rows.
--void WebPMultRows(uint8_t* ptr, int stride,
--                  const uint8_t* alpha, int alpha_stride,
-+void WebPMultRows(uint8_t* WEBP_RESTRICT ptr, int stride,
-+                  const uint8_t* WEBP_RESTRICT alpha, int alpha_stride,
-                   int width, int num_rows, int inverse);
- 
- // Plain-C versions, used as fallback by some implementations.
--void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
-+void WebPMultRow_C(uint8_t* WEBP_RESTRICT const ptr,
-+                   const uint8_t* WEBP_RESTRICT const alpha,
-                    int width, int inverse);
- void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
- 
- #ifdef WORDS_BIGENDIAN
- // ARGB packing function: a/r/g/b input is rgba or bgra order.
--extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
--                            const uint8_t* g, const uint8_t* b, int len,
--                            uint32_t* out);
-+extern void (*WebPPackARGB)(const uint8_t* WEBP_RESTRICT a,
-+                            const uint8_t* WEBP_RESTRICT r,
-+                            const uint8_t* WEBP_RESTRICT g,
-+                            const uint8_t* WEBP_RESTRICT b,
-+                            int len, uint32_t* WEBP_RESTRICT out);
- #endif
- 
- // RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
--extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
--                           int len, int step, uint32_t* out);
-+extern void (*WebPPackRGB)(const uint8_t* WEBP_RESTRICT r,
-+                           const uint8_t* WEBP_RESTRICT g,
-+                           const uint8_t* WEBP_RESTRICT b,
-+                           int len, int step, uint32_t* WEBP_RESTRICT out);
- 
- // This function returns true if src[i] contains a value different from 0xff.
- extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
-diff --git a/3rdparty/libwebp/src/dsp/enc.c b/3rdparty/libwebp/src/dsp/enc.c
-index 2fddbc4c5247..2ba97ba8d661 100644
---- a/3rdparty/libwebp/src/dsp/enc.c
-+++ b/3rdparty/libwebp/src/dsp/enc.c
-@@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
- VP8BlockCopy VP8Copy4x4;
- VP8BlockCopy VP8Copy16x8;
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8EncDspInitSSE2(void);
- extern void VP8EncDspInitSSE41(void);
- extern void VP8EncDspInitNEON(void);
-@@ -773,10 +774,10 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8EncDspInitSSE2();
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-       if (VP8GetCPUInfo(kSSE4_1)) {
-         VP8EncDspInitSSE41();
-       }
-@@ -800,7 +801,7 @@ WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     VP8EncDspInitNEON();
-diff --git a/3rdparty/libwebp/src/dsp/enc_neon.c b/3rdparty/libwebp/src/dsp/enc_neon.c
-index 43bf1245c536..714800367ba7 100644
---- a/3rdparty/libwebp/src/dsp/enc_neon.c
-+++ b/3rdparty/libwebp/src/dsp/enc_neon.c
-@@ -9,7 +9,7 @@
- //
- // ARM NEON version of speed-critical encoding functions.
- //
--// adapted from libvpx (http://www.webmproject.org/code/)
-+// adapted from libvpx (https://www.webmproject.org/code/)
- 
- #include "src/dsp/dsp.h"
- 
-@@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
- 
- // Horizontal sum of all four uint32_t values in 'sum'.
- static int SumToInt_NEON(uint32x4_t sum) {
-+#if WEBP_AARCH64
-+  return (int)vaddvq_u32(sum);
-+#else
-   const uint64x2_t sum2 = vpaddlq_u32(sum);
--  const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
--  return (int)sum3;
-+  const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)),
-+                                   vreinterpret_u32_u64(vget_high_u64(sum2)));
-+  return (int)vget_lane_u32(sum3, 0);
-+#endif
- }
- 
- static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
-@@ -860,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
-   uint8x8x4_t shuffles;
-   // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
-   // non-standard versions there.
--#if defined(__APPLE__) && defined(__aarch64__) && \
-+#if defined(__APPLE__) && WEBP_AARCH64 && \
-     defined(__apple_build_version__) && (__apple_build_version__< 6020037)
-   uint8x16x2_t all_out;
-   INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
-diff --git a/3rdparty/libwebp/src/dsp/enc_sse2.c b/3rdparty/libwebp/src/dsp/enc_sse2.c
-index b2e78ed9411f..010624a2f712 100644
---- a/3rdparty/libwebp/src/dsp/enc_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/enc_sse2.c
-@@ -25,9 +25,160 @@
- //------------------------------------------------------------------------------
- // Transforms (Paragraph 14.4)
- 
--// Does one or two inverse transforms.
--static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
--                            int do_two) {
-+// Does one inverse transform.
-+static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
-+                                uint8_t* dst) {
-+  // This implementation makes use of 16-bit fixed point versions of two
-+  // multiply constants:
-+  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
-+  //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
-+  //
-+  // To be able to use signed 16-bit integers, we use the following trick to
-+  // have constants within range:
-+  // - Associated constants are obtained by subtracting the 16-bit fixed point
-+  //   version of one:
-+  //      k = K - (1 << 16)  =>  K = k + (1 << 16)
-+  //      K1 = 85267  =>  k1 =  20091
-+  //      K2 = 35468  =>  k2 = -30068
-+  // - The multiplication of a variable by a constant become the sum of the
-+  //   variable and the multiplication of that variable by the associated
-+  //   constant:
-+  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
-+  const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
-+                                     20091, 20091, 20091, 20091);
-+  const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
-+                                     -30068, -30068, -30068, -30068);
-+  const __m128i zero = _mm_setzero_si128();
-+  const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
-+  __m128i T01, T23;
-+
-+  // Load and concatenate the transform coefficients.
-+  const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]);
-+  const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]);
-+  // a00 a10 a20 a30   a01 a11 a21 a31
-+  // a02 a12 a22 a32   a03 a13 a23 a33
-+
-+  // Vertical pass and subsequent transpose.
-+  {
-+    const __m128i in1 = _mm_unpackhi_epi64(in01, in01);
-+    const __m128i in3 = _mm_unpackhi_epi64(in23, in23);
-+
-+    // First pass, c and d calculations are longer because of the "trick"
-+    // multiplications.
-+    // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
-+    // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
-+    const __m128i a_d3 = _mm_add_epi16(in01, in23);
-+    const __m128i b_c3 = _mm_sub_epi16(in01, in23);
-+    const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1);
-+    const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2);
-+    const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
-+    const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
-+    const __m128i c = _mm_add_epi16(c3, c4);
-+    const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
-+    const __m128i du = _mm_add_epi16(a_d3, d4u);
-+    const __m128i d = _mm_unpackhi_epi64(du, du);
-+
-+    // Second pass.
-+    const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
-+    const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
-+
-+    const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
-+    const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
-+    const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
-+
-+    const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23);
-+    const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23);
-+    // a00 a20 a01 a21   a02 a22 a03 a23
-+    // a10 a30 a11 a31   a12 a32 a13 a33
-+
-+    T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
-+    T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
-+    // a00 a10 a20 a30   a01 a11 a21 a31
-+    // a02 a12 a22 a32   a03 a13 a23 a33
-+  }
-+
-+  // Horizontal pass and subsequent transpose.
-+  {
-+    const __m128i T1 = _mm_unpackhi_epi64(T01, T01);
-+    const __m128i T3 = _mm_unpackhi_epi64(T23, T23);
-+
-+    // First pass, c and d calculations are longer because of the "trick"
-+    // multiplications.
-+    const __m128i dc = _mm_add_epi16(T01, zero_four);
-+
-+    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
-+    // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
-+    const __m128i a_d3 = _mm_add_epi16(dc, T23);
-+    const __m128i b_c3 = _mm_sub_epi16(dc, T23);
-+    const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1);
-+    const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2);
-+    const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
-+    const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
-+    const __m128i c = _mm_add_epi16(c3, c4);
-+    const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
-+    const __m128i du = _mm_add_epi16(a_d3, d4u);
-+    const __m128i d = _mm_unpackhi_epi64(du, du);
-+
-+    // Second pass.
-+    const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
-+    const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
-+
-+    const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
-+    const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
-+    const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
-+
-+    const __m128i shifted01 = _mm_srai_epi16(tmp01, 3);
-+    const __m128i shifted23 = _mm_srai_epi16(tmp23, 3);
-+    // a00 a01 a02 a03   a10 a11 a12 a13
-+    // a20 a21 a22 a23   a30 a31 a32 a33
-+
-+    const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23);
-+    const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23);
-+    // a00 a20 a01 a21   a02 a22 a03 a23
-+    // a10 a30 a11 a31   a12 a32 a13 a33
-+
-+    T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
-+    T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
-+    // a00 a10 a20 a30   a01 a11 a21 a31
-+    // a02 a12 a22 a32   a03 a13 a23 a33
-+  }
-+
-+  // Add inverse transform to 'ref' and store.
-+  {
-+    // Load the reference(s).
-+    __m128i ref01, ref23, ref0123;
-+    int32_t buf[4];
-+
-+    // Load four bytes/pixels per line.
-+    const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS]));
-+    const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS]));
-+    const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS]));
-+    const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS]));
-+    ref01 = _mm_unpacklo_epi32(ref0, ref1);
-+    ref23 = _mm_unpacklo_epi32(ref2, ref3);
-+
-+    // Convert to 16b.
-+    ref01 = _mm_unpacklo_epi8(ref01, zero);
-+    ref23 = _mm_unpacklo_epi8(ref23, zero);
-+    // Add the inverse transform(s).
-+    ref01 = _mm_add_epi16(ref01, T01);
-+    ref23 = _mm_add_epi16(ref23, T23);
-+    // Unsigned saturate to 8b.
-+    ref0123 = _mm_packus_epi16(ref01, ref23);
-+
-+    _mm_storeu_si128((__m128i *)buf, ref0123);
-+
-+    // Store four bytes/pixels per line.
-+    WebPInt32ToMem(&dst[0 * BPS], buf[0]);
-+    WebPInt32ToMem(&dst[1 * BPS], buf[1]);
-+    WebPInt32ToMem(&dst[2 * BPS], buf[2]);
-+    WebPInt32ToMem(&dst[3 * BPS], buf[3]);
-+  }
-+}
-+
-+// Does two inverse transforms.
-+static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
-+                                uint8_t* dst) {
-   // This implementation makes use of 16-bit fixed point versions of two
-   // multiply constants:
-   //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
-@@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-   __m128i T0, T1, T2, T3;
- 
-   // Load and concatenate the transform coefficients (we'll do two inverse
--  // transforms in parallel). In the case of only one inverse transform, the
--  // second half of the vectors will just contain random value we'll never
--  // use nor store.
-+  // transforms in parallel).
-   __m128i in0, in1, in2, in3;
-   {
--    in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
--    in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
--    in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
--    in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
--    // a00 a10 a20 a30   x x x x
--    // a01 a11 a21 a31   x x x x
--    // a02 a12 a22 a32   x x x x
--    // a03 a13 a23 a33   x x x x
--    if (do_two) {
--      const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
--      const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
--      const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
--      const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
--      in0 = _mm_unpacklo_epi64(in0, inB0);
--      in1 = _mm_unpacklo_epi64(in1, inB1);
--      in2 = _mm_unpacklo_epi64(in2, inB2);
--      in3 = _mm_unpacklo_epi64(in3, inB3);
--      // a00 a10 a20 a30   b00 b10 b20 b30
--      // a01 a11 a21 a31   b01 b11 b21 b31
--      // a02 a12 a22 a32   b02 b12 b22 b32
--      // a03 a13 a23 a33   b03 b13 b23 b33
--    }
-+    const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]);
-+    const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]);
-+    const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]);
-+    const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]);
-+    in0 = _mm_unpacklo_epi64(tmp0, tmp2);
-+    in1 = _mm_unpackhi_epi64(tmp0, tmp2);
-+    in2 = _mm_unpacklo_epi64(tmp1, tmp3);
-+    in3 = _mm_unpackhi_epi64(tmp1, tmp3);
-+    // a00 a10 a20 a30   b00 b10 b20 b30
-+    // a01 a11 a21 a31   b01 b11 b21 b31
-+    // a02 a12 a22 a32   b02 b12 b22 b32
-+    // a03 a13 a23 a33   b03 b13 b23 b33
-   }
- 
-   // Vertical pass and subsequent transpose.
-@@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-     const __m128i zero = _mm_setzero_si128();
-     // Load the reference(s).
-     __m128i ref0, ref1, ref2, ref3;
--    if (do_two) {
--      // Load eight bytes/pixels per line.
--      ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
--      ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
--      ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
--      ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
--    } else {
--      // Load four bytes/pixels per line.
--      ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS]));
--      ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS]));
--      ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS]));
--      ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS]));
--    }
-+    // Load eight bytes/pixels per line.
-+    ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
-+    ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
-+    ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
-+    ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
-     // Convert to 16b.
-     ref0 = _mm_unpacklo_epi8(ref0, zero);
-     ref1 = _mm_unpacklo_epi8(ref1, zero);
-@@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-     ref1 = _mm_packus_epi16(ref1, ref1);
-     ref2 = _mm_packus_epi16(ref2, ref2);
-     ref3 = _mm_packus_epi16(ref3, ref3);
--    // Store the results.
--    if (do_two) {
--      // Store eight bytes/pixels per line.
--      _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
--      _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
--      _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
--      _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
--    } else {
--      // Store four bytes/pixels per line.
--      WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0));
--      WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1));
--      WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2));
--      WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3));
--    }
-+    // Store eight bytes/pixels per line.
-+    _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
-+    _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
-+    _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
-+    _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
-+  }
-+}
-+
-+// Does one or two inverse transforms.
-+static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-+                            int do_two) {
-+  if (do_two) {
-+    ITransform_Two_SSE2(ref, in, dst);
-+  } else {
-+    ITransform_One_SSE2(ref, in, dst);
-   }
- }
- 
-@@ -481,7 +613,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
- // helper for chroma-DC predictions
- static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
-   int j;
--  const __m128i values = _mm_set1_epi8(v);
-+  const __m128i values = _mm_set1_epi8((char)v);
-   for (j = 0; j < 8; ++j) {
-     _mm_storel_epi64((__m128i*)(dst + j * BPS), values);
-   }
-@@ -489,7 +621,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
- 
- static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
-   int j;
--  const __m128i values = _mm_set1_epi8(v);
-+  const __m128i values = _mm_set1_epi8((char)v);
-   for (j = 0; j < 16; ++j) {
-     _mm_store_si128((__m128i*)(dst + j * BPS), values);
-   }
-@@ -540,7 +672,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
- static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
-   int j;
-   for (j = 0; j < 8; ++j) {
--    const __m128i values = _mm_set1_epi8(left[j]);
-+    const __m128i values = _mm_set1_epi8((char)left[j]);
-     _mm_storel_epi64((__m128i*)dst, values);
-     dst += BPS;
-   }
-@@ -549,7 +681,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
- static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
-   int j;
-   for (j = 0; j < 16; ++j) {
--    const __m128i values = _mm_set1_epi8(left[j]);
-+    const __m128i values = _mm_set1_epi8((char)left[j]);
-     _mm_store_si128((__m128i*)dst, values);
-     dst += BPS;
-   }
-@@ -722,10 +854,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
-   const __m128i b = _mm_subs_epu8(a, lsb);
-   const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
--  const uint32_t vals = _mm_cvtsi128_si32(avg);
-+  const int vals = _mm_cvtsi128_si32(avg);
-   int i;
-   for (i = 0; i < 4; ++i) {
--    WebPUint32ToMem(dst + i * BPS, vals);
-+    WebPInt32ToMem(dst + i * BPS, vals);
-   }
- }
- 
-@@ -760,10 +892,10 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
- }
- 
- static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
-@@ -782,10 +914,10 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               abcd    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               efgh    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
- 
-   // these two are hard to implement in SSE2, so we keep the C-version:
-   DST(0, 2) = AVG3(J, I, X);
-@@ -807,11 +939,12 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
-   const __m128i abbc = _mm_or_si128(ab, bc);
-   const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
-   const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
--  const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
-+  const uint32_t extra_out =
-+      (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(               avg1    ));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(               avg4    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
- 
-   // these two are hard to get and irregular
-   DST(3, 2) = (extra_out >> 0) & 0xff;
-@@ -829,10 +962,10 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
-   const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
-   const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
-   const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
--  WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
--  WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
--  WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
--  WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
-+  WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(               abcdefg    ));
-+  WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
-+  WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
-+  WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
- }
- 
- static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
-@@ -875,14 +1008,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
- 
- static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
-+  const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
-   const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
-   int y;
-   for (y = 0; y < 4; ++y, dst += BPS) {
-     const int val = top[-2 - y] - top[-1];
-     const __m128i base = _mm_set1_epi16(val);
-     const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
--    WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
-+    WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
-   }
- }
- 
-diff --git a/3rdparty/libwebp/src/dsp/filters.c b/3rdparty/libwebp/src/dsp/filters.c
-index 9e910d99c92a..c1350d5c9d2a 100644
---- a/3rdparty/libwebp/src/dsp/filters.c
-+++ b/3rdparty/libwebp/src/dsp/filters.c
-@@ -189,6 +189,12 @@ static void GradientFilter_C(const uint8_t* data, int width, int height,
- 
- //------------------------------------------------------------------------------
- 
-+static void NoneUnfilter_C(const uint8_t* prev, const uint8_t* in,
-+                           uint8_t* out, int width) {
-+  (void)prev;
-+  if (out != in) memcpy(out, in, width * sizeof(*out));
-+}
-+
- static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
-                                  uint8_t* out, int width) {
-   uint8_t pred = (prev == NULL) ? 0 : prev[0];
-@@ -233,13 +239,14 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
- WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
- WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8FiltersInitMIPSdspR2(void);
- extern void VP8FiltersInitMSA(void);
- extern void VP8FiltersInitNEON(void);
- extern void VP8FiltersInitSSE2(void);
- 
- WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
--  WebPUnfilters[WEBP_FILTER_NONE] = NULL;
-+  WebPUnfilters[WEBP_FILTER_NONE] = NoneUnfilter_C;
- #if !WEBP_NEON_OMIT_C_CODE
-   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
-   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
-@@ -254,7 +261,7 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
- #endif
- 
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8FiltersInitSSE2();
-     }
-@@ -271,13 +278,14 @@ WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     VP8FiltersInitNEON();
-   }
- #endif
- 
-+  assert(WebPUnfilters[WEBP_FILTER_NONE] != NULL);
-   assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
-   assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
-   assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
-diff --git a/3rdparty/libwebp/src/dsp/filters_sse2.c b/3rdparty/libwebp/src/dsp/filters_sse2.c
-index 4b3f2d020f40..5c33ec15e219 100644
---- a/3rdparty/libwebp/src/dsp/filters_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/filters_sse2.c
-@@ -320,7 +320,12 @@ extern void VP8FiltersInitSSE2(void);
- 
- WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
-   WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
-+#if defined(CHROMIUM)
-+  // TODO(crbug.com/654974)
-+  (void)VerticalUnfilter_SSE2;
-+#else
-   WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
-+#endif
-   WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
- 
-   WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
-diff --git a/3rdparty/libwebp/src/dsp/lossless.c b/3rdparty/libwebp/src/dsp/lossless.c
-index 46b220e2edc9..9f8120945397 100644
---- a/3rdparty/libwebp/src/dsp/lossless.c
-+++ b/3rdparty/libwebp/src/dsp/lossless.c
-@@ -49,7 +49,7 @@ static WEBP_INLINE uint32_t Clip255(uint32_t a) {
- }
- 
- static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
--  return Clip255(a + b - c);
-+  return Clip255((uint32_t)(a + b - c));
- }
- 
- static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
-@@ -66,7 +66,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
- }
- 
- static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
--  return Clip255(a + (a - b) / 2);
-+  return Clip255((uint32_t)(a + (a - b) / 2));
- }
- 
- static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
-@@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
- //------------------------------------------------------------------------------
- // Predictors
- 
--uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor0_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   (void)top;
-   (void)left;
-   return ARGB_BLACK;
- }
--uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor1_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   (void)top;
--  return left;
-+  return *left;
- }
--uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor2_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   (void)left;
-   return top[0];
- }
--uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor3_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   (void)left;
-   return top[1];
- }
--uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor4_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   (void)left;
-   return top[-1];
- }
--uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average3(left, top[0], top[1]);
-+uint32_t VP8LPredictor5_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-+  const uint32_t pred = Average3(*left, top[0], top[1]);
-   return pred;
- }
--uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average2(left, top[-1]);
-+uint32_t VP8LPredictor6_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-+  const uint32_t pred = Average2(*left, top[-1]);
-   return pred;
- }
--uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average2(left, top[0]);
-+uint32_t VP8LPredictor7_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-+  const uint32_t pred = Average2(*left, top[0]);
-   return pred;
- }
--uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor8_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   const uint32_t pred = Average2(top[-1], top[0]);
-   (void)left;
-   return pred;
- }
--uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
-+uint32_t VP8LPredictor9_C(const uint32_t* const left,
-+                          const uint32_t* const top) {
-   const uint32_t pred = Average2(top[0], top[1]);
-   (void)left;
-   return pred;
- }
--uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
-+uint32_t VP8LPredictor10_C(const uint32_t* const left,
-+                           const uint32_t* const top) {
-+  const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
-   return pred;
- }
--uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Select(top[0], left, top[-1]);
-+uint32_t VP8LPredictor11_C(const uint32_t* const left,
-+                           const uint32_t* const top) {
-+  const uint32_t pred = Select(top[0], *left, top[-1]);
-   return pred;
- }
--uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
-+uint32_t VP8LPredictor12_C(const uint32_t* const left,
-+                           const uint32_t* const top) {
-+  const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
-   return pred;
- }
--uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
-+uint32_t VP8LPredictor13_C(const uint32_t* const left,
-+                           const uint32_t* const top) {
-+  const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
-   return pred;
- }
- 
-@@ -279,10 +293,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
-     const uint32_t red = argb >> 16;
-     int new_red = red & 0xff;
-     int new_blue = argb & 0xff;
--    new_red += ColorTransformDelta(m->green_to_red_, green);
-+    new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
-     new_red &= 0xff;
--    new_blue += ColorTransformDelta(m->green_to_blue_, green);
--    new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red);
-+    new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
-+    new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
-     new_blue &= 0xff;
-     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
-   }
-@@ -381,7 +395,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
-   assert(row_start < row_end);
-   assert(row_end <= transform->ysize_);
-   switch (transform->type_) {
--    case SUBTRACT_GREEN:
-+    case SUBTRACT_GREEN_TRANSFORM:
-       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
-       break;
-     case PREDICTOR_TRANSFORM:
-@@ -574,7 +588,9 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
- VP8LMapARGBFunc VP8LMapColor32b;
- VP8LMapAlphaFunc VP8LMapColor8b;
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8LDspInitSSE2(void);
-+extern void VP8LDspInitSSE41(void);
- extern void VP8LDspInitNEON(void);
- extern void VP8LDspInitMIPSdspR2(void);
- extern void VP8LDspInitMSA(void);
-@@ -621,9 +637,14 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8LDspInitSSE2();
-+#if defined(WEBP_HAVE_SSE41)
-+      if (VP8GetCPUInfo(kSSE4_1)) {
-+        VP8LDspInitSSE41();
-+      }
-+#endif
-     }
- #endif
- #if defined(WEBP_USE_MIPS_DSP_R2)
-@@ -638,7 +659,7 @@ WEBP_DSP_INIT_FUNC(VP8LDspInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     VP8LDspInitNEON();
-diff --git a/3rdparty/libwebp/src/dsp/lossless.h b/3rdparty/libwebp/src/dsp/lossless.h
-index ebd316d1ed7b..0bf10a1a3dab 100644
---- a/3rdparty/libwebp/src/dsp/lossless.h
-+++ b/3rdparty/libwebp/src/dsp/lossless.h
-@@ -28,23 +28,38 @@ extern "C" {
- //------------------------------------------------------------------------------
- // Decoding
- 
--typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
-+typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
-+                                      const uint32_t* const top);
- extern VP8LPredictorFunc VP8LPredictors[16];
- 
--uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
--uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);
-+uint32_t VP8LPredictor0_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor1_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor2_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor3_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor4_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor5_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor6_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor7_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor8_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor9_C(const uint32_t* const left,
-+                          const uint32_t* const top);
-+uint32_t VP8LPredictor10_C(const uint32_t* const left,
-+                           const uint32_t* const top);
-+uint32_t VP8LPredictor11_C(const uint32_t* const left,
-+                           const uint32_t* const top);
-+uint32_t VP8LPredictor12_C(const uint32_t* const left,
-+                           const uint32_t* const top);
-+uint32_t VP8LPredictor13_C(const uint32_t* const left,
-+                           const uint32_t* const top);
- 
- // These Add/Sub function expects upper[-1] and out[-1] to be readable.
- typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
-@@ -167,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
- // -----------------------------------------------------------------------------
- // Huffman-cost related functions.
- 
--typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
--typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
--                                       int length);
-+typedef uint32_t (*VP8LCostFunc)(const uint32_t* population, int length);
-+typedef uint32_t (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
-+                                         int length);
- typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
-                                                 const int Y[256]);
- 
-@@ -183,7 +198,7 @@ typedef struct {        // small struct to hold counters
- } VP8LStreaks;
- 
- typedef struct {            // small struct to hold bit entropy results
--  double entropy;           // entropy
-+  float entropy;            // entropy
-   uint32_t sum;             // sum of the population
-   int nonzeros;             // number of non-zero elements in the population
-   uint32_t max_val;         // maximum value in the population
-diff --git a/3rdparty/libwebp/src/dsp/lossless_common.h b/3rdparty/libwebp/src/dsp/lossless_common.h
-index 96a106f9eebc..d6139b2b577d 100644
---- a/3rdparty/libwebp/src/dsp/lossless_common.h
-+++ b/3rdparty/libwebp/src/dsp/lossless_common.h
-@@ -16,9 +16,9 @@
- #ifndef WEBP_DSP_LOSSLESS_COMMON_H_
- #define WEBP_DSP_LOSSLESS_COMMON_H_
- 
--#include "src/webp/types.h"
--
-+#include "src/dsp/cpu.h"
- #include "src/utils/utils.h"
-+#include "src/webp/types.h"
- 
- #ifdef __cplusplus
- extern "C" {
-@@ -166,7 +166,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
- }
- 
- //------------------------------------------------------------------------------
--// Transform-related functions use din both encoding and decoding.
-+// Transform-related functions used in both encoding and decoding.
- 
- // Macros used to create a batch predictor that iteratively uses a
- // one-pixel predictor.
-@@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
-   int x;                                                             \
-   assert(upper != NULL);                                             \
-   for (x = 0; x < num_pixels; ++x) {                                 \
--    const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x);        \
-+    const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x);       \
-     out[x] = VP8LAddPixels(in[x], pred);                             \
-   }                                                                  \
- }
-diff --git a/3rdparty/libwebp/src/dsp/lossless_enc.c b/3rdparty/libwebp/src/dsp/lossless_enc.c
-index a0c7ab911798..997d56c2ad30 100644
---- a/3rdparty/libwebp/src/dsp/lossless_enc.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_enc.c
-@@ -329,6 +329,15 @@ const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
- static float FastSLog2Slow_C(uint32_t v) {
-   assert(v >= LOG_LOOKUP_IDX_MAX);
-   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
-+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
-+    // use clz if available
-+    const int log_cnt = BitsLog2Floor(v) - 7;
-+    const uint32_t y = 1 << log_cnt;
-+    int correction = 0;
-+    const float v_f = (float)v;
-+    const uint32_t orig_v = v;
-+    v >>= log_cnt;
-+#else
-     int log_cnt = 0;
-     uint32_t y = 1;
-     int correction = 0;
-@@ -339,6 +348,7 @@ static float FastSLog2Slow_C(uint32_t v) {
-       v = v >> 1;
-       y = y << 1;
-     } while (v >= LOG_LOOKUP_IDX_MAX);
-+#endif
-     // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
-     // Xf = floor(Xf) * (1 + (v % y) / v)
-     // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
-@@ -355,6 +365,14 @@ static float FastSLog2Slow_C(uint32_t v) {
- static float FastLog2Slow_C(uint32_t v) {
-   assert(v >= LOG_LOOKUP_IDX_MAX);
-   if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
-+#if !defined(WEBP_HAVE_SLOW_CLZ_CTZ)
-+    // use clz if available
-+    const int log_cnt = BitsLog2Floor(v) - 7;
-+    const uint32_t y = 1 << log_cnt;
-+    const uint32_t orig_v = v;
-+    double log_2;
-+    v >>= log_cnt;
-+#else
-     int log_cnt = 0;
-     uint32_t y = 1;
-     const uint32_t orig_v = v;
-@@ -364,6 +382,7 @@ static float FastLog2Slow_C(uint32_t v) {
-       v = v >> 1;
-       y = y << 1;
-     } while (v >= LOG_LOOKUP_IDX_MAX);
-+#endif
-     log_2 = kLog2Table[v] + log_cnt;
-     if (orig_v >= APPROX_LOG_MAX) {
-       // Since the division is still expensive, add this correction factor only
-@@ -383,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
- // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
- static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
-   int i;
--  double retval = 0.;
-+  float retval = 0.f;
-   int sumX = 0, sumXY = 0;
-   for (i = 0; i < 256; ++i) {
-     const int x = X[i];
-@@ -399,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
-     }
-   }
-   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
--  return (float)retval;
-+  return retval;
- }
- 
- void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
-@@ -503,11 +522,11 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
- void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
-   int i;
-   for (i = 0; i < num_pixels; ++i) {
--    const int argb = argb_data[i];
-+    const int argb = (int)argb_data[i];
-     const int green = (argb >> 8) & 0xff;
-     const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
-     const uint32_t new_b = (((argb >>  0) & 0xff) - green) & 0xff;
--    argb_data[i] = (argb & 0xff00ff00u) | (new_r << 16) | new_b;
-+    argb_data[i] = ((uint32_t)argb & 0xff00ff00u) | (new_r << 16) | new_b;
-   }
- }
- 
-@@ -528,10 +547,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
-     const int8_t red   = U32ToS8(argb >> 16);
-     int new_red = red & 0xff;
-     int new_blue = argb & 0xff;
--    new_red -= ColorTransformDelta(m->green_to_red_, green);
-+    new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green);
-     new_red &= 0xff;
--    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
--    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
-+    new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green);
-+    new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red);
-     new_blue &= 0xff;
-     data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
-   }
-@@ -541,7 +560,7 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
-                                              uint32_t argb) {
-   const int8_t green = U32ToS8(argb >> 8);
-   int new_red = argb >> 16;
--  new_red -= ColorTransformDelta(green_to_red, green);
-+  new_red -= ColorTransformDelta((int8_t)green_to_red, green);
-   return (new_red & 0xff);
- }
- 
-@@ -550,9 +569,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
-                                               uint32_t argb) {
-   const int8_t green = U32ToS8(argb >>  8);
-   const int8_t red   = U32ToS8(argb >> 16);
--  uint8_t new_blue = argb & 0xff;
--  new_blue -= ColorTransformDelta(green_to_blue, green);
--  new_blue -= ColorTransformDelta(red_to_blue, red);
-+  int new_blue = argb & 0xff;
-+  new_blue -= ColorTransformDelta((int8_t)green_to_blue, green);
-+  new_blue -= ColorTransformDelta((int8_t)red_to_blue, red);
-   return (new_blue & 0xff);
- }
- 
-@@ -617,20 +636,25 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
- 
- //------------------------------------------------------------------------------
- 
--static double ExtraCost_C(const uint32_t* population, int length) {
-+static uint32_t ExtraCost_C(const uint32_t* population, int length) {
-   int i;
--  double cost = 0.;
--  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
-+  uint32_t cost = population[4] + population[5];
-+  assert(length % 2 == 0);
-+  for (i = 2; i < length / 2 - 1; ++i) {
-+    cost += i * (population[2 * i + 2] + population[2 * i + 3]);
-+  }
-   return cost;
- }
- 
--static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
--                                  int length) {
-+static uint32_t ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
-+                                    int length) {
-   int i;
--  double cost = 0.;
--  for (i = 2; i < length - 2; ++i) {
--    const int xy = X[i + 2] + Y[i + 2];
--    cost += (i >> 1) * xy;
-+  uint32_t cost = X[4] + Y[4] + X[5] + Y[5];
-+  assert(length % 2 == 0);
-+  for (i = 2; i < length / 2 - 1; ++i) {
-+    const int xy0 = X[2 * i + 2] + Y[2 * i + 2];
-+    const int xy1 = X[2 * i + 3] + Y[2 * i + 3];
-+    cost += i * (xy0 + xy1);
-   }
-   return cost;
- }
-@@ -726,7 +750,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in,              \
-   assert(upper != NULL);                                                   \
-   for (x = 0; x < num_pixels; ++x) {                                       \
-     const uint32_t pred =                                                  \
--        VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x);              \
-+        VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x);             \
-     out[x] = VP8LSubPixels(in[x], pred);                                   \
-   }                                                                        \
- }
-@@ -772,6 +796,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap;
- VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
- VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8LEncDspInitSSE2(void);
- extern void VP8LEncDspInitSSE41(void);
- extern void VP8LEncDspInitNEON(void);
-@@ -843,10 +868,10 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8LEncDspInitSSE2();
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-       if (VP8GetCPUInfo(kSSE4_1)) {
-         VP8LEncDspInitSSE41();
-       }
-@@ -870,7 +895,7 @@ WEBP_DSP_INIT_FUNC(VP8LEncDspInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     VP8LEncDspInitNEON();
-diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c b/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
-index 0412a093cf9a..e10f12da9d58 100644
---- a/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_enc_mips32.c
-@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
- //     cost += i * *(pop + 1);
- //     pop += 2;
- //   }
--//   return (double)cost;
--static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
-+//   return cost;
-+static uint32_t ExtraCost_MIPS32(const uint32_t* const population, int length) {
-   int i, temp0, temp1;
-   const uint32_t* pop = &population[4];
-   const uint32_t* const LoopEnd = &population[length];
-@@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
-     : "memory", "hi", "lo"
-   );
- 
--  return (double)((int64_t)temp0 << 32 | temp1);
-+  return ((int64_t)temp0 << 32 | temp1);
- }
- 
- // C version of this function:
-@@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
- //     pX += 2;
- //     pY += 2;
- //   }
--//   return (double)cost;
--static double ExtraCostCombined_MIPS32(const uint32_t* const X,
--                                       const uint32_t* const Y, int length) {
-+//   return cost;
-+static uint32_t ExtraCostCombined_MIPS32(const uint32_t* const X,
-+                                         const uint32_t* const Y, int length) {
-   int i, temp0, temp1, temp2, temp3;
-   const uint32_t* pX = &X[4];
-   const uint32_t* pY = &Y[4];
-@@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
-     : "memory", "hi", "lo"
-   );
- 
--  return (double)((int64_t)temp0 << 32 | temp1);
-+  return ((int64_t)temp0 << 32 | temp1);
- }
- 
- #define HUFFMAN_COST_PASS                                 \
-@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
- static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
-                              uint32_t* pout, int size) {
-   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
--  const uint32_t end = ((size) / 4) * 4;
-+  const int end = ((size) / 4) * 4;
-   const uint32_t* const LoopEnd = pa + end;
-   int i;
-   ASM_START
-   ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
-   ASM_END_0
--  for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
-+  for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
- }
- 
- static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
-   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
--  const uint32_t end = ((size) / 4) * 4;
-+  const int end = ((size) / 4) * 4;
-   const uint32_t* const LoopEnd = pa + end;
-   int i;
-   ASM_START
-   ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
-   ASM_END_1
--  for (i = end; i < size; ++i) pout[i] += pa[i];
-+  for (i = 0; i < size - end; ++i) pout[i] += pa[i];
- }
- 
- #undef ASM_END_1
-diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_neon.c b/3rdparty/libwebp/src/dsp/lossless_enc_neon.c
-index 7c7b73f8b692..e32c7961a239 100644
---- a/3rdparty/libwebp/src/dsp/lossless_enc_neon.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_enc_neon.c
-@@ -25,7 +25,7 @@
- 
- // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
- // non-standard versions there.
--#if defined(__APPLE__) && defined(__aarch64__) && \
-+#if defined(__APPLE__) && WEBP_AARCH64 && \
-     defined(__apple_build_version__) && (__apple_build_version__< 6020037)
- #define USE_VTBLQ
- #endif
-diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
-index 90c263735f58..66cbaab7720a 100644
---- a/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c
-@@ -54,8 +54,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
-   const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
-                                      CST_5b(m->green_to_blue_));
-   const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
--  const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);  // alpha-green masks
--  const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff);  // red-blue masks
-+  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);  // alpha-green masks
-+  const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff);       // red-blue masks
-   int i;
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
-@@ -232,79 +232,55 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
- //------------------------------------------------------------------------------
- // Entropy
- 
--// Checks whether the X or Y contribution is worth computing and adding.
--// Used in loop unrolling.
--#define ANALYZE_X_OR_Y(x_or_y, j)                                           \
--  do {                                                                      \
--    if ((x_or_y)[i + (j)] != 0) retval -= VP8LFastSLog2((x_or_y)[i + (j)]); \
--  } while (0)
--
--// Checks whether the X + Y contribution is worth computing and adding.
--// Used in loop unrolling.
--#define ANALYZE_XY(j)                  \
--  do {                                 \
--    if (tmp[j] != 0) {                 \
--      retval -= VP8LFastSLog2(tmp[j]); \
--      ANALYZE_X_OR_Y(X, j);            \
--    }                                  \
--  } while (0)
--
--#if !(defined(__i386__) || defined(_M_IX86))
-+// TODO(https://crbug.com/webp/499): this function produces different results
-+// from the C code due to use of double/float resulting in output differences
-+// when compared to -noasm.
-+#if !(defined(WEBP_HAVE_SLOW_CLZ_CTZ) || defined(__i386__) || defined(_M_IX86))
-+
- static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
-   int i;
--  double retval = 0.;
--  int sumX, sumXY;
--  int32_t tmp[4];
--  __m128i zero = _mm_setzero_si128();
--  // Sums up X + Y, 4 ints at a time (and will merge it at the end for sumXY).
--  __m128i sumXY_128 = zero;
--  __m128i sumX_128 = zero;
--
--  for (i = 0; i < 256; i += 4) {
--    const __m128i x = _mm_loadu_si128((const __m128i*)(X + i));
--    const __m128i y = _mm_loadu_si128((const __m128i*)(Y + i));
--
--    // Check if any X is non-zero: this actually provides a speedup as X is
--    // usually sparse.
--    if (_mm_movemask_epi8(_mm_cmpeq_epi32(x, zero)) != 0xFFFF) {
--      const __m128i xy_128 = _mm_add_epi32(x, y);
--      sumXY_128 = _mm_add_epi32(sumXY_128, xy_128);
--
--      sumX_128 = _mm_add_epi32(sumX_128, x);
--
--      // Analyze the different X + Y.
--      _mm_storeu_si128((__m128i*)tmp, xy_128);
--
--      ANALYZE_XY(0);
--      ANALYZE_XY(1);
--      ANALYZE_XY(2);
--      ANALYZE_XY(3);
--    } else {
--      // X is fully 0, so only deal with Y.
--      sumXY_128 = _mm_add_epi32(sumXY_128, y);
--
--      ANALYZE_X_OR_Y(Y, 0);
--      ANALYZE_X_OR_Y(Y, 1);
--      ANALYZE_X_OR_Y(Y, 2);
--      ANALYZE_X_OR_Y(Y, 3);
-+  float retval = 0.f;
-+  int sumX = 0, sumXY = 0;
-+  const __m128i zero = _mm_setzero_si128();
-+
-+  for (i = 0; i < 256; i += 16) {
-+    const __m128i x0 = _mm_loadu_si128((const __m128i*)(X + i +  0));
-+    const __m128i y0 = _mm_loadu_si128((const __m128i*)(Y + i +  0));
-+    const __m128i x1 = _mm_loadu_si128((const __m128i*)(X + i +  4));
-+    const __m128i y1 = _mm_loadu_si128((const __m128i*)(Y + i +  4));
-+    const __m128i x2 = _mm_loadu_si128((const __m128i*)(X + i +  8));
-+    const __m128i y2 = _mm_loadu_si128((const __m128i*)(Y + i +  8));
-+    const __m128i x3 = _mm_loadu_si128((const __m128i*)(X + i + 12));
-+    const __m128i y3 = _mm_loadu_si128((const __m128i*)(Y + i + 12));
-+    const __m128i x4 = _mm_packs_epi16(_mm_packs_epi32(x0, x1),
-+                                       _mm_packs_epi32(x2, x3));
-+    const __m128i y4 = _mm_packs_epi16(_mm_packs_epi32(y0, y1),
-+                                       _mm_packs_epi32(y2, y3));
-+    const int32_t mx = _mm_movemask_epi8(_mm_cmpgt_epi8(x4, zero));
-+    int32_t my = _mm_movemask_epi8(_mm_cmpgt_epi8(y4, zero)) | mx;
-+    while (my) {
-+      const int32_t j = BitsCtz(my);
-+      int xy;
-+      if ((mx >> j) & 1) {
-+        const int x = X[i + j];
-+        sumXY += x;
-+        retval -= VP8LFastSLog2(x);
-+      }
-+      xy = X[i + j] + Y[i + j];
-+      sumX += xy;
-+      retval -= VP8LFastSLog2(xy);
-+      my &= my - 1;
-     }
-   }
--
--  // Sum up sumX_128 to get sumX.
--  _mm_storeu_si128((__m128i*)tmp, sumX_128);
--  sumX = tmp[3] + tmp[2] + tmp[1] + tmp[0];
--
--  // Sum up sumXY_128 to get sumXY.
--  _mm_storeu_si128((__m128i*)tmp, sumXY_128);
--  sumXY = tmp[3] + tmp[2] + tmp[1] + tmp[0];
--
-   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
--  return (float)retval;
-+  return retval;
- }
--#endif  // !(defined(__i386__) || defined(_M_IX86))
- 
--#undef ANALYZE_X_OR_Y
--#undef ANALYZE_XY
-+#else
-+
-+#define DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC   // won't be faster
-+
-+#endif
- 
- //------------------------------------------------------------------------------
- 
-@@ -400,7 +376,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
-       break;
-     }
-     case 2: {
--      const __m128i mask_or = _mm_set1_epi32(0xff000000);
-+      const __m128i mask_or = _mm_set1_epi32((int)0xff000000);
-       const __m128i mul_cst = _mm_set1_epi16(0x0104);
-       const __m128i mask_mul = _mm_set1_epi16(0x0f00);
-       for (x = 0; x + 16 <= width; x += 16, dst += 4) {
-@@ -451,7 +427,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
- static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
-                                int num_pixels, uint32_t* out) {
-   int i;
--  const __m128i black = _mm_set1_epi32(ARGB_BLACK);
-+  const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
-     const __m128i res = _mm_sub_epi8(src, black);
-@@ -662,10 +638,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
-   VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE2;
-   VP8LAddVector = AddVector_SSE2;
-   VP8LAddVectorEq = AddVectorEq_SSE2;
--  // TODO(https://crbug.com/webp/499): this function produces different results
--  // from the C code due to use of double/float resulting in output differences
--  // when compared to -noasm.
--#if !(defined(__i386__) || defined(_M_IX86))
-+#if !defined(DONT_USE_COMBINED_SHANNON_ENTROPY_SSE2_FUNC)
-   VP8LCombinedShannonEntropy = CombinedShannonEntropy_SSE2;
- #endif
-   VP8LVectorMismatch = VectorMismatch_SSE2;
-diff --git a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
-index 719d8ed25e15..7ab83c2604b4 100644
---- a/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_enc_sse41.c
-@@ -18,8 +18,53 @@
- #include <smmintrin.h>
- #include "src/dsp/lossless.h"
- 
--// For sign-extended multiplying constants, pre-shifted by 5:
--#define CST_5b(X)  (((int16_t)((uint16_t)(X) << 8)) >> 5)
-+//------------------------------------------------------------------------------
-+// Cost operations.
-+
-+static WEBP_INLINE uint32_t HorizontalSum_SSE41(__m128i cost) {
-+  cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 8));
-+  cost = _mm_add_epi32(cost, _mm_srli_si128(cost, 4));
-+  return _mm_cvtsi128_si32(cost);
-+}
-+
-+static uint32_t ExtraCost_SSE41(const uint32_t* const a, int length) {
-+  int i;
-+  __m128i cost = _mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]);
-+  assert(length % 8 == 0);
-+
-+  for (i = 8; i + 8 <= length; i += 8) {
-+    const int j = (i - 2) >> 1;
-+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
-+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
-+    const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
-+    const __m128i a2 = _mm_hadd_epi32(a0, a1);
-+    const __m128i mul = _mm_mullo_epi32(a2, w);
-+    cost = _mm_add_epi32(mul, cost);
-+  }
-+  return HorizontalSum_SSE41(cost);
-+}
-+
-+static uint32_t ExtraCostCombined_SSE41(const uint32_t* const a,
-+                                        const uint32_t* const b, int length) {
-+  int i;
-+  __m128i cost = _mm_add_epi32(_mm_set_epi32(2 * a[7], 2 * a[6], a[5], a[4]),
-+                               _mm_set_epi32(2 * b[7], 2 * b[6], b[5], b[4]));
-+  assert(length % 8 == 0);
-+
-+  for (i = 8; i + 8 <= length; i += 8) {
-+    const int j = (i - 2) >> 1;
-+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i]);
-+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i + 4]);
-+    const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i]);
-+    const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i + 4]);
-+    const __m128i w = _mm_set_epi32(j + 3, j + 2, j + 1, j);
-+    const __m128i a2 = _mm_hadd_epi32(a0, a1);
-+    const __m128i b2 = _mm_hadd_epi32(b0, b1);
-+    const __m128i mul = _mm_mullo_epi32(_mm_add_epi32(a2, b2), w);
-+    cost = _mm_add_epi32(mul, cost);
-+  }
-+  return HorizontalSum_SSE41(cost);
-+}
- 
- //------------------------------------------------------------------------------
- // Subtract-Green Transform
-@@ -44,46 +89,50 @@ static void SubtractGreenFromBlueAndRed_SSE41(uint32_t* argb_data,
- //------------------------------------------------------------------------------
- // Color Transform
- 
--#define SPAN 8
-+// For sign-extended multiplying constants, pre-shifted by 5:
-+#define CST_5b(X) (((int16_t)((uint16_t)(X) << 8)) >> 5)
-+
-+#define MK_CST_16(HI, LO) \
-+  _mm_set1_epi32((int)(((uint32_t)(HI) << 16) | ((LO) & 0xffff)))
-+
- static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
-                                              int tile_width, int tile_height,
-                                              int green_to_blue, int red_to_blue,
-                                              int histo[]) {
--  const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue));
--  const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue));
--  const __m128i mask_g = _mm_set1_epi16((short)0xff00);   // green mask
--  const __m128i mask_gb = _mm_set1_epi32(0xffff);         // green/blue mask
--  const __m128i mask_b = _mm_set1_epi16(0x00ff);          // blue mask
--  const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1,
--                                            -1, -1, -1, -1, -1, -1, -1);
--  const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1,
--                                            2, -1, 6, -1, 10, -1, 14);
--  int y;
--  for (y = 0; y < tile_height; ++y) {
--    const uint32_t* const src = argb + y * stride;
--    int i, x;
--    for (x = 0; x + SPAN <= tile_width; x += SPAN) {
--      uint16_t values[SPAN];
--      const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
--      const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
--      const __m128i r0 = _mm_shuffle_epi8(in0, shuffler_lo);
--      const __m128i r1 = _mm_shuffle_epi8(in1, shuffler_hi);
--      const __m128i r = _mm_or_si128(r0, r1);         // r 0
--      const __m128i gb0 = _mm_and_si128(in0, mask_gb);
--      const __m128i gb1 = _mm_and_si128(in1, mask_gb);
--      const __m128i gb = _mm_packus_epi32(gb0, gb1);  // g b
--      const __m128i g = _mm_and_si128(gb, mask_g);    // g 0
--      const __m128i A = _mm_mulhi_epi16(r, mults_r);  // x dbr
--      const __m128i B = _mm_mulhi_epi16(g, mults_g);  // x dbg
--      const __m128i C = _mm_sub_epi8(gb, B);          // x b'
--      const __m128i D = _mm_sub_epi8(C, A);           // x b''
--      const __m128i E = _mm_and_si128(D, mask_b);     // 0 b''
--      _mm_storeu_si128((__m128i*)values, E);
--      for (i = 0; i < SPAN; ++i) ++histo[values[i]];
-+  const __m128i mult =
-+      MK_CST_16(CST_5b(red_to_blue) + 256,CST_5b(green_to_blue));
-+  const __m128i perm =
-+      _mm_setr_epi8(-1, 1, -1, 2, -1, 5, -1, 6, -1, 9, -1, 10, -1, 13, -1, 14);
-+  if (tile_width >= 4) {
-+    int y;
-+    for (y = 0; y < tile_height; ++y) {
-+      const uint32_t* const src = argb + y * stride;
-+      const __m128i A1 = _mm_loadu_si128((const __m128i*)src);
-+      const __m128i B1 = _mm_shuffle_epi8(A1, perm);
-+      const __m128i C1 = _mm_mulhi_epi16(B1, mult);
-+      const __m128i D1 = _mm_sub_epi16(A1, C1);
-+      __m128i E = _mm_add_epi16(_mm_srli_epi32(D1, 16), D1);
-+      int x;
-+      for (x = 4; x + 4 <= tile_width; x += 4) {
-+        const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x));
-+        __m128i B2, C2, D2;
-+        ++histo[_mm_extract_epi8(E,  0)];
-+        B2 = _mm_shuffle_epi8(A2, perm);
-+        ++histo[_mm_extract_epi8(E,  4)];
-+        C2 = _mm_mulhi_epi16(B2, mult);
-+        ++histo[_mm_extract_epi8(E,  8)];
-+        D2 = _mm_sub_epi16(A2, C2);
-+        ++histo[_mm_extract_epi8(E, 12)];
-+        E = _mm_add_epi16(_mm_srli_epi32(D2, 16), D2);
-+      }
-+      ++histo[_mm_extract_epi8(E,  0)];
-+      ++histo[_mm_extract_epi8(E,  4)];
-+      ++histo[_mm_extract_epi8(E,  8)];
-+      ++histo[_mm_extract_epi8(E, 12)];
-     }
-   }
-   {
--    const int left_over = tile_width & (SPAN - 1);
-+    const int left_over = tile_width & 3;
-     if (left_over > 0) {
-       VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride,
-                                        left_over, tile_height,
-@@ -95,33 +144,37 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
- static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
-                                             int tile_width, int tile_height,
-                                             int green_to_red, int histo[]) {
--  const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_red));
--  const __m128i mask_g = _mm_set1_epi32(0x00ff00);  // green mask
--  const __m128i mask = _mm_set1_epi16(0xff);
--
--  int y;
--  for (y = 0; y < tile_height; ++y) {
--    const uint32_t* const src = argb + y * stride;
--    int i, x;
--    for (x = 0; x + SPAN <= tile_width; x += SPAN) {
--      uint16_t values[SPAN];
--      const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x + 0]);
--      const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
--      const __m128i g0 = _mm_and_si128(in0, mask_g);  // 0 0  | g 0
--      const __m128i g1 = _mm_and_si128(in1, mask_g);
--      const __m128i g = _mm_packus_epi32(g0, g1);     // g 0
--      const __m128i A0 = _mm_srli_epi32(in0, 16);     // 0 0  | x r
--      const __m128i A1 = _mm_srli_epi32(in1, 16);
--      const __m128i A = _mm_packus_epi32(A0, A1);     // x r
--      const __m128i B = _mm_mulhi_epi16(g, mults_g);  // x dr
--      const __m128i C = _mm_sub_epi8(A, B);           // x r'
--      const __m128i D = _mm_and_si128(C, mask);       // 0 r'
--      _mm_storeu_si128((__m128i*)values, D);
--      for (i = 0; i < SPAN; ++i) ++histo[values[i]];
-+
-+  const __m128i mult = MK_CST_16(0, CST_5b(green_to_red));
-+  const __m128i mask_g = _mm_set1_epi32(0x0000ff00);
-+  if (tile_width >= 4) {
-+    int y;
-+    for (y = 0; y < tile_height; ++y) {
-+      const uint32_t* const src = argb + y * stride;
-+      const __m128i A1 = _mm_loadu_si128((const __m128i*)src);
-+      const __m128i B1 = _mm_and_si128(A1, mask_g);
-+      const __m128i C1 = _mm_madd_epi16(B1, mult);
-+      __m128i D = _mm_sub_epi16(A1, C1);
-+      int x;
-+      for (x = 4; x + 4 <= tile_width; x += 4) {
-+        const __m128i A2 = _mm_loadu_si128((const __m128i*)(src + x));
-+        __m128i B2, C2;
-+        ++histo[_mm_extract_epi8(D,  2)];
-+        B2 = _mm_and_si128(A2, mask_g);
-+        ++histo[_mm_extract_epi8(D,  6)];
-+        C2 = _mm_madd_epi16(B2, mult);
-+        ++histo[_mm_extract_epi8(D, 10)];
-+        ++histo[_mm_extract_epi8(D, 14)];
-+        D = _mm_sub_epi16(A2, C2);
-+      }
-+      ++histo[_mm_extract_epi8(D,  2)];
-+      ++histo[_mm_extract_epi8(D,  6)];
-+      ++histo[_mm_extract_epi8(D, 10)];
-+      ++histo[_mm_extract_epi8(D, 14)];
-     }
-   }
-   {
--    const int left_over = tile_width & (SPAN - 1);
-+    const int left_over = tile_width & 3;
-     if (left_over > 0) {
-       VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride,
-                                       left_over, tile_height, green_to_red,
-@@ -130,12 +183,16 @@ static void CollectColorRedTransforms_SSE41(const uint32_t* argb, int stride,
-   }
- }
- 
-+#undef MK_CST_16
-+
- //------------------------------------------------------------------------------
- // Entry point
- 
- extern void VP8LEncDspInitSSE41(void);
- 
- WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
-+  VP8LExtraCost = ExtraCost_SSE41;
-+  VP8LExtraCostCombined = ExtraCostCombined_SSE41;
-   VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_SSE41;
-   VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_SSE41;
-   VP8LCollectColorRedTransforms = CollectColorRedTransforms_SSE41;
-diff --git a/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c b/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
-index 9888854d5719..bfe5ea6b3865 100644
---- a/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
-@@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
-   return Average2(Average2(a0, a1), Average2(a2, a3));
- }
- 
--static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
--  return Average3(left, top[0], top[1]);
-+static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
-+                                     const uint32_t* const top) {
-+  return Average3(*left, top[0], top[1]);
- }
- 
--static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
--  return Average2(left, top[-1]);
-+static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
-+                                     const uint32_t* const top) {
-+  return Average2(*left, top[-1]);
- }
- 
--static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
--  return Average2(left, top[0]);
-+static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
-+                                     const uint32_t* const top) {
-+  return Average2(*left, top[0]);
- }
- 
--static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
-+static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
-+                                     const uint32_t* const top) {
-   (void)left;
-   return Average2(top[-1], top[0]);
- }
- 
--static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
-+static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
-+                                     const uint32_t* const top) {
-   (void)left;
-   return Average2(top[0], top[1]);
- }
- 
--static uint32_t Predictor10_MIPSdspR2(uint32_t left,
-+static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
-                                       const uint32_t* const top) {
--  return Average4(left, top[-1], top[0], top[1]);
-+  return Average4(*left, top[-1], top[0], top[1]);
- }
- 
--static uint32_t Predictor11_MIPSdspR2(uint32_t left,
-+static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
-                                       const uint32_t* const top) {
--  return Select(top[0], left, top[-1]);
-+  return Select(top[0], *left, top[-1]);
- }
- 
--static uint32_t Predictor12_MIPSdspR2(uint32_t left,
-+static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
-                                       const uint32_t* const top) {
--  return ClampedAddSubtractFull(left, top[0], top[-1]);
-+  return ClampedAddSubtractFull(*left, top[0], top[-1]);
- }
- 
--static uint32_t Predictor13_MIPSdspR2(uint32_t left,
-+static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
-                                       const uint32_t* const top) {
--  return ClampedAddSubtractHalf(left, top[0], top[-1]);
-+  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
- }
- 
- // Add green to blue and red channels (i.e. perform the inverse transform of
-diff --git a/3rdparty/libwebp/src/dsp/lossless_neon.c b/3rdparty/libwebp/src/dsp/lossless_neon.c
-index 76a1b6f8732c..ddc9b61711e3 100644
---- a/3rdparty/libwebp/src/dsp/lossless_neon.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_neon.c
-@@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
-   return avg;
- }
- 
--static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
--  return Average3_NEON(left, top[0], top[1]);
-+static uint32_t Predictor5_NEON(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  return Average3_NEON(*left, top[0], top[1]);
- }
--static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
--  return Average2_NEON(left, top[-1]);
-+static uint32_t Predictor6_NEON(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  return Average2_NEON(*left, top[-1]);
- }
--static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
--  return Average2_NEON(left, top[0]);
-+static uint32_t Predictor7_NEON(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  return Average2_NEON(*left, top[0]);
- }
--static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
--  return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
-+static uint32_t Predictor13_NEON(const uint32_t* const left,
-+                                 const uint32_t* const top) {
-+  return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
- }
- 
- // Batch versions of those functions.
-@@ -494,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
- 
- // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
- // non-standard versions there.
--#if defined(__APPLE__) && defined(__aarch64__) && \
-+#if defined(__APPLE__) && WEBP_AARCH64 && \
-     defined(__apple_build_version__) && (__apple_build_version__< 6020037)
- #define USE_VTBLQ
- #endif
-diff --git a/3rdparty/libwebp/src/dsp/lossless_sse2.c b/3rdparty/libwebp/src/dsp/lossless_sse2.c
-index aef0cee1b370..4b6a532c239c 100644
---- a/3rdparty/libwebp/src/dsp/lossless_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/lossless_sse2.c
-@@ -18,7 +18,6 @@
- #include "src/dsp/common_sse2.h"
- #include "src/dsp/lossless.h"
- #include "src/dsp/lossless_common.h"
--#include <assert.h>
- #include <emmintrin.h>
- 
- //------------------------------------------------------------------------------
-@@ -28,23 +27,22 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
-                                                         uint32_t c1,
-                                                         uint32_t c2) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
--  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
--  const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
-+  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
-+  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
-+  const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
-   const __m128i V1 = _mm_add_epi16(C0, C1);
-   const __m128i V2 = _mm_sub_epi16(V1, C2);
-   const __m128i b = _mm_packus_epi16(V2, V2);
--  const uint32_t output = _mm_cvtsi128_si32(b);
--  return output;
-+  return (uint32_t)_mm_cvtsi128_si32(b);
- }
- 
- static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
-                                                         uint32_t c1,
-                                                         uint32_t c2) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
--  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
--  const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
-+  const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
-+  const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
-+  const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
-   const __m128i avg = _mm_add_epi16(C1, C0);
-   const __m128i A0 = _mm_srli_epi16(avg, 1);
-   const __m128i A1 = _mm_sub_epi16(A0, B0);
-@@ -53,16 +51,15 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
-   const __m128i A3 = _mm_srai_epi16(A2, 1);
-   const __m128i A4 = _mm_add_epi16(A0, A3);
-   const __m128i A5 = _mm_packus_epi16(A4, A4);
--  const uint32_t output = _mm_cvtsi128_si32(A5);
--  return output;
-+  return (uint32_t)_mm_cvtsi128_si32(A5);
- }
- 
- static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
-   int pa_minus_pb;
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i A0 = _mm_cvtsi32_si128(a);
--  const __m128i B0 = _mm_cvtsi32_si128(b);
--  const __m128i C0 = _mm_cvtsi32_si128(c);
-+  const __m128i A0 = _mm_cvtsi32_si128((int)a);
-+  const __m128i B0 = _mm_cvtsi32_si128((int)b);
-+  const __m128i C0 = _mm_cvtsi32_si128((int)c);
-   const __m128i AC0 = _mm_subs_epu8(A0, C0);
-   const __m128i CA0 = _mm_subs_epu8(C0, A0);
-   const __m128i BC0 = _mm_subs_epu8(B0, C0);
-@@ -95,8 +92,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
-                                              __m128i* const avg) {
-   // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
-   const __m128i ones = _mm_set1_epi8(1);
--  const __m128i A0 = _mm_cvtsi32_si128(a0);
--  const __m128i A1 = _mm_cvtsi32_si128(a1);
-+  const __m128i A0 = _mm_cvtsi32_si128((int)a0);
-+  const __m128i A1 = _mm_cvtsi32_si128((int)a1);
-   const __m128i avg1 = _mm_avg_epu8(A0, A1);
-   const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones);
-   *avg = _mm_sub_epi8(avg1, one);
-@@ -104,8 +101,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
- 
- static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
--  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
-+  const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a0), zero);
-+  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
-   const __m128i sum = _mm_add_epi16(A1, A0);
-   return _mm_srli_epi16(sum, 1);
- }
-@@ -113,19 +110,18 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
- static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
-   __m128i output;
-   Average2_uint32_SSE2(a0, a1, &output);
--  return _mm_cvtsi128_si32(output);
-+  return (uint32_t)_mm_cvtsi128_si32(output);
- }
- 
- static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
-                                           uint32_t a2) {
-   const __m128i zero = _mm_setzero_si128();
-   const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2);
--  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
-+  const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
-   const __m128i sum = _mm_add_epi16(avg1, A1);
-   const __m128i avg2 = _mm_srli_epi16(sum, 1);
-   const __m128i A2 = _mm_packus_epi16(avg2, avg2);
--  const uint32_t output = _mm_cvtsi128_si32(A2);
--  return output;
-+  return (uint32_t)_mm_cvtsi128_si32(A2);
- }
- 
- static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
-@@ -135,46 +131,54 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
-   const __m128i sum = _mm_add_epi16(avg2, avg1);
-   const __m128i avg3 = _mm_srli_epi16(sum, 1);
-   const __m128i A0 = _mm_packus_epi16(avg3, avg3);
--  const uint32_t output = _mm_cvtsi128_si32(A0);
--  return output;
-+  return (uint32_t)_mm_cvtsi128_si32(A0);
- }
- 
--static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
-+static uint32_t Predictor5_SSE2(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
-   return pred;
- }
--static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average2_SSE2(left, top[-1]);
-+static uint32_t Predictor6_SSE2(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  const uint32_t pred = Average2_SSE2(*left, top[-1]);
-   return pred;
- }
--static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average2_SSE2(left, top[0]);
-+static uint32_t Predictor7_SSE2(const uint32_t* const left,
-+                                const uint32_t* const top) {
-+  const uint32_t pred = Average2_SSE2(*left, top[0]);
-   return pred;
- }
--static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
-+static uint32_t Predictor8_SSE2(const uint32_t* const left,
-+                                const uint32_t* const top) {
-   const uint32_t pred = Average2_SSE2(top[-1], top[0]);
-   (void)left;
-   return pred;
- }
--static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
-+static uint32_t Predictor9_SSE2(const uint32_t* const left,
-+                                const uint32_t* const top) {
-   const uint32_t pred = Average2_SSE2(top[0], top[1]);
-   (void)left;
-   return pred;
- }
--static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
-+static uint32_t Predictor10_SSE2(const uint32_t* const left,
-+                                 const uint32_t* const top) {
-+  const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
-   return pred;
- }
--static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
-+static uint32_t Predictor11_SSE2(const uint32_t* const left,
-+                                 const uint32_t* const top) {
-+  const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
-   return pred;
- }
--static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
-+static uint32_t Predictor12_SSE2(const uint32_t* const left,
-+                                 const uint32_t* const top) {
-+  const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
-   return pred;
- }
--static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
--  const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
-+static uint32_t Predictor13_SSE2(const uint32_t* const left,
-+                                 const uint32_t* const top) {
-+  const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
-   return pred;
- }
- 
-@@ -184,7 +188,7 @@ static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
- static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
-                                int num_pixels, uint32_t* out) {
-   int i;
--  const __m128i black = _mm_set1_epi32(ARGB_BLACK);
-+  const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
-     const __m128i res = _mm_add_epi8(src, black);
-@@ -200,7 +204,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
- static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
-                                int num_pixels, uint32_t* out) {
-   int i;
--  __m128i prev = _mm_set1_epi32(out[-1]);
-+  __m128i prev = _mm_set1_epi32((int)out[-1]);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     // a | b | c | d
-     const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
-@@ -277,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
- #undef GENERATE_PREDICTOR_2
- 
- // Predictor10: average of (average of (L,TL), average of (T, TR)).
--#define DO_PRED10(OUT) do {               \
--  __m128i avgLTL, avg;                    \
--  Average2_m128i(&L, &TL, &avgLTL);       \
--  Average2_m128i(&avgTTR, &avgLTL, &avg); \
--  L = _mm_add_epi8(avg, src);             \
--  out[i + (OUT)] = _mm_cvtsi128_si32(L);  \
-+#define DO_PRED10(OUT) do {                         \
-+  __m128i avgLTL, avg;                              \
-+  Average2_m128i(&L, &TL, &avgLTL);                 \
-+  Average2_m128i(&avgTTR, &avgLTL, &avg);           \
-+  L = _mm_add_epi8(avg, src);                       \
-+  out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L);  \
- } while (0)
- 
- #define DO_PRED10_SHIFT do {                                  \
-@@ -295,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
- static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
-                                 int num_pixels, uint32_t* out) {
-   int i;
--  __m128i L = _mm_cvtsi32_si128(out[-1]);
-+  __m128i L = _mm_cvtsi32_si128((int)out[-1]);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
-     __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
-@@ -328,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
-   const __m128i B = _mm_andnot_si128(mask, T);                         \
-   const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \
-   L = _mm_add_epi8(src, pred);                                         \
--  out[i + (OUT)] = _mm_cvtsi128_si32(L);                               \
-+  out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L);                     \
- } while (0)
- 
- #define DO_PRED11_SHIFT do {                                \
-@@ -343,7 +347,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
-                                 int num_pixels, uint32_t* out) {
-   int i;
-   __m128i pa;
--  __m128i L = _mm_cvtsi32_si128(out[-1]);
-+  __m128i L = _mm_cvtsi32_si128((int)out[-1]);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
-     __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
-@@ -376,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
- #undef DO_PRED11_SHIFT
- 
- // Predictor12: ClampedAddSubtractFull.
--#define DO_PRED12(DIFF, LANE, OUT) do {            \
--  const __m128i all = _mm_add_epi16(L, (DIFF));    \
--  const __m128i alls = _mm_packus_epi16(all, all); \
--  const __m128i res = _mm_add_epi8(src, alls);     \
--  out[i + (OUT)] = _mm_cvtsi128_si32(res);         \
--  L = _mm_unpacklo_epi8(res, zero);                \
-+#define DO_PRED12(DIFF, LANE, OUT) do {              \
-+  const __m128i all = _mm_add_epi16(L, (DIFF));      \
-+  const __m128i alls = _mm_packus_epi16(all, all);   \
-+  const __m128i res = _mm_add_epi8(src, alls);       \
-+  out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \
-+  L = _mm_unpacklo_epi8(res, zero);                  \
- } while (0)
- 
- #define DO_PRED12_SHIFT(DIFF, LANE) do {                    \
-@@ -394,7 +398,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
-                                 int num_pixels, uint32_t* out) {
-   int i;
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i L8 = _mm_cvtsi32_si128(out[-1]);
-+  const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
-   __m128i L = _mm_unpacklo_epi8(L8, zero);
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     // Load 4 pixels at a time.
-@@ -460,7 +464,7 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
-   const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
- #undef MK_CST_16
- #undef CST
--  const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);  // alpha-green masks
-+  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);  // alpha-green masks
-   int i;
-   for (i = 0; i + 4 <= num_pixels; i += 4) {
-     const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
-@@ -524,7 +528,7 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
- 
- static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
-                                    int num_pixels, uint8_t* dst) {
--  const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
-+  const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
-   const __m128i* in = (const __m128i*)src;
-   __m128i* out = (__m128i*)dst;
-   while (num_pixels >= 8) {
-@@ -553,7 +557,7 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
- static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
-                                        int num_pixels, uint8_t* dst) {
-   const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
--  const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
-+  const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
-   const __m128i* in = (const __m128i*)src;
-   __m128i* out = (__m128i*)dst;
-   while (num_pixels >= 8) {
-@@ -588,8 +592,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
- 
- static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
-                                      int num_pixels, uint8_t* dst) {
--  const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
--  const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
-+  const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
-+  const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
-   const __m128i mask_0x07 = _mm_set1_epi8(0x07);
-   const __m128i* in = (const __m128i*)src;
-   __m128i* out = (__m128i*)dst;
-diff --git a/3rdparty/libwebp/src/dsp/lossless_sse41.c b/3rdparty/libwebp/src/dsp/lossless_sse41.c
-new file mode 100644
-index 000000000000..bb7ce7611fa9
---- /dev/null
-+++ b/3rdparty/libwebp/src/dsp/lossless_sse41.c
-@@ -0,0 +1,133 @@
-+// Copyright 2021 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// SSE41 variant of methods for lossless decoder
-+
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_SSE41)
-+
-+#include "src/dsp/common_sse41.h"
-+#include "src/dsp/lossless.h"
-+#include "src/dsp/lossless_common.h"
-+
-+//------------------------------------------------------------------------------
-+// Color-space conversion functions
-+
-+static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
-+                                        const uint32_t* const src,
-+                                        int num_pixels, uint32_t* dst) {
-+// sign-extended multiplying constants, pre-shifted by 5.
-+#define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
-+  const __m128i mults_rb =
-+      _mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
-+                           (CST(green_to_blue_) & 0xffff)));
-+  const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));
-+#undef CST
-+  const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);
-+  const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,
-+                                      -1, 9, -1, 9, -1, 13, -1, 13);
-+  const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1,
-+                                      -1, 10, -1, -1, -1, 14, -1, -1);
-+  int i;
-+  for (i = 0; i + 4 <= num_pixels; i += 4) {
-+    const __m128i A = _mm_loadu_si128((const __m128i*)(src + i));
-+    const __m128i B = _mm_shuffle_epi8(A, perm1); // argb -> g0g0
-+    const __m128i C = _mm_mulhi_epi16(B, mults_rb);
-+    const __m128i D = _mm_add_epi8(A, C);
-+    const __m128i E = _mm_shuffle_epi8(D, perm2);
-+    const __m128i F = _mm_mulhi_epi16(E, mults_b2);
-+    const __m128i G = _mm_add_epi8(D, F);
-+    const __m128i out = _mm_blendv_epi8(G, A, mask_ag);
-+    _mm_storeu_si128((__m128i*)&dst[i], out);
-+  }
-+  // Fall-back to C-version for left-overs.
-+  if (i != num_pixels) {
-+    VP8LTransformColorInverse_C(m, src + i, num_pixels - i, dst + i);
-+  }
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+#define ARGB_TO_RGB_SSE41 do {                        \
-+  while (num_pixels >= 16) {                          \
-+    const __m128i in0 = _mm_loadu_si128(in + 0);      \
-+    const __m128i in1 = _mm_loadu_si128(in + 1);      \
-+    const __m128i in2 = _mm_loadu_si128(in + 2);      \
-+    const __m128i in3 = _mm_loadu_si128(in + 3);      \
-+    const __m128i a0 = _mm_shuffle_epi8(in0, perm0);  \
-+    const __m128i a1 = _mm_shuffle_epi8(in1, perm1);  \
-+    const __m128i a2 = _mm_shuffle_epi8(in2, perm2);  \
-+    const __m128i a3 = _mm_shuffle_epi8(in3, perm3);  \
-+    const __m128i b0 = _mm_blend_epi16(a0, a1, 0xc0); \
-+    const __m128i b1 = _mm_blend_epi16(a1, a2, 0xf0); \
-+    const __m128i b2 = _mm_blend_epi16(a2, a3, 0xfc); \
-+    _mm_storeu_si128(out + 0, b0);                    \
-+    _mm_storeu_si128(out + 1, b1);                    \
-+    _mm_storeu_si128(out + 2, b2);                    \
-+    in += 4;                                          \
-+    out += 3;                                         \
-+    num_pixels -= 16;                                 \
-+  }                                                   \
-+} while (0)
-+
-+static void ConvertBGRAToRGB_SSE41(const uint32_t* src, int num_pixels,
-+                                   uint8_t* dst) {
-+  const __m128i* in = (const __m128i*)src;
-+  __m128i* out = (__m128i*)dst;
-+  const __m128i perm0 = _mm_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9,
-+                                      8, 14, 13, 12, -1, -1, -1, -1);
-+  const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);
-+  const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);
-+  const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);
-+
-+  ARGB_TO_RGB_SSE41;
-+
-+  // left-overs
-+  if (num_pixels > 0) {
-+    VP8LConvertBGRAToRGB_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
-+  }
-+}
-+
-+static void ConvertBGRAToBGR_SSE41(const uint32_t* src,
-+                                   int num_pixels, uint8_t* dst) {
-+  const __m128i* in = (const __m128i*)src;
-+  __m128i* out = (__m128i*)dst;
-+  const __m128i perm0 = _mm_setr_epi8(0, 1, 2, 4, 5, 6, 8, 9, 10,
-+                                      12, 13, 14, -1, -1, -1, -1);
-+  const __m128i perm1 = _mm_shuffle_epi32(perm0, 0x39);
-+  const __m128i perm2 = _mm_shuffle_epi32(perm0, 0x4e);
-+  const __m128i perm3 = _mm_shuffle_epi32(perm0, 0x93);
-+
-+  ARGB_TO_RGB_SSE41;
-+
-+  // left-overs
-+  if (num_pixels > 0) {
-+    VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
-+  }
-+}
-+
-+#undef ARGB_TO_RGB_SSE41
-+
-+//------------------------------------------------------------------------------
-+// Entry point
-+
-+extern void VP8LDspInitSSE41(void);
-+
-+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE41(void) {
-+  VP8LTransformColorInverse = TransformColorInverse_SSE41;
-+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_SSE41;
-+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_SSE41;
-+}
-+
-+#else  // !WEBP_USE_SSE41
-+
-+WEBP_DSP_INIT_STUB(VP8LDspInitSSE41)
-+
-+#endif  // WEBP_USE_SSE41
-diff --git a/3rdparty/libwebp/src/dsp/msa_macro.h b/3rdparty/libwebp/src/dsp/msa_macro.h
-index a16c0bb3009b..90adbbc3197e 100644
---- a/3rdparty/libwebp/src/dsp/msa_macro.h
-+++ b/3rdparty/libwebp/src/dsp/msa_macro.h
-@@ -14,6 +14,10 @@
- #ifndef WEBP_DSP_MSA_MACRO_H_
- #define WEBP_DSP_MSA_MACRO_H_
- 
-+#include "src/dsp/dsp.h"
-+
-+#if defined(WEBP_USE_MSA)
-+
- #include <stdint.h>
- #include <msa.h>
- 
-@@ -69,27 +73,25 @@
- #define ST_UW(...) ST_W(v4u32, __VA_ARGS__)
- #define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
- 
--#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME)             \
--  static inline TYPE FUNC_NAME(const void* const psrc) {  \
--    const uint8_t* const psrc_m = (const uint8_t*)psrc;   \
--    TYPE val_m;                                           \
--    __asm__ volatile (                                        \
--      "" #INSTR " %[val_m], %[psrc_m]  \n\t"              \
--      : [val_m] "=r" (val_m)                              \
--      : [psrc_m] "m" (*psrc_m));                          \
--    return val_m;                                         \
-+#define MSA_LOAD_FUNC(TYPE, INSTR, FUNC_NAME)               \
-+  static inline TYPE FUNC_NAME(const void* const psrc) {    \
-+    const uint8_t* const psrc_m = (const uint8_t*)psrc;     \
-+    TYPE val_m;                                             \
-+    __asm__ volatile("" #INSTR " %[val_m], %[psrc_m]  \n\t" \
-+                     : [val_m] "=r"(val_m)                  \
-+                     : [psrc_m] "m"(*psrc_m));              \
-+    return val_m;                                           \
-   }
- 
- #define MSA_LOAD(psrc, FUNC_NAME)  FUNC_NAME(psrc)
- 
--#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME)               \
--  static inline void FUNC_NAME(TYPE val, void* const pdst) { \
--    uint8_t* const pdst_m = (uint8_t*)pdst;                  \
--    TYPE val_m = val;                                        \
--    __asm__ volatile (                                           \
--      " " #INSTR "  %[val_m],  %[pdst_m]  \n\t"              \
--      : [pdst_m] "=m" (*pdst_m)                              \
--      : [val_m] "r" (val_m));                                \
-+#define MSA_STORE_FUNC(TYPE, INSTR, FUNC_NAME)                 \
-+  static inline void FUNC_NAME(TYPE val, void* const pdst) {   \
-+    uint8_t* const pdst_m = (uint8_t*)pdst;                    \
-+    TYPE val_m = val;                                          \
-+    __asm__ volatile(" " #INSTR "  %[val_m],  %[pdst_m]  \n\t" \
-+                     : [pdst_m] "=m"(*pdst_m)                  \
-+                     : [val_m] "r"(val_m));                    \
-   }
- 
- #define MSA_STORE(val, pdst, FUNC_NAME)  FUNC_NAME(val, pdst)
-@@ -1389,4 +1391,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
- } while (0)
- #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
- 
-+#endif  // WEBP_USE_MSA
- #endif  // WEBP_DSP_MSA_MACRO_H_
-diff --git a/3rdparty/libwebp/src/dsp/neon.h b/3rdparty/libwebp/src/dsp/neon.h
-index aa1dea130106..14acb4044ba6 100644
---- a/3rdparty/libwebp/src/dsp/neon.h
-+++ b/3rdparty/libwebp/src/dsp/neon.h
-@@ -12,14 +12,16 @@
- #ifndef WEBP_DSP_NEON_H_
- #define WEBP_DSP_NEON_H_
- 
--#include <arm_neon.h>
--
- #include "src/dsp/dsp.h"
- 
-+#if defined(WEBP_USE_NEON)
-+
-+#include <arm_neon.h>
-+
- // Right now, some intrinsics functions seem slower, so we disable them
- // everywhere except newer clang/gcc or aarch64 where the inline assembly is
- // incompatible.
--#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
-+#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64
- #define WEBP_USE_INTRINSICS   // use intrinsics when possible
- #endif
- 
-@@ -44,7 +46,7 @@
- // if using intrinsics, this flag avoids some functions that make gcc-4.6.3
- // crash ("internal compiler error: in immed_double_const, at emit-rtl.").
- // (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
--#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
-+#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
- #define WORK_AROUND_GCC
- #endif
- 
-@@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
- } while (0)
- #endif
- 
-+#endif  // WEBP_USE_NEON
- #endif  // WEBP_DSP_NEON_H_
-diff --git a/3rdparty/libwebp/src/dsp/quant.h b/3rdparty/libwebp/src/dsp/quant.h
-index 5e8dba8d19e8..dcbc11c77c59 100644
---- a/3rdparty/libwebp/src/dsp/quant.h
-+++ b/3rdparty/libwebp/src/dsp/quant.h
-@@ -21,18 +21,24 @@
- 
- #define IsFlat IsFlat_NEON
- 
--static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
-+static uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
-+#if WEBP_AARCH64
-+  return vaddvq_u32(a);
-+#else
-   const uint64x2_t b = vpaddlq_u32(a);
--  return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
--                  vreinterpret_u32_u64(vget_high_u64(b)));
-+  const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
-+                                vreinterpret_u32_u64(vget_high_u64(b)));
-+  return vget_lane_u32(c, 0);
-+#endif
- }
- 
- static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
-                               int thresh) {
-   const int16x8_t tst_ones = vdupq_n_s16(-1);
-   uint32x4_t sum = vdupq_n_u32(0);
-+  int i;
- 
--  for (int i = 0; i < num_blocks; ++i) {
-+  for (i = 0; i < num_blocks; ++i) {
-     // Set DC to zero.
-     const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
-     const int16x8_t a_1 = vld1q_s16(levels + 8);
-@@ -45,7 +51,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
- 
-     levels += 16;
-   }
--  return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
-+  return thresh >= (int)horizontal_add_uint32x4(sum);
- }
- 
- #else
-diff --git a/3rdparty/libwebp/src/dsp/rescaler.c b/3rdparty/libwebp/src/dsp/rescaler.c
-index c5a01e82df5c..325d8be1808b 100644
---- a/3rdparty/libwebp/src/dsp/rescaler.c
-+++ b/3rdparty/libwebp/src/dsp/rescaler.c
-@@ -38,8 +38,9 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
-     int x_out = channel;
-     // simple bilinear interpolation
-     int accum = wrk->x_add;
--    int left = src[x_in];
--    int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left;
-+    rescaler_t left = (rescaler_t)src[x_in];
-+    rescaler_t right =
-+        (wrk->src_width > 1) ? (rescaler_t)src[x_in + x_stride] : left;
-     x_in += x_stride;
-     while (1) {
-       wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
-@@ -50,7 +51,7 @@ void WebPRescalerImportRowExpand_C(WebPRescaler* const wrk,
-         left = right;
-         x_in += x_stride;
-         assert(x_in < wrk->src_width * x_stride);
--        right = src[x_in];
-+        right = (rescaler_t)src[x_in];
-         accum += wrk->x_add;
-       }
-     }
-@@ -196,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
- WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
- WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void WebPRescalerDspInitSSE2(void);
- extern void WebPRescalerDspInitMIPS32(void);
- extern void WebPRescalerDspInitMIPSdspR2(void);
-@@ -213,7 +215,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
-   WebPRescalerImportRowShrink = WebPRescalerImportRowShrink_C;
- 
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPRescalerDspInitSSE2();
-     }
-@@ -235,7 +237,7 @@ WEBP_DSP_INIT_FUNC(WebPRescalerDspInit) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     WebPRescalerDspInitNEON();
-diff --git a/3rdparty/libwebp/src/dsp/rescaler_sse2.c b/3rdparty/libwebp/src/dsp/rescaler_sse2.c
-index d7effea16ea2..3f18e94e9359 100644
---- a/3rdparty/libwebp/src/dsp/rescaler_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/rescaler_sse2.c
-@@ -85,7 +85,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
-       const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum);
-       const __m128i out = _mm_madd_epi16(cur_pixels, mult);
-       assert(sizeof(*frow) == sizeof(uint32_t));
--      WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
-+      WebPInt32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
-       frow += 1;
-       if (frow >= frow_end) break;
-       accum -= wrk->x_sub;
-@@ -132,7 +132,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
-     __m128i base = zero;
-     accum += wrk->x_add;
-     while (accum > 0) {
--      const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src));
-+      const __m128i A = _mm_cvtsi32_si128(WebPMemToInt32(src));
-       src += 4;
-       base = _mm_unpacklo_epi8(A, zero);
-       // To avoid overflow, we need: base * x_add / x_sub < 32768
-@@ -198,7 +198,7 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
-                                         const __m128i* const mult,
-                                         uint8_t* const dst) {
-   const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
--  const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
-+  const __m128i mask = _mm_set_epi32(~0, 0, ~0, 0);
-   const __m128i B0 = _mm_mul_epu32(*A0, *mult);
-   const __m128i B1 = _mm_mul_epu32(*A1, *mult);
-   const __m128i B2 = _mm_mul_epu32(*A2, *mult);
-diff --git a/3rdparty/libwebp/src/dsp/ssim.c b/3rdparty/libwebp/src/dsp/ssim.c
-index 989ce8254c9f..9a1341ed9585 100644
---- a/3rdparty/libwebp/src/dsp/ssim.c
-+++ b/3rdparty/libwebp/src/dsp/ssim.c
-@@ -137,6 +137,7 @@ VP8SSIMGetClippedFunc VP8SSIMGetClipped;
- VP8AccumulateSSEFunc VP8AccumulateSSE;
- #endif
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void VP8SSIMDspInitSSE2(void);
- 
- WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {
-@@ -150,7 +151,7 @@ WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {
- #endif
- 
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       VP8SSIMDspInitSSE2();
-     }
-diff --git a/3rdparty/libwebp/src/dsp/upsampling.c b/3rdparty/libwebp/src/dsp/upsampling.c
-index 9b60da5bbb2a..983b9c42d36c 100644
---- a/3rdparty/libwebp/src/dsp/upsampling.c
-+++ b/3rdparty/libwebp/src/dsp/upsampling.c
-@@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y,
- 
- WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void WebPInitYUV444ConvertersMIPSdspR2(void);
- extern void WebPInitYUV444ConvertersSSE2(void);
- extern void WebPInitYUV444ConvertersSSE41(void);
-@@ -233,12 +234,12 @@ WEBP_DSP_INIT_FUNC(WebPInitYUV444Converters) {
-   WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444_C;
- 
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPInitYUV444ConvertersSSE2();
-     }
- #endif
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-     if (VP8GetCPUInfo(kSSE4_1)) {
-       WebPInitYUV444ConvertersSSE41();
-     }
-@@ -278,12 +279,12 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPInitUpsamplersSSE2();
-     }
- #endif
--#if defined(WEBP_USE_SSE41)
-+#if defined(WEBP_HAVE_SSE41)
-     if (VP8GetCPUInfo(kSSE4_1)) {
-       WebPInitUpsamplersSSE41();
-     }
-@@ -300,7 +301,7 @@ WEBP_DSP_INIT_FUNC(WebPInitUpsamplers) {
- #endif
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     WebPInitUpsamplersNEON();
-diff --git a/3rdparty/libwebp/src/dsp/upsampling_neon.c b/3rdparty/libwebp/src/dsp/upsampling_neon.c
-index 6ba71a7de537..bbc000ca2d38 100644
---- a/3rdparty/libwebp/src/dsp/upsampling_neon.c
-+++ b/3rdparty/libwebp/src/dsp/upsampling_neon.c
-@@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
-   vst4_u8(out, v255_r_g_b);                                             \
- } while (0)
- 
--#if !defined(WEBP_SWAP_16BIT_CSP)
-+#if (WEBP_SWAP_16BIT_CSP == 0)
- #define ZIP_U8(lo, hi) vzip_u8((lo), (hi))
- #else
- #define ZIP_U8(lo, hi) vzip_u8((hi), (lo))
-diff --git a/3rdparty/libwebp/src/dsp/upsampling_sse2.c b/3rdparty/libwebp/src/dsp/upsampling_sse2.c
-index 340f1e2ac238..08b6d0b1cfb8 100644
---- a/3rdparty/libwebp/src/dsp/upsampling_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/upsampling_sse2.c
-@@ -121,7 +121,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
-   int uv_pos, pos;                                                             \
-   /* 16byte-aligned array to cache reconstructed u and v */                    \
-   uint8_t uv_buf[14 * 32 + 15] = { 0 };                                        \
--  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);             \
-+  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15);  \
-   uint8_t* const r_v = r_u + 32;                                               \
-                                                                                \
-   assert(top_y != NULL);                                                       \
-diff --git a/3rdparty/libwebp/src/dsp/yuv.c b/3rdparty/libwebp/src/dsp/yuv.c
-index 14e67fc28ef8..8a04b85d82dd 100644
---- a/3rdparty/libwebp/src/dsp/yuv.c
-+++ b/3rdparty/libwebp/src/dsp/yuv.c
-@@ -70,6 +70,7 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
- 
- WebPSamplerRowFunc WebPSamplers[MODE_LAST];
- 
-+extern VP8CPUInfo VP8GetCPUInfo;
- extern void WebPInitSamplersSSE2(void);
- extern void WebPInitSamplersSSE41(void);
- extern void WebPInitSamplersMIPS32(void);
-@@ -90,16 +91,16 @@ WEBP_DSP_INIT_FUNC(WebPInitSamplers) {
- 
-   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPInitSamplersSSE2();
-     }
--#endif  // WEBP_USE_SSE2
--#if defined(WEBP_USE_SSE41)
-+#endif  // WEBP_HAVE_SSE2
-+#if defined(WEBP_HAVE_SSE41)
-     if (VP8GetCPUInfo(kSSE4_1)) {
-       WebPInitSamplersSSE41();
-     }
--#endif  // WEBP_USE_SSE41
-+#endif  // WEBP_HAVE_SSE41
- #if defined(WEBP_USE_MIPS32)
-     if (VP8GetCPUInfo(kMIPS32)) {
-       WebPInitSamplersMIPS32();
-@@ -194,50 +195,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
- 
- //-----------------------------------------------------------------------------
- 
--#if !WEBP_NEON_OMIT_C_CODE
--#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
--static uint16_t clip_y(int v) {
--  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
--}
--
--static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
--                                  uint16_t* dst, int len) {
--  uint64_t diff = 0;
--  int i;
--  for (i = 0; i < len; ++i) {
--    const int diff_y = ref[i] - src[i];
--    const int new_y = (int)dst[i] + diff_y;
--    dst[i] = clip_y(new_y);
--    diff += (uint64_t)abs(diff_y);
--  }
--  return diff;
--}
--
--static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
--                                int16_t* dst, int len) {
--  int i;
--  for (i = 0; i < len; ++i) {
--    const int diff_uv = ref[i] - src[i];
--    dst[i] += diff_uv;
--  }
--}
--
--static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
--                                const uint16_t* best_y, uint16_t* out) {
--  int i;
--  for (i = 0; i < len; ++i, ++A, ++B) {
--    const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
--    const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
--    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
--    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
--  }
--}
--#endif  // !WEBP_NEON_OMIT_C_CODE
--
--#undef MAX_Y
--
--//-----------------------------------------------------------------------------
--
- void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
- void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
- void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
-@@ -247,18 +204,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
- void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
-                             int src_width, int do_store);
- 
--uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
--                                uint16_t* dst, int len);
--void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
--                              int16_t* dst, int len);
--void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
--                              const uint16_t* best_y, uint16_t* out);
--
- extern void WebPInitConvertARGBToYUVSSE2(void);
- extern void WebPInitConvertARGBToYUVSSE41(void);
- extern void WebPInitConvertARGBToYUVNEON(void);
--extern void WebPInitSharpYUVSSE2(void);
--extern void WebPInitSharpYUVNEON(void);
- 
- WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
-   WebPConvertARGBToY = ConvertARGBToY_C;
-@@ -269,40 +217,29 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
- 
-   WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
- 
--#if !WEBP_NEON_OMIT_C_CODE
--  WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
--  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
--  WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
--#endif
--
-   if (VP8GetCPUInfo != NULL) {
--#if defined(WEBP_USE_SSE2)
-+#if defined(WEBP_HAVE_SSE2)
-     if (VP8GetCPUInfo(kSSE2)) {
-       WebPInitConvertARGBToYUVSSE2();
--      WebPInitSharpYUVSSE2();
-     }
--#endif  // WEBP_USE_SSE2
--#if defined(WEBP_USE_SSE41)
-+#endif  // WEBP_HAVE_SSE2
-+#if defined(WEBP_HAVE_SSE41)
-     if (VP8GetCPUInfo(kSSE4_1)) {
-       WebPInitConvertARGBToYUVSSE41();
-     }
--#endif  // WEBP_USE_SSE41
-+#endif  // WEBP_HAVE_SSE41
-   }
- 
--#if defined(WEBP_USE_NEON)
-+#if defined(WEBP_HAVE_NEON)
-   if (WEBP_NEON_OMIT_C_CODE ||
-       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
-     WebPInitConvertARGBToYUVNEON();
--    WebPInitSharpYUVNEON();
-   }
--#endif  // WEBP_USE_NEON
-+#endif  // WEBP_HAVE_NEON
- 
-   assert(WebPConvertARGBToY != NULL);
-   assert(WebPConvertARGBToUV != NULL);
-   assert(WebPConvertRGB24ToY != NULL);
-   assert(WebPConvertBGR24ToY != NULL);
-   assert(WebPConvertRGBA32ToUV != NULL);
--  assert(WebPSharpYUVUpdateY != NULL);
--  assert(WebPSharpYUVUpdateRGB != NULL);
--  assert(WebPSharpYUVFilterRow != NULL);
- }
-diff --git a/3rdparty/libwebp/src/dsp/yuv.h b/3rdparty/libwebp/src/dsp/yuv.h
-index c12be1d094b6..66a397d117b4 100644
---- a/3rdparty/libwebp/src/dsp/yuv.h
-+++ b/3rdparty/libwebp/src/dsp/yuv.h
-@@ -10,7 +10,7 @@
- // inline YUV<->RGB conversion function
- //
- // The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
--// More information at: http://en.wikipedia.org/wiki/YCbCr
-+// More information at: https://en.wikipedia.org/wiki/YCbCr
- // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
- // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
- // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
-diff --git a/3rdparty/libwebp/src/dsp/yuv_neon.c b/3rdparty/libwebp/src/dsp/yuv_neon.c
-index a34d60248f6a..ff77b009801d 100644
---- a/3rdparty/libwebp/src/dsp/yuv_neon.c
-+++ b/3rdparty/libwebp/src/dsp/yuv_neon.c
-@@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
-   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
- }
- 
--//------------------------------------------------------------------------------
--
--#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
--static uint16_t clip_y_NEON(int v) {
--  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
--}
--
--static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
--                                     uint16_t* dst, int len) {
--  int i;
--  const int16x8_t zero = vdupq_n_s16(0);
--  const int16x8_t max = vdupq_n_s16(MAX_Y);
--  uint64x2_t sum = vdupq_n_u64(0);
--  uint64_t diff;
--
--  for (i = 0; i + 8 <= len; i += 8) {
--    const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
--    const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
--    const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
--    const int16x8_t D = vsubq_s16(A, B);       // diff_y
--    const int16x8_t F = vaddq_s16(C, D);       // new_y
--    const uint16x8_t H =
--        vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
--    const int16x8_t I = vabsq_s16(D);          // abs(diff_y)
--    vst1q_u16(dst + i, H);
--    sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
--  }
--  diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
--  for (; i < len; ++i) {
--    const int diff_y = ref[i] - src[i];
--    const int new_y = (int)(dst[i]) + diff_y;
--    dst[i] = clip_y_NEON(new_y);
--    diff += (uint64_t)(abs(diff_y));
--  }
--  return diff;
--}
--
--static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
--                                   int16_t* dst, int len) {
--  int i;
--  for (i = 0; i + 8 <= len; i += 8) {
--    const int16x8_t A = vld1q_s16(ref + i);
--    const int16x8_t B = vld1q_s16(src + i);
--    const int16x8_t C = vld1q_s16(dst + i);
--    const int16x8_t D = vsubq_s16(A, B);   // diff_uv
--    const int16x8_t E = vaddq_s16(C, D);   // new_uv
--    vst1q_s16(dst + i, E);
--  }
--  for (; i < len; ++i) {
--    const int diff_uv = ref[i] - src[i];
--    dst[i] += diff_uv;
--  }
--}
--
--static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
--                                   const uint16_t* best_y, uint16_t* out) {
--  int i;
--  const int16x8_t max = vdupq_n_s16(MAX_Y);
--  const int16x8_t zero = vdupq_n_s16(0);
--  for (i = 0; i + 8 <= len; i += 8) {
--    const int16x8_t a0 = vld1q_s16(A + i + 0);
--    const int16x8_t a1 = vld1q_s16(A + i + 1);
--    const int16x8_t b0 = vld1q_s16(B + i + 0);
--    const int16x8_t b1 = vld1q_s16(B + i + 1);
--    const int16x8_t a0b1 = vaddq_s16(a0, b1);
--    const int16x8_t a1b0 = vaddq_s16(a1, b0);
--    const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0);  // A0+A1+B0+B1
--    const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1);    // 2*(A0+B1)
--    const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0);    // 2*(A1+B0)
--    const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
--    const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
--    const int16x8_t d0 = vaddq_s16(c1, a0);
--    const int16x8_t d1 = vaddq_s16(c0, a1);
--    const int16x8_t e0 = vrshrq_n_s16(d0, 1);
--    const int16x8_t e1 = vrshrq_n_s16(d1, 1);
--    const int16x8x2_t f = vzipq_s16(e0, e1);
--    const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
--    const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
--    const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
--    const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
--    const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
--    const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
--    vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
--    vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
--  }
--  for (; i < len; ++i) {
--    const int a0b1 = A[i + 0] + B[i + 1];
--    const int a1b0 = A[i + 1] + B[i + 0];
--    const int a0a1b0b1 = a0b1 + a1b0 + 8;
--    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
--    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
--    out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
--    out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
--  }
--}
--#undef MAX_Y
--
--//------------------------------------------------------------------------------
--
--extern void WebPInitSharpYUVNEON(void);
--
--WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
--  WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
--  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
--  WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
--}
--
- #else  // !WEBP_USE_NEON
- 
- WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
--WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
- 
- #endif  // WEBP_USE_NEON
-diff --git a/3rdparty/libwebp/src/dsp/yuv_sse2.c b/3rdparty/libwebp/src/dsp/yuv_sse2.c
-index baa48d537175..01a48f9af2c6 100644
---- a/3rdparty/libwebp/src/dsp/yuv_sse2.c
-+++ b/3rdparty/libwebp/src/dsp/yuv_sse2.c
-@@ -15,10 +15,12 @@
- 
- #if defined(WEBP_USE_SSE2)
- 
--#include "src/dsp/common_sse2.h"
- #include <stdlib.h>
- #include <emmintrin.h>
- 
-+#include "src/dsp/common_sse2.h"
-+#include "src/utils/utils.h"
-+
- //-----------------------------------------------------------------------------
- // Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
- 
-@@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) {
- // Load and replicate the U/V samples
- static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
-+  const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
-   const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
-   return _mm_unpacklo_epi16(tmp1, tmp1);   // replicate samples
- }
-@@ -130,7 +132,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
-   const __m128i rg0 = _mm_packus_epi16(*B, *A);
-   const __m128i ba0 = _mm_packus_epi16(*R, *G);
- #endif
--  const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
-+  const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
-   const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0);  // rbrbrbrbrb...
-   const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0);  // gagagagaga...
-   const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0);
-@@ -147,9 +149,10 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
-   const __m128i r0 = _mm_packus_epi16(*R, *R);
-   const __m128i g0 = _mm_packus_epi16(*G, *G);
-   const __m128i b0 = _mm_packus_epi16(*B, *B);
--  const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8));
-+  const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8));
-   const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f));
--  const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5);
-+  const __m128i g1 =
-+      _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5);
-   const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
-   const __m128i rg = _mm_or_si128(r1, g1);
-   const __m128i gb = _mm_or_si128(g2, b1);
-@@ -747,128 +750,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
-   WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
- }
- 
--//------------------------------------------------------------------------------
--
--#define MAX_Y ((1 << 10) - 1)    // 10b precision over 16b-arithmetic
--static uint16_t clip_y(int v) {
--  return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
--}
--
--static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
--                                     uint16_t* dst, int len) {
--  uint64_t diff = 0;
--  uint32_t tmp[4];
--  int i;
--  const __m128i zero = _mm_setzero_si128();
--  const __m128i max = _mm_set1_epi16(MAX_Y);
--  const __m128i one = _mm_set1_epi16(1);
--  __m128i sum = zero;
--
--  for (i = 0; i + 8 <= len; i += 8) {
--    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
--    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
--    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
--    const __m128i D = _mm_sub_epi16(A, B);       // diff_y
--    const __m128i E = _mm_cmpgt_epi16(zero, D);  // sign (-1 or 0)
--    const __m128i F = _mm_add_epi16(C, D);       // new_y
--    const __m128i G = _mm_or_si128(E, one);      // -1 or 1
--    const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
--    const __m128i I = _mm_madd_epi16(D, G);      // sum(abs(...))
--    _mm_storeu_si128((__m128i*)(dst + i), H);
--    sum = _mm_add_epi32(sum, I);
--  }
--  _mm_storeu_si128((__m128i*)tmp, sum);
--  diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
--  for (; i < len; ++i) {
--    const int diff_y = ref[i] - src[i];
--    const int new_y = (int)dst[i] + diff_y;
--    dst[i] = clip_y(new_y);
--    diff += (uint64_t)abs(diff_y);
--  }
--  return diff;
--}
--
--static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
--                                   int16_t* dst, int len) {
--  int i = 0;
--  for (i = 0; i + 8 <= len; i += 8) {
--    const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
--    const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
--    const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
--    const __m128i D = _mm_sub_epi16(A, B);   // diff_uv
--    const __m128i E = _mm_add_epi16(C, D);   // new_uv
--    _mm_storeu_si128((__m128i*)(dst + i), E);
--  }
--  for (; i < len; ++i) {
--    const int diff_uv = ref[i] - src[i];
--    dst[i] += diff_uv;
--  }
--}
--
--static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
--                                   const uint16_t* best_y, uint16_t* out) {
--  int i;
--  const __m128i kCst8 = _mm_set1_epi16(8);
--  const __m128i max = _mm_set1_epi16(MAX_Y);
--  const __m128i zero = _mm_setzero_si128();
--  for (i = 0; i + 8 <= len; i += 8) {
--    const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
--    const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
--    const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
--    const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
--    const __m128i a0b1 = _mm_add_epi16(a0, b1);
--    const __m128i a1b0 = _mm_add_epi16(a1, b0);
--    const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0);  // A0+A1+B0+B1
--    const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
--    const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1);    // 2*(A0+B1)
--    const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0);    // 2*(A1+B0)
--    const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
--    const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
--    const __m128i d0 = _mm_add_epi16(c1, a0);
--    const __m128i d1 = _mm_add_epi16(c0, a1);
--    const __m128i e0 = _mm_srai_epi16(d0, 1);
--    const __m128i e1 = _mm_srai_epi16(d1, 1);
--    const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
--    const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
--    const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
--    const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
--    const __m128i h0 = _mm_add_epi16(g0, f0);
--    const __m128i h1 = _mm_add_epi16(g1, f1);
--    const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
--    const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
--    _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
--    _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
--  }
--  for (; i < len; ++i) {
--    //   (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
--    // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
--    // We reuse the common sub-expressions.
--    const int a0b1 = A[i + 0] + B[i + 1];
--    const int a1b0 = A[i + 1] + B[i + 0];
--    const int a0a1b0b1 = a0b1 + a1b0 + 8;
--    const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
--    const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
--    out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
--    out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
--  }
--}
--
--#undef MAX_Y
--
--//------------------------------------------------------------------------------
--
--extern void WebPInitSharpYUVSSE2(void);
--
--WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
--  WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
--  WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
--  WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
--}
--
- #else  // !WEBP_USE_SSE2
- 
- WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
- WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
--WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
- 
- #endif  // WEBP_USE_SSE2
-diff --git a/3rdparty/libwebp/src/dsp/yuv_sse41.c b/3rdparty/libwebp/src/dsp/yuv_sse41.c
-index 579d1f7402c2..f79b802e4712 100644
---- a/3rdparty/libwebp/src/dsp/yuv_sse41.c
-+++ b/3rdparty/libwebp/src/dsp/yuv_sse41.c
-@@ -15,10 +15,12 @@
- 
- #if defined(WEBP_USE_SSE41)
- 
--#include "src/dsp/common_sse41.h"
- #include <stdlib.h>
- #include <smmintrin.h>
- 
-+#include "src/dsp/common_sse41.h"
-+#include "src/utils/utils.h"
-+
- //-----------------------------------------------------------------------------
- // Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
- 
-@@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) {
- // Load and replicate the U/V samples
- static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
-   const __m128i zero = _mm_setzero_si128();
--  const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
-+  const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
-   const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
-   return _mm_unpacklo_epi16(tmp1, tmp1);   // replicate samples
- }
-diff --git a/3rdparty/libwebp/src/enc/alpha_enc.c b/3rdparty/libwebp/src/enc/alpha_enc.c
-index dce9ca957d3a..4a599f88a98d 100644
---- a/3rdparty/libwebp/src/enc/alpha_enc.c
-+++ b/3rdparty/libwebp/src/enc/alpha_enc.c
-@@ -13,6 +13,7 @@
- 
- #include <assert.h>
- #include <stdlib.h>
-+#include <string.h>
- 
- #include "src/enc/vp8i_enc.h"
- #include "src/dsp/dsp.h"
-@@ -54,7 +55,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
-   WebPConfig config;
-   WebPPicture picture;
- 
--  WebPPictureInit(&picture);
-+  if (!WebPPictureInit(&picture)) return 0;
-   picture.width = width;
-   picture.height = height;
-   picture.use_argb = 1;
-@@ -86,7 +87,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
-   // a decoder bug related to alpha with color cache.
-   // See: https://code.google.com/p/webp/issues/detail?id=239
-   // Need to re-enable this later.
--  ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK);
-+  ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0);
-   WebPPictureFree(&picture);
-   ok = ok && !bw->error_;
-   if (!ok) {
-@@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
-                               !reduce_levels, &tmp_bw, &result->stats);
-     if (ok) {
-       output = VP8LBitWriterFinish(&tmp_bw);
-+      if (tmp_bw.error_) {
-+        VP8LBitWriterWipeOut(&tmp_bw);
-+        memset(&result->bw, 0, sizeof(result->bw));
-+        return 0;
-+      }
-       output_size = VP8LBitWriterNumBytes(&tmp_bw);
-       if (output_size > data_size) {
-         // compressed size is larger than source! Revert to uncompressed mode.
-@@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
-       }
-     } else {
-       VP8LBitWriterWipeOut(&tmp_bw);
-+      memset(&result->bw, 0, sizeof(result->bw));
-       return 0;
-     }
-   }
-@@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
-   header = method | (filter << 2);
-   if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
- 
--  VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
-+  if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0;
-   ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
-   ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
- 
-@@ -303,7 +310,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
-   int ok = 1;
-   const int reduce_levels = (quality < 100);
- 
--  // quick sanity checks
-+  // quick correctness checks
-   assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
-   assert(enc != NULL && pic != NULL && pic->a != NULL);
-   assert(output != NULL && output_size != NULL);
-@@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc,
-   assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
- 
-   if (quality < 0 || quality > 100) {
--    return 0;
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-   }
- 
-   if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
--    return 0;
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-   }
- 
-   if (method == ALPHA_NO_COMPRESSION) {
-@@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
- 
-   quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
-   if (quant_alpha == NULL) {
--    return 0;
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-   }
- 
-   // Extract alpha data (width x height) from raw_data (stride x height).
-@@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc,
-     ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
-                                filter, reduce_levels, effort_level, output,
-                                output_size, pic->stats);
-+    if (!ok) {
-+      WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
-+    }
- #if !defined(WEBP_DISABLE_STATS)
-     if (pic->stats != NULL) {  // need stats?
-       pic->stats->coded_size += (int)(*output_size);
-@@ -361,7 +371,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
- //------------------------------------------------------------------------------
- // Main calls
- 
--static int CompressAlphaJob(void* arg1, void* dummy) {
-+static int CompressAlphaJob(void* arg1, void* unused) {
-   VP8Encoder* const enc = (VP8Encoder*)arg1;
-   const WebPConfig* config = enc->config_;
-   uint8_t* alpha_data = NULL;
-@@ -375,13 +385,13 @@ static int CompressAlphaJob(void* arg1, void* dummy) {
-                    filter, effort_level, &alpha_data, &alpha_size)) {
-     return 0;
-   }
--  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
-+  if (alpha_size != (uint32_t)alpha_size) {  // Soundness check.
-     WebPSafeFree(alpha_data);
-     return 0;
-   }
-   enc->alpha_data_size_ = (uint32_t)alpha_size;
-   enc->alpha_data_ = alpha_data;
--  (void)dummy;
-+  (void)unused;
-   return 1;
- }
- 
-@@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) {
-       WebPWorker* const worker = &enc->alpha_worker_;
-       // Makes sure worker is good to go.
-       if (!WebPGetWorkerInterface()->Reset(worker)) {
--        return 0;
-+        return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-       }
-       WebPGetWorkerInterface()->Launch(worker);
-       return 1;
-diff --git a/3rdparty/libwebp/src/enc/analysis_enc.c b/3rdparty/libwebp/src/enc/analysis_enc.c
-index ebb784261c63..962eaa998f87 100644
---- a/3rdparty/libwebp/src/enc/analysis_enc.c
-+++ b/3rdparty/libwebp/src/enc/analysis_enc.c
-@@ -391,12 +391,14 @@ static int DoSegmentsJob(void* arg1, void* arg2) {
-   return ok;
- }
- 
-+#ifdef WEBP_USE_THREAD
- static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
-   int i;
-   for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
-   dst->alpha += src->alpha;
-   dst->uv_alpha += src->uv_alpha;
- }
-+#endif
- 
- // initialize the job struct with some tasks to perform
- static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
-@@ -425,10 +427,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
-       (enc->method_ <= 1);  // for method 0 - 1, we need preds_[] to be filled.
-   if (do_segments) {
-     const int last_row = enc->mb_h_;
--    // We give a little more than a half work to the main thread.
--    const int split_row = (9 * last_row + 15) >> 4;
-     const int total_mb = last_row * enc->mb_w_;
- #ifdef WEBP_USE_THREAD
-+    // We give a little more than a half work to the main thread.
-+    const int split_row = (9 * last_row + 15) >> 4;
-     const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
-     const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
- #else
-@@ -438,6 +440,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
-         WebPGetWorkerInterface();
-     SegmentJob main_job;
-     if (do_mt) {
-+#ifdef WEBP_USE_THREAD
-       SegmentJob side_job;
-       // Note the use of '&' instead of '&&' because we must call the functions
-       // no matter what.
-@@ -455,6 +458,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
-       }
-       worker_interface->End(&side_job.worker);
-       if (ok) MergeJobs(&side_job, &main_job);  // merge results together
-+#endif  // WEBP_USE_THREAD
-     } else {
-       // Even for single-thread case, we use the generic Worker tools.
-       InitSegmentJob(enc, &main_job, 0, last_row);
-@@ -470,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
-   } else {   // Use only one default segment.
-     ResetAllMBInfo(enc);
-   }
-+  if (!ok) {
-+    return WebPEncodingSetError(enc->pic_,
-+                                VP8_ENC_ERROR_OUT_OF_MEMORY);  // imprecise
-+  }
-   return ok;
- }
- 
-diff --git a/3rdparty/libwebp/src/enc/backward_references_cost_enc.c b/3rdparty/libwebp/src/enc/backward_references_cost_enc.c
-index 516abd73eb45..6968ef3c9f3e 100644
---- a/3rdparty/libwebp/src/enc/backward_references_cost_enc.c
-+++ b/3rdparty/libwebp/src/enc/backward_references_cost_enc.c
-@@ -15,10 +15,11 @@
- //
- 
- #include <assert.h>
-+#include <float.h>
- 
-+#include "src/dsp/lossless_common.h"
- #include "src/enc/backward_references_enc.h"
- #include "src/enc/histogram_enc.h"
--#include "src/dsp/lossless_common.h"
- #include "src/utils/color_cache_utils.h"
- #include "src/utils/utils.h"
- 
-@@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
-                                       const PixOrCopy v);
- 
- typedef struct {
--  double alpha_[VALUES_IN_BYTE];
--  double red_[VALUES_IN_BYTE];
--  double blue_[VALUES_IN_BYTE];
--  double distance_[NUM_DISTANCE_CODES];
--  double* literal_;
-+  float alpha_[VALUES_IN_BYTE];
-+  float red_[VALUES_IN_BYTE];
-+  float blue_[VALUES_IN_BYTE];
-+  float distance_[NUM_DISTANCE_CODES];
-+  float* literal_;
- } CostModel;
- 
- static void ConvertPopulationCountTableToBitEstimates(
--    int num_symbols, const uint32_t population_counts[], double output[]) {
-+    int num_symbols, const uint32_t population_counts[], float output[]) {
-   uint32_t sum = 0;
-   int nonzeros = 0;
-   int i;
-@@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates(
-   if (nonzeros <= 1) {
-     memset(output, 0, num_symbols * sizeof(*output));
-   } else {
--    const double logsum = VP8LFastLog2(sum);
-+    const float logsum = VP8LFastLog2(sum);
-     for (i = 0; i < num_symbols; ++i) {
-       output[i] = logsum - VP8LFastLog2(population_counts[i]);
-     }
-@@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
-   }
- 
-   ConvertPopulationCountTableToBitEstimates(
--      VP8LHistogramNumCodes(histo->palette_code_bits_),
--      histo->literal_, m->literal_);
-+      VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_,
-+      m->literal_);
-   ConvertPopulationCountTableToBitEstimates(
-       VALUES_IN_BYTE, histo->red_, m->red_);
-   ConvertPopulationCountTableToBitEstimates(
-@@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits,
-   return ok;
- }
- 
--static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
-+static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) {
-   return m->alpha_[v >> 24] +
-          m->red_[(v >> 16) & 0xff] +
-          m->literal_[(v >> 8) & 0xff] +
-          m->blue_[v & 0xff];
- }
- 
--static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
-+static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) {
-   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
-   return m->literal_[literal_idx];
- }
- 
--static WEBP_INLINE double GetLengthCost(const CostModel* const m,
--                                        uint32_t length) {
-+static WEBP_INLINE float GetLengthCost(const CostModel* const m,
-+                                       uint32_t length) {
-   int code, extra_bits;
-   VP8LPrefixEncodeBits(length, &code, &extra_bits);
-   return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
- }
- 
--static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
--                                          uint32_t distance) {
-+static WEBP_INLINE float GetDistanceCost(const CostModel* const m,
-+                                         uint32_t distance) {
-   int code, extra_bits;
-   VP8LPrefixEncodeBits(distance, &code, &extra_bits);
-   return m->distance_[code] + extra_bits;
-@@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel(
-     const uint32_t* const argb, VP8LColorCache* const hashers,
-     const CostModel* const cost_model, int idx, int use_color_cache,
-     float prev_cost, float* const cost, uint16_t* const dist_array) {
--  double cost_val = prev_cost;
-+  float cost_val = prev_cost;
-   const uint32_t color = argb[idx];
-   const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1;
-   if (ix >= 0) {
-     // use_color_cache is true and hashers contains color
--    const double mul0 = 0.68;
-+    const float mul0 = 0.68f;
-     cost_val += GetCacheCost(cost_model, ix) * mul0;
-   } else {
--    const double mul1 = 0.82;
-+    const float mul1 = 0.82f;
-     if (use_color_cache) VP8LColorCacheInsert(hashers, color);
-     cost_val += GetLiteralCost(cost_model, color) * mul1;
-   }
-   if (cost[idx] > cost_val) {
--    cost[idx] = (float)cost_val;
-+    cost[idx] = cost_val;
-     dist_array[idx] = 1;  // only one is inserted.
-   }
- }
-@@ -172,7 +173,7 @@ struct CostInterval {
- 
- // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval.
- typedef struct {
--  double cost_;
-+  float cost_;
-   int start_;
-   int end_;       // Exclusive.
- } CostCacheInterval;
-@@ -187,7 +188,7 @@ typedef struct {
-   int count_;  // The number of stored intervals.
-   CostCacheInterval* cache_intervals_;
-   size_t cache_intervals_size_;
--  double cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
-+  float cost_cache_[MAX_LENGTH];  // Contains the GetLengthCost(cost_model, k).
-   float* costs_;
-   uint16_t* dist_array_;
-   // Most of the time, we only need few intervals -> use a free-list, to avoid
-@@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager,
-   CostManagerInitFreeList(manager);
- 
-   // Fill in the cost_cache_.
-+  // Has to be done in two passes due to a GCC bug on i686
-+  // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
-+  for (i = 0; i < cost_cache_size; ++i) {
-+    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
-+  }
-   manager->cache_intervals_size_ = 1;
--  manager->cost_cache_[0] = GetLengthCost(cost_model, 0);
-   for (i = 1; i < cost_cache_size; ++i) {
--    manager->cost_cache_[i] = GetLengthCost(cost_model, i);
-     // Get the number of bound intervals.
-     if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) {
-       ++manager->cache_intervals_size_;
-@@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager,
-     cur->end_ = 1;
-     cur->cost_ = manager->cost_cache_[0];
-     for (i = 1; i < cost_cache_size; ++i) {
--      const double cost_val = manager->cost_cache_[i];
-+      const float cost_val = manager->cost_cache_[i];
-       if (cost_val != cur->cost_) {
-         ++cur;
-         // Initialize an interval.
-@@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager,
-       }
-       cur->end_ = i + 1;
-     }
-+    assert((size_t)(cur - manager->cache_intervals_) + 1 ==
-+           manager->cache_intervals_size_);
-   }
- 
-   manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_));
-@@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager,
-     return 0;
-   }
-   // Set the initial costs_ high for every pixel as we will keep the minimum.
--  for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f;
-+  for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX;
- 
-   return 1;
- }
-@@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager,
- // If handling the interval or one of its subintervals becomes to heavy, its
- // contribution is added to the costs right away.
- static WEBP_INLINE void PushInterval(CostManager* const manager,
--                                     double distance_cost, int position,
-+                                     float distance_cost, int position,
-                                      int len) {
-   size_t i;
-   CostInterval* interval = manager->head_;
-@@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
-       const int k = j - position;
-       float cost_tmp;
-       assert(k >= 0 && k < MAX_LENGTH);
--      cost_tmp = (float)(distance_cost + manager->cost_cache_[k]);
-+      cost_tmp = distance_cost + manager->cost_cache_[k];
- 
-       if (manager->costs_[j] > cost_tmp) {
-         manager->costs_[j] = cost_tmp;
-@@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager,
-     const int end = position + (cost_cache_intervals[i].end_ > len
-                                  ? len
-                                  : cost_cache_intervals[i].end_);
--    const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_);
-+    const float cost = distance_cost + cost_cache_intervals[i].cost_;
- 
-     for (; interval != NULL && interval->start_ < end;
-          interval = interval_next) {
-@@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly(
-   const int pix_count = xsize * ysize;
-   const int use_color_cache = (cache_bits > 0);
-   const size_t literal_array_size =
--      sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
--                        ((cache_bits > 0) ? (1 << cache_bits) : 0));
-+      sizeof(float) * (VP8LHistogramNumCodes(cache_bits));
-   const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
-   CostModel* const cost_model =
-       (CostModel*)WebPSafeCalloc(1ULL, cost_model_size);
-   VP8LColorCache hashers;
-   CostManager* cost_manager =
--      (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager));
-+      (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager));
-   int offset_prev = -1, len_prev = -1;
--  double offset_cost = -1;
-+  float offset_cost = -1.f;
-   int first_offset_is_constant = -1;  // initialized with 'impossible' value
-   int reach = 0;
- 
-   if (cost_model == NULL || cost_manager == NULL) goto Error;
- 
--  cost_model->literal_ = (double*)(cost_model + 1);
-+  cost_model->literal_ = (float*)(cost_model + 1);
-   if (use_color_cache) {
-     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-     if (!cc_init) goto Error;
-@@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly(
-   }
- 
-   ok = !refs->error_;
--Error:
-+ Error:
-   if (cc_init) VP8LColorCacheClear(&hashers);
-   CostManagerClear(cost_manager);
-   WebPSafeFree(cost_model);
-diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.c b/3rdparty/libwebp/src/enc/backward_references_enc.c
-index 519b36a09153..dc98bf171943 100644
---- a/3rdparty/libwebp/src/enc/backward_references_enc.c
-+++ b/3rdparty/libwebp/src/enc/backward_references_enc.c
-@@ -10,6 +10,8 @@
- // Author: Jyrki Alakuijala (jyrki@google.com)
- //
- 
-+#include "src/enc/backward_references_enc.h"
-+
- #include <assert.h>
- #include <float.h>
- #include <math.h>
-@@ -17,10 +19,11 @@
- #include "src/dsp/dsp.h"
- #include "src/dsp/lossless.h"
- #include "src/dsp/lossless_common.h"
--#include "src/enc/backward_references_enc.h"
- #include "src/enc/histogram_enc.h"
-+#include "src/enc/vp8i_enc.h"
- #include "src/utils/color_cache_utils.h"
- #include "src/utils/utils.h"
-+#include "src/webp/encode.h"
- 
- #define MIN_BLOCK_SIZE 256  // minimum block size for backward references
- 
-@@ -255,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) {
- 
- int VP8LHashChainFill(VP8LHashChain* const p, int quality,
-                       const uint32_t* const argb, int xsize, int ysize,
--                      int low_effort) {
-+                      int low_effort, const WebPPicture* const pic,
-+                      int percent_range, int* const percent) {
-   const int size = xsize * ysize;
-   const int iter_max = GetMaxItersForQuality(quality);
-   const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
-+  int remaining_percent = percent_range;
-+  int percent_start = *percent;
-   int pos;
-   int argb_comp;
-   uint32_t base_position;
-@@ -276,7 +282,12 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
- 
-   hash_to_first_index =
-       (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
--  if (hash_to_first_index == NULL) return 0;
-+  if (hash_to_first_index == NULL) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+  }
-+
-+  percent_range = remaining_percent / 2;
-+  remaining_percent -= percent_range;
- 
-   // Set the int32_t array to -1.
-   memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
-@@ -323,12 +334,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
-       hash_to_first_index[hash_code] = pos++;
-       argb_comp = argb_comp_next;
-     }
-+
-+    if (!WebPReportProgress(
-+            pic, percent_start + percent_range * pos / (size - 2), percent)) {
-+      WebPSafeFree(hash_to_first_index);
-+      return 0;
-+    }
-   }
-   // Process the penultimate pixel.
-   chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
- 
-   WebPSafeFree(hash_to_first_index);
- 
-+  percent_start += percent_range;
-+  if (!WebPReportProgress(pic, percent_start, percent)) return 0;
-+  percent_range = remaining_percent;
-+
-   // Find the best match interval at each pixel, defined by an offset to the
-   // pixel and a length. The right-most pixel cannot match anything to the right
-   // (hence a best length of 0) and the left-most pixel nothing to the left
-@@ -417,8 +438,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
-         max_base_position = base_position;
-       }
-     }
-+
-+    if (!WebPReportProgress(pic,
-+                            percent_start + percent_range *
-+                                                (size - 2 - base_position) /
-+                                                (size - 2),
-+                            percent)) {
-+      return 0;
-+    }
-   }
--  return 1;
-+
-+  return WebPReportProgress(pic, percent_start + percent_range, percent);
- }
- 
- static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
-@@ -728,7 +758,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
-                                   int* const best_cache_bits) {
-   int i;
-   const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
--  double entropy_min = MAX_ENTROPY;
-+  float entropy_min = MAX_ENTROPY;
-   int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
-   VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
-   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
-@@ -813,14 +843,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality,
-   }
- 
-   for (i = 0; i <= cache_bits_max; ++i) {
--    const double entropy = VP8LHistogramEstimateBits(histos[i]);
-+    const float entropy = VP8LHistogramEstimateBits(histos[i]);
-     if (i == 0 || entropy < entropy_min) {
-       entropy_min = entropy;
-       *best_cache_bits = i;
-     }
-   }
-   ok = 1;
--Error:
-+ Error:
-   for (i = 0; i <= cache_bits_max; ++i) {
-     if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
-     VP8LFreeHistogram(histos[i]);
-@@ -890,7 +920,7 @@ static int GetBackwardReferences(int width, int height,
-   int i, lz77_type;
-   // Index 0 is for a color cache, index 1 for no cache (if needed).
-   int lz77_types_best[2] = {0, 0};
--  double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
-+  float bit_costs_best[2] = {FLT_MAX, FLT_MAX};
-   VP8LHashChain hash_chain_box;
-   VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
-   int status = 0;
-@@ -902,7 +932,7 @@ static int GetBackwardReferences(int width, int height,
-   for (lz77_type = 1; lz77_types_to_try;
-        lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
-     int res = 0;
--    double bit_cost = 0.;
-+    float bit_cost = 0.f;
-     if ((lz77_types_to_try & lz77_type) == 0) continue;
-     switch (lz77_type) {
-       case kLZ77RLE:
-@@ -976,15 +1006,16 @@ static int GetBackwardReferences(int width, int height,
-       const VP8LHashChain* const hash_chain_tmp =
-           (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
-       const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
--      if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
--                                               hash_chain_tmp, &refs[i],
--                                               refs_tmp)) {
--        double bit_cost_trace;
--        VP8LHistogramCreate(histo, refs_tmp, cache_bits);
--        bit_cost_trace = VP8LHistogramEstimateBits(histo);
--        if (bit_cost_trace < bit_costs_best[i]) {
--          BackwardRefsSwap(refs_tmp, &refs[i]);
--        }
-+      float bit_cost_trace;
-+      if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
-+                                                hash_chain_tmp, &refs[i],
-+                                                refs_tmp)) {
-+        goto Error;
-+      }
-+      VP8LHistogramCreate(histo, refs_tmp, cache_bits);
-+      bit_cost_trace = VP8LHistogramEstimateBits(histo);
-+      if (bit_cost_trace < bit_costs_best[i]) {
-+        BackwardRefsSwap(refs_tmp, &refs[i]);
-       }
-     }
- 
-@@ -1000,31 +1031,35 @@ static int GetBackwardReferences(int width, int height,
-   }
-   status = 1;
- 
--Error:
-+ Error:
-   VP8LHashChainClear(&hash_chain_box);
-   VP8LFreeHistogram(histo);
-   return status;
- }
- 
--WebPEncodingError VP8LGetBackwardReferences(
-+int VP8LGetBackwardReferences(
-     int width, int height, const uint32_t* const argb, int quality,
-     int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
-     const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
--    int* const cache_bits_best) {
-+    int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
-+    int* const percent) {
-   if (low_effort) {
-     VP8LBackwardRefs* refs_best;
-     *cache_bits_best = cache_bits_max;
-     refs_best = GetBackwardReferencesLowEffort(
-         width, height, argb, cache_bits_best, hash_chain, refs);
--    if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    if (refs_best == NULL) {
-+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    }
-     // Set it in first position.
-     BackwardRefsSwap(refs_best, &refs[0]);
-   } else {
-     if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
-                                cache_bits_max, do_no_cache, hash_chain, refs,
-                                cache_bits_best)) {
--      return VP8_ENC_ERROR_OUT_OF_MEMORY;
-+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     }
-   }
--  return VP8_ENC_OK;
-+
-+  return WebPReportProgress(pic, *percent + percent_range, percent);
- }
-diff --git a/3rdparty/libwebp/src/enc/backward_references_enc.h b/3rdparty/libwebp/src/enc/backward_references_enc.h
-index 4c0267b41e90..4dff1c27b57c 100644
---- a/3rdparty/libwebp/src/enc/backward_references_enc.h
-+++ b/3rdparty/libwebp/src/enc/backward_references_enc.h
-@@ -134,10 +134,11 @@ struct VP8LHashChain {
- 
- // Must be called first, to set size.
- int VP8LHashChainInit(VP8LHashChain* const p, int size);
--// Pre-compute the best matches for argb.
-+// Pre-compute the best matches for argb. pic and percent are for progress.
- int VP8LHashChainFill(VP8LHashChain* const p, int quality,
-                       const uint32_t* const argb, int xsize, int ysize,
--                      int low_effort);
-+                      int low_effort, const WebPPicture* const pic,
-+                      int percent_range, int* const percent);
- void VP8LHashChainClear(VP8LHashChain* const p);  // release memory
- 
- static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p,
-@@ -227,11 +228,14 @@ enum VP8LLZ77Type {
- // VP8LBackwardRefs is put in the first element, the best value with no-cache in
- // the second element.
- // In both cases, the last element is used as temporary internally.
--WebPEncodingError VP8LGetBackwardReferences(
-+// pic and percent are for progress.
-+// Returns false in case of error (stored in pic->error_code).
-+int VP8LGetBackwardReferences(
-     int width, int height, const uint32_t* const argb, int quality,
-     int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
-     const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
--    int* const cache_bits_best);
-+    int* const cache_bits_best, const WebPPicture* const pic, int percent_range,
-+    int* const percent);
- 
- #ifdef __cplusplus
- }
-diff --git a/3rdparty/libwebp/src/enc/frame_enc.c b/3rdparty/libwebp/src/enc/frame_enc.c
-index af538d83bacd..01860ca757e6 100644
---- a/3rdparty/libwebp/src/enc/frame_enc.c
-+++ b/3rdparty/libwebp/src/enc/frame_enc.c
-@@ -578,7 +578,7 @@ static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
-   uint64_t size = 0;
-   uint64_t size_p0 = 0;
-   uint64_t distortion = 0;
--  const uint64_t pixel_count = nb_mbs * 384;
-+  const uint64_t pixel_count = (uint64_t)nb_mbs * 384;
- 
-   VP8IteratorInit(enc, &it);
-   SetLoopParams(enc, s->q);
-@@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) {
-   }
-   if (!ok) {
-     VP8EncFreeBitWriters(enc);  // malloc error occurred
--    WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-   }
-   return ok;
- }
-@@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
-   } else {
-     // Something bad happened -> need to do some memory cleanup.
-     VP8EncFreeBitWriters(enc);
-+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-   }
-   return ok;
- }
-@@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) {
-     // *then* decide how to code the skip decision if there's one.
-     if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
-       CodeResiduals(it.bw_, &it, &info);
-+      if (it.bw_->error_) {
-+        // enc->pic_->error_code is set in PostLoopFinalize().
-+        ok = 0;
-+        break;
-+      }
-     } else {   // reset predictors after a skip
-       ResetAfterSkip(&it);
-     }
-@@ -778,11 +784,12 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
-   // Roughly refresh the proba eight times per pass
-   int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
-   int num_pass_left = enc->config_->pass;
-+  int remaining_progress = 40;  // percents
-   const int do_search = enc->do_search_;
-   VP8EncIterator it;
-   VP8EncProba* const proba = &enc->proba_;
-   const VP8RDLevel rd_opt = enc->rd_opt_level_;
--  const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
-+  const uint64_t pixel_count = (uint64_t)enc->mb_w_ * enc->mb_h_ * 384;
-   PassStats stats;
-   int ok;
- 
-@@ -805,6 +812,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
-     uint64_t size_p0 = 0;
-     uint64_t distortion = 0;
-     int cnt = max_count;
-+    // The final number of passes is not trivial to know in advance.
-+    const int pass_progress = remaining_progress / (2 + num_pass_left);
-+    remaining_progress -= pass_progress;
-     VP8IteratorInit(enc, &it);
-     SetLoopParams(enc, stats.q);
-     if (is_last_pass) {
-@@ -832,7 +842,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
-         StoreSideInfo(&it);
-         VP8StoreFilterStats(&it);
-         VP8IteratorExport(&it);
--        ok = VP8IteratorProgress(&it, 20);
-+        ok = VP8IteratorProgress(&it, pass_progress);
-       }
-       VP8IteratorSaveBoundary(&it);
-     } while (ok && VP8IteratorNext(&it));
-@@ -878,7 +888,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
-     ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
-                        (const uint8_t*)proba->coeffs_, 1);
-   }
--  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
-+  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
-+                                &enc->percent_);
-   return PostLoopFinalize(&it, ok);
- }
- 
-diff --git a/3rdparty/libwebp/src/enc/histogram_enc.c b/3rdparty/libwebp/src/enc/histogram_enc.c
-index edc6e4faa43b..3ca67b3ad09b 100644
---- a/3rdparty/libwebp/src/enc/histogram_enc.c
-+++ b/3rdparty/libwebp/src/enc/histogram_enc.c
-@@ -13,15 +13,17 @@
- #include "src/webp/config.h"
- #endif
- 
-+#include <float.h>
- #include <math.h>
- 
--#include "src/enc/backward_references_enc.h"
--#include "src/enc/histogram_enc.h"
- #include "src/dsp/lossless.h"
- #include "src/dsp/lossless_common.h"
-+#include "src/enc/backward_references_enc.h"
-+#include "src/enc/histogram_enc.h"
-+#include "src/enc/vp8i_enc.h"
- #include "src/utils/utils.h"
- 
--#define MAX_COST 1.e38
-+#define MAX_BIT_COST FLT_MAX
- 
- // Number of partitions for the three dominant (literal, red and blue) symbol
- // costs.
-@@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
- // -----------------------------------------------------------------------------
- // Entropy-related functions.
- 
--static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
--  double mix;
-+static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) {
-+  float mix;
-   if (entropy->nonzeros < 5) {
-     if (entropy->nonzeros <= 1) {
-       return 0;
-@@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
-     // Let's mix in a bit of entropy to favor good clustering when
-     // distributions of these are combined.
-     if (entropy->nonzeros == 2) {
--      return 0.99 * entropy->sum + 0.01 * entropy->entropy;
-+      return 0.99f * entropy->sum + 0.01f * entropy->entropy;
-     }
-     // No matter what the entropy says, we cannot be better than min_limit
-     // with Huffman coding. I am mixing a bit of entropy into the
-     // min_limit since it produces much better (~0.5 %) compression results
-     // perhaps because of better entropy clustering.
-     if (entropy->nonzeros == 3) {
--      mix = 0.95;
-+      mix = 0.95f;
-     } else {
--      mix = 0.7;  // nonzeros == 4.
-+      mix = 0.7f;  // nonzeros == 4.
-     }
-   } else {
--    mix = 0.627;
-+    mix = 0.627f;
-   }
- 
-   {
--    double min_limit = 2 * entropy->sum - entropy->max_val;
--    min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
-+    float min_limit = 2.f * entropy->sum - entropy->max_val;
-+    min_limit = mix * min_limit + (1.f - mix) * entropy->entropy;
-     return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
-   }
- }
- 
--double VP8LBitsEntropy(const uint32_t* const array, int n) {
-+float VP8LBitsEntropy(const uint32_t* const array, int n) {
-   VP8LBitEntropy entropy;
-   VP8LBitsEntropyUnrefined(array, n, &entropy);
- 
-   return BitsEntropyRefine(&entropy);
- }
- 
--static double InitialHuffmanCost(void) {
-+static float InitialHuffmanCost(void) {
-   // Small bias because Huffman code length is typically not stored in
-   // full length.
-   static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
--  static const double kSmallBias = 9.1;
-+  static const float kSmallBias = 9.1f;
-   return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
- }
- 
- // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
--static double FinalHuffmanCost(const VP8LStreaks* const stats) {
-+static float FinalHuffmanCost(const VP8LStreaks* const stats) {
-   // The constants in this function are experimental and got rounded from
-   // their original values in 1/8 when switched to 1/1024.
--  double retval = InitialHuffmanCost();
-+  float retval = InitialHuffmanCost();
-   // Second coefficient: Many zeros in the histogram are covered efficiently
-   // by a run-length encode. Originally 2/8.
--  retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
-+  retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1];
-   // Second coefficient: Constant values are encoded less efficiently, but still
-   // RLE'ed. Originally 6/8.
--  retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
-+  retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1];
-   // 0s are usually encoded more efficiently than non-0s.
-   // Originally 15/8.
--  retval += 1.796875 * stats->streaks[0][0];
-+  retval += 1.796875f * stats->streaks[0][0];
-   // Originally 26/8.
--  retval += 3.28125 * stats->streaks[1][0];
-+  retval += 3.28125f * stats->streaks[1][0];
-   return retval;
- }
- 
- // Get the symbol entropy for the distribution 'population'.
- // Set 'trivial_sym', if there's only one symbol present in the distribution.
--static double PopulationCost(const uint32_t* const population, int length,
--                             uint32_t* const trivial_sym,
--                             uint8_t* const is_used) {
-+static float PopulationCost(const uint32_t* const population, int length,
-+                            uint32_t* const trivial_sym,
-+                            uint8_t* const is_used) {
-   VP8LBitEntropy bit_entropy;
-   VP8LStreaks stats;
-   VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
-@@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length,
- 
- // trivial_at_end is 1 if the two histograms only have one element that is
- // non-zero: both the zero-th one, or both the last one.
--static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
--                                             const uint32_t* const Y,
--                                             int length, int is_X_used,
--                                             int is_Y_used,
--                                             int trivial_at_end) {
-+static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X,
-+                                            const uint32_t* const Y, int length,
-+                                            int is_X_used, int is_Y_used,
-+                                            int trivial_at_end) {
-   VP8LStreaks stats;
-   if (trivial_at_end) {
-     // This configuration is due to palettization that transforms an indexed
-@@ -356,16 +357,18 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
- }
- 
- // Estimates the Entropy + Huffman + other block overhead size cost.
--double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
--  return
--      PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
--                     NULL, &p->is_used_[0])
--      + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1])
--      + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2])
--      + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3])
--      + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL, &p->is_used_[4])
--      + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
--      + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
-+float VP8LHistogramEstimateBits(VP8LHistogram* const p) {
-+  return PopulationCost(p->literal_,
-+                        VP8LHistogramNumCodes(p->palette_code_bits_), NULL,
-+                        &p->is_used_[0]) +
-+         PopulationCost(p->red_, NUM_LITERAL_CODES, NULL, &p->is_used_[1]) +
-+         PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL, &p->is_used_[2]) +
-+         PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL, &p->is_used_[3]) +
-+         PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL,
-+                        &p->is_used_[4]) +
-+         (float)VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES,
-+                              NUM_LENGTH_CODES) +
-+         (float)VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
- }
- 
- // -----------------------------------------------------------------------------
-@@ -373,17 +376,16 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) {
- 
- static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
-                                        const VP8LHistogram* const b,
--                                       double cost_threshold,
--                                       double* cost) {
-+                                       float cost_threshold, float* cost) {
-   const int palette_code_bits = a->palette_code_bits_;
-   int trivial_at_end = 0;
-   assert(a->palette_code_bits_ == b->palette_code_bits_);
-   *cost += GetCombinedEntropy(a->literal_, b->literal_,
-                               VP8LHistogramNumCodes(palette_code_bits),
-                               a->is_used_[0], b->is_used_[0], 0);
--  *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
--                                 b->literal_ + NUM_LITERAL_CODES,
--                                 NUM_LENGTH_CODES);
-+  *cost += (float)VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
-+                                        b->literal_ + NUM_LITERAL_CODES,
-+                                        NUM_LENGTH_CODES);
-   if (*cost > cost_threshold) return 0;
- 
-   if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&
-@@ -417,8 +419,8 @@ static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
-   *cost +=
-       GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES,
-                          a->is_used_[4], b->is_used_[4], 0);
--  *cost +=
--      VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
-+  *cost += (float)VP8LExtraCostCombined(a->distance_, b->distance_,
-+                                        NUM_DISTANCE_CODES);
-   if (*cost > cost_threshold) return 0;
- 
-   return 1;
-@@ -439,12 +441,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
- // Since the previous score passed is 'cost_threshold', we only need to compare
- // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
- // early.
--static double HistogramAddEval(const VP8LHistogram* const a,
--                               const VP8LHistogram* const b,
--                               VP8LHistogram* const out,
--                               double cost_threshold) {
--  double cost = 0;
--  const double sum_cost = a->bit_cost_ + b->bit_cost_;
-+static float HistogramAddEval(const VP8LHistogram* const a,
-+                              const VP8LHistogram* const b,
-+                              VP8LHistogram* const out, float cost_threshold) {
-+  float cost = 0;
-+  const float sum_cost = a->bit_cost_ + b->bit_cost_;
-   cost_threshold += sum_cost;
- 
-   if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
-@@ -459,10 +460,10 @@ static double HistogramAddEval(const VP8LHistogram* const a,
- // Same as HistogramAddEval(), except that the resulting histogram
- // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
- // the term C(b) which is constant over all the evaluations.
--static double HistogramAddThresh(const VP8LHistogram* const a,
--                                 const VP8LHistogram* const b,
--                                 double cost_threshold) {
--  double cost;
-+static float HistogramAddThresh(const VP8LHistogram* const a,
-+                                const VP8LHistogram* const b,
-+                                float cost_threshold) {
-+  float cost;
-   assert(a != NULL && b != NULL);
-   cost = -a->bit_cost_;
-   GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
-@@ -473,24 +474,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a,
- 
- // The structure to keep track of cost range for the three dominant entropy
- // symbols.
--// TODO(skal): Evaluate if float can be used here instead of double for
--// representing the entropy costs.
- typedef struct {
--  double literal_max_;
--  double literal_min_;
--  double red_max_;
--  double red_min_;
--  double blue_max_;
--  double blue_min_;
-+  float literal_max_;
-+  float literal_min_;
-+  float red_max_;
-+  float red_min_;
-+  float blue_max_;
-+  float blue_min_;
- } DominantCostRange;
- 
- static void DominantCostRangeInit(DominantCostRange* const c) {
-   c->literal_max_ = 0.;
--  c->literal_min_ = MAX_COST;
-+  c->literal_min_ = MAX_BIT_COST;
-   c->red_max_ = 0.;
--  c->red_min_ = MAX_COST;
-+  c->red_min_ = MAX_BIT_COST;
-   c->blue_max_ = 0.;
--  c->blue_min_ = MAX_COST;
-+  c->blue_min_ = MAX_BIT_COST;
- }
- 
- static void UpdateDominantCostRange(
-@@ -505,16 +504,15 @@ static void UpdateDominantCostRange(
- 
- static void UpdateHistogramCost(VP8LHistogram* const h) {
-   uint32_t alpha_sym, red_sym, blue_sym;
--  const double alpha_cost =
--      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym,
--                     &h->is_used_[3]);
--  const double distance_cost =
-+  const float alpha_cost =
-+      PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]);
-+  const float distance_cost =
-       PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) +
--      VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
-+      (float)VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
-   const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
-   h->literal_cost_ =
-       PopulationCost(h->literal_, num_codes, NULL, &h->is_used_[0]) +
--          VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
-+      (float)VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES);
-   h->red_cost_ =
-       PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym, &h->is_used_[1]);
-   h->blue_cost_ =
-@@ -529,10 +527,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) {
-   }
- }
- 
--static int GetBinIdForEntropy(double min, double max, double val) {
--  const double range = max - min;
-+static int GetBinIdForEntropy(float min, float max, float val) {
-+  const float range = max - min;
-   if (range > 0.) {
--    const double delta = val - min;
-+    const float delta = val - min;
-     return (int)((NUM_PARTITIONS - 1e-6) * delta / range);
-   } else {
-     return 0;
-@@ -641,15 +639,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
- 
- // Merges some histograms with same bin_id together if it's advantageous.
- // Sets the remaining histograms to NULL.
--static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
--                                       int* num_used,
--                                       const uint16_t* const clusters,
--                                       uint16_t* const cluster_mappings,
--                                       VP8LHistogram* cur_combo,
--                                       const uint16_t* const bin_map,
--                                       int num_bins,
--                                       double combine_cost_factor,
--                                       int low_effort) {
-+static void HistogramCombineEntropyBin(
-+    VP8LHistogramSet* const image_histo, int* num_used,
-+    const uint16_t* const clusters, uint16_t* const cluster_mappings,
-+    VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins,
-+    float combine_cost_factor, int low_effort) {
-   VP8LHistogram** const histograms = image_histo->histograms;
-   int idx;
-   struct {
-@@ -679,11 +673,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
-       cluster_mappings[clusters[idx]] = clusters[first];
-     } else {
-       // try to merge #idx into #first (both share the same bin_id)
--      const double bit_cost = histograms[idx]->bit_cost_;
--      const double bit_cost_thresh = -bit_cost * combine_cost_factor;
--      const double curr_cost_diff =
--          HistogramAddEval(histograms[first], histograms[idx],
--                           cur_combo, bit_cost_thresh);
-+      const float bit_cost = histograms[idx]->bit_cost_;
-+      const float bit_cost_thresh = -bit_cost * combine_cost_factor;
-+      const float curr_cost_diff = HistogramAddEval(
-+          histograms[first], histograms[idx], cur_combo, bit_cost_thresh);
-       if (curr_cost_diff < bit_cost_thresh) {
-         // Try to merge two histograms only if the combo is a trivial one or
-         // the two candidate histograms are already non-trivial.
-@@ -731,8 +724,8 @@ static uint32_t MyRand(uint32_t* const seed) {
- typedef struct {
-   int idx1;
-   int idx2;
--  double cost_diff;
--  double cost_combo;
-+  float cost_diff;
-+  float cost_combo;
- } HistogramPair;
- 
- typedef struct {
-@@ -787,10 +780,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue,
- // Update the cost diff and combo of a pair of histograms. This needs to be
- // called when the the histograms have been merged with a third one.
- static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
--                                 const VP8LHistogram* const h2,
--                                 double threshold,
-+                                 const VP8LHistogram* const h2, float threshold,
-                                  HistogramPair* const pair) {
--  const double sum_cost = h1->bit_cost_ + h2->bit_cost_;
-+  const float sum_cost = h1->bit_cost_ + h2->bit_cost_;
-   pair->cost_combo = 0.;
-   GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo);
-   pair->cost_diff = pair->cost_combo - sum_cost;
-@@ -799,9 +791,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1,
- // Create a pair from indices "idx1" and "idx2" provided its cost
- // is inferior to "threshold", a negative entropy.
- // It returns the cost of the pair, or 0. if it superior to threshold.
--static double HistoQueuePush(HistoQueue* const histo_queue,
--                             VP8LHistogram** const histograms, int idx1,
--                             int idx2, double threshold) {
-+static float HistoQueuePush(HistoQueue* const histo_queue,
-+                            VP8LHistogram** const histograms, int idx1,
-+                            int idx2, float threshold) {
-   const VP8LHistogram* h1;
-   const VP8LHistogram* h2;
-   HistogramPair pair;
-@@ -945,8 +937,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
-            ++tries_with_no_success < num_tries_no_success;
-        ++iter) {
-     int* mapping_index;
--    double best_cost =
--        (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff;
-+    float best_cost =
-+        (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff;
-     int best_idx1 = -1, best_idx2 = 1;
-     const uint32_t rand_range = (*num_used - 1) * (*num_used);
-     // (*num_used) / 2 was chosen empirically. Less means faster but worse
-@@ -955,7 +947,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
- 
-     // Pick random samples.
-     for (j = 0; *num_used >= 2 && j < num_tries; ++j) {
--      double curr_cost;
-+      float curr_cost;
-       // Choose two different histograms at random and try to combine them.
-       const uint32_t tmp = MyRand(&seed) % rand_range;
-       uint32_t idx1 = tmp / (*num_used - 1);
-@@ -1034,7 +1026,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
-   *do_greedy = (*num_used <= min_cluster_size);
-   ok = 1;
- 
--End:
-+ End:
-   HistoQueueClear(&histo_queue);
-   WebPSafeFree(mappings);
-   return ok;
-@@ -1057,7 +1049,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
-   if (out_size > 1) {
-     for (i = 0; i < in_size; ++i) {
-       int best_out = 0;
--      double best_bits = MAX_COST;
-+      float best_bits = MAX_BIT_COST;
-       int k;
-       if (in_histo[i] == NULL) {
-         // Arbitrarily set to the previous value if unused to help future LZ77.
-@@ -1065,7 +1057,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
-         continue;
-       }
-       for (k = 0; k < out_size; ++k) {
--        double cur_bits;
-+        float cur_bits;
-         cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
-         if (k == 0 || cur_bits < best_bits) {
-           best_bits = cur_bits;
-@@ -1093,13 +1085,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
-   }
- }
- 
--static double GetCombineCostFactor(int histo_size, int quality) {
--  double combine_cost_factor = 0.16;
-+static float GetCombineCostFactor(int histo_size, int quality) {
-+  float combine_cost_factor = 0.16f;
-   if (quality < 90) {
--    if (histo_size > 256) combine_cost_factor /= 2.;
--    if (histo_size > 512) combine_cost_factor /= 2.;
--    if (histo_size > 1024) combine_cost_factor /= 2.;
--    if (quality <= 50) combine_cost_factor /= 2.;
-+    if (histo_size > 256) combine_cost_factor /= 2.f;
-+    if (histo_size > 512) combine_cost_factor /= 2.f;
-+    if (histo_size > 1024) combine_cost_factor /= 2.f;
-+    if (quality <= 50) combine_cost_factor /= 2.f;
-   }
-   return combine_cost_factor;
- }
-@@ -1169,15 +1161,17 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) {
- }
- 
- int VP8LGetHistoImageSymbols(int xsize, int ysize,
--                             const VP8LBackwardRefs* const refs,
--                             int quality, int low_effort,
--                             int histo_bits, int cache_bits,
-+                             const VP8LBackwardRefs* const refs, int quality,
-+                             int low_effort, int histogram_bits, int cache_bits,
-                              VP8LHistogramSet* const image_histo,
-                              VP8LHistogram* const tmp_histo,
--                             uint16_t* const histogram_symbols) {
--  int ok = 0;
--  const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
--  const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
-+                             uint16_t* const histogram_symbols,
-+                             const WebPPicture* const pic, int percent_range,
-+                             int* const percent) {
-+  const int histo_xsize =
-+      histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1;
-+  const int histo_ysize =
-+      histogram_bits ? VP8LSubSampleSize(ysize, histogram_bits) : 1;
-   const int image_histo_raw_size = histo_xsize * histo_ysize;
-   VP8LHistogramSet* const orig_histo =
-       VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
-@@ -1187,13 +1181,16 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
-   const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
-   int entropy_combine;
-   uint16_t* const map_tmp =
--      WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp));
-+      WebPSafeMalloc(2 * image_histo_raw_size, sizeof(*map_tmp));
-   uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size;
-   int num_used = image_histo_raw_size;
--  if (orig_histo == NULL || map_tmp == NULL) goto Error;
-+  if (orig_histo == NULL || map_tmp == NULL) {
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    goto Error;
-+  }
- 
-   // Construct the histograms from backward references.
--  HistogramBuild(xsize, histo_bits, refs, orig_histo);
-+  HistogramBuild(xsize, histogram_bits, refs, orig_histo);
-   // Copies the histograms and computes its bit_cost.
-   // histogram_symbols is optimized
-   HistogramCopyAndAnalyze(orig_histo, image_histo, &num_used,
-@@ -1204,16 +1201,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
- 
-   if (entropy_combine) {
-     uint16_t* const bin_map = map_tmp;
--    const double combine_cost_factor =
-+    const float combine_cost_factor =
-         GetCombineCostFactor(image_histo_raw_size, quality);
-     const uint32_t num_clusters = num_used;
- 
-     HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort);
-     // Collapse histograms with similar entropy.
--    HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols,
--                               cluster_mappings, tmp_histo, bin_map,
--                               entropy_combine_num_bins, combine_cost_factor,
--                               low_effort);
-+    HistogramCombineEntropyBin(
-+        image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo,
-+        bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort);
-     OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters,
-                              map_tmp, histogram_symbols);
-   }
-@@ -1227,11 +1223,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
-     int do_greedy;
-     if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size,
-                                     &do_greedy)) {
-+      WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-       goto Error;
-     }
-     if (do_greedy) {
-       RemoveEmptyHistograms(image_histo);
-       if (!HistogramCombineGreedy(image_histo, &num_used)) {
-+        WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-         goto Error;
-       }
-     }
-@@ -1241,10 +1239,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
-   RemoveEmptyHistograms(image_histo);
-   HistogramRemap(orig_histo, image_histo, histogram_symbols);
- 
--  ok = 1;
-+  if (!WebPReportProgress(pic, *percent + percent_range, percent)) {
-+    goto Error;
-+  }
- 
-  Error:
-   VP8LFreeHistogramSet(orig_histo);
-   WebPSafeFree(map_tmp);
--  return ok;
-+  return (pic->error_code == VP8_ENC_OK);
- }
-diff --git a/3rdparty/libwebp/src/enc/histogram_enc.h b/3rdparty/libwebp/src/enc/histogram_enc.h
-index 54c2d2178393..4c0bb97464de 100644
---- a/3rdparty/libwebp/src/enc/histogram_enc.h
-+++ b/3rdparty/libwebp/src/enc/histogram_enc.h
-@@ -40,10 +40,10 @@ typedef struct {
-   int palette_code_bits_;
-   uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
-                              // literal symbols are single valued.
--  double bit_cost_;          // cached value of bit cost.
--  double literal_cost_;      // Cached values of dominant entropy costs:
--  double red_cost_;          // literal, red & blue.
--  double blue_cost_;
-+  float bit_cost_;           // cached value of bit cost.
-+  float literal_cost_;       // Cached values of dominant entropy costs:
-+  float red_cost_;           // literal, red & blue.
-+  float blue_cost_;
-   uint8_t is_used_[5];       // 5 for literal, red, blue, alpha, distance
- } VP8LHistogram;
- 
-@@ -64,8 +64,8 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
-                          const VP8LBackwardRefs* const refs,
-                          int palette_code_bits);
- 
--// Return the size of the histogram for a given palette_code_bits.
--int VP8LGetHistogramSize(int palette_code_bits);
-+// Return the size of the histogram for a given cache_bits.
-+int VP8LGetHistogramSize(int cache_bits);
- 
- // Set the palette_code_bits and reset the stats.
- // If init_arrays is true, the arrays are also filled with 0's.
-@@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
-       ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
- }
- 
--// Builds the histogram image.
-+// Builds the histogram image. pic and percent are for progress.
-+// Returns false in case of error (stored in pic->error_code).
- int VP8LGetHistoImageSymbols(int xsize, int ysize,
--                             const VP8LBackwardRefs* const refs,
--                             int quality, int low_effort,
--                             int histogram_bits, int cache_bits,
--                             VP8LHistogramSet* const image_in,
-+                             const VP8LBackwardRefs* const refs, int quality,
-+                             int low_effort, int histogram_bits, int cache_bits,
-+                             VP8LHistogramSet* const image_histo,
-                              VP8LHistogram* const tmp_histo,
--                             uint16_t* const histogram_symbols);
-+                             uint16_t* const histogram_symbols,
-+                             const WebPPicture* const pic, int percent_range,
-+                             int* const percent);
- 
- // Returns the entropy for the symbols in the input array.
--double VP8LBitsEntropy(const uint32_t* const array, int n);
-+float VP8LBitsEntropy(const uint32_t* const array, int n);
- 
- // Estimate how many bits the combined entropy of literals and distance
- // approximately maps to.
--double VP8LHistogramEstimateBits(VP8LHistogram* const p);
-+float VP8LHistogramEstimateBits(VP8LHistogram* const p);
- 
- #ifdef __cplusplus
- }
-diff --git a/3rdparty/libwebp/src/enc/picture_csp_enc.c b/3rdparty/libwebp/src/enc/picture_csp_enc.c
-index 35eede96355b..a9280e6c3050 100644
---- a/3rdparty/libwebp/src/enc/picture_csp_enc.c
-+++ b/3rdparty/libwebp/src/enc/picture_csp_enc.c
-@@ -15,12 +15,19 @@
- #include <stdlib.h>
- #include <math.h>
- 
-+#include "sharpyuv/sharpyuv.h"
-+#include "sharpyuv/sharpyuv_csp.h"
- #include "src/enc/vp8i_enc.h"
- #include "src/utils/random_utils.h"
- #include "src/utils/utils.h"
- #include "src/dsp/dsp.h"
- #include "src/dsp/lossless.h"
- #include "src/dsp/yuv.h"
-+#include "src/dsp/cpu.h"
-+
-+#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
-+#include <pthread.h>
-+#endif
- 
- // Uncomment to disable gamma-compression during RGB->U/V averaging
- #define USE_GAMMA_COMPRESSION
-@@ -62,10 +69,12 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
- int WebPPictureHasTransparency(const WebPPicture* picture) {
-   if (picture == NULL) return 0;
-   if (picture->use_argb) {
--    const int alpha_offset = ALPHA_OFFSET;
--    return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
--                          picture->width, picture->height,
--                          4, picture->argb_stride * sizeof(*picture->argb));
-+    if (picture->argb != NULL) {
-+      return CheckNonOpaque((const uint8_t*)picture->argb + ALPHA_OFFSET,
-+                            picture->width, picture->height,
-+                            4, picture->argb_stride * sizeof(*picture->argb));
-+    }
-+    return 0;
-   }
-   return CheckNonOpaque(picture->a, picture->width, picture->height,
-                         1, picture->a_stride);
-@@ -76,30 +85,31 @@ int WebPPictureHasTransparency(const WebPPicture* picture) {
- 
- #if defined(USE_GAMMA_COMPRESSION)
- 
--// gamma-compensates loss of resolution during chroma subsampling
--#define kGamma 0.80      // for now we use a different gamma value than kGammaF
--#define kGammaFix 12     // fixed-point precision for linear values
--#define kGammaScale ((1 << kGammaFix) - 1)
--#define kGammaTabFix 7   // fixed-point fractional bits precision
--#define kGammaTabScale (1 << kGammaTabFix)
--#define kGammaTabRounder (kGammaTabScale >> 1)
--#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
-+// Gamma correction compensates loss of resolution during chroma subsampling.
-+#define GAMMA_FIX 12      // fixed-point precision for linear values
-+#define GAMMA_TAB_FIX 7   // fixed-point fractional bits precision
-+#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX))
-+static const double kGamma = 0.80;
-+static const int kGammaScale = ((1 << GAMMA_FIX) - 1);
-+static const int kGammaTabScale = (1 << GAMMA_TAB_FIX);
-+static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1);
- 
--static int kLinearToGammaTab[kGammaTabSize + 1];
-+static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
- static uint16_t kGammaToLinearTab[256];
- static volatile int kGammaTablesOk = 0;
- static void InitGammaTables(void);
-+extern VP8CPUInfo VP8GetCPUInfo;
- 
- WEBP_DSP_INIT_FUNC(InitGammaTables) {
-   if (!kGammaTablesOk) {
-     int v;
--    const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
-+    const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale;
-     const double norm = 1. / 255.;
-     for (v = 0; v <= 255; ++v) {
-       kGammaToLinearTab[v] =
-           (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
-     }
--    for (v = 0; v <= kGammaTabSize; ++v) {
-+    for (v = 0; v <= GAMMA_TAB_SIZE; ++v) {
-       kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
-     }
-     kGammaTablesOk = 1;
-@@ -111,12 +121,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
- }
- 
- static WEBP_INLINE int Interpolate(int v) {
--  const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
-+  const int tab_pos = v >> (GAMMA_TAB_FIX + 2);    // integer part
-   const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
-   const int v0 = kLinearToGammaTab[tab_pos];
-   const int v1 = kLinearToGammaTab[tab_pos + 1];
-   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
--  assert(tab_pos + 1 < kGammaTabSize + 1);
-+  assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1);
-   return y;
- }
- 
-@@ -124,7 +134,7 @@ static WEBP_INLINE int Interpolate(int v) {
- // U/V value, suitable for RGBToU/V calls.
- static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
-   const int y = Interpolate(base_value << shift);   // final uplifted value
--  return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
-+  return (y + kGammaTabRounder) >> GAMMA_TAB_FIX;    // descale
- }
- 
- #else
-@@ -158,415 +168,26 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) {
- //------------------------------------------------------------------------------
- // Sharp RGB->YUV conversion
- 
--static const int kNumIterations = 4;
- static const int kMinDimensionIterativeConversion = 4;
- 
--// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
--// banding sometimes. Better use extra precision.
--#define SFIX 2                // fixed-point precision of RGB and Y/W
--typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
--typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
--
--#define SHALF (1 << SFIX >> 1)
--#define MAX_Y_T ((256 << SFIX) - 1)
--#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
--
--#if defined(USE_GAMMA_COMPRESSION)
--
--// We use tables of different size and precision for the Rec709 / BT2020
--// transfer function.
--#define kGammaF (1./0.45)
--static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
--#define GAMMA_TO_LINEAR_BITS 14
--static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX
--static volatile int kGammaTablesSOk = 0;
--static void InitGammaTablesS(void);
--
--WEBP_DSP_INIT_FUNC(InitGammaTablesS) {
--  assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
--  if (!kGammaTablesSOk) {
--    int v;
--    const double norm = 1. / MAX_Y_T;
--    const double scale = 1. / kGammaTabSize;
--    const double a = 0.09929682680944;
--    const double thresh = 0.018053968510807;
--    const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
--    for (v = 0; v <= MAX_Y_T; ++v) {
--      const double g = norm * v;
--      double value;
--      if (g <= thresh * 4.5) {
--        value = g / 4.5;
--      } else {
--        const double a_rec = 1. / (1. + a);
--        value = pow(a_rec * (g + a), kGammaF);
--      }
--      kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
--    }
--    for (v = 0; v <= kGammaTabSize; ++v) {
--      const double g = scale * v;
--      double value;
--      if (g <= thresh) {
--        value = 4.5 * g;
--      } else {
--        value = (1. + a) * pow(g, 1. / kGammaF) - a;
--      }
--      // we already incorporate the 1/2 rounding constant here
--      kLinearToGammaTabS[v] =
--          (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
--    }
--    // to prevent small rounding errors to cause read-overflow:
--    kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
--    kGammaTablesSOk = 1;
--  }
--}
--
--// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
--static WEBP_INLINE uint32_t GammaToLinearS(int v) {
--  return kGammaToLinearTabS[v];
--}
--
--static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
--  // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
--  const uint32_t v = value * kGammaTabSize;
--  const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
--  // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
--  const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part
--  // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
--  const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
--  const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
--  // Final interpolation. Note that rounding is already included.
--  const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0.
--  const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
--  return result;
--}
--
--#else
--
--static void InitGammaTablesS(void) {}
--static WEBP_INLINE uint32_t GammaToLinearS(int v) {
--  return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
--}
--static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
--  return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
--}
--
--#endif    // USE_GAMMA_COMPRESSION
--
--//------------------------------------------------------------------------------
--
--static uint8_t clip_8b(fixed_t v) {
--  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
--}
--
--static fixed_y_t clip_y(int y) {
--  return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
--}
--
--//------------------------------------------------------------------------------
--
--static int RGBToGray(int r, int g, int b) {
--  const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
--  return (luma >> YUV_FIX);
--}
--
--static uint32_t ScaleDown(int a, int b, int c, int d) {
--  const uint32_t A = GammaToLinearS(a);
--  const uint32_t B = GammaToLinearS(b);
--  const uint32_t C = GammaToLinearS(c);
--  const uint32_t D = GammaToLinearS(d);
--  return LinearToGammaS((A + B + C + D + 2) >> 2);
--}
--
--static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
--  int i;
--  for (i = 0; i < w; ++i) {
--    const uint32_t R = GammaToLinearS(src[0 * w + i]);
--    const uint32_t G = GammaToLinearS(src[1 * w + i]);
--    const uint32_t B = GammaToLinearS(src[2 * w + i]);
--    const uint32_t Y = RGBToGray(R, G, B);
--    dst[i] = (fixed_y_t)LinearToGammaS(Y);
--  }
--}
--
--static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
--                         fixed_t* dst, int uv_w) {
--  int i;
--  for (i = 0; i < uv_w; ++i) {
--    const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
--                            src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
--    const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
--                            src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
--    const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
--                            src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
--    const int W = RGBToGray(r, g, b);
--    dst[0 * uv_w] = (fixed_t)(r - W);
--    dst[1 * uv_w] = (fixed_t)(g - W);
--    dst[2 * uv_w] = (fixed_t)(b - W);
--    dst  += 1;
--    src1 += 2;
--    src2 += 2;
--  }
--}
--
--static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
--  int i;
--  for (i = 0; i < w; ++i) {
--    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
--  }
--}
--
--//------------------------------------------------------------------------------
--
--static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
--  const int v0 = (A * 3 + B + 2) >> 2;
--  return clip_y(v0 + W0);
--}
--
--//------------------------------------------------------------------------------
--
--static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
--  return ((fixed_y_t)a << SFIX) | SHALF;
--}
--
--static void ImportOneRow(const uint8_t* const r_ptr,
--                         const uint8_t* const g_ptr,
--                         const uint8_t* const b_ptr,
--                         int step,
--                         int pic_width,
--                         fixed_y_t* const dst) {
--  int i;
--  const int w = (pic_width + 1) & ~1;
--  for (i = 0; i < pic_width; ++i) {
--    const int off = i * step;
--    dst[i + 0 * w] = UpLift(r_ptr[off]);
--    dst[i + 1 * w] = UpLift(g_ptr[off]);
--    dst[i + 2 * w] = UpLift(b_ptr[off]);
--  }
--  if (pic_width & 1) {  // replicate rightmost pixel
--    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
--    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
--    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
--  }
--}
--
--static void InterpolateTwoRows(const fixed_y_t* const best_y,
--                               const fixed_t* prev_uv,
--                               const fixed_t* cur_uv,
--                               const fixed_t* next_uv,
--                               int w,
--                               fixed_y_t* out1,
--                               fixed_y_t* out2) {
--  const int uv_w = w >> 1;
--  const int len = (w - 1) >> 1;   // length to filter
--  int k = 3;
--  while (k-- > 0) {   // process each R/G/B segments in turn
--    // special boundary case for i==0
--    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
--    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
--
--    WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
--    WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
--
--    // special boundary case for i == w - 1 when w is even
--    if (!(w & 1)) {
--      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
--                            best_y[w - 1 + 0]);
--      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
--                            best_y[w - 1 + w]);
--    }
--    out1 += w;
--    out2 += w;
--    prev_uv += uv_w;
--    cur_uv  += uv_w;
--    next_uv += uv_w;
--  }
--}
--
--static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
--  const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
--  return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
--}
--
--static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
--  const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
--  return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
--}
--
--static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
--  const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
--  return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
--}
--
--static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
--                            WebPPicture* const picture) {
--  int i, j;
--  uint8_t* dst_y = picture->y;
--  uint8_t* dst_u = picture->u;
--  uint8_t* dst_v = picture->v;
--  const fixed_t* const best_uv_base = best_uv;
--  const int w = (picture->width + 1) & ~1;
--  const int h = (picture->height + 1) & ~1;
--  const int uv_w = w >> 1;
--  const int uv_h = h >> 1;
--  for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
--    for (i = 0; i < picture->width; ++i) {
--      const int off = (i >> 1);
--      const int W = best_y[i];
--      const int r = best_uv[off + 0 * uv_w] + W;
--      const int g = best_uv[off + 1 * uv_w] + W;
--      const int b = best_uv[off + 2 * uv_w] + W;
--      dst_y[i] = ConvertRGBToY(r, g, b);
--    }
--    best_y += w;
--    best_uv += (j & 1) * 3 * uv_w;
--    dst_y += picture->y_stride;
--  }
--  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
--    for (i = 0; i < uv_w; ++i) {
--      const int off = i;
--      const int r = best_uv[off + 0 * uv_w];
--      const int g = best_uv[off + 1 * uv_w];
--      const int b = best_uv[off + 2 * uv_w];
--      dst_u[i] = ConvertRGBToU(r, g, b);
--      dst_v[i] = ConvertRGBToV(r, g, b);
--    }
--    best_uv += 3 * uv_w;
--    dst_u += picture->uv_stride;
--    dst_v += picture->uv_stride;
--  }
--  return 1;
--}
--
- //------------------------------------------------------------------------------
- // Main function
- 
--#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
--
- static int PreprocessARGB(const uint8_t* r_ptr,
-                           const uint8_t* g_ptr,
-                           const uint8_t* b_ptr,
-                           int step, int rgb_stride,
-                           WebPPicture* const picture) {
--  // we expand the right/bottom border if needed
--  const int w = (picture->width + 1) & ~1;
--  const int h = (picture->height + 1) & ~1;
--  const int uv_w = w >> 1;
--  const int uv_h = h >> 1;
--  uint64_t prev_diff_y_sum = ~0;
--  int j, iter;
--
--  // TODO(skal): allocate one big memory chunk. But for now, it's easier
--  // for valgrind debugging to have several chunks.
--  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
--  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
--  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
--  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
--  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
--  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
--  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
--  fixed_y_t* best_y = best_y_base;
--  fixed_y_t* target_y = target_y_base;
--  fixed_t* best_uv = best_uv_base;
--  fixed_t* target_uv = target_uv_base;
--  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
--  int ok;
--
--  if (best_y_base == NULL || best_uv_base == NULL ||
--      target_y_base == NULL || target_uv_base == NULL ||
--      best_rgb_y == NULL || best_rgb_uv == NULL ||
--      tmp_buffer == NULL) {
--    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
--    goto End;
--  }
--  assert(picture->width >= kMinDimensionIterativeConversion);
--  assert(picture->height >= kMinDimensionIterativeConversion);
--
--  WebPInitConvertARGBToYUV();
--
--  // Import RGB samples to W/RGB representation.
--  for (j = 0; j < picture->height; j += 2) {
--    const int is_last_row = (j == picture->height - 1);
--    fixed_y_t* const src1 = tmp_buffer + 0 * w;
--    fixed_y_t* const src2 = tmp_buffer + 3 * w;
--
--    // prepare two rows of input
--    ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
--    if (!is_last_row) {
--      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
--                   step, picture->width, src2);
--    } else {
--      memcpy(src2, src1, 3 * w * sizeof(*src2));
--    }
--    StoreGray(src1, best_y + 0, w);
--    StoreGray(src2, best_y + w, w);
--
--    UpdateW(src1, target_y, w);
--    UpdateW(src2, target_y + w, w);
--    UpdateChroma(src1, src2, target_uv, uv_w);
--    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
--    best_y += 2 * w;
--    best_uv += 3 * uv_w;
--    target_y += 2 * w;
--    target_uv += 3 * uv_w;
--    r_ptr += 2 * rgb_stride;
--    g_ptr += 2 * rgb_stride;
--    b_ptr += 2 * rgb_stride;
--  }
--
--  // Iterate and resolve clipping conflicts.
--  for (iter = 0; iter < kNumIterations; ++iter) {
--    const fixed_t* cur_uv = best_uv_base;
--    const fixed_t* prev_uv = best_uv_base;
--    uint64_t diff_y_sum = 0;
--
--    best_y = best_y_base;
--    best_uv = best_uv_base;
--    target_y = target_y_base;
--    target_uv = target_uv_base;
--    for (j = 0; j < h; j += 2) {
--      fixed_y_t* const src1 = tmp_buffer + 0 * w;
--      fixed_y_t* const src2 = tmp_buffer + 3 * w;
--      {
--        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
--        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
--        prev_uv = cur_uv;
--        cur_uv = next_uv;
--      }
--
--      UpdateW(src1, best_rgb_y + 0 * w, w);
--      UpdateW(src2, best_rgb_y + 1 * w, w);
--      UpdateChroma(src1, src2, best_rgb_uv, uv_w);
--
--      // update two rows of Y and one row of RGB
--      diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
--      WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
--
--      best_y += 2 * w;
--      best_uv += 3 * uv_w;
--      target_y += 2 * w;
--      target_uv += 3 * uv_w;
--    }
--    // test exit condition
--    if (iter > 0) {
--      if (diff_y_sum < diff_y_threshold) break;
--      if (diff_y_sum > prev_diff_y_sum) break;
--    }
--    prev_diff_y_sum = diff_y_sum;
-+  const int ok = SharpYuvConvert(
-+      r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8,
-+      picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v,
-+      picture->uv_stride, /*yuv_bit_depth=*/8, picture->width,
-+      picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp));
-+  if (!ok) {
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-   }
--  // final reconstruction
--  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
--
-- End:
--  WebPSafeFree(best_y_base);
--  WebPSafeFree(best_uv_base);
--  WebPSafeFree(target_y_base);
--  WebPSafeFree(target_uv_base);
--  WebPSafeFree(best_rgb_y);
--  WebPSafeFree(best_rgb_uv);
--  WebPSafeFree(tmp_buffer);
-   return ok;
- }
--#undef SAFE_ALLOC
- 
- //------------------------------------------------------------------------------
- // "Fast" regular RGB->YUV
-@@ -591,8 +212,8 @@ static const int kAlphaFix = 19;
- // and constant are adjusted very tightly to fit 32b arithmetic.
- // In particular, they use the fact that the operands for 'v / a' are actually
- // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
--// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
--// overflow is: kGammaFix + kAlphaFix <= 31.
-+// with ai in [0..255] and pi in [0..1<<GAMMA_FIX). The constraint to avoid
-+// overflow is: GAMMA_FIX + kAlphaFix <= 31.
- static const uint32_t kInvAlpha[4 * 0xff + 1] = {
-   0,  /* alpha = 0 */
-   524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
-@@ -818,11 +439,20 @@ static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
-     dst[0] = SUM4(r_ptr + j, step);
-     dst[1] = SUM4(g_ptr + j, step);
-     dst[2] = SUM4(b_ptr + j, step);
-+    // MemorySanitizer may raise false positives with data that passes through
-+    // RGBA32PackedToPlanar_16b_SSE41() due to incorrect modeling of shuffles.
-+    // See https://crbug.com/webp/573.
-+#ifdef WEBP_MSAN
-+    dst[3] = 0;
-+#endif
-   }
-   if (width & 1) {
-     dst[0] = SUM2(r_ptr + j);
-     dst[1] = SUM2(g_ptr + j);
-     dst[2] = SUM2(b_ptr + j);
-+#ifdef WEBP_MSAN
-+    dst[3] = 0;
-+#endif
-   }
- }
- 
-@@ -839,6 +469,8 @@ static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
-   }
- }
- 
-+extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
-+
- static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
-                               const uint8_t* g_ptr,
-                               const uint8_t* b_ptr,
-@@ -863,18 +495,18 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
-     use_iterative_conversion = 0;
-   }
- 
--  if (!WebPPictureAllocYUVA(picture, width, height)) {
-+  if (!WebPPictureAllocYUVA(picture)) {
-     return 0;
-   }
-   if (has_alpha) {
-     assert(step == 4);
- #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
--    assert(kAlphaFix + kGammaFix <= 31);
-+    assert(kAlphaFix + GAMMA_FIX <= 31);
- #endif
-   }
- 
-   if (use_iterative_conversion) {
--    InitGammaTablesS();
-+    SharpYuvInit(VP8GetCPUInfo);
-     if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
-       return 0;
-     }
-@@ -903,7 +535,9 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
-     WebPInitConvertARGBToYUV();
-     InitGammaTables();
- 
--    if (tmp_rgb == NULL) return 0;  // malloc error
-+    if (tmp_rgb == NULL) {
-+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    }
- 
-     // Downsample Y/U/V planes, two rows at a time
-     for (y = 0; y < (height >> 1); ++y) {
-@@ -1044,7 +678,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
-     return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-   }
-   // Allocate a new argb buffer (discarding the previous one).
--  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
-+  if (!WebPPictureAllocARGB(picture)) return 0;
-   picture->use_argb = 1;
- 
-   // Convert
-@@ -1106,6 +740,8 @@ static int Import(WebPPicture* const picture,
-   const int width = picture->width;
-   const int height = picture->height;
- 
-+  if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0;
-+
-   if (!picture->use_argb) {
-     const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
-     return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
-@@ -1163,24 +799,24 @@ static int Import(WebPPicture* const picture,
- #if !defined(WEBP_REDUCE_CSP)
- 
- int WebPPictureImportBGR(WebPPicture* picture,
--                         const uint8_t* rgb, int rgb_stride) {
--  return (picture != NULL && rgb != NULL)
--             ? Import(picture, rgb, rgb_stride, 3, 1, 0)
-+                         const uint8_t* bgr, int bgr_stride) {
-+  return (picture != NULL && bgr != NULL)
-+             ? Import(picture, bgr, bgr_stride, 3, 1, 0)
-              : 0;
- }
- 
- int WebPPictureImportBGRA(WebPPicture* picture,
--                          const uint8_t* rgba, int rgba_stride) {
--  return (picture != NULL && rgba != NULL)
--             ? Import(picture, rgba, rgba_stride, 4, 1, 1)
-+                          const uint8_t* bgra, int bgra_stride) {
-+  return (picture != NULL && bgra != NULL)
-+             ? Import(picture, bgra, bgra_stride, 4, 1, 1)
-              : 0;
- }
- 
- 
- int WebPPictureImportBGRX(WebPPicture* picture,
--                          const uint8_t* rgba, int rgba_stride) {
--  return (picture != NULL && rgba != NULL)
--             ? Import(picture, rgba, rgba_stride, 4, 1, 0)
-+                          const uint8_t* bgrx, int bgrx_stride) {
-+  return (picture != NULL && bgrx != NULL)
-+             ? Import(picture, bgrx, bgrx_stride, 4, 1, 0)
-              : 0;
- }
- 
-@@ -1201,9 +837,9 @@ int WebPPictureImportRGBA(WebPPicture* picture,
- }
- 
- int WebPPictureImportRGBX(WebPPicture* picture,
--                          const uint8_t* rgba, int rgba_stride) {
--  return (picture != NULL && rgba != NULL)
--             ? Import(picture, rgba, rgba_stride, 4, 0, 0)
-+                          const uint8_t* rgbx, int rgbx_stride) {
-+  return (picture != NULL && rgbx != NULL)
-+             ? Import(picture, rgbx, rgbx_stride, 4, 0, 0)
-              : 0;
- }
- 
-diff --git a/3rdparty/libwebp/src/enc/picture_enc.c b/3rdparty/libwebp/src/enc/picture_enc.c
-index c691622d03cd..5a2703541f2d 100644
---- a/3rdparty/libwebp/src/enc/picture_enc.c
-+++ b/3rdparty/libwebp/src/enc/picture_enc.c
-@@ -12,10 +12,10 @@
- // Author: Skal (pascal.massimino@gmail.com)
- 
- #include <assert.h>
-+#include <limits.h>
- #include <stdlib.h>
- 
- #include "src/enc/vp8i_enc.h"
--#include "src/dsp/dsp.h"
- #include "src/utils/utils.h"
- 
- //------------------------------------------------------------------------------
-@@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) {
- 
- //------------------------------------------------------------------------------
- 
-+int WebPValidatePicture(const WebPPicture* const picture) {
-+  if (picture == NULL) return 0;
-+  if (picture->width <= 0 || picture->height <= 0) {
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-+  }
-+  if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 ||
-+      picture->height <= 0 || picture->height / 4 > INT_MAX / 4) {
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-+  }
-+  if (picture->colorspace != WEBP_YUV420 &&
-+      picture->colorspace != WEBP_YUV420A) {
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-+  }
-+  return 1;
-+}
-+
- static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
-   picture->memory_argb_ = NULL;
-   picture->argb = NULL;
-@@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) {
-   WebPPictureResetBufferYUVA(picture);
- }
- 
--int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
-+int WebPPictureAllocARGB(WebPPicture* const picture) {
-   void* memory;
-+  const int width = picture->width;
-+  const int height = picture->height;
-   const uint64_t argb_size = (uint64_t)width * height;
- 
--  assert(picture != NULL);
-+  if (!WebPValidatePicture(picture)) return 0;
- 
-   WebPSafeFree(picture->memory_argb_);
-   WebPPictureResetBufferARGB(picture);
- 
--  if (width <= 0 || height <= 0) {
--    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
--  }
-   // allocate a new buffer.
-   memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb));
-   if (memory == NULL) {
-@@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
-   return 1;
- }
- 
--int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
--  const WebPEncCSP uv_csp =
--      (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK);
-+int WebPPictureAllocYUVA(WebPPicture* const picture) {
-   const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT;
-+  const int width = picture->width;
-+  const int height = picture->height;
-   const int y_stride = width;
-   const int uv_width = (int)(((int64_t)width + 1) >> 1);
-   const int uv_height = (int)(((int64_t)height + 1) >> 1);
-@@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
-   uint64_t y_size, uv_size, a_size, total_size;
-   uint8_t* mem;
- 
--  assert(picture != NULL);
-+  if (!WebPValidatePicture(picture)) return 0;
- 
-   WebPSafeFree(picture->memory_);
-   WebPPictureResetBufferYUVA(picture);
- 
--  if (uv_csp != WEBP_YUV420) {
--    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
--  }
--
-   // alpha
-   a_width = has_alpha ? width : 0;
-   a_stride = a_width;
-@@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
- 
- int WebPPictureAlloc(WebPPicture* picture) {
-   if (picture != NULL) {
--    const int width = picture->width;
--    const int height = picture->height;
--
-     WebPPictureFree(picture);   // erase previous buffer
- 
-     if (!picture->use_argb) {
--      return WebPPictureAllocYUVA(picture, width, height);
-+      return WebPPictureAllocYUVA(picture);
-     } else {
--      return WebPPictureAllocARGB(picture, width, height);
-+      return WebPPictureAllocARGB(picture);
-     }
-   }
-   return 1;
-diff --git a/3rdparty/libwebp/src/enc/picture_rescale_enc.c b/3rdparty/libwebp/src/enc/picture_rescale_enc.c
-index 58a6ae7b9de8..ea90d825484e 100644
---- a/3rdparty/libwebp/src/enc/picture_rescale_enc.c
-+++ b/3rdparty/libwebp/src/enc/picture_rescale_enc.c
-@@ -13,14 +13,15 @@
- 
- #include "src/webp/encode.h"
- 
--#if !defined(WEBP_REDUCE_SIZE)
--
- #include <assert.h>
- #include <stdlib.h>
- 
- #include "src/enc/vp8i_enc.h"
-+
-+#if !defined(WEBP_REDUCE_SIZE)
- #include "src/utils/rescaler_utils.h"
- #include "src/utils/utils.h"
-+#endif  // !defined(WEBP_REDUCE_SIZE)
- 
- #define HALVE(x) (((x) + 1) >> 1)
- 
-@@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic,
-   return 1;
- }
- 
-+#if !defined(WEBP_REDUCE_SIZE)
- int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
-   if (src == NULL || dst == NULL) return 0;
-   if (src == dst) return 1;
-@@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
-   }
-   return 1;
- }
-+#endif  // !defined(WEBP_REDUCE_SIZE)
- 
- int WebPPictureIsView(const WebPPicture* picture) {
-   if (picture == NULL) return 0;
-@@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src,
-   return 1;
- }
- 
-+#if !defined(WEBP_REDUCE_SIZE)
- //------------------------------------------------------------------------------
- // Picture cropping
- 
-@@ -133,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic,
-   PictureGrabSpecs(pic, &tmp);
-   tmp.width = width;
-   tmp.height = height;
--  if (!WebPPictureAlloc(&tmp)) return 0;
-+  if (!WebPPictureAlloc(&tmp)) {
-+    return WebPEncodingSetError(pic, tmp.error_code);
-+  }
- 
-   if (!pic->use_argb) {
-     const int y_offset = top * pic->y_stride + left;
-@@ -164,22 +170,25 @@ int WebPPictureCrop(WebPPicture* pic,
- //------------------------------------------------------------------------------
- // Simple picture rescaler
- 
--static void RescalePlane(const uint8_t* src,
--                         int src_width, int src_height, int src_stride,
--                         uint8_t* dst,
--                         int dst_width, int dst_height, int dst_stride,
--                         rescaler_t* const work,
--                         int num_channels) {
-+static int RescalePlane(const uint8_t* src,
-+                        int src_width, int src_height, int src_stride,
-+                        uint8_t* dst,
-+                        int dst_width, int dst_height, int dst_stride,
-+                        rescaler_t* const work,
-+                        int num_channels) {
-   WebPRescaler rescaler;
-   int y = 0;
--  WebPRescalerInit(&rescaler, src_width, src_height,
--                   dst, dst_width, dst_height, dst_stride,
--                   num_channels, work);
-+  if (!WebPRescalerInit(&rescaler, src_width, src_height,
-+                        dst, dst_width, dst_height, dst_stride,
-+                        num_channels, work)) {
-+    return 0;
-+  }
-   while (y < src_height) {
-     y += WebPRescalerImport(&rescaler, src_height - y,
-                             src + y * src_stride, src_stride);
-     WebPRescalerExport(&rescaler);
-   }
-+  return 1;
- }
- 
- static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) {
-@@ -195,73 +204,76 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
-   }
- }
- 
--int WebPPictureRescale(WebPPicture* pic, int width, int height) {
-+int WebPPictureRescale(WebPPicture* picture, int width, int height) {
-   WebPPicture tmp;
-   int prev_width, prev_height;
-   rescaler_t* work;
- 
--  if (pic == NULL) return 0;
--  prev_width = pic->width;
--  prev_height = pic->height;
-+  if (picture == NULL) return 0;
-+  prev_width = picture->width;
-+  prev_height = picture->height;
-   if (!WebPRescalerGetScaledDimensions(
-           prev_width, prev_height, &width, &height)) {
--    return 0;
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-   }
- 
--  PictureGrabSpecs(pic, &tmp);
-+  PictureGrabSpecs(picture, &tmp);
-   tmp.width = width;
-   tmp.height = height;
--  if (!WebPPictureAlloc(&tmp)) return 0;
-+  if (!WebPPictureAlloc(&tmp)) {
-+    return WebPEncodingSetError(picture, tmp.error_code);
-+  }
- 
--  if (!pic->use_argb) {
-+  if (!picture->use_argb) {
-     work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
-     if (work == NULL) {
-       WebPPictureFree(&tmp);
--      return 0;
-+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     }
-     // If present, we need to rescale alpha first (for AlphaMultiplyY).
--    if (pic->a != NULL) {
-+    if (picture->a != NULL) {
-       WebPInitAlphaProcessing();
--      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
--                   tmp.a, width, height, tmp.a_stride, work, 1);
-+      if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride,
-+                        tmp.a, width, height, tmp.a_stride, work, 1)) {
-+        return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-+      }
-     }
- 
-     // We take transparency into account on the luma plane only. That's not
-     // totally exact blending, but still is a good approximation.
--    AlphaMultiplyY(pic, 0);
--    RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
--                 tmp.y, width, height, tmp.y_stride, work, 1);
-+    AlphaMultiplyY(picture, 0);
-+    if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride,
-+                      tmp.y, width, height, tmp.y_stride, work, 1) ||
-+        !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height),
-+                      picture->uv_stride, tmp.u, HALVE(width), HALVE(height),
-+                      tmp.uv_stride, work, 1) ||
-+        !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height),
-+                      picture->uv_stride, tmp.v, HALVE(width), HALVE(height),
-+                      tmp.uv_stride, work, 1)) {
-+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-+    }
-     AlphaMultiplyY(&tmp, 1);
--
--    RescalePlane(pic->u,
--                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
--                 tmp.u,
--                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
--    RescalePlane(pic->v,
--                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
--                 tmp.v,
--                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
-   } else {
-     work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
-     if (work == NULL) {
-       WebPPictureFree(&tmp);
--      return 0;
-+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     }
-     // In order to correctly interpolate colors, we need to apply the alpha
-     // weighting first (black-matting), scale the RGB values, and remove
-     // the premultiplication afterward (while preserving the alpha channel).
-     WebPInitAlphaProcessing();
--    AlphaMultiplyARGB(pic, 0);
--    RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
--                 pic->argb_stride * 4,
--                 (uint8_t*)tmp.argb, width, height,
--                 tmp.argb_stride * 4,
--                 work, 4);
-+    AlphaMultiplyARGB(picture, 0);
-+    if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height,
-+                      picture->argb_stride * 4, (uint8_t*)tmp.argb, width,
-+                      height, tmp.argb_stride * 4, work, 4)) {
-+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
-+    }
-     AlphaMultiplyARGB(&tmp, 1);
-   }
--  WebPPictureFree(pic);
-+  WebPPictureFree(picture);
-   WebPSafeFree(work);
--  *pic = tmp;
-+  *picture = tmp;
-   return 1;
- }
- 
-@@ -273,23 +285,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
-   return 0;
- }
- 
--int WebPPictureIsView(const WebPPicture* picture) {
--  (void)picture;
--  return 0;
--}
--
--int WebPPictureView(const WebPPicture* src,
--                    int left, int top, int width, int height,
--                    WebPPicture* dst) {
--  (void)src;
--  (void)left;
--  (void)top;
--  (void)width;
--  (void)height;
--  (void)dst;
--  return 0;
--}
--
- int WebPPictureCrop(WebPPicture* pic,
-                     int left, int top, int width, int height) {
-   (void)pic;
-diff --git a/3rdparty/libwebp/src/enc/picture_tools_enc.c b/3rdparty/libwebp/src/enc/picture_tools_enc.c
-index 38cb01534a3f..147cc18608c4 100644
---- a/3rdparty/libwebp/src/enc/picture_tools_enc.c
-+++ b/3rdparty/libwebp/src/enc/picture_tools_enc.c
-@@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
-   return (0xff000000u | (r << 16) | (g << 8) | b);
- }
- 
--void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
-+void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) {
-   const int red = (background_rgb >> 16) & 0xff;
-   const int green = (background_rgb >> 8) & 0xff;
-   const int blue = (background_rgb >> 0) & 0xff;
-   int x, y;
--  if (pic == NULL) return;
--  if (!pic->use_argb) {
--    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
-+  if (picture == NULL) return;
-+  if (!picture->use_argb) {
-+    // omit last pixel during u/v loop
-+    const int uv_width = (picture->width >> 1);
-     const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
-     // VP8RGBToU/V expects the u/v values summed over four pixels
-     const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
-     const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
--    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
--    uint8_t* y_ptr = pic->y;
--    uint8_t* u_ptr = pic->u;
--    uint8_t* v_ptr = pic->v;
--    uint8_t* a_ptr = pic->a;
-+    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
-+    uint8_t* y_ptr = picture->y;
-+    uint8_t* u_ptr = picture->u;
-+    uint8_t* v_ptr = picture->v;
-+    uint8_t* a_ptr = picture->a;
-     if (!has_alpha || a_ptr == NULL) return;    // nothing to do
--    for (y = 0; y < pic->height; ++y) {
-+    for (y = 0; y < picture->height; ++y) {
-       // Luma blending
--      for (x = 0; x < pic->width; ++x) {
-+      for (x = 0; x < picture->width; ++x) {
-         const uint8_t alpha = a_ptr[x];
-         if (alpha < 0xff) {
-           y_ptr[x] = BLEND(Y0, y_ptr[x], alpha);
-@@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
-       // Chroma blending every even line
-       if ((y & 1) == 0) {
-         uint8_t* const a_ptr2 =
--            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
-+            (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride;
-         for (x = 0; x < uv_width; ++x) {
-           // Average four alpha values into a single blending weight.
-           // TODO(skal): might lead to visible contouring. Can we do better?
-@@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
-           u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
-           v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
-         }
--        if (pic->width & 1) {   // rightmost pixel
-+        if (picture->width & 1) {  // rightmost pixel
-           const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
-           u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha);
-           v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha);
-         }
-       } else {
--        u_ptr += pic->uv_stride;
--        v_ptr += pic->uv_stride;
-+        u_ptr += picture->uv_stride;
-+        v_ptr += picture->uv_stride;
-       }
--      memset(a_ptr, 0xff, pic->width);  // reset alpha value to opaque
--      a_ptr += pic->a_stride;
--      y_ptr += pic->y_stride;
-+      memset(a_ptr, 0xff, picture->width);  // reset alpha value to opaque
-+      a_ptr += picture->a_stride;
-+      y_ptr += picture->y_stride;
-     }
-   } else {
--    uint32_t* argb = pic->argb;
-+    uint32_t* argb = picture->argb;
-     const uint32_t background = MakeARGB32(red, green, blue);
--    for (y = 0; y < pic->height; ++y) {
--      for (x = 0; x < pic->width; ++x) {
-+    for (y = 0; y < picture->height; ++y) {
-+      for (x = 0; x < picture->width; ++x) {
-         const int alpha = (argb[x] >> 24) & 0xff;
-         if (alpha != 0xff) {
-           if (alpha > 0) {
-@@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
-           }
-         }
-       }
--      argb += pic->argb_stride;
-+      argb += picture->argb_stride;
-     }
-   }
- }
-diff --git a/3rdparty/libwebp/src/enc/predictor_enc.c b/3rdparty/libwebp/src/enc/predictor_enc.c
-index 2e6762ea0dd2..b3d44b59d506 100644
---- a/3rdparty/libwebp/src/enc/predictor_enc.c
-+++ b/3rdparty/libwebp/src/enc/predictor_enc.c
-@@ -16,6 +16,7 @@
- 
- #include "src/dsp/lossless.h"
- #include "src/dsp/lossless_common.h"
-+#include "src/enc/vp8i_enc.h"
- #include "src/enc/vp8li_enc.h"
- 
- #define MAX_DIFF_COST (1e30f)
-@@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
- // Methods to calculate Entropy (Shannon).
- 
- static float PredictionCostSpatial(const int counts[256], int weight_0,
--                                   double exp_val) {
-+                                   float exp_val) {
-   const int significant_symbols = 256 >> 4;
--  const double exp_decay_factor = 0.6;
--  double bits = weight_0 * counts[0];
-+  const float exp_decay_factor = 0.6f;
-+  float bits = (float)weight_0 * counts[0];
-   int i;
-   for (i = 1; i < significant_symbols; ++i) {
-     bits += exp_val * (counts[i] + counts[256 - i]);
-@@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0,
- static float PredictionCostSpatialHistogram(const int accumulated[4][256],
-                                             const int tile[4][256]) {
-   int i;
--  double retval = 0;
-+  float retval = 0.f;
-   for (i = 0; i < 4; ++i) {
--    const double kExpValue = 0.94;
-+    const float kExpValue = 0.94f;
-     retval += PredictionCostSpatial(tile[i], 1, kExpValue);
-     retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
-   }
-@@ -249,7 +250,7 @@ static WEBP_INLINE void GetResidual(
-       } else if (x == 0) {
-         predict = upper_row[x];  // Top.
-       } else {
--        predict = pred_func(current_row[x - 1], upper_row + x);
-+        predict = pred_func(&current_row[x - 1], upper_row + x);
-       }
- #if (WEBP_NEAR_LOSSLESS == 1)
-       if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||
-@@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height,
- // with respect to predictions. If near_lossless_quality < 100, applies
- // near lossless processing, shaving off more bits of residuals for lower
- // qualities.
--void VP8LResidualImage(int width, int height, int bits, int low_effort,
--                       uint32_t* const argb, uint32_t* const argb_scratch,
--                       uint32_t* const image, int near_lossless_quality,
--                       int exact, int used_subtract_green) {
-+int VP8LResidualImage(int width, int height, int bits, int low_effort,
-+                      uint32_t* const argb, uint32_t* const argb_scratch,
-+                      uint32_t* const image, int near_lossless_quality,
-+                      int exact, int used_subtract_green,
-+                      const WebPPicture* const pic, int percent_range,
-+                      int* const percent) {
-   const int tiles_per_row = VP8LSubSampleSize(width, bits);
-   const int tiles_per_col = VP8LSubSampleSize(height, bits);
-+  int percent_start = *percent;
-   int tile_y;
-   int histo[4][256];
-   const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality);
-@@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort,
-     for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
-       int tile_x;
-       for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
--        const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
--            bits, histo, argb_scratch, argb, max_quantization, exact,
--            used_subtract_green, image);
-+        const int pred = GetBestPredictorForTile(
-+            width, height, tile_x, tile_y, bits, histo, argb_scratch, argb,
-+            max_quantization, exact, used_subtract_green, image);
-         image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
-       }
-+
-+      if (!WebPReportProgress(
-+              pic, percent_start + percent_range * tile_y / tiles_per_col,
-+              percent)) {
-+        return 0;
-+      }
-     }
-   }
- 
-   CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb,
-                           low_effort, max_quantization, exact,
-                           used_subtract_green);
-+  return WebPReportProgress(pic, percent_start + percent_range, percent);
- }
- 
- //------------------------------------------------------------------------------
-@@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256],
-                                       const int counts[256]) {
-   // Favor low entropy, locally and globally.
-   // Favor small absolute values for PredictionCostSpatial
--  static const double kExpValue = 2.4;
-+  static const float kExpValue = 2.4f;
-   return VP8LCombinedShannonEntropy(counts, accumulated) +
-          PredictionCostSpatial(counts, 3, kExpValue);
- }
-@@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize,
-   }
- }
- 
--void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
--                             uint32_t* const argb, uint32_t* image) {
-+int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
-+                            uint32_t* const argb, uint32_t* image,
-+                            const WebPPicture* const pic, int percent_range,
-+                            int* const percent) {
-   const int max_tile_size = 1 << bits;
-   const int tile_xsize = VP8LSubSampleSize(width, bits);
-   const int tile_ysize = VP8LSubSampleSize(height, bits);
-+  int percent_start = *percent;
-   int accumulated_red_histo[256] = { 0 };
-   int accumulated_blue_histo[256] = { 0 };
-   int tile_x, tile_y;
-@@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
-         }
-       }
-     }
-+    if (!WebPReportProgress(
-+            pic, percent_start + percent_range * tile_y / tile_ysize,
-+            percent)) {
-+      return 0;
-+    }
-   }
-+  return 1;
- }
-diff --git a/3rdparty/libwebp/src/enc/quant_enc.c b/3rdparty/libwebp/src/enc/quant_enc.c
-index 01eb565c7f9c..6d8202d27714 100644
---- a/3rdparty/libwebp/src/enc/quant_enc.c
-+++ b/3rdparty/libwebp/src/enc/quant_enc.c
-@@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) {
-   rd->score = MAX_COST;
- }
- 
--static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
-+static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst,
-+                      const VP8ModeScore* WEBP_RESTRICT const src) {
-   dst->D  = src->D;
-   dst->SD = src->SD;
-   dst->R  = src->R;
-@@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
-   dst->score = src->score;
- }
- 
--static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
-+static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst,
-+                     const VP8ModeScore* WEBP_RESTRICT const src) {
-   dst->D  += src->D;
-   dst->SD += src->SD;
-   dst->R  += src->R;
-@@ -585,15 +587,18 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
-   return rate * lambda + RD_DISTO_MULT * distortion;
- }
- 
--static int TrellisQuantizeBlock(const VP8Encoder* const enc,
-+// Coefficient type.
-+enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
-+
-+static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc,
-                                 int16_t in[16], int16_t out[16],
-                                 int ctx0, int coeff_type,
--                                const VP8Matrix* const mtx,
-+                                const VP8Matrix* WEBP_RESTRICT const mtx,
-                                 int lambda) {
-   const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
-   CostArrayPtr const costs =
-       (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
--  const int first = (coeff_type == 0) ? 1 : 0;
-+  const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
-   Node nodes[16][NUM_NODES];
-   ScoreState score_states[2][NUM_NODES];
-   ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
-@@ -657,16 +662,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
-     // test all alternate level values around level0.
-     for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
-       Node* const cur = &NODE(n, m);
--      int level = level0 + m;
-+      const int level = level0 + m;
-       const int ctx = (level > 2) ? 2 : level;
-       const int band = VP8EncBands[n + 1];
-       score_t base_score;
--      score_t best_cur_score = MAX_COST;
--      int best_prev = 0;   // default, in case
-+      score_t best_cur_score;
-+      int best_prev;
-+      score_t cost, score;
- 
--      ss_cur[m].score = MAX_COST;
-       ss_cur[m].costs = costs[n + 1][ctx];
-       if (level < 0 || level > thresh_level) {
-+        ss_cur[m].score = MAX_COST;
-         // Node is dead.
-         continue;
-       }
-@@ -682,18 +688,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
-       }
- 
-       // Inspect all possible non-dead predecessors. Retain only the best one.
--      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
-+      // The base_score is added to all scores so it is only added for the final
-+      // value after the loop.
-+      cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
-+      best_cur_score =
-+          ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
-+      best_prev = -MIN_DELTA;
-+      for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
-         // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
-         // eliminated since their score can't be better than the current best.
--        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
-+        cost = VP8LevelCost(ss_prev[p].costs, level);
-         // Examine node assuming it's a non-terminal one.
--        const score_t score =
--            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
-+        score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
-         if (score < best_cur_score) {
-           best_cur_score = score;
-           best_prev = p;
-         }
-       }
-+      best_cur_score += base_score;
-       // Store best finding in current node.
-       cur->sign = sign;
-       cur->level = level;
-@@ -701,11 +713,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
-       ss_cur[m].score = best_cur_score;
- 
-       // Now, record best terminal node (and thus best entry in the graph).
--      if (level != 0) {
-+      if (level != 0 && best_cur_score < best_score) {
-         const score_t last_pos_cost =
-             (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
-         const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
--        const score_t score = best_cur_score + last_pos_score;
-+        score = best_cur_score + last_pos_score;
-         if (score < best_score) {
-           best_score = score;
-           best_path[0] = n;                     // best eob position
-@@ -717,10 +729,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
-   }
- 
-   // Fresh start
--  memset(in + first, 0, (16 - first) * sizeof(*in));
--  memset(out + first, 0, (16 - first) * sizeof(*out));
-+  // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
-+  if (coeff_type == TYPE_I16_AC) {
-+    memset(in + 1, 0, 15 * sizeof(*in));
-+    memset(out + 1, 0, 15 * sizeof(*out));
-+  } else {
-+    memset(in, 0, 16 * sizeof(*in));
-+    memset(out, 0, 16 * sizeof(*out));
-+  }
-   if (best_path[0] == -1) {
--    return 0;   // skip!
-+    return 0;  // skip!
-   }
- 
-   {
-@@ -751,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
- // all at once. Output is the reconstructed block in *yuv_out, and the
- // quantized levels in *levels.
- 
--static int ReconstructIntra16(VP8EncIterator* const it,
--                              VP8ModeScore* const rd,
--                              uint8_t* const yuv_out,
-+static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it,
-+                              VP8ModeScore* WEBP_RESTRICT const rd,
-+                              uint8_t* WEBP_RESTRICT const yuv_out,
-                               int mode) {
-   const VP8Encoder* const enc = it->enc_;
-   const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-@@ -775,9 +793,9 @@ static int ReconstructIntra16(VP8EncIterator* const it,
-     for (y = 0, n = 0; y < 4; ++y) {
-       for (x = 0; x < 4; ++x, ++n) {
-         const int ctx = it->top_nz_[x] + it->left_nz_[y];
--        const int non_zero =
--            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
--                                 &dqm->y1_, dqm->lambda_trellis_i16_);
-+        const int non_zero = TrellisQuantizeBlock(
-+            enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
-+            dqm->lambda_trellis_i16_);
-         it->top_nz_[x] = it->left_nz_[y] = non_zero;
-         rd->y_ac_levels[n][0] = 0;
-         nz |= non_zero << n;
-@@ -803,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it,
-   return nz;
- }
- 
--static int ReconstructIntra4(VP8EncIterator* const it,
-+static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it,
-                              int16_t levels[16],
--                             const uint8_t* const src,
--                             uint8_t* const yuv_out,
-+                             const uint8_t* WEBP_RESTRICT const src,
-+                             uint8_t* WEBP_RESTRICT const yuv_out,
-                              int mode) {
-   const VP8Encoder* const enc = it->enc_;
-   const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
-@@ -818,7 +836,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
-   if (DO_TRELLIS_I4 && it->do_trellis_) {
-     const int x = it->i4_ & 3, y = it->i4_ >> 2;
-     const int ctx = it->top_nz_[x] + it->left_nz_[y];
--    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
-+    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
-                               dqm->lambda_trellis_i4_);
-   } else {
-     nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
-@@ -839,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it,
- 
- // Quantize as usual, but also compute and return the quantization error.
- // Error is already divided by DSHIFT.
--static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
-+static int QuantizeSingle(int16_t* WEBP_RESTRICT const v,
-+                          const VP8Matrix* WEBP_RESTRICT const mtx) {
-   int V = *v;
-   const int sign = (V < 0);
-   if (sign) V = -V;
-@@ -853,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) {
-   return (sign ? -V : V) >> DSCALE;
- }
- 
--static void CorrectDCValues(const VP8EncIterator* const it,
--                            const VP8Matrix* const mtx,
--                            int16_t tmp[][16], VP8ModeScore* const rd) {
-+static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it,
-+                            const VP8Matrix* WEBP_RESTRICT const mtx,
-+                            int16_t tmp[][16],
-+                            VP8ModeScore* WEBP_RESTRICT const rd) {
-   //         | top[0] | top[1]
-   // --------+--------+---------
-   // left[0] | tmp[0]   tmp[1]  <->   err0 err1
-@@ -886,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it,
-   }
- }
- 
--static void StoreDiffusionErrors(VP8EncIterator* const it,
--                                 const VP8ModeScore* const rd) {
-+static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it,
-+                                 const VP8ModeScore* WEBP_RESTRICT const rd) {
-   int ch;
-   for (ch = 0; ch <= 1; ++ch) {
-     int8_t* const top = it->top_derr_[it->x_][ch];
-@@ -906,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it,
- 
- //------------------------------------------------------------------------------
- 
--static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
--                         uint8_t* const yuv_out, int mode) {
-+static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it,
-+                         VP8ModeScore* WEBP_RESTRICT const rd,
-+                         uint8_t* WEBP_RESTRICT const yuv_out, int mode) {
-   const VP8Encoder* const enc = it->enc_;
-   const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
-   const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
-@@ -927,9 +948,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
-       for (y = 0; y < 2; ++y) {
-         for (x = 0; x < 2; ++x, ++n) {
-           const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
--          const int non_zero =
--              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
--                                   &dqm->uv_, dqm->lambda_trellis_uv_);
-+          const int non_zero = TrellisQuantizeBlock(
-+              enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
-+              dqm->lambda_trellis_uv_);
-           it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
-           nz |= non_zero << n;
-         }
-@@ -978,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) {
-   SwapPtr(&it->yuv_out_, &it->yuv_out2_);
- }
- 
--static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
-+static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it,
-+                            VP8ModeScore* WEBP_RESTRICT rd) {
-   const int kNumBlocks = 16;
-   VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
-   const int lambda = dqm->lambda_i16_;
-@@ -1038,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
- //------------------------------------------------------------------------------
- 
- // return the cost array corresponding to the surrounding prediction modes.
--static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
-+static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it,
-                                      const uint8_t modes[16]) {
-   const int preds_w = it->enc_->preds_w_;
-   const int x = (it->i4_ & 3), y = it->i4_ >> 2;
-@@ -1047,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
-   return VP8FixedCostsI4[top][left];
- }
- 
--static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
-+static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it,
-+                          VP8ModeScore* WEBP_RESTRICT const rd) {
-   const VP8Encoder* const enc = it->enc_;
-   const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
-   const int lambda = dqm->lambda_i4_;
-@@ -1143,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
- 
- //------------------------------------------------------------------------------
- 
--static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
-+static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it,
-+                       VP8ModeScore* WEBP_RESTRICT const rd) {
-   const int kNumBlocks = 8;
-   const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
-   const int lambda = dqm->lambda_uv_;
-@@ -1195,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
- //------------------------------------------------------------------------------
- // Final reconstruction and quantization.
- 
--static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
-+static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it,
-+                           VP8ModeScore* WEBP_RESTRICT const rd) {
-   const VP8Encoder* const enc = it->enc_;
-   const int is_i16 = (it->mb_->type_ == 1);
-   int nz = 0;
-@@ -1220,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
- }
- 
- // Refine intra16/intra4 sub-modes based on distortion only (not rate).
--static void RefineUsingDistortion(VP8EncIterator* const it,
-+static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it,
-                                   int try_both_modes, int refine_uv_mode,
--                                  VP8ModeScore* const rd) {
-+                                  VP8ModeScore* WEBP_RESTRICT const rd) {
-   score_t best_score = MAX_COST;
-   int nz = 0;
-   int mode;
-@@ -1336,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
- //------------------------------------------------------------------------------
- // Entry point
- 
--int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
-+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
-+                VP8ModeScore* WEBP_RESTRICT const rd,
-                 VP8RDLevel rd_opt) {
-   int is_skipped;
-   const int method = it->enc_->method_;
-diff --git a/3rdparty/libwebp/src/enc/syntax_enc.c b/3rdparty/libwebp/src/enc/syntax_enc.c
-index a9e5a6cf0fec..9b8f524d6981 100644
---- a/3rdparty/libwebp/src/enc/syntax_enc.c
-+++ b/3rdparty/libwebp/src/enc/syntax_enc.c
-@@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
-     buf[3 * p + 1] = (part_size >>  8) & 0xff;
-     buf[3 * p + 2] = (part_size >> 16) & 0xff;
-   }
--  return p ? pic->writer(buf, 3 * p, pic) : 1;
-+  if (p && !pic->writer(buf, 3 * p, pic)) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
-+  }
-+  return 1;
- }
- 
- //------------------------------------------------------------------------------
-@@ -349,7 +352,7 @@ int VP8EncWrite(VP8Encoder* const enc) {
-                                        (enc->alpha_data_size_ & 1);
-     riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
-   }
--  // Sanity check.
-+  // RIFF size should fit in 32-bits.
-   if (riff_size > 0xfffffffeU) {
-     return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
-   }
-@@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) {
- 
-   enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
-   ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
-+  if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
-   return ok;
- }
- 
-diff --git a/3rdparty/libwebp/src/enc/vp8i_enc.h b/3rdparty/libwebp/src/enc/vp8i_enc.h
-index 0e35562a8c9a..19d9a6edb77d 100644
---- a/3rdparty/libwebp/src/enc/vp8i_enc.h
-+++ b/3rdparty/libwebp/src/enc/vp8i_enc.h
-@@ -31,8 +31,8 @@ extern "C" {
- 
- // version numbers
- #define ENC_MAJ_VERSION 1
--#define ENC_MIN_VERSION 2
--#define ENC_REV_VERSION 0
-+#define ENC_MIN_VERSION 3
-+#define ENC_REV_VERSION 1
- 
- enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
-        MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
-@@ -286,8 +286,7 @@ int VP8IteratorNext(VP8EncIterator* const it);
- // save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
- void VP8IteratorSaveBoundary(VP8EncIterator* const it);
- // Report progression based on macroblock rows. Return 0 for user-abort request.
--int VP8IteratorProgress(const VP8EncIterator* const it,
--                        int final_delta_percent);
-+int VP8IteratorProgress(const VP8EncIterator* const it, int delta);
- // Intra4x4 iterations
- void VP8IteratorStartI4(VP8EncIterator* const it);
- // returns true if not done.
-@@ -471,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc);
- // Sets up segment's quantization values, base_quant_ and filter strengths.
- void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
- // Pick best modes and fills the levels. Returns true if skipped.
--int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
-+int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it,
-+                VP8ModeScore* WEBP_RESTRICT const rd,
-                 VP8RDLevel rd_opt);
- 
-   // in alpha.c
-@@ -491,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta);
- 
-   // misc utils for picture_*.c:
- 
-+// Returns true if 'picture' is non-NULL and dimensions/colorspace are within
-+// their valid ranges. If returning false, the 'error_code' in 'picture' is
-+// updated.
-+int WebPValidatePicture(const WebPPicture* const picture);
-+
- // Remove reference to the ARGB/YUVA buffer (doesn't free anything).
- void WebPPictureResetBuffers(WebPPicture* const picture);
- 
--// Allocates ARGB buffer of given dimension (previous one is always free'd).
--// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
--// out-of-memory).
--int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
-+// Allocates ARGB buffer according to set width/height (previous one is
-+// always free'd). Preserves the YUV(A) buffer. Returns false in case of error
-+// (invalid param, out-of-memory).
-+int WebPPictureAllocARGB(WebPPicture* const picture);
- 
--// Allocates YUVA buffer of given dimension (previous one is always free'd).
--// Uses picture->csp to determine whether an alpha buffer is needed.
-+// Allocates YUVA buffer according to set width/height (previous one is always
-+// free'd). Uses picture->csp to determine whether an alpha buffer is needed.
- // Preserves the ARGB buffer.
- // Returns false in case of error (invalid param, out-of-memory).
--int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
-+int WebPPictureAllocYUVA(WebPPicture* const picture);
- 
- // Replace samples that are fully transparent by 'color' to help compressibility
- // (no guarantee, though). Assumes pic->use_argb is true.
-diff --git a/3rdparty/libwebp/src/enc/vp8l_enc.c b/3rdparty/libwebp/src/enc/vp8l_enc.c
-index 0b44ebe46ec5..c43d990d17e6 100644
---- a/3rdparty/libwebp/src/enc/vp8l_enc.c
-+++ b/3rdparty/libwebp/src/enc/vp8l_enc.c
-@@ -15,128 +15,25 @@
- #include <assert.h>
- #include <stdlib.h>
- 
-+#include "src/dsp/lossless.h"
-+#include "src/dsp/lossless_common.h"
- #include "src/enc/backward_references_enc.h"
- #include "src/enc/histogram_enc.h"
- #include "src/enc/vp8i_enc.h"
- #include "src/enc/vp8li_enc.h"
--#include "src/dsp/lossless.h"
--#include "src/dsp/lossless_common.h"
- #include "src/utils/bit_writer_utils.h"
- #include "src/utils/huffman_encode_utils.h"
-+#include "src/utils/palette.h"
- #include "src/utils/utils.h"
-+#include "src/webp/encode.h"
- #include "src/webp/format_constants.h"
- 
- // Maximum number of histogram images (sub-blocks).
- #define MAX_HUFF_IMAGE_SIZE       2600
- 
--// Palette reordering for smaller sum of deltas (and for smaller storage).
--
--static int PaletteCompareColorsForQsort(const void* p1, const void* p2) {
--  const uint32_t a = WebPMemToUint32((uint8_t*)p1);
--  const uint32_t b = WebPMemToUint32((uint8_t*)p2);
--  assert(a != b);
--  return (a < b) ? -1 : 1;
--}
--
--static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) {
--  return (v <= 128) ? v : (256 - v);
--}
--
--// Computes a value that is related to the entropy created by the
--// palette entry diff.
--//
--// Note that the last & 0xff is a no-operation in the next statement, but
--// removed by most compilers and is here only for regularity of the code.
--static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) {
--  const uint32_t diff = VP8LSubPixels(col1, col2);
--  const int kMoreWeightForRGBThanForAlpha = 9;
--  uint32_t score;
--  score =  PaletteComponentDistance((diff >>  0) & 0xff);
--  score += PaletteComponentDistance((diff >>  8) & 0xff);
--  score += PaletteComponentDistance((diff >> 16) & 0xff);
--  score *= kMoreWeightForRGBThanForAlpha;
--  score += PaletteComponentDistance((diff >> 24) & 0xff);
--  return score;
--}
--
--static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) {
--  const uint32_t tmp = *col1;
--  *col1 = *col2;
--  *col2 = tmp;
--}
--
--static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) {
--  // Find greedily always the closest color of the predicted color to minimize
--  // deltas in the palette. This reduces storage needs since the
--  // palette is stored with delta encoding.
--  uint32_t predict = 0x00000000;
--  int i, k;
--  for (i = 0; i < num_colors; ++i) {
--    int best_ix = i;
--    uint32_t best_score = ~0U;
--    for (k = i; k < num_colors; ++k) {
--      const uint32_t cur_score = PaletteColorDistance(palette[k], predict);
--      if (best_score > cur_score) {
--        best_score = cur_score;
--        best_ix = k;
--      }
--    }
--    SwapColor(&palette[best_ix], &palette[i]);
--    predict = palette[i];
--  }
--}
--
--// The palette has been sorted by alpha. This function checks if the other
--// components of the palette have a monotonic development with regards to
--// position in the palette. If all have monotonic development, there is
--// no benefit to re-organize them greedily. A monotonic development
--// would be spotted in green-only situations (like lossy alpha) or gray-scale
--// images.
--static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) {
--  uint32_t predict = 0x000000;
--  int i;
--  uint8_t sign_found = 0x00;
--  for (i = 0; i < num_colors; ++i) {
--    const uint32_t diff = VP8LSubPixels(palette[i], predict);
--    const uint8_t rd = (diff >> 16) & 0xff;
--    const uint8_t gd = (diff >>  8) & 0xff;
--    const uint8_t bd = (diff >>  0) & 0xff;
--    if (rd != 0x00) {
--      sign_found |= (rd < 0x80) ? 1 : 2;
--    }
--    if (gd != 0x00) {
--      sign_found |= (gd < 0x80) ? 8 : 16;
--    }
--    if (bd != 0x00) {
--      sign_found |= (bd < 0x80) ? 64 : 128;
--    }
--    predict = palette[i];
--  }
--  return (sign_found & (sign_found << 1)) != 0;  // two consequent signs.
--}
--
- // -----------------------------------------------------------------------------
- // Palette
- 
--// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
--// creates a palette and returns true, else returns false.
--static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
--                                   int low_effort,
--                                   uint32_t palette[MAX_PALETTE_SIZE],
--                                   int* const palette_size) {
--  const int num_colors = WebPGetColorPalette(pic, palette);
--  if (num_colors > MAX_PALETTE_SIZE) {
--    *palette_size = 0;
--    return 0;
--  }
--  *palette_size = num_colors;
--  qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
--  if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) {
--    GreedyMinimizeDeltas(palette, num_colors);
--  }
--  return 1;
--}
--
- // These five modes are evaluated and their respective entropy is computed.
- typedef enum {
-   kDirect = 0,
-@@ -165,10 +62,11 @@ typedef enum {
-   kHistoTotal  // Must be last.
- } HistoIx;
- 
--static void AddSingleSubGreen(int p, uint32_t* const r, uint32_t* const b) {
--  const int green = p >> 8;  // The upper bits are masked away later.
--  ++r[((p >> 16) - green) & 0xff];
--  ++b[((p >>  0) - green) & 0xff];
-+static void AddSingleSubGreen(uint32_t p,
-+                              uint32_t* const r, uint32_t* const b) {
-+  const int green = (int)p >> 8;  // The upper bits are masked away later.
-+  ++r[(((int)p >> 16) - green) & 0xff];
-+  ++b[(((int)p >>  0) - green) & 0xff];
- }
- 
- static void AddSingle(uint32_t p,
-@@ -242,8 +140,8 @@ static int AnalyzeEntropy(const uint32_t* argb,
-       curr_row += argb_stride;
-     }
-     {
--      double entropy_comp[kHistoTotal];
--      double entropy[kNumEntropyIx];
-+      float entropy_comp[kHistoTotal];
-+      float entropy[kNumEntropyIx];
-       int k;
-       int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen;
-       int j;
-@@ -362,11 +260,14 @@ typedef struct {
- } CrunchSubConfig;
- typedef struct {
-   int entropy_idx_;
-+  PaletteSorting palette_sorting_type_;
-   CrunchSubConfig sub_configs_[CRUNCH_SUBCONFIGS_MAX];
-   int sub_configs_size_;
- } CrunchConfig;
- 
--#define CRUNCH_CONFIGS_MAX kNumEntropyIx
-+// +2 because we add a palette sorting configuration for kPalette and
-+// kPaletteAndSpatial.
-+#define CRUNCH_CONFIGS_MAX (kNumEntropyIx + 2 * kPaletteSortingNum)
- 
- static int EncoderAnalyze(VP8LEncoder* const enc,
-                           CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX],
-@@ -386,9 +287,12 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
-   int do_no_cache = 0;
-   assert(pic != NULL && pic->argb != NULL);
- 
--  use_palette =
--      AnalyzeAndCreatePalette(pic, low_effort,
--                              enc->palette_, &enc->palette_size_);
-+  // Check whether a palette is possible.
-+  enc->palette_size_ = GetColorPalette(pic, enc->palette_sorted_);
-+  use_palette = (enc->palette_size_ <= MAX_PALETTE_SIZE);
-+  if (!use_palette) {
-+    enc->palette_size_ = 0;
-+  }
- 
-   // Empirical bit sizes.
-   enc->histo_bits_ = GetHistoBits(method, use_palette,
-@@ -398,6 +302,8 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
-   if (low_effort) {
-     // AnalyzeEntropy is somewhat slow.
-     crunch_configs[0].entropy_idx_ = use_palette ? kPalette : kSpatialSubGreen;
-+    crunch_configs[0].palette_sorting_type_ =
-+        use_palette ? kSortedDefault : kUnusedPalette;
-     n_lz77s = 1;
-     *crunch_configs_size = 1;
-   } else {
-@@ -418,13 +324,37 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
-         // a palette.
-         if ((i != kPalette && i != kPaletteAndSpatial) || use_palette) {
-           assert(*crunch_configs_size < CRUNCH_CONFIGS_MAX);
--          crunch_configs[(*crunch_configs_size)++].entropy_idx_ = i;
-+          if (use_palette && (i == kPalette || i == kPaletteAndSpatial)) {
-+            int sorting_method;
-+            for (sorting_method = 0; sorting_method < kPaletteSortingNum;
-+                 ++sorting_method) {
-+              const PaletteSorting typed_sorting_method =
-+                  (PaletteSorting)sorting_method;
-+              // TODO(vrabaud) kSortedDefault should be tested. It is omitted
-+              // for now for backward compatibility.
-+              if (typed_sorting_method == kUnusedPalette ||
-+                  typed_sorting_method == kSortedDefault) {
-+                continue;
-+              }
-+              crunch_configs[(*crunch_configs_size)].entropy_idx_ = i;
-+              crunch_configs[(*crunch_configs_size)].palette_sorting_type_ =
-+                  typed_sorting_method;
-+              ++*crunch_configs_size;
-+            }
-+          } else {
-+            crunch_configs[(*crunch_configs_size)].entropy_idx_ = i;
-+            crunch_configs[(*crunch_configs_size)].palette_sorting_type_ =
-+                kUnusedPalette;
-+            ++*crunch_configs_size;
-+          }
-         }
-       }
-     } else {
-       // Only choose the guessed best transform.
-       *crunch_configs_size = 1;
-       crunch_configs[0].entropy_idx_ = min_entropy_ix;
-+      crunch_configs[0].palette_sorting_type_ =
-+          use_palette ? kMinimizeDelta : kUnusedPalette;
-       if (config->quality >= 75 && method == 5) {
-         // Test with and without color cache.
-         do_no_cache = 1;
-@@ -432,6 +362,7 @@ static int EncoderAnalyze(VP8LEncoder* const enc,
-         if (min_entropy_ix == kPalette) {
-           *crunch_configs_size = 2;
-           crunch_configs[1].entropy_idx_ = kPaletteAndSpatial;
-+          crunch_configs[1].palette_sorting_type_ = kMinimizeDelta;
-         }
-       }
-     }
-@@ -730,11 +661,11 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
-   VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits);
- }
- 
--static WebPEncodingError StoreImageToBitMask(
-+static int StoreImageToBitMask(
-     VP8LBitWriter* const bw, int width, int histo_bits,
-     const VP8LBackwardRefs* const refs,
-     const uint16_t* histogram_symbols,
--    const HuffmanTreeCode* const huffman_codes) {
-+    const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) {
-   const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
-   const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
-   // x and y trace the position in the image.
-@@ -787,44 +718,52 @@ static WebPEncodingError StoreImageToBitMask(
-     }
-     VP8LRefsCursorNext(&c);
-   }
--  return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK;
-+  if (bw->error_) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+  }
-+  return 1;
- }
- 
--// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
--static WebPEncodingError EncodeImageNoHuffman(
--    VP8LBitWriter* const bw, const uint32_t* const argb,
--    VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array,
--    int width, int height, int quality, int low_effort) {
-+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31.
-+// pic and percent are for progress.
-+static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
-+                                const uint32_t* const argb,
-+                                VP8LHashChain* const hash_chain,
-+                                VP8LBackwardRefs* const refs_array, int width,
-+                                int height, int quality, int low_effort,
-+                                const WebPPicture* const pic, int percent_range,
-+                                int* const percent) {
-   int i;
-   int max_tokens = 0;
--  WebPEncodingError err = VP8_ENC_OK;
-   VP8LBackwardRefs* refs;
-   HuffmanTreeToken* tokens = NULL;
--  HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };
--  const uint16_t histogram_symbols[1] = { 0 };    // only one tree, one symbol
-+  HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}};
-+  const uint16_t histogram_symbols[1] = {0};  // only one tree, one symbol
-   int cache_bits = 0;
-   VP8LHistogramSet* histogram_image = NULL;
-   HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
--        3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
-+      3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
-   if (huff_tree == NULL) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-   // Calculate backward references from ARGB image.
--  if (!VP8LHashChainFill(hash_chain, quality, argb, width, height,
--                         low_effort)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+  if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, low_effort,
-+                         pic, percent_range / 2, percent)) {
-+    goto Error;
-+  }
-+  if (!VP8LGetBackwardReferences(width, height, argb, quality, /*low_effort=*/0,
-+                                 kLZ77Standard | kLZ77RLE, cache_bits,
-+                                 /*do_no_cache=*/0, hash_chain, refs_array,
-+                                 &cache_bits, pic,
-+                                 percent_range - percent_range / 2, percent)) {
-     goto Error;
-   }
--  err = VP8LGetBackwardReferences(
--      width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE,
--      cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits);
--  if (err != VP8_ENC_OK) goto Error;
-   refs = &refs_array[0];
-   histogram_image = VP8LAllocateHistogramSet(1, cache_bits);
-   if (histogram_image == NULL) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
-   VP8LHistogramSetClear(histogram_image);
-@@ -835,7 +774,7 @@ static WebPEncodingError EncodeImageNoHuffman(
-   // Create Huffman bit lengths and codes for each histogram image.
-   assert(histogram_image->size == 1);
-   if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-@@ -852,7 +791,7 @@ static WebPEncodingError EncodeImageNoHuffman(
- 
-   tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
-   if (tokens == NULL) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-@@ -864,27 +803,32 @@ static WebPEncodingError EncodeImageNoHuffman(
-   }
- 
-   // Store actual literals.
--  err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols,
--                            huffman_codes);
-+  if (!StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, huffman_codes,
-+                           pic)) {
-+    goto Error;
-+  }
- 
-  Error:
-   WebPSafeFree(tokens);
-   WebPSafeFree(huff_tree);
-   VP8LFreeHistogramSet(histogram_image);
-   WebPSafeFree(huffman_codes[0].codes);
--  return err;
-+  return (pic->error_code == VP8_ENC_OK);
- }
- 
--static WebPEncodingError EncodeImageInternal(
-+// pic and percent are for progress.
-+static int EncodeImageInternal(
-     VP8LBitWriter* const bw, const uint32_t* const argb,
-     VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width,
-     int height, int quality, int low_effort, int use_cache,
-     const CrunchConfig* const config, int* cache_bits, int histogram_bits,
--    size_t init_byte_position, int* const hdr_size, int* const data_size) {
--  WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    size_t init_byte_position, int* const hdr_size, int* const data_size,
-+    const WebPPicture* const pic, int percent_range, int* const percent) {
-   const uint32_t histogram_image_xysize =
-       VP8LSubSampleSize(width, histogram_bits) *
-       VP8LSubSampleSize(height, histogram_bits);
-+  int remaining_percent = percent_range;
-+  int percent_start = *percent;
-   VP8LHistogramSet* histogram_image = NULL;
-   VP8LHistogram* tmp_histo = NULL;
-   int histogram_image_size = 0;
-@@ -893,9 +837,8 @@ static WebPEncodingError EncodeImageInternal(
-       3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
-   HuffmanTreeToken* tokens = NULL;
-   HuffmanTreeCode* huffman_codes = NULL;
--  uint16_t* const histogram_symbols =
--      (uint16_t*)WebPSafeMalloc(histogram_image_xysize,
--                                sizeof(*histogram_symbols));
-+  uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc(
-+      histogram_image_xysize, sizeof(*histogram_symbols));
-   int sub_configs_idx;
-   int cache_bits_init, write_histogram_image;
-   VP8LBitWriter bw_init = *bw, bw_best;
-@@ -907,14 +850,27 @@ static WebPEncodingError EncodeImageInternal(
-   assert(hdr_size != NULL);
-   assert(data_size != NULL);
- 
--  // Make sure we can allocate the different objects.
-   memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram));
-+  if (!VP8LBitWriterInit(&bw_best, 0)) {
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    goto Error;
-+  }
-+
-+  // Make sure we can allocate the different objects.
-   if (huff_tree == NULL || histogram_symbols == NULL ||
--      !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) ||
--      !VP8LHashChainFill(hash_chain, quality, argb, width, height,
--                         low_effort)) {
-+      !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) {
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+    goto Error;
-+  }
-+
-+  percent_range = remaining_percent / 5;
-+  if (!VP8LHashChainFill(hash_chain, quality, argb, width, height,
-+                         low_effort, pic, percent_range, percent)) {
-     goto Error;
-   }
-+  percent_start += percent_range;
-+  remaining_percent -= percent_range;
-+
-   if (use_cache) {
-     // If the value is different from zero, it has been set during the
-     // palette analysis.
-@@ -923,22 +879,27 @@ static WebPEncodingError EncodeImageInternal(
-     cache_bits_init = 0;
-   }
-   // If several iterations will happen, clone into bw_best.
--  if (!VP8LBitWriterInit(&bw_best, 0) ||
--      ((config->sub_configs_size_ > 1 ||
--        config->sub_configs_[0].do_no_cache_) &&
--       !VP8LBitWriterClone(bw, &bw_best))) {
-+  if ((config->sub_configs_size_ > 1 || config->sub_configs_[0].do_no_cache_) &&
-+      !VP8LBitWriterClone(bw, &bw_best)) {
-+    WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
-+
-   for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_;
-        ++sub_configs_idx) {
-     const CrunchSubConfig* const sub_config =
-         &config->sub_configs_[sub_configs_idx];
-     int cache_bits_best, i_cache;
--    err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort,
--                                    sub_config->lz77_, cache_bits_init,
--                                    sub_config->do_no_cache_, hash_chain,
--                                    &refs_array[0], &cache_bits_best);
--    if (err != VP8_ENC_OK) goto Error;
-+    int i_remaining_percent = remaining_percent / config->sub_configs_size_;
-+    int i_percent_range = i_remaining_percent / 4;
-+    i_remaining_percent -= i_percent_range;
-+
-+    if (!VP8LGetBackwardReferences(
-+            width, height, argb, quality, low_effort, sub_config->lz77_,
-+            cache_bits_init, sub_config->do_no_cache_, hash_chain,
-+            &refs_array[0], &cache_bits_best, pic, i_percent_range, percent)) {
-+      goto Error;
-+    }
- 
-     for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) {
-       const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0;
-@@ -953,11 +914,17 @@ static WebPEncodingError EncodeImageInternal(
-       histogram_image =
-           VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp);
-       tmp_histo = VP8LAllocateHistogram(cache_bits_tmp);
--      if (histogram_image == NULL || tmp_histo == NULL ||
--          !VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache],
--                                    quality, low_effort, histogram_bits,
--                                    cache_bits_tmp, histogram_image, tmp_histo,
--                                    histogram_symbols)) {
-+      if (histogram_image == NULL || tmp_histo == NULL) {
-+        WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+        goto Error;
-+      }
-+
-+      i_percent_range = i_remaining_percent / 3;
-+      i_remaining_percent -= i_percent_range;
-+      if (!VP8LGetHistoImageSymbols(
-+              width, height, &refs_array[i_cache], quality, low_effort,
-+              histogram_bits, cache_bits_tmp, histogram_image, tmp_histo,
-+              histogram_symbols, pic, i_percent_range, percent)) {
-         goto Error;
-       }
-       // Create Huffman bit lengths and codes for each histogram image.
-@@ -970,6 +937,7 @@ static WebPEncodingError EncodeImageInternal(
-       // GetHuffBitLengthsAndCodes().
-       if (huffman_codes == NULL ||
-           !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
-+        WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-         goto Error;
-       }
-       // Free combined histograms.
-@@ -992,12 +960,14 @@ static WebPEncodingError EncodeImageInternal(
-       write_histogram_image = (histogram_image_size > 1);
-       VP8LPutBits(bw, write_histogram_image, 1);
-       if (write_histogram_image) {
--        uint32_t* const histogram_argb =
--            (uint32_t*)WebPSafeMalloc(histogram_image_xysize,
--                                      sizeof(*histogram_argb));
-+        uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc(
-+            histogram_image_xysize, sizeof(*histogram_argb));
-         int max_index = 0;
-         uint32_t i;
--        if (histogram_argb == NULL) goto Error;
-+        if (histogram_argb == NULL) {
-+          WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+          goto Error;
-+        }
-         for (i = 0; i < histogram_image_xysize; ++i) {
-           const int symbol_index = histogram_symbols[i] & 0xffff;
-           histogram_argb[i] = (symbol_index << 8);
-@@ -1008,12 +978,17 @@ static WebPEncodingError EncodeImageInternal(
-         histogram_image_size = max_index;
- 
-         VP8LPutBits(bw, histogram_bits - 2, 3);
--        err = EncodeImageNoHuffman(
--            bw, histogram_argb, &hash_chain_histogram, &refs_array[2],
--            VP8LSubSampleSize(width, histogram_bits),
--            VP8LSubSampleSize(height, histogram_bits), quality, low_effort);
-+        i_percent_range = i_remaining_percent / 2;
-+        i_remaining_percent -= i_percent_range;
-+        if (!EncodeImageNoHuffman(
-+                bw, histogram_argb, &hash_chain_histogram, &refs_array[2],
-+                VP8LSubSampleSize(width, histogram_bits),
-+                VP8LSubSampleSize(height, histogram_bits), quality, low_effort,
-+                pic, i_percent_range, percent)) {
-+          WebPSafeFree(histogram_argb);
-+          goto Error;
-+        }
-         WebPSafeFree(histogram_argb);
--        if (err != VP8_ENC_OK) goto Error;
-       }
- 
-       // Store Huffman codes.
-@@ -1028,7 +1003,10 @@ static WebPEncodingError EncodeImageInternal(
-           }
-         }
-         tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
--        if (tokens == NULL) goto Error;
-+        if (tokens == NULL) {
-+          WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+          goto Error;
-+        }
-         for (i = 0; i < 5 * histogram_image_size; ++i) {
-           HuffmanTreeCode* const codes = &huffman_codes[i];
-           StoreHuffmanCode(bw, huff_tree, tokens, codes);
-@@ -1037,9 +1015,10 @@ static WebPEncodingError EncodeImageInternal(
-       }
-       // Store actual literals.
-       hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
--      err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache],
--                                histogram_symbols, huffman_codes);
--      if (err != VP8_ENC_OK) goto Error;
-+      if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache],
-+                               histogram_symbols, huffman_codes, pic)) {
-+        goto Error;
-+      }
-       // Keep track of the smallest image so far.
-       if (VP8LBitWriterNumBytes(bw) < bw_size_best) {
-         bw_size_best = VP8LBitWriterNumBytes(bw);
-@@ -1059,7 +1038,10 @@ static WebPEncodingError EncodeImageInternal(
-     }
-   }
-   VP8LBitWriterSwap(bw, &bw_best);
--  err = VP8_ENC_OK;
-+
-+  if (!WebPReportProgress(pic, percent_start + remaining_percent, percent)) {
-+    goto Error;
-+  }
- 
-  Error:
-   WebPSafeFree(tokens);
-@@ -1073,7 +1055,7 @@ static WebPEncodingError EncodeImageInternal(
-   }
-   WebPSafeFree(histogram_symbols);
-   VP8LBitWriterWipeOut(&bw_best);
--  return err;
-+  return (pic->error_code == VP8_ENC_OK);
- }
- 
- // -----------------------------------------------------------------------------
-@@ -1082,26 +1064,27 @@ static WebPEncodingError EncodeImageInternal(
- static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
-                                VP8LBitWriter* const bw) {
-   VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
--  VP8LPutBits(bw, SUBTRACT_GREEN, 2);
-+  VP8LPutBits(bw, SUBTRACT_GREEN_TRANSFORM, 2);
-   VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
- }
- 
--static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
--                                            int width, int height,
--                                            int quality, int low_effort,
--                                            int used_subtract_green,
--                                            VP8LBitWriter* const bw) {
-+static int ApplyPredictFilter(const VP8LEncoder* const enc, int width,
-+                              int height, int quality, int low_effort,
-+                              int used_subtract_green, VP8LBitWriter* const bw,
-+                              int percent_range, int* const percent) {
-   const int pred_bits = enc->transform_bits_;
-   const int transform_width = VP8LSubSampleSize(width, pred_bits);
-   const int transform_height = VP8LSubSampleSize(height, pred_bits);
-   // we disable near-lossless quantization if palette is used.
--  const int near_lossless_strength = enc->use_palette_ ? 100
--                                   : enc->config_->near_lossless;
-+  const int near_lossless_strength =
-+      enc->use_palette_ ? 100 : enc->config_->near_lossless;
- 
--  VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_,
--                    enc->argb_scratch_, enc->transform_data_,
--                    near_lossless_strength, enc->config_->exact,
--                    used_subtract_green);
-+  if (!VP8LResidualImage(
-+          width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_,
-+          enc->transform_data_, near_lossless_strength, enc->config_->exact,
-+          used_subtract_green, enc->pic_, percent_range / 2, percent)) {
-+    return 0;
-+  }
-   VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
-   VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
-   assert(pred_bits >= 2);
-@@ -1109,19 +1092,23 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
-   return EncodeImageNoHuffman(
-       bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
-       (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
--      quality, low_effort);
-+      quality, low_effort, enc->pic_, percent_range - percent_range / 2,
-+      percent);
- }
- 
--static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
--                                               int width, int height,
--                                               int quality, int low_effort,
--                                               VP8LBitWriter* const bw) {
-+static int ApplyCrossColorFilter(const VP8LEncoder* const enc, int width,
-+                                 int height, int quality, int low_effort,
-+                                 VP8LBitWriter* const bw, int percent_range,
-+                                 int* const percent) {
-   const int ccolor_transform_bits = enc->transform_bits_;
-   const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
-   const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
- 
--  VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
--                          enc->argb_, enc->transform_data_);
-+  if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
-+                               enc->argb_, enc->transform_data_, enc->pic_,
-+                               percent_range / 2, percent)) {
-+    return 0;
-+  }
-   VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
-   VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
-   assert(ccolor_transform_bits >= 2);
-@@ -1129,23 +1116,21 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
-   return EncodeImageNoHuffman(
-       bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_,
-       (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height,
--      quality, low_effort);
-+      quality, low_effort, enc->pic_, percent_range - percent_range / 2,
-+      percent);
- }
- 
- // -----------------------------------------------------------------------------
- 
--static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
--                                         size_t riff_size, size_t vp8l_size) {
-+static int WriteRiffHeader(const WebPPicture* const pic, size_t riff_size,
-+                           size_t vp8l_size) {
-   uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
-     'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P',
-     'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE,
-   };
-   PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
-   PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size);
--  if (!pic->writer(riff, sizeof(riff), pic)) {
--    return VP8_ENC_ERROR_BAD_WRITE;
--  }
--  return VP8_ENC_OK;
-+  return pic->writer(riff, sizeof(riff), pic);
- }
- 
- static int WriteImageSize(const WebPPicture* const pic,
-@@ -1165,36 +1150,32 @@ static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
-   return !bw->error_;
- }
- 
--static WebPEncodingError WriteImage(const WebPPicture* const pic,
--                                    VP8LBitWriter* const bw,
--                                    size_t* const coded_size) {
--  WebPEncodingError err = VP8_ENC_OK;
-+static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw,
-+                      size_t* const coded_size) {
-   const uint8_t* const webpll_data = VP8LBitWriterFinish(bw);
-   const size_t webpll_size = VP8LBitWriterNumBytes(bw);
-   const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size;
-   const size_t pad = vp8l_size & 1;
-   const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
-+  *coded_size = 0;
- 
--  err = WriteRiffHeader(pic, riff_size, vp8l_size);
--  if (err != VP8_ENC_OK) goto Error;
-+  if (bw->error_) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+  }
- 
--  if (!pic->writer(webpll_data, webpll_size, pic)) {
--    err = VP8_ENC_ERROR_BAD_WRITE;
--    goto Error;
-+  if (!WriteRiffHeader(pic, riff_size, vp8l_size) ||
-+      !pic->writer(webpll_data, webpll_size, pic)) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
-   }
- 
-   if (pad) {
-     const uint8_t pad_byte[1] = { 0 };
-     if (!pic->writer(pad_byte, 1, pic)) {
--      err = VP8_ENC_ERROR_BAD_WRITE;
--      goto Error;
-+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
-     }
-   }
-   *coded_size = CHUNK_HEADER_SIZE + riff_size;
--  return VP8_ENC_OK;
--
-- Error:
--  return err;
-+  return 1;
- }
- 
- // -----------------------------------------------------------------------------
-@@ -1210,36 +1191,32 @@ static void ClearTransformBuffer(VP8LEncoder* const enc) {
- // Flags influencing the memory allocated:
- //  enc->transform_bits_
- //  enc->use_predict_, enc->use_cross_color_
--static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
--                                                 int width, int height) {
--  WebPEncodingError err = VP8_ENC_OK;
--  const uint64_t image_size = width * height;
-+static int AllocateTransformBuffer(VP8LEncoder* const enc, int width,
-+                                   int height) {
-+  const uint64_t image_size = (uint64_t)width * height;
-   // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra
-   // pixel in each, plus 2 regular scanlines of bytes.
-   // TODO(skal): Clean up by using arithmetic in bytes instead of words.
-   const uint64_t argb_scratch_size =
--      enc->use_predict_
--          ? (width + 1) * 2 +
--            (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t)
--          : 0;
-+      enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) /
-+                                                sizeof(uint32_t)
-+                        : 0;
-   const uint64_t transform_data_size =
-       (enc->use_predict_ || enc->use_cross_color_)
--          ? VP8LSubSampleSize(width, enc->transform_bits_) *
-+          ? (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
-                 VP8LSubSampleSize(height, enc->transform_bits_)
-           : 0;
-   const uint64_t max_alignment_in_words =
-       (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t);
--  const uint64_t mem_size =
--      image_size + max_alignment_in_words +
--      argb_scratch_size + max_alignment_in_words +
--      transform_data_size;
-+  const uint64_t mem_size = image_size + max_alignment_in_words +
-+                            argb_scratch_size + max_alignment_in_words +
-+                            transform_data_size;
-   uint32_t* mem = enc->transform_mem_;
-   if (mem == NULL || mem_size > enc->transform_mem_size_) {
-     ClearTransformBuffer(enc);
-     mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem));
-     if (mem == NULL) {
--      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
--      goto Error;
-+      return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     }
-     enc->transform_mem_ = mem;
-     enc->transform_mem_size_ = (size_t)mem_size;
-@@ -1252,19 +1229,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
-   enc->transform_data_ = mem;
- 
-   enc->current_width_ = width;
-- Error:
--  return err;
-+  return 1;
- }
- 
--static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
--  WebPEncodingError err = VP8_ENC_OK;
-+static int MakeInputImageCopy(VP8LEncoder* const enc) {
-   const WebPPicture* const picture = enc->pic_;
-   const int width = picture->width;
-   const int height = picture->height;
- 
--  err = AllocateTransformBuffer(enc, width, height);
--  if (err != VP8_ENC_OK) return err;
--  if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK;
-+  if (!AllocateTransformBuffer(enc, width, height)) return 0;
-+  if (enc->argb_content_ == kEncoderARGB) return 1;
- 
-   {
-     uint32_t* dst = enc->argb_;
-@@ -1278,27 +1252,11 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
-   }
-   enc->argb_content_ = kEncoderARGB;
-   assert(enc->current_width_ == width);
--  return VP8_ENC_OK;
-+  return 1;
- }
- 
- // -----------------------------------------------------------------------------
- 
--static WEBP_INLINE int SearchColorNoIdx(const uint32_t sorted[], uint32_t color,
--                                        int hi) {
--  int low = 0;
--  if (sorted[low] == color) return low;  // loop invariant: sorted[low] != color
--  while (1) {
--    const int mid = (low + hi) >> 1;
--    if (sorted[mid] == color) {
--      return mid;
--    } else if (sorted[mid] < color) {
--      low = mid;
--    } else {
--      hi = mid;
--    }
--  }
--}
--
- #define APPLY_PALETTE_GREEDY_MAX 4
- 
- static WEBP_INLINE uint32_t SearchColorGreedy(const uint32_t palette[],
-@@ -1333,17 +1291,6 @@ static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) {
-          (32 - PALETTE_INV_SIZE_BITS);
- }
- 
--// Sort palette in increasing order and prepare an inverse mapping array.
--static void PrepareMapToPalette(const uint32_t palette[], int num_colors,
--                                uint32_t sorted[], uint32_t idx_map[]) {
--  int i;
--  memcpy(sorted, palette, num_colors * sizeof(*sorted));
--  qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort);
--  for (i = 0; i < num_colors; ++i) {
--    idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i;
--  }
--}
--
- // Use 1 pixel cache for ARGB pixels.
- #define APPLY_PALETTE_FOR(COLOR_INDEX) do {         \
-   uint32_t prev_pix = palette[0];                   \
-@@ -1367,16 +1314,18 @@ static void PrepareMapToPalette(const uint32_t palette[], int num_colors,
- // using 'row' as a temporary buffer of size 'width'.
- // We assume that all src[] values have a corresponding entry in the palette.
- // Note: src[] can be the same as dst[]
--static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride,
--                                      uint32_t* dst, uint32_t dst_stride,
--                                      const uint32_t* palette, int palette_size,
--                                      int width, int height, int xbits) {
-+static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst,
-+                        uint32_t dst_stride, const uint32_t* palette,
-+                        int palette_size, int width, int height, int xbits,
-+                        const WebPPicture* const pic) {
-   // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be
-   // made to work in-place.
-   uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row));
-   int x, y;
- 
--  if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
-+  if (tmp_row == NULL) {
-+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+  }
- 
-   if (palette_size < APPLY_PALETTE_GREEDY_MAX) {
-     APPLY_PALETTE_FOR(SearchColorGreedy(palette, palette_size, pix));
-@@ -1421,7 +1370,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride,
-     }
-   }
-   WebPSafeFree(tmp_row);
--  return VP8_ENC_OK;
-+  return 1;
- }
- #undef APPLY_PALETTE_FOR
- #undef PALETTE_INV_SIZE_BITS
-@@ -1429,9 +1378,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride,
- #undef APPLY_PALETTE_GREEDY_MAX
- 
- // Note: Expects "enc->palette_" to be set properly.
--static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc,
--                                             int in_place) {
--  WebPEncodingError err = VP8_ENC_OK;
-+static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) {
-   const WebPPicture* const pic = enc->pic_;
-   const int width = pic->width;
-   const int height = pic->height;
-@@ -1449,19 +1396,22 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc,
-     xbits = (palette_size <= 16) ? 1 : 0;
-   }
- 
--  err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
--  if (err != VP8_ENC_OK) return err;
--
--  err = ApplyPalette(src, src_stride,
-+  if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) {
-+    return 0;
-+  }
-+  if (!ApplyPalette(src, src_stride,
-                      enc->argb_, enc->current_width_,
--                     palette, palette_size, width, height, xbits);
-+                     palette, palette_size, width, height, xbits, pic)) {
-+    return 0;
-+  }
-   enc->argb_content_ = kEncoderPalette;
--  return err;
-+  return 1;
- }
- 
- // Save palette_[] to bitstream.
- static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
--                                       VP8LEncoder* const enc) {
-+                                       VP8LEncoder* const enc,
-+                                       int percent_range, int* const percent) {
-   int i;
-   uint32_t tmp_palette[MAX_PALETTE_SIZE];
-   const int palette_size = enc->palette_size_;
-@@ -1476,7 +1426,7 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort,
-   tmp_palette[0] = palette[0];
-   return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_,
-                               &enc->refs_[0], palette_size, 1, /*quality=*/20,
--                              low_effort);
-+                              low_effort, enc->pic_, percent_range, percent);
- }
- 
- // -----------------------------------------------------------------------------
-@@ -1520,7 +1470,6 @@ typedef struct {
-   CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX];
-   int num_crunch_configs_;
-   int red_and_blue_always_zero_;
--  WebPEncodingError err_;
-   WebPAuxStats* stats_;
- } StreamEncodeContext;
- 
-@@ -1537,7 +1486,6 @@ static int EncodeStreamHook(void* input, void* data2) {
- #if !defined(WEBP_DISABLE_STATS)
-   WebPAuxStats* const stats = params->stats_;
- #endif
--  WebPEncodingError err = VP8_ENC_OK;
-   const int quality = (int)config->quality;
-   const int low_effort = (config->method == 0);
- #if (WEBP_NEAR_LOSSLESS == 1)
-@@ -1545,6 +1493,7 @@ static int EncodeStreamHook(void* input, void* data2) {
- #endif
-   const int height = picture->height;
-   const size_t byte_position = VP8LBitWriterNumBytes(bw);
-+  int percent = 2;  // for WebPProgressHook
- #if (WEBP_NEAR_LOSSLESS == 1)
-   int use_near_lossless = 0;
- #endif
-@@ -1558,12 +1507,13 @@ static int EncodeStreamHook(void* input, void* data2) {
- 
-   if (!VP8LBitWriterInit(&bw_best, 0) ||
-       (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-   for (idx = 0; idx < num_crunch_configs; ++idx) {
-     const int entropy_idx = crunch_configs[idx].entropy_idx_;
-+    int remaining_percent = 97 / num_crunch_configs, percent_range;
-     enc->use_palette_ =
-         (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial);
-     enc->use_subtract_green_ =
-@@ -1571,7 +1521,8 @@ static int EncodeStreamHook(void* input, void* data2) {
-     enc->use_predict_ = (entropy_idx == kSpatial) ||
-                         (entropy_idx == kSpatialSubGreen) ||
-                         (entropy_idx == kPaletteAndSpatial);
--    if (low_effort) {
-+    // When using a palette, R/B==0, hence no need to test for cross-color.
-+    if (low_effort || enc->use_palette_) {
-       enc->use_cross_color_ = 0;
-     } else {
-       enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
-@@ -1586,11 +1537,10 @@ static int EncodeStreamHook(void* input, void* data2) {
-     use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ &&
-                         !enc->use_predict_;
-     if (use_near_lossless) {
--      err = AllocateTransformBuffer(enc, width, height);
--      if (err != VP8_ENC_OK) goto Error;
-+      if (!AllocateTransformBuffer(enc, width, height)) goto Error;
-       if ((enc->argb_content_ != kEncoderNearLossless) &&
-           !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) {
--        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+        WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-         goto Error;
-       }
-       enc->argb_content_ = kEncoderNearLossless;
-@@ -1603,10 +1553,18 @@ static int EncodeStreamHook(void* input, void* data2) {
- 
-     // Encode palette
-     if (enc->use_palette_) {
--      err = EncodePalette(bw, low_effort, enc);
--      if (err != VP8_ENC_OK) goto Error;
--      err = MapImageFromPalette(enc, use_delta_palette);
--      if (err != VP8_ENC_OK) goto Error;
-+      if (!PaletteSort(crunch_configs[idx].palette_sorting_type_, enc->pic_,
-+                       enc->palette_sorted_, enc->palette_size_,
-+                       enc->palette_)) {
-+        WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+        goto Error;
-+      }
-+      percent_range = remaining_percent / 4;
-+      if (!EncodePalette(bw, low_effort, enc, percent_range, &percent)) {
-+        goto Error;
-+      }
-+      remaining_percent -= percent_range;
-+      if (!MapImageFromPalette(enc, use_delta_palette)) goto Error;
-       // If using a color cache, do not have it bigger than the number of
-       // colors.
-       if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) {
-@@ -1617,8 +1575,7 @@ static int EncodeStreamHook(void* input, void* data2) {
-       // In case image is not packed.
-       if (enc->argb_content_ != kEncoderNearLossless &&
-           enc->argb_content_ != kEncoderPalette) {
--        err = MakeInputImageCopy(enc);
--        if (err != VP8_ENC_OK) goto Error;
-+        if (!MakeInputImageCopy(enc)) goto Error;
-       }
- 
-       // -----------------------------------------------------------------------
-@@ -1629,15 +1586,22 @@ static int EncodeStreamHook(void* input, void* data2) {
-       }
- 
-       if (enc->use_predict_) {
--        err = ApplyPredictFilter(enc, enc->current_width_, height, quality,
--                                 low_effort, enc->use_subtract_green_, bw);
--        if (err != VP8_ENC_OK) goto Error;
-+        percent_range = remaining_percent / 3;
-+        if (!ApplyPredictFilter(enc, enc->current_width_, height, quality,
-+                                low_effort, enc->use_subtract_green_, bw,
-+                                percent_range, &percent)) {
-+          goto Error;
-+        }
-+        remaining_percent -= percent_range;
-       }
- 
-       if (enc->use_cross_color_) {
--        err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
--                                    low_effort, bw);
--        if (err != VP8_ENC_OK) goto Error;
-+        percent_range = remaining_percent / 2;
-+        if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality,
-+                                   low_effort, bw, percent_range, &percent)) {
-+          goto Error;
-+        }
-+        remaining_percent -= percent_range;
-       }
-     }
- 
-@@ -1645,12 +1609,13 @@ static int EncodeStreamHook(void* input, void* data2) {
- 
-     // -------------------------------------------------------------------------
-     // Encode and write the transformed image.
--    err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
--                              enc->current_width_, height, quality, low_effort,
--                              use_cache, &crunch_configs[idx],
--                              &enc->cache_bits_, enc->histo_bits_,
--                              byte_position, &hdr_size, &data_size);
--    if (err != VP8_ENC_OK) goto Error;
-+    if (!EncodeImageInternal(
-+            bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_,
-+            height, quality, low_effort, use_cache, &crunch_configs[idx],
-+            &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size,
-+            &data_size, picture, remaining_percent, &percent)) {
-+      goto Error;
-+    }
- 
-     // If we are better than what we already have.
-     if (VP8LBitWriterNumBytes(bw) < best_size) {
-@@ -1680,18 +1645,15 @@ static int EncodeStreamHook(void* input, void* data2) {
-   }
-   VP8LBitWriterSwap(&bw_best, bw);
- 
--Error:
-+ Error:
-   VP8LBitWriterWipeOut(&bw_best);
--  params->err_ = err;
-   // The hook should return false in case of error.
--  return (err == VP8_ENC_OK);
-+  return (params->picture_->error_code == VP8_ENC_OK);
- }
- 
--WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
--                                   const WebPPicture* const picture,
--                                   VP8LBitWriter* const bw_main,
--                                   int use_cache) {
--  WebPEncodingError err = VP8_ENC_OK;
-+int VP8LEncodeStream(const WebPConfig* const config,
-+                     const WebPPicture* const picture,
-+                     VP8LBitWriter* const bw_main, int use_cache) {
-   VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture);
-   VP8LEncoder* enc_side = NULL;
-   CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX];
-@@ -1703,15 +1665,23 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-   // The main thread uses picture->stats, the side thread uses stats_side.
-   WebPAuxStats stats_side;
-   VP8LBitWriter bw_side;
-+  WebPPicture picture_side;
-   const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface();
-   int ok_main;
- 
-+  if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) {
-+    VP8LEncoderDelete(enc_main);
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-+  }
-+
-+  // Avoid "garbage value" error from Clang's static analysis tool.
-+  WebPPictureInit(&picture_side);
-+
-   // Analyze image (entropy, num_palettes etc)
--  if (enc_main == NULL ||
--      !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main,
-+  if (!EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main,
-                       &red_and_blue_always_zero) ||
--      !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+      !EncoderInit(enc_main)) {
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-@@ -1740,25 +1710,32 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-       StreamEncodeContext* const param =
-           (idx == 0) ? &params_main : &params_side;
-       param->config_ = config;
--      param->picture_ = picture;
-       param->use_cache_ = use_cache;
-       param->red_and_blue_always_zero_ = red_and_blue_always_zero;
-       if (idx == 0) {
-+        param->picture_ = picture;
-         param->stats_ = picture->stats;
-         param->bw_ = bw_main;
-         param->enc_ = enc_main;
-       } else {
-+        // Create a side picture (error_code is not thread-safe).
-+        if (!WebPPictureView(picture, /*left=*/0, /*top=*/0, picture->width,
-+                             picture->height, &picture_side)) {
-+          assert(0);
-+        }
-+        picture_side.progress_hook = NULL;  // Progress hook is not thread-safe.
-+        param->picture_ = &picture_side;  // No need to free a view afterwards.
-         param->stats_ = (picture->stats == NULL) ? NULL : &stats_side;
-         // Create a side bit writer.
-         if (!VP8LBitWriterClone(bw_main, &bw_side)) {
--          err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+          WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-           goto Error;
-         }
-         param->bw_ = &bw_side;
-         // Create a side encoder.
--        enc_side = VP8LEncoderNew(config, picture);
-+        enc_side = VP8LEncoderNew(config, &picture_side);
-         if (enc_side == NULL || !EncoderInit(enc_side)) {
--          err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+          WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-           goto Error;
-         }
-         // Copy the values that were computed for the main encoder.
-@@ -1767,6 +1744,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-         enc_side->palette_size_ = enc_main->palette_size_;
-         memcpy(enc_side->palette_, enc_main->palette_,
-                sizeof(enc_main->palette_));
-+        memcpy(enc_side->palette_sorted_, enc_main->palette_sorted_,
-+               sizeof(enc_main->palette_sorted_));
-         param->enc_ = enc_side;
-       }
-       // Create the workers.
-@@ -1780,7 +1759,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-   // Start the second thread if needed.
-   if (num_crunch_configs_side != 0) {
-     if (!worker_interface->Reset(&worker_side)) {
--      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+      WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-       goto Error;
-     }
- #if !defined(WEBP_DISABLE_STATS)
-@@ -1790,8 +1769,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-       memcpy(&stats_side, picture->stats, sizeof(stats_side));
-     }
- #endif
--    // This line is only useful to remove a Clang static analyzer warning.
--    params_side.err_ = VP8_ENC_OK;
-     worker_interface->Launch(&worker_side);
-   }
-   // Execute the main thread.
-@@ -1803,7 +1780,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-     const int ok_side = worker_interface->Sync(&worker_side);
-     worker_interface->End(&worker_side);
-     if (!ok_main || !ok_side) {
--      err = ok_main ? params_side.err_ : params_main.err_;
-+      if (picture->error_code == VP8_ENC_OK) {
-+        assert(picture_side.error_code != VP8_ENC_OK);
-+        WebPEncodingSetError(picture, picture_side.error_code);
-+      }
-       goto Error;
-     }
-     if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) {
-@@ -1814,18 +1794,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
-       }
- #endif
-     }
--  } else {
--    if (!ok_main) {
--      err = params_main.err_;
--      goto Error;
--    }
-   }
- 
--Error:
-+ Error:
-   VP8LBitWriterWipeOut(&bw_side);
-   VP8LEncoderDelete(enc_main);
-   VP8LEncoderDelete(enc_side);
--  return err;
-+  return (picture->error_code == VP8_ENC_OK);
- }
- 
- #undef CRUNCH_CONFIGS_MAX
-@@ -1838,15 +1813,12 @@ int VP8LEncodeImage(const WebPConfig* const config,
-   size_t coded_size;
-   int percent = 0;
-   int initial_size;
--  WebPEncodingError err = VP8_ENC_OK;
-   VP8LBitWriter bw;
- 
-   if (picture == NULL) return 0;
- 
-   if (config == NULL || picture->argb == NULL) {
--    err = VP8_ENC_ERROR_NULL_PARAMETER;
--    WebPEncodingSetError(picture, err);
--    return 0;
-+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
-   }
- 
-   width = picture->width;
-@@ -1856,13 +1828,13 @@ int VP8LEncodeImage(const WebPConfig* const config,
-   initial_size = (config->image_hint == WEBP_HINT_GRAPH) ?
-       width * height : width * height * 2;
-   if (!VP8LBitWriterInit(&bw, initial_size)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-   if (!WebPReportProgress(picture, 1, &percent)) {
-  UserAbort:
--    err = VP8_ENC_ERROR_USER_ABORT;
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_USER_ABORT);
-     goto Error;
-   }
-   // Reset stats (for pure lossless coding)
-@@ -1878,28 +1850,26 @@ int VP8LEncodeImage(const WebPConfig* const config,
- 
-   // Write image size.
-   if (!WriteImageSize(picture, &bw)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
-   has_alpha = WebPPictureHasTransparency(picture);
-   // Write the non-trivial Alpha flag and lossless version.
-   if (!WriteRealAlphaAndVersion(&bw, has_alpha)) {
--    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-     goto Error;
-   }
- 
--  if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort;
-+  if (!WebPReportProgress(picture, 2, &percent)) goto UserAbort;
- 
-   // Encode main image stream.
--  err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/);
--  if (err != VP8_ENC_OK) goto Error;
-+  if (!VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/)) goto Error;
- 
--  if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort;
-+  if (!WebPReportProgress(picture, 99, &percent)) goto UserAbort;
- 
-   // Finish the RIFF chunk.
--  err = WriteImage(picture, &bw, &coded_size);
--  if (err != VP8_ENC_OK) goto Error;
-+  if (!WriteImage(picture, &bw, &coded_size)) goto Error;
- 
-   if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
- 
-@@ -1918,13 +1888,11 @@ int VP8LEncodeImage(const WebPConfig* const config,
-   }
- 
-  Error:
--  if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;
--  VP8LBitWriterWipeOut(&bw);
--  if (err != VP8_ENC_OK) {
--    WebPEncodingSetError(picture, err);
--    return 0;
-+  if (bw.error_) {
-+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-   }
--  return 1;
-+  VP8LBitWriterWipeOut(&bw);
-+  return (picture->error_code == VP8_ENC_OK);
- }
- 
- //------------------------------------------------------------------------------
-diff --git a/3rdparty/libwebp/src/enc/vp8li_enc.h b/3rdparty/libwebp/src/enc/vp8li_enc.h
-index 94210ce9f3bd..3d35e1612dee 100644
---- a/3rdparty/libwebp/src/enc/vp8li_enc.h
-+++ b/3rdparty/libwebp/src/enc/vp8li_enc.h
-@@ -69,6 +69,8 @@ typedef struct {
-   int use_palette_;
-   int palette_size_;
-   uint32_t palette_[MAX_PALETTE_SIZE];
-+  // Sorted version of palette_ for cache purposes.
-+  uint32_t palette_sorted_[MAX_PALETTE_SIZE];
- 
-   // Some 'scratch' (potentially large) objects.
-   struct VP8LBackwardRefs refs_[4];  // Backward Refs array for temporaries.
-@@ -87,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config,
- 
- // Encodes the main image stream using the supplied bit writer.
- // If 'use_cache' is false, disables the use of color cache.
--WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
--                                   const WebPPicture* const picture,
--                                   VP8LBitWriter* const bw, int use_cache);
-+// Returns false in case of error (stored in picture->error_code).
-+int VP8LEncodeStream(const WebPConfig* const config,
-+                     const WebPPicture* const picture, VP8LBitWriter* const bw,
-+                     int use_cache);
- 
- #if (WEBP_NEAR_LOSSLESS == 1)
- // in near_lossless.c
-@@ -101,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality,
- //------------------------------------------------------------------------------
- // Image transforms in predictor.c.
- 
--void VP8LResidualImage(int width, int height, int bits, int low_effort,
--                       uint32_t* const argb, uint32_t* const argb_scratch,
--                       uint32_t* const image, int near_lossless, int exact,
--                       int used_subtract_green);
--
--void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
--                             uint32_t* const argb, uint32_t* image);
-+// pic and percent are for progress.
-+// Returns false in case of error (stored in pic->error_code).
-+int VP8LResidualImage(int width, int height, int bits, int low_effort,
-+                      uint32_t* const argb, uint32_t* const argb_scratch,
-+                      uint32_t* const image, int near_lossless, int exact,
-+                      int used_subtract_green, const WebPPicture* const pic,
-+                      int percent_range, int* const percent);
-+
-+int VP8LColorSpaceTransform(int width, int height, int bits, int quality,
-+                            uint32_t* const argb, uint32_t* image,
-+                            const WebPPicture* const pic, int percent_range,
-+                            int* const percent);
- 
- //------------------------------------------------------------------------------
- 
-diff --git a/3rdparty/libwebp/src/enc/webp_enc.c b/3rdparty/libwebp/src/enc/webp_enc.c
-index ce2db2e94bcf..583fe6a8bbd6 100644
---- a/3rdparty/libwebp/src/enc/webp_enc.c
-+++ b/3rdparty/libwebp/src/enc/webp_enc.c
-@@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic,
-                          WebPEncodingError error) {
-   assert((int)error < VP8_ENC_ERROR_LAST);
-   assert((int)error >= VP8_ENC_OK);
--  ((WebPPicture*)pic)->error_code = error;
-+  // The oldest error reported takes precedence over the new one.
-+  if (pic->error_code == VP8_ENC_OK) {
-+    ((WebPPicture*)pic)->error_code = error;
-+  }
-   return 0;
- }
- 
-@@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic,
-     *percent_store = percent;
-     if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
-       // user abort requested
--      WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
--      return 0;
-+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
-     }
-   }
-   return 1;  // ok
-@@ -329,16 +331,14 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
-   int ok = 0;
-   if (pic == NULL) return 0;
- 
--  WebPEncodingSetError(pic, VP8_ENC_OK);  // all ok so far
-+  pic->error_code = VP8_ENC_OK;  // all ok so far
-   if (config == NULL) {  // bad params
-     return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
-   }
-   if (!WebPValidateConfig(config)) {
-     return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-   }
--  if (pic->width <= 0 || pic->height <= 0) {
--    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
--  }
-+  if (!WebPValidatePicture(pic)) return 0;
-   if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) {
-     return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
-   }
-diff --git a/3rdparty/libwebp/src/mux/anim_encode.c b/3rdparty/libwebp/src/mux/anim_encode.c
-index 7be99068f687..d1c61a2f1ee5 100644
---- a/3rdparty/libwebp/src/mux/anim_encode.c
-+++ b/3rdparty/libwebp/src/mux/anim_encode.c
-@@ -248,9 +248,6 @@ WebPAnimEncoder* WebPAnimEncoderNewInternal(
- 
-   enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc));
-   if (enc == NULL) return NULL;
--  // sanity inits, so we can call WebPAnimEncoderDelete():
--  enc->encoded_frames_ = NULL;
--  enc->mux_ = NULL;
-   MarkNoError(enc);
- 
-   // Dimensions and options.
-@@ -421,7 +418,7 @@ static void MinimizeChangeRectangle(const WebPPicture* const src,
-   const int max_allowed_diff_lossy = QualityToMaxDiff(quality);
-   const int max_allowed_diff = is_lossless ? 0 : max_allowed_diff_lossy;
- 
--  // Sanity checks.
-+  // Assumption/correctness checks.
-   assert(src->width == dst->width && src->height == dst->height);
-   assert(rect->x_offset_ + rect->width_ <= dst->width);
-   assert(rect->y_offset_ + rect->height_ <= dst->height);
-@@ -596,16 +593,17 @@ int WebPAnimEncoderRefineRect(
-     int is_lossless, float quality, int* const x_offset, int* const y_offset,
-     int* const width, int* const height) {
-   FrameRectangle rect;
--  const int right = clip(*x_offset + *width, 0, curr_canvas->width);
--  const int left = clip(*x_offset, 0, curr_canvas->width - 1);
--  const int bottom = clip(*y_offset + *height, 0, curr_canvas->height);
--  const int top = clip(*y_offset, 0, curr_canvas->height - 1);
-+  int right, left, bottom, top;
-   if (prev_canvas == NULL || curr_canvas == NULL ||
-       prev_canvas->width != curr_canvas->width ||
-       prev_canvas->height != curr_canvas->height ||
-       !prev_canvas->use_argb || !curr_canvas->use_argb) {
-     return 0;
-   }
-+  right = clip(*x_offset + *width, 0, curr_canvas->width);
-+  left = clip(*x_offset, 0, curr_canvas->width - 1);
-+  bottom = clip(*y_offset + *height, 0, curr_canvas->height);
-+  top = clip(*y_offset, 0, curr_canvas->height - 1);
-   rect.x_offset_ = left;
-   rect.y_offset_ = top;
-   rect.width_ = clip(right - left, 0, curr_canvas->width - rect.x_offset_);
-@@ -949,7 +947,8 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
-   int new_duration;
- 
-   assert(enc->count_ >= 1);
--  assert(prev_enc_frame->sub_frame_.duration ==
-+  assert(!prev_enc_frame->is_key_frame_ ||
-+         prev_enc_frame->sub_frame_.duration ==
-          prev_enc_frame->key_frame_.duration);
-   assert(prev_enc_frame->sub_frame_.duration ==
-          (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1)));
-@@ -966,7 +965,7 @@ static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
-       0x10, 0x88, 0x88, 0x08
-     };
-     const WebPData lossless_1x1 = {
--        lossless_1x1_bytes, sizeof(lossless_1x1_bytes)
-+      lossless_1x1_bytes, sizeof(lossless_1x1_bytes)
-     };
-     const uint8_t lossy_1x1_bytes[] = {
-       0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50,
-@@ -1358,6 +1357,12 @@ int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp,
-     if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) {
-       return 0;
-     }
-+    // IncreasePreviousDuration() may add a frame to avoid exceeding
-+    // MAX_DURATION which could cause CacheFrame() to over read encoded_frames_
-+    // before the next flush.
-+    if (enc->count_ == enc->size_ && !FlushFrames(enc)) {
-+      return 0;
-+    }
-   } else {
-     enc->first_timestamp_ = timestamp;
-   }
-diff --git a/3rdparty/libwebp/src/mux/muxedit.c b/3rdparty/libwebp/src/mux/muxedit.c
-index ccf14b2a0c51..aab479cc6c78 100644
---- a/3rdparty/libwebp/src/mux/muxedit.c
-+++ b/3rdparty/libwebp/src/mux/muxedit.c
-@@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) {
-     err = ChunkAssignData(&chunk, data, copy_data, tag);                       \
-     if (err == WEBP_MUX_OK) {                                                  \
-       err = ChunkSetHead(&chunk, (LIST));                                      \
-+      if (err != WEBP_MUX_OK) ChunkRelease(&chunk);                            \
-     }                                                                          \
-     return err;                                                                \
-   }
-@@ -235,7 +236,6 @@ WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream,
-   WebPMuxImage wpi;
-   WebPMuxError err;
- 
--  // Sanity checks.
-   if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL ||
-       bitstream->size > MAX_CHUNK_PAYLOAD) {
-     return WEBP_MUX_INVALID_ARGUMENT;
-@@ -267,7 +267,6 @@ WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* info,
-   WebPMuxImage wpi;
-   WebPMuxError err;
- 
--  // Sanity checks.
-   if (mux == NULL || info == NULL) return WEBP_MUX_INVALID_ARGUMENT;
- 
-   if (info->id != WEBP_CHUNK_ANMF) return WEBP_MUX_INVALID_ARGUMENT;
-@@ -556,7 +555,8 @@ static WebPMuxError MuxCleanup(WebPMux* const mux) {
-   if (num_frames == 1) {
-     WebPMuxImage* frame = NULL;
-     err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame);
--    assert(err == WEBP_MUX_OK);  // We know that one frame does exist.
-+    if (err != WEBP_MUX_OK) return err;
-+    // We know that one frame does exist.
-     assert(frame != NULL);
-     if (frame->header_ != NULL &&
-         ((mux->canvas_width_ == 0 && mux->canvas_height_ == 0) ||
-diff --git a/3rdparty/libwebp/src/mux/muxi.h b/3rdparty/libwebp/src/mux/muxi.h
-index 2289822e8f88..fc44d6f2feb8 100644
---- a/3rdparty/libwebp/src/mux/muxi.h
-+++ b/3rdparty/libwebp/src/mux/muxi.h
-@@ -28,8 +28,8 @@ extern "C" {
- // Defines and constants.
- 
- #define MUX_MAJ_VERSION 1
--#define MUX_MIN_VERSION 2
--#define MUX_REV_VERSION 0
-+#define MUX_MIN_VERSION 3
-+#define MUX_REV_VERSION 1
- 
- // Chunk object.
- typedef struct WebPChunk WebPChunk;
-diff --git a/3rdparty/libwebp/src/mux/muxinternal.c b/3rdparty/libwebp/src/mux/muxinternal.c
-index b9ee6717d3a4..75b6b416b993 100644
---- a/3rdparty/libwebp/src/mux/muxinternal.c
-+++ b/3rdparty/libwebp/src/mux/muxinternal.c
-@@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk,
- 
- WebPMuxError ChunkAppend(WebPChunk* const chunk,
-                          WebPChunk*** const chunk_list) {
-+  WebPMuxError err;
-   assert(chunk_list != NULL && *chunk_list != NULL);
- 
-   if (**chunk_list == NULL) {
--    ChunkSetHead(chunk, *chunk_list);
-+    err = ChunkSetHead(chunk, *chunk_list);
-   } else {
-     WebPChunk* last_chunk = **chunk_list;
-     while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_;
--    ChunkSetHead(chunk, &last_chunk->next_);
--    *chunk_list = &last_chunk->next_;
-+    err = ChunkSetHead(chunk, &last_chunk->next_);
-+    if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_;
-   }
--  return WEBP_MUX_OK;
-+  return err;
- }
- 
- //------------------------------------------------------------------------------
-diff --git a/3rdparty/libwebp/src/mux/muxread.c b/3rdparty/libwebp/src/mux/muxread.c
-index 0101fde15da0..9862ec68eea6 100644
---- a/3rdparty/libwebp/src/mux/muxread.c
-+++ b/3rdparty/libwebp/src/mux/muxread.c
-@@ -56,7 +56,7 @@ static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
-   uint32_t chunk_size;
-   WebPData chunk_data;
- 
--  // Sanity checks.
-+  // Correctness checks.
-   if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
-   chunk_size = GetLE32(data + TAG_SIZE);
-   if (chunk_size > MAX_CHUNK_PAYLOAD) return WEBP_MUX_BAD_DATA;
-@@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
-     // Each of ANMF chunk contain a header at the beginning. So, its size should
-     // be at least 'hdr_size'.
-     if (size < hdr_size) goto Fail;
--    ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
-+    if (ChunkAssignData(&subchunk, &temp, copy_data,
-+                        chunk->tag_) != WEBP_MUX_OK) {
-+      goto Fail;
-+    }
-   }
--  ChunkSetHead(&subchunk, &wpi->header_);
-+  if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail;
-   wpi->is_partial_ = 1;  // Waiting for ALPH and/or VP8/VP8L chunks.
- 
-   // Rest of the chunks.
-@@ -186,7 +189,6 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
-   WebPChunk** chunk_list_ends[WEBP_CHUNK_NIL + 1] = { NULL };
-   ChunkInit(&chunk);
- 
--  // Sanity checks.
-   if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) {
-     return NULL;  // version mismatch
-   }
-@@ -481,7 +483,6 @@ WebPMuxError WebPMuxGetFrame(
-   WebPMuxError err;
-   WebPMuxImage* wpi;
- 
--  // Sanity checks.
-   if (mux == NULL || frame == NULL) {
-     return WEBP_MUX_INVALID_ARGUMENT;
-   }
-diff --git a/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h b/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h
-index 46b38807062c..24f3af7b5454 100644
---- a/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h
-+++ b/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h
-@@ -55,7 +55,7 @@ void VP8LoadFinalBytes(VP8BitReader* const br);
- 
- // makes sure br->value_ has at least BITS bits worth of data
- static WEBP_UBSAN_IGNORE_UNDEF WEBP_INLINE
--void VP8LoadNewBytes(VP8BitReader* const br) {
-+void VP8LoadNewBytes(VP8BitReader* WEBP_RESTRICT const br) {
-   assert(br != NULL && br->buf_ != NULL);
-   // Read 'BITS' bits at a time if possible.
-   if (br->buf_ < br->buf_max_) {
-@@ -104,7 +104,7 @@ void VP8LoadNewBytes(VP8BitReader* const br) {
- }
- 
- // Read a bit with proba 'prob'. Speed-critical function!
--static WEBP_INLINE int VP8GetBit(VP8BitReader* const br,
-+static WEBP_INLINE int VP8GetBit(VP8BitReader* WEBP_RESTRICT const br,
-                                  int prob, const char label[]) {
-   // Don't move this declaration! It makes a big speed difference to store
-   // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
-@@ -137,7 +137,8 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br,
- 
- // simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
- static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
--int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) {
-+int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v,
-+                 const char label[]) {
-   if (br->bits_ < 0) {
-     VP8LoadNewBytes(br);
-   }
-@@ -147,15 +148,15 @@ int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) {
-     const range_t value = (range_t)(br->value_ >> pos);
-     const int32_t mask = (int32_t)(split - value) >> 31;  // -1 or 0
-     br->bits_ -= 1;
--    br->range_ += mask;
-+    br->range_ += (range_t)mask;
-     br->range_ |= 1;
--    br->value_ -= (bit_t)((split + 1) & mask) << pos;
-+    br->value_ -= (bit_t)((split + 1) & (uint32_t)mask) << pos;
-     BT_TRACK(br);
-     return (v ^ mask) - mask;
-   }
- }
- 
--static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br,
-+static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* WEBP_RESTRICT const br,
-                                     int prob, const char label[]) {
-   // Don't move this declaration! It makes a big speed difference to store
-   // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
-diff --git a/3rdparty/libwebp/src/utils/bit_reader_utils.c b/3rdparty/libwebp/src/utils/bit_reader_utils.c
-index 857cd6098882..a26557aa49f9 100644
---- a/3rdparty/libwebp/src/utils/bit_reader_utils.c
-+++ b/3rdparty/libwebp/src/utils/bit_reader_utils.c
-@@ -15,6 +15,7 @@
- #include "src/webp/config.h"
- #endif
- 
-+#include "src/dsp/cpu.h"
- #include "src/utils/bit_reader_inl_utils.h"
- #include "src/utils/utils.h"
- 
-@@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,
- 
- #define VP8L_LOG8_WBITS 4  // Number of bytes needed to store VP8L_WBITS bits.
- 
--#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
-+#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \
-     defined(__i386__) || defined(_M_IX86) || \
-     defined(__x86_64__) || defined(_M_X64)
- #define VP8L_USE_FAST_LOAD
-diff --git a/3rdparty/libwebp/src/utils/bit_reader_utils.h b/3rdparty/libwebp/src/utils/bit_reader_utils.h
-index e64156e31817..25ff31e5d97a 100644
---- a/3rdparty/libwebp/src/utils/bit_reader_utils.h
-+++ b/3rdparty/libwebp/src/utils/bit_reader_utils.h
-@@ -19,6 +19,7 @@
- #ifdef _MSC_VER
- #include <stdlib.h>  // _byteswap_ulong
- #endif
-+#include "src/dsp/cpu.h"
- #include "src/webp/types.h"
- 
- // Warning! This macro triggers quite some MACRO wizardry around func signature!
-@@ -64,7 +65,7 @@ extern "C" {
- #define BITS 56
- #elif defined(__arm__) || defined(_M_ARM)      // ARM
- #define BITS 24
--#elif defined(__aarch64__)                     // ARM 64bit
-+#elif WEBP_AARCH64                             // ARM 64bit
- #define BITS 56
- #elif defined(__mips__)                        // MIPS
- #define BITS 24
-diff --git a/3rdparty/libwebp/src/utils/bit_writer_utils.c b/3rdparty/libwebp/src/utils/bit_writer_utils.c
-index bef0e31ca5ea..2f408508f114 100644
---- a/3rdparty/libwebp/src/utils/bit_writer_utils.c
-+++ b/3rdparty/libwebp/src/utils/bit_writer_utils.c
-@@ -278,7 +278,7 @@ void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) {
-   // If needed, make some room by flushing some bits out.
-   if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
-     const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
--    if (extra_size != (size_t)extra_size ||
-+    if (!CheckSizeOverflow(extra_size) ||
-         !VP8LBitWriterResize(bw, (size_t)extra_size)) {
-       bw->cur_ = bw->buf_;
-       bw->error_ = 1;
-@@ -314,7 +314,7 @@ void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
-     while (used >= VP8L_WRITER_BITS) {
-       if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
-         const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
--        if (extra_size != (size_t)extra_size ||
-+        if (!CheckSizeOverflow(extra_size) ||
-             !VP8LBitWriterResize(bw, (size_t)extra_size)) {
-           bw->cur_ = bw->buf_;
-           bw->error_ = 1;
-diff --git a/3rdparty/libwebp/src/utils/color_cache_utils.c b/3rdparty/libwebp/src/utils/color_cache_utils.c
-index b09f538e8be6..7b5222b6e554 100644
---- a/3rdparty/libwebp/src/utils/color_cache_utils.c
-+++ b/3rdparty/libwebp/src/utils/color_cache_utils.c
-@@ -20,22 +20,22 @@
- //------------------------------------------------------------------------------
- // VP8LColorCache.
- 
--int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
-+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits) {
-   const int hash_size = 1 << hash_bits;
--  assert(cc != NULL);
-+  assert(color_cache != NULL);
-   assert(hash_bits > 0);
--  cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
--                                          sizeof(*cc->colors_));
--  if (cc->colors_ == NULL) return 0;
--  cc->hash_shift_ = 32 - hash_bits;
--  cc->hash_bits_ = hash_bits;
-+  color_cache->colors_ = (uint32_t*)WebPSafeCalloc(
-+      (uint64_t)hash_size, sizeof(*color_cache->colors_));
-+  if (color_cache->colors_ == NULL) return 0;
-+  color_cache->hash_shift_ = 32 - hash_bits;
-+  color_cache->hash_bits_ = hash_bits;
-   return 1;
- }
- 
--void VP8LColorCacheClear(VP8LColorCache* const cc) {
--  if (cc != NULL) {
--    WebPSafeFree(cc->colors_);
--    cc->colors_ = NULL;
-+void VP8LColorCacheClear(VP8LColorCache* const color_cache) {
-+  if (color_cache != NULL) {
-+    WebPSafeFree(color_cache->colors_);
-+    color_cache->colors_ = NULL;
-   }
- }
- 
-diff --git a/3rdparty/libwebp/src/utils/huffman_encode_utils.c b/3rdparty/libwebp/src/utils/huffman_encode_utils.c
-index 6f3b1bbe020f..585db9195184 100644
---- a/3rdparty/libwebp/src/utils/huffman_encode_utils.c
-+++ b/3rdparty/libwebp/src/utils/huffman_encode_utils.c
-@@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree,
- // especially when population counts are longer than 2**tree_limit, but
- // we are not planning to use this with extremely long blocks.
- //
--// See http://en.wikipedia.org/wiki/Huffman_coding
-+// See https://en.wikipedia.org/wiki/Huffman_coding
- static void GenerateOptimalTree(const uint32_t* const histogram,
-                                 int histogram_size,
-                                 HuffmanTree* tree, int tree_depth_limit,
-@@ -404,8 +404,7 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
- // Main entry point
- 
- void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
--                           uint8_t* const buf_rle,
--                           HuffmanTree* const huff_tree,
-+                           uint8_t* const buf_rle, HuffmanTree* const huff_tree,
-                            HuffmanTreeCode* const huff_code) {
-   const int num_symbols = huff_code->num_symbols;
-   memset(buf_rle, 0, num_symbols * sizeof(*buf_rle));
-diff --git a/3rdparty/libwebp/src/utils/huffman_encode_utils.h b/3rdparty/libwebp/src/utils/huffman_encode_utils.h
-index 3e6763ce49db..3f7f1d8074c2 100644
---- a/3rdparty/libwebp/src/utils/huffman_encode_utils.h
-+++ b/3rdparty/libwebp/src/utils/huffman_encode_utils.h
-@@ -51,7 +51,7 @@ int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
- // huffman code tree.
- void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
-                            uint8_t* const buf_rle, HuffmanTree* const huff_tree,
--                           HuffmanTreeCode* const tree);
-+                           HuffmanTreeCode* const huff_code);
- 
- #ifdef __cplusplus
- }
-diff --git a/3rdparty/libwebp/src/utils/huffman_utils.c b/3rdparty/libwebp/src/utils/huffman_utils.c
-index 0cba0fbb7d4f..cf73abd437d0 100644
---- a/3rdparty/libwebp/src/utils/huffman_utils.c
-+++ b/3rdparty/libwebp/src/utils/huffman_utils.c
-@@ -142,7 +142,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
- 
-   {
-     int step;              // step size to replicate values in current table
--    uint32_t low = -1;     // low bits for current root entry
-+    uint32_t low = 0xffffffffu;        // low bits for current root entry
-     uint32_t mask = total_size - 1;    // mask for low bits
-     uint32_t key = 0;      // reversed prefix code
-     int num_nodes = 1;     // number of Huffman tree nodes
-@@ -177,21 +177,24 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
-       if (num_open < 0) {
-         return 0;
-       }
--      if (root_table == NULL) continue;
-       for (; count[len] > 0; --count[len]) {
-         HuffmanCode code;
-         if ((key & mask) != low) {
--          table += table_size;
-+          if (root_table != NULL) table += table_size;
-           table_bits = NextTableBitSize(count, len, root_bits);
-           table_size = 1 << table_bits;
-           total_size += table_size;
-           low = key & mask;
--          root_table[low].bits = (uint8_t)(table_bits + root_bits);
--          root_table[low].value = (uint16_t)((table - root_table) - low);
-+          if (root_table != NULL) {
-+            root_table[low].bits = (uint8_t)(table_bits + root_bits);
-+            root_table[low].value = (uint16_t)((table - root_table) - low);
-+          }
-+        }
-+        if (root_table != NULL) {
-+          code.bits = (uint8_t)(len - root_bits);
-+          code.value = (uint16_t)sorted[symbol++];
-+          ReplicateValue(&table[key >> root_bits], step, table_size, code);
-         }
--        code.bits = (uint8_t)(len - root_bits);
--        code.value = (uint16_t)sorted[symbol++];
--        ReplicateValue(&table[key >> root_bits], step, table_size, code);
-         key = GetNextKey(key, len);
-       }
-     }
-@@ -211,25 +214,83 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
-   ((1 << MAX_CACHE_BITS) + NUM_LITERAL_CODES + NUM_LENGTH_CODES)
- // Cut-off value for switching between heap and stack allocation.
- #define SORTED_SIZE_CUTOFF 512
--int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
-+int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits,
-                           const int code_lengths[], int code_lengths_size) {
--  int total_size;
-+  const int total_size =
-+      BuildHuffmanTable(NULL, root_bits, code_lengths, code_lengths_size, NULL);
-   assert(code_lengths_size <= MAX_CODE_LENGTHS_SIZE);
--  if (root_table == NULL) {
--    total_size = BuildHuffmanTable(NULL, root_bits,
--                                   code_lengths, code_lengths_size, NULL);
--  } else if (code_lengths_size <= SORTED_SIZE_CUTOFF) {
-+  if (total_size == 0 || root_table == NULL) return total_size;
-+
-+  if (root_table->curr_segment->curr_table + total_size >=
-+      root_table->curr_segment->start + root_table->curr_segment->size) {
-+    // If 'root_table' does not have enough memory, allocate a new segment.
-+    // The available part of root_table->curr_segment is left unused because we
-+    // need a contiguous buffer.
-+    const int segment_size = root_table->curr_segment->size;
-+    struct HuffmanTablesSegment* next =
-+        (HuffmanTablesSegment*)WebPSafeMalloc(1, sizeof(*next));
-+    if (next == NULL) return 0;
-+    // Fill the new segment.
-+    // We need at least 'total_size' but if that value is small, it is better to
-+    // allocate a big chunk to prevent more allocations later. 'segment_size' is
-+    // therefore chosen (any other arbitrary value could be chosen).
-+    next->size = total_size > segment_size ? total_size : segment_size;
-+    next->start =
-+        (HuffmanCode*)WebPSafeMalloc(next->size, sizeof(*next->start));
-+    if (next->start == NULL) {
-+      WebPSafeFree(next);
-+      return 0;
-+    }
-+    next->curr_table = next->start;
-+    next->next = NULL;
-+    // Point to the new segment.
-+    root_table->curr_segment->next = next;
-+    root_table->curr_segment = next;
-+  }
-+  if (code_lengths_size <= SORTED_SIZE_CUTOFF) {
-     // use local stack-allocated array.
-     uint16_t sorted[SORTED_SIZE_CUTOFF];
--    total_size = BuildHuffmanTable(root_table, root_bits,
--                                   code_lengths, code_lengths_size, sorted);
--  } else {   // rare case. Use heap allocation.
-+    BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits,
-+                      code_lengths, code_lengths_size, sorted);
-+  } else {  // rare case. Use heap allocation.
-     uint16_t* const sorted =
-         (uint16_t*)WebPSafeMalloc(code_lengths_size, sizeof(*sorted));
-     if (sorted == NULL) return 0;
--    total_size = BuildHuffmanTable(root_table, root_bits,
--                                   code_lengths, code_lengths_size, sorted);
-+    BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits,
-+                      code_lengths, code_lengths_size, sorted);
-     WebPSafeFree(sorted);
-   }
-   return total_size;
- }
-+
-+int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables) {
-+  // Have 'segment' point to the first segment for now, 'root'.
-+  HuffmanTablesSegment* const root = &huffman_tables->root;
-+  huffman_tables->curr_segment = root;
-+  // Allocate root.
-+  root->start = (HuffmanCode*)WebPSafeMalloc(size, sizeof(*root->start));
-+  if (root->start == NULL) return 0;
-+  root->curr_table = root->start;
-+  root->next = NULL;
-+  root->size = size;
-+  return 1;
-+}
-+
-+void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables) {
-+  HuffmanTablesSegment *current, *next;
-+  if (huffman_tables == NULL) return;
-+  // Free the root node.
-+  current = &huffman_tables->root;
-+  next = current->next;
-+  WebPSafeFree(current->start);
-+  current->start = NULL;
-+  current->next = NULL;
-+  current = next;
-+  // Free the following nodes.
-+  while (current != NULL) {
-+    next = current->next;
-+    WebPSafeFree(current->start);
-+    WebPSafeFree(current);
-+    current = next;
-+  }
-+}
-diff --git a/3rdparty/libwebp/src/utils/huffman_utils.h b/3rdparty/libwebp/src/utils/huffman_utils.h
-index 13b7ad1ac40c..98415c532895 100644
---- a/3rdparty/libwebp/src/utils/huffman_utils.h
-+++ b/3rdparty/libwebp/src/utils/huffman_utils.h
-@@ -43,6 +43,29 @@ typedef struct {
-                     // or non-literal symbol otherwise
- } HuffmanCode32;
- 
-+// Contiguous memory segment of HuffmanCodes.
-+typedef struct HuffmanTablesSegment {
-+  HuffmanCode* start;
-+  // Pointer to where we are writing into the segment. Starts at 'start' and
-+  // cannot go beyond 'start' + 'size'.
-+  HuffmanCode* curr_table;
-+  // Pointer to the next segment in the chain.
-+  struct HuffmanTablesSegment* next;
-+  int size;
-+} HuffmanTablesSegment;
-+
-+// Chained memory segments of HuffmanCodes.
-+typedef struct HuffmanTables {
-+  HuffmanTablesSegment root;
-+  // Currently processed segment. At first, this is 'root'.
-+  HuffmanTablesSegment* curr_segment;
-+} HuffmanTables;
-+
-+// Allocates a HuffmanTables with 'size' contiguous HuffmanCodes. Returns 0 on
-+// memory allocation error, 1 otherwise.
-+int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables);
-+void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables);
-+
- #define HUFFMAN_PACKED_BITS 6
- #define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS)
- 
-@@ -78,9 +101,7 @@ void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups);
- // the huffman table.
- // Returns built table size or 0 in case of error (invalid tree or
- // memory error).
--// If root_table is NULL, it returns 0 if a lookup cannot be built, something
--// > 0 otherwise (but not the table size).
--int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
-+int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits,
-                           const int code_lengths[], int code_lengths_size);
- 
- #ifdef __cplusplus
-diff --git a/3rdparty/libwebp/src/utils/palette.c b/3rdparty/libwebp/src/utils/palette.c
-new file mode 100644
-index 000000000000..515da2101950
---- /dev/null
-+++ b/3rdparty/libwebp/src/utils/palette.c
-@@ -0,0 +1,402 @@
-+// Copyright 2023 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Utilities for palette analysis.
-+//
-+// Author: Vincent Rabaud (vrabaud@google.com)
-+
-+#include "src/utils/palette.h"
-+
-+#include <assert.h>
-+#include <stdlib.h>
-+
-+#include "src/dsp/lossless_common.h"
-+#include "src/utils/color_cache_utils.h"
-+#include "src/utils/utils.h"
-+#include "src/webp/encode.h"
-+#include "src/webp/format_constants.h"
-+
-+// -----------------------------------------------------------------------------
-+
-+// Palette reordering for smaller sum of deltas (and for smaller storage).
-+
-+static int PaletteCompareColorsForQsort(const void* p1, const void* p2) {
-+  const uint32_t a = WebPMemToUint32((uint8_t*)p1);
-+  const uint32_t b = WebPMemToUint32((uint8_t*)p2);
-+  assert(a != b);
-+  return (a < b) ? -1 : 1;
-+}
-+
-+static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) {
-+  return (v <= 128) ? v : (256 - v);
-+}
-+
-+// Computes a value that is related to the entropy created by the
-+// palette entry diff.
-+//
-+// Note that the last & 0xff is a no-operation in the next statement, but
-+// removed by most compilers and is here only for regularity of the code.
-+static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) {
-+  const uint32_t diff = VP8LSubPixels(col1, col2);
-+  const int kMoreWeightForRGBThanForAlpha = 9;
-+  uint32_t score;
-+  score = PaletteComponentDistance((diff >> 0) & 0xff);
-+  score += PaletteComponentDistance((diff >> 8) & 0xff);
-+  score += PaletteComponentDistance((diff >> 16) & 0xff);
-+  score *= kMoreWeightForRGBThanForAlpha;
-+  score += PaletteComponentDistance((diff >> 24) & 0xff);
-+  return score;
-+}
-+
-+static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) {
-+  const uint32_t tmp = *col1;
-+  *col1 = *col2;
-+  *col2 = tmp;
-+}
-+
-+int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, int num_colors) {
-+  int low = 0, hi = num_colors;
-+  if (sorted[low] == color) return low;  // loop invariant: sorted[low] != color
-+  while (1) {
-+    const int mid = (low + hi) >> 1;
-+    if (sorted[mid] == color) {
-+      return mid;
-+    } else if (sorted[mid] < color) {
-+      low = mid;
-+    } else {
-+      hi = mid;
-+    }
-+  }
-+  assert(0);
-+  return 0;
-+}
-+
-+void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors,
-+                         uint32_t sorted[], uint32_t idx_map[]) {
-+  uint32_t i;
-+  memcpy(sorted, palette, num_colors * sizeof(*sorted));
-+  qsort(sorted, num_colors, sizeof(*sorted), PaletteCompareColorsForQsort);
-+  for (i = 0; i < num_colors; ++i) {
-+    idx_map[SearchColorNoIdx(sorted, palette[i], num_colors)] = i;
-+  }
-+}
-+
-+//------------------------------------------------------------------------------
-+
-+#define COLOR_HASH_SIZE (MAX_PALETTE_SIZE * 4)
-+#define COLOR_HASH_RIGHT_SHIFT 22  // 32 - log2(COLOR_HASH_SIZE).
-+
-+int GetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
-+  int i;
-+  int x, y;
-+  int num_colors = 0;
-+  uint8_t in_use[COLOR_HASH_SIZE] = {0};
-+  uint32_t colors[COLOR_HASH_SIZE] = {0};
-+  const uint32_t* argb = pic->argb;
-+  const int width = pic->width;
-+  const int height = pic->height;
-+  uint32_t last_pix = ~argb[0];  // so we're sure that last_pix != argb[0]
-+  assert(pic != NULL);
-+  assert(pic->use_argb);
-+
-+  for (y = 0; y < height; ++y) {
-+    for (x = 0; x < width; ++x) {
-+      int key;
-+      if (argb[x] == last_pix) {
-+        continue;
-+      }
-+      last_pix = argb[x];
-+      key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT);
-+      while (1) {
-+        if (!in_use[key]) {
-+          colors[key] = last_pix;
-+          in_use[key] = 1;
-+          ++num_colors;
-+          if (num_colors > MAX_PALETTE_SIZE) {
-+            return MAX_PALETTE_SIZE + 1;  // Exact count not needed.
-+          }
-+          break;
-+        } else if (colors[key] == last_pix) {
-+          break;  // The color is already there.
-+        } else {
-+          // Some other color sits here, so do linear conflict resolution.
-+          ++key;
-+          key &= (COLOR_HASH_SIZE - 1);  // Key mask.
-+        }
-+      }
-+    }
-+    argb += pic->argb_stride;
-+  }
-+
-+  if (palette != NULL) {  // Fill the colors into palette.
-+    num_colors = 0;
-+    for (i = 0; i < COLOR_HASH_SIZE; ++i) {
-+      if (in_use[i]) {
-+        palette[num_colors] = colors[i];
-+        ++num_colors;
-+      }
-+    }
-+    qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
-+  }
-+  return num_colors;
-+}
-+
-+#undef COLOR_HASH_SIZE
-+#undef COLOR_HASH_RIGHT_SHIFT
-+
-+// -----------------------------------------------------------------------------
-+
-+// The palette has been sorted by alpha. This function checks if the other
-+// components of the palette have a monotonic development with regards to
-+// position in the palette. If all have monotonic development, there is
-+// no benefit to re-organize them greedily. A monotonic development
-+// would be spotted in green-only situations (like lossy alpha) or gray-scale
-+// images.
-+static int PaletteHasNonMonotonousDeltas(const uint32_t* const palette,
-+                                         int num_colors) {
-+  uint32_t predict = 0x000000;
-+  int i;
-+  uint8_t sign_found = 0x00;
-+  for (i = 0; i < num_colors; ++i) {
-+    const uint32_t diff = VP8LSubPixels(palette[i], predict);
-+    const uint8_t rd = (diff >> 16) & 0xff;
-+    const uint8_t gd = (diff >> 8) & 0xff;
-+    const uint8_t bd = (diff >> 0) & 0xff;
-+    if (rd != 0x00) {
-+      sign_found |= (rd < 0x80) ? 1 : 2;
-+    }
-+    if (gd != 0x00) {
-+      sign_found |= (gd < 0x80) ? 8 : 16;
-+    }
-+    if (bd != 0x00) {
-+      sign_found |= (bd < 0x80) ? 64 : 128;
-+    }
-+    predict = palette[i];
-+  }
-+  return (sign_found & (sign_found << 1)) != 0;  // two consequent signs.
-+}
-+
-+static void PaletteSortMinimizeDeltas(const uint32_t* const palette_sorted,
-+                                      int num_colors, uint32_t* const palette) {
-+  uint32_t predict = 0x00000000;
-+  int i, k;
-+  memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
-+  if (!PaletteHasNonMonotonousDeltas(palette_sorted, num_colors)) return;
-+  // Find greedily always the closest color of the predicted color to minimize
-+  // deltas in the palette. This reduces storage needs since the
-+  // palette is stored with delta encoding.
-+  for (i = 0; i < num_colors; ++i) {
-+    int best_ix = i;
-+    uint32_t best_score = ~0U;
-+    for (k = i; k < num_colors; ++k) {
-+      const uint32_t cur_score = PaletteColorDistance(palette[k], predict);
-+      if (best_score > cur_score) {
-+        best_score = cur_score;
-+        best_ix = k;
-+      }
-+    }
-+    SwapColor(&palette[best_ix], &palette[i]);
-+    predict = palette[i];
-+  }
-+}
-+
-+// -----------------------------------------------------------------------------
-+// Modified Zeng method from "A Survey on Palette Reordering
-+// Methods for Improving the Compression of Color-Indexed Images" by Armando J.
-+// Pinho and Antonio J. R. Neves.
-+
-+// Finds the biggest cooccurrence in the matrix.
-+static void CoOccurrenceFindMax(const uint32_t* const cooccurrence,
-+                                uint32_t num_colors, uint8_t* const c1,
-+                                uint8_t* const c2) {
-+  // Find the index that is most frequently located adjacent to other
-+  // (different) indexes.
-+  uint32_t best_sum = 0u;
-+  uint32_t i, j, best_cooccurrence;
-+  *c1 = 0u;
-+  for (i = 0; i < num_colors; ++i) {
-+    uint32_t sum = 0;
-+    for (j = 0; j < num_colors; ++j) sum += cooccurrence[i * num_colors + j];
-+    if (sum > best_sum) {
-+      best_sum = sum;
-+      *c1 = i;
-+    }
-+  }
-+  // Find the index that is most frequently found adjacent to *c1.
-+  *c2 = 0u;
-+  best_cooccurrence = 0u;
-+  for (i = 0; i < num_colors; ++i) {
-+    if (cooccurrence[*c1 * num_colors + i] > best_cooccurrence) {
-+      best_cooccurrence = cooccurrence[*c1 * num_colors + i];
-+      *c2 = i;
-+    }
-+  }
-+  assert(*c1 != *c2);
-+}
-+
-+// Builds the cooccurrence matrix
-+static int CoOccurrenceBuild(const WebPPicture* const pic,
-+                             const uint32_t* const palette, uint32_t num_colors,
-+                             uint32_t* cooccurrence) {
-+  uint32_t *lines, *line_top, *line_current, *line_tmp;
-+  int x, y;
-+  const uint32_t* src = pic->argb;
-+  uint32_t prev_pix = ~src[0];
-+  uint32_t prev_idx = 0u;
-+  uint32_t idx_map[MAX_PALETTE_SIZE] = {0};
-+  uint32_t palette_sorted[MAX_PALETTE_SIZE];
-+  lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines));
-+  if (lines == NULL) {
-+    return 0;
-+  }
-+  line_top = &lines[0];
-+  line_current = &lines[pic->width];
-+  PrepareMapToPalette(palette, num_colors, palette_sorted, idx_map);
-+  for (y = 0; y < pic->height; ++y) {
-+    for (x = 0; x < pic->width; ++x) {
-+      const uint32_t pix = src[x];
-+      if (pix != prev_pix) {
-+        prev_idx = idx_map[SearchColorNoIdx(palette_sorted, pix, num_colors)];
-+        prev_pix = pix;
-+      }
-+      line_current[x] = prev_idx;
-+      // 4-connectivity is what works best as mentioned in "On the relation
-+      // between Memon's and the modified Zeng's palette reordering methods".
-+      if (x > 0 && prev_idx != line_current[x - 1]) {
-+        const uint32_t left_idx = line_current[x - 1];
-+        ++cooccurrence[prev_idx * num_colors + left_idx];
-+        ++cooccurrence[left_idx * num_colors + prev_idx];
-+      }
-+      if (y > 0 && prev_idx != line_top[x]) {
-+        const uint32_t top_idx = line_top[x];
-+        ++cooccurrence[prev_idx * num_colors + top_idx];
-+        ++cooccurrence[top_idx * num_colors + prev_idx];
-+      }
-+    }
-+    line_tmp = line_top;
-+    line_top = line_current;
-+    line_current = line_tmp;
-+    src += pic->argb_stride;
-+  }
-+  WebPSafeFree(lines);
-+  return 1;
-+}
-+
-+struct Sum {
-+  uint8_t index;
-+  uint32_t sum;
-+};
-+
-+static int PaletteSortModifiedZeng(const WebPPicture* const pic,
-+                                   const uint32_t* const palette_in,
-+                                   uint32_t num_colors,
-+                                   uint32_t* const palette) {
-+  uint32_t i, j, ind;
-+  uint8_t remapping[MAX_PALETTE_SIZE];
-+  uint32_t* cooccurrence;
-+  struct Sum sums[MAX_PALETTE_SIZE];
-+  uint32_t first, last;
-+  uint32_t num_sums;
-+  // TODO(vrabaud) check whether one color images should use palette or not.
-+  if (num_colors <= 1) return 1;
-+  // Build the co-occurrence matrix.
-+  cooccurrence =
-+      (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence));
-+  if (cooccurrence == NULL) {
-+    return 0;
-+  }
-+  if (!CoOccurrenceBuild(pic, palette_in, num_colors, cooccurrence)) {
-+    WebPSafeFree(cooccurrence);
-+    return 0;
-+  }
-+
-+  // Initialize the mapping list with the two best indices.
-+  CoOccurrenceFindMax(cooccurrence, num_colors, &remapping[0], &remapping[1]);
-+
-+  // We need to append and prepend to the list of remapping. To this end, we
-+  // actually define the next start/end of the list as indices in a vector (with
-+  // a wrap around when the end is reached).
-+  first = 0;
-+  last = 1;
-+  num_sums = num_colors - 2;  // -2 because we know the first two values
-+  if (num_sums > 0) {
-+    // Initialize the sums with the first two remappings and find the best one
-+    struct Sum* best_sum = &sums[0];
-+    best_sum->index = 0u;
-+    best_sum->sum = 0u;
-+    for (i = 0, j = 0; i < num_colors; ++i) {
-+      if (i == remapping[0] || i == remapping[1]) continue;
-+      sums[j].index = i;
-+      sums[j].sum = cooccurrence[i * num_colors + remapping[0]] +
-+                    cooccurrence[i * num_colors + remapping[1]];
-+      if (sums[j].sum > best_sum->sum) best_sum = &sums[j];
-+      ++j;
-+    }
-+
-+    while (num_sums > 0) {
-+      const uint8_t best_index = best_sum->index;
-+      // Compute delta to know if we need to prepend or append the best index.
-+      int32_t delta = 0;
-+      const int32_t n = num_colors - num_sums;
-+      for (ind = first, j = 0; (ind + j) % num_colors != last + 1; ++j) {
-+        const uint16_t l_j = remapping[(ind + j) % num_colors];
-+        delta += (n - 1 - 2 * (int32_t)j) *
-+                 (int32_t)cooccurrence[best_index * num_colors + l_j];
-+      }
-+      if (delta > 0) {
-+        first = (first == 0) ? num_colors - 1 : first - 1;
-+        remapping[first] = best_index;
-+      } else {
-+        ++last;
-+        remapping[last] = best_index;
-+      }
-+      // Remove best_sum from sums.
-+      *best_sum = sums[num_sums - 1];
-+      --num_sums;
-+      // Update all the sums and find the best one.
-+      best_sum = &sums[0];
-+      for (i = 0; i < num_sums; ++i) {
-+        sums[i].sum += cooccurrence[best_index * num_colors + sums[i].index];
-+        if (sums[i].sum > best_sum->sum) best_sum = &sums[i];
-+      }
-+    }
-+  }
-+  assert((last + 1) % num_colors == first);
-+  WebPSafeFree(cooccurrence);
-+
-+  // Re-map the palette.
-+  for (i = 0; i < num_colors; ++i) {
-+    palette[i] = palette_in[remapping[(first + i) % num_colors]];
-+  }
-+  return 1;
-+}
-+
-+// -----------------------------------------------------------------------------
-+
-+int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
-+                const uint32_t* const palette_sorted, uint32_t num_colors,
-+                uint32_t* const palette) {
-+  switch (method) {
-+    case kSortedDefault:
-+      // Nothing to do, we have already sorted the palette.
-+      memcpy(palette, palette_sorted, num_colors * sizeof(*palette));
-+      return 1;
-+    case kMinimizeDelta:
-+      PaletteSortMinimizeDeltas(palette_sorted, num_colors, palette);
-+      return 1;
-+    case kModifiedZeng:
-+      return PaletteSortModifiedZeng(pic, palette_sorted, num_colors, palette);
-+    case kUnusedPalette:
-+    case kPaletteSortingNum:
-+      break;
-+  }
-+
-+  assert(0);
-+  return 0;
-+}
-diff --git a/3rdparty/libwebp/src/utils/palette.h b/3rdparty/libwebp/src/utils/palette.h
-new file mode 100644
-index 000000000000..34479e463fe3
---- /dev/null
-+++ b/3rdparty/libwebp/src/utils/palette.h
-@@ -0,0 +1,60 @@
-+// Copyright 2023 Google Inc. All Rights Reserved.
-+//
-+// Use of this source code is governed by a BSD-style license
-+// that can be found in the COPYING file in the root of the source
-+// tree. An additional intellectual property rights grant can be found
-+// in the file PATENTS. All contributing project authors may
-+// be found in the AUTHORS file in the root of the source tree.
-+// -----------------------------------------------------------------------------
-+//
-+// Utilities for palette analysis.
-+//
-+// Author: Vincent Rabaud (vrabaud@google.com)
-+
-+#ifndef WEBP_UTILS_PALETTE_H_
-+#define WEBP_UTILS_PALETTE_H_
-+
-+#include "src/webp/types.h"
-+
-+struct WebPPicture;
-+
-+// The different ways a palette can be sorted.
-+typedef enum PaletteSorting {
-+  kSortedDefault = 0,
-+  // Sorts by minimizing L1 deltas between consecutive colors, giving more
-+  // weight to RGB colors.
-+  kMinimizeDelta = 1,
-+  // Implements the modified Zeng method from "A Survey on Palette Reordering
-+  // Methods for Improving the Compression of Color-Indexed Images" by Armando
-+  // J. Pinho and Antonio J. R. Neves.
-+  kModifiedZeng = 2,
-+  kUnusedPalette = 3,
-+  kPaletteSortingNum = 4
-+} PaletteSorting;
-+
-+// Returns the index of 'color' in the sorted palette 'sorted' of size
-+// 'num_colors'.
-+int SearchColorNoIdx(const uint32_t sorted[], uint32_t color, int num_colors);
-+
-+// Sort palette in increasing order and prepare an inverse mapping array.
-+void PrepareMapToPalette(const uint32_t palette[], uint32_t num_colors,
-+                         uint32_t sorted[], uint32_t idx_map[]);
-+
-+// Returns count of unique colors in 'pic', assuming pic->use_argb is true.
-+// If the unique color count is more than MAX_PALETTE_SIZE, returns
-+// MAX_PALETTE_SIZE+1.
-+// If 'palette' is not NULL and the number of unique colors is less than or
-+// equal to MAX_PALETTE_SIZE, also outputs the actual unique colors into
-+// 'palette' in a sorted order. Note: 'palette' is assumed to be an array
-+// already allocated with at least MAX_PALETTE_SIZE elements.
-+int GetColorPalette(const struct WebPPicture* const pic,
-+                    uint32_t* const palette);
-+
-+// Sorts the palette according to the criterion defined by 'method'.
-+// 'palette_sorted' is the input palette sorted lexicographically, as done in
-+// PrepareMapToPalette. Returns 0 on memory allocation error.
-+int PaletteSort(PaletteSorting method, const struct WebPPicture* const pic,
-+                const uint32_t* const palette_sorted, uint32_t num_colors,
-+                uint32_t* const palette);
-+
-+#endif  // WEBP_UTILS_PALETTE_H_
-diff --git a/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c b/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c
-index f65b6cdbb696..97e78937043e 100644
---- a/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c
-+++ b/3rdparty/libwebp/src/utils/quant_levels_dec_utils.c
-@@ -30,7 +30,7 @@
- 
- #define DFIX 4           // extra precision for ordered dithering
- #define DSIZE 4          // dithering size (must be a power of two)
--// cf. http://en.wikipedia.org/wiki/Ordered_dithering
-+// cf. https://en.wikipedia.org/wiki/Ordered_dithering
- static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
-   {  0,  8,  2, 10 },     // coefficients are in DFIX fixed-point precision
-   { 12,  4, 14,  6 },
-diff --git a/3rdparty/libwebp/src/utils/rescaler_utils.c b/3rdparty/libwebp/src/utils/rescaler_utils.c
-index 4bcae24af54a..a0581a14b1a8 100644
---- a/3rdparty/libwebp/src/utils/rescaler_utils.c
-+++ b/3rdparty/libwebp/src/utils/rescaler_utils.c
-@@ -12,66 +12,74 @@
- // Author: Skal (pascal.massimino@gmail.com)
- 
- #include <assert.h>
-+#include <limits.h>
- #include <stdlib.h>
- #include <string.h>
- #include "src/dsp/dsp.h"
- #include "src/utils/rescaler_utils.h"
-+#include "src/utils/utils.h"
- 
- //------------------------------------------------------------------------------
- 
--void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
--                      uint8_t* const dst,
--                      int dst_width, int dst_height, int dst_stride,
--                      int num_channels, rescaler_t* const work) {
-+int WebPRescalerInit(WebPRescaler* const rescaler,
-+                     int src_width, int src_height,
-+                     uint8_t* const dst,
-+                     int dst_width, int dst_height, int dst_stride,
-+                     int num_channels, rescaler_t* const work) {
-   const int x_add = src_width, x_sub = dst_width;
-   const int y_add = src_height, y_sub = dst_height;
--  wrk->x_expand = (src_width < dst_width);
--  wrk->y_expand = (src_height < dst_height);
--  wrk->src_width = src_width;
--  wrk->src_height = src_height;
--  wrk->dst_width = dst_width;
--  wrk->dst_height = dst_height;
--  wrk->src_y = 0;
--  wrk->dst_y = 0;
--  wrk->dst = dst;
--  wrk->dst_stride = dst_stride;
--  wrk->num_channels = num_channels;
-+  const uint64_t total_size = 2ull * dst_width * num_channels * sizeof(*work);
-+  if (!CheckSizeOverflow(total_size)) return 0;
-+
-+  rescaler->x_expand = (src_width < dst_width);
-+  rescaler->y_expand = (src_height < dst_height);
-+  rescaler->src_width = src_width;
-+  rescaler->src_height = src_height;
-+  rescaler->dst_width = dst_width;
-+  rescaler->dst_height = dst_height;
-+  rescaler->src_y = 0;
-+  rescaler->dst_y = 0;
-+  rescaler->dst = dst;
-+  rescaler->dst_stride = dst_stride;
-+  rescaler->num_channels = num_channels;
- 
-   // for 'x_expand', we use bilinear interpolation
--  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;
--  wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
--  if (!wrk->x_expand) {  // fx_scale is not used otherwise
--    wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);
-+  rescaler->x_add = rescaler->x_expand ? (x_sub - 1) : x_add;
-+  rescaler->x_sub = rescaler->x_expand ? (x_add - 1) : x_sub;
-+  if (!rescaler->x_expand) {  // fx_scale is not used otherwise
-+    rescaler->fx_scale = WEBP_RESCALER_FRAC(1, rescaler->x_sub);
-   }
-   // vertical scaling parameters
--  wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;
--  wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;
--  wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
--  if (!wrk->y_expand) {
-+  rescaler->y_add = rescaler->y_expand ? y_add - 1 : y_add;
-+  rescaler->y_sub = rescaler->y_expand ? y_sub - 1 : y_sub;
-+  rescaler->y_accum = rescaler->y_expand ? rescaler->y_sub : rescaler->y_add;
-+  if (!rescaler->y_expand) {
-     // This is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.
--    // Its value is <= WEBP_RESCALER_ONE, because dst_height <= wrk->y_add, and
--    // wrk->x_add >= 1;
--    const uint64_t ratio =
--        (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add);
-+    // Its value is <= WEBP_RESCALER_ONE, because dst_height <= rescaler->y_add
-+    // and rescaler->x_add >= 1;
-+    const uint64_t num = (uint64_t)dst_height * WEBP_RESCALER_ONE;
-+    const uint64_t den = (uint64_t)rescaler->x_add * rescaler->y_add;
-+    const uint64_t ratio = num / den;
-     if (ratio != (uint32_t)ratio) {
-       // When ratio == WEBP_RESCALER_ONE, we can't represent the ratio with the
-       // current fixed-point precision. This happens when src_height ==
--      // wrk->y_add (which == src_height), and wrk->x_add == 1.
-+      // rescaler->y_add (which == src_height), and rescaler->x_add == 1.
-       // => We special-case fxy_scale = 0, in WebPRescalerExportRow().
--      wrk->fxy_scale = 0;
-+      rescaler->fxy_scale = 0;
-     } else {
--      wrk->fxy_scale = (uint32_t)ratio;
-+      rescaler->fxy_scale = (uint32_t)ratio;
-     }
--    wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
-+    rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->y_sub);
-   } else {
--    wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);
--    // wrk->fxy_scale is unused here.
-+    rescaler->fy_scale = WEBP_RESCALER_FRAC(1, rescaler->x_add);
-+    // rescaler->fxy_scale is unused here.
-   }
--  wrk->irow = work;
--  wrk->frow = work + num_channels * dst_width;
--  memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
-+  rescaler->irow = work;
-+  rescaler->frow = work + num_channels * dst_width;
-+  memset(work, 0, (size_t)total_size);
- 
-   WebPRescalerDspInit();
-+  return 1;
- }
- 
- int WebPRescalerGetScaledDimensions(int src_width, int src_height,
-@@ -82,6 +90,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
-   {
-     int width = *scaled_width;
-     int height = *scaled_height;
-+    const int max_size = INT_MAX / 2;
- 
-     // if width is unspecified, scale original proportionally to height ratio.
-     if (width == 0 && src_height > 0) {
-@@ -94,7 +103,7 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
-           (int)(((uint64_t)src_height * width + src_width - 1) / src_width);
-     }
-     // Check if the overall dimensions still make sense.
--    if (width <= 0 || height <= 0) {
-+    if (width <= 0 || height <= 0 || width > max_size || height > max_size) {
-       return 0;
-     }
- 
-@@ -107,31 +116,34 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
- //------------------------------------------------------------------------------
- // all-in-one calls
- 
--int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
--  const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;
-+int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
-+                           int max_num_lines) {
-+  const int num_lines =
-+      (rescaler->y_accum + rescaler->y_sub - 1) / rescaler->y_sub;
-   return (num_lines > max_num_lines) ? max_num_lines : num_lines;
- }
- 
--int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
-+int WebPRescalerImport(WebPRescaler* const rescaler, int num_lines,
-                        const uint8_t* src, int src_stride) {
-   int total_imported = 0;
--  while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {
--    if (wrk->y_expand) {
--      rescaler_t* const tmp = wrk->irow;
--      wrk->irow = wrk->frow;
--      wrk->frow = tmp;
-+  while (total_imported < num_lines &&
-+         !WebPRescalerHasPendingOutput(rescaler)) {
-+    if (rescaler->y_expand) {
-+      rescaler_t* const tmp = rescaler->irow;
-+      rescaler->irow = rescaler->frow;
-+      rescaler->frow = tmp;
-     }
--    WebPRescalerImportRow(wrk, src);
--    if (!wrk->y_expand) {     // Accumulate the contribution of the new row.
-+    WebPRescalerImportRow(rescaler, src);
-+    if (!rescaler->y_expand) {    // Accumulate the contribution of the new row.
-       int x;
--      for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {
--        wrk->irow[x] += wrk->frow[x];
-+      for (x = 0; x < rescaler->num_channels * rescaler->dst_width; ++x) {
-+        rescaler->irow[x] += rescaler->frow[x];
-       }
-     }
--    ++wrk->src_y;
-+    ++rescaler->src_y;
-     src += src_stride;
-     ++total_imported;
--    wrk->y_accum -= wrk->y_sub;
-+    rescaler->y_accum -= rescaler->y_sub;
-   }
-   return total_imported;
- }
-diff --git a/3rdparty/libwebp/src/utils/rescaler_utils.h b/3rdparty/libwebp/src/utils/rescaler_utils.h
-index ca41e42c4a53..ef201ef86c19 100644
---- a/3rdparty/libwebp/src/utils/rescaler_utils.h
-+++ b/3rdparty/libwebp/src/utils/rescaler_utils.h
-@@ -47,12 +47,13 @@ struct WebPRescaler {
- };
- 
- // Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
--void WebPRescalerInit(WebPRescaler* const rescaler,
--                      int src_width, int src_height,
--                      uint8_t* const dst,
--                      int dst_width, int dst_height, int dst_stride,
--                      int num_channels,
--                      rescaler_t* const work);
-+// Returns false in case of error.
-+int WebPRescalerInit(WebPRescaler* const rescaler,
-+                     int src_width, int src_height,
-+                     uint8_t* const dst,
-+                     int dst_width, int dst_height, int dst_stride,
-+                     int num_channels,
-+                     rescaler_t* const work);
- 
- // If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value
- // will be calculated preserving the aspect ratio, otherwise the values are
-diff --git a/3rdparty/libwebp/src/utils/utils.c b/3rdparty/libwebp/src/utils/utils.c
-index 6080e19e2176..408ce88f67f6 100644
---- a/3rdparty/libwebp/src/utils/utils.c
-+++ b/3rdparty/libwebp/src/utils/utils.c
-@@ -11,19 +11,19 @@
- //
- // Author: Skal (pascal.massimino@gmail.com)
- 
-+#include "src/utils/utils.h"
-+
- #include <stdlib.h>
- #include <string.h>  // for memcpy()
--#include "src/webp/decode.h"
-+
-+#include "src/utils/palette.h"
- #include "src/webp/encode.h"
--#include "src/webp/format_constants.h"  // for MAX_PALETTE_SIZE
--#include "src/utils/color_cache_utils.h"
--#include "src/utils/utils.h"
- 
- // If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
- // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
- // and not multi-thread safe!).
- // An interesting alternative is valgrind's 'massif' tool:
--//    http://valgrind.org/docs/manual/ms-manual.html
-+//    https://valgrind.org/docs/manual/ms-manual.html
- // Here is an example command line:
- /*    valgrind --tool=massif --massif-out-file=massif.out \
-                --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc
-@@ -101,6 +101,9 @@ static void Increment(int* const v) {
- #if defined(MALLOC_LIMIT)
-     {
-       const char* const malloc_limit_str = getenv("MALLOC_LIMIT");
-+#if MALLOC_LIMIT > 1
-+      mem_limit = (size_t)MALLOC_LIMIT;
-+#endif
-       if (malloc_limit_str != NULL) {
-         mem_limit = atoi(malloc_limit_str);
-       }
-@@ -169,16 +172,16 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
-   const uint64_t total_size = nmemb * size;
-   if (nmemb == 0) return 1;
-   if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
--  if (total_size != (size_t)total_size) return 0;
-+  if (!CheckSizeOverflow(total_size)) return 0;
- #if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT)
-   if (countdown_to_fail > 0 && --countdown_to_fail == 0) {
-     return 0;    // fake fail!
-   }
- #endif
--#if defined(MALLOC_LIMIT)
-+#if defined(PRINT_MEM_INFO) && defined(MALLOC_LIMIT)
-   if (mem_limit > 0) {
-     const uint64_t new_total_mem = (uint64_t)total_mem + total_size;
--    if (new_total_mem != (size_t)new_total_mem ||
-+    if (!CheckSizeOverflow(new_total_mem) ||
-         new_total_mem > mem_limit) {
-       return 0;   // fake fail!
-     }
-@@ -249,66 +252,10 @@ void WebPCopyPixels(const WebPPicture* const src, WebPPicture* const dst) {
- 
- //------------------------------------------------------------------------------
- 
--#define COLOR_HASH_SIZE         (MAX_PALETTE_SIZE * 4)
--#define COLOR_HASH_RIGHT_SHIFT  22  // 32 - log2(COLOR_HASH_SIZE).
--
- int WebPGetColorPalette(const WebPPicture* const pic, uint32_t* const palette) {
--  int i;
--  int x, y;
--  int num_colors = 0;
--  uint8_t in_use[COLOR_HASH_SIZE] = { 0 };
--  uint32_t colors[COLOR_HASH_SIZE];
--  const uint32_t* argb = pic->argb;
--  const int width = pic->width;
--  const int height = pic->height;
--  uint32_t last_pix = ~argb[0];   // so we're sure that last_pix != argb[0]
--  assert(pic != NULL);
--  assert(pic->use_argb);
--
--  for (y = 0; y < height; ++y) {
--    for (x = 0; x < width; ++x) {
--      int key;
--      if (argb[x] == last_pix) {
--        continue;
--      }
--      last_pix = argb[x];
--      key = VP8LHashPix(last_pix, COLOR_HASH_RIGHT_SHIFT);
--      while (1) {
--        if (!in_use[key]) {
--          colors[key] = last_pix;
--          in_use[key] = 1;
--          ++num_colors;
--          if (num_colors > MAX_PALETTE_SIZE) {
--            return MAX_PALETTE_SIZE + 1;  // Exact count not needed.
--          }
--          break;
--        } else if (colors[key] == last_pix) {
--          break;  // The color is already there.
--        } else {
--          // Some other color sits here, so do linear conflict resolution.
--          ++key;
--          key &= (COLOR_HASH_SIZE - 1);  // Key mask.
--        }
--      }
--    }
--    argb += pic->argb_stride;
--  }
--
--  if (palette != NULL) {  // Fill the colors into palette.
--    num_colors = 0;
--    for (i = 0; i < COLOR_HASH_SIZE; ++i) {
--      if (in_use[i]) {
--        palette[num_colors] = colors[i];
--        ++num_colors;
--      }
--    }
--  }
--  return num_colors;
-+  return GetColorPalette(pic, palette);
- }
- 
--#undef COLOR_HASH_SIZE
--#undef COLOR_HASH_RIGHT_SHIFT
--
- //------------------------------------------------------------------------------
- 
- #if defined(WEBP_NEED_LOG_TABLE_8BIT)
-diff --git a/3rdparty/libwebp/src/utils/utils.h b/3rdparty/libwebp/src/utils/utils.h
-index 2a3ec926784e..b2241fbf9bf7 100644
---- a/3rdparty/libwebp/src/utils/utils.h
-+++ b/3rdparty/libwebp/src/utils/utils.h
-@@ -20,9 +20,7 @@
- #endif
- 
- #include <assert.h>
--#include <limits.h>
- 
--#include "src/dsp/dsp.h"
- #include "src/webp/types.h"
- 
- #ifdef __cplusplus
-@@ -42,6 +40,10 @@ extern "C" {
- #endif
- #endif  // WEBP_MAX_ALLOCABLE_MEMORY
- 
-+static WEBP_INLINE int CheckSizeOverflow(uint64_t size) {
-+  return size == (size_t)size;
-+}
-+
- // size-checking safe malloc/calloc: verify that the requested size is not too
- // large, or return NULL. You don't need to call these for constructs like
- // malloc(sizeof(foo)), but only if there's picture-dependent size involved
-@@ -60,7 +62,8 @@ WEBP_EXTERN void WebPSafeFree(void* const ptr);
- // Alignment
- 
- #define WEBP_ALIGN_CST 31
--#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST)
-+#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & \
-+                         ~(uintptr_t)WEBP_ALIGN_CST)
- 
- #include <string.h>
- // memcpy() is the safe way of moving potentially unaligned 32b memory.
-@@ -69,10 +72,19 @@ static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
-   memcpy(&A, ptr, sizeof(A));
-   return A;
- }
-+
-+static WEBP_INLINE int32_t WebPMemToInt32(const uint8_t* const ptr) {
-+  return (int32_t)WebPMemToUint32(ptr);
-+}
-+
- static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
-   memcpy(ptr, &val, sizeof(val));
- }
- 
-+static WEBP_INLINE void WebPInt32ToMem(uint8_t* const ptr, int val) {
-+  WebPUint32ToMem(ptr, (uint32_t)val);
-+}
-+
- //------------------------------------------------------------------------------
- // Reading/writing data.
- 
-@@ -107,24 +119,33 @@ static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
-   PutLE16(data + 2, (int)(val >> 16));
- }
- 
--// Returns (int)floor(log2(n)). n must be > 0.
- // use GNU builtins where available.
- #if defined(__GNUC__) && \
-     ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-+// Returns (int)floor(log2(n)). n must be > 0.
- static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
-   return 31 ^ __builtin_clz(n);
- }
-+// counts the number of trailing zero
-+static WEBP_INLINE int BitsCtz(uint32_t n) { return __builtin_ctz(n); }
- #elif defined(_MSC_VER) && _MSC_VER > 1310 && \
-       (defined(_M_X64) || defined(_M_IX86))
- #include <intrin.h>
- #pragma intrinsic(_BitScanReverse)
-+#pragma intrinsic(_BitScanForward)
- 
- static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
--  unsigned long first_set_bit;
-+  unsigned long first_set_bit;  // NOLINT (runtime/int)
-   _BitScanReverse(&first_set_bit, n);
-   return first_set_bit;
- }
--#else   // default: use the C-version.
-+static WEBP_INLINE int BitsCtz(uint32_t n) {
-+  unsigned long first_set_bit;  // NOLINT (runtime/int)
-+  _BitScanForward(&first_set_bit, n);
-+  return first_set_bit;
-+}
-+#else   // default: use the (slow) C-version.
-+#define WEBP_HAVE_SLOW_CLZ_CTZ   // signal that the Clz/Ctz function are slow
- // Returns 31 ^ clz(n) = log2(n). This is the default C-implementation, either
- // based on table or not. Can be used as fallback if clz() is not available.
- #define WEBP_NEED_LOG_TABLE_8BIT
-@@ -139,6 +160,15 @@ static WEBP_INLINE int WebPLog2FloorC(uint32_t n) {
- }
- 
- static WEBP_INLINE int BitsLog2Floor(uint32_t n) { return WebPLog2FloorC(n); }
-+
-+static WEBP_INLINE int BitsCtz(uint32_t n) {
-+  int i;
-+  for (i = 0; i < 32; ++i, n >>= 1) {
-+    if (n & 1) return i;
-+  }
-+  return 32;
-+}
-+
- #endif
- 
- //------------------------------------------------------------------------------
-@@ -166,6 +196,7 @@ WEBP_EXTERN void WebPCopyPixels(const struct WebPPicture* const src,
- // MAX_PALETTE_SIZE, also outputs the actual unique colors into 'palette'.
- // Note: 'palette' is assumed to be an array already allocated with at least
- // MAX_PALETTE_SIZE elements.
-+// TODO(vrabaud) remove whenever we can break the ABI.
- WEBP_EXTERN int WebPGetColorPalette(const struct WebPPicture* const pic,
-                                     uint32_t* const palette);
- 
-diff --git a/3rdparty/libwebp/src/webp/decode.h b/3rdparty/libwebp/src/webp/decode.h
-index 44fcd64a84d4..9d968061d160 100644
---- a/3rdparty/libwebp/src/webp/decode.h
-+++ b/3rdparty/libwebp/src/webp/decode.h
-@@ -81,11 +81,12 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
- // returned is the Y samples buffer. Upon return, *u and *v will point to
- // the U and V chroma data. These U and V buffers need NOT be passed to
- // WebPFree(), unlike the returned Y luma one. The dimension of the U and V
--// planes are both (*width + 1) / 2 and (*height + 1)/ 2.
-+// planes are both (*width + 1) / 2 and (*height + 1) / 2.
- // Upon return, the Y buffer has a stride returned as '*stride', while U and V
- // have a common stride returned as '*uv_stride'.
--// Return NULL in case of error.
--// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
-+// 'width' and 'height' may be NULL, the other pointers must not be.
-+// Returns NULL in case of error.
-+// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr
- WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
-                                    int* width, int* height,
-                                    uint8_t** u, uint8_t** v,
-@@ -250,23 +251,24 @@ typedef enum VP8StatusCode {
- // WebPIDecoder object. This object can be left in a SUSPENDED state if the
- // picture is only partially decoded, pending additional input.
- // Code example:
--//
--//   WebPInitDecBuffer(&output_buffer);
--//   output_buffer.colorspace = mode;
--//   ...
--//   WebPIDecoder* idec = WebPINewDecoder(&output_buffer);
--//   while (additional_data_is_available) {
--//     // ... (get additional data in some new_data[] buffer)
--//     status = WebPIAppend(idec, new_data, new_data_size);
--//     if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) {
--//       break;    // an error occurred.
--//     }
--//
--//     // The above call decodes the current available buffer.
--//     // Part of the image can now be refreshed by calling
--//     // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
--//   }
--//   WebPIDelete(idec);
-+/*
-+     WebPInitDecBuffer(&output_buffer);
-+     output_buffer.colorspace = mode;
-+     ...
-+     WebPIDecoder* idec = WebPINewDecoder(&output_buffer);
-+     while (additional_data_is_available) {
-+       // ... (get additional data in some new_data[] buffer)
-+       status = WebPIAppend(idec, new_data, new_data_size);
-+       if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) {
-+         break;    // an error occurred.
-+       }
-+
-+       // The above call decodes the current available buffer.
-+       // Part of the image can now be refreshed by calling
-+       // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
-+     }
-+     WebPIDelete(idec);
-+*/
- 
- // Creates a new incremental decoder with the supplied buffer parameter.
- // This output_buffer can be passed NULL, in which case a default output buffer
-@@ -388,7 +390,7 @@ WEBP_EXTERN const WebPDecBuffer* WebPIDecodedArea(
-      CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
- 
-      // C) Adjust 'config', if needed
--     config.no_fancy_upsampling = 1;
-+     config.options.no_fancy_upsampling = 1;
-      config.output.colorspace = MODE_BGRA;
-      // etc.
- 
-diff --git a/3rdparty/libwebp/src/webp/encode.h b/3rdparty/libwebp/src/webp/encode.h
-index b4c599df8765..56b68e2f10e0 100644
---- a/3rdparty/libwebp/src/webp/encode.h
-+++ b/3rdparty/libwebp/src/webp/encode.h
-@@ -441,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture,
- // the original dimension will be lost). Picture 'dst' need not be initialized
- // with WebPPictureInit() if it is different from 'src', since its content will
- // be overwritten.
--// Returns false in case of memory allocation error or invalid parameters.
-+// Returns false in case of invalid parameters.
- WEBP_EXTERN int WebPPictureView(const WebPPicture* src,
-                                 int left, int top, int width, int height,
-                                 WebPPicture* dst);
-@@ -455,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture);
- // dimension will be calculated preserving the aspect ratio.
- // No gamma correction is applied.
- // Returns false in case of error (invalid parameter or insufficient memory).
--WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height);
-+WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height);
- 
- // Colorspace conversion function to import RGB samples.
- // Previous buffer will be free'd, if any.
-@@ -526,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture);
- // Remove the transparency information (if present) by blending the color with
- // the background color 'background_rgb' (specified as 24bit RGB triplet).
- // After this call, all alpha values are reset to 0xff.
--WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
-+WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb);
- 
- //------------------------------------------------------------------------------
- // Main call
-diff --git a/3rdparty/libwebp/src/webp/format_constants.h b/3rdparty/libwebp/src/webp/format_constants.h
-index eca6981a47d0..999035c5d265 100644
---- a/3rdparty/libwebp/src/webp/format_constants.h
-+++ b/3rdparty/libwebp/src/webp/format_constants.h
-@@ -55,7 +55,7 @@
- typedef enum {
-   PREDICTOR_TRANSFORM      = 0,
-   CROSS_COLOR_TRANSFORM    = 1,
--  SUBTRACT_GREEN           = 2,
-+  SUBTRACT_GREEN_TRANSFORM = 2,
-   COLOR_INDEXING_TRANSFORM = 3
- } VP8LImageTransformType;
- 
-diff --git a/3rdparty/libwebp/src/webp/types.h b/3rdparty/libwebp/src/webp/types.h
-index 47f7f2b00706..f255432e413c 100644
---- a/3rdparty/libwebp/src/webp/types.h
-+++ b/3rdparty/libwebp/src/webp/types.h
-@@ -42,7 +42,11 @@ typedef long long int int64_t;
- # if defined(__GNUC__) && __GNUC__ >= 4
- #  define WEBP_EXTERN extern __attribute__ ((visibility ("default")))
- # else
--#  define WEBP_EXTERN extern
-+#  if defined(_MSC_VER) && defined(WEBP_DLL)
-+#   define WEBP_EXTERN __declspec(dllexport)
-+#  else
-+#   define WEBP_EXTERN extern
-+#  endif
- # endif  /* __GNUC__ >= 4 */
- #endif  /* WEBP_EXTERN */
- 
-
-From 9b0cd53f2091071d780b2d3588a8b755bb1fdc68 Mon Sep 17 00:00:00 2001
-From: Vincent Rabaud <vrabaud@google.com>
-Date: Fri, 15 Sep 2023 09:37:50 +0200
-Subject: [PATCH 2/2] Add the sharpyuv folder.
-
----
- 3rdparty/libwebp/CMakeLists.txt | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt
-index 532c03026568..f3b6ebd0d620 100644
---- a/3rdparty/libwebp/CMakeLists.txt
-+++ b/3rdparty/libwebp/CMakeLists.txt
-@@ -9,8 +9,8 @@ if(ANDROID)
-   ocv_include_directories(${CPUFEATURES_INCLUDE_DIRS})
- endif()
- 
--file(GLOB lib_srcs src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c)
--file(GLOB lib_hdrs src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h)
-+file(GLOB lib_srcs sharpyuv/*.c src/dec/*.c src/demux/*.c src/dsp/*.c src/enc/*.c src/mux/*.c src/utils/*.c src/webp/*.c)
-+file(GLOB lib_hdrs sharpyuv/*.h src/dec/*.h src/demux/*.h src/dsp/*.h src/enc/*.h src/mux/*.h src/utils/*.h src/webp/*.h)
- 
- # FIXIT
- if(ANDROID AND ARMEABI_V7A AND NOT NEON)
diff --git a/third_party/libtiff b/third_party/libtiff
index 4f34a37..8b20804 160000
--- a/third_party/libtiff
+++ b/third_party/libtiff
@@ -1 +1 @@
-Subproject commit 4f34a374b2e2cba3c8055ab77458c5730c073aff
+Subproject commit 8b20804fc0ddeaa93667b799b5e1a2a7dc9e3fb2
diff --git a/third_party/opencv b/third_party/opencv
index f9a59f2..5199850 160000
--- a/third_party/opencv
+++ b/third_party/opencv
@@ -1 +1 @@
-Subproject commit f9a59f2592993d3dcc080e495f4f5e02dd8ec7ef
+Subproject commit 5199850039ad23f1f0e6cccea5061a9fea5efca6