diff --git a/.github/actions/print-cpu-info/action.yml b/.github/actions/print-cpu-info/action.yml index 15575ce9d..ff8e64f68 100644 --- a/.github/actions/print-cpu-info/action.yml +++ b/.github/actions/print-cpu-info/action.yml @@ -11,6 +11,7 @@ runs: run: cat /proc/cpuinfo shell: sh - - if: ${{ runner.os == 'Windows' }} + # TODO: wmic is no longer available by default on Windows, it seems. We should replace it. + - if: ${{ runner.os == 'Windows' && false }} run: wmic cpu get caption, deviceid, name, numberofcores, maxclockspeed, status shell: cmd diff --git a/.github/workflows/check-release-builds.yml b/.github/workflows/check-release-builds.yml index 875f0eb25..ded9aa932 100644 --- a/.github/workflows/check-release-builds.yml +++ b/.github/workflows/check-release-builds.yml @@ -4,7 +4,6 @@ on: push: tags: - blockio-* - - bloomfilter-blocked-* - lsm-tree-* concurrency: @@ -50,7 +49,7 @@ jobs: env: tag-name: ${{ github.ref_name }} - release-build-target: ${{ startsWith(github.ref_name, 'blockio') && './blockio/blockio.cabal' || startsWith(github.ref_name, 'bloomfilter-blocked') && './bloomfilter-blocked/bloomfilter-blocked.cabal' || startsWith(github.ref_name, 'lsm-tree') && './lsm-tree/lsm-tree.cabal' }} + release-build-target: ${{ startsWith(github.ref_name, 'blockio') && './blockio/blockio.cabal' || startsWith(github.ref_name, 'lsm-tree') && './lsm-tree/lsm-tree.cabal' }} steps: - name: 🗄️ Print release build target diff --git a/README.md b/README.md index 2e23638ff..96a5e63eb 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,14 @@ # lsm-tree [![Hackage: lsm-tree](https://img.shields.io/hackage/v/lsm-tree?label=Hackage:%20lsm-tree)](https://hackage.haskell.org/package/lsm-tree) -[![Hackage: bloomfilter-blocked](https://img.shields.io/hackage/v/bloomfilter-blocked?label=Hackage:%20bloomfilter-blocked)](https://hackage.haskell.org/package/bloomfilter-blocked) [![Hackage: blockio](https://img.shields.io/hackage/v/blockio?label=Hackage:%20blockio)](https://hackage.haskell.org/package/blockio) [![Build](https://img.shields.io/github/actions/workflow/status/IntersectMBO/lsm-tree/ci.yml?label=Build)](https://github.com/IntersectMBO/lsm-tree/actions/workflows/ci.yml) [![Haddocks](https://img.shields.io/badge/documentation-Haddocks-purple)](https://IntersectMBO.github.io/lsm-tree/) -The [`lsm-tree`](./lsm-tree/README.md) package and its daughter packages -[`blockio`](./blockio/README.md) and [`bloomfilter-blocked`](./bloomfilter-blocked/README.md) have -been developed by Well-Typed LLP on behalf of the Cardano Development Foundation -and Intersect. A [project report] and [integration notes] are available. +The [`lsm-tree`](./lsm-tree/README.md) package and its daughter package +[`blockio`](./blockio/README.md) have been developed by Well-Typed LLP on behalf +of the Cardano Development Foundation and Intersect. A [project report] and +[integration notes] are available. [project report]: https://github.com/IntersectMBO/lsm-tree/blob/main/doc/final-report/final-report.pdf [integration notes]: https://github.com/IntersectMBO/lsm-tree/blob/main/doc/final-report/integration-notes.pdf diff --git a/bloomfilter-blocked/CHANGELOG.md b/bloomfilter-blocked/CHANGELOG.md deleted file mode 100644 index cf2f22aa4..000000000 --- a/bloomfilter-blocked/CHANGELOG.md +++ /dev/null @@ -1,5 +0,0 @@ -# Revision history for bloomfilter-blocked - -## 0.1.0.0 -- 2025-08-06 - -* First version. Released on an unsuspecting world. diff --git a/bloomfilter-blocked/LICENSE b/bloomfilter-blocked/LICENSE deleted file mode 100644 index 261eeb9e9..000000000 --- a/bloomfilter-blocked/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/bloomfilter-blocked/NOTICE b/bloomfilter-blocked/NOTICE deleted file mode 100644 index 47d51cf76..000000000 --- a/bloomfilter-blocked/NOTICE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) 2023-2025 Cardano Development Foundation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/bloomfilter-blocked/README.md b/bloomfilter-blocked/README.md deleted file mode 100644 index 32263ba2c..000000000 --- a/bloomfilter-blocked/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# bloomfilter-blocked - -`bloomfilter-blocked` is a Haskell library providing multiple fast and efficient -implementations of [bloom filters](https://en.wikipedia.org/wiki/Bloom_filter). -It is a full rewrite of the -[`bloomfilter`](https://hackage.haskell.org/package/bloomfilter) package, -originally authored by Bryan O'Sullivan . - -A bloom filter is a space-efficient data structure representing a set that can -be probablistically queried for set membership. The set membership query returns -no false negatives, but it might return false positives. That is, if an element -was added to a bloom filter, then a subsequent query definitely returns `True`. -If an element was *not* added to a filter, then a subsequent query may still -return `True` if `False` would be the correct answer. The probabiliy of false -positives -- the false positive rate (FPR) -- is configurable. - -The library includes two implementations of bloom filters: classic, and blocked. - -* **Classic** bloom filters, found in the `Data.BloomFilter.Classic` module: a - default implementation that is faithful to the canonical description of a - bloom filter data structure. - -* **Blocked** floom filters, found in the `Data.BloomFilter.Blocked` module: an - implementation that optimises the memory layout of a classic bloom filter for - speed (cheaper CPU cache reads), at the cost of a slightly higher FPR for the - same amount of assigned memory. - -The FPR scales inversely with how much memory is assigned to the filter. It also -scales inversely with how many elements are added to the set. The user can -configure how much memory is asisgned to a filter, and the user also controls -how many elements are added to a set. Each implementation comes with helper -functions, like `sizeForFPR` and `sizeForBits`, that the user can leverage to -configure filters. - -Both immutable (`Bloom`) and mutable (`MBloom`) bloom filters, including -functions to convert between the two, are provided for each implementation. Note -however that a (mutable) bloom filter can not be resized once created, and that -elements can not be deleted once inserted. - -For more information about the library and examples of how to use it, see the -Haddock documentation of the different modules. - -# Usage notes - -User should take into account the following: - -* This package is not supported on 32bit systems. - -# Differences from the `bloomfilter` package - -The library is a full rewrite of the -[`bloomfilter`](https://hackage.haskell.org/package/bloomfilter) package, -originally authored by Bryan O'Sullivan . The main -differences are: - -* `bloomfilter-blocked` supports both classic and blocked bloom filters, whereas - `bloomfilter` only supports the former. -* `bloomfilter-blocked` supports bloom filters of arbitrary sizes, whereas - `bloomfilter` limits the sizes to powers of two. -* `bloomfilter-blocked` supports sizes up to `2^48` for classic bloom filters - and up to `2^41` for blocked bloom filters, instead of `2^32`. -* In `bloomfilter-blocked`, the `Bloom` and `MBloom` types are parameterised - over a `Hashable` type class, instead of having a `a -> [Hash]` typed field. - This separation in `bloomfilter-blocked` allows clean (de-)serialisation of - filters as the hashing scheme is static. -* `bloomfilter-blocked` uses [`XXH3`](https://xxhash.com/) for hashing instead - of [Jenkins' - `lookup3`](https://en.wikipedia.org/wiki/Jenkins_hash_function#lookup3), which - `bloomfilter` uses. -* The user can configure hash salts for improved security in - `bloomfilter-blocked`, whereas this is not supported in `bloomfilter`. diff --git a/bloomfilter-blocked/bench/bloomfilter-bench.hs b/bloomfilter-blocked/bench/bloomfilter-bench.hs deleted file mode 100644 index 3df2919ea..000000000 --- a/bloomfilter-blocked/bench/bloomfilter-bench.hs +++ /dev/null @@ -1,57 +0,0 @@ -module Main (main) where - -import Criterion.Main (bench, bgroup, defaultMain, env, whnf) -import qualified Data.BloomFilter.Blocked as B.Blocked -import qualified Data.BloomFilter.Classic as B.Classic -import Data.Word (Word64) -import System.Random (StdGen, newStdGen, uniform) - -main :: IO () -main = - defaultMain [ - bgroup "Data.BloomFilter.Classic" [ - env newStdGen $ \g0 -> - bench "construct m=1e6 fpr=1%" $ - whnf (constructBloom_classic 1_000_000 0.01) g0 - - , env newStdGen $ \g0 -> - bench "construct m=1e6 fpr=0.1%" $ - whnf (constructBloom_classic 1_000_000 0.001) g0 - - , env newStdGen $ \g0 -> - bench "construct m=1e7 fpr=0.1%" $ - whnf (constructBloom_classic 10_000_000 0.001) g0 - ] - , bgroup "Data.BloomFilter.Blocked" [ - env newStdGen $ \g0 -> - bench "construct m=1e6 fpr=1%" $ - whnf (constructBloom_blocked 1_000_000 0.01) g0 - - , env newStdGen $ \g0 -> - bench "construct m=1e6 fpr=0.1%" $ - whnf (constructBloom_blocked 1_000_000 0.001) g0 - - , env newStdGen $ \g0 -> - bench "construct m=1e7 fpr=0.1%" $ - whnf (constructBloom_blocked 10_000_000 0.001) g0 - ] - ] - -constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64 -constructBloom_classic n fpr g0 = - let (!salt, !g1) = uniform g0 in - B.Classic.unfold (B.Classic.sizeForFPR fpr n) salt (nextElement n) (g1, 0) - -constructBloom_blocked :: Int -> Double -> StdGen -> B.Blocked.Bloom Word64 -constructBloom_blocked n fpr g0 = - let (!salt, !g1) = uniform g0 in - B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) salt (nextElement n) (g1, 0) - -{-# INLINE nextElement #-} -nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) -nextElement !n (!g, !i) - | i >= n = Nothing - | otherwise = Just (x, (g', i+1)) - where - (!x, !g') = uniform g - diff --git a/bloomfilter-blocked/bloomfilter-blocked.cabal b/bloomfilter-blocked/bloomfilter-blocked.cabal deleted file mode 100644 index a54ddf10b..000000000 --- a/bloomfilter-blocked/bloomfilter-blocked.cabal +++ /dev/null @@ -1,199 +0,0 @@ -cabal-version: 3.4 -name: bloomfilter-blocked -version: 0.1.0.0 -synopsis: Classic and block-style bloom filters -description: - @bloomfilter-blocked@ is a Haskell library providing multiple fast and efficient - implementations of [bloom filters](https://en.wikipedia.org/wiki/Bloom_filter). - It is a full rewrite of the - [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package, - originally authored by Bryan O'Sullivan . - - The library includes two implementations of bloom filters: classic, and blocked. - - * /Classic/ bloom filters, found in the "Data.BloomFilter.Classic" module: a - default implementation that is faithful to the canonical description of a - bloom filter data structure. - - * /Blocked/ floom filters, found in the "Data.BloomFilter.Blocked" module: an - implementation that optimises the memory layout of a classic bloom filter for - speed (cheaper CPU cache reads), at the cost of a slightly higher FPR for the - same amount of assigned memory. - -license: Apache-2.0 -license-files: - LICENSE - NOTICE - -author: - Duncan Coutts, Joris Dral, Matthias Heinzel, Wolfgang Jeltsch, Wen Kokke, and Alex Washburn - -maintainer: duncan@well-typed.com, joris@well-typed.com -copyright: (c) 2023-2025 Cardano Development Foundation -category: Data -build-type: Simple -tested-with: - GHC ==9.2 || ==9.4 || ==9.6 || ==9.8 || ==9.10 || ==9.12 - -extra-doc-files: - CHANGELOG.md - README.md - -extra-source-files: - xxhash/include/HsXXHash.h - xxhash/xxHash-0.8.2/xxhash.h - -license-files: xxhash/xxHash-0.8.2/LICENSE-xxHash - -source-repository head - type: git - location: https://github.com/IntersectMBO/lsm-tree - subdir: bloomfilter-blocked - -source-repository this - type: git - location: https://github.com/IntersectMBO/lsm-tree - subdir: bloomfilter-blocked - tag: bloomfilter-blocked-0.1.0.0 - -common warnings - ghc-options: - -Wall -Wcompat -Wincomplete-uni-patterns - -Wincomplete-record-updates -Wpartial-fields -Widentities - -Wredundant-constraints -Wmissing-export-lists - -Wno-unticked-promoted-constructors -Wunused-packages - - ghc-options: -Werror=missing-deriving-strategies - -common language - default-language: GHC2021 - default-extensions: - DeriveAnyClass - DerivingStrategies - DerivingVia - ExplicitNamespaces - GADTs - LambdaCase - RecordWildCards - RoleAnnotations - ViewPatterns - -library - import: language, warnings - hs-source-dirs: src - build-depends: - , base >=4.16 && <4.22 - , bloomfilter-blocked:xxhash - , bytestring ^>=0.11 || ^>=0.12 - , deepseq ^>=1.4 || ^>=1.5 - , primitive ^>=0.9 - - exposed-modules: - Data.BloomFilter - Data.BloomFilter.Blocked - Data.BloomFilter.Classic - Data.BloomFilter.Hash - - other-modules: - Data.BloomFilter.Blocked.BitArray - Data.BloomFilter.Blocked.Calc - Data.BloomFilter.Blocked.Internal - Data.BloomFilter.Classic.BitArray - Data.BloomFilter.Classic.Calc - Data.BloomFilter.Classic.Internal - - ghc-options: -O2 - -test-suite tests - import: language, warnings - type: exitcode-stdio-1.0 - hs-source-dirs: tests - main-is: bloomfilter-tests.hs - build-depends: - , base <5 - , bloomfilter-blocked - , bytestring - , quickcheck-instances - , tasty - , tasty-quickcheck - -benchmark bench - import: language, warnings - type: exitcode-stdio-1.0 - hs-source-dirs: bench - main-is: bloomfilter-bench.hs - build-depends: - , base - , bloomfilter-blocked - , criterion - , random - --- It's not really a test suite, but if we make it an executable then its --- dependencies will be included for dependency resolution when building the --- main library. As a test-suite, it's more accurately represented as an --- internal component. -test-suite fpr-calc - import: language, warnings - type: exitcode-stdio-1.0 - hs-source-dirs: tests - main-is: fpr-calc.hs - build-depends: - , base - , bloomfilter-blocked - , containers - , parallel - , random - , regression-simple - - ghc-options: -threaded - --- It's not really a test suite, but if we make it an executable then its --- dependencies will be included for dependency resolution when building the --- main library. As a test-suite, it's more accurately represented as an --- internal component. -test-suite spell - import: language, warnings - type: exitcode-stdio-1.0 - hs-source-dirs: examples - main-is: spell.hs - build-depends: - , base - , bloomfilter-blocked - , directory - --- this exists due to windows -library xxhash - import: language, warnings - visibility: private - include-dirs: xxhash/xxHash-0.8.2/ xxhash/include/ - includes: - HsXXHash.h - xxhash.h - - exposed-modules: XXH3 - - if (arch(x86_64) && !os(osx)) - -- Cabal doesn't pass cc-options to "ordinary" Haskell source compilation - -- https://github.com/haskell/cabal/issues/9801 - ghc-options: -optc=-mavx2 -optc=-O3 - - other-modules: FFI - hs-source-dirs: xxhash/src - build-depends: - , base <5 - , bytestring ^>=0.11 || ^>=0.12 - , primitive ^>=0.9 - -test-suite xxhash-tests - import: language, warnings - type: exitcode-stdio-1.0 - hs-source-dirs: xxhash/tests - main-is: xxhash-tests.hs - build-depends: - , base <5 - , bloomfilter-blocked:xxhash - , bytestring - , primitive - , tasty - , tasty-hunit - , tasty-quickcheck diff --git a/bloomfilter-blocked/examples/spell.hs b/bloomfilter-blocked/examples/spell.hs deleted file mode 100644 index 2275da09e..000000000 --- a/bloomfilter-blocked/examples/spell.hs +++ /dev/null @@ -1,29 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module Main (main) where - -import Control.Monad (forM_, unless, when) -import System.Directory -import System.Environment (getArgs) -import System.Exit - -import qualified Data.BloomFilter as B - -main :: IO () -main = do - files <- getArgs - when (null files) $ do - putStrLn "No files to spell" - exitSuccess - putStrLn $ "Spelling files: " ++ show files - hasDictionary <- doesFileExist "/usr/share/dict/words" - unless hasDictionary $ do - putStrLn "No dictionary found" - exitSuccess - dictionary <- readFile "/usr/share/dict/words" - let !bloom = B.fromList (B.policyForFPR 0.01) bSalt (words dictionary) - forM_ files $ \file -> - putStrLn . unlines . filter (`B.notElem` bloom) . words - =<< readFile file - -bSalt :: B.Salt -bSalt = 4 diff --git a/bloomfilter-blocked/fpr.blocked.gnuplot.data b/bloomfilter-blocked/fpr.blocked.gnuplot.data deleted file mode 100644 index 07dae151f..000000000 --- a/bloomfilter-blocked/fpr.blocked.gnuplot.data +++ /dev/null @@ -1,999 +0,0 @@ -2.0 0.39201843320666596 0.3880831046648373 -2.0 0.39201843320666596 0.3825950607604861 -2.0 0.39201843320666596 0.3782830262642101 -2.0 0.39201843320666596 0.3986671893375147 -2.0 0.39201843320666596 0.39984319874559 -2.0 0.39201843320666596 0.40493923951391614 -2.0 0.39201843320666596 0.4057232457859663 -2.0 0.39201843320666596 0.39317914543316346 -2.0 0.39201843320666596 0.3884751078008624 -2.2 0.3545234090532114 0.31903580290677064 -2.2 0.3545234090532114 0.3310882665721375 -2.2 0.3545234090532114 0.33286068769939736 -2.2 0.3545234090532114 0.3317972350230415 -2.2 0.3545234090532114 0.33073378234668555 -2.2 0.3545234090532114 0.3555476781283233 -2.2 0.3545234090532114 0.3456221198156682 -2.2 0.3545234090532114 0.34207727756114853 -2.2 0.3545234090532114 0.3342786246012052 -2.4000000000000004 0.32074266536538576 0.31173829377806284 -2.4000000000000004 0.32074266536538576 0.31462475946119306 -2.4000000000000004 0.32074266536538576 0.32200128287363694 -2.4000000000000004 0.32074266536538576 0.3008338678640154 -2.4000000000000004 0.32074266536538576 0.3114175753688262 -2.4000000000000004 0.32074266536538576 0.3296985246953175 -2.4000000000000004 0.32074266536538576 0.3024374599101988 -2.4000000000000004 0.32074266536538576 0.3236048749198204 -2.4000000000000004 0.32074266536538576 0.3290570878768441 -2.6000000000000005 0.29029659365451377 0.2783744557329463 -2.6000000000000005 0.29029659365451377 0.2780841799709724 -2.6000000000000005 0.29029659365451377 0.2708272859216255 -2.6000000000000005 0.29029659365451377 0.2763425253991292 -2.6000000000000005 0.29029659365451377 0.27169811320754716 -2.6000000000000005 0.29029659365451377 0.2879535558780842 -2.6000000000000005 0.29029659365451377 0.2841799709724238 -2.6000000000000005 0.29029659365451377 0.28592162554426703 -2.6000000000000005 0.29029659365451377 0.2896952104499274 -2.8000000000000007 0.262845493076701 0.2578186596583443 -2.8000000000000007 0.262845493076701 0.24362680683311433 -2.8000000000000007 0.262845493076701 0.25098554533508544 -2.8000000000000007 0.262845493076701 0.26701708278580816 -2.8000000000000007 0.262845493076701 0.25624178712220763 -2.8000000000000007 0.262845493076701 0.2507227332457293 -2.8000000000000007 0.262845493076701 0.2614980289093298 -2.8000000000000007 0.262845493076701 0.2604467805519054 -2.8000000000000007 0.262845493076701 0.2680683311432326 -3.000000000000001 0.23808526530534094 0.22833333333333333 -3.000000000000001 0.23808526530534094 0.2311904761904762 -3.000000000000001 0.23808526530534094 0.235 -3.000000000000001 0.23808526530534094 0.23333333333333334 -3.000000000000001 0.23808526530534094 0.22333333333333333 -3.000000000000001 0.23808526530534094 0.23166666666666666 -3.000000000000001 0.23808526530534094 0.22976190476190475 -3.000000000000001 0.23808526530534094 0.2361904761904762 -3.000000000000001 0.23808526530534094 0.24928571428571428 -3.200000000000001 0.21574358502064847 0.21963322545846817 -3.200000000000001 0.21574358502064847 0.2127292340884574 -3.200000000000001 0.21574358502064847 0.2168284789644013 -3.200000000000001 0.21574358502064847 0.22071197411003235 -3.200000000000001 0.21574358502064847 0.2161812297734628 -3.200000000000001 0.21574358502064847 0.21877022653721684 -3.200000000000001 0.21574358502064847 0.21725997842502698 -3.200000000000001 0.21574358502064847 0.21121898597626754 -3.200000000000001 0.21574358502064847 0.23106796116504855 -3.4000000000000012 0.19557649229605542 0.20457656952865244 -3.4000000000000012 0.19557649229605542 0.1875611187169959 -3.4000000000000012 0.19557649229605542 0.21181302562096616 -3.4000000000000012 0.19557649229605542 0.19812243301388618 -3.4000000000000012 0.19557649229605542 0.19264619597105417 -3.4000000000000012 0.19557649229605542 0.19968707216898102 -3.4000000000000012 0.19557649229605542 0.19362409544298845 -3.4000000000000012 0.19557649229605542 0.1985135928026599 -3.4000000000000012 0.19557649229605542 0.19597105417563074 -3.6000000000000014 0.1773653593556774 0.18109258602341255 -3.6000000000000014 0.1773653593556774 0.18268889677190492 -3.6000000000000014 0.1773653593556774 0.18570415040794608 -3.6000000000000014 0.1773653593556774 0.1777225966654842 -3.6000000000000014 0.1773653593556774 0.17630365377793544 -3.6000000000000014 0.1773653593556774 0.1727562965590635 -3.6000000000000014 0.1773653593556774 0.1784320681092586 -3.6000000000000014 0.1773653593556774 0.17949627527492018 -3.6000000000000014 0.1773653593556774 0.17878680383114579 -3.8000000000000016 0.1609141896431677 0.15561635017701964 -3.8000000000000016 0.1609141896431677 0.15561635017701964 -3.8000000000000016 0.1609141896431677 0.16124879304795622 -3.8000000000000016 0.1609141896431677 0.15770840038622466 -3.8000000000000016 0.1609141896431677 0.16124879304795622 -3.8000000000000016 0.1609141896431677 0.16253620856131315 -3.8000000000000016 0.1609141896431677 0.16076601223044737 -3.8000000000000016 0.1609141896431677 0.153363373028645 -3.8000000000000016 0.1609141896431677 0.16929514000643708 -4.000000000000002 0.1460472119673121 0.14575726595589308 -4.000000000000002 0.1460472119673121 0.1421060318387615 -4.000000000000002 0.1460472119673121 0.1444428216737257 -4.000000000000002 0.1460472119673121 0.14838615452022783 -4.000000000000002 0.1460472119673121 0.14590331532057835 -4.000000000000002 0.1460472119673121 0.15130714181393312 -4.000000000000002 0.1460472119673121 0.14707171023806045 -4.000000000000002 0.1460472119673121 0.13801664962757412 -4.000000000000002 0.1460472119673121 0.15159924054330365 -4.200000000000002 0.13260673675027865 0.13168014852141627 -4.200000000000002 0.13260673675027865 0.13128232329929718 -4.200000000000002 0.13260673675027865 0.13406709985413076 -4.200000000000002 0.13260673675027865 0.13300623259514655 -4.200000000000002 0.13260673675027865 0.13592361755735313 -4.200000000000002 0.13260673675027865 0.13300623259514655 -4.200000000000002 0.13260673675027865 0.13446492507624982 -4.200000000000002 0.13260673675027865 0.1299562392255669 -4.200000000000002 0.13260673675027865 0.13353666622463864 -4.400000000000002 0.12045124516777768 0.11780293905083113 -4.400000000000002 0.12045124516777768 0.115152975186702 -4.400000000000002 0.12045124516777768 0.12238014936159962 -4.400000000000002 0.12045124516777768 0.12370513129366417 -4.400000000000002 0.12045124516777768 0.12430739580823898 -4.400000000000002 0.12045124516777768 0.12141652613827993 -4.400000000000002 0.12045124516777768 0.1187665622741508 -4.400000000000002 0.12045124516777768 0.11443025776921224 -4.400000000000002 0.12045124516777768 0.1257528306432185 -4.600000000000002 0.10945368529563608 0.10245183887915937 -4.600000000000002 0.10945368529563608 0.10956654991243432 -4.600000000000002 0.10945368529563608 0.11055166374781086 -4.600000000000002 0.10945368529563608 0.11241243432574431 -4.600000000000002 0.10945368529563608 0.11427320490367776 -4.600000000000002 0.10945368529563608 0.10825306479859895 -4.600000000000002 0.10945368529563608 0.1108800350262697 -4.600000000000002 0.10945368529563608 0.10573555166374782 -4.600000000000002 0.10945368529563608 0.12095008756567426 -4.8000000000000025 9.949995231639147e-2 0.10019900497512438 -4.8000000000000025 9.949995231639147e-2 9.412935323383084e-2 -4.8000000000000025 9.949995231639147e-2 0.10228855721393035 -4.8000000000000025 9.949995231639147e-2 0.10378109452736319 -4.8000000000000025 9.949995231639147e-2 0.1054726368159204 -4.8000000000000025 9.949995231639147e-2 0.10228855721393035 -4.8000000000000025 9.949995231639147e-2 0.10577114427860697 -4.8000000000000025 9.949995231639147e-2 9.681592039800994e-2 -4.8000000000000025 9.949995231639147e-2 0.103681592039801 -5.000000000000003 9.048753243817137e-2 8.994661116641028e-2 -5.000000000000003 9.048753243817137e-2 8.91322052302959e-2 -5.000000000000003 9.048753243817137e-2 8.958465297258167e-2 -5.000000000000003 9.048753243817137e-2 9.655234820378246e-2 -5.000000000000003 9.048753243817137e-2 9.419961994389647e-2 -5.000000000000003 9.048753243817137e-2 9.302325581395349e-2 -5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 -5.000000000000003 9.048753243817137e-2 9.003710071486744e-2 -5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 -5.200000000000003 8.232429247650848e-2 8.355972668148515e-2 -5.200000000000003 8.232429247650848e-2 8.10899810652836e-2 -5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 -5.200000000000003 8.232429247650848e-2 8.792294393677451e-2 -5.200000000000003 8.232429247650848e-2 8.224252901951098e-2 -5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 -5.200000000000003 8.232429247650848e-2 8.602947229768668e-2 -5.200000000000003 8.232429247650848e-2 8.767596937515436e-2 -5.200000000000003 8.232429247650848e-2 8.183090475014407e-2 -5.400000000000003 7.492739908322692e-2 7.934961786303012e-2 -5.400000000000003 7.492739908322692e-2 7.5827963434737e-2 -5.400000000000003 7.492739908322692e-2 7.62026075228533e-2 -5.400000000000003 7.492739908322692e-2 7.695189569908586e-2 -5.400000000000003 7.492739908322692e-2 7.747639742244868e-2 -5.400000000000003 7.492739908322692e-2 7.44043158998951e-2 -5.400000000000003 7.492739908322692e-2 7.717668215195564e-2 -5.400000000000003 7.492739908322692e-2 7.590289225236026e-2 -5.400000000000003 7.492739908322692e-2 7.425445826464859e-2 -5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 -5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 -5.600000000000003 6.82223534062247e-2 6.4538136171374e-2 -5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 -5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 -5.600000000000003 6.82223534062247e-2 6.815390912812117e-2 -5.600000000000003 6.82223534062247e-2 6.863146404693683e-2 -5.600000000000003 6.82223534062247e-2 7.0814572247237e-2 -5.600000000000003 6.82223534062247e-2 6.52203574839678e-2 -5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 -5.800000000000003 6.21421285572768e-2 6.307481978622918e-2 -5.800000000000003 6.21421285572768e-2 6.152125279642058e-2 -5.800000000000003 6.21421285572768e-2 6.829480487198608e-2 -5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 -5.800000000000003 6.21421285572768e-2 6.1086254039274174e-2 -5.800000000000003 6.21421285572768e-2 6.487695749440715e-2 -5.800000000000003 6.21421285572768e-2 6.717623663932389e-2 -5.800000000000003 6.21421285572768e-2 6.102411135968183e-2 -6.0000000000000036 5.66263986760163e-2 5.7701019252548134e-2 -6.0000000000000036 5.66263986760163e-2 5.492638731596829e-2 -6.0000000000000036 5.66263986760163e-2 5.65118912797282e-2 -6.0000000000000036 5.66263986760163e-2 5.9173272933182336e-2 -6.0000000000000036 5.66263986760163e-2 5.951302378255945e-2 -6.0000000000000036 5.66263986760163e-2 5.475651189127973e-2 -6.0000000000000036 5.66263986760163e-2 5.8776896942242356e-2 -6.0000000000000036 5.66263986760163e-2 6.098527746319366e-2 -6.0000000000000036 5.66263986760163e-2 5.311438278595697e-2 -6.200000000000004 5.162084962827559e-2 5.229196778856081e-2 -6.200000000000004 5.162084962827559e-2 5.14660334503407e-2 -6.200000000000004 5.162084962827559e-2 5.203386330786702e-2 -6.200000000000004 5.162084962827559e-2 5.1362791658063185e-2 -6.200000000000004 5.162084962827559e-2 5.280817674994838e-2 -6.200000000000004 5.162084962827559e-2 5.151765434647945e-2 -6.200000000000004 5.162084962827559e-2 5.662812306421639e-2 -6.200000000000004 5.162084962827559e-2 5.440842453024985e-2 -6.200000000000004 5.162084962827559e-2 5.244683047697708e-2 -6.400000000000004 4.7076562484715895e-2 4.891253177666886e-2 -6.400000000000004 4.7076562484715895e-2 4.67470106392995e-2 -6.400000000000004 4.7076562484715895e-2 4.792392430091329e-2 -6.400000000000004 4.7076562484715895e-2 4.7829771207984186e-2 -6.400000000000004 4.7076562484715895e-2 4.7971000847377834e-2 -6.400000000000004 4.7076562484715895e-2 4.599378589586668e-2 -6.400000000000004 4.7076562484715895e-2 4.9289144148385276e-2 -6.400000000000004 4.7076562484715895e-2 4.844176631202335e-2 -6.400000000000004 4.7076562484715895e-2 4.7547311929196874e-2 -6.600000000000004 4.294946190765297e-2 4.453893398617017e-2 -6.600000000000004 4.294946190765297e-2 4.488253231971825e-2 -6.600000000000004 4.294946190765297e-2 4.148949877593094e-2 -6.600000000000004 4.294946190765297e-2 4.372288794399347e-2 -6.600000000000004 4.294946190765297e-2 4.432418502770261e-2 -6.600000000000004 4.294946190765297e-2 4.25632435682687e-2 -6.600000000000004 4.294946190765297e-2 4.655757419576515e-2 -6.600000000000004 4.294946190765297e-2 4.518318086157282e-2 -6.600000000000004 4.294946190765297e-2 4.526908044495984e-2 -6.800000000000004 3.919982244476177e-2 3.9709917679341435e-2 -6.800000000000004 3.919982244476177e-2 3.947471579772638e-2 -6.800000000000004 3.919982244476177e-2 3.7671501372010974e-2 -6.800000000000004 3.919982244476177e-2 3.8729909839278716e-2 -6.800000000000004 3.919982244476177e-2 4.108192865542924e-2 -6.800000000000004 3.919982244476177e-2 4.049392395139161e-2 -6.800000000000004 3.919982244476177e-2 3.9670717365738926e-2 -6.800000000000004 3.919982244476177e-2 3.888671109368875e-2 -6.800000000000004 3.919982244476177e-2 3.90827126617013e-2 -7.000000000000004 3.5791826500754534e-2 3.7939797415798705e-2 -7.000000000000004 3.5791826500754534e-2 3.4575324814775044e-2 -7.000000000000004 3.5791826500754534e-2 3.4253194459357886e-2 -7.000000000000004 3.5791826500754534e-2 3.7975589677511724e-2 -7.000000000000004 3.5791826500754534e-2 3.5505923619313505e-2 -7.000000000000004 3.5791826500754534e-2 3.704499087297326e-2 -7.000000000000004 3.5791826500754534e-2 3.6830237302695155e-2 -7.000000000000004 3.5791826500754534e-2 3.6508106947278e-2 -7.000000000000004 3.5791826500754534e-2 3.543433909588747e-2 -7.200000000000005 3.269316844350807e-2 3.4361003040507404e-2 -7.200000000000005 3.269316844350807e-2 3.269362801190048e-2 -7.200000000000005 3.269316844350807e-2 3.0960865727269755e-2 -7.200000000000005 3.269316844350807e-2 3.226861084774577e-2 -7.200000000000005 3.269316844350807e-2 3.0993559355281655e-2 -7.200000000000005 3.269316844350807e-2 3.220322359172197e-2 -7.200000000000005 3.269316844350807e-2 3.4361003040507404e-2 -7.200000000000005 3.269316844350807e-2 3.494948834472161e-2 -7.200000000000005 3.269316844350807e-2 3.190898093961487e-2 -7.400000000000005 2.9874699909469064e-2 2.969557553849371e-2 -7.400000000000005 2.9874699909469064e-2 3.0741194395482927e-2 -7.400000000000005 2.9874699909469064e-2 2.984494966092074e-2 -7.400000000000005 2.9874699909469064e-2 3.0771069219968333e-2 -7.400000000000005 2.9874699909469064e-2 2.885908045290234e-2 -7.400000000000005 2.9874699909469064e-2 2.9635825889522897e-2 -7.400000000000005 2.9874699909469064e-2 3.008394825680399e-2 -7.400000000000005 2.9874699909469064e-2 3.139844053416186e-2 -7.400000000000005 2.9874699909469064e-2 3.065156992202671e-2 -7.600000000000005 2.7310111913322593e-2 2.733777583570024e-2 -7.600000000000005 2.7310111913322593e-2 2.782936421236618e-2 -7.600000000000005 2.7310111913322593e-2 2.701005025125628e-2 -7.600000000000005 2.7310111913322593e-2 2.7911295608477167e-2 -7.600000000000005 2.7310111913322593e-2 2.690080838977496e-2 -7.600000000000005 2.7310111913322593e-2 2.810246886606948e-2 -7.600000000000005 2.7310111913322593e-2 2.788398514310684e-2 -7.600000000000005 2.7310111913322593e-2 2.8621367708105746e-2 -7.600000000000005 2.7310111913322593e-2 2.7911295608477167e-2 -7.800000000000005 2.4975649846666052e-2 2.4576038362596468e-2 -7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 -7.800000000000005 2.4975649846666052e-2 2.5874772097205224e-2 -7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 -7.800000000000005 2.4975649846666052e-2 2.5175453932415895e-2 -7.800000000000005 2.4975649846666052e-2 2.547516171732561e-2 -7.800000000000005 2.4975649846666052e-2 2.6074577287145035e-2 -7.800000000000005 2.4975649846666052e-2 2.5375259122355703e-2 -7.800000000000005 2.4975649846666052e-2 2.5999650340917606e-2 -8.000000000000005 2.2849857876681937e-2 2.4197970935015082e-2 -8.000000000000005 2.2849857876681937e-2 2.415227127319258e-2 -8.000000000000005 2.2849857876681937e-2 2.3352527191298784e-2 -8.000000000000005 2.2849857876681937e-2 2.4106571611370076e-2 -8.000000000000005 2.2849857876681937e-2 2.4449319075038844e-2 -8.000000000000005 2.2849857876681937e-2 2.3489626176766292e-2 -8.000000000000005 2.2849857876681937e-2 2.472351704597386e-2 -8.000000000000005 2.2849857876681937e-2 2.3946622794991317e-2 -8.000000000000005 2.2849857876681937e-2 2.454071839868385e-2 -8.200000000000006 2.091334963463375e-2 1.9240421616195415e-2 -8.200000000000006 2.091334963463375e-2 2.082984774970721e-2 -8.200000000000006 2.091334963463375e-2 2.1143550276058222e-2 -8.200000000000006 2.091334963463375e-2 2.1310858290112096e-2 -8.200000000000006 2.091334963463375e-2 2.147816630416597e-2 -8.200000000000006 2.091334963463375e-2 2.1603647314706376e-2 -8.200000000000006 2.091334963463375e-2 2.254475489375941e-2 -8.200000000000006 2.091334963463375e-2 2.1101723272544753e-2 -8.200000000000006 2.091334963463375e-2 2.1708214823490043e-2 -8.400000000000006 1.9148602420807403e-2 1.8957164467763247e-2 -8.400000000000006 1.9148602420807403e-2 2.031671868716849e-2 -8.400000000000006 1.9148602420807403e-2 1.8995461769718322e-2 -8.400000000000006 1.9148602420807403e-2 1.9646515902954638e-2 -8.400000000000006 1.9148602420807403e-2 1.7903988663998623e-2 -8.400000000000006 1.9148602420807403e-2 1.9167799628516172e-2 -8.400000000000006 1.9148602420807403e-2 2.085288091453957e-2 -8.400000000000006 1.9148602420807403e-2 1.9799705110774946e-2 -8.400000000000006 1.9148602420807403e-2 1.98762997146851e-2 -8.600000000000005 1.7539772452040647e-2 1.7399540455685546e-2 -8.600000000000005 1.7539772452040647e-2 1.745216003367653e-2 -8.600000000000005 1.7539772452040647e-2 1.766263834564047e-2 -8.600000000000005 1.7539772452040647e-2 1.68733446757757e-2 -8.600000000000005 1.7539772452040647e-2 1.7101362847069968e-2 -8.600000000000005 1.7539772452040647e-2 1.7417080315015872e-2 -8.600000000000005 1.7539772452040647e-2 1.839931243751425e-2 -8.600000000000005 1.7539772452040647e-2 1.7610018767649484e-2 -8.600000000000005 1.7539772452040647e-2 1.8416852296844578e-2 -8.800000000000006 1.6072528944441514e-2 1.5815358899353885e-2 -8.800000000000006 1.6072528944441514e-2 1.604037416824713e-2 -8.800000000000006 1.6072528944441514e-2 1.6956507763026776e-2 -8.800000000000006 1.6072528944441514e-2 1.6265389437140378e-2 -8.800000000000006 1.6072528944441514e-2 1.6538622263653605e-2 -8.800000000000006 1.6072528944441514e-2 1.5574271111253977e-2 -8.800000000000006 1.6072528944441514e-2 1.6586839821273586e-2 -8.800000000000006 1.6072528944441514e-2 1.592786653380051e-2 -8.800000000000006 1.6072528944441514e-2 1.5783213860940563e-2 -9.000000000000007 1.4733905061014388e-2 1.4336019802271957e-2 -9.000000000000007 1.4733905061014388e-2 1.4984308467533998e-2 -9.000000000000007 1.4733905061014388e-2 1.4954840800931178e-2 -9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 -9.000000000000007 1.4733905061014388e-2 1.4115012302750807e-2 -9.000000000000007 1.4733905061014388e-2 1.4615962634998748e-2 -9.000000000000007 1.4733905061014388e-2 1.4498091968587467e-2 -9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 -9.000000000000007 1.4733905061014388e-2 1.5131646800548099e-2 -9.200000000000006 1.351216396499029e-2 1.3147404975205048e-2 -9.200000000000006 1.351216396499029e-2 1.3390625211128677e-2 -9.200000000000006 1.351216396499029e-2 1.3633845447052305e-2 -9.200000000000006 1.351216396499029e-2 1.295823368059778e-2 -9.200000000000006 1.351216396499029e-2 1.3282527328495953e-2 -9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 -9.200000000000006 1.351216396499029e-2 1.4268920507519559e-2 -9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 -9.200000000000006 1.351216396499029e-2 1.4066236977583201e-2 -9.400000000000006 1.23966784076142e-2 1.263217920587105e-2 -9.400000000000006 1.23966784076142e-2 1.3351184499237606e-2 -9.400000000000006 1.23966784076142e-2 1.2458626204023951e-2 -9.400000000000006 1.23966784076142e-2 1.2954491923587092e-2 -9.400000000000006 1.23966784076142e-2 1.211152020032975e-2 -9.400000000000006 1.23966784076142e-2 1.2880112065652621e-2 -9.400000000000006 1.23966784076142e-2 1.2768542278750914e-2 -9.400000000000006 1.23966784076142e-2 1.2260279916198694e-2 -9.400000000000006 1.23966784076142e-2 1.3239614712335899e-2 -9.600000000000007 1.1377822446563804e-2 1.1878484469222893e-2 -9.600000000000007 1.1377822446563804e-2 1.2344976675389691e-2 -9.600000000000007 1.1377822446563804e-2 1.1503015132552055e-2 -9.600000000000007 1.1377822446563804e-2 1.1218568665377175e-2 -9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 -9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 -9.600000000000007 1.1377822446563804e-2 1.1798839458413927e-2 -9.600000000000007 1.1377822446563804e-2 1.109341221982023e-2 -9.600000000000007 1.1377822446563804e-2 1.2015018773466833e-2 -9.800000000000008 1.0446874040350686e-2 1.1157309709366708e-2 -9.800000000000008 1.0446874040350686e-2 1.0864795971667954e-2 -9.800000000000008 1.0446874040350686e-2 1.0237980819456342e-2 -9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 -9.800000000000008 1.0446874040350686e-2 1.0509600718748041e-2 -9.800000000000008 1.0446874040350686e-2 1.0405131526712772e-2 -9.800000000000008 1.0446874040350686e-2 1.1021499759720858e-2 -9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 -9.800000000000008 1.0446874040350686e-2 1.064541066839389e-2 -10.000000000000007 9.595927397018434e-3 9.76864246576657e-3 -10.000000000000007 9.595927397018434e-3 9.624703726094174e-3 -10.000000000000007 9.595927397018434e-3 9.567128230225217e-3 -10.000000000000007 9.595927397018434e-3 9.845409793591848e-3 -10.000000000000007 9.595927397018434e-3 9.356018078705702e-3 -10.000000000000007 9.595927397018434e-3 9.077736515339071e-3 -10.000000000000007 9.595927397018434e-3 1.0162075020871117e-2 -10.000000000000007 9.595927397018434e-3 9.240867086967787e-3 -10.000000000000007 9.595927397018434e-3 9.979752617286083e-3 -10.200000000000006 8.817814074001557e-3 8.817797843166648e-3 -10.200000000000006 8.817814074001557e-3 8.914793619441481e-3 -10.200000000000006 8.817814074001557e-3 9.055878384932147e-3 -10.200000000000006 8.817814074001557e-3 9.04706058708898e-3 -10.200000000000006 8.817814074001557e-3 8.429814738067315e-3 -10.200000000000006 8.817814074001557e-3 8.650259684146481e-3 -10.200000000000006 8.817814074001557e-3 8.747255460421315e-3 -10.200000000000006 8.817814074001557e-3 8.78252665179398e-3 -10.200000000000006 8.817814074001557e-3 8.75607325826448e-3 -10.400000000000007 8.10603193174018e-3 7.976330401653629e-3 -10.400000000000007 8.10603193174018e-3 8.543752279820046e-3 -10.400000000000007 8.10603193174018e-3 8.024966562639322e-3 -10.400000000000007 8.10603193174018e-3 8.25193531390589e-3 -10.400000000000007 8.10603193174018e-3 7.943906294329834e-3 -10.400000000000007 8.10603193174018e-3 8.10602683094881e-3 -10.400000000000007 8.10603193174018e-3 8.527540226158148e-3 -10.400000000000007 8.10603193174018e-3 7.830421918696552e-3 -10.400000000000007 8.10603193174018e-3 8.203299152920196e-3 -10.600000000000009 7.45468113797498e-3 7.40249284351145e-3 -10.600000000000009 7.45468113797498e-3 7.290672709923664e-3 -10.600000000000009 7.45468113797498e-3 7.454675572519084e-3 -10.600000000000009 7.45468113797498e-3 7.477039599236642e-3 -10.600000000000009 7.45468113797498e-3 7.231035305343511e-3 -10.600000000000009 7.45468113797498e-3 7.186307251908397e-3 -10.600000000000009 7.45468113797498e-3 7.074487118320611e-3 -10.600000000000009 7.45468113797498e-3 7.573950381679389e-3 -10.600000000000009 7.45468113797498e-3 7.797590648854962e-3 -10.800000000000008 6.858406503829848e-3 6.721259756114289e-3 -10.800000000000008 6.858406503829848e-3 6.872145179210732e-3 -10.800000000000008 6.858406503829848e-3 6.673250757856329e-3 -10.800000000000008 6.858406503829848e-3 6.920154177468691e-3 -10.800000000000008 6.858406503829848e-3 6.542940619727583e-3 -10.800000000000008 6.858406503829848e-3 6.652675472888633e-3 -10.800000000000008 6.858406503829848e-3 6.789844039339945e-3 -10.800000000000008 6.858406503829848e-3 6.933871034113822e-3 -10.800000000000008 6.858406503829848e-3 7.0916148855328315e-3 -11.000000000000007 6.312345507942478e-3 6.381769978538064e-3 -11.000000000000007 6.312345507942478e-3 6.508016664562555e-3 -11.000000000000007 6.312345507942478e-3 6.451205655851534e-3 -11.000000000000007 6.312345507942478e-3 6.274460295417245e-3 -11.000000000000007 6.312345507942478e-3 6.268147961116021e-3 -11.000000000000007 6.312345507942478e-3 6.1040272692841815e-3 -11.000000000000007 6.312345507942478e-3 6.590077010478475e-3 -11.000000000000007 6.312345507942478e-3 6.19871228380255e-3 -11.000000000000007 6.312345507942478e-3 6.306021966923368e-3 -11.200000000000008 5.812081432014394e-3 5.939961058963703e-3 -11.200000000000008 5.812081432014394e-3 6.125948097991921e-3 -11.200000000000008 5.812081432014394e-3 5.602859550725059e-3 -11.200000000000008 5.812081432014394e-3 5.951585248902967e-3 -11.200000000000008 5.812081432014394e-3 5.823719159571067e-3 -11.200000000000008 5.812081432014394e-3 6.067827148295603e-3 -11.200000000000008 5.812081432014394e-3 5.841155444479963e-3 -11.200000000000008 5.812081432014394e-3 5.858591729388858e-3 -11.200000000000008 5.812081432014394e-3 6.364243991746825e-3 -11.40000000000001 5.353601091101685e-3 5.364312864714385e-3 -11.40000000000001 5.353601091101685e-3 5.6105787247711335e-3 -11.40000000000001 5.353601091101685e-3 5.401788104288238e-3 -11.40000000000001 5.353601091101685e-3 5.557042668237058e-3 -11.40000000000001 5.353601091101685e-3 5.535628245623428e-3 -11.40000000000001 5.353601091101685e-3 5.701590020879062e-3 -11.40000000000001 5.353601091101685e-3 5.385727287328015e-3 -11.40000000000001 5.353601091101685e-3 5.342898442100755e-3 -11.40000000000001 5.353601091101685e-3 5.487445794742759e-3 -11.600000000000009 4.933256695536919e-3 4.992452122778802e-3 -11.600000000000009 4.933256695536919e-3 4.913520073406806e-3 -11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 -11.600000000000009 4.933256695536919e-3 5.239114777066293e-3 -11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 -11.600000000000009 4.933256695536919e-3 4.9036535672353065e-3 -11.600000000000009 4.933256695536919e-3 4.790188746263061e-3 -11.600000000000009 4.933256695536919e-3 4.721123203062563e-3 -11.600000000000009 4.933256695536919e-3 4.972719110435803e-3 -11.800000000000008 4.547731429255463e-3 4.561371594888354e-3 -11.800000000000008 4.547731429255463e-3 4.4658692982855065e-3 -11.800000000000008 4.547731429255463e-3 4.693255718768475e-3 -11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 -11.800000000000008 4.547731429255463e-3 4.429487471008232e-3 -11.800000000000008 4.547731429255463e-3 4.5431806812497155e-3 -11.800000000000008 4.547731429255463e-3 4.72963754604575e-3 -11.800000000000008 4.547731429255463e-3 4.379462458501978e-3 -11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 -12.000000000000009 4.1940083721126e-3 4.399521882273995e-3 -12.000000000000009 4.1940083721126e-3 4.202403170675446e-3 -12.000000000000009 4.1940083721126e-3 3.988508398515319e-3 -12.000000000000009 4.1940083721126e-3 3.946568247111372e-3 -12.000000000000009 4.1940083721126e-3 3.9214041562690045e-3 -12.000000000000009 4.1940083721126e-3 4.038836580200054e-3 -12.000000000000009 4.1940083721126e-3 4.181433094973473e-3 -12.000000000000009 4.1940083721126e-3 4.1059408224463695e-3 -12.000000000000009 4.1940083721126e-3 4.1898211252542625e-3 -12.20000000000001 3.8693424320706163e-3 3.675873116598695e-3 -12.20000000000001 3.8693424320706163e-3 3.8074306807717013e-3 -12.20000000000001 3.8693424320706163e-3 3.8538627622445266e-3 -12.20000000000001 3.8693424320706163e-3 3.5791396135303084e-3 -12.20000000000001 3.8693424320706163e-3 3.830646721508114e-3 -12.20000000000001 3.8693424320706163e-3 3.8577321023672622e-3 -12.20000000000001 3.8693424320706163e-3 3.7996920005262304e-3 -12.20000000000001 3.8693424320706163e-3 3.5868782937757797e-3 -12.20000000000001 3.8693424320706163e-3 3.8848174832264108e-3 -12.40000000000001 3.5712349873948854e-3 3.7533703551595452e-3 -12.40000000000001 3.5712349873948854e-3 3.510526221809546e-3 -12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 -12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 -12.40000000000001 3.5712349873948854e-3 3.599807153188222e-3 -12.40000000000001 3.5712349873948854e-3 3.4712426120029286e-3 -12.40000000000001 3.5712349873948854e-3 3.6390907629948394e-3 -12.40000000000001 3.5712349873948854e-3 3.731942931628663e-3 -12.40000000000001 3.5712349873948854e-3 3.7212292198632218e-3 -12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 -12.600000000000009 3.2974109696596596e-3 3.4457971167416277e-3 -12.600000000000009 3.2974109696596596e-3 3.162219554981073e-3 -12.600000000000009 3.2974109696596596e-3 3.317197989896725e-3 -12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 -12.600000000000009 3.2974109696596596e-3 3.4326074627062534e-3 -12.600000000000009 3.2974109696596596e-3 3.3007109223525065e-3 -12.600000000000009 3.2974109696596596e-3 3.1655169684899165e-3 -12.600000000000009 3.2974109696596596e-3 3.2050859305960406e-3 -12.80000000000001 3.0457981458141417e-3 3.2102728731942215e-3 -12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 -12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 -12.80000000000001 3.0457981458141417e-3 3.204181273814346e-3 -12.80000000000001 3.0457981458141417e-3 3.1371736806357193e-3 -12.80000000000001 3.0457981458141417e-3 3.1219446821860314e-3 -12.80000000000001 3.0457981458141417e-3 3.088440885596718e-3 -12.80000000000001 3.0457981458141417e-3 3.1067156837363435e-3 -12.80000000000001 3.0457981458141417e-3 3.115853082806156e-3 -13.00000000000001 2.8145083821412512e-3 2.780733010227919e-3 -13.00000000000001 2.8145083821412512e-3 2.8651682230890906e-3 -13.00000000000001 2.8145083821412512e-3 2.7863620244186634e-3 -13.00000000000001 2.8145083821412512e-3 2.8820552656613247e-3 -13.00000000000001 2.8145083821412512e-3 2.7441444179880778e-3 -13.00000000000001 2.8145083821412512e-3 2.9580469572363793e-3 -13.00000000000001 2.8145083821412512e-3 2.8229506166585045e-3 -13.00000000000001 2.8145083821412512e-3 3.0115225920484545e-3 -13.00000000000001 2.8145083821412512e-3 2.81732160246776e-3 -13.20000000000001 2.6018206949624846e-3 2.4951476013800066e-3 -13.20000000000001 2.6018206949624846e-3 2.5627949816051164e-3 -13.20000000000001 2.6018206949624846e-3 2.5029530683290577e-3 -13.20000000000001 2.6018206949624846e-3 2.7189043205861387e-3 -13.20000000000001 2.6018206949624846e-3 2.6408496510956273e-3 -13.20000000000001 2.6018206949624846e-3 2.6304423618302257e-3 -13.20000000000001 2.6018206949624846e-3 2.476934845165554e-3 -13.20000000000001 2.6018206949624846e-3 2.5549895146560653e-3 -13.20000000000001 2.6018206949624846e-3 2.5549895146560653e-3 -13.40000000000001 2.4061659126745766e-3 2.281044949578801e-3 -13.40000000000001 2.4061659126745766e-3 2.30029427404782e-3 -13.40000000000001 2.4061659126745766e-3 2.305106605165075e-3 -13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 -13.40000000000001 2.4061659126745766e-3 2.350823750778996e-3 -13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 -13.40000000000001 2.4061659126745766e-3 2.276232618461546e-3 -13.40000000000001 2.4061659126745766e-3 2.2858572806960555e-3 -13.40000000000001 2.4061659126745766e-3 2.46872586315174e-3 -13.60000000000001 2.2261127913912448e-3 2.045795545107677e-3 -13.60000000000001 2.2261127913912448e-3 2.1214833019451754e-3 -13.60000000000001 2.2261127913912448e-3 2.1125788599642933e-3 -13.60000000000001 2.2261127913912448e-3 2.070282760555103e-3 -13.60000000000001 2.2261127913912448e-3 2.152648848878263e-3 -13.60000000000001 2.2261127913912448e-3 2.1170310809547344e-3 -13.60000000000001 2.2261127913912448e-3 2.1259355229356165e-3 -13.60000000000001 2.2261127913912448e-3 2.1593271803639247e-3 -13.60000000000001 2.2261127913912448e-3 2.1081266389738522e-3 -13.800000000000011 2.0603554423226206e-3 2.0006057446847964e-3 -13.800000000000011 2.0603554423226206e-3 2.0624164268068807e-3 -13.800000000000011 2.0603554423226206e-3 2.0995028360801314e-3 -13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 -13.800000000000011 2.0603554423226206e-3 2.095382123938659e-3 -13.800000000000011 2.0603554423226206e-3 2.1118649725045484e-3 -13.800000000000011 2.0603554423226206e-3 2.155132449990007e-3 -13.800000000000011 2.0603554423226206e-3 2.0438732221702556e-3 -13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 -14.00000000000001 1.9077019432494815e-3 1.9172400899672066e-3 -14.00000000000001 1.9077019432494815e-3 1.955394121608345e-3 -14.00000000000001 1.9077019432494815e-3 1.9992712579956543e-3 -14.00000000000001 1.9077019432494815e-3 1.9363171057877758e-3 -14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 -14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 -14.00000000000001 1.9077019432494815e-3 2.006902064323882e-3 -14.00000000000001 1.9077019432494815e-3 1.812316502954076e-3 -14.00000000000001 1.9077019432494815e-3 1.98973275008537e-3 -14.20000000000001 1.7670640192116816e-3 1.7794348924740683e-3 -14.20000000000001 1.7670640192116816e-3 1.6893145553179834e-3 -14.20000000000001 1.7670640192116816e-3 1.717587602268912e-3 -14.20000000000001 1.7670640192116816e-3 1.761764238129738e-3 -14.20000000000001 1.7670640192116816e-3 1.7458606492198406e-3 -14.20000000000001 1.7670640192116816e-3 1.763531303564171e-3 -14.20000000000001 1.7670640192116816e-3 1.851884575285823e-3 -14.20000000000001 1.7670640192116816e-3 1.6557403120637556e-3 -14.20000000000001 1.7670640192116816e-3 1.7723666307363363e-3 -14.400000000000011 1.6374476889839651e-3 1.5965102741576566e-3 -14.400000000000011 1.6374476889839651e-3 1.5261000774512163e-3 -14.400000000000011 1.6374476889839651e-3 1.6587332386889294e-3 -14.400000000000011 1.6374476889839651e-3 1.640721327903561e-3 -14.400000000000011 1.6374476889839651e-3 1.5866855955474558e-3 -14.400000000000011 1.6374476889839651e-3 1.6898447209545658e-3 -14.400000000000011 1.6374476889839651e-3 1.7144064174800682e-3 -14.400000000000011 1.6374476889839651e-3 1.5211877381461159e-3 -14.400000000000011 1.6374476889839651e-3 1.637446435033494e-3 -14.600000000000012 1.5179447841949282e-3 1.4891049431908741e-3 -14.600000000000012 1.5179447841949282e-3 1.4541921871323725e-3 -14.600000000000012 1.5179447841949282e-3 1.5088382400935054e-3 -14.600000000000012 1.5179447841949282e-3 1.479997267697352e-3 -14.600000000000012 1.5179447841949282e-3 1.537679212489659e-3 -14.600000000000012 1.5179447841949282e-3 1.476961375866178e-3 -14.600000000000012 1.5179447841949282e-3 1.5179459155870276e-3 -14.600000000000012 1.5179447841949282e-3 1.4526742412167854e-3 -14.600000000000012 1.5179447841949282e-3 1.434458890229741e-3 -14.800000000000011 1.4077252571790926e-3 1.3598623808008829e-3 -14.800000000000011 1.4077252571790926e-3 1.3260769800356436e-3 -14.800000000000011 1.4077252571790926e-3 1.3852014313748124e-3 -14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 -14.800000000000011 1.4077252571790926e-3 1.4471413327777512e-3 -14.800000000000011 1.4077252571790926e-3 1.3725319060878477e-3 -14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 -14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 -14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 -15.00000000000001 1.30603020194702e-3 1.2041599678194125e-3 -15.00000000000001 1.30603020194702e-3 1.2694614845124392e-3 -15.00000000000001 1.30603020194702e-3 1.2916640001880684e-3 -15.00000000000001 1.30603020194702e-3 1.2263624834950417e-3 -15.00000000000001 1.30603020194702e-3 1.2995001821912316e-3 -15.00000000000001 1.30603020194702e-3 1.22244439249346e-3 -15.00000000000001 1.30603020194702e-3 1.2407288171675075e-3 -15.00000000000001 1.30603020194702e-3 1.180651421809923e-3 -15.00000000000001 1.30603020194702e-3 1.2746856058478814e-3 -15.200000000000012 1.2121655201122257e-3 1.24004509254882e-3 -15.200000000000012 1.2121655201122257e-3 1.2097409602773435e-3 -15.200000000000012 1.2121655201122257e-3 1.2073166296956252e-3 -15.200000000000012 1.2121655201122257e-3 1.2570154066208468e-3 -15.200000000000012 1.2121655201122257e-3 1.2764100512745917e-3 -15.200000000000012 1.2121655201122257e-3 1.260651902493424e-3 -15.200000000000012 1.2121655201122257e-3 1.2521667454574105e-3 -15.200000000000012 1.2121655201122257e-3 1.2545910760391288e-3 -15.200000000000012 1.2121655201122257e-3 1.1770124974241488e-3 -15.400000000000013 1.125496170313487e-3 1.1457551347950528e-3 -15.400000000000013 1.125496170313487e-3 1.1828965094986253e-3 -15.400000000000013 1.125496170313487e-3 1.0849783398255706e-3 -15.400000000000013 1.125496170313487e-3 1.1873984943111795e-3 -15.400000000000013 1.125496170313487e-3 1.1772690284829325e-3 -15.400000000000013 1.125496170313487e-3 1.1896494867174566e-3 -15.400000000000013 1.125496170313487e-3 1.1558846006232998e-3 -15.400000000000013 1.125496170313487e-3 1.1209942183260046e-3 -15.400000000000013 1.125496170313487e-3 1.055715438543968e-3 -15.600000000000012 1.0454409456952054e-3 1.0423048213654715e-3 -15.600000000000012 1.0454409456952054e-3 1.05694099739267e-3 -15.600000000000012 1.0454409456952054e-3 1.0611227619718693e-3 -15.600000000000012 1.0454409456952054e-3 1.0109415870214755e-3 -15.600000000000012 1.0454409456952054e-3 1.0809861437230668e-3 -15.600000000000012 1.0454409456952054e-3 1.038123056786272e-3 -15.600000000000012 1.0454409456952054e-3 1.1039858489086639e-3 -15.600000000000012 1.0454409456952054e-3 1.0621682031166692e-3 -15.600000000000012 1.0454409456952054e-3 1.0893496728814657e-3 -15.800000000000011 9.714677294265883e-4 9.695250493020003e-4 -15.800000000000011 9.714677294265883e-4 9.520386255670944e-4 -15.800000000000011 9.714677294265883e-4 9.85068537066361e-4 -15.800000000000011 9.714677294265883e-4 9.180372460825554e-4 -15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 -15.800000000000011 9.714677294265883e-4 9.83125601095816e-4 -15.800000000000011 9.714677294265883e-4 1.0132411086392647e-3 -15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 -15.800000000000011 9.714677294265883e-4 9.74382389228363e-4 -16.000000000000014 9.03089183114959e-4 8.994771111974063e-4 -16.000000000000014 9.03089183114959e-4 8.976709322592589e-4 -16.000000000000014 9.03089183114959e-4 9.401161373057228e-4 -16.000000000000014 9.03089183114959e-4 8.118774326972573e-4 -16.000000000000014 9.03089183114959e-4 8.678689797798268e-4 -16.000000000000014 9.03089183114959e-4 9.184419900479541e-4 -16.000000000000014 9.03089183114959e-4 8.859307691613008e-4 -16.000000000000014 9.03089183114959e-4 8.516133693365002e-4 -16.000000000000014 9.03089183114959e-4 8.669658903107531e-4 -16.200000000000014 8.398588273554193e-4 8.348198838306979e-4 -16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 -16.200000000000014 8.398588273554193e-4 8.197024211456349e-4 -16.200000000000014 8.398588273554193e-4 8.121436898031034e-4 -16.200000000000014 8.398588273554193e-4 8.222219982598121e-4 -16.200000000000014 8.398588273554193e-4 8.516170645918789e-4 -16.200000000000014 8.398588273554193e-4 8.003856632702767e-4 -16.200000000000014 8.398588273554193e-4 7.987059451941586e-4 -16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 -16.400000000000013 7.813674776082505e-4 7.548007201080162e-4 -16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 -16.400000000000013 7.813674776082505e-4 7.938690803620544e-4 -16.400000000000013 7.813674776082505e-4 7.524566184927739e-4 -16.400000000000013 7.813674776082505e-4 7.172950942641396e-4 -16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 -16.400000000000013 7.813674776082505e-4 7.508938840826124e-4 -16.400000000000013 7.813674776082505e-4 7.837113066960044e-4 -16.400000000000013 7.813674776082505e-4 8.009013852077812e-4 -16.600000000000012 7.272400021518117e-4 7.694198516139636e-4 -16.600000000000012 7.272400021518117e-4 7.679653717432378e-4 -16.600000000000012 7.272400021518117e-4 7.286944152336403e-4 -16.600000000000012 7.272400021518117e-4 7.243309756214629e-4 -16.600000000000012 7.272400021518117e-4 7.672381318078749e-4 -16.600000000000012 7.272400021518117e-4 7.628746921956973e-4 -16.600000000000012 7.272400021518117e-4 7.74510531161504e-4 -16.600000000000012 7.272400021518117e-4 7.272399353629146e-4 -16.600000000000012 7.272400021518117e-4 7.614202123249716e-4 -16.80000000000001 6.771323720620374e-4 7.272402249163065e-4 -16.80000000000001 6.771323720620374e-4 6.98800663048071e-4 -16.80000000000001 6.771323720620374e-4 6.378587447589951e-4 -16.80000000000001 6.771323720620374e-4 6.669754390526647e-4 -16.80000000000001 6.771323720620374e-4 7.062491197278469e-4 -16.80000000000001 6.771323720620374e-4 6.947378684954659e-4 -16.80000000000001 6.771323720620374e-4 7.048948548769786e-4 -16.80000000000001 6.771323720620374e-4 7.143747088330571e-4 -16.80000000000001 6.771323720620374e-4 6.818723524122165e-4 -17.000000000000014 6.307289760561569e-4 6.326211772304312e-4 -17.000000000000014 6.307289760561569e-4 6.452357570356242e-4 -17.000000000000014 6.307289760561569e-4 6.294675322791329e-4 -17.000000000000014 6.307289760561569e-4 6.136993075226416e-4 -17.000000000000014 6.307289760561569e-4 7.013706371687332e-4 -17.000000000000014 6.307289760561569e-4 6.162222234836802e-4 -17.000000000000014 6.307289760561569e-4 6.439742990551049e-4 -17.000000000000014 6.307289760561569e-4 6.263138873278347e-4 -17.000000000000014 6.307289760561569e-4 6.521737759284804e-4 -17.200000000000014 5.877401756398498e-4 5.589409391618354e-4 -17.200000000000014 5.877401756398498e-4 6.088988569628407e-4 -17.200000000000014 5.877401756398498e-4 5.859769887953207e-4 -17.200000000000014 5.877401756398498e-4 5.983195331932161e-4 -17.200000000000014 5.877401756398498e-4 5.871524692141678e-4 -17.200000000000014 5.877401756398498e-4 5.62467380418377e-4 -17.200000000000014 5.877401756398498e-4 5.818628073293555e-4 -17.200000000000014 5.877401756398498e-4 5.542390174864467e-4 -17.200000000000014 5.877401756398498e-4 5.759854052351195e-4 -17.400000000000013 5.479000783427062e-4 5.292715667205435e-4 -17.400000000000013 5.479000783427062e-4 5.54474974659617e-4 -17.400000000000013 5.479000783427062e-4 5.363942689641947e-4 -17.400000000000013 5.479000783427062e-4 5.473522724159658e-4 -17.400000000000013 5.479000783427062e-4 5.637892775936224e-4 -17.400000000000013 5.479000783427062e-4 5.298194668931321e-4 -17.400000000000013 5.479000783427062e-4 5.522833739692628e-4 -17.400000000000013 5.479000783427062e-4 5.4954387310632e-4 -17.400000000000013 5.479000783427062e-4 5.28723666547955e-4 -17.600000000000016 5.109645089423057e-4 4.915478801869926e-4 -17.600000000000016 5.109645089423057e-4 5.068768161595599e-4 -17.600000000000016 5.109645089423057e-4 5.038110289650465e-4 -17.600000000000016 5.109645089423057e-4 5.06365851627141e-4 -17.600000000000016 5.109645089423057e-4 5.135193550810058e-4 -17.600000000000016 5.109645089423057e-4 5.027890999002087e-4 -17.600000000000016 5.109645089423057e-4 5.216947875997084e-4 -17.600000000000016 5.109645089423057e-4 5.053439225623031e-4 -17.600000000000016 5.109645089423057e-4 4.900149865897359e-4 -17.800000000000015 4.767091604856612e-4 4.37619028323676e-4 -17.800000000000015 4.767091604856612e-4 4.7527905363693355e-4 -17.800000000000015 4.767091604856612e-4 4.547805588461731e-4 -17.800000000000015 4.767091604856612e-4 4.357121915989541e-4 -17.800000000000015 4.767091604856612e-4 4.733722169122116e-4 -17.800000000000015 4.767091604856612e-4 4.485833394908269e-4 -17.800000000000015 4.767091604856612e-4 4.605010690203388e-4 -17.800000000000015 4.767091604856612e-4 4.4095599259193934e-4 -17.800000000000015 4.767091604856612e-4 4.6717499755686545e-4 -18.000000000000014 4.449279086389735e-4 4.1244819370382484e-4 -18.000000000000014 4.449279086389735e-4 4.168974730318057e-4 -18.000000000000014 4.449279086389735e-4 4.2668588755336354e-4 -18.000000000000014 4.449279086389735e-4 4.1200326577102675e-4 -18.000000000000014 4.449279086389735e-4 4.1422790543501714e-4 -18.000000000000014 4.449279086389735e-4 4.213467523597865e-4 -18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 -18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 -18.000000000000014 4.449279086389735e-4 4.106684819726325e-4 -18.200000000000014 4.154312744512661e-4 4.2914051007483166e-4 -18.200000000000014 4.154312744512661e-4 4.1750843429351963e-4 -18.200000000000014 4.154312744512661e-4 4.303868039085436e-4 -18.200000000000014 4.154312744512661e-4 4.1833929684932766e-4 -18.200000000000014 4.154312744512661e-4 4.0421463340059165e-4 -18.200000000000014 4.154312744512661e-4 4.1958559068303963e-4 -18.200000000000014 4.154312744512661e-4 3.988140267878397e-4 -18.200000000000014 4.154312744512661e-4 4.2997137263063964e-4 -18.200000000000014 4.154312744512661e-4 4.2124731579465564e-4 -18.400000000000013 3.8804502202118517e-4 3.6941879790657506e-4 -18.400000000000013 3.8804502202118517e-4 3.705829327739277e-4 -18.400000000000013 3.8804502202118517e-4 3.806721016243174e-4 -18.400000000000013 3.8804502202118517e-4 3.861047310052964e-4 -18.400000000000013 3.8804502202118517e-4 3.7834383188961205e-4 -18.400000000000013 3.8804502202118517e-4 3.826123264032385e-4 -18.400000000000013 3.8804502202118517e-4 3.9192540534205967e-4 -18.400000000000013 3.8804502202118517e-4 4.1598419260068117e-4 -18.400000000000013 3.8804502202118517e-4 3.880449557842175e-4 -18.600000000000016 3.626088788241667e-4 3.61521100938722e-4 -18.600000000000016 3.626088788241667e-4 3.5716979380605936e-4 -18.600000000000016 3.626088788241667e-4 3.535437045288405e-4 -18.600000000000016 3.626088788241667e-4 3.54994140239728e-4 -18.600000000000016 3.626088788241667e-4 3.821898098188696e-4 -18.600000000000016 3.626088788241667e-4 3.782011116139288e-4 -18.600000000000016 3.626088788241667e-4 3.658724080713846e-4 -18.600000000000016 3.626088788241667e-4 3.651471902159409e-4 -18.600000000000016 3.626088788241667e-4 3.68048061637716e-4 -18.800000000000015 3.389753676027032e-4 3.345686725081159e-4 -18.800000000000015 3.389753676027032e-4 3.115183485663207e-4 -18.800000000000015 3.389753676027032e-4 3.352466232122864e-4 -18.800000000000015 3.389753676027032e-4 3.227045351851331e-4 -18.800000000000015 3.389753676027032e-4 3.2914506687475235e-4 -18.800000000000015 3.389753676027032e-4 3.1355220067883204e-4 -18.800000000000015 3.389753676027032e-4 3.1558605279134337e-4 -18.800000000000015 3.389753676027032e-4 3.443989577185874e-4 -18.800000000000015 3.389753676027032e-4 3.3490764786020115e-4 -19.000000000000014 3.170087397577456e-4 2.9545216068413026e-4 -19.000000000000014 3.170087397577456e-4 2.9545216068413026e-4 -19.000000000000014 3.170087397577456e-4 3.062304583914912e-4 -19.000000000000014 3.170087397577456e-4 3.068644759036889e-4 -19.000000000000014 3.170087397577456e-4 2.9703720446462453e-4 -19.000000000000014 3.170087397577456e-4 2.9925626575731646e-4 -19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 -19.000000000000014 3.170087397577456e-4 3.052794321231947e-4 -19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 -19.200000000000017 2.9658400111523623e-4 2.7256070036533217e-4 -19.200000000000017 2.9658400111523623e-4 2.781957964555839e-4 -19.200000000000017 2.9658400111523623e-4 2.8116163650308476e-4 -19.200000000000017 2.9658400111523623e-4 2.856103965743361e-4 -19.200000000000017 2.9658400111523623e-4 2.88576236621837e-4 -19.200000000000017 2.9658400111523623e-4 2.948045007215889e-4 -19.200000000000017 2.9658400111523623e-4 2.9213524467883807e-4 -19.200000000000017 2.9658400111523623e-4 2.734504523795824e-4 -19.200000000000017 2.9658400111523623e-4 2.998464288023404e-4 -19.400000000000016 2.7758602178815306e-4 2.556567171512347e-4 -19.400000000000016 2.7758602178815306e-4 2.670377436476523e-4 -19.400000000000016 2.7758602178815306e-4 2.6676015763554454e-4 -19.400000000000016 2.7758602178815306e-4 2.5704464721177343e-4 -19.400000000000016 2.7758602178815306e-4 2.687032597202988e-4 -19.400000000000016 2.7758602178815306e-4 2.5843257727231215e-4 -19.400000000000016 2.7758602178815306e-4 2.6731532965976006e-4 -19.400000000000016 2.7758602178815306e-4 2.6453946953868256e-4 -19.400000000000016 2.7758602178815306e-4 2.692584317445143e-4 -19.600000000000016 2.5990872262005797e-4 2.5471052019425573e-4 -19.600000000000016 2.5990872262005797e-4 2.6042851146392275e-4 -19.600000000000016 2.5990872262005797e-4 2.645870505691351e-4 -19.600000000000016 2.5990872262005797e-4 2.596487853816954e-4 -19.600000000000016 2.5990872262005797e-4 2.661465027335897e-4 -19.600000000000016 2.5990872262005797e-4 2.7550321572031744e-4 -19.600000000000016 2.5990872262005797e-4 2.762829418025448e-4 -19.600000000000016 2.5990872262005797e-4 2.614681462402258e-4 -19.600000000000016 2.5990872262005797e-4 2.5834924191131656e-4 -19.800000000000015 2.4345433138892443e-4 2.337161327673183e-4 -19.800000000000015 2.4345433138892443e-4 2.4028939900139914e-4 -19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 -19.800000000000015 2.4345433138892443e-4 2.4345430496595656e-4 -19.800000000000015 2.4345433138892443e-4 2.5148829702983315e-4 -19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 -19.800000000000015 2.4345433138892443e-4 2.495406625901055e-4 -19.800000000000015 2.4345433138892443e-4 2.449150307957523e-4 -19.800000000000015 2.4345433138892443e-4 2.5343593146956076e-4 -20.000000000000014 2.2813270257705028e-4 2.2699202037498626e-4 -20.000000000000014 2.2813270257705028e-4 2.3064214331568956e-4 -20.000000000000014 2.2813270257705028e-4 2.2630762232360438e-4 -20.000000000000014 2.2813270257705028e-4 2.23113764750489e-4 -20.000000000000014 2.2813270257705028e-4 2.33607868205011e-4 -20.000000000000014 2.2813270257705028e-4 2.2653575500739836e-4 -20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 -20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 -20.000000000000014 2.2813270257705028e-4 2.345203989401868e-4 -20.200000000000017 2.1386069508055506e-4 2.1621316436627408e-4 -20.200000000000017 2.1386069508055506e-4 2.1086664694871043e-4 -20.200000000000017 2.1386069508055506e-4 2.1835177133329954e-4 -20.200000000000017 2.1386069508055506e-4 2.0872803998168497e-4 -20.200000000000017 2.1386069508055506e-4 1.9846272653996277e-4 -20.200000000000017 2.1386069508055506e-4 2.1043892555530535e-4 -20.200000000000017 2.1386069508055506e-4 2.1321911461243844e-4 -20.200000000000017 2.1386069508055506e-4 2.1450227879265372e-4 -20.200000000000017 2.1386069508055506e-4 2.2305670666075556e-4 -20.400000000000016 2.0056160274611903e-4 1.9835543488877554e-4 -20.400000000000016 2.0056160274611903e-4 1.9935824295191394e-4 -20.400000000000016 2.0056160274611903e-4 1.8712398458162546e-4 -20.400000000000016 2.0056160274611903e-4 1.9695150360038178e-4 -20.400000000000016 2.0056160274611903e-4 1.8732454619425315e-4 -20.400000000000016 2.0056160274611903e-4 1.9715206521300947e-4 -20.400000000000016 2.0056160274611903e-4 1.997593661771693e-4 -20.400000000000016 2.0056160274611903e-4 1.9915768133928628e-4 -20.400000000000016 2.0056160274611903e-4 1.9053353199629603e-4 -20.600000000000016 1.8816463308837033e-4 1.6859551095635615e-4 -20.600000000000016 1.8816463308837033e-4 1.9249241931735753e-4 -20.600000000000016 1.8816463308837033e-4 1.7649842553243536e-4 -20.600000000000016 1.8816463308837033e-4 1.7969722428941978e-4 -20.600000000000016 1.8816463308837033e-4 1.751812731030888e-4 -20.600000000000016 1.8816463308837033e-4 1.7066532191675784e-4 -20.600000000000016 1.8816463308837033e-4 1.7706291943072672e-4 -20.600000000000016 1.8816463308837033e-4 1.8571849253786108e-4 -20.600000000000016 1.8816463308837033e-4 1.7988538892218356e-4 -20.80000000000002 1.766044299634105e-4 1.6318249666394226e-4 -20.80000000000002 1.766044299634105e-4 1.7625122475174715e-4 -20.80000000000002 1.766044299634105e-4 1.7024667400870164e-4 -20.80000000000002 1.766044299634105e-4 1.649485410001321e-4 -20.80000000000002 1.766044299634105e-4 1.6530174986737007e-4 -20.80000000000002 1.766044299634105e-4 1.7413197154831934e-4 -20.80000000000002 1.766044299634105e-4 1.746617848491763e-4 -20.80000000000002 1.766044299634105e-4 1.6759760750441688e-4 -20.80000000000002 1.766044299634105e-4 1.6689118976994093e-4 -21.000000000000018 1.6582063635655071e-4 1.7477496479627606e-4 -21.000000000000018 1.6582063635655071e-4 1.7842301908993648e-4 -21.000000000000018 1.6582063635655071e-4 1.7195601375117483e-4 -21.000000000000018 1.6582063635655071e-4 1.6333333996615933e-4 -21.000000000000018 1.6582063635655071e-4 1.8107614948532588e-4 -21.000000000000018 1.6582063635655071e-4 1.6217259541817647e-4 -21.000000000000018 1.6582063635655071e-4 1.8008122558705485e-4 -21.000000000000018 1.6582063635655071e-4 1.6830795945751444e-4 -21.000000000000018 1.6582063635655071e-4 1.754382473951234e-4 -21.200000000000017 1.5575749378922264e-4 1.5653627739910535e-4 -21.200000000000017 1.5575749378922264e-4 1.6494718185637073e-4 -21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 -21.200000000000017 1.5575749378922264e-4 1.5871688225839637e-4 -21.200000000000017 1.5575749378922264e-4 1.5933991221819382e-4 -21.200000000000017 1.5575749378922264e-4 1.6074172962773805e-4 -21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 -21.200000000000017 1.5575749378922264e-4 1.5233082517047267e-4 -21.200000000000017 1.5575749378922264e-4 1.6245506201718098e-4 -21.400000000000016 1.463634751646593e-4 1.5587709332690896e-4 -21.400000000000016 1.463634751646593e-4 1.5807254534559782e-4 -21.400000000000016 1.463634751646593e-4 1.4855891993127942e-4 -21.400000000000016 1.463634751646593e-4 1.4182620040730025e-4 -21.400000000000016 1.463634751646593e-4 1.535352778403075e-4 -21.400000000000016 1.463634751646593e-4 1.5294982396865715e-4 -21.400000000000016 1.463634751646593e-4 1.4987619114249274e-4 -21.400000000000016 1.463634751646593e-4 1.4211892734312544e-4 -21.400000000000016 1.463634751646593e-4 1.4387528895807653e-4 -21.600000000000016 1.375909481573892e-4 1.3057380928793933e-4 -21.600000000000016 1.375909481573892e-4 1.4281940362579666e-4 -21.600000000000016 1.375909481573892e-4 1.45158449735275e-4 -21.600000000000016 1.375909481573892e-4 1.3029862739270657e-4 -21.600000000000016 1.375909481573892e-4 1.3855408424968905e-4 -21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 -21.600000000000016 1.375909481573892e-4 1.3924203898777092e-4 -21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 -21.600000000000016 1.375909481573892e-4 1.4103072130678378e-4 -21.80000000000002 1.2939586651062514e-4 1.2616097208387647e-4 -21.80000000000002 1.2939586651062514e-4 1.3392472421211503e-4 -21.80000000000002 1.2939586651062514e-4 1.2538459687105261e-4 -21.80000000000002 1.2939586651062514e-4 1.2007936625008961e-4 -21.80000000000002 1.2939586651062514e-4 1.2900768119756395e-4 -21.80000000000002 1.2939586651062514e-4 1.2409063818301285e-4 -21.80000000000002 1.2939586651062514e-4 1.2887828532875996e-4 -21.80000000000002 1.2939586651062514e-4 1.3353653660570309e-4 -21.80000000000002 1.2939586651062514e-4 1.3340714073689912e-4 -22.000000000000018 1.2173748684079103e-4 1.2076358130730716e-4 -22.000000000000018 1.2173748684079103e-4 1.2502439314778673e-4 -22.000000000000018 1.2173748684079103e-4 1.2465918070431707e-4 -22.000000000000018 1.2173748684079103e-4 1.1516365717410541e-4 -22.000000000000018 1.2173748684079103e-4 1.1528539465526198e-4 -22.000000000000018 1.2173748684079103e-4 1.1808535672186284e-4 -22.000000000000018 1.2173748684079103e-4 1.1930273153342844e-4 -22.000000000000018 1.2173748684079103e-4 1.2636350544050891e-4 -22.000000000000018 1.2173748684079103e-4 1.2039836886383749e-4 -22.200000000000017 1.1457810876195154e-4 1.1297401105059987e-4 -22.200000000000017 1.1457810876195154e-4 1.0460980942109298e-4 -22.200000000000017 1.1457810876195154e-4 1.1366147967768262e-4 -22.200000000000017 1.1457810876195154e-4 1.0735968392942401e-4 -22.200000000000017 1.1457810876195154e-4 1.0483896563012057e-4 -22.200000000000017 1.1457810876195154e-4 1.1033871464678263e-4 -22.200000000000017 1.1457810876195154e-4 1.1274485484157229e-4 -22.200000000000017 1.1457810876195154e-4 1.1847376006726193e-4 -22.200000000000017 1.1457810876195154e-4 1.1182823000546194e-4 -22.40000000000002 1.0788283633687184e-4 1.210445432051883e-4 -22.40000000000002 1.0788283633687184e-4 1.1877900362648157e-4 -22.40000000000002 1.0788283633687184e-4 1.1726864390734375e-4 -22.40000000000002 1.0788283633687184e-4 1.1209026772744264e-4 -22.40000000000002 1.0788283633687184e-4 1.1543463567696211e-4 -22.40000000000002 1.0788283633687184e-4 1.0971684531165464e-4 -22.40000000000002 1.0788283633687184e-4 1.1770017525566884e-4 -22.40000000000002 1.0788283633687184e-4 1.1834747227815648e-4 -22.40000000000002 1.0788283633687184e-4 1.170528782331812e-4 -22.60000000000002 1.0161935903764002e-4 1.0395660040576609e-4 -22.60000000000002 1.0161935903764002e-4 1.0497279395811961e-4 -22.60000000000002 1.0161935903764002e-4 1.0365174234006003e-4 -22.60000000000002 1.0161935903764002e-4 1.0355012298482467e-4 -22.60000000000002 1.0161935903764002e-4 1.095456649437105e-4 -22.60000000000002 1.0161935903764002e-4 1.0466793589241355e-4 -22.60000000000002 1.0161935903764002e-4 1.0619222622094385e-4 -22.60000000000002 1.0161935903764002e-4 1.0578574880000244e-4 -22.60000000000002 1.0161935903764002e-4 1.1035861978559332e-4 -22.80000000000002 9.57577505589414e-5 1.0150321473129562e-4 -22.80000000000002 9.57577505589414e-5 9.882199773839347e-5 -22.80000000000002 9.57577505589414e-5 1.0246079222876067e-4 -22.80000000000002 9.57577505589414e-5 9.671532724397035e-5 -22.80000000000002 9.57577505589414e-5 9.642805399473084e-5 -22.80000000000002 9.57577505589414e-5 9.738563149219589e-5 -22.80000000000002 9.57577505589414e-5 9.75771469916889e-5 -22.80000000000002 9.57577505589414e-5 1.0083291048307008e-4 -22.80000000000002 9.57577505589414e-5 9.853472448915395e-5 -23.000000000000018 9.027028397266895e-5 9.41519030374415e-5 -23.000000000000018 9.027028397266895e-5 9.315892994692198e-5 -23.000000000000018 9.027028397266895e-5 9.55962275327426e-5 -23.000000000000018 9.027028397266895e-5 9.505460584700469e-5 -23.000000000000018 9.027028397266895e-5 9.37005516326599e-5 -23.000000000000018 9.027028397266895e-5 9.027028095631975e-5 -23.000000000000018 9.027028397266895e-5 8.990919983249446e-5 -23.000000000000018 9.027028397266895e-5 9.686001146613108e-5 -23.000000000000018 9.027028397266895e-5 9.523514640891733e-5 -23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 -23.200000000000017 8.513126184494805e-5 8.972835124750502e-5 -23.200000000000017 8.513126184494805e-5 8.010851852362639e-5 -23.200000000000017 8.513126184494805e-5 8.30029814670943e-5 -23.200000000000017 8.513126184494805e-5 8.828111977577107e-5 -23.200000000000017 8.513126184494805e-5 8.206653757361939e-5 -23.200000000000017 8.513126184494805e-5 8.274758767796478e-5 -23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 -23.200000000000017 8.513126184494805e-5 8.71744133562098e-5 -23.40000000000002 8.031686005707997e-5 8.047749336863849e-5 -23.40000000000002 8.031686005707997e-5 8.601935668444293e-5 -23.40000000000002 8.031686005707997e-5 7.838925501775565e-5 -23.40000000000002 8.031686005707997e-5 7.951369105284641e-5 -23.40000000000002 8.031686005707997e-5 8.336890031601471e-5 -23.40000000000002 8.031686005707997e-5 7.525689749143139e-5 -23.40000000000002 8.031686005707997e-5 7.774672014056092e-5 -23.40000000000002 8.031686005707997e-5 7.903178989495036e-5 -23.40000000000002 8.031686005707997e-5 7.99152753510931e-5 -23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 -23.60000000000002 7.580498418149386e-5 7.178731850865826e-5 -23.60000000000002 7.580498418149386e-5 7.603239753345748e-5 -23.60000000000002 7.580498418149386e-5 7.785171711551428e-5 -23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 -23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 -23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 -23.60000000000002 7.580498418149386e-5 7.663883739414308e-5 -23.60000000000002 7.580498418149386e-5 7.610820251604317e-5 -23.80000000000002 7.157513736351763e-5 6.921315734247816e-5 -23.80000000000002 7.157513736351763e-5 7.193301254311328e-5 -23.80000000000002 7.157513736351763e-5 6.620700159440775e-5 -23.80000000000002 7.157513736351763e-5 6.856898111074878e-5 -23.80000000000002 7.157513736351763e-5 7.221931309054856e-5 -23.80000000000002 7.157513736351763e-5 7.064466007965453e-5 -23.80000000000002 7.157513736351763e-5 6.949945788991344e-5 -23.80000000000002 7.157513736351763e-5 7.1646711995678e-5 -23.80000000000002 7.157513736351763e-5 7.028678439536044e-5 -24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 -24.00000000000002 6.760829875058038e-5 7.470716818321092e-5 -24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 -24.00000000000002 6.760829875058038e-5 7.240848608526597e-5 -24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 -24.00000000000002 6.760829875058038e-5 7.430151840122062e-5 -24.00000000000002 6.760829875058038e-5 7.031262887831615e-5 -24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 -24.00000000000002 6.760829875058038e-5 7.078588695730482e-5 diff --git a/bloomfilter-blocked/fpr.classic.gnuplot.data b/bloomfilter-blocked/fpr.classic.gnuplot.data deleted file mode 100644 index 583ffd95e..000000000 --- a/bloomfilter-blocked/fpr.classic.gnuplot.data +++ /dev/null @@ -1,183 +0,0 @@ -2.0 0.3934693402873666 0.3900039354584809 -2.0 0.3934693402873666 0.37504919323101144 -2.0 0.3934693402873666 0.3892168437622983 -2.3 0.3374056100322293 0.3434547908232119 -2.3 0.3374056100322293 0.32388663967611336 -2.3 0.3374056100322293 0.340080971659919 -2.5999999999999996 0.28797243387934673 0.28217679239850274 -2.5999999999999996 0.28797243387934673 0.2824647279009502 -2.5999999999999996 0.28797243387934673 0.2830405989058451 -2.8999999999999995 0.2482540030425363 0.2477656405163853 -2.8999999999999995 0.2482540030425363 0.23857994041708044 -2.8999999999999995 0.2482540030425363 0.24056603773584906 -3.1999999999999993 0.21598193982220967 0.21706263498920086 -3.1999999999999993 0.21598193982220967 0.21036717062634988 -3.1999999999999993 0.21598193982220967 0.21036717062634988 -3.499999999999999 0.18947031330845207 0.18075028419856007 -3.499999999999999 0.18947031330845207 0.1911708980674498 -3.499999999999999 0.18947031330845207 0.1964759378552482 -3.799999999999999 0.16269646923733003 0.16254474454930035 -3.799999999999999 0.16269646923733003 0.15929059550927432 -3.799999999999999 0.16269646923733003 0.15554832411324437 -4.099999999999999 0.1397281316954663 0.14559172837781192 -4.099999999999999 0.1397281316954663 0.13846583764146989 -4.099999999999999 0.1397281316954663 0.13706860416375577 -4.399999999999999 0.12077596326307868 0.12065217391304348 -4.399999999999999 0.12077596326307868 0.1213768115942029 -4.399999999999999 0.12077596326307868 0.1178743961352657 -4.699999999999998 0.10502663985837676 0.10272030248923432 -4.699999999999998 0.10502663985837676 0.10629135595000525 -4.699999999999998 0.10502663985837676 9.904421804432308e-2 -4.999999999999998 9.184883923294052e-2 9.396527969137504e-2 -4.999999999999998 9.184883923294052e-2 9.06585836318545e-2 -4.999999999999998 9.184883923294052e-2 9.176081565169468e-2 -5.299999999999998 7.881929681834592e-2 8.291952392212501e-2 -5.299999999999998 7.881929681834592e-2 7.283045637266493e-2 -5.299999999999998 7.881929681834592e-2 7.448569401749823e-2 -5.599999999999998 6.789553531345656e-2 6.91832439405255e-2 -5.599999999999998 6.789553531345656e-2 6.544911399280331e-2 -5.599999999999998 6.789553531345656e-2 6.477018127503564e-2 -5.899999999999998 5.876263252151765e-2 5.858502761781643e-2 -5.899999999999998 5.876263252151765e-2 5.688094958279469e-2 -5.899999999999998 5.876263252151765e-2 5.699847220589964e-2 -6.1999999999999975 5.108781980733247e-2 5.057729641360989e-2 -6.1999999999999975 5.108781980733247e-2 4.955553284969858e-2 -6.1999999999999975 5.108781980733247e-2 4.930009195872075e-2 -6.499999999999997 4.450177077923493e-2 4.570335098571492e-2 -6.499999999999997 4.450177077923493e-2 4.28552356370433e-2 -6.499999999999997 4.450177077923493e-2 4.20987049975524e-2 -6.799999999999997 3.825331720048197e-2 3.756407313901002e-2 -6.799999999999997 3.825331720048197e-2 3.80613572029684e-2 -6.799999999999997 3.825331720048197e-2 3.699028383444266e-2 -7.099999999999997 3.300976153640641e-2 3.231663035584604e-2 -7.099999999999997 3.300976153640641e-2 3.26467287251601e-2 -7.099999999999997 3.300976153640641e-2 3.32409057899254e-2 -7.399999999999997 2.859146946961458e-2 2.8162973552537527e-2 -7.399999999999997 2.859146946961458e-2 2.7962830593280915e-2 -7.399999999999997 2.859146946961458e-2 2.9049320943531094e-2 -7.699999999999997 2.485365957813612e-2 2.5424992543990457e-2 -7.699999999999997 2.485365957813612e-2 2.3884083904960732e-2 -7.699999999999997 2.485365957813612e-2 2.4381151207873545e-2 -7.9999999999999964 2.157714146321929e-2 2.2073578595317726e-2 -7.9999999999999964 2.157714146321929e-2 2.0606322149099146e-2 -7.9999999999999964 2.157714146321929e-2 2.1814650987161507e-2 -8.299999999999997 1.858180941904642e-2 1.789430652594024e-2 -8.299999999999997 1.858180941904642e-2 1.735543332837818e-2 -8.299999999999997 1.858180941904642e-2 1.9325107774639513e-2 -8.599999999999996 1.6053280201294692e-2 1.595684908416676e-2 -8.599999999999996 1.6053280201294692e-2 1.4929446326232483e-2 -8.599999999999996 1.6053280201294692e-2 1.651870996741207e-2 -8.899999999999995 1.3911659303848264e-2 1.3299574302328816e-2 -8.899999999999995 1.3911659303848264e-2 1.35638963857433e-2 -8.899999999999995 1.3911659303848264e-2 1.402298210956846e-2 -9.199999999999996 1.2091803064275024e-2 1.2152210976892662e-2 -9.199999999999996 1.2091803064275024e-2 1.2007109950302899e-2 -9.199999999999996 1.2091803064275024e-2 1.2599605808877765e-2 -9.499999999999996 1.0472810419700971e-2 1.0923181651568309e-2 -9.499999999999996 1.0472810419700971e-2 1.0776561763627795e-2 -9.499999999999996 1.0472810419700971e-2 1.0713724668796146e-2 -9.799999999999995 9.030714850304539e-3 8.922362800610477e-3 -9.799999999999995 9.030714850304539e-3 8.804963290076129e-3 -9.799999999999995 9.030714850304539e-3 8.714655974280476e-3 -10.099999999999994 7.808242114948139e-3 8.222066057624737e-3 -10.099999999999994 7.808242114948139e-3 7.652065276801749e-3 -10.099999999999994 7.808242114948139e-3 7.816038104161786e-3 -10.399999999999995 6.769015491520877e-3 6.653940920044405e-3 -10.399999999999995 6.769015491520877e-3 6.80962824574229e-3 -10.399999999999995 6.769015491520877e-3 6.477946551864186e-3 -10.699999999999996 5.883120099197549e-3 5.81251691395357e-3 -10.699999999999996 5.883120099197549e-3 5.759568885385168e-3 -10.699999999999996 5.883120099197549e-3 5.724270199672899e-3 -10.999999999999995 5.086410643627119e-3 4.877875097913552e-3 -10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 -10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 -11.299999999999994 4.390250871934632e-3 4.706357533903774e-3 -11.299999999999994 4.390250871934632e-3 4.346356304631284e-3 -11.299999999999994 4.390250871934632e-3 4.188306984462873e-3 -11.599999999999994 3.7982630662866026e-3 3.6463358123352504e-3 -11.599999999999994 3.7982630662866026e-3 3.5589756834980514e-3 -11.599999999999994 3.7982630662866026e-3 3.718502875287719e-3 -11.899999999999995 3.293628806737463e-3 3.2738722596964585e-3 -11.899999999999995 3.293628806737463e-3 3.2837531618887016e-3 -11.899999999999995 3.293628806737463e-3 3.204705944350759e-3 -12.199999999999994 2.862426553117111e-3 2.9397115819483964e-3 -12.199999999999994 2.862426553117111e-3 2.765103591199757e-3 -12.199999999999994 2.862426553117111e-3 2.885325486469312e-3 -12.499999999999993 2.4714022749062954e-3 2.429385931309916e-3 -12.499999999999993 2.4714022749062954e-3 2.5405989190097596e-3 -12.499999999999993 2.4714022749062954e-3 2.3601867389633465e-3 -12.799999999999994 2.1347378701230876e-3 2.047211821314058e-3 -12.799999999999994 2.1347378701230876e-3 2.132601261202027e-3 -12.799999999999994 2.1347378701230876e-3 2.224394909081594e-3 -13.099999999999994 1.8477570412903156e-3 1.8606902846837658e-3 -13.099999999999994 1.8477570412903156e-3 1.9438393043568238e-3 -13.099999999999994 1.8477570412903156e-3 1.8311261888000118e-3 -13.399999999999993 1.602601363396337e-3 1.5240750979590855e-3 -13.399999999999993 1.602601363396337e-3 1.5481141373590711e-3 -13.399999999999993 1.602601363396337e-3 1.5577297531190653e-3 -13.699999999999992 1.392732355820269e-3 1.3955179084501257e-3 -13.699999999999992 1.392732355820269e-3 1.3551286675867985e-3 -13.699999999999992 1.392732355820269e-3 1.2980266374007156e-3 -13.999999999999993 1.2011660314775851e-3 1.2323969038730415e-3 -13.999999999999993 1.2011660314775851e-3 1.1783444080891363e-3 -13.999999999999993 1.2011660314775851e-3 1.2239887378622117e-3 -14.299999999999994 1.0381521121625098e-3 1.0859070853880093e-3 -14.299999999999994 1.0381521121625098e-3 1.0454191539060473e-3 -14.299999999999994 1.0381521121625098e-3 9.841681806384635e-4 -14.599999999999993 8.989266559699968e-4 8.665651479003774e-4 -14.599999999999993 8.989266559699968e-4 9.088146934930307e-4 -14.599999999999993 8.989266559699968e-4 8.189220432958961e-4 -14.899999999999991 7.79789817603333e-4 7.84468460235013e-4 -14.899999999999991 7.79789817603333e-4 8.179994182768675e-4 -14.899999999999991 7.79789817603333e-4 7.875876191226274e-4 -15.199999999999992 6.764405168941774e-4 6.135313770228103e-4 -15.199999999999992 6.764405168941774e-4 6.771167678057697e-4 -15.199999999999992 6.764405168941774e-4 6.9673353730264e-4 -15.499999999999993 5.839219194030841e-4 5.79250454583144e-4 -15.499999999999993 5.839219194030841e-4 5.827539855584453e-4 -15.499999999999993 5.839219194030841e-4 6.020234059226023e-4 -15.799999999999992 5.049190197991896e-4 5.089582714807656e-4 -15.799999999999992 5.049190197991896e-4 4.806828119540564e-4 -15.799999999999992 5.049190197991896e-4 4.8118773087417623e-4 -16.09999999999999 4.3733863083801914e-4 4.369012288339067e-4 -16.09999999999999 4.3733863083801914e-4 4.3602655169910405e-4 -16.09999999999999 4.3733863083801914e-4 4.264051032162753e-4 -16.39999999999999 3.7942896537169595e-4 3.9574447579792977e-4 -16.39999999999999 3.7942896537169595e-4 3.691844438651828e-4 -16.39999999999999 3.7942896537169595e-4 3.8663817913527364e-4 -16.699999999999992 3.287090342802393e-4 3.379129078786301e-4 -16.699999999999992 3.287090342802393e-4 3.3988516220476996e-4 -16.699999999999992 3.287090342802393e-4 3.306813086827839e-4 -16.999999999999993 2.839064245426713e-4 2.770926957400121e-4 -16.999999999999993 2.839064245426713e-4 3.0150865048759513e-4 -16.999999999999993 2.839064245426713e-4 2.9072020536657006e-4 -17.29999999999999 2.455927593376433e-4 2.384705857215798e-4 -17.29999999999999 2.455927593376433e-4 2.5639885838653895e-4 -17.29999999999999 2.455927593376433e-4 2.522237811905896e-4 -17.59999999999999 2.1277570578396985e-4 1.9745584106507e-4 -17.59999999999999 2.1277570578396985e-4 2.140523449476944e-4 -17.59999999999999 2.1277570578396985e-4 2.11286260967257e-4 -17.89999999999999 1.8462225664709928e-4 1.8536074727705247e-4 -17.89999999999999 1.8462225664709928e-4 1.8222216888690316e-4 -17.89999999999999 1.8462225664709928e-4 1.868377253430051e-4 -18.19999999999999 1.597658501117391e-4 1.6455882258961267e-4 -18.19999999999999 1.597658501117391e-4 1.6871273461614657e-4 -18.19999999999999 1.597658501117391e-4 1.5673029607806799e-4 -18.49999999999999 1.3805376639064915e-4 1.3266966289342732e-4 -18.49999999999999 1.3805376639064915e-4 1.421953722999273e-4 -18.49999999999999 1.3805376639064915e-4 1.38467920793036e-4 -18.79999999999999 1.1946336764483079e-4 1.1492375788264087e-4 -18.79999999999999 1.1946336764483079e-4 1.1277341729855819e-4 -18.79999999999999 1.1946336764483079e-4 1.250781439741424e-4 -19.09999999999999 1.0352241360319877e-4 9.907094846405867e-5 -19.09999999999999 1.0352241360319877e-4 1.0538581560753577e-4 -19.09999999999999 1.0352241360319877e-4 1.018660535931387e-4 -19.39999999999999 8.983346052166448e-5 8.749778785110284e-5 -19.39999999999999 8.983346052166448e-5 8.43536168297593e-5 -19.39999999999999 8.983346052166448e-5 9.180979382323111e-5 -19.69999999999999 7.766526701752384e-5 7.525764092226258e-5 -19.69999999999999 7.766526701752384e-5 7.618962409157853e-5 -19.69999999999999 7.766526701752384e-5 7.945156518418433e-5 -19.99999999999999 6.713708129260107e-5 7.04267998344131e-5 -19.99999999999999 6.713708129260107e-5 6.599575237104678e-5 -19.99999999999999 6.713708129260107e-5 6.693567152994266e-5 diff --git a/bloomfilter-blocked/fpr.gnuplot b/bloomfilter-blocked/fpr.gnuplot deleted file mode 100644 index 8449c8530..000000000 --- a/bloomfilter-blocked/fpr.gnuplot +++ /dev/null @@ -1,20 +0,0 @@ -set term png size 1800, 1200 -set output "fpr.png" -set title "Bloom filter false positive rates (FPR) vs bits per entry\nclassic and block-structured implementations" -# set subtitle "blah" - -set xlabel "Bits per entry" -set xrange [1:25] -set grid xtics -set xtics 0,2,24 - -set ylabel "False Positive Rate (FPR), log scale" -set yrange [1e-5:1] -set logscale y -set format y "10^{%L}" -set grid ytics - -plot "fpr.classic.gnuplot.data" using 1 : 3 title "Classic, actual FPR" with points pointtype 1 pointsize 2, \ - "fpr.classic.gnuplot.data" using 1 : 2 title "Classic, calculated FPR" with lines linewidth 2, \ - "fpr.blocked.gnuplot.data" using 1 : 3 title "Blocked, actual FPR" with points pointtype 1 pointsize 2, \ - "fpr.blocked.gnuplot.data" using 1 : 2 title "Blocked, calculated FPR" with lines linewidth 3 diff --git a/bloomfilter-blocked/fpr.png b/bloomfilter-blocked/fpr.png deleted file mode 100644 index 1edc1e9e2..000000000 Binary files a/bloomfilter-blocked/fpr.png and /dev/null differ diff --git a/bloomfilter-blocked/src/Data/BloomFilter.hs b/bloomfilter-blocked/src/Data/BloomFilter.hs deleted file mode 100644 index 4afe1198b..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter.hs +++ /dev/null @@ -1,63 +0,0 @@ --- | By default, this module re-exports the classic bloom filter implementation --- from "Data.BloomFilter.Classic". If you want to use the blocked bloom filter --- implementation, import "Data.BloomFilter.Blocked". -module Data.BloomFilter ( - module Data.BloomFilter.Classic - -- * Example: a spelling checker - -- $example - - -- * Differences with the @bloomfilter@ package - -- $differences - ) where - -import Data.BloomFilter.Classic - --- $example --- --- This example reads a dictionary file containing one word per line, --- constructs a Bloom filter with a 1% false positive rate, and --- spellchecks its standard input. Like the Unix @spell@ command, it --- prints each word that it does not recognize. --- --- >>> import Control.Monad (forM_) --- >>> import System.Environment (getArgs) --- >>> import qualified Data.BloomFilter as B --- --- >>> :{ --- main :: IO () --- main = do --- files <- getArgs --- dictionary <- readFile "/usr/share/dict/words" --- let !bloom = B.fromList (B.policyForFPR 0.01) 4 (words dictionary) --- forM_ files $ \file -> --- putStrLn . unlines . filter (`B.notElem` bloom) . words --- =<< readFile file --- :} - --- $differences --- --- This package is an entirely rewritten fork of the --- [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package. --- --- The main differences are --- --- * Support for both classic and \"blocked\" Bloom filters. Blocked-structured --- Bloom filters arrange all the bits for each insert or lookup into a single --- cache line, which greatly reduces the number of slow uncached memory reads. --- The trade-off for this performance optimisation is a slightly worse --- trade-off between bits per element and the FPR. In practice for typical --- FPRs of @1-e3@ up to @1e-4@, this requires a couple extra bits per element. --- --- * This package support Bloom filters of arbitrary sizes (not limited to powers --- of two). --- --- * Sizes over @2^32@ are supported up to @2^48@ for classic Bloom filters and --- @2^41@ for blocked Bloom filters. --- --- * The 'Bloom' and 'MBloom' types are parametrised over a 'Hashable' type --- class, instead of having a @a -> ['Hash']@ typed field. --- This separation allows clean (de-)serialisation of Bloom filters in this --- package, as the hashing scheme is static. --- --- * [@XXH3@ hash](https://xxhash.com/) is used instead of [Jenkins' --- @lookup3@](https://en.wikipedia.org/wiki/Jenkins_hash_function#lookup3). diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Blocked.hs b/bloomfilter-blocked/src/Data/BloomFilter/Blocked.hs deleted file mode 100644 index 59adac9a7..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Blocked.hs +++ /dev/null @@ -1,329 +0,0 @@ --- | A fast, space efficient Bloom filter implementation. A Bloom filter is a --- set-like data structure that provides a probabilistic membership test. --- --- * Queries do not give false negatives. When an element is added to a filter, --- a subsequent membership test will definitely return 'True'. --- --- * False positives /are/ possible. If an element has not been added to a --- filter, a membership test /may/ nevertheless indicate that the element is --- present. --- -module Data.BloomFilter.Blocked ( - -- * Overview - -- $overview - - -- * Types - Hash, - Salt, - Hashable, - - -- * Immutable Bloom filters - Bloom, - - -- ** Creation - create, - unfold, - fromList, - - -- ** (De)Serialisation - formatVersion, - serialise, - deserialise, - - -- ** Sizes - NumEntries, - BloomSize (..), - FPR, - sizeForFPR, - BitsPerEntry, - sizeForBits, - sizeForPolicy, - BloomPolicy (..), - policyFPR, - policyForFPR, - policyForBits, - - -- ** Accessors - size, - elem, - notElem, - (?), - - -- * Mutable Bloom filters - MBloom, - new, - maxSizeBits, - insert, - insertMany, - read, - - -- ** Conversion - freeze, - thaw, - unsafeFreeze, - - -- * Low level variants - Hashes, - hashesWithSalt, - insertHashes, - elemHashes, - readHashes, - -- ** Prefetching - prefetchInsert, - prefetchElem, -) where - -import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, - stToPrim) -import Control.Monad.ST (ST, runST) -import Data.Bits ((.&.)) -import Data.Primitive.ByteArray (MutableByteArray) -import qualified Data.Primitive.PrimArray as P - -import Data.BloomFilter.Blocked.Calc (BitsPerEntry, BloomPolicy (..), - BloomSize (..), FPR, NumEntries, policyFPR, policyForBits, - policyForFPR, sizeForBits, sizeForFPR, sizeForPolicy) -import Data.BloomFilter.Blocked.Internal hiding (deserialise) -import qualified Data.BloomFilter.Blocked.Internal as Internal -import Data.BloomFilter.Hash - -import Prelude hiding (elem, notElem, read) - --- $setup --- --- >>> import Text.Printf - --- $overview --- --- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The --- size determines the number of bits that should be used for the filter. Note --- that a filter is fixed in size; it cannot be resized after creation. --- --- The size can be specified by asking for a target false positive rate (FPR) --- or a number of bits per element, and the number of elements in the filter. --- For example: --- --- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements --- with a false positive rate of 1 in 1000 --- --- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements --- with 10 bits per element --- --- Depending on the application it may be more important to target a fixed --- amount of memory to use, or target a specific FPR. --- --- As a very rough guide for filter sizes, here are a range of FPRs and bits --- per element: --- --- * FPR of 1e-1 requires approximately 4.8 bits per element --- * FPR of 1e-2 requires approximately 9.8 bits per element --- * FPR of 1e-3 requires approximately 15.8 bits per element --- * FPR of 1e-4 requires approximately 22.6 bits per element --- * FPR of 1e-5 requires approximately 30.2 bits per element --- --- >>> fmap (printf "%0.1f" . policyBits . policyForFPR) [1e-1, 1e-2, 1e-3, 1e-4, 1e-5] :: [String] --- ["4.8","9.8","15.8","22.6","30.2"] - --- | Create an immutable Bloom filter, using the given setup function --- which executes in the 'ST' monad. --- --- Example: --- --- >>> :{ --- filter = create (sizeForBits 16 2) 4 $ \mf -> do --- insert mf "foo" --- insert mf "bar" --- :} --- --- Note that the result of the setup function is not used. -create :: BloomSize - -> Salt - -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function - -> Bloom a -{-# INLINE create #-} -create bloomsize bloomsalt body = - runST $ do - mb <- new bloomsize bloomsalt - body mb - unsafeFreeze mb - -{-# INLINEABLE insert #-} --- | Insert a value into a mutable Bloom filter. Afterwards, a --- membership query for the same value is guaranteed to return @True@. -insert :: Hashable a => MBloom s a -> a -> ST s () -insert = \ !mb !x -> insertHashes mb (hashesWithSalt (mbHashSalt mb) x) - -{-# INLINE elem #-} --- | Query an immutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: Hashable a => a -> Bloom a -> Bool -elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x) - --- | Same as 'elem' but with the opposite argument order: --- --- > x `elem` bfilter --- --- versus --- --- > bfilter ? x --- -(?) :: Hashable a => Bloom a -> a -> Bool -(?) = flip elem - -{-# INLINE notElem #-} --- | Query an immutable Bloom filter for non-membership. If the value --- /is/ present, return @False@. If the value is not present, there --- is /still/ some possibility that @False@ will be returned. -notElem :: Hashable a => a -> Bloom a -> Bool -notElem = \x b -> not (x `elem` b) - --- | Query a mutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -read :: Hashable a => MBloom s a -> a -> ST s Bool -read !mb !x = readHashes mb (hashesWithSalt (mbHashSalt mb) x) - --- | Build an immutable Bloom filter from a seed value. The seeding --- function populates the filter as follows. --- --- * If it returns 'Nothing', it is finished producing values to --- insert into the filter. --- --- * If it returns @'Just' (a,b)@, @a@ is added to the filter and --- @b@ is used as a new seed. -unfold :: forall a b. - Hashable a - => BloomSize - -> Salt - -> (b -> Maybe (a, b)) -- ^ seeding function - -> b -- ^ initial seed - -> Bloom a -{-# INLINE unfold #-} -unfold bloomsize bloomsalt f k = - create bloomsize bloomsalt body - where - body :: forall s. MBloom s a -> ST s () - body mb = loop k - where - loop :: b -> ST s () - loop !j = case f j of - Nothing -> pure () - Just (a, j') -> insert mb a >> loop j' - -{-# INLINEABLE fromList #-} --- | Create a Bloom filter, populating it from a sequence of values. --- --- For example --- --- @ --- filter = fromList (policyForBits 10) 4 [\"foo\", \"bar\", \"quux\"] --- @ -fromList :: (Foldable t, Hashable a) - => BloomPolicy - -> Salt - -> t a -- ^ values to populate with - -> Bloom a -fromList policy bloomsalt xs = - create bsize bloomsalt (\b -> mapM_ (insert b) xs) - where - bsize = sizeForPolicy policy (length xs) - -{-# SPECIALISE deserialise :: - BloomSize - -> Salt - -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) - -> IO (Bloom a) #-} -deserialise :: PrimMonad m - => BloomSize - -> Salt - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m (Bloom a) -deserialise bloomsize bloomsalt fill = do - mbloom <- stToPrim $ new bloomsize bloomsalt - Internal.deserialise mbloom fill - stToPrim $ unsafeFreeze mbloom - - ------------------------------------------------------------ --- Bulk insert --- - -{-# INLINABLE insertMany #-} --- | A bulk insert of many elements. --- --- This is somewhat faster than repeated insertion using 'insert'. It uses --- memory prefetching to improve the utilisation of memory bandwidth. This has --- greatest benefit for large filters (that do not fit in L3 cache) and for --- inserting many elements, e.g. > 10. --- --- To get best performance, you probably want to specialise this function to --- the 'Hashable' instance and to the lookup action. It is marked @INLINABLE@ --- to help with this. --- -insertMany :: - forall a s. - Hashable a - => MBloom s a - -> (Int -> ST s a) -- ^ Action to lookup elements, indexed @0..n-1@ - -> Int -- ^ @n@, number of elements to insert - -> ST s () -insertMany bloom key n = - P.newPrimArray 0x10 >>= body - where - -- The general strategy is to use a rolling buffer @buf@ (of size 16). At - -- the write end of the buffer, we prepare the probe locations and prefetch - -- the corresponding cache line. At the read end, we do the hash insert. - -- By having a prefetch distance of 15 between the write and read ends, we - -- can have up to 15 memory reads in flight at once, thus improving - -- utilisation of the memory bandwidth. - body :: P.MutablePrimArray s (Hashes a) -> ST s () - body !buf = prepareProbes 0 0 - where - -- Start by filling the buffer as far as we can, either to the end of - -- the buffer or until we run out of elements. - prepareProbes :: Int -> Int -> ST s () - prepareProbes !i !i_w - | i_w < 0x0f && i < n = do - k <- key i - let !kh = hashesWithSalt (mbHashSalt bloom) k - prefetchInsert bloom kh - P.writePrimArray buf i_w kh - prepareProbes (i+1) (i_w+1) - - | n > 0 = insertProbe 0 0 i_w - | otherwise = pure () - - -- Read from the read end of the buffer and do the inserts. - insertProbe :: Int -> Int -> Int -> ST s () - insertProbe !i !i_r !i_w = do - kh <- P.readPrimArray buf i_r - insertHashes bloom kh - nextProbe i i_r i_w - - -- Move on to the next entry. - nextProbe :: Int -> Int -> Int -> ST s () - nextProbe !i !i_r !i_w - -- If there are elements left, we prepare them and add them at the - -- write end of the buffer, before inserting the next element - -- (from the read end of the buffer). - | i < n = do - k <- key i - let !kh = hashesWithSalt (mbHashSalt bloom) k - prefetchInsert bloom kh - P.writePrimArray buf i_w kh - insertProbe - (i+1) - ((i_r + 1) .&. 0x0f) - ((i_w + 1) .&. 0x0f) - - -- Or if there's no more elements to add to the buffer, but the - -- buffer is still non-empty, we just loop draining the buffer. - | ((i_r + 1) .&. 0x0f) /= i_w = - insertProbe - i - ((i_r + 1) .&. 0x0f) - i_w - - -- When the buffer is empty, we're done. - | otherwise = pure () diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter-blocked/src/Data/BloomFilter/Blocked/BitArray.hs deleted file mode 100644 index 4743d84b1..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/BitArray.hs +++ /dev/null @@ -1,211 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} --- | Blocked bit array implementation. This uses blocks of 64 bytes, aligned --- to 64byte boundaries to match typical cache line sizes. This means that --- multiple accesses to the same block only require a single cache line load --- or store. -module Data.BloomFilter.Blocked.BitArray ( - NumBlocks (..), - bitsToBlocks, - blocksToBits, - BlockIx (..), - BitIx (..), - BitArray (..), - unsafeIndex, - prefetchIndex, - MBitArray (..), - new, - unsafeSet, - prefetchSet, - unsafeRead, - freeze, - unsafeFreeze, - thaw, - serialise, - deserialise, -) where - -import Control.Exception (assert) -import Control.Monad.Primitive (PrimMonad, PrimState) -import Control.Monad.ST (ST) -import Data.Bits -import Data.Primitive.ByteArray -import Data.Primitive.PrimArray -import Data.Word (Word64, Word8) - -import GHC.Exts (Int (I#), prefetchByteArray0#, - prefetchMutableByteArray0#) -import GHC.ST (ST (ST)) - --- | An array of blocks of bits. --- --- Each block is 512 bits (64 bytes large), corresponding to a cache line on --- most current architectures. --- --- It is represented by an array of 'Word64'. This array is aligned to 64 bytes --- so that multiple accesses within a single block will use only one cache line. --- -newtype BitArray = BitArray (PrimArray Word64) - deriving stock (Eq, Show) - --- | Blocks are 512 bits, 64 bytes. -newtype NumBlocks = NumBlocks Int - deriving stock Eq - --- | The number of 512-bit blocks for the given number of bits. This rounds --- up to the nearest multiple of 512. -bitsToBlocks :: Int -> NumBlocks -bitsToBlocks n = NumBlocks ((n+511) `div` 512) -- rounded up - -blocksToBits :: NumBlocks -> Int -blocksToBits (NumBlocks n) = n * 512 - -newtype BlockIx = BlockIx Word -newtype BitIx = BitIx Int - -{-# INLINE unsafeIndex #-} -unsafeIndex :: BitArray -> BlockIx -> BitIx -> Bool -unsafeIndex (BitArray arr) blockIx blockBitIx = - assert (wordIx >= 0 && wordIx < sizeofPrimArray arr) $ - indexPrimArray arr wordIx `unsafeTestBit` wordBitIx - where - (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx - -{-# INLINE prefetchIndex #-} -prefetchIndex :: BitArray -> BlockIx -> ST s () -prefetchIndex (BitArray (PrimArray ba#)) (BlockIx blockIx) = - -- For reading, we want to prefetch such that we do least disturbance of - -- the caches. We will typically not keep this cache line longer than one - -- use of elemHashes which does several memory reads of the same cache line. - let !i@(I# i#) = fromIntegral blockIx `shiftL` 6 in - -- blockIx * 64 to go from block index to the byte offset of the beginning - -- of the block. This offset is in bytes, not words. - - assert (i >= 0 && i < sizeofByteArray (ByteArray ba#) - 63) $ - - -- In prefetchByteArray0, the 0 refers to a "non temporal" load, which is - -- a hint that the value will be used soon, and then not used again (soon). - -- So the caches can evict the value as soon as they like. - ST (\s -> case prefetchByteArray0# ba# i# s of - s' -> (# s', () #)) - -newtype MBitArray s = MBitArray (MutablePrimArray s Word64) - --- | We create an explicitly pinned byte array, aligned to 64 bytes. --- -new :: NumBlocks -> ST s (MBitArray s) -new (NumBlocks numBlocks) = do - mba@(MutableByteArray mba#) <- newAlignedPinnedByteArray numBytes 64 - setByteArray mba 0 numBytes (0 :: Word8) - pure (MBitArray (MutablePrimArray mba#)) - where - !numBytes = numBlocks * 64 - -serialise :: BitArray -> (ByteArray, Int, Int) -serialise bitArray = - let ba = asByteArray bitArray - in (ba, 0, sizeofByteArray ba) - where - asByteArray (BitArray (PrimArray ba#)) = ByteArray ba# - -{-# INLINE deserialise #-} --- | Do an inplace overwrite of the byte array representing the bit block. -deserialise :: PrimMonad m - => MBitArray (PrimState m) - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m () -deserialise bitArray fill = do - let mba = asMutableByteArray bitArray - len <- getSizeofMutableByteArray mba - fill mba 0 len - where - asMutableByteArray (MBitArray (MutablePrimArray mba#)) = - MutableByteArray mba# - -unsafeSet :: MBitArray s -> BlockIx -> BitIx -> ST s () -unsafeSet (MBitArray arr) blockIx blockBitIx = do -#ifdef NO_IGNORE_ASSERTS - sz <- getSizeofMutablePrimArray arr - assert (wordIx >= 0 && wordIx < sz) $ pure () -#endif - w <- readPrimArray arr wordIx - writePrimArray arr wordIx (unsafeSetBit w wordBitIx) - where - (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx - -{-# INLINE prefetchSet #-} -prefetchSet :: MBitArray s -> BlockIx -> ST s () -prefetchSet (MBitArray (MutablePrimArray mba#)) (BlockIx blockIx) = do - -- For setting, we will do several writes to the same cache line, but all - -- immediately after each other, after which we will not need the value in - -- the cache again (for a long time). So as with prefetchIndex we want to - -- disturbe the caches the least, and so we use prefetchMutableByteArray0. - let !(I# i#) = fromIntegral blockIx `shiftL` 6 - -- blockIx * 64 to go from block index to the byte offset of the beginning - -- of the block. This offset is in bytes, not words. - -#ifdef NO_IGNORE_ASSERTS - sz <- getSizeofMutableByteArray (MutableByteArray mba#) - assert (let i = I# i# in i >= 0 && i < sz-63) $ pure () -#endif - - -- In prefetchMutableByteArray0, the 0 refers to a "non temporal" load, - -- which is a hint that the value will be used soon, and then not used - -- again (soon). So the caches can evict the value as soon as they like. - ST (\s -> case prefetchMutableByteArray0# mba# i# s of - s' -> (# s', () #)) - -unsafeRead :: MBitArray s -> BlockIx -> BitIx -> ST s Bool -unsafeRead (MBitArray arr) blockIx blockBitIx = do -#ifdef NO_IGNORE_ASSERTS - sz <- getSizeofMutablePrimArray arr - assert (wordIx >= 0 && wordIx < sz) $ pure () -#endif - w <- readPrimArray arr wordIx - pure $ unsafeTestBit w wordBitIx - where - (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx - -freeze :: MBitArray s -> ST s BitArray -freeze (MBitArray arr) = do - len <- getSizeofMutablePrimArray arr - BitArray <$> freezePrimArray arr 0 len - -unsafeFreeze :: MBitArray s -> ST s BitArray -unsafeFreeze (MBitArray arr) = - BitArray <$> unsafeFreezePrimArray arr - -thaw :: BitArray -> ST s (MBitArray s) -thaw (BitArray arr) = - MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) - -{-# INLINE wordAndBitIndex #-} --- | Given the index of the 512 bit block, and the index of the bit within the --- block, compute the index of the word in the array, and index of the bit --- within the word. --- -wordAndBitIndex :: BlockIx -> BitIx -> (Int, Int) -wordAndBitIndex (BlockIx blockIx) (BitIx blockBitIx) = - assert (blockBitIx < 512) $ - (wordIx, wordBitIx) - where - -- Select the Word64 in the underlying array based on the block index - -- and the bit index. - -- * There are 8 Word64s in each 64byte block. - -- * Use 3 bits (bits 6..8) to select the Word64 within the block - wordIx = fromIntegral blockIx `shiftL` 3 -- * 8 - + (blockBitIx `shiftR` 6) .&. 7 -- `div` 64, `mod` 8 - - -- Bits 0..5 of blockBitIx select the bit within Word64 - wordBitIx = blockBitIx .&. 63 -- `mod` 64 - -{-# INLINE unsafeTestBit #-} --- like testBit but using unsafeShiftL instead of shiftL -unsafeTestBit :: Word64 -> Int -> Bool -unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 - -{-# INLINE unsafeSetBit #-} --- like setBit but using unsafeShiftL instead of shiftL -unsafeSetBit :: Word64 -> Int -> Word64 -unsafeSetBit w k = w .|. (1 `unsafeShiftL` k) diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Calc.hs b/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Calc.hs deleted file mode 100644 index 2cf53701a..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Calc.hs +++ /dev/null @@ -1,162 +0,0 @@ --- | Various formulas for working with bloomfilters. -module Data.BloomFilter.Blocked.Calc ( - NumEntries, - BloomSize (..), - FPR, - sizeForFPR, - BitsPerEntry, - sizeForBits, - sizeForPolicy, - BloomPolicy (..), - policyFPR, - policyForFPR, - policyForBits, -) where - -import Data.BloomFilter.Classic.Calc (BitsPerEntry, FPR, NumEntries) - -{- -Calculating the relationship between bits and FPR for the blocked -implementation: - -While in principle there's a principled approach to this, it's complex to -calculate numerically. So instead we compute a regression from samples of bits -& FPR. The fpr-calc.hs program in this package does this for a range of bits, -and outputs out both graph data (to feed into gnuplot) and it also a regression -fit. The exact fit one gets depends on the PRNG seed used. - -We calculate the regression two ways, one for FPR -> bits, and bits -> FPR. -We use a quadratic regression, with the FPR in log space. - -The following is the sample of the regression fit output that we end up using -in the functions 'policyForFPR' and 'policyForBits'. - -Blocked bloom filter quadratic regressions: -bits independent, FPR dependent: -Fit { - fitParams = V3 (-5.03623760876204e-3) 0.5251544487138062 (-0.10110451821280719), - fitErrors = V3 3.344945010267228e-5 8.905631581753235e-4 5.102181306816477e-3, - fitNDF = 996, fitWSSR = 1.5016403117905384 -} - -FPR independent, bits dependent: -Fit { - fitParams = V3 8.079418894776325e-2 1.6462569292513933 0.5550062950289885, - fitErrors = V3 7.713375250014809e-4 8.542261871094414e-3 2.0678969159415226e-2, - fitNDF = 996, fitWSSR = 19.00125036371992 -} - --} - --- | A policy on intended bloom filter size -- independent of the number of --- elements. --- --- We can decide a policy based on: --- --- 1. a target false positive rate (FPR) using 'policyForFPR' --- 2. a number of bits per entry using 'policyForBits' --- --- A policy can be turned into a 'BloomSize' given a target 'NumEntries' using --- 'sizeForPolicy'. --- --- Either way we define the policy, we can inspect the result to see: --- --- 1. The bits per entry 'policyBits'. This will determine the --- size of the bloom filter in bits. In general the bits per entry can be --- fractional. The final bloom filter size in will be rounded to a whole --- number of bits. --- 2. The number of hashes 'policyHashes'. --- 3. The expected FPR for the policy using 'policyFPR'. --- -data BloomPolicy = BloomPolicy { - policyBits :: !Double, - policyHashes :: !Int - } - deriving stock Show - -policyForFPR :: FPR -> BloomPolicy -policyForFPR fpr | fpr <= 0 || fpr >= 1 = - error "bloomPolicyForFPR: fpr out of range (0,1)" - -policyForFPR fpr = - BloomPolicy { - policyBits = c, - policyHashes = k - } - where - k :: Int - k = max 1 (round (recip_log2 * log_fpr)) - c = log_fpr * log_fpr * f2 - + log_fpr * f1 - + f0 - log_fpr = negate (log fpr) - - -- These parameters are from a (quadratic) linear regression in log space - -- of samples of the actual FPR between 1 and 20 bits. This is with log FPR - -- as the independent variable and bits as the dependent variable. - f2,f1,f0 :: Double - f2 = 8.079418894776325e-2 - f1 = 1.6462569292513933 - f0 = 0.5550062950289885 - -policyForBits :: BitsPerEntry -> BloomPolicy -policyForBits c | c < 0 = - error "policyForBits: bits per entry must be > 0" - -policyForBits c = - BloomPolicy { - policyBits = c, - policyHashes = k - } - where - k = max 1 (round (c * log2)) - -policyFPR :: BloomPolicy -> FPR -policyFPR BloomPolicy { - policyBits = c - } = - exp (0 `min` negate (c*c*f2 + c*f1 + f0)) - where - -- These parameters are from a (quadratic) linear regression in log space - -- of samples of the actual FPR between 2 and 24 bits. This is with bits as - -- the independent variable and log FPR as the dependent variable. We have to - -- clamp the result to keep the FPR within sanity bounds, otherwise extreme - -- bits per element (<0.1 or >104) give FPRs > 1. This is because it's - -- just a regression, not a principled approach. - f2,f1,f0 :: Double - f2 = -5.03623760876204e-3 - f1 = 0.5251544487138062 - f0 = -0.10110451821280719 - --- | Parameters for constructing a Bloom filter. --- -data BloomSize = BloomSize { - -- | The requested number of bits in the filter. - -- - -- The actual size will be rounded up to the nearest 512. - sizeBits :: !Int, - - -- | The number of hash functions to use. - sizeHashes :: !Int - } - deriving stock Show - -sizeForFPR :: FPR -> NumEntries -> BloomSize -sizeForFPR = sizeForPolicy . policyForFPR - -sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize -sizeForBits = sizeForPolicy . policyForBits - -sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize -sizeForPolicy BloomPolicy { - policyBits = c, - policyHashes = k - } n = - BloomSize { - sizeBits = max 1 (ceiling (fromIntegral n * c)), - sizeHashes = max 1 k - } - -log2, recip_log2 :: Double -log2 = log 2 -recip_log2 = recip log2 diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Internal.hs deleted file mode 100644 index 24e2420da..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Blocked/Internal.hs +++ /dev/null @@ -1,387 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} -{-# OPTIONS_HADDOCK not-home #-} - --- | This module defines the 'Bloom' and 'MBloom' types and all the functions --- that need direct knowledge of and access to the representation. This forms --- the trusted base. -module Data.BloomFilter.Blocked.Internal ( - -- * Mutable Bloom filters - MBloom (mbHashSalt), - new, - maxSizeBits, - - -- * Immutable Bloom filters - Bloom (hashSalt), - bloomInvariant, - size, - - -- * Hash-based operations - Hashes, - Salt, - hashesWithSalt, - insertHashes, - prefetchInsert, - elemHashes, - prefetchElem, - readHashes, - - -- * Conversion - freeze, - unsafeFreeze, - thaw, - - -- * (De)Serialisation - formatVersion, - serialise, - deserialise, - ) where - -import Control.DeepSeq (NFData (..)) -import Control.Exception (assert) -import Control.Monad.Primitive (PrimMonad, PrimState) -import Control.Monad.ST (ST) -import Data.Bits -import Data.Kind (Type) -import Data.Primitive.ByteArray -import Data.Primitive.PrimArray -import Data.Primitive.Types (Prim (..)) - -import Data.BloomFilter.Blocked.BitArray (BitArray, BitIx (..), - BlockIx (..), MBitArray, NumBlocks (..), bitsToBlocks, - blocksToBits) -import qualified Data.BloomFilter.Blocked.BitArray as BitArray -import Data.BloomFilter.Blocked.Calc -import Data.BloomFilter.Hash - --- | The version of the format used by 'serialise' and 'deserialise'. The --- format number will change when there is an incompatible change in the --- library, such that deserialising and using the filter will not work. --- This can include more than just changes to the serialised format, for --- example changes to hash functions or how the hash is mapped to bits. --- --- Note that the format produced does not include this version. Version --- checking is the responsibility of the user of the library. --- --- The library guarantes that the format version value for the classic --- ("Data.BloomFilter.Classic") and blocked ("Data.BloomFilter.Blocked") --- implementation will not overlap with each other or any previous value used --- by either implementation. So switching between the two implementations will --- always be detectable and unambigious. --- --- History: --- --- * Version 1000: original blocked implementation --- -formatVersion :: Int -formatVersion = 1000 - -------------------------------------------------------------------------------- --- Mutable Bloom filters --- - -type MBloom :: Type -> Type -> Type --- | A mutable Bloom filter, for use within the 'ST' monad. -data MBloom s a = MBloom { - mbNumBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero - , mbNumHashes :: {-# UNPACK #-} !Int - , mbHashSalt :: {-# UNPACK #-} !Salt - , mbBitArray :: {-# UNPACK #-} !(MBitArray s) - } -type role MBloom nominal nominal - -instance Show (MBloom s a) where - show mb = "MBloom { " ++ show numBits ++ " bits } " - where - numBits = blocksToBits (mbNumBlocks mb) - -instance NFData (MBloom s a) where - rnf !_ = () - --- | Create a new mutable Bloom filter. --- --- The filter size is capped at 'maxSizeBits'. --- -new :: BloomSize -> Salt -> ST s (MBloom s a) -new BloomSize { sizeBits, sizeHashes } mbHashSalt = do - let numBlocks = bitsToBlocks (max 1 (min maxSizeBits sizeBits)) - mbBitArray <- BitArray.new numBlocks - pure MBloom { - mbNumBlocks = numBlocks, - mbNumHashes = max 1 sizeHashes, - mbHashSalt, - mbBitArray - } - --- | The maximum size is @2^41@ bits (256 gigabytes). Tell us if you need bigger --- bloom filters. --- --- The reason for the current limit of @2^41@ bits is that this corresponds to --- @2^32@ blocks, each of size 64 bytes (512 bits). The reason for the current --- limit of @2^32@ blocks is that for efficiency we use a single 64bit hash per --- element, and split that into a pair of 32bit hashes which are used for --- probing the filter. To go bigger would need a pair of hashes. --- -maxSizeBits :: Int -maxSizeBits = 0x200_0000_0000 - -{-# NOINLINE insertHashes #-} -insertHashes :: forall s a. MBloom s a -> Hashes a -> ST s () -insertHashes MBloom { mbNumBlocks, mbNumHashes, mbBitArray } !h = - go g0 mbNumHashes - where - blockIx :: BlockIx - (!blockIx, !g0) = blockIxAndBitGen h mbNumBlocks - - go :: BitIxGen -> Int -> ST s () - go !_ 0 = pure () - go !g !i = do - let blockBitIx :: BitIx - (!blockBitIx, !g') = genBitIndex g - assert (let BlockIx b = blockIx - NumBlocks nb = mbNumBlocks - in b >= 0 && b < fromIntegral nb) $ - BitArray.unsafeSet mbBitArray blockIx blockBitIx - go g' (i-1) - -prefetchInsert :: MBloom s a -> Hashes a -> ST s () -prefetchInsert MBloom { mbNumBlocks, mbBitArray } !h = - BitArray.prefetchSet mbBitArray blockIx - where - blockIx :: BlockIx - (!blockIx, _) = blockIxAndBitGen h mbNumBlocks - -readHashes :: forall s a. MBloom s a -> Hashes a -> ST s Bool -readHashes MBloom { mbNumBlocks, mbNumHashes, mbBitArray } !h = - go g0 mbNumHashes - where - blockIx :: BlockIx - (!blockIx, !g0) = blockIxAndBitGen h mbNumBlocks - - go :: BitIxGen -> Int -> ST s Bool - go !_ 0 = pure True - go !g !i - | let blockBitIx :: BitIx - (!blockBitIx, !g') = genBitIndex g - = do - assert (let BlockIx b = blockIx - NumBlocks nb = mbNumBlocks - in b >= 0 && b < fromIntegral nb) $ pure () - b <- BitArray.unsafeRead mbBitArray blockIx blockBitIx - if b then go g' (i + 1) - else pure False - -{-# INLINE deserialise #-} --- | Overwrite the filter's bit array. Use 'new' to create a filter of the --- expected size and then use this function to fill in the bit data. --- --- The callback is expected to write (exactly) the given number of bytes into --- the given byte array buffer. --- --- See also 'formatVersion' for compatibility advice. --- -deserialise :: PrimMonad m - => MBloom (PrimState m) a - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m () -deserialise MBloom {mbBitArray} fill = - BitArray.deserialise mbBitArray fill - - -------------------------------------------------------------------------------- --- Immutable Bloom filters --- - -type Bloom :: Type -> Type --- | An immutable Bloom filter. -data Bloom a = Bloom { - numBlocks :: {-# UNPACK #-} !NumBlocks -- ^ non-zero - , numHashes :: {-# UNPACK #-} !Int - , hashSalt :: {-# UNPACK #-} !Salt - , bitArray :: {-# UNPACK #-} !BitArray - } - deriving stock Eq -type role Bloom nominal - -bloomInvariant :: Bloom a -> Bool -bloomInvariant Bloom { - numBlocks = NumBlocks nb, - numHashes, - bitArray = BitArray.BitArray pa - } = - nb * 8 == sizeofPrimArray pa - && numHashes > 0 - -instance Show (Bloom a) where - show mb = "Bloom { " ++ show numBits ++ " bits } " - where - numBits = blocksToBits (numBlocks mb) - -instance NFData (Bloom a) where - rnf !_ = () - --- | Return the size of the Bloom filter. -size :: Bloom a -> BloomSize -size Bloom { numBlocks, numHashes } = - BloomSize { - sizeBits = blocksToBits numBlocks, - sizeHashes = numHashes - } - --- | Query an immutable Bloom filter for membership using already constructed --- 'Hash' value. -elemHashes :: Bloom a -> Hashes a -> Bool -elemHashes Bloom { numBlocks, numHashes, bitArray } !h = - go g0 numHashes - where - blockIx :: BlockIx - (!blockIx, !g0) = blockIxAndBitGen h numBlocks - - go :: BitIxGen -> Int -> Bool - go !_ 0 = True - go !g !i - | let blockBitIx :: BitIx - (!blockBitIx, !g') = genBitIndex g - , assert (let BlockIx b = blockIx - NumBlocks nb = numBlocks - in b >= 0 && b < fromIntegral nb) $ - BitArray.unsafeIndex bitArray blockIx blockBitIx - = go g' (i-1) - - | otherwise = False - -prefetchElem :: Bloom a -> Hashes a -> ST s () -prefetchElem Bloom { numBlocks, bitArray } !h = - BitArray.prefetchIndex bitArray blockIx - where - blockIx :: BlockIx - (!blockIx, _) = blockIxAndBitGen h numBlocks - --- | Serialise the bloom filter to a 'BloomSize' (which is needed to --- deserialise) and a 'ByteArray' along with the offset and length containing --- the filter's bit data. --- --- See also 'formatVersion' for compatibility advice. --- -serialise :: Bloom a -> (BloomSize, Salt, ByteArray, Int, Int) -serialise b@Bloom{bitArray} = - (size b, hashSalt b, ba, off, len) - where - (ba, off, len) = BitArray.serialise bitArray - - -------------------------------------------------------------------------------- --- Conversions between mutable and immutable Bloom filters --- - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter may be modified afterwards. -freeze :: MBloom s a -> ST s (Bloom a) -freeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do - bitArray <- BitArray.freeze mbBitArray - let !bf = Bloom { - numBlocks = mbNumBlocks, - numHashes = mbNumHashes, - hashSalt = mbHashSalt, - bitArray - } - assert (bloomInvariant bf) $ pure bf - --- | Create an immutable Bloom filter from a mutable one without copying. The --- mutable filter /must not/ be modified afterwards. For a safer creation --- interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbHashSalt, mbBitArray } = do - bitArray <- BitArray.unsafeFreeze mbBitArray - let !bf = Bloom { - numBlocks = mbNumBlocks, - numHashes = mbNumHashes, - hashSalt = mbHashSalt, - bitArray - } - assert (bloomInvariant bf) $ pure bf - --- | Copy an immutable Bloom filter to create a mutable one. There is --- no non-copying equivalent. -thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBlocks, numHashes, hashSalt, bitArray } = do - mbBitArray <- BitArray.thaw bitArray - pure MBloom { - mbNumBlocks = numBlocks, - mbNumHashes = numHashes, - mbHashSalt = hashSalt, - mbBitArray - } - - -------------------------------------------------------------------------------- --- Low level utils --- - -{-# INLINE reduceRange32 #-} --- | Given a word sampled uniformly from the full 'Word32' range, such as a --- hash, reduce it fairly to a value in the range @[0,n)@. --- --- See --- -reduceRange32 :: Word -- ^ Sample from 0..2^32-1 - -> Word -- ^ upper bound of range [0,n) - -> Word -- ^ result within range -reduceRange32 x n = - assert (n > 0) $ - let w :: Word - w = x * n - in w `shiftR` 32 - -------------------------------------------------------------------------------- --- Hashes --- - --- | A small family of hashes, for probing bits in a blocked bloom filter. --- -newtype Hashes a = Hashes Hash - deriving newtype Prim -type role Hashes nominal - -{-# INLINE hashesWithSalt #-} --- | Create a 'Hashes' structure. -hashesWithSalt :: Hashable a => Salt -> a -> Hashes a -hashesWithSalt = \ !salt !x -> Hashes (hashSalt64 salt x) - -{-# INLINE blockIxAndBitGen #-} --- | The scheme for turning 'Hashes' into block and bit indexes is as follows: --- the high 32bits of the 64bit hash select the block of bits, while the low --- 32bits are used with a simpler PRNG to produce a sequence of probe points --- within the selected 512bit block. --- -blockIxAndBitGen :: Hashes a -> NumBlocks -> (BlockIx, BitIxGen) -blockIxAndBitGen (Hashes w64) (NumBlocks numBlocks) = - assert (numBlocks > 0) $ - (blockIx, bitGen) - where - blockIx = BlockIx (high32 `reduceRange32` fromIntegral numBlocks) - bitGen = BitIxGen low32 - - high32, low32 :: Word - high32 = fromIntegral (w64 `shiftR` 32) - low32 = fromIntegral w64 .&. 0xffff_ffff - -newtype BitIxGen = BitIxGen Word - -{-# INLINE genBitIndex #-} --- | Generate the next in a short sequence of pseudo-random 9-bit values. This --- is used for selecting the probe bit within the 512 bit block. --- --- This simple generator works by multiplying a 32bit value by the golden ratio --- (as a fraction of a 32bit value). This is only suitable for short sequences --- using the top few bits each time. -genBitIndex :: BitIxGen -> (BitIx, BitIxGen) -genBitIndex (BitIxGen h) = - (BitIx i, BitIxGen h') - where - i :: Int - i = fromIntegral (h `shiftR` (32-9)) -- top 9 bits - - h' :: Word - h' = (h * 0x9e37_79b9) .&. 0xffff_ffff -- keep least significant 32 bits diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Classic.hs b/bloomfilter-blocked/src/Data/BloomFilter/Classic.hs deleted file mode 100644 index 3f4c57321..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Classic.hs +++ /dev/null @@ -1,234 +0,0 @@ --- | A fast, space efficient Bloom filter implementation. A Bloom filter is a --- set-like data structure that provides a probabilistic membership test. --- --- * Queries do not give false negatives. When an element is added to a filter, --- a subsequent membership test will definitely return 'True'. --- --- * False positives /are/ possible. If an element has not been added to a --- filter, a membership test /may/ nevertheless indicate that the element is --- present. --- -module Data.BloomFilter.Classic ( - -- * Overview - -- $overview - - -- * Types - Hash, - Salt, - Hashable, - - -- * Immutable Bloom filters - Bloom, - - -- ** Creation - create, - unfold, - fromList, - - -- ** (De)Serialisation - formatVersion, - serialise, - deserialise, - - -- ** Sizes - NumEntries, - BloomSize (..), - FPR, - sizeForFPR, - BitsPerEntry, - sizeForBits, - sizeForPolicy, - BloomPolicy (..), - policyFPR, - policyForFPR, - policyForBits, - - -- ** Accessors - size, - elem, - notElem, - (?), - - -- * Mutable Bloom filters - MBloom, - new, - maxSizeBits, - insert, - read, - - -- ** Conversion - freeze, - thaw, - unsafeFreeze, - - -- * Low level variants - Hashes, - hashesWithSalt, - insertHashes, - elemHashes, - readHashes, -) where - -import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, - stToPrim) -import Control.Monad.ST (ST, runST) -import Data.Primitive.ByteArray (MutableByteArray) - -import Data.BloomFilter.Classic.Calc -import Data.BloomFilter.Classic.Internal hiding (deserialise) -import qualified Data.BloomFilter.Classic.Internal as Internal -import Data.BloomFilter.Hash - -import Prelude hiding (elem, notElem, read) - --- $setup --- --- >>> import Text.Printf - --- $overview --- --- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The --- size determines the number of bits that should be used for the filter. Note --- that a filter is fixed in size; it cannot be resized after creation. --- --- The size can be specified by asking for a target false positive rate (FPR) --- or a number of bits per element, and the number of elements in the filter. --- For example: --- --- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements --- with a false positive rate of 1 in 1000 --- --- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements --- with 10 bits per element --- --- Depending on the application it may be more important to target a fixed --- amount of memory to use, or target a specific FPR. --- --- As a very rough guide for filter sizes, here are a range of FPRs and bits --- per element: --- --- * FPR of 1e-1 requires approximately 4.8 bits per element --- * FPR of 1e-2 requires approximately 9.6 bits per element --- * FPR of 1e-3 requires approximately 14.4 bits per element --- * FPR of 1e-4 requires approximately 19.2 bits per element --- * FPR of 1e-5 requires approximately 24.0 bits per element --- --- >>> fmap (printf "%0.1f" . policyBits . policyForFPR) [1e-1, 1e-2, 1e-3, 1e-4, 1e-5] :: [String] --- ["4.8","9.6","14.4","19.2","24.0"] - --- | Create an immutable Bloom filter, using the given setup function --- which executes in the 'ST' monad. --- --- Example: --- --- >>> :{ --- filter = create (sizeForBits 16 2) 4 $ \mf -> do --- insert mf "foo" --- insert mf "bar" --- :} --- --- Note that the result of the setup function is not used. -create :: BloomSize - -> Salt - -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function - -> Bloom a -{-# INLINE create #-} -create bloomsize bloomsalt body = - runST $ do - mb <- new bloomsize bloomsalt - body mb - unsafeFreeze mb - --- | Insert a value into a mutable Bloom filter. Afterwards, a --- membership query for the same value is guaranteed to return @True@. -insert :: Hashable a => MBloom s a -> a -> ST s () -insert !mb !x = insertHashes mb (hashesWithSalt (mbHashSalt mb) x) - --- | Query an immutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: Hashable a => a -> Bloom a -> Bool -elem = \ !x !b -> elemHashes b (hashesWithSalt (hashSalt b) x) - --- | Same as 'elem' but with the opposite argument order: --- --- > x `elem` bfilter --- --- versus --- --- > bfilter ? x --- -(?) :: Hashable a => Bloom a -> a -> Bool -(?) = flip elem - --- | Query an immutable Bloom filter for non-membership. If the value --- /is/ present, return @False@. If the value is not present, there --- is /still/ some possibility that @False@ will be returned. -notElem :: Hashable a => a -> Bloom a -> Bool -notElem = \ x b -> not (x `elem` b) - --- | Query a mutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -read :: Hashable a => MBloom s a -> a -> ST s Bool -read !mb !x = readHashes mb (hashesWithSalt (mbHashSalt mb) x) - --- | Build an immutable Bloom filter from a seed value. The seeding --- function populates the filter as follows. --- --- * If it returns 'Nothing', it is finished producing values to --- insert into the filter. --- --- * If it returns @'Just' (a,b)@, @a@ is added to the filter and --- @b@ is used as a new seed. -unfold :: forall a b. - Hashable a - => BloomSize - -> Salt - -> (b -> Maybe (a, b)) -- ^ seeding function - -> b -- ^ initial seed - -> Bloom a -{-# INLINE unfold #-} -unfold bloomsize bloomsalt f k = - create bloomsize bloomsalt body - where - body :: forall s. MBloom s a -> ST s () - body mb = loop k - where - loop :: b -> ST s () - loop !j = case f j of - Nothing -> pure () - Just (a, j') -> insert mb a >> loop j' - -{-# INLINEABLE fromList #-} --- | Create a Bloom filter, populating it from a sequence of values. --- --- For example --- --- @ --- filt = fromList (policyForBits 10) 4 [\"foo\", \"bar\", \"quux\"] --- @ -fromList :: (Foldable t, Hashable a) - => BloomPolicy - -> Salt - -> t a -- ^ values to populate with - -> Bloom a -fromList policy bsalt xs = - create bsize bsalt (\b -> mapM_ (insert b) xs) - where - bsize = sizeForPolicy policy (length xs) - -{-# SPECIALISE deserialise :: - BloomSize - -> Salt - -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) - -> IO (Bloom a) #-} -deserialise :: PrimMonad m - => BloomSize - -> Salt - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m (Bloom a) -deserialise bloomsalt bloomsize fill = do - mbloom <- stToPrim $ new bloomsalt bloomsize - Internal.deserialise mbloom fill - stToPrim $ unsafeFreeze mbloom diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Classic/BitArray.hs b/bloomfilter-blocked/src/Data/BloomFilter/Classic/BitArray.hs deleted file mode 100644 index 8d2425757..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Classic/BitArray.hs +++ /dev/null @@ -1,144 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} --- | Minimal bit array implementation. -module Data.BloomFilter.Classic.BitArray ( - BitArray (..), - unsafeIndex, - prefetchIndex, - MBitArray (..), - new, - unsafeSet, - unsafeRead, - freeze, - unsafeFreeze, - thaw, - serialise, - deserialise, -) where - -import Control.Exception (assert) -import Control.Monad.Primitive (PrimMonad, PrimState) -import Control.Monad.ST (ST) -import Data.Bits -import Data.Primitive.ByteArray -import Data.Primitive.PrimArray -import Data.Word (Word64, Word8) - -import GHC.Exts (Int (I#), prefetchByteArray0#) -import GHC.ST (ST (ST)) - --- | Bit vector backed up by an array of Word64 --- --- This vector's offset and length are multiples of 64 -newtype BitArray = BitArray (PrimArray Word64) - deriving stock (Eq, Show) - -{-# INLINE unsafeIndex #-} -unsafeIndex :: BitArray -> Int -> Bool -unsafeIndex (BitArray arr) !i = - assert (j >= 0 && j < sizeofPrimArray arr) $ - unsafeTestBit (indexPrimArray arr j) k - where - !j = unsafeShiftR i 6 -- `div` 64, bit index to Word64 index. - !k = i .&. 63 -- `mod` 64, bit within Word64 - -{-# INLINE prefetchIndex #-} -prefetchIndex :: BitArray -> Int -> ST s () -prefetchIndex (BitArray (PrimArray ba#)) !i = - let !(I# bi#) = i `unsafeShiftR` 3 in - ST (\s -> case prefetchByteArray0# ba# bi# s of - s' -> (# s', () #)) - -- We only need to shiftR 3 here, not 6, because we're going from a bit - -- offset to a byte offset for prefetch. Whereas in unsafeIndex, we go from - -- a bit offset to a Word64 offset, so an extra shiftR 3, for 6 total. - -newtype MBitArray s = MBitArray (MutablePrimArray s Word64) - --- | Will create an explicitly pinned byte array. --- This is done because pinned byte arrays allow for more efficient --- serialisation, but the definition of 'isByteArrayPinned' changed in GHC 9.6, --- see . --- --- TODO: remove this workaround once a solution exists, e.g. a new primop that --- allows checking for implicit pinning. -new :: Int -> ST s (MBitArray s) -new s = do - mba@(MutableByteArray mba#) <- newPinnedByteArray numBytes - setByteArray mba 0 numBytes (0 :: Word8) - pure (MBitArray (MutablePrimArray mba#)) - where - !numWords = roundUpTo64 s - !numBytes = unsafeShiftL numWords 3 -- * 8 - - -- this may overflow, but so be it (2^64 bits is a lot) - roundUpTo64 :: Int -> Int - roundUpTo64 i = unsafeShiftR (i + 63) 6 -- `div` 64, rounded up - -serialise :: BitArray -> (ByteArray, Int, Int) -serialise bitArray = - let ba = asByteArray bitArray - in (ba, 0, sizeofByteArray ba) - where - asByteArray (BitArray (PrimArray ba#)) = ByteArray ba# - -{-# INLINE deserialise #-} --- | Do an inplace overwrite of the byte array representing the bit block. -deserialise :: PrimMonad m - => MBitArray (PrimState m) - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m () -deserialise bitArray fill = do - let mba = asMutableByteArray bitArray - len <- getSizeofMutableByteArray mba - fill mba 0 len - where - asMutableByteArray (MBitArray (MutablePrimArray mba#)) = - MutableByteArray mba# - -unsafeSet :: MBitArray s -> Int -> ST s () -unsafeSet (MBitArray arr) i = do -#ifdef NO_IGNORE_ASSERTS - sz <- getSizeofMutablePrimArray arr - assert (j >= 0 && j < sz) $ pure () -#endif - w <- readPrimArray arr j - writePrimArray arr j (unsafeSetBit w k) - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 63 -- `mod` 64 - -unsafeRead :: MBitArray s -> Int -> ST s Bool -unsafeRead (MBitArray arr) i = do -#ifdef NO_IGNORE_ASSERTS - sz <- getSizeofMutablePrimArray arr - assert (j >= 0 && j < sz) $ pure () -#endif - w <- readPrimArray arr j - pure $! unsafeTestBit w k - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 63 -- `mod` 64 - -freeze :: MBitArray s -> ST s BitArray -freeze (MBitArray arr) = do - len <- getSizeofMutablePrimArray arr - BitArray <$> freezePrimArray arr 0 len - -unsafeFreeze :: MBitArray s -> ST s BitArray -unsafeFreeze (MBitArray arr) = - BitArray <$> unsafeFreezePrimArray arr - -thaw :: BitArray -> ST s (MBitArray s) -thaw (BitArray arr) = - MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) - -{-# INLINE unsafeTestBit #-} --- like testBit but using unsafeShiftL instead of shiftL -unsafeTestBit :: Word64 -> Int -> Bool -unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 - -{-# INLINE unsafeSetBit #-} --- like setBit but using unsafeShiftL instead of shiftL -unsafeSetBit :: Word64 -> Int -> Word64 -unsafeSetBit w k = w .|. (1 `unsafeShiftL` k) diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter-blocked/src/Data/BloomFilter/Classic/Calc.hs deleted file mode 100644 index 2f5fc0f4f..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Classic/Calc.hs +++ /dev/null @@ -1,156 +0,0 @@ --- | Various formulas for working with bloomfilters. -module Data.BloomFilter.Classic.Calc ( - NumEntries, - BloomSize (..), - FPR, - sizeForFPR, - BitsPerEntry, - sizeForBits, - sizeForPolicy, - BloomPolicy (..), - policyFPR, - policyForFPR, - policyForBits, -) where - -import Numeric - -type FPR = Double -type BitsPerEntry = Double -type NumEntries = Int - --- | A policy on intended bloom filter size -- independent of the number of --- elements. --- --- We can decide a policy based on: --- --- 1. a target false positive rate (FPR) using 'policyForFPR' --- 2. a number of bits per entry using 'policyForBits' --- --- A policy can be turned into a 'BloomSize' given a target 'NumEntries' using --- 'sizeForPolicy'. --- --- Either way we define the policy, we can inspect the result to see: --- --- 1. The bits per entry 'policyBits'. This will determine the --- size of the bloom filter in bits. In general the bits per entry can be --- fractional. The final bloom filter size in will be rounded to a whole --- number of bits. --- 2. The number of hashes 'policyHashes'. --- 3. The expected FPR for the policy using 'policyFPR'. --- -data BloomPolicy = BloomPolicy { - policyBits :: !Double, - policyHashes :: !Int - } - deriving stock Show - -policyForFPR :: FPR -> BloomPolicy -policyForFPR fpr | fpr <= 0 || fpr >= 1 = - error "bloomPolicyForFPR: fpr out of range (0,1)" - -policyForFPR fpr = - BloomPolicy { - policyBits = c, - policyHashes = k - } - where - -- There's a simper fomula to compute the number of bits, but it assumes - -- that k is a real. We must however round k to the nearest natural, and - -- so we have to use a more precise approximation, using the actual value - -- of k. - k :: Int; k' :: Double - k = max 1 (round ((-recip_log2) * log_fpr)) - k' = fromIntegral k - c = negate k' / log1mexp (log_fpr / k') - log_fpr = log fpr - -- For the source of this formula, see - -- https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives - -- - -- We start with the FPR ε approximation that assumes independence for the - -- probabilities of each bit being set. - -- - -- ε = (1 - e^(-kn/m))^k - -- - -- And noting that bits per entry @c = m/n@, hence @-kn/m = -k/c@, hence - -- - -- ε = (1-e^(-k/c))^k - -- - -- And then we rearrange to get c, the number of bits per entry: - -- - -- ε = (1-e^(-k/c))^k - -- ε = (1-exp (-k/c))^k - -- ε = exp (log (1 - exp (-k/c)) * k) - -- log ε = log (1 - exp (-k/c)) * k - -- log ε / k = log (1 - exp (-k/c)) - -- exp (log ε / k) = 1 - exp (-k/c) - -- 1 - exp (log ε / k) = exp (-k/c) - -- log (1 - exp (log ε / k)) = -k/c - -- -k / log (1 - exp (log ε / k)) = c - -- -k / log1mexp (log ε / k) = c - -policyForBits :: BitsPerEntry -> BloomPolicy -policyForBits c | c <= 0 = - error "policyForBits: bits per entry must be > 0" - -policyForBits c = - BloomPolicy { - policyBits = c, - policyHashes = k - } - where - k = max 1 (round (c * log2)) - -- For the source of this formula, see - -- https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions - -policyFPR :: BloomPolicy -> FPR -policyFPR BloomPolicy { - policyBits = c, - policyHashes = k - } = - negate (expm1 (negate (k' / c))) ** k' - where - k' = fromIntegral k - -- For the source of this formula, see - -- https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives - -- - -- We use the FPR ε approximation that assumes independence for the - -- probabilities of each bit being set. - -- - -- ε = (1 - e^(-kn/m))^k - -- - -- And noting that bits per entry @c = m/n@, hence @-kn/m = -k/c@, hence - -- - -- ε = (1-e^(-k/c))^k - -- - --- | Parameters for constructing a Bloom filter. --- -data BloomSize = BloomSize { - -- | The requested number of bits in the filter. - sizeBits :: !Int, - - -- | The number of hash functions to use. - sizeHashes :: !Int - } - deriving stock Show - -sizeForFPR :: FPR -> NumEntries -> BloomSize -sizeForFPR = sizeForPolicy . policyForFPR - -sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize -sizeForBits = sizeForPolicy . policyForBits - -sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize -sizeForPolicy BloomPolicy { - policyBits = c, - policyHashes = k - } n = - BloomSize { - sizeBits = max 1 (ceiling (fromIntegral n * c)), - sizeHashes = max 1 k - } - -log2, recip_log2 :: Double -log2 = log 2 -recip_log2 = recip log2 diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter-blocked/src/Data/BloomFilter/Classic/Internal.hs deleted file mode 100644 index b4a83b905..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Classic/Internal.hs +++ /dev/null @@ -1,439 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} -{-# OPTIONS_HADDOCK not-home #-} --- | This module defines the 'Bloom' and 'MBloom' types and all the functions --- that need direct knowledge of and access to the representation. This forms --- the trusted base. -module Data.BloomFilter.Classic.Internal ( - -- * Mutable Bloom filters - MBloom (mbHashSalt), - new, - maxSizeBits, - - -- * Immutable Bloom filters - Bloom (hashSalt), - bloomInvariant, - size, - - -- * Hash-based operations - Hashes, - Salt, - hashesWithSalt, - insertHashes, - elemHashes, - readHashes, - - -- * Conversion - freeze, - unsafeFreeze, - thaw, - - -- * (De)Serialisation - formatVersion, - serialise, - deserialise, - ) where - -import Control.DeepSeq (NFData (..)) -import Control.Exception (assert) -import Control.Monad.Primitive (PrimMonad, PrimState) -import Control.Monad.ST (ST) -import Data.Bits -import Data.Kind (Type) -import Data.Primitive.ByteArray -import Data.Primitive.PrimArray -import Data.Primitive.Types (Prim (..)) -import Data.Word (Word64) - -import GHC.Exts (Int (I#), Int#, int2Word#, timesWord2#, - uncheckedIShiftL#, word2Int#, (+#)) -import qualified GHC.Exts as Exts -import GHC.Word (Word64 (W64#)) - -import Data.BloomFilter.Classic.BitArray (BitArray, MBitArray) -import qualified Data.BloomFilter.Classic.BitArray as BitArray -import Data.BloomFilter.Classic.Calc -import Data.BloomFilter.Hash - --- | The version of the format used by 'serialise' and 'deserialise'. The --- format number will change when there is an incompatible change in the --- library, such that deserialising and using the filter will not work. --- This can include more than just changes to the serialised format, for --- example changes to hash functions or how the hash is mapped to bits. --- --- Note that the format produced does not include this version. Version --- checking is the responsibility of the user of the library. --- --- The library guarantes that the format version value for the classic --- ("Data.BloomFilter.Classic") and blocked ("Data.BloomFilter.Blocked") --- implementation will not overlap with each other or any previous value used --- by either implementation. So switching between the two implementations will --- always be detectable and unambigious. --- --- History: --- --- * Version 0: original --- --- * Version 1: changed range reduction (of hash to bit index) from remainder --- to method based on multiplication. --- -formatVersion :: Int -formatVersion = 1 - -------------------------------------------------------------------------------- --- Mutable Bloom filters --- - -type MBloom :: Type -> Type -> Type --- | A mutable Bloom filter, for use within the 'ST' monad. -data MBloom s a = MBloom { - mbNumBits :: {-# UNPACK #-} !Int -- ^ non-zero - , mbNumHashes :: {-# UNPACK #-} !Int - , mbHashSalt :: {-# UNPACK #-} !Salt - , mbBitArray :: {-# UNPACK #-} !(MBitArray s) - } -type role MBloom nominal nominal - -instance Show (MBloom s a) where - show mb = "MBloom { " ++ show (mbNumBits mb) ++ " bits } " - -instance NFData (MBloom s a) where - rnf !_ = () - --- | Create a new mutable Bloom filter. --- --- The filter size is capped at 'maxSizeBits'. --- -new :: BloomSize -> Salt -> ST s (MBloom s a) -new BloomSize { sizeBits, sizeHashes } mbHashSalt = do - let !mbNumBits = max 1 (min maxSizeBits sizeBits) - mbBitArray <- BitArray.new mbNumBits - pure MBloom { - mbNumBits, - mbNumHashes = max 1 sizeHashes, - mbHashSalt, - mbBitArray - } - --- | The maximum filter size is @2^48@ bits (256 terabytes). Tell us if you need --- bigger bloom filters. --- -maxSizeBits :: Int -maxSizeBits = 0x1_0000_0000_0000 - -insertHashes :: MBloom s a -> Hashes a -> ST s () -insertHashes MBloom { mbNumBits, mbNumHashes, mbBitArray } !h = - go 0 - where - go !i | i >= mbNumHashes = pure () - go !i = do - let probe :: Word64 - probe = evalHashes h i - index :: Int - index = reduceRange64 probe mbNumBits - BitArray.unsafeSet mbBitArray index - go (i + 1) - -readHashes :: forall s a. MBloom s a -> Hashes a -> ST s Bool -readHashes MBloom { mbNumBits, mbNumHashes, mbBitArray } !h = - go 0 - where - go :: Int -> ST s Bool - go !i | i >= mbNumHashes = pure True - go !i = do - let probe :: Word64 - probe = evalHashes h i - index :: Int - index = reduceRange64 probe mbNumBits - b <- BitArray.unsafeRead mbBitArray index - if b then go (i + 1) - else pure False - -{-# INLINE deserialise #-} --- | Overwrite the filter's bit array. Use 'new' to create a filter of the --- expected size and then use this function to fill in the bit data. --- --- The callback is expected to write (exactly) the given number of bytes into --- the given byte array buffer. --- --- See also 'formatVersion' for compatibility advice. --- -deserialise :: PrimMonad m - => MBloom (PrimState m) a - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m () -deserialise MBloom {mbBitArray} fill = - BitArray.deserialise mbBitArray fill - - -------------------------------------------------------------------------------- --- Immutable Bloom filters --- - -type Bloom :: Type -> Type --- | An immutable Bloom filter. -data Bloom a = Bloom { - numBits :: {-# UNPACK #-} !Int -- ^ non-zero - , numHashes :: {-# UNPACK #-} !Int - , hashSalt :: {-# UNPACK #-} !Salt - , bitArray :: {-# UNPACK #-} !BitArray - } - deriving stock Eq -type role Bloom nominal - -bloomInvariant :: Bloom a -> Bool -bloomInvariant Bloom { numBits, numHashes, bitArray = BitArray.BitArray pa } = - numBits > 0 - && numBits <= 2^(48 :: Int) - && ceilDiv64 numBits == sizeofPrimArray pa - && numHashes > 0 - where - ceilDiv64 x = unsafeShiftR (x + 63) 6 - -instance Show (Bloom a) where - show mb = "Bloom { " ++ show (numBits mb) ++ " bits } " - -instance NFData (Bloom a) where - rnf !_ = () - --- | Return the size of the Bloom filter. -size :: Bloom a -> BloomSize -size Bloom { numBits, numHashes } = - BloomSize { - sizeBits = numBits, - sizeHashes = numHashes - } - --- | Query an immutable Bloom filter for membership using already constructed --- 'Hashes' value. -elemHashes :: Bloom a -> Hashes a -> Bool -elemHashes Bloom { numBits, numHashes, bitArray } !h = - go 0 - where - go :: Int -> Bool - go !i | i >= numHashes = True - go !i = - let probe :: Word64 - probe = evalHashes h i - index :: Int - index = reduceRange64 probe numBits - in if BitArray.unsafeIndex bitArray index - then go (i + 1) - else False - --- | Serialise the bloom filter to a 'BloomSize' (which is needed to --- deserialise) and a 'ByteArray' along with the offset and length containing --- the filter's bit data. --- --- See also 'formatVersion' for compatibility advice. --- -serialise :: Bloom a -> (BloomSize, Salt, ByteArray, Int, Int) -serialise b@Bloom{bitArray} = - (size b, hashSalt b, ba, off, len) - where - (ba, off, len) = BitArray.serialise bitArray - - -------------------------------------------------------------------------------- --- Conversions between mutable and immutable Bloom filters --- - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter may be modified afterwards. -freeze :: MBloom s a -> ST s (Bloom a) -freeze MBloom { mbNumBits, mbNumHashes, mbHashSalt, mbBitArray } = do - bitArray <- BitArray.freeze mbBitArray - let !bf = Bloom { - numBits = mbNumBits, - numHashes = mbNumHashes, - hashSalt = mbHashSalt, - bitArray - } - assert (bloomInvariant bf) $ pure bf - --- | Create an immutable Bloom filter from a mutable one without copying. The --- mutable filter /must not/ be modified afterwards. For a safer creation --- interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbHashSalt, mbBitArray } = do - bitArray <- BitArray.unsafeFreeze mbBitArray - let !bf = Bloom { - numBits = mbNumBits, - numHashes = mbNumHashes, - hashSalt = mbHashSalt, - bitArray - } - assert (bloomInvariant bf) $ pure bf - --- | Copy an immutable Bloom filter to create a mutable one. There is --- no non-copying equivalent. -thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBits, numHashes, hashSalt, bitArray } = do - mbBitArray <- BitArray.thaw bitArray - pure MBloom { - mbNumBits = numBits, - mbNumHashes = numHashes, - mbHashSalt = hashSalt, - mbBitArray - } - - -------------------------------------------------------------------------------- --- Low level utils --- - --- | Given a word sampled uniformly from the full 'Word64' range, such as a --- hash, reduce it fairly to a value in the range @[0,n)@. --- --- See --- -{-# INLINE reduceRange64 #-} -reduceRange64 :: Word64 -- ^ Sample from 0..2^64-1 - -> Int -- ^ upper bound of range [0,n) - -> Int -- ^ result within range -reduceRange64 (W64# x) (I# n) = - -- Note that we use widening multiplication of two 64bit numbers, with a - -- 128bit result. GHC provides a primop which returns the 128bit result as - -- a pair of 64bit words. There are (as of 2025) no high level wrappers in - -- the base or primitive packages, so we use the primops directly. - case timesWord2# (word64ToWordShim# x) (int2Word# n) of - (# high, _low #) -> I# (word2Int# high) - -- Note that while x can cover the full Word64 range, since the result is - -- less than n, and since n was an Int then the result fits an Int too. - -{-# INLINE word64ToWordShim# #-} - -#if MIN_VERSION_base(4,17,0) -word64ToWordShim# :: Exts.Word64# -> Exts.Word# -word64ToWordShim# = Exts.word64ToWord# -#else -word64ToWordShim# :: Exts.Word# -> Exts.Word# -word64ToWordShim# x# = x# -#endif - -------------------------------------------------------------------------------- --- Hashes --- - --- | A small family of hashes, for probing bits in a classic bloom filter. -data Hashes a = Hashes !Hash !Hash --- pair of hashes used for a double hashing scheme. -type role Hashes nominal - -instance Prim (Hashes a) where - sizeOfType# _ = 16# - alignmentOfType# _ = 8# - - indexByteArray# ba i = Hashes - (indexByteArray# ba (indexLo i)) - (indexByteArray# ba (indexHi i)) - readByteArray# ba i s1 = - case readByteArray# ba (indexLo i) s1 of { (# s2, lo #) -> - case readByteArray# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, Hashes lo hi #) - }} - writeByteArray# ba i (Hashes lo hi) s = - writeByteArray# ba (indexHi i) hi (writeByteArray# ba (indexLo i) lo s) - - indexOffAddr# ba i = Hashes - (indexOffAddr# ba (indexLo i)) - (indexOffAddr# ba (indexHi i)) - readOffAddr# ba i s1 = - case readOffAddr# ba (indexLo i) s1 of { (# s2, lo #) -> - case readOffAddr# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, Hashes lo hi #) - }} - writeOffAddr# ba i (Hashes lo hi) s = - writeOffAddr# ba (indexHi i) hi (writeOffAddr# ba (indexLo i) lo s) - -indexLo :: Int# -> Int# -indexLo i = uncheckedIShiftL# i 1# - -indexHi :: Int# -> Int# -indexHi i = uncheckedIShiftL# i 1# +# 1# - -{- Note [Original Hashes] - -Compute a list of 32-bit hashes relatively cheaply. The value to -hash is inspected at most twice, regardless of the number of hashes -requested. - -We use a variant of Kirsch and Mitzenmacher's technique from \"Less -Hashing, Same Performance: Building a Better Bloom Filter\", -. - -Where Kirsch and Mitzenmacher multiply the second hash by a -coefficient, we shift right by the coefficient. This offers better -performance (as a shift is much cheaper than a multiply), and the -low order bits of the final hash stay well mixed. - --} - -{- Note: [Hashes] - -On the first glance the 'evalHashes' scheme seems dubious. - -Firstly, it's original performance motivation is dubious. - -> multiply the second hash by a coefficient - -While the scheme double hashing scheme is presented in -theoretical analysis as - - g(i) = a + i * b - -In practice it's implemented in a loop which looks like - - g[0] = a - for (i = 1; i < k; i++) { - a += b; - g[i] = a; - } - -I.e. with just an addition. - -Secondly there is no analysis anywhere about the -'evalHashes' scheme. - -Peter Dillinger's thesis (Adaptive Approximate State Storage) -discusses various fast hashing schemes (section 6.5), -mentioning why ordinary "double hashing" is weak scheme. - -Issue 1: when second hash value is bad, e.g. not coprime with bloom filters size in bits, -we can get repetitions (worst case 0, or m/2). - -Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b) -as we probe all indices (as set) doesn't matter, not sequentially (like in hash table). -So we lose one bit entropy. - -Issue 3: the scheme is prone to partial overlap. -Two values with the same second hash value could overlap on many indices. - -Then Dillinger discusses various schemes which solve this issue. - -The Hashes scheme seems to avoid these cuprits. -This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios. -(normal double hashing and enhances double hashing don't use the high bits or original hash then). -TL;DR Hashes seems to work well in practice. - -For the record: RocksDB uses an own scheme as well, -where first hash is used to pick a cache line, and second one to generate probes inside it. -https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h - --} - --- | Evalute 'Hashes' family. --- --- \[ --- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor --- \] --- -evalHashes :: Hashes a -> Int -> Hash -evalHashes (Hashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) - --- | Create a 'Hashes' structure. -hashesWithSalt :: Hashable a => Salt -> a -> Hashes a --- It simply hashes the value twice using seed 0 and 1. -hashesWithSalt salt v = Hashes (hashSalt64 salt v) (hashSalt64 (salt + 1) v) -{-# INLINE hashesWithSalt #-} diff --git a/bloomfilter-blocked/src/Data/BloomFilter/Hash.hs b/bloomfilter-blocked/src/Data/BloomFilter/Hash.hs deleted file mode 100644 index 5cb15ba85..000000000 --- a/bloomfilter-blocked/src/Data/BloomFilter/Hash.hs +++ /dev/null @@ -1,144 +0,0 @@ -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} --- | --- --- Fast hashing of Haskell values. --- The hash used is XXH3 64bit. --- -module Data.BloomFilter.Hash ( - -- * Basic hash functionality - Hash, - Salt, - Hashable(..), - hash64, - hashByteArray, - -- * Incremental hashing - Incremental (..), - HashState, - incrementalHash, -) where - -import Control.Monad (forM_) -import Control.Monad.ST (ST, runST) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LBS -import Data.Char (ord) -import qualified Data.Primitive.ByteArray as P -import Data.Word (Word32, Word64) -import qualified XXH3 - --- | A hash value is 64 bits wide. -type Hash = Word64 - --- | The salt value to be used for hashes. -type Salt = Word64 - -------------------------------------------------------------------------------- --- One shot hashing -------------------------------------------------------------------------------- - --- | The class of types that can be converted to a hash value. --- --- The instances are meant to be stable, the hash values can be persisted. --- -class Hashable a where - -- | Compute a 64-bit hash of a value. - hashSalt64 :: - Salt -- ^ seed - -> a -- ^ value to hash - -> Hash - --- | Compute a 64-bit hash. -hash64 :: Hashable a => a -> Hash -hash64 = hashSalt64 0 - -instance Hashable () where - hashSalt64 salt _ = salt - -instance Hashable Char where - -- Char's ordinal value should fit into Word32 - hashSalt64 salt c = hashSalt64 salt (fromIntegral (ord c) :: Word32) - -instance Hashable BS.ByteString where - hashSalt64 salt bs = XXH3.xxh3_64bit_withSeed_bs bs salt - -instance Hashable LBS.ByteString where - hashSalt64 salt lbs = - incrementalHash salt $ \s -> - forM_ (LBS.toChunks lbs) $ \bs -> - update s bs - -instance Hashable P.ByteArray where - hashSalt64 salt ba = XXH3.xxh3_64bit_withSeed_ba ba 0 (P.sizeofByteArray ba) salt - -instance Hashable Word64 where - hashSalt64 salt w = XXH3.xxh3_64bit_withSeed_w64 w salt - -instance Hashable Word32 where - hashSalt64 salt w = XXH3.xxh3_64bit_withSeed_w32 w salt - -instance Hashable Word where - hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word64) - --32bit support would need some CPP here to select based on word size - -instance Hashable Int where - hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word) - -{- Note [Tree hashing] - -We recursively hash inductive types (instead e.g. just serially hashing -their fields). Why? - -So ("", "x") and ("x", "") or [[],[],[""]], [[],[""],[]] and [[""],[],[]] -have different hash values! - -Another approach would be to have injective serialisation, -but then 'Incremental BS.ByteString' instance (e.g.) would need to serialise -the length, so we'd need third class for "pieces", keeping 'Incremental' -just adding bytes to the state (without any extras). - --} - -instance Hashable a => Hashable [a] where - hashSalt64 salt xs = incrementalHash salt $ \s -> forM_ xs $ \x -> - update s (hash64 x) - -instance (Hashable a, Hashable b) => Hashable (a, b) where - hashSalt64 salt (x, y) = incrementalHash salt $ \s -> do - update s (hash64 x) - update s (hash64 y) - --- | Hash a (part of) 'P.ByteArray'. -hashByteArray :: P.ByteArray -> Int -> Int -> Salt -> Hash -hashByteArray = XXH3.xxh3_64bit_withSeed_ba - -------------------------------------------------------------------------------- --- Incremental hashing -------------------------------------------------------------------------------- - --- | Hash state for incremental hashing -newtype HashState s = HashState (XXH3.XXH3_State s) - --- | The class of types that can be incrementally hashed. -class Incremental a where - update :: HashState s -> a -> ST s () - -instance Incremental BS.ByteString where - update (HashState s) = XXH3.xxh3_64bit_update_bs s - -instance Incremental Word32 where - update (HashState s) = XXH3.xxh3_64bit_update_w32 s - -instance Incremental Word64 where - update (HashState s) = XXH3.xxh3_64bit_update_w64 s - -instance Incremental Char where - update s c = update s (fromIntegral (ord c) :: Word32) - --- | Calculate incrementally constructed hash. -incrementalHash :: Salt -> (forall s. HashState s -> ST s ()) -> Hash -incrementalHash seed f = runST $ do - s <- XXH3.xxh3_64bit_createState - XXH3.xxh3_64bit_reset_withSeed s seed - f (HashState s) - XXH3.xxh3_64bit_digest s diff --git a/bloomfilter-blocked/tests/bloomfilter-tests.hs b/bloomfilter-blocked/tests/bloomfilter-tests.hs deleted file mode 100644 index e169c9fa8..000000000 --- a/bloomfilter-blocked/tests/bloomfilter-tests.hs +++ /dev/null @@ -1,371 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} -module Main (main) where - -import qualified Data.BloomFilter.Blocked as Bloom.Blocked -import qualified Data.BloomFilter.Classic as B -import qualified Data.BloomFilter.Classic as Bloom.Classic -import Data.BloomFilter.Hash (Hashable (..), hash64) - -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import qualified Data.ByteString.Lazy as LBS -import Data.Int (Int64) -import Data.Proxy (Proxy (..)) -import Data.Word (Word32, Word64) - -import Test.QuickCheck.Instances () -import Test.Tasty -import Test.Tasty.QuickCheck - -import Data.Kind (Type) -import Prelude hiding (elem, notElem) - -main :: IO () -main = defaultMain tests - ---TODO: add a golden test for the BloomFilter format vs the 'formatVersion' --- to ensure we don't change the format without conciously bumping the version. -tests :: TestTree -tests = - testGroup "Data.BloomFilter" $ - [ testGroup "Classic" - [ testGroup "calculations" $ - test_calculations proxyClassic - (FPR 1e-15, FPR 1) (BitsPerEntry 1, BitsPerEntry 75) 1e-6 - ++ test_calculations_classic - , test_fromList proxyClassic - ] - , testGroup "Blocked" - [ testGroup "calculations" $ - -- For the Blocked impl, the calculations are approximations - -- based on regressions. Since they are approximations then we have - -- to use much looser tolerances. Also, the regression only covered - -- the range of 2 bits to 24 bits, so we only cover that range here. - -- And the precision at around 2 bits is poor, so we only look at 3 - -- bits and above. - test_calculations proxyBlocked - (FPR 1e-4, FPR 1e-1) (BitsPerEntry 3, BitsPerEntry 24) 1e-2 - , test_fromList proxyBlocked - , testProperty "prop_insertMany" prop_insertMany - ] - , tests_hashes - ] - where - test_calculations proxy fprRrange bitsRange tolerance = - [ testProperty "prop_calc_policy_fpr" $ - prop_calc_policy_fpr proxy fprRrange tolerance - - , testProperty "prop_calc_policy_bits" $ - prop_calc_policy_bits proxy bitsRange tolerance - - , testProperty "prop_calc_size_hashes_bits" $ - prop_calc_size_hashes_bits proxy - ] - - -- These tests are only for the classic implementation because they use a - -- test oracle ('falsePositiveRate') that is only appropriate for the - -- classic implementation. - test_calculations_classic = - [ testProperty "prop_calc_size_fpr_fpr" $ - prop_calc_size_fpr_fpr proxyClassic - - , testProperty "prop_calc_size_fpr_bits" $ - prop_calc_size_fpr_bits proxyClassic - ] - - test_fromList proxy = - testGroup "fromList" - [ testProperty "()" $ prop_elem proxy (Proxy :: Proxy ()) - , testProperty "Char" $ prop_elem proxy (Proxy :: Proxy Char) - , testProperty "Word32" $ prop_elem proxy (Proxy :: Proxy Word32) - , testProperty "Word64" $ prop_elem proxy (Proxy :: Proxy Word64) - , testProperty "ByteString" $ prop_elem proxy (Proxy :: Proxy ByteString) - , testProperty "LBS.ByteString" $ prop_elem proxy (Proxy :: Proxy LBS.ByteString) - , testProperty "String" $ prop_elem proxy (Proxy :: Proxy String) - ] - - tests_hashes = - testGroup "hashes" - [ testProperty "prop_rechunked_eq" prop_rechunked_eq - , testProperty "prop_tuple_ex" $ - hash64 (BS.empty, BS.pack [120]) =/= hash64 (BS.pack [120], BS.empty) - , testProperty "prop_list_ex" $ - hash64 [[],[],[BS.empty]] =/= hash64 [[],[BS.empty],[]] - ] - -proxyClassic :: Proxy Bloom.Classic.Bloom -proxyClassic = Proxy - -proxyBlocked :: Proxy Bloom.Blocked.Bloom -proxyBlocked = Proxy - -------------------------------------------------------------------------------- --- Element is in a Bloom filter -------------------------------------------------------------------------------- - -prop_elem :: forall bloom a. (BloomFilter bloom, Hashable a) - => Proxy bloom -> Proxy a - -> B.Salt -> a -> [a] -> FPR -> Property -prop_elem proxy _ salt x xs (FPR q) = - let bf :: bloom a - bf = fromList (policyForFPR proxy q) salt (x:xs) - in elem x bf .&&. not (notElem x bf) - -------------------------------------------------------------------------------- --- Bloom filter size calculations -------------------------------------------------------------------------------- - -prop_calc_policy_fpr :: BloomFilter bloom => Proxy bloom - -> (FPR, FPR) -> Double - -> FPR -> Property -prop_calc_policy_fpr proxy (FPR lb, FPR ub) t (FPR fpr) = - fpr > lb && fpr < ub ==> - let policy = policyForFPR proxy fpr - in policyFPR proxy policy ~~~ fpr - where - (~~~) = withinTolerance t - -prop_calc_policy_bits :: forall bloom. BloomFilter bloom => Proxy bloom - -> (BitsPerEntry, BitsPerEntry) -> Double - -> BitsPerEntry -> Property -prop_calc_policy_bits proxy (BitsPerEntry lb, BitsPerEntry ub) t - (BitsPerEntry c) = - c >= lb && c <= ub ==> - let policy = policyForBits proxy c - c' = policyBits (Proxy @bloom) policy - fpr = policyFPR proxy policy - policy' = policyForFPR proxy fpr - fpr' = policyFPR proxy policy' - in c === c' .&&. fpr ~~~ fpr' - where - (~~~) = withinTolerance t - --- | Compare @sizeHashes . sizeForBits@ against @numHashFunctions@ -prop_calc_size_hashes_bits :: forall bloom. BloomFilter bloom => Proxy bloom - -> BitsPerEntry -> NumEntries -> Property -prop_calc_size_hashes_bits proxy (BitsPerEntry c) (NumEntries numEntries) = - let bsize = sizeForBits proxy c numEntries - in numHashFunctions (fromIntegral (sizeBits (Proxy @bloom) bsize)) - (fromIntegral numEntries) - === fromIntegral (sizeHashes (Proxy @bloom) bsize) - --- | Compare @sizeForFPR@ against @falsePositiveRate@ with some tolerance for deviations -prop_calc_size_fpr_fpr :: forall bloom. BloomFilter bloom => Proxy bloom - -> FPR -> NumEntries -> Property -prop_calc_size_fpr_fpr proxy (FPR fpr) (NumEntries numEntries) = - let bsize = sizeForFPR proxy fpr numEntries - in falsePositiveRate (fromIntegral (sizeBits (Proxy @bloom) bsize)) - (fromIntegral numEntries) - (fromIntegral (sizeHashes (Proxy @bloom) bsize)) - ~~~ fpr - where - (~~~) = withinTolerance tolerance - -- At small filter sizes (corresponding to high FPRs), we get significant - -- reductions in accuracy due to rounding the number of bits to an integer. - -- So we use greater tolerances for bigger FPRs. - -- Contrast with prop_calc_policy_fpr which does not do rounding to an - -- integer number of bits (it uses Double for bits per key), and thus can - -- use a very small tolerance. - tolerance | fpr <= 1e-4 = 1e-7 - | fpr <= 1e-3 = 1e-6 - | fpr <= 1e-2 = 1e-5 - | fpr <= 1e-1 = 1e-4 - | otherwise = 1e-3 - --- | Compare @sizeForBits@ against @falsePositiveRate@ with some tolerance for deviations -prop_calc_size_fpr_bits :: forall bloom. BloomFilter bloom => Proxy bloom - -> BitsPerEntry -> NumEntries -> Property -prop_calc_size_fpr_bits proxy (BitsPerEntry bpe) (NumEntries numEntries) = - let policy = policyForBits proxy bpe - bsize = sizeForPolicy proxy policy numEntries - in falsePositiveRate (fromIntegral (sizeBits (Proxy @bloom) bsize)) - (fromIntegral numEntries) - (fromIntegral (sizeHashes (Proxy @bloom) bsize)) - ~~~ policyFPR proxy policy - where - (~~~) = withinTolerance tolerance - tolerance | bpe >= 18 = 1e-7 - | bpe >= 13 = 1e-6 - | bpe >= 8 = 1e-5 - | bpe >= 4 = 1e-4 - | otherwise = 1e-3 - --- reference implementations used for sanity checks - --- | Computes the optimal number of hash functions that minimises the false --- positive rate for a bloom filter. --- --- See --- -numHashFunctions :: - Double -- ^ Number of bits assigned to the bloom filter. - -> Double -- ^ Number of entries inserted into the bloom filter. - -> Integer -numHashFunctions bits nentries = - round $ - max 1 ((bits / nentries) * log 2) - --- | False positive rate --- --- See --- -falsePositiveRate :: - Double -- ^ Number of bits assigned to the bloom filter. - -> Double -- ^ Number of entries inserted into the bloom filter. - -> Double -- ^ Number of hash functions - -> Double -falsePositiveRate m n k = - (1 - exp (-(k * n / m))) ** k - -withinTolerance :: Double -> Double -> Double -> Property -withinTolerance t a b = - counterexample (show a ++ " /= " ++ show b ++ - " and not within (abs) tolerance of " ++ show t) $ - abs (a - b) < t - -------------------------------------------------------------------------------- --- Chunking -------------------------------------------------------------------------------- - --- Ensure that a property over a lazy ByteString holds if we change --- the chunk boundaries. - -rechunk :: Int64 -> LBS.ByteString -> LBS.ByteString -rechunk k xs | k <= 0 = xs - | otherwise = LBS.fromChunks (go xs) - where go s | LBS.null s = [] - | otherwise = let (pre,suf) = LBS.splitAt k s - in repack pre : go suf - repack = BS.concat . LBS.toChunks - - -prop_rechunked :: (Eq a, Show a) => (LBS.ByteString -> a) -> LBS.ByteString -> Property -prop_rechunked f s = - let l = LBS.length s - in l > 0 ==> forAll (choose (1,l-1)) $ \k -> - let n = k `mod` l - in n > 0 ==> f s === f (rechunk n s) - -prop_rechunked_eq :: LBS.ByteString -> Property -prop_rechunked_eq = prop_rechunked hash64 - -------------------------------------------------------------------------------- --- Bulk operations -------------------------------------------------------------------------------- - --- Currently only for Bloom.Blocked. -prop_insertMany :: FPR -> [Word64] -> Property -prop_insertMany (FPR fpr) keys = - bloom_insert === bloom_insertMany - where - bloom_insert = - Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) salt $ \mb -> - mapM_ (Bloom.Blocked.insert mb) keys - - bloom_insertMany = - Bloom.Blocked.create (Bloom.Blocked.sizeForFPR fpr n) salt $ \mb -> - Bloom.Blocked.insertMany mb (\k -> pure $ keys !! k) n - - !n = length keys - - !salt = 4 -- https://xkcd.com/221/ - -------------------------------------------------------------------------------- --- Class to allow testing two filter implementations -------------------------------------------------------------------------------- - -class BloomFilter bloom where - type BloomPolicy bloom :: Type - - policyBits :: Proxy bloom -> BloomPolicy bloom -> Double - - type BloomSize bloom :: Type - - sizeBits :: Proxy bloom -> BloomSize bloom -> Int - sizeHashes :: Proxy bloom -> BloomSize bloom -> Int - - fromList :: Hashable a => BloomPolicy bloom -> B.Salt -> [a] -> bloom a - elem :: Hashable a => a -> bloom a -> Bool - notElem :: Hashable a => a -> bloom a -> Bool - - sizeForFPR :: Proxy bloom -> B.FPR -> B.NumEntries -> BloomSize bloom - sizeForBits :: Proxy bloom -> B.BitsPerEntry -> B.NumEntries -> BloomSize bloom - sizeForPolicy :: Proxy bloom -> BloomPolicy bloom -> B.NumEntries -> BloomSize bloom - policyForFPR :: Proxy bloom -> B.FPR -> BloomPolicy bloom - policyForBits :: Proxy bloom -> B.BitsPerEntry -> BloomPolicy bloom - policyFPR :: Proxy bloom -> BloomPolicy bloom -> B.FPR - -instance BloomFilter Bloom.Classic.Bloom where - type instance BloomPolicy Bloom.Classic.Bloom = Bloom.Classic.BloomPolicy - - policyBits _ = Bloom.Classic.policyBits - - type instance BloomSize Bloom.Classic.Bloom = Bloom.Classic.BloomSize - - sizeBits _ = Bloom.Classic.sizeBits - sizeHashes _ = Bloom.Classic.sizeHashes - - fromList = Bloom.Classic.fromList - elem = Bloom.Classic.elem - notElem = Bloom.Classic.notElem - - sizeForFPR _ = Bloom.Classic.sizeForFPR - sizeForBits _ = Bloom.Classic.sizeForBits - sizeForPolicy _ = Bloom.Classic.sizeForPolicy - policyForFPR _ = Bloom.Classic.policyForFPR - policyForBits _ = Bloom.Classic.policyForBits - policyFPR _ = Bloom.Classic.policyFPR - -instance BloomFilter Bloom.Blocked.Bloom where - type instance BloomPolicy Bloom.Blocked.Bloom = Bloom.Blocked.BloomPolicy - - policyBits _ = Bloom.Blocked.policyBits - - type instance BloomSize Bloom.Blocked.Bloom = Bloom.Blocked.BloomSize - - sizeBits _ = Bloom.Blocked.sizeBits - sizeHashes _ = Bloom.Blocked.sizeHashes - - fromList = Bloom.Blocked.fromList - elem = Bloom.Blocked.elem - notElem = Bloom.Blocked.notElem - - sizeForFPR _ = Bloom.Blocked.sizeForFPR - sizeForBits _ = Bloom.Blocked.sizeForBits - sizeForPolicy _ = Bloom.Blocked.sizeForPolicy - policyForFPR _ = Bloom.Blocked.policyForFPR - policyForBits _ = Bloom.Blocked.policyForBits - policyFPR _ = Bloom.Blocked.policyFPR - -------------------------------------------------------------------------------- --- QC generators -------------------------------------------------------------------------------- - -newtype FPR = FPR Double - deriving stock Show - -instance Arbitrary FPR where - -- The most significant effect of the FPR is from its (negative) exponent, - -- which influences both filter bits and number of hashes. So we generate - -- values with an exponent from 10^0 to 10^-15 - arbitrary = do - m <- choose (1, 9.99) -- not less than 1 or it's a different exponent - e <- choose (1, 15) - pure (FPR (m * 10 ** (-e))) - -newtype BitsPerEntry = BitsPerEntry Double - deriving stock Show - -instance Arbitrary BitsPerEntry where - arbitrary = BitsPerEntry <$> choose (1, 75) - -newtype NumEntries = NumEntries Int - deriving stock Show - --- | The FPR calculations are approximations and are not expected to be --- accurate for low numbers of entries or bits. --- -instance Arbitrary NumEntries where - arbitrary = NumEntries <$> choose (1_000, 100_000_000) - shrink (NumEntries n) = - [ NumEntries n' | n' <- shrink n, n' >= 1000 ] diff --git a/bloomfilter-blocked/tests/fpr-calc.hs b/bloomfilter-blocked/tests/fpr-calc.hs deleted file mode 100644 index f13668c98..000000000 --- a/bloomfilter-blocked/tests/fpr-calc.hs +++ /dev/null @@ -1,208 +0,0 @@ -{-# LANGUAGE ParallelListComp #-} -module Main (main) where - -import qualified Data.BloomFilter as B (BitsPerEntry, FPR, Hashable, Salt) -import qualified Data.BloomFilter.Blocked as B.Blocked -import qualified Data.BloomFilter.Classic as B.Classic - -import Control.Parallel.Strategies -import Data.IntSet (IntSet) -import qualified Data.IntSet as IntSet -import Data.List (unfoldr) -import Math.Regression.Simple -import System.Environment (getArgs) -import System.Exit (exitSuccess) -import System.IO -import System.Random - -import Prelude hiding (elem) - --- | Write out data files used by gnuplot fpr.plot -main :: IO () -main = do - hSetBuffering stdout NoBuffering --for progress reporting - - args <- getArgs - case args of - ["Generate"] -> main_generateData - ["Regression"] -> main_regression - _ -> do - putStrLn "Usage: bloomfilter-fpr-calc [Generate|Regression]" - exitSuccess - -main_regression :: IO () -main_regression = do - s <- readFile "bloomfilter/fpr.blocked.gnuplot.data" - let parseLine l = case words l of - [w_xs_blocked, _, w_ys_blocked_actual] -> - ( read w_xs_blocked, read w_ys_blocked_actual ) - _ -> error "failed parse" - xs_blocked, ys_blocked_actual :: [Double] - (xs_blocked, ys_blocked_actual) = unzip $ fmap parseLine $ lines s - - let regressionData :: [(Double, Double)] - regressionData = zip xs_blocked - (map (negate . log) ys_blocked_actual) - regressionBitsToFPR = quadraticFit (\(x,y)->(x,y)) regressionData - regressionFPRToBits = quadraticFit (\(x,y)->(y,x)) regressionData - putStrLn "" - putStrLn "Blocked bloom filter quadratic regressions:" - putStrLn "bits independent, FPR dependent:" - print regressionBitsToFPR - putStrLn "" - putStrLn "FPR independent, bits dependent:" - print regressionFPRToBits - -main_generateData :: IO () -main_generateData = do - withFile "bloomfilter/fpr.classic.gnuplot.data" WriteMode $ \h -> do - hSetBuffering h LineBuffering --for incremental output - mapM_ (\l -> hPutStrLn h l >> putChar '.') $ - [ unwords [show bitsperkey, show y1, show y2] - | (bitsperkey, _) <- xs_classic - | y1 <- ys_classic_calc - | y2 <- ys_classic_actual - ] - putStrLn "Wrote bloomfilter/fpr.classic.gnuplot.data" - - withFile "bloomfilter/fpr.blocked.gnuplot.data" WriteMode $ \h -> do - hSetBuffering h LineBuffering --for incremental output - mapM_ (\l -> hPutStrLn h l >> putChar '.') $ - [ unwords [show bitsperkey, show y1, show y2] - | (bitsperkey, _) <- xs_blocked - | y1 <- ys_blocked_calc - | y2 <- ys_blocked_actual - ] - putStrLn "Wrote bloomfilter/fpr.blocked.gnuplot.data" - where - -- x axis values - xs_classic = - [ (bitsperkey, g) - | bitsperkey <- [2,2.3..20] - , g <- mkStdGen <$> [1..3] - ] - -- We use fewer points for classic, as it's slower and there's less need. - - xs_blocked = - [ (bitsperkey, g) - | bitsperkey <- [2,2.2..24] - , g <- mkStdGen <$> [1..9] - ] - - ys_classic_calc, ys_classic_actual, - ys_blocked_calc, ys_blocked_actual :: [Double] - - ys_classic_calc = ys_calc classicBloomImpl xs_classic - ys_blocked_calc = ys_calc blockedBloomImpl xs_blocked - - ys_classic_actual = ys_actual classicBloomImpl xs_classic - ys_blocked_actual = ys_actual blockedBloomImpl xs_blocked - - ys_calc :: BloomImpl b p s -> [(Double, StdGen)] -> [Double] - ys_calc BloomImpl{..} xs = - [ fpr - | (bitsperkey, _) <- xs - , let policy = policyForBits bitsperkey - fpr = policyFPR policy - ] - - ys_actual :: BloomImpl b p s -> [(Double, StdGen)] -> [Double] - ys_actual impl@BloomImpl{..} xs = - withStrategy (parList rseq) -- eval in parallel - [ fpr - | (bitsperkey, g) <- xs - , let policy = policyForBits bitsperkey - fpr_est = policyFPR policy - nentries = round (1000 * recip fpr_est) - fpr = actualFalsePositiveRate impl policy nentries g - ] -{- - -- fpr values in the range 1e-1 .. 1e-6 - ys = [ exp (-log_fpr) - | log_fpr <- [2.3,2.4 .. 13.8] ] - - xs_classic_calc = xs_calc classicBloomImpl - xs_blocked_calc = xs_calc blockedBloomImpl - - xs_calc BloomImpl{..} = - [ bits - | fpr <- ys - , let policy = policyForFPR fpr - bits = policyBits policy - ] --} - -actualFalsePositiveRate :: BloomImpl bloom policy size - -> policy -> Int -> StdGen -> Double -actualFalsePositiveRate bloomimpl policy n g0 = - fromIntegral (countFalsePositives bloomimpl policy n g0) - / fromIntegral n - -countFalsePositives :: forall bloom policy size. BloomImpl bloom policy size - -> policy -> Int -> StdGen -> Int -countFalsePositives BloomImpl{..} policy n g0 = - let (!g01, !g02) = splitGen g0 - - -- create a random salt - (!salt, !g03) = uniform g02 - - -- create a bloom filter from n elements from g0 - size = sizeForPolicy policy n - - xs_b :: bloom Int - !xs_b = unfold size salt nextElement (g01, 0) - - -- and a set, so we can make sure we don't count true positives - xs_s :: IntSet - !xs_s = IntSet.fromList (unfoldr nextElement (g01, 0)) - - -- now for a different random sequence (that will mostly not overlap) - -- count the number of false positives - in length - [ () - | y <- unfoldr nextElement (g03, 0) - , y `elem` xs_b -- Bloom filter reports positive - , not (y `IntSet.member` xs_s) -- but it is not a true positive - ] - where - nextElement :: (StdGen, Int) -> Maybe (Int, (StdGen, Int)) - nextElement (!g, !i) - | i >= n = Nothing - | otherwise = Just (x, (g', i+1)) - where - (!x, !g') = uniform g - -data BloomImpl bloom policy size = BloomImpl { - policyForBits :: B.BitsPerEntry -> policy, - policyForFPR :: B.FPR -> policy, - policyBits :: policy -> B.BitsPerEntry, - policyFPR :: policy -> B.FPR, - sizeForPolicy :: policy -> Int -> size, - unfold :: forall a b. B.Hashable a - => size -> B.Salt -> (b -> Maybe (a, b)) -> b -> bloom a, - elem :: forall a. B.Hashable a => a -> bloom a -> Bool - } - -classicBloomImpl :: BloomImpl B.Classic.Bloom B.Classic.BloomPolicy B.Classic.BloomSize -classicBloomImpl = - BloomImpl { - policyForBits = B.Classic.policyForBits, - policyForFPR = B.Classic.policyForFPR, - policyBits = B.Classic.policyBits, - policyFPR = B.Classic.policyFPR, - sizeForPolicy = B.Classic.sizeForPolicy, - unfold = B.Classic.unfold, - elem = B.Classic.elem - } - -blockedBloomImpl :: BloomImpl B.Blocked.Bloom B.Blocked.BloomPolicy B.Blocked.BloomSize -blockedBloomImpl = - BloomImpl { - policyForBits = B.Blocked.policyForBits, - policyForFPR = B.Blocked.policyForFPR, - policyBits = B.Blocked.policyBits, - policyFPR = B.Blocked.policyFPR, - sizeForPolicy = B.Blocked.sizeForPolicy, - unfold = B.Blocked.unfold, - elem = B.Blocked.elem - } diff --git a/bloomfilter-blocked/xxhash/include/HsXXHash.h b/bloomfilter-blocked/xxhash/include/HsXXHash.h deleted file mode 100644 index 6b3bfae34..000000000 --- a/bloomfilter-blocked/xxhash/include/HsXXHash.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef HS_XXHASH -#define HS_XXHASH - -#include - -#define XXH_INLINE_ALL -#include "xxhash.h" - -#define hs_XXH3_sizeof_state_s sizeof(struct XXH3_state_s) - -static inline uint64_t hs_XXH3_64bits_withSeed_offset(const uint8_t *ptr, size_t off, size_t len, uint64_t seed) { - return XXH3_64bits_withSeed(ptr + off, len, seed); -} - -static inline uint64_t hs_XXH3_64bits_withSeed_u64(uint64_t val, uint64_t seed) { - return XXH3_64bits_withSeed(&val, sizeof(val), seed); -} - -static inline uint64_t hs_XXH3_64bits_withSeed_u32(uint32_t val, uint64_t seed) { - return XXH3_64bits_withSeed(&val, sizeof(val), seed); -} - -static inline void hs_XXH3_64bits_update_offset(XXH3_state_t *statePtr, const uint8_t *ptr, size_t off, size_t len) { - XXH3_64bits_update(statePtr, ptr + off, len); -} - -static inline void hs_XXH3_64bits_update_u64(XXH3_state_t *statePtr, uint64_t val) { - XXH3_64bits_update(statePtr, &val, sizeof(val)); -} - -static inline void hs_XXH3_64bits_update_u32(XXH3_state_t *statePtr, uint32_t val) { - XXH3_64bits_update(statePtr, &val, sizeof(val)); -} - -#endif /* HS_XXHASH */ diff --git a/bloomfilter-blocked/xxhash/src/FFI.hs b/bloomfilter-blocked/xxhash/src/FFI.hs deleted file mode 100644 index 3ddcec135..000000000 --- a/bloomfilter-blocked/xxhash/src/FFI.hs +++ /dev/null @@ -1,75 +0,0 @@ -{-# LANGUAGE CApiFFI #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnliftedFFITypes #-} -module FFI ( - -- * One shot - unsafe_xxh3_64bit_withSeed_ptr, - unsafe_xxh3_64bit_withSeed_ba, - unsafe_xxh3_64bit_withSeed_u64, - unsafe_xxh3_64bit_withSeed_u32, - -- * Incremental - unsafe_xxh3_sizeof_state, - unsafe_xxh3_initState, - unsafe_xxh3_64bit_reset_withSeed, - unsafe_xxh3_64bit_digest, - unsafe_xxh3_64bit_update_ptr, - unsafe_xxh3_64bit_update_ba, - unsafe_xxh3_64bit_update_u64, - unsafe_xxh3_64bit_update_u32, -) where - -import Data.Word (Word32, Word64, Word8) -import Foreign.C.Types (CSize (..)) -import Foreign.Ptr (Ptr) -import GHC.Exts (ByteArray#, MutableByteArray#) - --- Note: we use unsafe FFI calls, as we expect our use case to be hashing only small data (<1kb, at most 4k). - -------------------------------------------------------------------------------- --- OneShot -------------------------------------------------------------------------------- - -foreign import capi unsafe "HsXXHash.h XXH3_64bits_withSeed" - unsafe_xxh3_64bit_withSeed_ptr :: Ptr Word8 -> CSize -> Word64 -> IO Word64 - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_withSeed_offset" - unsafe_xxh3_64bit_withSeed_ba :: ByteArray# -> CSize -> CSize -> Word64 -> Word64 - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_withSeed_u64" - unsafe_xxh3_64bit_withSeed_u64 :: Word64 -> Word64 -> Word64 - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_withSeed_u32" - unsafe_xxh3_64bit_withSeed_u32 :: Word32 -> Word64 -> Word64 - -------------------------------------------------------------------------------- --- Incremental -------------------------------------------------------------------------------- - --- reset and update functions return OK/Error --- we ignore that: --- * reset errors only on NULL state --- * update cannot even error - -foreign import capi unsafe "HsXXHash.h value hs_XXH3_sizeof_state_s" - unsafe_xxh3_sizeof_state :: Int - -foreign import capi unsafe "HsXXHash.h XXH3_INITSTATE" - unsafe_xxh3_initState :: MutableByteArray# s -> IO () - -foreign import capi unsafe "HsXXHash.h XXH3_64bits_reset_withSeed" - unsafe_xxh3_64bit_reset_withSeed :: MutableByteArray# s -> Word64 -> IO () - -foreign import capi unsafe "HsXXHash.h XXH3_64bits_digest" - unsafe_xxh3_64bit_digest :: MutableByteArray# s -> IO Word64 - -foreign import capi unsafe "HsXXHash.h XXH3_64bits_update" - unsafe_xxh3_64bit_update_ptr :: MutableByteArray# s -> Ptr Word8 -> CSize -> IO () - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_update_offset" - unsafe_xxh3_64bit_update_ba :: MutableByteArray# s -> ByteArray# -> CSize -> CSize -> IO () - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_update_u64" - unsafe_xxh3_64bit_update_u64 :: MutableByteArray# s -> Word64 -> IO () - -foreign import capi unsafe "HsXXHash.h hs_XXH3_64bits_update_u32" - unsafe_xxh3_64bit_update_u32 :: MutableByteArray# s -> Word32 -> IO () diff --git a/bloomfilter-blocked/xxhash/src/XXH3.hs b/bloomfilter-blocked/xxhash/src/XXH3.hs deleted file mode 100644 index 8a110d1fc..000000000 --- a/bloomfilter-blocked/xxhash/src/XXH3.hs +++ /dev/null @@ -1,110 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} - -module XXH3 ( - -- * One shot - xxh3_64bit_withSeed_bs, - xxh3_64bit_withSeed_ba, - xxh3_64bit_withSeed_w64, - xxh3_64bit_withSeed_w32, - -- * Incremental - XXH3_State, - xxh3_64bit_createState, - xxh3_64bit_reset_withSeed, - xxh3_64bit_digest, - xxh3_64bit_update_bs, - xxh3_64bit_update_ba, - xxh3_64bit_update_w64, - xxh3_64bit_update_w32, -) where - -import Control.Monad.ST (ST) -import Control.Monad.ST.Unsafe (unsafeIOToST) -import Data.ByteString.Internal (ByteString (..), - accursedUnutterablePerformIO) -import qualified Data.Primitive as P -import Data.Primitive.ByteArray (ByteArray (..)) -import Data.Word (Word32, Word64) -import Foreign.ForeignPtr -import GHC.Exts (MutableByteArray#) -import GHC.ForeignPtr - -import FFI - -{-# INLINE withFP #-} -withFP :: ForeignPtr a -> (P.Ptr a -> IO b) -> IO b -withFP = unsafeWithForeignPtr - -------------------------------------------------------------------------------- --- OneShot -------------------------------------------------------------------------------- - --- | Hash 'ByteString'. -xxh3_64bit_withSeed_bs :: ByteString -> Word64 -> Word64 -xxh3_64bit_withSeed_bs (BS fptr len) !salt = accursedUnutterablePerformIO $ - withFP fptr $ \ptr -> - unsafe_xxh3_64bit_withSeed_ptr ptr (fromIntegral len) salt - --- | Hash (part of) 'ByteArray'. -xxh3_64bit_withSeed_ba :: ByteArray -> Int -> Int -> Word64 -> Word64 -xxh3_64bit_withSeed_ba (ByteArray ba) !off !len !salt = - unsafe_xxh3_64bit_withSeed_ba ba (fromIntegral off) (fromIntegral len) salt - --- | Hash 'Word64'. -xxh3_64bit_withSeed_w64 :: Word64 -> Word64 -> Word64 -xxh3_64bit_withSeed_w64 !x !salt = - unsafe_xxh3_64bit_withSeed_u64 x salt - --- | Hash 'Word32'. -xxh3_64bit_withSeed_w32 :: Word32 -> Word64 -> Word64 -xxh3_64bit_withSeed_w32 !x !salt = - unsafe_xxh3_64bit_withSeed_u32 x salt - -------------------------------------------------------------------------------- --- Incremental -------------------------------------------------------------------------------- - --- | Mutable XXH3 state. -data XXH3_State s = XXH3 (MutableByteArray# s) - --- | Create 'XXH3_State'. -xxh3_64bit_createState :: forall s. ST s (XXH3_State s) -xxh3_64bit_createState = do - -- aligned alloc, otherwise we get segfaults. - -- see XXH3_createState implementation - P.MutableByteArray ba <- P.newAlignedPinnedByteArray unsafe_xxh3_sizeof_state 64 - unsafeIOToST (unsafe_xxh3_initState ba) - pure (XXH3 ba) - --- | Reset 'XXH3_State' with a seed. -xxh3_64bit_reset_withSeed :: XXH3_State s -> Word64 -> ST s () -xxh3_64bit_reset_withSeed (XXH3 s) seed = do - unsafeIOToST (unsafe_xxh3_64bit_reset_withSeed s seed) - --- | Return a hash value from a 'XXH3_State'. --- --- Doesn't mutate given state, so you can update, digest and update again. -xxh3_64bit_digest :: XXH3_State s -> ST s Word64 -xxh3_64bit_digest (XXH3 s) = - unsafeIOToST (unsafe_xxh3_64bit_digest s) - --- | Update 'XXH3_State' with 'ByteString'. -xxh3_64bit_update_bs :: XXH3_State s -> ByteString -> ST s () -xxh3_64bit_update_bs (XXH3 s) (BS fptr len) = unsafeIOToST $ - withFP fptr $ \ptr -> - unsafe_xxh3_64bit_update_ptr s ptr (fromIntegral len) - --- | Update 'XXH3_State' with (part of) 'ByteArray' -xxh3_64bit_update_ba :: XXH3_State s -> ByteArray -> Int -> Int -> ST s () -xxh3_64bit_update_ba (XXH3 s) (ByteArray ba) !off !len = unsafeIOToST $ - unsafe_xxh3_64bit_update_ba s ba (fromIntegral off) (fromIntegral len) - --- | Update 'XXH3_State' with 'Word64'. -xxh3_64bit_update_w64 :: XXH3_State s -> Word64 -> ST s () -xxh3_64bit_update_w64 (XXH3 s) w64 = unsafeIOToST $ - unsafe_xxh3_64bit_update_u64 s w64 - --- | Update 'XXH3_State' with 'Word32'. -xxh3_64bit_update_w32 :: XXH3_State s -> Word32 -> ST s () -xxh3_64bit_update_w32 (XXH3 s) w32 = unsafeIOToST $ - unsafe_xxh3_64bit_update_u32 s w32 diff --git a/bloomfilter-blocked/xxhash/tests/xxhash-tests.hs b/bloomfilter-blocked/xxhash/tests/xxhash-tests.hs deleted file mode 100644 index 28c2c1cdd..000000000 --- a/bloomfilter-blocked/xxhash/tests/xxhash-tests.hs +++ /dev/null @@ -1,63 +0,0 @@ -{-# LANGUAGE NumericUnderscores #-} -module Main (main) where - -import Control.Monad.ST (runST) -import qualified Data.ByteString as BS -import qualified Data.Primitive as P -import Data.Word (Word32, Word64) -import Test.Tasty (defaultMain, testGroup) -import Test.Tasty.HUnit (testCase, (@=?)) -import Test.Tasty.QuickCheck (testProperty, (===)) - -import XXH3 - -main :: IO () -main = defaultMain $ testGroup "xxhash" - [ testGroup "oneshot" - [ testProperty "w64-ref" $ \w salt -> - xxh3_64bit_withSeed_w64 w salt === xxh3_64bit_withSeed_w64_ref w salt - , testCase "w64-examples" $ do - xxh3_64bit_withSeed_w64 0 0 @=? 0xc77b_3abb_6f87_acd9 - xxh3_64bit_withSeed_w64 0x12 1 @=? 0xbba4_8522_c425_46b2 - xxh3_64bit_withSeed_w64 0x2100_0000_0000_0000 0 @=? 0xb7cb_e42a_e127_8055 - xxh3_64bit_withSeed_w64 0x1eb6e9 0 @=? 0x8e_adc3_1b56 - - , testProperty "w32-ref" $ \w salt -> - xxh3_64bit_withSeed_w32 w salt === xxh3_64bit_withSeed_w32_ref w salt - - , testCase "w32-examples" $ do - xxh3_64bit_withSeed_w32 0 0 @=? 0x48b2_c926_16fc_193d - xxh3_64bit_withSeed_w32 0x12 1 @=? 0x2870_1df3_2a21_6ad3 - - ] - - , testGroup "incremental" - [ testProperty "empty" $ \seed -> do - let expected = xxh3_64bit_withSeed_bs BS.empty seed - let actual = runST $ do - s <- xxh3_64bit_createState - xxh3_64bit_reset_withSeed s seed - xxh3_64bit_digest s - - actual === expected - - , testProperty "bs" $ \w8s seed -> do - let bs = BS.pack w8s - let expected = xxh3_64bit_withSeed_bs bs seed - let actual = runST $ do - s <- xxh3_64bit_createState - xxh3_64bit_reset_withSeed s seed - xxh3_64bit_update_bs s bs - xxh3_64bit_digest s - - actual === expected - ] - ] - -xxh3_64bit_withSeed_w64_ref :: Word64 -> Word64 -> Word64 -xxh3_64bit_withSeed_w64_ref w salt = case P.primArrayFromList [w] of - P.PrimArray ba -> xxh3_64bit_withSeed_ba (P.ByteArray ba) 0 8 salt - -xxh3_64bit_withSeed_w32_ref :: Word32 -> Word64 -> Word64 -xxh3_64bit_withSeed_w32_ref w salt = case P.primArrayFromList [w] of - P.PrimArray ba -> xxh3_64bit_withSeed_ba (P.ByteArray ba) 0 4 salt diff --git a/bloomfilter-blocked/xxhash/xxHash-0.8.2/LICENSE-xxHash b/bloomfilter-blocked/xxhash/xxHash-0.8.2/LICENSE-xxHash deleted file mode 100644 index e4c5da723..000000000 --- a/bloomfilter-blocked/xxhash/xxHash-0.8.2/LICENSE-xxHash +++ /dev/null @@ -1,26 +0,0 @@ -xxHash Library -Copyright (c) 2012-2021 Yann Collet -All rights reserved. - -BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bloomfilter-blocked/xxhash/xxHash-0.8.2/xxhash.h b/bloomfilter-blocked/xxhash/xxHash-0.8.2/xxhash.h deleted file mode 100644 index a18e8c762..000000000 --- a/bloomfilter-blocked/xxhash/xxHash-0.8.2/xxhash.h +++ /dev/null @@ -1,6773 +0,0 @@ -/* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (C) 2012-2021 Yann Collet - * - * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You can contact the author at: - * - xxHash homepage: https://www.xxhash.com - * - xxHash source repository: https://github.com/Cyan4973/xxHash - */ - -/*! - * @mainpage xxHash - * - * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed - * limits. - * - * It is proposed in four flavors, in three families: - * 1. @ref XXH32_family - * - Classic 32-bit hash function. Simple, compact, and runs on almost all - * 32-bit and 64-bit systems. - * 2. @ref XXH64_family - * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most - * 64-bit systems (but _not_ 32-bit systems). - * 3. @ref XXH3_family - * - Modern 64-bit and 128-bit hash function family which features improved - * strength and performance across the board, especially on smaller data. - * It benefits greatly from SIMD and 64-bit without requiring it. - * - * Benchmarks - * --- - * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04. - * The open source benchmark program is compiled with clang v10.0 using -O3 flag. - * - * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity | - * | -------------------- | ------- | ----: | ---------------: | ------------------: | - * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 | - * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 | - * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 | - * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 | - * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 | - * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 | - * | RAM sequential read | | N/A | 28.0 GB/s | N/A | - * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 | - * | City64 | | 64 | 22.0 GB/s | 76.6 | - * | T1ha2 | | 64 | 22.0 GB/s | 99.0 | - * | City128 | | 128 | 21.7 GB/s | 57.7 | - * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 | - * | XXH64() | | 64 | 19.4 GB/s | 71.0 | - * | SpookyHash | | 64 | 19.3 GB/s | 53.2 | - * | Mum | | 64 | 18.0 GB/s | 67.0 | - * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 | - * | XXH32() | | 32 | 9.7 GB/s | 71.9 | - * | City32 | | 32 | 9.1 GB/s | 66.0 | - * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 | - * | Murmur3 | | 32 | 3.9 GB/s | 56.1 | - * | SipHash* | | 64 | 3.0 GB/s | 43.2 | - * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 | - * | HighwayHash | | 64 | 1.4 GB/s | 6.0 | - * | FNV64 | | 64 | 1.2 GB/s | 62.7 | - * | Blake2* | | 256 | 1.1 GB/s | 5.1 | - * | SHA1* | | 160 | 0.8 GB/s | 5.6 | - * | MD5* | | 128 | 0.6 GB/s | 7.8 | - * @note - * - Hashes which require a specific ISA extension are noted. SSE2 is also noted, - * even though it is mandatory on x64. - * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic - * by modern standards. - * - Small data velocity is a rough average of algorithm's efficiency for small - * data. For more accurate information, see the wiki. - * - More benchmarks and strength tests are found on the wiki: - * https://github.com/Cyan4973/xxHash/wiki - * - * Usage - * ------ - * All xxHash variants use a similar API. Changing the algorithm is a trivial - * substitution. - * - * @pre - * For functions which take an input and length parameter, the following - * requirements are assumed: - * - The range from [`input`, `input + length`) is valid, readable memory. - * - The only exception is if the `length` is `0`, `input` may be `NULL`. - * - For C++, the objects must have the *TriviallyCopyable* property, as the - * functions access bytes directly as if it was an array of `unsigned char`. - * - * @anchor single_shot_example - * **Single Shot** - * - * These functions are stateless functions which hash a contiguous block of memory, - * immediately returning the result. They are the easiest and usually the fastest - * option. - * - * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits() - * - * @code{.c} - * #include - * #include "xxhash.h" - * - * // Example for a function which hashes a null terminated string with XXH32(). - * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed) - * { - * // NULL pointers are only valid if the length is zero - * size_t length = (string == NULL) ? 0 : strlen(string); - * return XXH32(string, length, seed); - * } - * @endcode - * - * @anchor streaming_example - * **Streaming** - * - * These groups of functions allow incremental hashing of unknown size, even - * more than what would fit in a size_t. - * - * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset() - * - * @code{.c} - * #include - * #include - * #include "xxhash.h" - * // Example for a function which hashes a FILE incrementally with XXH3_64bits(). - * XXH64_hash_t hashFile(FILE* f) - * { - * // Allocate a state struct. Do not just use malloc() or new. - * XXH3_state_t* state = XXH3_createState(); - * assert(state != NULL && "Out of memory!"); - * // Reset the state to start a new hashing session. - * XXH3_64bits_reset(state); - * char buffer[4096]; - * size_t count; - * // Read the file in chunks - * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) { - * // Run update() as many times as necessary to process the data - * XXH3_64bits_update(state, buffer, count); - * } - * // Retrieve the finalized hash. This will not change the state. - * XXH64_hash_t result = XXH3_64bits_digest(state); - * // Free the state. Do not use free(). - * XXH3_freeState(state); - * return result; - * } - * @endcode - * - * @file xxhash.h - * xxHash prototypes and implementation - */ - -#if defined (__cplusplus) -extern "C" { -#endif - -/* **************************** - * INLINE mode - ******************************/ -/*! - * @defgroup public Public API - * Contains details on the public xxHash functions. - * @{ - */ -#ifdef XXH_DOXYGEN -/*! - * @brief Gives access to internal state declaration, required for static allocation. - * - * Incompatible with dynamic linking, due to risks of ABI changes. - * - * Usage: - * @code{.c} - * #define XXH_STATIC_LINKING_ONLY - * #include "xxhash.h" - * @endcode - */ -# define XXH_STATIC_LINKING_ONLY -/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */ - -/*! - * @brief Gives access to internal definitions. - * - * Usage: - * @code{.c} - * #define XXH_STATIC_LINKING_ONLY - * #define XXH_IMPLEMENTATION - * #include "xxhash.h" - * @endcode - */ -# define XXH_IMPLEMENTATION -/* Do not undef XXH_IMPLEMENTATION for Doxygen */ - -/*! - * @brief Exposes the implementation and marks all functions as `inline`. - * - * Use these build macros to inline xxhash into the target unit. - * Inlining improves performance on small inputs, especially when the length is - * expressed as a compile-time constant: - * - * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html - * - * It also keeps xxHash symbols private to the unit, so they are not exported. - * - * Usage: - * @code{.c} - * #define XXH_INLINE_ALL - * #include "xxhash.h" - * @endcode - * Do not compile and link xxhash.o as a separate object, as it is not useful. - */ -# define XXH_INLINE_ALL -# undef XXH_INLINE_ALL -/*! - * @brief Exposes the implementation without marking functions as inline. - */ -# define XXH_PRIVATE_API -# undef XXH_PRIVATE_API -/*! - * @brief Emulate a namespace by transparently prefixing all symbols. - * - * If you want to include _and expose_ xxHash functions from within your own - * library, but also want to avoid symbol collisions with other libraries which - * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix - * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE - * (therefore, avoid empty or numeric values). - * - * Note that no change is required within the calling program as long as it - * includes `xxhash.h`: Regular symbol names will be automatically translated - * by this header. - */ -# define XXH_NAMESPACE /* YOUR NAME HERE */ -# undef XXH_NAMESPACE -#endif - -#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ - && !defined(XXH_INLINE_ALL_31684351384) - /* this section should be traversed only once */ -# define XXH_INLINE_ALL_31684351384 - /* give access to the advanced API, required to compile implementations */ -# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ -# define XXH_STATIC_LINKING_ONLY - /* make all functions private */ -# undef XXH_PUBLIC_API -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else - /* note: this version may generate warnings for unused static functions */ -# define XXH_PUBLIC_API static -# endif - - /* - * This part deals with the special case where a unit wants to inline xxHash, - * but "xxhash.h" has previously been included without XXH_INLINE_ALL, - * such as part of some previously included *.h header file. - * Without further action, the new include would just be ignored, - * and functions would effectively _not_ be inlined (silent failure). - * The following macros solve this situation by prefixing all inlined names, - * avoiding naming collision with previous inclusions. - */ - /* Before that, we unconditionally #undef all symbols, - * in case they were already defined with XXH_NAMESPACE. - * They will then be redefined for XXH_INLINE_ALL - */ -# undef XXH_versionNumber - /* XXH32 */ -# undef XXH32 -# undef XXH32_createState -# undef XXH32_freeState -# undef XXH32_reset -# undef XXH32_update -# undef XXH32_digest -# undef XXH32_copyState -# undef XXH32_canonicalFromHash -# undef XXH32_hashFromCanonical - /* XXH64 */ -# undef XXH64 -# undef XXH64_createState -# undef XXH64_freeState -# undef XXH64_reset -# undef XXH64_update -# undef XXH64_digest -# undef XXH64_copyState -# undef XXH64_canonicalFromHash -# undef XXH64_hashFromCanonical - /* XXH3_64bits */ -# undef XXH3_64bits -# undef XXH3_64bits_withSecret -# undef XXH3_64bits_withSeed -# undef XXH3_64bits_withSecretandSeed -# undef XXH3_createState -# undef XXH3_freeState -# undef XXH3_copyState -# undef XXH3_64bits_reset -# undef XXH3_64bits_reset_withSeed -# undef XXH3_64bits_reset_withSecret -# undef XXH3_64bits_update -# undef XXH3_64bits_digest -# undef XXH3_generateSecret - /* XXH3_128bits */ -# undef XXH128 -# undef XXH3_128bits -# undef XXH3_128bits_withSeed -# undef XXH3_128bits_withSecret -# undef XXH3_128bits_reset -# undef XXH3_128bits_reset_withSeed -# undef XXH3_128bits_reset_withSecret -# undef XXH3_128bits_reset_withSecretandSeed -# undef XXH3_128bits_update -# undef XXH3_128bits_digest -# undef XXH128_isEqual -# undef XXH128_cmp -# undef XXH128_canonicalFromHash -# undef XXH128_hashFromCanonical - /* Finally, free the namespace itself */ -# undef XXH_NAMESPACE - - /* employ the namespace for XXH_INLINE_ALL */ -# define XXH_NAMESPACE XXH_INLINE_ - /* - * Some identifiers (enums, type names) are not symbols, - * but they must nonetheless be renamed to avoid redeclaration. - * Alternative solution: do not redeclare them. - * However, this requires some #ifdefs, and has a more dispersed impact. - * Meanwhile, renaming can be achieved in a single place. - */ -# define XXH_IPREF(Id) XXH_NAMESPACE ## Id -# define XXH_OK XXH_IPREF(XXH_OK) -# define XXH_ERROR XXH_IPREF(XXH_ERROR) -# define XXH_errorcode XXH_IPREF(XXH_errorcode) -# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) -# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) -# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) -# define XXH32_state_s XXH_IPREF(XXH32_state_s) -# define XXH32_state_t XXH_IPREF(XXH32_state_t) -# define XXH64_state_s XXH_IPREF(XXH64_state_s) -# define XXH64_state_t XXH_IPREF(XXH64_state_t) -# define XXH3_state_s XXH_IPREF(XXH3_state_s) -# define XXH3_state_t XXH_IPREF(XXH3_state_t) -# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) - /* Ensure the header is parsed again, even if it was previously included */ -# undef XXHASH_H_5627135585666179 -# undef XXHASH_H_STATIC_13879238742 -#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ - -/* **************************************************************** - * Stable API - *****************************************************************/ -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - -/*! @brief Marks a global symbol. */ -#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif -#endif - -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -/* XXH32 */ -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -/* XXH64 */ -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) -/* XXH3_64bits */ -# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) -# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) -# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) -# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) -# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) -# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) -# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) -# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) -# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) -# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) -# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed) -# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) -# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) -# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) -# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed) -/* XXH3_128bits */ -# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) -# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) -# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) -# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) -# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed) -# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) -# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) -# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) -# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed) -# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) -# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) -# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) -# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) -# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) -# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) -#endif - - -/* ************************************* -* Compiler specifics -***************************************/ - -/* specific declaration modes for Windows */ -#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif -#endif - -#if defined (__GNUC__) -# define XXH_CONSTF __attribute__((const)) -# define XXH_PUREF __attribute__((pure)) -# define XXH_MALLOCF __attribute__((malloc)) -#else -# define XXH_CONSTF /* disable */ -# define XXH_PUREF -# define XXH_MALLOCF -#endif - -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 8 -#define XXH_VERSION_RELEASE 2 -/*! @brief Version number, encoded as two digits each */ -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) - -/*! - * @brief Obtains the xxHash version. - * - * This is mostly useful when xxHash is compiled as a shared library, - * since the returned value comes from the library, as opposed to header file. - * - * @return @ref XXH_VERSION_NUMBER of the invoked library. - */ -XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void); - - -/* **************************** -* Common basic types -******************************/ -#include /* size_t */ -/*! - * @brief Exit code for the streaming API. - */ -typedef enum { - XXH_OK = 0, /*!< OK */ - XXH_ERROR /*!< Error */ -} XXH_errorcode; - - -/*-********************************************************************** -* 32-bit hash -************************************************************************/ -#if defined(XXH_DOXYGEN) /* Don't show include */ -/*! - * @brief An unsigned 32-bit integer. - * - * Not necessarily defined to `uint32_t` but functionally equivalent. - */ -typedef uint32_t XXH32_hash_t; - -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint32_t XXH32_hash_t; - -#else -# include -# if UINT_MAX == 0xFFFFFFFFUL - typedef unsigned int XXH32_hash_t; -# elif ULONG_MAX == 0xFFFFFFFFUL - typedef unsigned long XXH32_hash_t; -# else -# error "unsupported platform: need a 32-bit type" -# endif -#endif - -/*! - * @} - * - * @defgroup XXH32_family XXH32 family - * @ingroup public - * Contains functions used in the classic 32-bit xxHash algorithm. - * - * @note - * XXH32 is useful for older platforms, with no or poor 64-bit performance. - * Note that the @ref XXH3_family provides competitive speed for both 32-bit - * and 64-bit systems, and offers true 64/128 bit hash results. - * - * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families - * @see @ref XXH32_impl for implementation details - * @{ - */ - -/*! - * @brief Calculates the 32-bit hash of @p input using xxHash32. - * - * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s - * - * See @ref single_shot_example "Single Shot Example" for an example. - * - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * @param seed The 32-bit seed to alter the hash's output predictably. - * - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return The calculated 32-bit hash value. - * - * @see - * XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): - * Direct equivalents for the other variants of xxHash. - * @see - * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); - -#ifndef XXH_NO_STREAM -/*! - * Streaming functions generate the xxHash value from an incremental input. - * This method is slower than single-call functions, due to state management. - * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. - * - * An XXH state must first be allocated using `XXH*_createState()`. - * - * Start a new hash by initializing the state with a seed using `XXH*_reset()`. - * - * Then, feed the hash state by calling `XXH*_update()` as many times as necessary. - * - * The function returns an error code, with 0 meaning OK, and any other value - * meaning there is an error. - * - * Finally, a hash value can be produced anytime, by using `XXH*_digest()`. - * This function returns the nn-bits hash as an int or long long. - * - * It's still possible to continue inserting input into the hash state after a - * digest, and generate new hash values later on by invoking `XXH*_digest()`. - * - * When done, release the state using `XXH*_freeState()`. - * - * @see streaming_example at the top of @ref xxhash.h for an example. - */ - -/*! - * @typedef struct XXH32_state_s XXH32_state_t - * @brief The opaque state struct for the XXH32 streaming API. - * - * @see XXH32_state_s for details. - */ -typedef struct XXH32_state_s XXH32_state_t; - -/*! - * @brief Allocates an @ref XXH32_state_t. - * - * Must be freed with XXH32_freeState(). - * @return An allocated XXH32_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void); -/*! - * @brief Frees an @ref XXH32_state_t. - * - * Must be allocated with XXH32_createState(). - * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -/*! - * @brief Copies one @ref XXH32_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); - -/*! - * @brief Resets an @ref XXH32_state_t to begin a new hash. - * - * This function resets and seeds a state. Call it before @ref XXH32_update(). - * - * @param statePtr The state struct to reset. - * @param seed The 32-bit seed to alter the hash result predictably. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); - -/*! - * @brief Consumes a block of @p input to an @ref XXH32_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); - -/*! - * @brief Returns the calculated hash value from an @ref XXH32_state_t. - * - * @note - * Calling XXH32_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated xxHash32 value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/* - * The default return values from XXH functions are unsigned 32 and 64 bit - * integers. - * This the simplest and fastest format for further post-processing. - * - * However, this leaves open the question of what is the order on the byte level, - * since little and big endian conventions will store the same number differently. - * - * The canonical representation settles this issue by mandating big-endian - * convention, the same convention as human-readable numbers (large digits first). - * - * When writing hash values to storage, sending them over a network, or printing - * them, it's highly recommended to use the canonical representation to ensure - * portability across a wider range of systems, present and future. - * - * The following functions allow transformation of hash values to and from - * canonical format. - */ - -/*! - * @brief Canonical (big endian) representation of @ref XXH32_hash_t. - */ -typedef struct { - unsigned char digest[4]; /*!< Hash bytes, big endian */ -} XXH32_canonical_t; - -/*! - * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t. - * - * @param dst The @ref XXH32_canonical_t pointer to be stored to. - * @param hash The @ref XXH32_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); - -/*! - * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t. - * - * @param src The @ref XXH32_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); - - -/*! @cond Doxygen ignores this part */ -#ifdef __has_attribute -# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) -#else -# define XXH_HAS_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * C23 __STDC_VERSION__ number hasn't been specified yet. For now - * leave as `201711L` (C17 + 1). - * TODO: Update to correct value when its been specified. - */ -#define XXH_C23_VN 201711L -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* C-language Attributes are added in C23. */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute) -# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) -#else -# define XXH_HAS_C_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -#if defined(__cplusplus) && defined(__has_cpp_attribute) -# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define XXH_HAS_CPP_ATTRIBUTE(x) 0 -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute - * introduced in CPP17 and C23. - * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough - * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough - */ -#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough) -# define XXH_FALLTHROUGH [[fallthrough]] -#elif XXH_HAS_ATTRIBUTE(__fallthrough__) -# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__)) -#else -# define XXH_FALLTHROUGH /* fallthrough */ -#endif -/*! @endcond */ - -/*! @cond Doxygen ignores this part */ -/* - * Define XXH_NOESCAPE for annotated pointers in public API. - * https://clang.llvm.org/docs/AttributeReference.html#noescape - * As of writing this, only supported by clang. - */ -#if XXH_HAS_ATTRIBUTE(noescape) -# define XXH_NOESCAPE __attribute__((noescape)) -#else -# define XXH_NOESCAPE -#endif -/*! @endcond */ - - -/*! - * @} - * @ingroup public - * @{ - */ - -#ifndef XXH_NO_LONG_LONG -/*-********************************************************************** -* 64-bit hash -************************************************************************/ -#if defined(XXH_DOXYGEN) /* don't include */ -/*! - * @brief An unsigned 64-bit integer. - * - * Not necessarily defined to `uint64_t` but functionally equivalent. - */ -typedef uint64_t XXH64_hash_t; -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint64_t XXH64_hash_t; -#else -# include -# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL - /* LP64 ABI says uint64_t is unsigned long */ - typedef unsigned long XXH64_hash_t; -# else - /* the following type must have a width of 64-bit */ - typedef unsigned long long XXH64_hash_t; -# endif -#endif - -/*! - * @} - * - * @defgroup XXH64_family XXH64 family - * @ingroup public - * @{ - * Contains functions used in the classic 64-bit xxHash algorithm. - * - * @note - * XXH3 provides competitive speed for both 32-bit and 64-bit systems, - * and offers true 64/128 bit hash results. - * It provides better speed for systems with vector processing capabilities. - */ - -/*! - * @brief Calculates the 64-bit hash of @p input using xxHash64. - * - * This function usually runs faster on 64-bit systems, but slower on 32-bit - * systems (see benchmark). - * - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * @param seed The 64-bit seed to alter the hash's output predictably. - * - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return The calculated 64-bit hash. - * - * @see - * XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128(): - * Direct equivalents for the other variants of xxHash. - * @see - * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/*! - * @brief The opaque state struct for the XXH64 streaming API. - * - * @see XXH64_state_s for details. - */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - -/*! - * @brief Allocates an @ref XXH64_state_t. - * - * Must be freed with XXH64_freeState(). - * @return An allocated XXH64_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); - -/*! - * @brief Frees an @ref XXH64_state_t. - * - * Must be allocated with XXH64_createState(). - * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - -/*! - * @brief Copies one @ref XXH64_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state); - -/*! - * @brief Resets an @ref XXH64_state_t to begin a new hash. - * - * This function resets and seeds a state. Call it before @ref XXH64_update(). - * - * @param statePtr The state struct to reset. - * @param seed The 64-bit seed to alter the hash result predictably. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); - -/*! - * @brief Consumes a block of @p input to an @ref XXH64_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief Returns the calculated hash value from an @ref XXH64_state_t. - * - * @note - * Calling XXH64_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated xxHash64 value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ -/******* Canonical representation *******/ - -/*! - * @brief Canonical (big endian) representation of @ref XXH64_hash_t. - */ -typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; - -/*! - * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t. - * - * @param dst The @ref XXH64_canonical_t pointer to be stored to. - * @param hash The @ref XXH64_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); - -/*! - * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t. - * - * @param src The @ref XXH64_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src); - -#ifndef XXH_NO_XXH3 - -/*! - * @} - * ************************************************************************ - * @defgroup XXH3_family XXH3 family - * @ingroup public - * @{ - * - * XXH3 is a more recent hash algorithm featuring: - * - Improved speed for both small and large inputs - * - True 64-bit and 128-bit outputs - * - SIMD acceleration - * - Improved 32-bit viability - * - * Speed analysis methodology is explained here: - * - * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html - * - * Compared to XXH64, expect XXH3 to run approximately - * ~2x faster on large inputs and >3x faster on small ones, - * exact differences vary depending on platform. - * - * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic, - * but does not require it. - * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3 - * at competitive speeds, even without vector support. Further details are - * explained in the implementation. - * - * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD - * implementations for many common platforms: - * - AVX512 - * - AVX2 - * - SSE2 - * - ARM NEON - * - WebAssembly SIMD128 - * - POWER8 VSX - * - s390x ZVector - * This can be controlled via the @ref XXH_VECTOR macro, but it automatically - * selects the best version according to predefined macros. For the x86 family, an - * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c. - * - * XXH3 implementation is portable: - * it has a generic C90 formulation that can be compiled on any platform, - * all implementations generate exactly the same hash value on all platforms. - * Starting from v0.8.0, it's also labelled "stable", meaning that - * any future version will also generate the same hash value. - * - * XXH3 offers 2 variants, _64bits and _128bits. - * - * When only 64 bits are needed, prefer invoking the _64bits variant, as it - * reduces the amount of mixing, resulting in faster speed on small inputs. - * It's also generally simpler to manipulate a scalar return type than a struct. - * - * The API supports one-shot hashing, streaming mode, and custom secrets. - */ -/*-********************************************************************** -* XXH3 64-bit variant -************************************************************************/ - -/*! - * @brief 64-bit unseeded variant of XXH3. - * - * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however - * it may have slightly better performance due to constant propagation of the - * defaults. - * - * @see - * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms - * @see - * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants - * @see - * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief 64-bit seeded variant of XXH3 - * - * This variant generates a custom secret on the fly based on default secret - * altered using the `seed` value. - * - * While this operation is decently fast, note that it's not completely free. - * - * @note - * seed == 0 produces the same results as @ref XXH3_64bits(). - * - * @param input The data to hash - * @param length The length - * @param seed The 64-bit seed to alter the state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); - -/*! - * The bare minimum size for a custom secret. - * - * @see - * XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(), - * XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret(). - */ -#define XXH3_SECRET_SIZE_MIN 136 - -/*! - * @brief 64-bit variant of XXH3 with a custom "secret". - * - * It's possible to provide any blob of bytes as a "secret" to generate the hash. - * This makes it more difficult for an external actor to prepare an intentional collision. - * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN). - * However, the quality of the secret impacts the dispersion of the hash algorithm. - * Therefore, the secret _must_ look like a bunch of random bytes. - * Avoid "trivial" or structured data such as repeated sequences or a text document. - * Whenever in doubt about the "randomness" of the blob of bytes, - * consider employing "XXH3_generateSecret()" instead (see below). - * It will generate a proper high entropy secret derived from the blob of bytes. - * Another advantage of using XXH3_generateSecret() is that - * it guarantees that all bits within the initial blob of bytes - * will impact every bit of the output. - * This is not necessarily the case when using the blob of bytes directly - * because, when hashing _small_ inputs, only a portion of the secret is employed. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); - - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/* - * Streaming requires state maintenance. - * This operation costs memory and CPU. - * As a consequence, streaming is slower than one-shot hashing. - * For better performance, prefer one-shot functions whenever applicable. - */ - -/*! - * @brief The state struct for the XXH3 streaming API. - * - * @see XXH3_state_s for details. - */ -typedef struct XXH3_state_s XXH3_state_t; -XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); - -/*! - * @brief Copies one @ref XXH3_state_t to another. - * - * @param dst_state The state to copy to. - * @param src_state The state to copy from. - * @pre - * @p dst_state and @p src_state must not be `NULL` and must not overlap. - */ -XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state); - -/*! - * @brief Resets an @ref XXH3_state_t to begin a new hash. - * - * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_64bits_update(). - * Digest will be equivalent to `XXH3_64bits()`. - * - * @param statePtr The state struct to reset. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); - -/*! - * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash. - * - * This function resets `statePtr` and generate a secret from `seed`. Call it before @ref XXH3_64bits_update(). - * Digest will be equivalent to `XXH3_64bits_withSeed()`. - * - * @param statePtr The state struct to reset. - * @param seed The 64-bit seed to alter the state. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); - -/*! - * XXH3_64bits_reset_withSecret(): - * `secret` is referenced, it _must outlive_ the hash streaming session. - * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`, - * and the quality of produced hash values depends on secret's entropy - * (secret's content should look like a bunch of random bytes). - * When in doubt about the randomness of a candidate `secret`, - * consider employing `XXH3_generateSecret()` instead (see below). - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); - -/*! - * @brief Consumes a block of @p input to an @ref XXH3_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t. - * - * @note - * Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated XXH3 64-bit hash value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ - -/* note : canonical representation of XXH3 is the same as XXH64 - * since they both produce XXH64_hash_t values */ - - -/*-********************************************************************** -* XXH3 128-bit variant -************************************************************************/ - -/*! - * @brief The return value from 128-bit hashes. - * - * Stored in little endian order, although the fields themselves are in native - * endianness. - */ -typedef struct { - XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ - XXH64_hash_t high64; /*!< `value >> 64` */ -} XXH128_hash_t; - -/*! - * @brief Unseeded 128-bit variant of XXH3 - * - * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead - * for shorter inputs. - * - * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however - * it may have slightly better performance due to constant propagation of the - * defaults. - * - * @see - * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms - * @see - * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants - * @see - * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version. - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len); -/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); -/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); - -/******* Streaming *******/ -#ifndef XXH_NO_STREAM -/* - * Streaming requires state maintenance. - * This operation costs memory and CPU. - * As a consequence, streaming is slower than one-shot hashing. - * For better performance, prefer one-shot functions whenever applicable. - * - * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits(). - * Use already declared XXH3_createState() and XXH3_freeState(). - * - * All reset and streaming functions have same meaning as their 64-bit counterpart. - */ - -/*! - * @brief Resets an @ref XXH3_state_t to begin a new hash. - * - * This function resets `statePtr` and generate a secret with default parameters. Call it before @ref XXH3_128bits_update(). - * Digest will be equivalent to `XXH3_128bits()`. - * - * @param statePtr The state struct to reset. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); - -/*! - * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash. - * - * This function resets `statePtr` and generate a secret from `seed`. Call it before @ref XXH3_128bits_update(). - * Digest will be equivalent to `XXH3_128bits_withSeed()`. - * - * @param statePtr The state struct to reset. - * @param seed The 64-bit seed to alter the state. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - * - */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); -/*! @brief Custom secret 128-bit variant of XXH3. @see XXH_64bits_reset_withSecret(). */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); - -/*! - * @brief Consumes a block of @p input to an @ref XXH3_state_t. - * - * Call this to incrementally consume blocks of data. - * - * @param statePtr The state struct to update. - * @param input The block of data to be hashed, at least @p length bytes in size. - * @param length The length of @p input, in bytes. - * - * @pre - * @p statePtr must not be `NULL`. - * @pre - * The memory between @p input and @p input + @p length must be valid, - * readable, contiguous memory. However, if @p length is `0`, @p input may be - * `NULL`. In C++, this also must be *TriviallyCopyable*. - * - * @return @ref XXH_OK on success, @ref XXH_ERROR on failure. - */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); - -/*! - * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t. - * - * @note - * Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update, - * digest, and update again. - * - * @param statePtr The state struct to calculate the hash from. - * - * @pre - * @p statePtr must not be `NULL`. - * - * @return The calculated XXH3 128-bit hash value from that state. - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); -#endif /* !XXH_NO_STREAM */ - -/* Following helper functions make it possible to compare XXH128_hast_t values. - * Since XXH128_hash_t is a structure, this capability is not offered by the language. - * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */ - -/*! - * XXH128_isEqual(): - * Return: 1 if `h1` and `h2` are equal, 0 if they are not. - */ -XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); - -/*! - * @brief Compares two @ref XXH128_hash_t - * This comparator is compatible with stdlib's `qsort()`/`bsearch()`. - * - * @return: >0 if *h128_1 > *h128_2 - * =0 if *h128_1 == *h128_2 - * <0 if *h128_1 < *h128_2 - */ -XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2); - - -/******* Canonical representation *******/ -typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; - - -/*! - * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t. - * - * @param dst The @ref XXH128_canonical_t pointer to be stored to. - * @param hash The @ref XXH128_hash_t to be converted. - * - * @pre - * @p dst must not be `NULL`. - */ -XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash); - -/*! - * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t. - * - * @param src The @ref XXH128_canonical_t to convert. - * - * @pre - * @p src must not be `NULL`. - * - * @return The converted hash. - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src); - - -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ - -/*! - * @} - */ -#endif /* XXHASH_H_5627135585666179 */ - - - -#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) -#define XXHASH_H_STATIC_13879238742 -/* **************************************************************************** - * This section contains declarations which are not guaranteed to remain stable. - * They may change in future versions, becoming incompatible with a different - * version of the library. - * These declarations should only be used with static linking. - * Never use them in association with dynamic linking! - ***************************************************************************** */ - -/* - * These definitions are only present to allow static allocation - * of XXH states, on stack or in a struct, for example. - * Never **ever** access their members directly. - */ - -/*! - * @internal - * @brief Structure for XXH32 streaming API. - * - * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, - * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is - * an opaque type. This allows fields to safely be changed. - * - * Typedef'd to @ref XXH32_state_t. - * Do not access the members of this struct directly. - * @see XXH64_state_s, XXH3_state_s - */ -struct XXH32_state_s { - XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ - XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ - XXH32_hash_t v[4]; /*!< Accumulator lanes */ - XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */ - XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */ - XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ -}; /* typedef'd to XXH32_state_t */ - - -#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ - -/*! - * @internal - * @brief Structure for XXH64 streaming API. - * - * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, - * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is - * an opaque type. This allows fields to safely be changed. - * - * Typedef'd to @ref XXH64_state_t. - * Do not access the members of this struct directly. - * @see XXH32_state_s, XXH3_state_s - */ -struct XXH64_state_s { - XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ - XXH64_hash_t v[4]; /*!< Accumulator lanes */ - XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */ - XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */ - XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ - XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */ -}; /* typedef'd to XXH64_state_t */ - -#ifndef XXH_NO_XXH3 - -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */ -# include -# define XXH_ALIGN(n) alignas(n) -#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ -/* In C++ alignas() is a keyword */ -# define XXH_ALIGN(n) alignas(n) -#elif defined(__GNUC__) -# define XXH_ALIGN(n) __attribute__ ((aligned(n))) -#elif defined(_MSC_VER) -# define XXH_ALIGN(n) __declspec(align(n)) -#else -# define XXH_ALIGN(n) /* disabled */ -#endif - -/* Old GCC versions only accept the attribute after the type in structures. */ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ - && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && defined(__GNUC__) -# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) -#else -# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type -#endif - -/*! - * @brief The size of the internal XXH3 buffer. - * - * This is the optimal update size for incremental hashing. - * - * @see XXH3_64b_update(), XXH3_128b_update(). - */ -#define XXH3_INTERNALBUFFER_SIZE 256 - -/*! - * @internal - * @brief Default size of the secret buffer (and @ref XXH3_kSecret). - * - * This is the size used in @ref XXH3_kSecret and the seeded functions. - * - * Not to be confused with @ref XXH3_SECRET_SIZE_MIN. - */ -#define XXH3_SECRET_DEFAULT_SIZE 192 - -/*! - * @internal - * @brief Structure for XXH3 streaming API. - * - * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY, - * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. - * Otherwise it is an opaque type. - * Never use this definition in combination with dynamic library. - * This allows fields to safely be changed in the future. - * - * @note ** This structure has a strict alignment requirement of 64 bytes!! ** - * Do not allocate this with `malloc()` or `new`, - * it will not be sufficiently aligned. - * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation. - * - * Typedef'd to @ref XXH3_state_t. - * Do never access the members of this struct directly. - * - * @see XXH3_INITSTATE() for stack initialization. - * @see XXH3_createState(), XXH3_freeState(). - * @see XXH32_state_s, XXH64_state_s - */ -struct XXH3_state_s { - XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); - /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */ - XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); - /*!< Used to store a custom secret generated from a seed. */ - XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); - /*!< The internal buffer. @see XXH32_state_s::mem32 */ - XXH32_hash_t bufferedSize; - /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */ - XXH32_hash_t useSeed; - /*!< Reserved field. Needed for padding on 64-bit. */ - size_t nbStripesSoFar; - /*!< Number or stripes processed. */ - XXH64_hash_t totalLen; - /*!< Total length hashed. 64-bit even on 32-bit targets. */ - size_t nbStripesPerBlock; - /*!< Number of stripes per block. */ - size_t secretLimit; - /*!< Size of @ref customSecret or @ref extSecret */ - XXH64_hash_t seed; - /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */ - XXH64_hash_t reserved64; - /*!< Reserved field. */ - const unsigned char* extSecret; - /*!< Reference to an external secret for the _withSecret variants, NULL - * for other variants. */ - /* note: there may be some padding at the end due to alignment on 64 bytes */ -}; /* typedef'd to XXH3_state_t */ - -#undef XXH_ALIGN_MEMBER - -/*! - * @brief Initializes a stack-allocated `XXH3_state_s`. - * - * When the @ref XXH3_state_t structure is merely emplaced on stack, - * it should be initialized with XXH3_INITSTATE() or a memset() - * in case its first reset uses XXH3_NNbits_reset_withSeed(). - * This init can be omitted if the first reset uses default or _withSecret mode. - * This operation isn't necessary when the state is created with XXH3_createState(). - * Note that this doesn't prepare the state for a streaming operation, - * it's still necessary to use XXH3_NNbits_reset*() afterwards. - */ -#define XXH3_INITSTATE(XXH3_state_ptr) \ - do { \ - XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \ - tmp_xxh3_state_ptr->seed = 0; \ - tmp_xxh3_state_ptr->extSecret = NULL; \ - } while(0) - - -/*! - * simple alias to pre-selected XXH3_128bits variant - */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); - - -/* === Experimental API === */ -/* Symbols defined below must be considered tied to a specific library version. */ - -/*! - * XXH3_generateSecret(): - * - * Derive a high-entropy secret from any user-defined content, named customSeed. - * The generated secret can be used in combination with `*_withSecret()` functions. - * The `_withSecret()` variants are useful to provide a higher level of protection - * than 64-bit seed, as it becomes much more difficult for an external actor to - * guess how to impact the calculation logic. - * - * The function accepts as input a custom seed of any length and any content, - * and derives from it a high-entropy secret of length @p secretSize into an - * already allocated buffer @p secretBuffer. - * - * The generated secret can then be used with any `*_withSecret()` variant. - * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(), - * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret() - * are part of this list. They all accept a `secret` parameter - * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN) - * _and_ feature very high entropy (consist of random-looking bytes). - * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can - * be employed to ensure proper quality. - * - * @p customSeed can be anything. It can have any size, even small ones, - * and its content can be anything, even "poor entropy" sources such as a bunch - * of zeroes. The resulting `secret` will nonetheless provide all required qualities. - * - * @pre - * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN - * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior. - * - * Example code: - * @code{.c} - * #include - * #include - * #include - * #define XXH_STATIC_LINKING_ONLY // expose unstable API - * #include "xxhash.h" - * // Hashes argv[2] using the entropy from argv[1]. - * int main(int argc, char* argv[]) - * { - * char secret[XXH3_SECRET_SIZE_MIN]; - * if (argv != 3) { return 1; } - * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1])); - * XXH64_hash_t h = XXH3_64bits_withSecret( - * argv[2], strlen(argv[2]), - * secret, sizeof(secret) - * ); - * printf("%016llx\n", (unsigned long long) h); - * } - * @endcode - */ -XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize); - -/*! - * @brief Generate the same secret as the _withSeed() variants. - * - * The generated secret can be used in combination with - *`*_withSecret()` and `_withSecretandSeed()` variants. - * - * Example C++ `std::string` hash class: - * @code{.cpp} - * #include - * #define XXH_STATIC_LINKING_ONLY // expose unstable API - * #include "xxhash.h" - * // Slow, seeds each time - * class HashSlow { - * XXH64_hash_t seed; - * public: - * HashSlow(XXH64_hash_t s) : seed{s} {} - * size_t operator()(const std::string& x) const { - * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)}; - * } - * }; - * // Fast, caches the seeded secret for future uses. - * class HashFast { - * unsigned char secret[XXH3_SECRET_SIZE_MIN]; - * public: - * HashFast(XXH64_hash_t s) { - * XXH3_generateSecret_fromSeed(secret, seed); - * } - * size_t operator()(const std::string& x) const { - * return size_t{ - * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret)) - * }; - * } - * }; - * @endcode - * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes - * @param seed The seed to seed the state. - */ -XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed); - -/*! - * These variants generate hash values using either - * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes) - * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX). - * - * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`. - * `_withSeed()` has to generate the secret on the fly for "large" keys. - * It's fast, but can be perceptible for "not so large" keys (< 1 KB). - * `_withSecret()` has to generate the masks on the fly for "small" keys, - * which requires more instructions than _withSeed() variants. - * Therefore, _withSecretandSeed variant combines the best of both worlds. - * - * When @p secret has been generated by XXH3_generateSecret_fromSeed(), - * this variant produces *exactly* the same results as `_withSeed()` variant, - * hence offering only a pure speed benefit on "large" input, - * by skipping the need to regenerate the secret for every large input. - * - * Another usage scenario is to hash the secret to a 64-bit hash value, - * for example with XXH3_64bits(), which then becomes the seed, - * and then employ both the seed and the secret in _withSecretandSeed(). - * On top of speed, an added benefit is that each bit in the secret - * has a 50% chance to swap each bit in the output, via its impact to the seed. - * - * This is not guaranteed when using the secret directly in "small data" scenarios, - * because only portions of the secret are employed for small data. - */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t -XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed); -/*! @copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -#ifndef XXH_NO_STREAM -/*! @copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -/*! @copydoc XXH3_64bits_withSecretandSeed() */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); -#endif /* !XXH_NO_STREAM */ - -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ -#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# define XXH_IMPLEMENTATION -#endif - -#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ - - -/* ======================================================================== */ -/* ======================================================================== */ -/* ======================================================================== */ - - -/*-********************************************************************** - * xxHash implementation - *-********************************************************************** - * xxHash's implementation used to be hosted inside xxhash.c. - * - * However, inlining requires implementation to be visible to the compiler, - * hence be included alongside the header. - * Previously, implementation was hosted inside xxhash.c, - * which was then #included when inlining was activated. - * This construction created issues with a few build and install systems, - * as it required xxhash.c to be stored in /include directory. - * - * xxHash implementation is now directly integrated within xxhash.h. - * As a consequence, xxhash.c is no longer needed in /include. - * - * xxhash.c is still available and is still useful. - * In a "normal" setup, when xxhash is not inlined, - * xxhash.h only exposes the prototypes and public symbols, - * while xxhash.c can be built into an object file xxhash.o - * which can then be linked into the final binary. - ************************************************************************/ - -#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ - || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) -# define XXH_IMPLEM_13a8737387 - -/* ************************************* -* Tuning parameters -***************************************/ - -/*! - * @defgroup tuning Tuning parameters - * @{ - * - * Various macros to control xxHash's behavior. - */ -#ifdef XXH_DOXYGEN -/*! - * @brief Define this to disable 64-bit code. - * - * Useful if only using the @ref XXH32_family and you have a strict C90 compiler. - */ -# define XXH_NO_LONG_LONG -# undef XXH_NO_LONG_LONG /* don't actually */ -/*! - * @brief Controls how unaligned memory is accessed. - * - * By default, access to unaligned memory is controlled by `memcpy()`, which is - * safe and portable. - * - * Unfortunately, on some target/compiler combinations, the generated assembly - * is sub-optimal. - * - * The below switch allow selection of a different access method - * in the search for improved performance. - * - * @par Possible options: - * - * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy` - * @par - * Use `memcpy()`. Safe and portable. Note that most modern compilers will - * eliminate the function call and treat it as an unaligned access. - * - * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))` - * @par - * Depends on compiler extensions and is therefore not portable. - * This method is safe _if_ your compiler supports it, - * and *generally* as fast or faster than `memcpy`. - * - * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast - * @par - * Casts directly and dereferences. This method doesn't depend on the - * compiler, but it violates the C standard as it directly dereferences an - * unaligned pointer. It can generate buggy code on targets which do not - * support unaligned memory accesses, but in some circumstances, it's the - * only known way to get the most performance. - * - * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift - * @par - * Also portable. This can generate the best code on old compilers which don't - * inline small `memcpy()` calls, and it might also be faster on big-endian - * systems which lack a native byteswap instruction. However, some compilers - * will emit literal byteshifts even if the target supports unaligned access. - * - * - * @warning - * Methods 1 and 2 rely on implementation-defined behavior. Use these with - * care, as what works on one compiler/platform/optimization level may cause - * another to read garbage data or even crash. - * - * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details. - * - * Prefer these methods in priority order (0 > 3 > 1 > 2) - */ -# define XXH_FORCE_MEMORY_ACCESS 0 - -/*! - * @def XXH_SIZE_OPT - * @brief Controls how much xxHash optimizes for size. - * - * xxHash, when compiled, tends to result in a rather large binary size. This - * is mostly due to heavy usage to forced inlining and constant folding of the - * @ref XXH3_family to increase performance. - * - * However, some developers prefer size over speed. This option can - * significantly reduce the size of the generated code. When using the `-Os` - * or `-Oz` options on GCC or Clang, this is defined to 1 by default, - * otherwise it is defined to 0. - * - * Most of these size optimizations can be controlled manually. - * - * This is a number from 0-2. - * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed - * comes first. - * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more - * conservative and disables hacks that increase code size. It implies the - * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0, - * and @ref XXH3_NEON_LANES == 8 if they are not already defined. - * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible. - * Performance may cry. For example, the single shot functions just use the - * streaming API. - */ -# define XXH_SIZE_OPT 0 - -/*! - * @def XXH_FORCE_ALIGN_CHECK - * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32() - * and XXH64() only). - * - * This is an important performance trick for architectures without decent - * unaligned memory access performance. - * - * It checks for input alignment, and when conditions are met, uses a "fast - * path" employing direct 32-bit/64-bit reads, resulting in _dramatically - * faster_ read speed. - * - * The check costs one initial branch per hash, which is generally negligible, - * but not zero. - * - * Moreover, it's not useful to generate an additional code path if memory - * access uses the same instruction for both aligned and unaligned - * addresses (e.g. x86 and aarch64). - * - * In these cases, the alignment check can be removed by setting this macro to 0. - * Then the code will always use unaligned memory access. - * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips - * which are platforms known to offer good unaligned memory accesses performance. - * - * It is also disabled by default when @ref XXH_SIZE_OPT >= 1. - * - * This option does not affect XXH3 (only XXH32 and XXH64). - */ -# define XXH_FORCE_ALIGN_CHECK 0 - -/*! - * @def XXH_NO_INLINE_HINTS - * @brief When non-zero, sets all functions to `static`. - * - * By default, xxHash tries to force the compiler to inline almost all internal - * functions. - * - * This can usually improve performance due to reduced jumping and improved - * constant folding, but significantly increases the size of the binary which - * might not be favorable. - * - * Additionally, sometimes the forced inlining can be detrimental to performance, - * depending on the architecture. - * - * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the - * compiler full control on whether to inline or not. - * - * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if - * @ref XXH_SIZE_OPT >= 1, this will automatically be defined. - */ -# define XXH_NO_INLINE_HINTS 0 - -/*! - * @def XXH3_INLINE_SECRET - * @brief Determines whether to inline the XXH3 withSecret code. - * - * When the secret size is known, the compiler can improve the performance - * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret(). - * - * However, if the secret size is not known, it doesn't have any benefit. This - * happens when xxHash is compiled into a global symbol. Therefore, if - * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0. - * - * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers - * that are *sometimes* force inline on -Og, and it is impossible to automatically - * detect this optimization level. - */ -# define XXH3_INLINE_SECRET 0 - -/*! - * @def XXH32_ENDJMP - * @brief Whether to use a jump for `XXH32_finalize`. - * - * For performance, `XXH32_finalize` uses multiple branches in the finalizer. - * This is generally preferable for performance, - * but depending on exact architecture, a jmp may be preferable. - * - * This setting is only possibly making a difference for very small inputs. - */ -# define XXH32_ENDJMP 0 - -/*! - * @internal - * @brief Redefines old internal names. - * - * For compatibility with code that uses xxHash's internals before the names - * were changed to improve namespacing. There is no other reason to use this. - */ -# define XXH_OLD_NAMES -# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ - -/*! - * @def XXH_NO_STREAM - * @brief Disables the streaming API. - * - * When xxHash is not inlined and the streaming functions are not used, disabling - * the streaming functions can improve code size significantly, especially with - * the @ref XXH3_family which tends to make constant folded copies of itself. - */ -# define XXH_NO_STREAM -# undef XXH_NO_STREAM /* don't actually */ -#endif /* XXH_DOXYGEN */ -/*! - * @} - */ - -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ - /* prefer __packed__ structures (method 1) for GCC - * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy - * which for some reason does unaligned loads. */ -# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -#ifndef XXH_SIZE_OPT - /* default to 1 for -Os or -Oz */ -# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__) -# define XXH_SIZE_OPT 1 -# else -# define XXH_SIZE_OPT 0 -# endif -#endif - -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ - /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */ -# if XXH_SIZE_OPT >= 1 || \ - defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \ - || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */ -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif - -#ifndef XXH_NO_INLINE_HINTS -# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */ -# define XXH_NO_INLINE_HINTS 1 -# else -# define XXH_NO_INLINE_HINTS 0 -# endif -#endif - -#ifndef XXH3_INLINE_SECRET -# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \ - || !defined(XXH_INLINE_ALL) -# define XXH3_INLINE_SECRET 0 -# else -# define XXH3_INLINE_SECRET 1 -# endif -#endif - -#ifndef XXH32_ENDJMP -/* generally preferable for performance */ -# define XXH32_ENDJMP 0 -#endif - -/*! - * @defgroup impl Implementation - * @{ - */ - - -/* ************************************* -* Includes & Memory related functions -***************************************/ -#if defined(XXH_NO_STREAM) -/* nothing */ -#elif defined(XXH_NO_STDLIB) - -/* When requesting to disable any mention of stdlib, - * the library loses the ability to invoked malloc / free. - * In practice, it means that functions like `XXH*_createState()` - * will always fail, and return NULL. - * This flag is useful in situations where - * xxhash.h is integrated into some kernel, embedded or limited environment - * without access to dynamic allocation. - */ - -static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; } -static void XXH_free(void* p) { (void)p; } - -#else - -/* - * Modify the local functions below should you wish to use - * different memory routines for malloc() and free() - */ -#include - -/*! - * @internal - * @brief Modify this function to use a different routine than malloc(). - */ -static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); } - -/*! - * @internal - * @brief Modify this function to use a different routine than free(). - */ -static void XXH_free(void* p) { free(p); } - -#endif /* XXH_NO_STDLIB */ - -#include - -/*! - * @internal - * @brief Modify this function to use a different routine than memcpy(). - */ -static void* XXH_memcpy(void* dest, const void* src, size_t size) -{ - return memcpy(dest,src,size); -} - -#include /* ULLONG_MAX */ - - -/* ************************************* -* Compiler Specific Options -***************************************/ -#ifdef _MSC_VER /* Visual Studio warning fix */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - -#if XXH_NO_INLINE_HINTS /* disable inlining hints */ -# if defined(__GNUC__) || defined(__clang__) -# define XXH_FORCE_INLINE static __attribute__((unused)) -# else -# define XXH_FORCE_INLINE static -# endif -# define XXH_NO_INLINE static -/* enable inlining hints */ -#elif defined(__GNUC__) || defined(__clang__) -# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused)) -# define XXH_NO_INLINE static __attribute__((noinline)) -#elif defined(_MSC_VER) /* Visual Studio */ -# define XXH_FORCE_INLINE static __forceinline -# define XXH_NO_INLINE static __declspec(noinline) -#elif defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ -# define XXH_FORCE_INLINE static inline -# define XXH_NO_INLINE static -#else -# define XXH_FORCE_INLINE static -# define XXH_NO_INLINE static -#endif - -#if XXH3_INLINE_SECRET -# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE -#else -# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE -#endif - - -/* ************************************* -* Debug -***************************************/ -/*! - * @ingroup tuning - * @def XXH_DEBUGLEVEL - * @brief Sets the debugging level. - * - * XXH_DEBUGLEVEL is expected to be defined externally, typically via the - * compiler's command line options. The value must be a number. - */ -#ifndef XXH_DEBUGLEVEL -# ifdef DEBUGLEVEL /* backwards compat */ -# define XXH_DEBUGLEVEL DEBUGLEVEL -# else -# define XXH_DEBUGLEVEL 0 -# endif -#endif - -#if (XXH_DEBUGLEVEL>=1) -# include /* note: can still be disabled with NDEBUG */ -# define XXH_ASSERT(c) assert(c) -#else -# if defined(__INTEL_COMPILER) -# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c)) -# else -# define XXH_ASSERT(c) XXH_ASSUME(c) -# endif -#endif - -/* note: use after variable declarations */ -#ifndef XXH_STATIC_ASSERT -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0) -# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */ -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0) -# else -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0) -# endif -# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c) -#endif - -/*! - * @internal - * @def XXH_COMPILER_GUARD(var) - * @brief Used to prevent unwanted optimizations for @p var. - * - * It uses an empty GCC inline assembly statement with a register constraint - * which forces @p var into a general purpose register (eg eax, ebx, ecx - * on x86) and marks it as modified. - * - * This is used in a few places to avoid unwanted autovectorization (e.g. - * XXH32_round()). All vectorization we want is explicit via intrinsics, - * and _usually_ isn't wanted elsewhere. - * - * We also use it to prevent unwanted constant folding for AArch64 in - * XXH3_initCustomSecret_scalar(). - */ -#if defined(__GNUC__) || defined(__clang__) -# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var)) -#else -# define XXH_COMPILER_GUARD(var) ((void)0) -#endif - -/* Specifically for NEON vectors which use the "w" constraint, on - * Clang. */ -#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__) -# define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var)) -#else -# define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0) -#endif - -/* ************************************* -* Basic Types -***************************************/ -#if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t xxh_u8; -#else - typedef unsigned char xxh_u8; -#endif -typedef XXH32_hash_t xxh_u32; - -#ifdef XXH_OLD_NAMES -# warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly" -# define BYTE xxh_u8 -# define U8 xxh_u8 -# define U32 xxh_u32 -#endif - -/* *** Memory access *** */ - -/*! - * @internal - * @fn xxh_u32 XXH_read32(const void* ptr) - * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness. - * - * Affected by @ref XXH_FORCE_MEMORY_ACCESS. - * - * @param ptr The pointer to read from. - * @return The 32-bit native endian integer from the bytes at @p ptr. - */ - -/*! - * @internal - * @fn xxh_u32 XXH_readLE32(const void* ptr) - * @brief Reads an unaligned 32-bit little endian integer from @p ptr. - * - * Affected by @ref XXH_FORCE_MEMORY_ACCESS. - * - * @param ptr The pointer to read from. - * @return The 32-bit little endian integer from the bytes at @p ptr. - */ - -/*! - * @internal - * @fn xxh_u32 XXH_readBE32(const void* ptr) - * @brief Reads an unaligned 32-bit big endian integer from @p ptr. - * - * Affected by @ref XXH_FORCE_MEMORY_ACCESS. - * - * @param ptr The pointer to read from. - * @return The 32-bit big endian integer from the bytes at @p ptr. - */ - -/*! - * @internal - * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) - * @brief Like @ref XXH_readLE32(), but has an option for aligned reads. - * - * Affected by @ref XXH_FORCE_MEMORY_ACCESS. - * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is - * always @ref XXH_alignment::XXH_unaligned. - * - * @param ptr The pointer to read from. - * @param align Whether @p ptr is aligned. - * @pre - * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte - * aligned. - * @return The 32-bit little endian integer from the bytes at @p ptr. - */ - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) -/* - * Manual byteshift. Best for old compilers which don't inline memcpy. - * We actually directly use XXH_readLE32 and XXH_readBE32. - */ -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* - * Force direct memory access. Only works on CPU which support unaligned memory - * access in hardware. - */ -static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* - * __attribute__((aligned(1))) is supported by gcc and clang. Originally the - * documentation claimed that it only increased the alignment, but actually it - * can decrease it on gcc, clang, and icc: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, - * https://gcc.godbolt.org/z/xYez1j67Y. - */ -#ifdef XXH_OLD_NAMES -typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; -#endif -static xxh_u32 XXH_read32(const void* ptr) -{ - typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32; - return *((const xxh_unalign32*)ptr); -} - -#else - -/* - * Portable and safe solution. Generally efficient. - * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html - */ -static xxh_u32 XXH_read32(const void* memPtr) -{ - xxh_u32 val; - XXH_memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - - -/* *** Endianness *** */ - -/*! - * @ingroup tuning - * @def XXH_CPU_LITTLE_ENDIAN - * @brief Whether the target is little endian. - * - * Defined to 1 if the target is little endian, or 0 if it is big endian. - * It can be defined externally, for example on the compiler command line. - * - * If it is not defined, - * a runtime check (which is usually constant folded) is used instead. - * - * @note - * This is not necessarily defined to an integer constant. - * - * @see XXH_isLittleEndian() for the runtime check. - */ -#ifndef XXH_CPU_LITTLE_ENDIAN -/* - * Try to detect endianness automatically, to avoid the nonstandard behavior - * in `XXH_isLittleEndian()` - */ -# if defined(_WIN32) /* Windows is always little endian */ \ - || defined(__LITTLE_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define XXH_CPU_LITTLE_ENDIAN 1 -# elif defined(__BIG_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define XXH_CPU_LITTLE_ENDIAN 0 -# else -/*! - * @internal - * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. - * - * Most compilers will constant fold this. - */ -static int XXH_isLittleEndian(void) -{ - /* - * Portable and well-defined behavior. - * Don't use static: it is detrimental to performance. - */ - const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; - return one.c[0]; -} -# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() -# endif -#endif - - - - -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -#ifdef __has_builtin -# define XXH_HAS_BUILTIN(x) __has_builtin(x) -#else -# define XXH_HAS_BUILTIN(x) 0 -#endif - - - -/* - * C23 and future versions have standard "unreachable()". - * Once it has been implemented reliably we can add it as an - * additional case: - * - * ``` - * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) - * # include - * # ifdef unreachable - * # define XXH_UNREACHABLE() unreachable() - * # endif - * #endif - * ``` - * - * Note C++23 also has std::unreachable() which can be detected - * as follows: - * ``` - * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L) - * # include - * # define XXH_UNREACHABLE() std::unreachable() - * #endif - * ``` - * NB: `__cpp_lib_unreachable` is defined in the `` header. - * We don't use that as including `` in `extern "C"` blocks - * doesn't work on GCC12 - */ - -#if XXH_HAS_BUILTIN(__builtin_unreachable) -# define XXH_UNREACHABLE() __builtin_unreachable() - -#elif defined(_MSC_VER) -# define XXH_UNREACHABLE() __assume(0) - -#else -# define XXH_UNREACHABLE() -#endif - -#if XXH_HAS_BUILTIN(__builtin_assume) -# define XXH_ASSUME(c) __builtin_assume(c) -#else -# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); } -#endif - -/*! - * @internal - * @def XXH_rotl32(x,r) - * @brief 32-bit rotate left. - * - * @param x The 32-bit integer to be rotated. - * @param r The number of bits to rotate. - * @pre - * @p r > 0 && @p r < 32 - * @note - * @p x and @p r may be evaluated multiple times. - * @return The rotated result. - */ -#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ - && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32 __builtin_rotateleft32 -# define XXH_rotl64 __builtin_rotateleft64 -/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ -#elif defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) -#endif - -/*! - * @internal - * @fn xxh_u32 XXH_swap32(xxh_u32 x) - * @brief A 32-bit byteswap. - * - * @param x The 32-bit integer to byteswap. - * @return @p x, byteswapped. - */ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static xxh_u32 XXH_swap32 (xxh_u32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -#endif - - -/* *************************** -* Memory reads -*****************************/ - -/*! - * @internal - * @brief Enum to indicate whether a pointer is aligned. - */ -typedef enum { - XXH_aligned, /*!< Aligned */ - XXH_unaligned /*!< Possibly unaligned */ -} XXH_alignment; - -/* - * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. - * - * This is ideal for older compilers which don't inline memcpy. - */ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) - -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[0] - | ((xxh_u32)bytePtr[1] << 8) - | ((xxh_u32)bytePtr[2] << 16) - | ((xxh_u32)bytePtr[3] << 24); -} - -XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[3] - | ((xxh_u32)bytePtr[2] << 8) - | ((xxh_u32)bytePtr[1] << 16) - | ((xxh_u32)bytePtr[0] << 24); -} - -#else -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); -} - -static xxh_u32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} -#endif - -XXH_FORCE_INLINE xxh_u32 -XXH_readLE32_align(const void* ptr, XXH_alignment align) -{ - if (align==XXH_unaligned) { - return XXH_readLE32(ptr); - } else { - return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); - } -} - - -/* ************************************* -* Misc -***************************************/ -/*! @ingroup public */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ******************************************************************* -* 32-bit hash functions -*********************************************************************/ -/*! - * @} - * @defgroup XXH32_impl XXH32 implementation - * @ingroup impl - * - * Details on the XXH32 implementation. - * @{ - */ - /* #define instead of static const, to be used as initializers */ -#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ -#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ -#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ -#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ -#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ - -#ifdef XXH_OLD_NAMES -# define PRIME32_1 XXH_PRIME32_1 -# define PRIME32_2 XXH_PRIME32_2 -# define PRIME32_3 XXH_PRIME32_3 -# define PRIME32_4 XXH_PRIME32_4 -# define PRIME32_5 XXH_PRIME32_5 -#endif - -/*! - * @internal - * @brief Normal stripe processing routine. - * - * This shuffles the bits so that any bit from @p input impacts several bits in - * @p acc. - * - * @param acc The accumulator lane. - * @param input The stripe of input to mix. - * @return The mixed accumulator lane. - */ -static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) -{ - acc += input * XXH_PRIME32_2; - acc = XXH_rotl32(acc, 13); - acc *= XXH_PRIME32_1; -#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) - /* - * UGLY HACK: - * A compiler fence is the only thing that prevents GCC and Clang from - * autovectorizing the XXH32 loop (pragmas and attributes don't work for some - * reason) without globally disabling SSE4.1. - * - * The reason we want to avoid vectorization is because despite working on - * 4 integers at a time, there are multiple factors slowing XXH32 down on - * SSE4: - * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on - * newer chips!) making it slightly slower to multiply four integers at - * once compared to four integers independently. Even when pmulld was - * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE - * just to multiply unless doing a long operation. - * - * - Four instructions are required to rotate, - * movqda tmp, v // not required with VEX encoding - * pslld tmp, 13 // tmp <<= 13 - * psrld v, 19 // x >>= 19 - * por v, tmp // x |= tmp - * compared to one for scalar: - * roll v, 13 // reliably fast across the board - * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason - * - * - Instruction level parallelism is actually more beneficial here because - * the SIMD actually serializes this operation: While v1 is rotating, v2 - * can load data, while v3 can multiply. SSE forces them to operate - * together. - * - * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing - * the loop. NEON is only faster on the A53, and with the newer cores, it is less - * than half the speed. - * - * Additionally, this is used on WASM SIMD128 because it JITs to the same - * SIMD instructions and has the same issue. - */ - XXH_COMPILER_GUARD(acc); -#endif - return acc; -} - -/*! - * @internal - * @brief Mixes all bits to finalize the hash. - * - * The final mix ensures that all input bits have a chance to impact any bit in - * the output digest, resulting in an unbiased distribution. - * - * @param hash The hash to avalanche. - * @return The avalanched hash. - */ -static xxh_u32 XXH32_avalanche(xxh_u32 hash) -{ - hash ^= hash >> 15; - hash *= XXH_PRIME32_2; - hash ^= hash >> 13; - hash *= XXH_PRIME32_3; - hash ^= hash >> 16; - return hash; -} - -#define XXH_get32bits(p) XXH_readLE32_align(p, align) - -/*! - * @internal - * @brief Processes the last 0-15 bytes of @p ptr. - * - * There may be up to 15 bytes remaining to consume from the input. - * This final stage will digest them to ensure that all input bytes are present - * in the final mix. - * - * @param hash The hash to finalize. - * @param ptr The pointer to the remaining input. - * @param len The remaining length, modulo 16. - * @param align Whether @p ptr is aligned. - * @return The finalized hash. - * @see XXH64_finalize(). - */ -static XXH_PUREF xxh_u32 -XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) -{ -#define XXH_PROCESS1 do { \ - hash += (*ptr++) * XXH_PRIME32_5; \ - hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ -} while (0) - -#define XXH_PROCESS4 do { \ - hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ - ptr += 4; \ - hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ -} while (0) - - if (ptr==NULL) XXH_ASSERT(len == 0); - - /* Compact rerolled version; generally faster */ - if (!XXH32_ENDJMP) { - len &= 15; - while (len >= 4) { - XXH_PROCESS4; - len -= 4; - } - while (len > 0) { - XXH_PROCESS1; - --len; - } - return XXH32_avalanche(hash); - } else { - switch(len&15) /* or switch(bEnd - p) */ { - case 12: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 8: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 4: XXH_PROCESS4; - return XXH32_avalanche(hash); - - case 13: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 9: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 5: XXH_PROCESS4; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 14: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 10: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 6: XXH_PROCESS4; - XXH_PROCESS1; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 15: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 11: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 7: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 3: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 2: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 1: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 0: return XXH32_avalanche(hash); - } - XXH_ASSERT(0); - return hash; /* reaching this point is deemed impossible */ - } -} - -#ifdef XXH_OLD_NAMES -# define PROCESS1 XXH_PROCESS1 -# define PROCESS4 XXH_PROCESS4 -#else -# undef XXH_PROCESS1 -# undef XXH_PROCESS4 -#endif - -/*! - * @internal - * @brief The implementation for @ref XXH32(). - * - * @param input , len , seed Directly passed from @ref XXH32(). - * @param align Whether @p input is aligned. - * @return The calculated hash. - */ -XXH_FORCE_INLINE XXH_PUREF xxh_u32 -XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) -{ - xxh_u32 h32; - - if (input==NULL) XXH_ASSERT(len == 0); - - if (len>=16) { - const xxh_u8* const bEnd = input + len; - const xxh_u8* const limit = bEnd - 15; - xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; - xxh_u32 v2 = seed + XXH_PRIME32_2; - xxh_u32 v3 = seed + 0; - xxh_u32 v4 = seed - XXH_PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; - v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; - v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; - v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; - } while (input < limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) - + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + XXH_PRIME32_5; - } - - h32 += (xxh_u32)len; - - return XXH32_finalize(h32, input, len&15, align); -} - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) -{ -#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, (const xxh_u8*)input, len); - return XXH32_digest(&state); -#else - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); - } } - - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); -#endif -} - - - -/******* Hash streaming *******/ -#ifndef XXH_NO_STREAM -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); -} - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; - statePtr->v[1] = seed + XXH_PRIME32_2; - statePtr->v[2] = seed + 0; - statePtr->v[3] = seed - XXH_PRIME32_1; - return XXH_OK; -} - - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode -XXH32_update(XXH32_state_t* state, const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - { const xxh_u8* p = (const xxh_u8*)input; - const xxh_u8* const bEnd = p + len; - - state->total_len_32 += (XXH32_hash_t)len; - state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); - state->memsize += (XXH32_hash_t)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const xxh_u32* p32 = state->mem32; - state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; - state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; - state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; - state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const xxh_u8* const limit = bEnd - 16; - - do { - state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; - state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; - state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; - state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; - } while (p<=limit); - - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - } - - return XXH_OK; -} - - -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) -{ - xxh_u32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v[0], 1) - + XXH_rotl32(state->v[1], 7) - + XXH_rotl32(state->v[2], 12) - + XXH_rotl32(state->v[3], 18); - } else { - h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; - } - - h32 += state->total_len_32; - - return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); -} -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/*! - * @ingroup XXH32_family - * The default return values from XXH functions are unsigned 32 and 64 bit - * integers. - * - * The canonical representation uses big endian convention, the same convention - * as human-readable numbers (large digits first). - * - * This way, hash values can be written into a file or buffer, remaining - * comparable across different systems. - * - * The following functions allow transformation of hash values to and from their - * canonical format. - */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); -} -/*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - - -#ifndef XXH_NO_LONG_LONG - -/* ******************************************************************* -* 64-bit hash functions -*********************************************************************/ -/*! - * @} - * @ingroup impl - * @{ - */ -/******* Memory access *******/ - -typedef XXH64_hash_t xxh_u64; - -#ifdef XXH_OLD_NAMES -# define U64 xxh_u64 -#endif - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) -/* - * Manual byteshift. Best for old compilers which don't inline memcpy. - * We actually directly use XXH_readLE64 and XXH_readBE64. - */ -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static xxh_u64 XXH_read64(const void* memPtr) -{ - return *(const xxh_u64*) memPtr; -} - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* - * __attribute__((aligned(1))) is supported by gcc and clang. Originally the - * documentation claimed that it only increased the alignment, but actually it - * can decrease it on gcc, clang, and icc: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, - * https://gcc.godbolt.org/z/xYez1j67Y. - */ -#ifdef XXH_OLD_NAMES -typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; -#endif -static xxh_u64 XXH_read64(const void* ptr) -{ - typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64; - return *((const xxh_unalign64*)ptr); -} - -#else - -/* - * Portable and safe solution. Generally efficient. - * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html - */ -static xxh_u64 XXH_read64(const void* memPtr) -{ - xxh_u64 val; - XXH_memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap64 _byteswap_uint64 -#elif XXH_GCC_VERSION >= 403 -# define XXH_swap64 __builtin_bswap64 -#else -static xxh_u64 XXH_swap64(xxh_u64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - - -/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) - -XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[0] - | ((xxh_u64)bytePtr[1] << 8) - | ((xxh_u64)bytePtr[2] << 16) - | ((xxh_u64)bytePtr[3] << 24) - | ((xxh_u64)bytePtr[4] << 32) - | ((xxh_u64)bytePtr[5] << 40) - | ((xxh_u64)bytePtr[6] << 48) - | ((xxh_u64)bytePtr[7] << 56); -} - -XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[7] - | ((xxh_u64)bytePtr[6] << 8) - | ((xxh_u64)bytePtr[5] << 16) - | ((xxh_u64)bytePtr[4] << 24) - | ((xxh_u64)bytePtr[3] << 32) - | ((xxh_u64)bytePtr[2] << 40) - | ((xxh_u64)bytePtr[1] << 48) - | ((xxh_u64)bytePtr[0] << 56); -} - -#else -XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); -} - -static xxh_u64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} -#endif - -XXH_FORCE_INLINE xxh_u64 -XXH_readLE64_align(const void* ptr, XXH_alignment align) -{ - if (align==XXH_unaligned) - return XXH_readLE64(ptr); - else - return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); -} - - -/******* xxh64 *******/ -/*! - * @} - * @defgroup XXH64_impl XXH64 implementation - * @ingroup impl - * - * Details on the XXH64 implementation. - * @{ - */ -/* #define rather that static const, to be used as initializers */ -#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ -#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ -#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ -#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ -#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ - -#ifdef XXH_OLD_NAMES -# define PRIME64_1 XXH_PRIME64_1 -# define PRIME64_2 XXH_PRIME64_2 -# define PRIME64_3 XXH_PRIME64_3 -# define PRIME64_4 XXH_PRIME64_4 -# define PRIME64_5 XXH_PRIME64_5 -#endif - -/*! @copydoc XXH32_round */ -static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) -{ - acc += input * XXH_PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= XXH_PRIME64_1; - return acc; -} - -static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; - return acc; -} - -/*! @copydoc XXH32_avalanche */ -static xxh_u64 XXH64_avalanche(xxh_u64 hash) -{ - hash ^= hash >> 33; - hash *= XXH_PRIME64_2; - hash ^= hash >> 29; - hash *= XXH_PRIME64_3; - hash ^= hash >> 32; - return hash; -} - - -#define XXH_get64bits(p) XXH_readLE64_align(p, align) - -/*! - * @internal - * @brief Processes the last 0-31 bytes of @p ptr. - * - * There may be up to 31 bytes remaining to consume from the input. - * This final stage will digest them to ensure that all input bytes are present - * in the final mix. - * - * @param hash The hash to finalize. - * @param ptr The pointer to the remaining input. - * @param len The remaining length, modulo 32. - * @param align Whether @p ptr is aligned. - * @return The finalized hash - * @see XXH32_finalize(). - */ -static XXH_PUREF xxh_u64 -XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) -{ - if (ptr==NULL) XXH_ASSERT(len == 0); - len &= 31; - while (len >= 8) { - xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); - ptr += 8; - hash ^= k1; - hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4; - len -= 8; - } - if (len >= 4) { - hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; - ptr += 4; - hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; - len -= 4; - } - while (len > 0) { - hash ^= (*ptr++) * XXH_PRIME64_5; - hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; - --len; - } - return XXH64_avalanche(hash); -} - -#ifdef XXH_OLD_NAMES -# define PROCESS1_64 XXH_PROCESS1_64 -# define PROCESS4_64 XXH_PROCESS4_64 -# define PROCESS8_64 XXH_PROCESS8_64 -#else -# undef XXH_PROCESS1_64 -# undef XXH_PROCESS4_64 -# undef XXH_PROCESS8_64 -#endif - -/*! - * @internal - * @brief The implementation for @ref XXH64(). - * - * @param input , len , seed Directly passed from @ref XXH64(). - * @param align Whether @p input is aligned. - * @return The calculated hash. - */ -XXH_FORCE_INLINE XXH_PUREF xxh_u64 -XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) -{ - xxh_u64 h64; - if (input==NULL) XXH_ASSERT(len == 0); - - if (len>=32) { - const xxh_u8* const bEnd = input + len; - const xxh_u8* const limit = bEnd - 31; - xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; - xxh_u64 v2 = seed + XXH_PRIME64_2; - xxh_u64 v3 = seed + 0; - xxh_u64 v4 = seed - XXH_PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; - v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; - v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; - v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; - } while (input= 2 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_state_t state; - XXH64_reset(&state, seed); - XXH64_update(&state, (const xxh_u8*)input, len); - return XXH64_digest(&state); -#else - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); - } } - - return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); - -#endif -} - -/******* Hash Streaming *******/ -#ifndef XXH_NO_STREAM -/*! @ingroup XXH64_family*/ -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; - statePtr->v[1] = seed + XXH_PRIME64_2; - statePtr->v[2] = seed + 0; - statePtr->v[3] = seed - XXH_PRIME64_1; - return XXH_OK; -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode -XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - { const xxh_u8* p = (const xxh_u8*)input; - const xxh_u8* const bEnd = p + len; - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); - state->memsize += (xxh_u32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); - state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); - state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); - state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); - p += 32 - state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const xxh_u8* const limit = bEnd - 32; - - do { - state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; - state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; - state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; - state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; - } while (p<=limit); - - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - } - - return XXH_OK; -} - - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) -{ - xxh_u64 h64; - - if (state->total_len >= 32) { - h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18); - h64 = XXH64_mergeRound(h64, state->v[0]); - h64 = XXH64_mergeRound(h64, state->v[1]); - h64 = XXH64_mergeRound(h64, state->v[2]); - h64 = XXH64_mergeRound(h64, state->v[3]); - } else { - h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; - } - - h64 += (xxh_u64) state->total_len; - - return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); -} -#endif /* !XXH_NO_STREAM */ - -/******* Canonical representation *******/ - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); -} - -/*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} - -#ifndef XXH_NO_XXH3 - -/* ********************************************************************* -* XXH3 -* New generation hash designed for speed on small keys and vectorization -************************************************************************ */ -/*! - * @} - * @defgroup XXH3_impl XXH3 implementation - * @ingroup impl - * @{ - */ - -/* === Compiler specifics === */ - -#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ -# define XXH_RESTRICT /* disable */ -#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ -# define XXH_RESTRICT restrict -#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \ - || (defined (__clang__)) \ - || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \ - || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)) -/* - * There are a LOT more compilers that recognize __restrict but this - * covers the major ones. - */ -# define XXH_RESTRICT __restrict -#else -# define XXH_RESTRICT /* disable */ -#endif - -#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ - || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ - || defined(__clang__) -# define XXH_likely(x) __builtin_expect(x, 1) -# define XXH_unlikely(x) __builtin_expect(x, 0) -#else -# define XXH_likely(x) (x) -# define XXH_unlikely(x) (x) -#endif - -#ifndef XXH_HAS_INCLUDE -# ifdef __has_include -# define XXH_HAS_INCLUDE(x) __has_include(x) -# else -# define XXH_HAS_INCLUDE(x) 0 -# endif -#endif - -#if defined(__GNUC__) || defined(__clang__) -# if defined(__ARM_FEATURE_SVE) -# include -# endif -# if defined(__ARM_NEON__) || defined(__ARM_NEON) \ - || (defined(_M_ARM) && _M_ARM >= 7) \ - || defined(_M_ARM64) || defined(_M_ARM64EC) \ - || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* WASM SIMD128 via SIMDe */ -# define inline __inline__ /* circumvent a clang bug */ -# include -# undef inline -# elif defined(__AVX2__) -# include -# elif defined(__SSE2__) -# include -# endif -#endif - -#if defined(_MSC_VER) -# include -#endif - -/* - * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while - * remaining a true 64-bit/128-bit hash function. - * - * This is done by prioritizing a subset of 64-bit operations that can be - * emulated without too many steps on the average 32-bit machine. - * - * For example, these two lines seem similar, and run equally fast on 64-bit: - * - * xxh_u64 x; - * x ^= (x >> 47); // good - * x ^= (x >> 13); // bad - * - * However, to a 32-bit machine, there is a major difference. - * - * x ^= (x >> 47) looks like this: - * - * x.lo ^= (x.hi >> (47 - 32)); - * - * while x ^= (x >> 13) looks like this: - * - * // note: funnel shifts are not usually cheap. - * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13)); - * x.hi ^= (x.hi >> 13); - * - * The first one is significantly faster than the second, simply because the - * shift is larger than 32. This means: - * - All the bits we need are in the upper 32 bits, so we can ignore the lower - * 32 bits in the shift. - * - The shift result will always fit in the lower 32 bits, and therefore, - * we can ignore the upper 32 bits in the xor. - * - * Thanks to this optimization, XXH3 only requires these features to be efficient: - * - * - Usable unaligned access - * - A 32-bit or 64-bit ALU - * - If 32-bit, a decent ADC instruction - * - A 32 or 64-bit multiply with a 64-bit result - * - For the 128-bit variant, a decent byteswap helps short inputs. - * - * The first two are already required by XXH32, and almost all 32-bit and 64-bit - * platforms which can run XXH32 can run XXH3 efficiently. - * - * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one - * notable exception. - * - * First of all, Thumb-1 lacks support for the UMULL instruction which - * performs the important long multiply. This means numerous __aeabi_lmul - * calls. - * - * Second of all, the 8 functional registers are just not enough. - * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need - * Lo registers, and this shuffling results in thousands more MOVs than A32. - * - * A32 and T32 don't have this limitation. They can access all 14 registers, - * do a 32->64 multiply with UMULL, and the flexible operand allowing free - * shifts is helpful, too. - * - * Therefore, we do a quick sanity check. - * - * If compiling Thumb-1 for a target which supports ARM instructions, we will - * emit a warning, as it is not a "sane" platform to compile for. - * - * Usually, if this happens, it is because of an accident and you probably need - * to specify -march, as you likely meant to compile for a newer architecture. - * - * Credit: large sections of the vectorial and asm source code paths - * have been contributed by @easyaspi314 - */ -#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) -# warning "XXH3 is highly inefficient without ARM or Thumb-2." -#endif - -/* ========================================== - * Vectorization detection - * ========================================== */ - -#ifdef XXH_DOXYGEN -/*! - * @ingroup tuning - * @brief Overrides the vectorization implementation chosen for XXH3. - * - * Can be defined to 0 to disable SIMD or any of the values mentioned in - * @ref XXH_VECTOR_TYPE. - * - * If this is not defined, it uses predefined macros to determine the best - * implementation. - */ -# define XXH_VECTOR XXH_SCALAR -/*! - * @ingroup tuning - * @brief Possible values for @ref XXH_VECTOR. - * - * Note that these are actually implemented as macros. - * - * If this is not defined, it is detected automatically. - * internal macro XXH_X86DISPATCH overrides this. - */ -enum XXH_VECTOR_TYPE /* fake enum */ { - XXH_SCALAR = 0, /*!< Portable scalar version */ - XXH_SSE2 = 1, /*!< - * SSE2 for Pentium 4, Opteron, all x86_64. - * - * @note SSE2 is also guaranteed on Windows 10, macOS, and - * Android x86. - */ - XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ - XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ - XXH_NEON = 4, /*!< - * NEON for most ARMv7-A, all AArch64, and WASM SIMD128 - * via the SIMDeverywhere polyfill provided with the - * Emscripten SDK. - */ - XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ - XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ -}; -/*! - * @ingroup tuning - * @brief Selects the minimum alignment for XXH3's accumulators. - * - * When using SIMD, this should match the alignment required for said vector - * type, so, for example, 32 for AVX2. - * - * Default: Auto detected. - */ -# define XXH_ACC_ALIGN 8 -#endif - -/* Actual definition */ -#ifndef XXH_DOXYGEN -# define XXH_SCALAR 0 -# define XXH_SSE2 1 -# define XXH_AVX2 2 -# define XXH_AVX512 3 -# define XXH_NEON 4 -# define XXH_VSX 5 -# define XXH_SVE 6 -#endif - -#ifndef XXH_VECTOR /* can be defined on command line */ -# if defined(__ARM_FEATURE_SVE) -# define XXH_VECTOR XXH_SVE -# elif ( \ - defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \ - || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \ - || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* wasm simd128 via SIMDe */ \ - ) && ( \ - defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \ - ) -# define XXH_VECTOR XXH_NEON -# elif defined(__AVX512F__) -# define XXH_VECTOR XXH_AVX512 -# elif defined(__AVX2__) -# define XXH_VECTOR XXH_AVX2 -# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) -# define XXH_VECTOR XXH_SSE2 -# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \ - || (defined(__s390x__) && defined(__VEC__)) \ - && defined(__GNUC__) /* TODO: IBM XL */ -# define XXH_VECTOR XXH_VSX -# else -# define XXH_VECTOR XXH_SCALAR -# endif -#endif - -/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */ -#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE) -# ifdef _MSC_VER -# pragma warning(once : 4606) -# else -# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead." -# endif -# undef XXH_VECTOR -# define XXH_VECTOR XXH_SCALAR -#endif - -/* - * Controls the alignment of the accumulator, - * for compatibility with aligned vector loads, which are usually faster. - */ -#ifndef XXH_ACC_ALIGN -# if defined(XXH_X86DISPATCH) -# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ -# elif XXH_VECTOR == XXH_SCALAR /* scalar */ -# define XXH_ACC_ALIGN 8 -# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ -# define XXH_ACC_ALIGN 32 -# elif XXH_VECTOR == XXH_NEON /* neon */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_VSX /* vsx */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ -# define XXH_ACC_ALIGN 64 -# elif XXH_VECTOR == XXH_SVE /* sve */ -# define XXH_ACC_ALIGN 64 -# endif -#endif - -#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ - || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 -# define XXH_SEC_ALIGN XXH_ACC_ALIGN -#elif XXH_VECTOR == XXH_SVE -# define XXH_SEC_ALIGN XXH_ACC_ALIGN -#else -# define XXH_SEC_ALIGN 8 -#endif - -#if defined(__GNUC__) || defined(__clang__) -# define XXH_ALIASING __attribute__((may_alias)) -#else -# define XXH_ALIASING /* nothing */ -#endif - -/* - * UGLY HACK: - * GCC usually generates the best code with -O3 for xxHash. - * - * However, when targeting AVX2, it is overzealous in its unrolling resulting - * in code roughly 3/4 the speed of Clang. - * - * There are other issues, such as GCC splitting _mm256_loadu_si256 into - * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which - * only applies to Sandy and Ivy Bridge... which don't even support AVX2. - * - * That is why when compiling the AVX2 version, it is recommended to use either - * -O2 -mavx2 -march=haswell - * or - * -O2 -mavx2 -mno-avx256-split-unaligned-load - * for decent performance, or to use Clang instead. - * - * Fortunately, we can control the first one with a pragma that forces GCC into - * -O2, but the other one we can't control without "failed to inline always - * inline function due to target mismatch" warnings. - */ -#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ - && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ -# pragma GCC push_options -# pragma GCC optimize("-O2") -#endif - -#if XXH_VECTOR == XXH_NEON - -/* - * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3 - * optimizes out the entire hashLong loop because of the aliasing violation. - * - * However, GCC is also inefficient at load-store optimization with vld1q/vst1q, - * so the only option is to mark it as aliasing. - */ -typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING; - -/*! - * @internal - * @brief `vld1q_u64` but faster and alignment-safe. - * - * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only - * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86). - * - * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it - * prohibits load-store optimizations. Therefore, a direct dereference is used. - * - * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe - * unaligned load. - */ -#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) -XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */ -{ - return *(xxh_aliasing_uint64x2_t const *)ptr; -} -#else -XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) -{ - return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); -} -#endif - -/*! - * @internal - * @brief `vmlal_u32` on low and high halves of a vector. - * - * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with - * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32` - * with `vmlal_u32`. - */ -#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - /* Inline assembly is the only way */ - __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs)); - return acc; -} -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - /* This intrinsic works as expected */ - return vmlal_high_u32(acc, lhs, rhs); -} -#else -/* Portable intrinsic versions */ -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); -} -/*! @copydoc XXH_vmlal_low_u32 - * Assume the compiler converts this to vmlal_high_u32 on aarch64 */ -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); -} -#endif - -/*! - * @ingroup tuning - * @brief Controls the NEON to scalar ratio for XXH3 - * - * This can be set to 2, 4, 6, or 8. - * - * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used. - * - * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those - * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU - * bandwidth. - * - * This is even more noticeable on the more advanced cores like the Cortex-A76 which - * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once. - * - * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes - * and 2 scalar lanes, which is chosen by default. - * - * This does not apply to Apple processors or 32-bit processors, which run better with - * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes. - * - * This change benefits CPUs with large micro-op buffers without negatively affecting - * most other CPUs: - * - * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. | - * |:----------------------|:--------------------|----------:|-----------:|------:| - * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% | - * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% | - * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% | - * | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% | - * - * It also seems to fix some bad codegen on GCC, making it almost as fast as clang. - * - * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes meaning - * it effectively becomes worse 4. - * - * @see XXH3_accumulate_512_neon() - */ -# ifndef XXH3_NEON_LANES -# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \ - && !defined(__APPLE__) && XXH_SIZE_OPT <= 0 -# define XXH3_NEON_LANES 6 -# else -# define XXH3_NEON_LANES XXH_ACC_NB -# endif -# endif -#endif /* XXH_VECTOR == XXH_NEON */ - -/* - * VSX and Z Vector helpers. - * - * This is very messy, and any pull requests to clean this up are welcome. - * - * There are a lot of problems with supporting VSX and s390x, due to - * inconsistent intrinsics, spotty coverage, and multiple endiannesses. - */ -#if XXH_VECTOR == XXH_VSX -/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`, - * and `pixel`. This is a problem for obvious reasons. - * - * These keywords are unnecessary; the spec literally says they are - * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd - * after including the header. - * - * We use pragma push_macro/pop_macro to keep the namespace clean. */ -# pragma push_macro("bool") -# pragma push_macro("vector") -# pragma push_macro("pixel") -/* silence potential macro redefined warnings */ -# undef bool -# undef vector -# undef pixel - -# if defined(__s390x__) -# include -# else -# include -# endif - -/* Restore the original macro values, if applicable. */ -# pragma pop_macro("pixel") -# pragma pop_macro("vector") -# pragma pop_macro("bool") - -typedef __vector unsigned long long xxh_u64x2; -typedef __vector unsigned char xxh_u8x16; -typedef __vector unsigned xxh_u32x4; - -/* - * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue. - */ -typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING; - -# ifndef XXH_VSX_BE -# if defined(__BIG_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define XXH_VSX_BE 1 -# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ -# warning "-maltivec=be is not recommended. Please use native endianness." -# define XXH_VSX_BE 1 -# else -# define XXH_VSX_BE 0 -# endif -# endif /* !defined(XXH_VSX_BE) */ - -# if XXH_VSX_BE -# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) -# define XXH_vec_revb vec_revb -# else -/*! - * A polyfill for POWER9's vec_revb(). - */ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) -{ - xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, - 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - return vec_perm(val, val, vByteSwap); -} -# endif -# endif /* XXH_VSX_BE */ - -/*! - * Performs an unaligned vector load and byte swaps it on big endian. - */ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) -{ - xxh_u64x2 ret; - XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2)); -# if XXH_VSX_BE - ret = XXH_vec_revb(ret); -# endif - return ret; -} - -/* - * vec_mulo and vec_mule are very problematic intrinsics on PowerPC - * - * These intrinsics weren't added until GCC 8, despite existing for a while, - * and they are endian dependent. Also, their meaning swap depending on version. - * */ -# if defined(__s390x__) - /* s390x is always big endian, no issue on this platform */ -# define XXH_vec_mulo vec_mulo -# define XXH_vec_mule vec_mule -# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__) -/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */ - /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */ -# define XXH_vec_mulo __builtin_altivec_vmulouw -# define XXH_vec_mule __builtin_altivec_vmuleuw -# else -/* gcc needs inline assembly */ -/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) -{ - xxh_u64x2 result; - __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); - return result; -} -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) -{ - xxh_u64x2 result; - __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); - return result; -} -# endif /* XXH_vec_mulo, XXH_vec_mule */ -#endif /* XXH_VECTOR == XXH_VSX */ - -#if XXH_VECTOR == XXH_SVE -#define ACCRND(acc, offset) \ -do { \ - svuint64_t input_vec = svld1_u64(mask, xinput + offset); \ - svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \ - svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \ - svuint64_t swapped = svtbl_u64(input_vec, kSwap); \ - svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \ - svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \ - svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \ - acc = svadd_u64_x(mask, acc, mul); \ -} while (0) -#endif /* XXH_VECTOR == XXH_SVE */ - -/* prefetch - * can be disabled, by declaring XXH_NO_PREFETCH build macro */ -#if defined(XXH_NO_PREFETCH) -# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ -#else -# if XXH_SIZE_OPT >= 1 -# define XXH_PREFETCH(ptr) (void)(ptr) -# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# else -# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* XXH_NO_PREFETCH */ - - -/* ========================================== - * XXH3 default settings - * ========================================== */ - -#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ - -#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) -# error "default keyset is not large enough" -#endif - -/*! Pseudorandom secret taken directly from FARSH. */ -XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, -}; - -static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ -static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ - -#ifdef XXH_OLD_NAMES -# define kSecret XXH3_kSecret -#endif - -#ifdef XXH_DOXYGEN -/*! - * @brief Calculates a 32-bit to 64-bit long multiply. - * - * Implemented as a macro. - * - * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't - * need to (but it shouldn't need to anyways, it is about 7 instructions to do - * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we - * use that instead of the normal method. - * - * If you are compiling for platforms like Thumb-1 and don't have a better option, - * you may also want to write your own long multiply routine here. - * - * @param x, y Numbers to be multiplied - * @return 64-bit product of the low 32 bits of @p x and @p y. - */ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64(xxh_u64 x, xxh_u64 y) -{ - return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); -} -#elif defined(_MSC_VER) && defined(_M_IX86) -# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) -#else -/* - * Downcast + upcast is usually better than masking on older compilers like - * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. - * - * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands - * and perform a full 64x64 multiply -- entirely redundant on 32-bit. - */ -# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) -#endif - -/*! - * @brief Calculates a 64->128-bit long multiply. - * - * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar - * version. - * - * @param lhs , rhs The 64-bit integers to be multiplied - * @return The 128-bit result represented in an @ref XXH128_hash_t. - */ -static XXH128_hash_t -XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) -{ - /* - * GCC/Clang __uint128_t method. - * - * On most 64-bit targets, GCC and Clang define a __uint128_t type. - * This is usually the best way as it usually uses a native long 64-bit - * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. - * - * Usually. - * - * Despite being a 32-bit platform, Clang (and emscripten) define this type - * despite not having the arithmetic for it. This results in a laggy - * compiler builtin call which calculates a full 128-bit multiply. - * In that case it is best to use the portable one. - * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 - */ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \ - && defined(__SIZEOF_INT128__) \ - || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - - __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; - XXH128_hash_t r128; - r128.low64 = (xxh_u64)(product); - r128.high64 = (xxh_u64)(product >> 64); - return r128; - - /* - * MSVC for x64's _umul128 method. - * - * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); - * - * This compiles to single operand MUL on x64. - */ -#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC) - -#ifndef _MSC_VER -# pragma intrinsic(_umul128) -#endif - xxh_u64 product_high; - xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); - XXH128_hash_t r128; - r128.low64 = product_low; - r128.high64 = product_high; - return r128; - - /* - * MSVC for ARM64's __umulh method. - * - * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. - */ -#elif defined(_M_ARM64) || defined(_M_ARM64EC) - -#ifndef _MSC_VER -# pragma intrinsic(__umulh) -#endif - XXH128_hash_t r128; - r128.low64 = lhs * rhs; - r128.high64 = __umulh(lhs, rhs); - return r128; - -#else - /* - * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. - * - * This is a fast and simple grade school multiply, which is shown below - * with base 10 arithmetic instead of base 0x100000000. - * - * 9 3 // D2 lhs = 93 - * x 7 5 // D2 rhs = 75 - * ---------- - * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 - * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 - * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 - * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 - * --------- - * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 - * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 - * --------- - * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 - * - * The reasons for adding the products like this are: - * 1. It avoids manual carry tracking. Just like how - * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. - * This avoids a lot of complexity. - * - * 2. It hints for, and on Clang, compiles to, the powerful UMAAL - * instruction available in ARM's Digital Signal Processing extension - * in 32-bit ARMv6 and later, which is shown below: - * - * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) - * { - * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; - * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); - * *RdHi = (xxh_u32)(product >> 32); - * } - * - * This instruction was designed for efficient long multiplication, and - * allows this to be calculated in only 4 instructions at speeds - * comparable to some 64-bit ALUs. - * - * 3. It isn't terrible on other platforms. Usually this will be a couple - * of 32-bit ADD/ADCs. - */ - - /* First calculate all of the cross products. */ - xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); - - /* Now add the products together. These will never overflow. */ - xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); - - XXH128_hash_t r128; - r128.low64 = lower; - r128.high64 = upper; - return r128; -#endif -} - -/*! - * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. - * - * The reason for the separate function is to prevent passing too many structs - * around by value. This will hopefully inline the multiply, but we don't force it. - * - * @param lhs , rhs The 64-bit integers to multiply - * @return The low 64 bits of the product XOR'd by the high 64 bits. - * @see XXH_mult64to128() - */ -static xxh_u64 -XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) -{ - XXH128_hash_t product = XXH_mult64to128(lhs, rhs); - return product.low64 ^ product.high64; -} - -/*! Seems to produce slightly better code on GCC for some reason. */ -XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) -{ - XXH_ASSERT(0 <= shift && shift < 64); - return v64 ^ (v64 >> shift); -} - -/* - * This is a fast avalanche stage, - * suitable when input bits are already partially mixed - */ -static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) -{ - h64 = XXH_xorshift64(h64, 37); - h64 *= PRIME_MX1; - h64 = XXH_xorshift64(h64, 32); - return h64; -} - -/* - * This is a stronger avalanche, - * inspired by Pelle Evensen's rrmxmx - * preferable when input has not been previously mixed - */ -static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) -{ - /* this mix is inspired by Pelle Evensen's rrmxmx */ - h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); - h64 *= PRIME_MX2; - h64 ^= (h64 >> 35) + len ; - h64 *= PRIME_MX2; - return XXH_xorshift64(h64, 28); -} - - -/* ========================================== - * Short keys - * ========================================== - * One of the shortcomings of XXH32 and XXH64 was that their performance was - * sub-optimal on short lengths. It used an iterative algorithm which strongly - * favored lengths that were a multiple of 4 or 8. - * - * Instead of iterating over individual inputs, we use a set of single shot - * functions which piece together a range of lengths and operate in constant time. - * - * Additionally, the number of multiplies has been significantly reduced. This - * reduces latency, especially when emulating 64-bit multiplies on 32-bit. - * - * Depending on the platform, this may or may not be faster than XXH32, but it - * is almost guaranteed to be faster than XXH64. - */ - -/* - * At very short lengths, there isn't enough input to fully hide secrets, or use - * the entire secret. - * - * There is also only a limited amount of mixing we can do before significantly - * impacting performance. - * - * Therefore, we use different sections of the secret and always mix two secret - * samples with an XOR. This should have no effect on performance on the - * seedless or withSeed variants because everything _should_ be constant folded - * by modern compilers. - * - * The XOR mixing hides individual parts of the secret and increases entropy. - * - * This adds an extra layer of strength for custom secrets. - */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); - /* - * len = 1: combined = { input[0], 0x01, input[0], input[0] } - * len = 2: combined = { input[1], 0x02, input[0], input[1] } - * len = 3: combined = { input[2], 0x03, input[0], input[1] } - */ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; - return XXH64_avalanche(keyed); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input1 = XXH_readLE32(input); - xxh_u32 const input2 = XXH_readLE32(input + len - 4); - xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; - xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); - xxh_u64 const keyed = input64 ^ bitflip; - return XXH3_rrmxmx(keyed, len); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; - xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; - xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; - xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; - xxh_u64 const acc = len - + XXH_swap64(input_lo) + input_hi - + XXH3_mul128_fold64(input_lo, input_hi); - return XXH3_avalanche(acc); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); - if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); - if (len) return XXH3_len_1to3_64b(input, len, secret, seed); - return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); - } -} - -/* - * DISCLAIMER: There are known *seed-dependent* multicollisions here due to - * multiplication by zero, affecting hashes of lengths 17 to 240. - * - * However, they are very unlikely. - * - * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all - * unseeded non-cryptographic hashes, it does not attempt to defend itself - * against specially crafted inputs, only random inputs. - * - * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes - * cancelling out the secret is taken an arbitrary number of times (addressed - * in XXH3_accumulate_512), this collision is very unlikely with random inputs - * and/or proper seeding: - * - * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a - * function that is only called up to 16 times per hash with up to 240 bytes of - * input. - * - * This is not too bad for a non-cryptographic hash function, especially with - * only 64 bit outputs. - * - * The 128-bit variant (which trades some speed for strength) is NOT affected - * by this, although it is always a good idea to use a proper seed if you care - * about strength. - */ -XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) -{ -#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ - /* - * UGLY HACK: - * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in - * slower code. - * - * By forcing seed64 into a register, we disrupt the cost model and - * cause it to scalarize. See `XXH32_round()` - * - * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, - * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on - * GCC 9.2, despite both emitting scalar code. - * - * GCC generates much better scalar code than Clang for the rest of XXH3, - * which is why finding a more optimal codepath is an interest. - */ - XXH_COMPILER_GUARD(seed64); -#endif - { xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 const input_hi = XXH_readLE64(input+8); - return XXH3_mul128_fold64( - input_lo ^ (XXH_readLE64(secret) + seed64), - input_hi ^ (XXH_readLE64(secret+8) - seed64) - ); - } -} - -/* For mid range keys, XXH3 uses a Mum-hash variant. */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { xxh_u64 acc = len * XXH_PRIME64_1; -#if XXH_SIZE_OPT >= 1 - /* Smaller and cleaner, but slightly slower. */ - unsigned int i = (unsigned int)(len - 1) / 32; - do { - acc += XXH3_mix16B(input+16 * i, secret+32*i, seed); - acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed); - } while (i-- != 0); -#else - if (len > 32) { - if (len > 64) { - if (len > 96) { - acc += XXH3_mix16B(input+48, secret+96, seed); - acc += XXH3_mix16B(input+len-64, secret+112, seed); - } - acc += XXH3_mix16B(input+32, secret+64, seed); - acc += XXH3_mix16B(input+len-48, secret+80, seed); - } - acc += XXH3_mix16B(input+16, secret+32, seed); - acc += XXH3_mix16B(input+len-32, secret+48, seed); - } - acc += XXH3_mix16B(input+0, secret+0, seed); - acc += XXH3_mix16B(input+len-16, secret+16, seed); -#endif - return XXH3_avalanche(acc); - } -} - -#define XXH3_MIDSIZE_MAX 240 - -XXH_NO_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - - #define XXH3_MIDSIZE_STARTOFFSET 3 - #define XXH3_MIDSIZE_LASTOFFSET 17 - - { xxh_u64 acc = len * XXH_PRIME64_1; - xxh_u64 acc_end; - unsigned int const nbRounds = (unsigned int)len / 16; - unsigned int i; - XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - for (i=0; i<8; i++) { - acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); - } - /* last bytes */ - acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); - XXH_ASSERT(nbRounds >= 8); - acc = XXH3_avalanche(acc); -#if defined(__clang__) /* Clang */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ - /* - * UGLY HACK: - * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. - * In everywhere else, it uses scalar code. - * - * For 64->128-bit multiplies, even if the NEON was 100% optimal, it - * would still be slower than UMAAL (see XXH_mult64to128). - * - * Unfortunately, Clang doesn't handle the long multiplies properly and - * converts them to the nonexistent "vmulq_u64" intrinsic, which is then - * scalarized into an ugly mess of VMOV.32 instructions. - * - * This mess is difficult to avoid without turning autovectorization - * off completely, but they are usually relatively minor and/or not - * worth it to fix. - * - * This loop is the easiest to fix, as unlike XXH32, this pragma - * _actually works_ because it is a loop vectorization instead of an - * SLP vectorization. - */ - #pragma clang loop vectorize(disable) -#endif - for (i=8 ; i < nbRounds; i++) { - /* - * Prevents clang for unrolling the acc loop and interleaving with this one. - */ - XXH_COMPILER_GUARD(acc); - acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); - } - return XXH3_avalanche(acc + acc_end); - } -} - - -/* ======= Long Keys ======= */ - -#define XXH_STRIPE_LEN 64 -#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ -#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) - -#ifdef XXH_OLD_NAMES -# define STRIPE_LEN XXH_STRIPE_LEN -# define ACC_NB XXH_ACC_NB -#endif - -#ifndef XXH_PREFETCH_DIST -# ifdef __clang__ -# define XXH_PREFETCH_DIST 320 -# else -# if (XXH_VECTOR == XXH_AVX512) -# define XXH_PREFETCH_DIST 512 -# else -# define XXH_PREFETCH_DIST 384 -# endif -# endif /* __clang__ */ -#endif /* XXH_PREFETCH_DIST */ - -/* - * These macros are to generate an XXH3_accumulate() function. - * The two arguments select the name suffix and target attribute. - * - * The name of this symbol is XXH3_accumulate_() and it calls - * XXH3_accumulate_512_(). - * - * It may be useful to hand implement this function if the compiler fails to - * optimize the inline function. - */ -#define XXH3_ACCUMULATE_TEMPLATE(name) \ -void \ -XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \ - const xxh_u8* XXH_RESTRICT input, \ - const xxh_u8* XXH_RESTRICT secret, \ - size_t nbStripes) \ -{ \ - size_t n; \ - for (n = 0; n < nbStripes; n++ ) { \ - const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \ - XXH_PREFETCH(in + XXH_PREFETCH_DIST); \ - XXH3_accumulate_512_##name( \ - acc, \ - in, \ - secret + n*XXH_SECRET_CONSUME_RATE); \ - } \ -} - - -XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) -{ - if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); - XXH_memcpy(dst, &v64, sizeof(v64)); -} - -/* Several intrinsic functions below are supposed to accept __int64 as argument, - * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . - * However, several environments do not define __int64 type, - * requiring a workaround. - */ -#if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) - typedef int64_t xxh_i64; -#else - /* the following type must have a width of 64-bit */ - typedef long long xxh_i64; -#endif - - -/* - * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. - * - * It is a hardened version of UMAC, based off of FARSH's implementation. - * - * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD - * implementations, and it is ridiculously fast. - * - * We harden it by mixing the original input to the accumulators as well as the product. - * - * This means that in the (relatively likely) case of a multiply by zero, the - * original input is preserved. - * - * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve - * cross-pollination, as otherwise the upper and lower halves would be - * essentially independent. - * - * This doesn't matter on 64-bit hashes since they all get merged together in - * the end, so we skip the extra step. - * - * Both XXH3_64bits and XXH3_128bits use this subroutine. - */ - -#if (XXH_VECTOR == XXH_AVX512) \ - || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) - -#ifndef XXH_TARGET_AVX512 -# define XXH_TARGET_AVX512 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - __m512i* const xacc = (__m512i *) acc; - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - - { - /* data_vec = input[0]; */ - __m512i const data_vec = _mm512_loadu_si512 (input); - /* key_vec = secret[0]; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - /* data_key = data_vec ^ key_vec; */ - __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); - /* xacc[0] += swap(data_vec); */ - __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); - __m512i const sum = _mm512_add_epi64(*xacc, data_swap); - /* xacc[0] += product; */ - *xacc = _mm512_add_epi64(product, sum); - } -} -XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) - -/* - * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. - * - * Multiplication isn't perfect, as explained by Google in HighwayHash: - * - * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to - * // varying degrees. In descending order of goodness, bytes - * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. - * // As expected, the upper and lower bytes are much worse. - * - * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 - * - * Since our algorithm uses a pseudorandom secret to add some variance into the - * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. - * - * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid - * extraction. - * - * Both XXH3_64bits and XXH3_128bits use this subroutine. - */ - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - { __m512i* const xacc = (__m512i*) acc; - const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); - - /* xacc[0] ^= (xacc[0] >> 47) */ - __m512i const acc_vec = *xacc; - __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); - /* xacc[0] ^= secret; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); - - /* xacc[0] *= XXH_PRIME32_1; */ - __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32); - __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); - __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); - *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); - } -} - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); - XXH_ASSERT(((size_t)customSecret & 63) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); - __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); - __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); - - const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); - __m512i* const dest = ( __m512i*) customSecret; - int i; - XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 63) == 0); - for (i=0; i < nbRounds; ++i) { - dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_AVX2) \ - || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) - -#ifndef XXH_TARGET_AVX2 -# define XXH_TARGET_AVX2 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xinput = (const __m256i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xsecret = (const __m256i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* data_vec = xinput[i]; */ - __m256i const data_vec = _mm256_loadu_si256 (xinput+i); - /* key_vec = xsecret[i]; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm256_add_epi64(product, sum); - } } -} -XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xsecret = (const __m256i *) secret; - const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m256i const acc_vec = xacc[i]; - __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); - __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); - /* xacc[i] ^= xsecret; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32); - __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); - __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); - } - } -} - -XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); - (void)(&XXH_writeLE64); - XXH_PREFETCH(customSecret); - { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); - - const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); - __m256i* dest = ( __m256i*) customSecret; - -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dest); -# endif - XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 31) == 0); - - /* GCC -O2 need unroll loop manually */ - dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed); - dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed); - dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed); - dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed); - dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed); - dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed); - } -} - -#endif - -/* x86dispatch always generates SSE2 */ -#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) - -#ifndef XXH_TARGET_SSE2 -# define XXH_TARGET_SSE2 /* disable attribute target */ -#endif - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* SSE2 is just a half-scale version of the AVX2 version. */ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xinput = (const __m128i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* data_vec = xinput[i]; */ - __m128i const data_vec = _mm_loadu_si128 (xinput+i); - /* key_vec = xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); - __m128i const sum = _mm_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm_add_epi64(product, sum); - } } -} -XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; - const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m128i const acc_vec = xacc[i]; - __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); - __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); - /* xacc[i] ^= xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); - __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); - } - } -} - -XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); - -# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 - /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ - XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; - __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); -# else - __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); -# endif - int i; - - const void* const src16 = XXH3_kSecret; - __m128i* dst16 = (__m128i*) customSecret; -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dst16); -# endif - XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dst16 & 15) == 0); - - for (i=0; i < nbRounds; ++i) { - dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_NEON) - -/* forward declarations for the scalar routines */ -XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, size_t lane); - -XXH_FORCE_INLINE void -XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT secret, size_t lane); - -/*! - * @internal - * @brief The bulk processing loop for NEON and WASM SIMD128. - * - * The NEON code path is actually partially scalar when running on AArch64. This - * is to optimize the pipelining and can have up to 15% speedup depending on the - * CPU, and it also mitigates some GCC codegen issues. - * - * @see XXH3_NEON_LANES for configuring this and details about this optimization. - * - * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit - * integers instead of the other platforms which mask full 64-bit vectors, - * so the setup is more complicated than just shifting right. - * - * Additionally, there is an optimization for 4 lanes at once noted below. - * - * Since, as stated, the most optimal amount of lanes for Cortexes is 6, - * there needs to be *three* versions of the accumulate operation used - * for the remaining 2 lanes. - * - * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap - * nearly perfectly. - */ - -XXH_FORCE_INLINE void -XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); - { /* GCC for darwin arm64 does not like aliasing here */ - xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc; - /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ - uint8_t const* xinput = (const uint8_t *) input; - uint8_t const* xsecret = (const uint8_t *) secret; - - size_t i; -#ifdef __wasm_simd128__ - /* - * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret - * is constant propagated, which results in it converting it to this - * inside the loop: - * - * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) - * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) - * ... - * - * This requires a full 32-bit address immediate (and therefore a 6 byte - * instruction) as well as an add for each offset. - * - * Putting an asm guard prevents it from folding (at the cost of losing - * the alignment hint), and uses the free offset in `v128.load` instead - * of adding secret_offset each time which overall reduces code size by - * about a kilobyte and improves performance. - */ - XXH_COMPILER_GUARD(xsecret); -#endif - /* Scalar lanes use the normal scalarRound routine */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } - i = 0; - /* 4 NEON lanes at a time. */ - for (; i+1 < XXH3_NEON_LANES / 2; i+=2) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); - uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16)); - /* data_swap = swap(data_vec) */ - uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); - uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); - uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); - - /* - * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a - * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to - * get one vector with the low 32 bits of each lane, and one vector - * with the high 32 bits of each lane. - * - * The intrinsic returns a double vector because the original ARMv7-a - * instruction modified both arguments in place. AArch64 and SIMD128 emit - * two instructions from this intrinsic. - * - * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] - * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] - */ - uint32x4x2_t unzipped = vuzpq_u32( - vreinterpretq_u32_u64(data_key_1), - vreinterpretq_u32_u64(data_key_2) - ); - /* data_key_lo = data_key & 0xFFFFFFFF */ - uint32x4_t data_key_lo = unzipped.val[0]; - /* data_key_hi = data_key >> 32 */ - uint32x4_t data_key_hi = unzipped.val[1]; - /* - * Then, we can split the vectors horizontally and multiply which, as for most - * widening intrinsics, have a variant that works on both high half vectors - * for free on AArch64. A similar instruction is available on SIMD128. - * - * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi - */ - uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); - uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); - /* - * Clang reorders - * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s - * c += a; // add acc.2d, acc.2d, swap.2d - * to - * c += a; // add acc.2d, acc.2d, swap.2d - * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s - * - * While it would make sense in theory since the addition is faster, - * for reasons likely related to umlal being limited to certain NEON - * pipelines, this is worse. A compiler guard fixes this. - */ - XXH_COMPILER_GUARD_CLANG_NEON(sum_1); - XXH_COMPILER_GUARD_CLANG_NEON(sum_2); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64(xacc[i], sum_1); - xacc[i+1] = vaddq_u64(xacc[i+1], sum_2); - } - /* Operate on the remaining NEON lanes 2 at a time. */ - for (; i < XXH3_NEON_LANES / 2; i++) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - /* acc_vec_2 = swap(data_vec) */ - uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* For two lanes, just use VMOVN and VSHRN. */ - /* data_key_lo = data_key & 0xFFFFFFFF; */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* data_key_hi = data_key >> 32; */ - uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); - /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ - uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); - /* Same Clang workaround as before */ - XXH_COMPILER_GUARD_CLANG_NEON(sum); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64 (xacc[i], sum); - } - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) - -XXH_FORCE_INLINE void -XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - - { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; - uint8_t const* xsecret = (uint8_t const*) secret; - - size_t i; - /* WASM uses operator overloads and doesn't need these. */ -#ifndef __wasm_simd128__ - /* { prime32_1, prime32_1 } */ - uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); - /* { 0, prime32_1, 0, prime32_1 } */ - uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32)); -#endif - - /* AArch64 uses both scalar and neon at the same time */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarScrambleRound(acc, secret, i); - } - for (i=0; i < XXH3_NEON_LANES / 2; i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - uint64x2_t acc_vec = xacc[i]; - uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); - uint64x2_t data_vec = veorq_u64(acc_vec, shifted); - - /* xacc[i] ^= xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* xacc[i] *= XXH_PRIME32_1 */ -#ifdef __wasm_simd128__ - /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ - xacc[i] = data_key * XXH_PRIME32_1; -#else - /* - * Expanded version with portable NEON intrinsics - * - * lo(x) * lo(y) + (hi(x) * lo(y) << 32) - * - * prod_hi = hi(data_key) * lo(prime) << 32 - * - * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector - * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits - * and avoid the shift. - */ - uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi); - /* Extract low bits for vmlal_u32 */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ - xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); -#endif - } - } -} -#endif - -#if (XXH_VECTOR == XXH_VSX) - -XXH_FORCE_INLINE void -XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* presumed aligned */ - xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */ - xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */ - xxh_u64x2 const v32 = { 32, 32 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { - /* data_vec = xinput[i]; */ - xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i); - /* key_vec = xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ key_vec; - /* shuffled = (data_key << 32) | (data_key >> 32); */ - xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); - /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ - xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); - /* acc_vec = xacc[i]; */ - xxh_u64x2 acc_vec = xacc[i]; - acc_vec += product; - - /* swap high and low halves */ -#ifdef __s390x__ - acc_vec += vec_permi(data_vec, data_vec, 2); -#else - acc_vec += vec_xxpermdi(data_vec, data_vec, 2); -#endif - xacc[i] = acc_vec; - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) - -XXH_FORCE_INLINE void -XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - - { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - const xxh_u8* const xsecret = (const xxh_u8*) secret; - /* constants */ - xxh_u64x2 const v32 = { 32, 32 }; - xxh_u64x2 const v47 = { 47, 47 }; - xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - xxh_u64x2 const acc_vec = xacc[i]; - xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); - - /* xacc[i] ^= xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ key_vec; - - /* xacc[i] *= XXH_PRIME32_1 */ - /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ - xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); - /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ - xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); - xacc[i] = prod_odd + (prod_even << v32); - } } -} - -#endif - -#if (XXH_VECTOR == XXH_SVE) - -XXH_FORCE_INLINE void -XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - uint64_t *xacc = (uint64_t *)acc; - const uint64_t *xinput = (const uint64_t *)(const void *)input; - const uint64_t *xsecret = (const uint64_t *)(const void *)secret; - svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - uint64_t element_count = svcntd(); - if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc); - ACCRND(vacc, 0); - svst1_u64(mask, xacc, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); - ACCRND(acc0, 0); - ACCRND(acc1, 2); - ACCRND(acc2, 4); - ACCRND(acc3, 6); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); - } else { - svbool_t mask = svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); - ACCRND(acc0, 0); - ACCRND(acc1, 4); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } -} - -XXH_FORCE_INLINE void -XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, - size_t nbStripes) -{ - if (nbStripes != 0) { - uint64_t *xacc = (uint64_t *)acc; - const uint64_t *xinput = (const uint64_t *)(const void *)input; - const uint64_t *xsecret = (const uint64_t *)(const void *)secret; - svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - uint64_t element_count = svcntd(); - if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc + 0); - do { - /* svprfd(svbool_t, void *, enum svfprop); */ - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(vacc, 0); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - ACCRND(acc1, 2); - ACCRND(acc2, 4); - ACCRND(acc3, 6); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); - } else { - svbool_t mask = svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - ACCRND(acc1, 4); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } - } -} - -#endif - -/* scalar variants - universal */ - -#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__)) -/* - * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they - * emit an excess mask and a full 64-bit multiply-add (MADD X-form). - * - * While this might not seem like much, as AArch64 is a 64-bit architecture, only - * big Cortex designs have a full 64-bit multiplier. - * - * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit - * multiplies expand to 2-3 multiplies in microcode. This has a major penalty - * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline. - * - * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does - * not have this penalty and does the mask automatically. - */ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - xxh_u64 ret; - /* note: %x = 64-bit register, %w = 32-bit register */ - __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc)); - return ret; -} -#else -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; -} -#endif - -/*! - * @internal - * @brief Scalar round for @ref XXH3_accumulate_512_scalar(). - * - * This is extracted to its own function because the NEON path uses a combination - * of NEON and scalar. - */ -XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, - size_t lane) -{ - xxh_u64* xacc = (xxh_u64*) acc; - xxh_u8 const* xinput = (xxh_u8 const*) input; - xxh_u8 const* xsecret = (xxh_u8 const*) secret; - XXH_ASSERT(lane < XXH_ACC_NB); - XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); - { - xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); - xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); - xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ - xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); - } -} - -/*! - * @internal - * @brief Processes a 64 byte block of data using the scalar path. - */ -XXH_FORCE_INLINE void -XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - size_t i; - /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ -#if defined(__GNUC__) && !defined(__clang__) \ - && (defined(__arm__) || defined(__thumb2__)) \ - && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ - && XXH_SIZE_OPT <= 0 -# pragma GCC unroll 8 -#endif - for (i=0; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } -} -XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) - -/*! - * @internal - * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). - * - * This is extracted to its own function because the NEON path uses a combination - * of NEON and scalar. - */ -XXH_FORCE_INLINE void -XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT secret, - size_t lane) -{ - xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ - const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ - XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); - XXH_ASSERT(lane < XXH_ACC_NB); - { - xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8); - xxh_u64 acc64 = xacc[lane]; - acc64 = XXH_xorshift64(acc64, 47); - acc64 ^= key64; - acc64 *= XXH_PRIME32_1; - xacc[lane] = acc64; - } -} - -/*! - * @internal - * @brief Scrambles the accumulators after a large chunk has been read - */ -XXH_FORCE_INLINE void -XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - size_t i; - for (i=0; i < XXH_ACC_NB; i++) { - XXH3_scalarScrambleRound(acc, secret, i); - } -} - -XXH_FORCE_INLINE void -XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - /* - * We need a separate pointer for the hack below, - * which requires a non-const pointer. - * Any decent compiler will optimize this out otherwise. - */ - const xxh_u8* kSecretPtr = XXH3_kSecret; - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); - -#if defined(__GNUC__) && defined(__aarch64__) - /* - * UGLY HACK: - * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are - * placed sequentially, in order, at the top of the unrolled loop. - * - * While MOVK is great for generating constants (2 cycles for a 64-bit - * constant compared to 4 cycles for LDR), it fights for bandwidth with - * the arithmetic instructions. - * - * I L S - * MOVK - * MOVK - * MOVK - * MOVK - * ADD - * SUB STR - * STR - * By forcing loads from memory (as the asm line causes the compiler to assume - * that XXH3_kSecretPtr has been changed), the pipelines are used more - * efficiently: - * I L S - * LDR - * ADD LDR - * SUB STR - * STR - * - * See XXH3_NEON_LANES for details on the pipsline. - * - * XXH3_64bits_withSeed, len == 256, Snapdragon 835 - * without hack: 2654.4 MB/s - * with hack: 3202.9 MB/s - */ - XXH_COMPILER_GUARD(kSecretPtr); -#endif - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; - int i; - for (i=0; i < nbRounds; i++) { - /* - * The asm hack causes the compiler to assume that kSecretPtr aliases with - * customSecret, and on aarch64, this prevented LDP from merging two - * loads together for free. Putting the loads together before the stores - * properly generates LDP. - */ - xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; - xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; - XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); - XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); - } } -} - - -typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t); -typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); -typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); - - -#if (XXH_VECTOR == XXH_AVX512) - -#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 -#define XXH3_accumulate XXH3_accumulate_avx512 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 -#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 - -#elif (XXH_VECTOR == XXH_AVX2) - -#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 -#define XXH3_accumulate XXH3_accumulate_avx2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 -#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 - -#elif (XXH_VECTOR == XXH_SSE2) - -#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 -#define XXH3_accumulate XXH3_accumulate_sse2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 -#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 - -#elif (XXH_VECTOR == XXH_NEON) - -#define XXH3_accumulate_512 XXH3_accumulate_512_neon -#define XXH3_accumulate XXH3_accumulate_neon -#define XXH3_scrambleAcc XXH3_scrambleAcc_neon -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#elif (XXH_VECTOR == XXH_VSX) - -#define XXH3_accumulate_512 XXH3_accumulate_512_vsx -#define XXH3_accumulate XXH3_accumulate_vsx -#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#elif (XXH_VECTOR == XXH_SVE) -#define XXH3_accumulate_512 XXH3_accumulate_512_sve -#define XXH3_accumulate XXH3_accumulate_sve -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#else /* scalar */ - -#define XXH3_accumulate_512 XXH3_accumulate_512_scalar -#define XXH3_accumulate XXH3_accumulate_scalar -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar -#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar - -#endif - -#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ -# undef XXH3_initCustomSecret -# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar -#endif - -XXH_FORCE_INLINE void -XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; - size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; - size_t const nb_blocks = (len - 1) / block_len; - - size_t n; - - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - - for (n = 0; n < nb_blocks; n++) { - f_acc(acc, input + n*block_len, secret, nbStripesPerBlock); - f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); - } - - /* last partial block */ - XXH_ASSERT(len > XXH_STRIPE_LEN); - { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; - XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); - f_acc(acc, input + nb_blocks*block_len, secret, nbStripes); - - /* last stripe */ - { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; -#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ - XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); - } } -} - -XXH_FORCE_INLINE xxh_u64 -XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) -{ - return XXH3_mul128_fold64( - acc[0] ^ XXH_readLE64(secret), - acc[1] ^ XXH_readLE64(secret+8) ); -} - -static XXH64_hash_t -XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) -{ - xxh_u64 result64 = start; - size_t i = 0; - - for (i = 0; i < 4; i++) { - result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); -#if defined(__clang__) /* Clang */ \ - && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ - /* - * UGLY HACK: - * Prevent autovectorization on Clang ARMv7-a. Exact same problem as - * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. - * XXH3_64bits, len == 256, Snapdragon 835: - * without hack: 2063.7 MB/s - * with hack: 2560.7 MB/s - */ - XXH_COMPILER_GUARD(result64); -#endif - } - - return XXH3_avalanche(result64); -} - -#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ - XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } - -XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; - - XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble); - - /* converge into final hash */ - XXH_STATIC_ASSERT(sizeof(acc) == 64); - /* do not align on 8, so that the secret is different from the accumulator */ -#define XXH_SECRET_MERGEACCS_START 11 - XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1); -} - -/* - * It's important for performance to transmit secret's size (when it's static) - * so that the compiler can properly optimize the vectorized loop. - * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set. - * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE - * breaks -Og, this is XXH_NO_INLINE. - */ -XXH3_WITH_SECRET_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc); -} - -/* - * It's preferable for performance that XXH3_hashLong is not inlined, - * as it results in a smaller function for small data, easier to the instruction cache. - * Note that inside this no_inline function, we do inline the internal loop, - * and provide a statically defined secret size to allow optimization of vector loop. - */ -XXH_NO_INLINE XXH_PUREF XXH64_hash_t -XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); -} - -/* - * XXH3_hashLong_64b_withSeed(): - * Generate a custom key based on alteration of default XXH3_kSecret with the seed, - * and then use this key for long mode hashing. - * - * This operation is decently fast but nonetheless costs a little bit of time. - * Try to avoid it whenever possible (typically when seed==0). - * - * It's important for performance that XXH3_hashLong is not inlined. Not sure - * why (uop cache maybe?), but the difference is large and easily measurable. - */ -XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, - XXH64_hash_t seed, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ -#if XXH_SIZE_OPT <= 0 - if (seed == 0) - return XXH3_hashLong_64b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); -#endif - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed); - return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), - f_acc, f_scramble); - } -} - -/* - * It's important for performance that XXH3_hashLong is not inlined. - */ -XXH_NO_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - - -typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH64_hash_t -XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong64_f f_hashLong) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secretLen` condition is not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - * Also, note that function signature doesn't offer room to return an error. - */ - if (len <= 16) - return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); -} - - -/* === Public entry point === */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) -{ - return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed) -{ - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); -} - -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (length <= XXH3_MIDSIZE_MAX) - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize); -} - - -/* === XXH3 streaming === */ -#ifndef XXH_NO_STREAM -/* - * Malloc's a pointer that is always aligned to align. - * - * This must be freed with `XXH_alignedFree()`. - * - * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte - * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2 - * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON. - * - * This underalignment previously caused a rather obvious crash which went - * completely unnoticed due to XXH3_createState() not actually being tested. - * Credit to RedSpah for noticing this bug. - * - * The alignment is done manually: Functions like posix_memalign or _mm_malloc - * are avoided: To maintain portability, we would have to write a fallback - * like this anyways, and besides, testing for the existence of library - * functions without relying on external build tools is impossible. - * - * The method is simple: Overallocate, manually align, and store the offset - * to the original behind the returned pointer. - * - * Align must be a power of 2 and 8 <= align <= 128. - */ -static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align) -{ - XXH_ASSERT(align <= 128 && align >= 8); /* range check */ - XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ - XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ - { /* Overallocate to make room for manual realignment and an offset byte */ - xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); - if (base != NULL) { - /* - * Get the offset needed to align this pointer. - * - * Even if the returned pointer is aligned, there will always be - * at least one byte to store the offset to the original pointer. - */ - size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ - /* Add the offset for the now-aligned pointer */ - xxh_u8* ptr = base + offset; - - XXH_ASSERT((size_t)ptr % align == 0); - - /* Store the offset immediately before the returned pointer. */ - ptr[-1] = (xxh_u8)offset; - return ptr; - } - return NULL; - } -} -/* - * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass - * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. - */ -static void XXH_alignedFree(void* p) -{ - if (p != NULL) { - xxh_u8* ptr = (xxh_u8*)p; - /* Get the offset byte we added in XXH_malloc. */ - xxh_u8 offset = ptr[-1]; - /* Free the original malloc'd pointer */ - xxh_u8* base = ptr - offset; - XXH_free(base); - } -} -/*! @ingroup XXH3_family */ -/*! - * @brief Allocate an @ref XXH3_state_t. - * - * Must be freed with XXH3_freeState(). - * @return An allocated XXH3_state_t on success, `NULL` on failure. - */ -XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) -{ - XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); - if (state==NULL) return NULL; - XXH3_INITSTATE(state); - return state; -} - -/*! @ingroup XXH3_family */ -/*! - * @brief Frees an @ref XXH3_state_t. - * - * Must be allocated with XXH3_createState(). - * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState(). - * @return XXH_OK. - */ -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) -{ - XXH_alignedFree(statePtr); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API void -XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state) -{ - XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); -} - -static void -XXH3_reset_internal(XXH3_state_t* statePtr, - XXH64_hash_t seed, - const void* secret, size_t secretSize) -{ - size_t const initStart = offsetof(XXH3_state_t, bufferedSize); - size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; - XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); - XXH_ASSERT(statePtr != NULL); - /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ - memset((char*)statePtr + initStart, 0, initLength); - statePtr->acc[0] = XXH_PRIME32_3; - statePtr->acc[1] = XXH_PRIME64_1; - statePtr->acc[2] = XXH_PRIME64_2; - statePtr->acc[3] = XXH_PRIME64_3; - statePtr->acc[4] = XXH_PRIME64_4; - statePtr->acc[5] = XXH_PRIME32_2; - statePtr->acc[6] = XXH_PRIME64_5; - statePtr->acc[7] = XXH_PRIME32_1; - statePtr->seed = seed; - statePtr->useSeed = (seed != 0); - statePtr->extSecret = (const unsigned char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; - statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, secret, secretSize); - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - if (statePtr == NULL) return XXH_ERROR; - if (seed==0) return XXH3_64bits_reset(statePtr); - if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) - XXH3_initCustomSecret(statePtr->customSecret, seed); - XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64) -{ - if (statePtr == NULL) return XXH_ERROR; - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - XXH3_reset_internal(statePtr, seed64, secret, secretSize); - statePtr->useSeed = 1; /* always, even if seed64==0 */ - return XXH_OK; -} - -/*! - * @internal - * @brief Processes a large input for XXH3_update() and XXH3_digest_long(). - * - * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block. - * - * @param acc Pointer to the 8 accumulator lanes - * @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block* - * @param nbStripesPerBlock Number of stripes in a block - * @param input Input pointer - * @param nbStripes Number of stripes to process - * @param secret Secret pointer - * @param secretLimit Offset of the last block in @p secret - * @param f_acc Pointer to an XXH3_accumulate implementation - * @param f_scramble Pointer to an XXH3_scrambleAcc implementation - * @return Pointer past the end of @p input after processing - */ -XXH_FORCE_INLINE const xxh_u8 * -XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, - size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, - const xxh_u8* XXH_RESTRICT input, size_t nbStripes, - const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE; - /* Process full blocks */ - if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) { - /* Process the initial partial block... */ - size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr; - - do { - /* Accumulate and scramble */ - f_acc(acc, input, initialSecret, nbStripesThisIter); - f_scramble(acc, secret + secretLimit); - input += nbStripesThisIter * XXH_STRIPE_LEN; - nbStripes -= nbStripesThisIter; - /* Then continue the loop with the full block size */ - nbStripesThisIter = nbStripesPerBlock; - initialSecret = secret; - } while (nbStripes >= nbStripesPerBlock); - *nbStripesSoFarPtr = 0; - } - /* Process a partial block */ - if (nbStripes > 0) { - f_acc(acc, input, initialSecret, nbStripes); - input += nbStripes * XXH_STRIPE_LEN; - *nbStripesSoFarPtr += nbStripes; - } - /* Return end pointer */ - return input; -} - -#ifndef XXH3_STREAM_USE_STACK -# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */ -# define XXH3_STREAM_USE_STACK 1 -# endif -#endif -/* - * Both XXH3_64bits_update and XXH3_128bits_update use this routine. - */ -XXH_FORCE_INLINE XXH_errorcode -XXH3_update(XXH3_state_t* XXH_RESTRICT const state, - const xxh_u8* XXH_RESTRICT input, size_t len, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } - - XXH_ASSERT(state != NULL); - { const xxh_u8* const bEnd = input + len; - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; -#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* For some reason, gcc and MSVC seem to suffer greatly - * when operating accumulators directly into state. - * Operating into stack space seems to enable proper optimization. - * clang, on the other hand, doesn't seem to need this trick */ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; - XXH_memcpy(acc, state->acc, sizeof(acc)); -#else - xxh_u64* XXH_RESTRICT const acc = state->acc; -#endif - state->totalLen += len; - XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); - - /* small input : just fill in tmp buffer */ - if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) { - XXH_memcpy(state->buffer + state->bufferedSize, input, len); - state->bufferedSize += (XXH32_hash_t)len; - return XXH_OK; - } - - /* total input is now > XXH3_INTERNALBUFFER_SIZE */ - #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) - XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ - - /* - * Internal buffer is partially filled (always, except at beginning) - * Complete it, then consume it. - */ - if (state->bufferedSize) { - size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; - XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); - input += loadSize; - XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, XXH3_INTERNALBUFFER_STRIPES, - secret, state->secretLimit, - f_acc, f_scramble); - state->bufferedSize = 0; - } - XXH_ASSERT(input < bEnd); - if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { - size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; - input = XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - input, nbStripes, - secret, state->secretLimit, - f_acc, f_scramble); - XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); - - } - /* Some remaining input (always) : buffer it */ - XXH_ASSERT(input < bEnd); - XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); - XXH_ASSERT(state->bufferedSize == 0); - XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); - state->bufferedSize = (XXH32_hash_t)(bEnd-input); -#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* save stack accumulators into state */ - XXH_memcpy(state->acc, acc, sizeof(acc)); -#endif - } - - return XXH_OK; -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate, XXH3_scrambleAcc); -} - - -XXH_FORCE_INLINE void -XXH3_digest_long (XXH64_hash_t* acc, - const XXH3_state_t* state, - const unsigned char* secret) -{ - xxh_u8 lastStripe[XXH_STRIPE_LEN]; - const xxh_u8* lastStripePtr; - - /* - * Digest on a local copy. This way, the state remains unaltered, and it can - * continue ingesting more input afterwards. - */ - XXH_memcpy(acc, state->acc, sizeof(state->acc)); - if (state->bufferedSize >= XXH_STRIPE_LEN) { - /* Consume remaining stripes then point to remaining data in buffer */ - size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; - size_t nbStripesSoFar = state->nbStripesSoFar; - XXH3_consumeStripes(acc, - &nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, nbStripes, - secret, state->secretLimit, - XXH3_accumulate, XXH3_scrambleAcc); - lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; - } else { /* bufferedSize < XXH_STRIPE_LEN */ - /* Copy to temp buffer */ - size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; - XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ - XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); - XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); - lastStripePtr = lastStripe; - } - /* Last stripe */ - XXH3_accumulate_512(acc, - lastStripePtr, - secret + state->secretLimit - XXH_SECRET_LASTACC_START); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; - if (state->totalLen > XXH3_MIDSIZE_MAX) { - XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; - XXH3_digest_long(acc, state, secret); - return XXH3_mergeAccs(acc, - secret + XXH_SECRET_MERGEACCS_START, - (xxh_u64)state->totalLen * XXH_PRIME64_1); - } - /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ - if (state->useSeed) - return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); - return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), - secret, state->secretLimit + XXH_STRIPE_LEN); -} -#endif /* !XXH_NO_STREAM */ - - -/* ========================================== - * XXH3 128 bits (a.k.a XXH128) - * ========================================== - * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, - * even without counting the significantly larger output size. - * - * For example, extra steps are taken to avoid the seed-dependent collisions - * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). - * - * This strength naturally comes at the cost of some speed, especially on short - * lengths. Note that longer hashes are about as fast as the 64-bit version - * due to it using only a slight modification of the 64-bit loop. - * - * XXH128 is also more oriented towards 64-bit machines. It is still extremely - * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). - */ - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - /* A doubled version of 1to3_64b with different constants. */ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); - /* - * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } - * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } - * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } - */ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); - xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; - xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; - xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; - XXH128_hash_t h128; - h128.low64 = XXH64_avalanche(keyed_lo); - h128.high64 = XXH64_avalanche(keyed_hi); - return h128; - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input_lo = XXH_readLE32(input); - xxh_u32 const input_hi = XXH_readLE32(input + len - 4); - xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); - xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; - xxh_u64 const keyed = input_64 ^ bitflip; - - /* Shift len to the left to ensure it is even, this avoids even multiplies. */ - XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); - - m128.high64 += (m128.low64 << 1); - m128.low64 ^= (m128.high64 >> 3); - - m128.low64 = XXH_xorshift64(m128.low64, 35); - m128.low64 *= PRIME_MX2; - m128.low64 = XXH_xorshift64(m128.low64, 28); - m128.high64 = XXH3_avalanche(m128.high64); - return m128; - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; - xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; - xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 input_hi = XXH_readLE64(input + len - 8); - XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); - /* - * Put len in the middle of m128 to ensure that the length gets mixed to - * both the low and high bits in the 128x64 multiply below. - */ - m128.low64 += (xxh_u64)(len - 1) << 54; - input_hi ^= bitfliph; - /* - * Add the high 32 bits of input_hi to the high 32 bits of m128, then - * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to - * the high 64 bits of m128. - * - * The best approach to this operation is different on 32-bit and 64-bit. - */ - if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ - /* - * 32-bit optimized version, which is more readable. - * - * On 32-bit, it removes an ADC and delays a dependency between the two - * halves of m128.high64, but it generates an extra mask on 64-bit. - */ - m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); - } else { - /* - * 64-bit optimized (albeit more confusing) version. - * - * Uses some properties of addition and multiplication to remove the mask: - * - * Let: - * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) - * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) - * c = XXH_PRIME32_2 - * - * a + (b * c) - * Inverse Property: x + y - x == y - * a + (b * (1 + c - 1)) - * Distributive Property: x * (y + z) == (x * y) + (x * z) - * a + (b * 1) + (b * (c - 1)) - * Identity Property: x * 1 == x - * a + b + (b * (c - 1)) - * - * Substitute a, b, and c: - * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - * - * Since input_hi.hi + input_hi.lo == input_hi, we get this: - * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - */ - m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); - } - /* m128 ^= XXH_swap64(m128 >> 64); */ - m128.low64 ^= XXH_swap64(m128.high64); - - { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ - XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); - h128.high64 += m128.high64 * XXH_PRIME64_2; - - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = XXH3_avalanche(h128.high64); - return h128; - } } -} - -/* - * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN - */ -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); - if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); - if (len) return XXH3_len_1to3_128b(input, len, secret, seed); - { XXH128_hash_t h128; - xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); - xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); - h128.low64 = XXH64_avalanche(seed ^ bitflipl); - h128.high64 = XXH64_avalanche( seed ^ bitfliph); - return h128; - } } -} - -/* - * A bit slower than XXH3_mix16B, but handles multiply by zero better. - */ -XXH_FORCE_INLINE XXH128_hash_t -XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, - const xxh_u8* secret, XXH64_hash_t seed) -{ - acc.low64 += XXH3_mix16B (input_1, secret+0, seed); - acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); - acc.high64 += XXH3_mix16B (input_2, secret+16, seed); - acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); - return acc; -} - - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { XXH128_hash_t acc; - acc.low64 = len * XXH_PRIME64_1; - acc.high64 = 0; - -#if XXH_SIZE_OPT >= 1 - { - /* Smaller, but slightly slower. */ - unsigned int i = (unsigned int)(len - 1) / 32; - do { - acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed); - } while (i-- != 0); - } -#else - if (len > 32) { - if (len > 64) { - if (len > 96) { - acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); - } - acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); - } - acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); - } - acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); -#endif - { XXH128_hash_t h128; - h128.low64 = acc.low64 + acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) - + (acc.high64 * XXH_PRIME64_4) - + ((len - seed) * XXH_PRIME64_2); - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); - return h128; - } - } -} - -XXH_NO_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - - { XXH128_hash_t acc; - unsigned i; - acc.low64 = len * XXH_PRIME64_1; - acc.high64 = 0; - /* - * We set as `i` as offset + 32. We do this so that unchanged - * `len` can be used as upper bound. This reaches a sweet spot - * where both x86 and aarch64 get simple agen and good codegen - * for the loop. - */ - for (i = 32; i < 160; i += 32) { - acc = XXH128_mix32B(acc, - input + i - 32, - input + i - 16, - secret + i - 32, - seed); - } - acc.low64 = XXH3_avalanche(acc.low64); - acc.high64 = XXH3_avalanche(acc.high64); - /* - * NB: `i <= len` will duplicate the last 32-bytes if - * len % 32 was zero. This is an unfortunate necessity to keep - * the hash result stable. - */ - for (i=160; i <= len; i += 32) { - acc = XXH128_mix32B(acc, - input + i - 32, - input + i - 16, - secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, - seed); - } - /* last bytes */ - acc = XXH128_mix32B(acc, - input + len - 16, - input + len - 32, - secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, - (XXH64_hash_t)0 - seed); - - { XXH128_hash_t h128; - h128.low64 = acc.low64 + acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) - + (acc.high64 * XXH_PRIME64_4) - + ((len - seed) * XXH_PRIME64_2); - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); - return h128; - } - } -} - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; - - XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble); - - /* converge into final hash */ - XXH_STATIC_ASSERT(sizeof(acc) == 64); - XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - { XXH128_hash_t h128; - h128.low64 = XXH3_mergeAccs(acc, - secret + XXH_SECRET_MERGEACCS_START, - (xxh_u64)len * XXH_PRIME64_1); - h128.high64 = XXH3_mergeAccs(acc, - secret + secretSize - - sizeof(acc) - XXH_SECRET_MERGEACCS_START, - ~((xxh_u64)len * XXH_PRIME64_2)); - return h128; - } -} - -/* - * It's important for performance that XXH3_hashLong() is not inlined. - */ -XXH_NO_INLINE XXH_PUREF XXH128_hash_t -XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_accumulate, XXH3_scrambleAcc); -} - -/* - * It's important for performance to pass @p secretLen (when it's static) - * to the compiler, so that it can properly optimize the vectorized loop. - * - * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE - * breaks -Og, this is XXH_NO_INLINE. - */ -XXH3_WITH_SECRET_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate, XXH3_scrambleAcc); -} - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ - if (seed64 == 0) - return XXH3_hashLong_128b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed64); - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), - f_acc, f_scramble); - } -} - -/* - * It's important for performance that XXH3_hashLong is not inlined. - */ -XXH_NO_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - -typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const void* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_128bits_internal(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong128_f f_hl128) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secret` conditions are not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - */ - if (len <= 16) - return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hl128(input, len, seed64, secret, secretLen); -} - - -/* === Public XXH128 API === */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_128bits_internal(input, len, 0, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_default); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_128bits_internal(input, len, 0, - (const xxh_u8*)secret, secretSize, - XXH3_hashLong_128b_withSecret); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_internal(input, len, seed, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_withSeed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_withSeed(input, len, seed); -} - - -/* === XXH3 128-bit streaming === */ -#ifndef XXH_NO_STREAM -/* - * All initialization and update functions are identical to 64-bit streaming variant. - * The only difference is the finalization routine. - */ - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - return XXH3_64bits_reset(statePtr); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSeed(statePtr, seed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_64bits_update(state, input, len); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; - if (state->totalLen > XXH3_MIDSIZE_MAX) { - XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; - XXH3_digest_long(acc, state, secret); - XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - { XXH128_hash_t h128; - h128.low64 = XXH3_mergeAccs(acc, - secret + XXH_SECRET_MERGEACCS_START, - (xxh_u64)state->totalLen * XXH_PRIME64_1); - h128.high64 = XXH3_mergeAccs(acc, - secret + state->secretLimit + XXH_STRIPE_LEN - - sizeof(acc) - XXH_SECRET_MERGEACCS_START, - ~((xxh_u64)state->totalLen * XXH_PRIME64_2)); - return h128; - } - } - /* len <= XXH3_MIDSIZE_MAX : short code */ - if (state->seed) - return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); - return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), - secret, state->secretLimit + XXH_STRIPE_LEN); -} -#endif /* !XXH_NO_STREAM */ -/* 128-bit utility functions */ - -#include /* memcmp, memcpy */ - -/* return : 1 is equal, 0 if different */ -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) -{ - /* note : XXH128_hash_t is compact, it has no padding byte */ - return !(memcmp(&h1, &h2, sizeof(h1))); -} - -/* This prototype is compatible with stdlib's qsort(). - * @return : >0 if *h128_1 > *h128_2 - * <0 if *h128_1 < *h128_2 - * =0 if *h128_1 == *h128_2 */ -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2) -{ - XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; - XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; - int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); - /* note : bets that, in most cases, hash values are different */ - if (hcmp) return hcmp; - return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); -} - - -/*====== Canonical representation ======*/ -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API void -XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) { - hash.high64 = XXH_swap64(hash.high64); - hash.low64 = XXH_swap64(hash.low64); - } - XXH_memcpy(dst, &hash.high64, sizeof(hash.high64)); - XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src) -{ - XXH128_hash_t h; - h.high64 = XXH_readBE64(src); - h.low64 = XXH_readBE64(src->digest + 8); - return h; -} - - - -/* ========================================== - * Secret generators - * ========================================== - */ -#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) - -XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128) -{ - XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 ); - XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 ); -} - -/*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize) -{ -#if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(secretBuffer != NULL); - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); -#else - /* production mode, assert() are disabled */ - if (secretBuffer == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; -#endif - - if (customSeedSize == 0) { - customSeed = XXH3_kSecret; - customSeedSize = XXH_SECRET_DEFAULT_SIZE; - } -#if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(customSeed != NULL); -#else - if (customSeed == NULL) return XXH_ERROR; -#endif - - /* Fill secretBuffer with a copy of customSeed - repeat as needed */ - { size_t pos = 0; - while (pos < secretSize) { - size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize); - memcpy((char*)secretBuffer + pos, customSeed, toCopy); - pos += toCopy; - } } - - { size_t const nbSeg16 = secretSize / 16; - size_t n; - XXH128_canonical_t scrambler; - XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); - for (n=0; n