From 9be091e1c68f326b7c99f480683734e9e9edd72d Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sat, 15 May 2021 19:54:09 -0700 Subject: [PATCH 01/12] Update pugixml to 1.11 --- lib/pugixml/pugiconfig.hpp | 9 +- lib/pugixml/pugixml.cpp | 355 ++++++++++++++++++++++++++----------- lib/pugixml/pugixml.hpp | 38 +++- 3 files changed, 291 insertions(+), 111 deletions(-) diff --git a/lib/pugixml/pugiconfig.hpp b/lib/pugixml/pugiconfig.hpp index 66ddc10..410a81e 100644 --- a/lib/pugixml/pugiconfig.hpp +++ b/lib/pugixml/pugiconfig.hpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.10 + * pugixml parser - version 1.11 * -------------------------------------------------------- - * Copyright (C) 2006-2019, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -40,6 +40,9 @@ // #define PUGIXML_MEMORY_OUTPUT_STACK 10240 // #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096 +// Tune this constant to adjust max nesting for XPath queries +// #define PUGIXML_XPATH_DEPTH_LIMIT 1024 + // Uncomment this to switch to header-only version // #define PUGIXML_HEADER_ONLY @@ -49,7 +52,7 @@ #endif /** - * Copyright (c) 2006-2019 Arseny Kapoulkine + * Copyright (c) 2006-2020 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/lib/pugixml/pugixml.cpp b/lib/pugixml/pugixml.cpp index 90c48b2..efdcdf6 100644 --- a/lib/pugixml/pugixml.cpp +++ b/lib/pugixml/pugixml.cpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.10 + * pugixml parser - version 1.11 * -------------------------------------------------------- - * Copyright (C) 2006-2019, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://pugixml.org/ * * This library is distributed under the MIT License. 
See notice at the end @@ -378,7 +378,7 @@ PUGI__NS_BEGIN static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) { - unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); + unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); // MurmurHash3 32-bit finalizer h ^= h >> 16; @@ -4435,6 +4435,9 @@ PUGI__NS_BEGIN while (sit && sit != sn) { + // loop invariant: dit is inside the subtree rooted at dn + assert(dit); + // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop if (sit != dn) { @@ -4464,9 +4467,14 @@ PUGI__NS_BEGIN sit = sit->parent; dit = dit->parent; + + // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn + assert(sit == sn || dit); } while (sit != sn); } + + assert(!sit || dit == dn->parent); } PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) @@ -4665,19 +4673,19 @@ PUGI__NS_BEGIN } template <typename String, typename Header> - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) { char buf[128]; - PUGI__SNPRINTF(buf, "%.9g", double(value)); + PUGI__SNPRINTF(buf, "%.*g", precision, double(value)); return set_value_ascii(dest, header, header_mask, buf); } template <typename String, typename Header> - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) + PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) { char buf[128]; - PUGI__SNPRINTF(buf, "%.17g", value); + PUGI__SNPRINTF(buf, "%.*g", precision, value); return set_value_ascii(dest, header, header_mask, buf); } @@ -4973,7 +4981,12 @@ PUGI__NS_BEGIN #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) { +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 + FILE* file = 0; + return _wfopen_s(&file, path, mode) == 0 ? file : 0; +#else return _wfopen(path, mode); +#endif } #else PUGI__FN char* convert_path_heap(const wchar_t* str) @@ -5017,6 +5030,16 @@ PUGI__NS_BEGIN } #endif + PUGI__FN FILE* open_file(const char* path, const char* mode) + { +#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 + FILE* file = 0; + return fopen_s(&file, path, mode) == 0 ?
file : 0; +#else + return fopen(path, mode); +#endif + } + PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) { if (!file) return false; @@ -5342,14 +5365,28 @@ namespace pugi { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); + } + + PUGI__FN bool xml_attribute::set_value(double rhs, int precision) + { + if (!_attr) return false; + + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); } PUGI__FN bool xml_attribute::set_value(float rhs) { if (!_attr) return false; - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); + } + + PUGI__FN bool xml_attribute::set_value(float rhs, int precision) + { + if (!_attr) return false; + + return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); } PUGI__FN bool xml_attribute::set_value(bool rhs) @@ -6059,6 +6096,27 @@ namespace pugi return true; } + PUGI__FN bool xml_node::remove_attributes() + { + if (!_root) return false; + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + + for (xml_attribute_struct* attr = _root->first_attribute; attr; ) + { + xml_attribute_struct* next = attr->next_attribute; + + impl::destroy_attribute(attr, alloc); + + attr = next; + } + + _root->first_attribute = 0; + + return true; + } + PUGI__FN bool xml_node::remove_child(const char_t* name_) { return remove_child(child(name_)); @@ -6077,6 +6135,27 @@ namespace pugi return true; } + PUGI__FN bool xml_node::remove_children() + { + if (!_root) return false; + + impl::xml_allocator& alloc = impl::get_allocator(_root); + if (!alloc.reserve()) return false; + + for (xml_node_struct* cur = _root->first_child; cur; ) + { + xml_node_struct* next = cur->next_sibling; + + impl::destroy_node(cur, alloc); + + cur = next; + } + + _root->first_child = 0; + + return true; + } + PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) { // append_buffer is only valid for elements/documents @@ -6177,16 +6256,9 @@ namespace pugi PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const { - xml_node found = *this; // Current search context. - - if (!_root || !path_[0]) return found; + xml_node context = path_[0] == delimiter ? root() : *this; - if (path_[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - found = found.root(); - ++path_; - } + if (!context._root) return xml_node(); const char_t* path_segment = path_; @@ -6196,19 +6268,19 @@ namespace pugi while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - if (path_segment == path_segment_end) return found; + if (path_segment == path_segment_end) return context; const char_t* next_segment = path_segment_end; while (*next_segment == delimiter) ++next_segment; if (*path_segment == '.' 
&& path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); + return context.first_element_by_path(next_segment, delimiter); else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); + return context.parent().first_element_by_path(next_segment, delimiter); else { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) + for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) { if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) { @@ -6503,14 +6575,28 @@ namespace pugi { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; + } + + PUGI__FN bool xml_text::set(float rhs, int precision) + { + xml_node_struct* dn = _data_new(); + + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; } PUGI__FN bool xml_text::set(double rhs) { xml_node_struct* dn = _data_new(); - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; + } + + PUGI__FN bool xml_text::set(double rhs, int precision) + { + xml_node_struct* dn = _data_new(); + + return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; } PUGI__FN bool xml_text::set(bool rhs) @@ -6886,8 +6972,7 @@ namespace pugi { reset(); - for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) - append_copy(cur); + impl::node_copy_tree(_root, proto._root); } PUGI__FN void xml_document::_create() @@ -7117,7 +7202,7 @@ reset(); using impl::auto_deleter; // MSVC7 workaround - auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file); + auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); } @@ -7200,7 +7285,7 @@ PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const { using impl::auto_deleter; // MSVC7 workaround - auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); + auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); return impl::save_file_impl(*this, file.data, indent, flags, encoding); } @@ -9686,7 +9771,7 @@ PUGI__NS_BEGIN { xpath_context c(*it, i, size); - if (expr->eval_number(c, stack) == i) + if (expr->eval_number(c, stack) == static_cast<double>(i)) { *last++ = *it; @@ -9710,11 +9795,11 @@ PUGI__NS_BEGIN double er = expr->eval_number(c, stack); - if (er >= 1.0 && er <= size) + if (er >= 1.0 && er <= static_cast<double>(size)) { size_t eri = static_cast<size_t>(er); - if (er == eri) + if (er == static_cast<double>(eri)) { xpath_node r = last[eri - 1]; @@ -10351,35 +10436,38 @@ PUGI__NS_BEGIN if (_rettype == xpath_type_boolean) return _data.variable->get_boolean(); + + // variable needs to be converted to the correct type, this is handled by the fallthrough block below + break; } - // fallthrough default: - { - switch (_rettype) - { - case xpath_type_number: - return convert_number_to_boolean(eval_number(c, stack)); + ; + } - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); + // none of the ast types that return the value directly matched, we need to perform type conversion + switch (_rettype) + { + case xpath_type_number: + return convert_number_to_boolean(eval_number(c, stack)); - return !eval_string(c, stack).empty(); - } + case xpath_type_string: + { + xpath_allocator_capture cr(stack.result); - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); + return !eval_string(c, stack).empty(); + } - return !eval_node_set(c, stack, nodeset_eval_any).empty(); - } + case xpath_type_node_set: + { + xpath_allocator_capture cr(stack.result); - default: - assert(false && "Wrong expression for return type boolean"); // unreachable - return false; - } + return !eval_node_set(c, stack, nodeset_eval_any).empty(); } + + default: + assert(false && "Wrong expression for return type boolean"); // unreachable + return false; } } @@ -10486,36 +10574,38 @@ PUGI__NS_BEGIN if (_rettype == xpath_type_number) return _data.variable->get_number(); + + // variable needs to be converted to the correct type, this is handled by the fallthrough block below + break; } - // fallthrough default: - { - switch (_rettype) - { - case xpath_type_boolean: - return eval_boolean(c, stack) ? 1 : 0; - - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); + ; + } - return convert_string_to_number(eval_string(c, stack).c_str()); - } + // none of the ast types that return the value directly matched, we need to perform type conversion + switch (_rettype) + { + case xpath_type_boolean: + return eval_boolean(c, stack) ? 1 : 0; - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); + case xpath_type_string: + { + xpath_allocator_capture cr(stack.result); - return convert_string_to_number(eval_string(c, stack).c_str()); - } + return convert_string_to_number(eval_string(c, stack).c_str()); + } - default: - assert(false && "Wrong expression for return type number"); // unreachable - return 0; - } + case xpath_type_node_set: + { + xpath_allocator_capture cr(stack.result); + return convert_string_to_number(eval_string(c, stack).c_str()); } + + default: + assert(false && "Wrong expression for return type number"); // unreachable + return 0; } } @@ -10672,7 +10762,7 @@ PUGI__NS_BEGIN double first = round_nearest(_right->eval_number(c, stack)); if (is_nan(first)) return xpath_string(); // NaN - else if (first >= s_length + 1) return xpath_string(); + else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); size_t pos = first < 1 ? 1 : static_cast<size_t>(first); assert(1 <= pos && pos <= s_length + 1); @@ -10696,12 +10786,12 @@ PUGI__NS_BEGIN double last = first + round_nearest(_right->_next->eval_number(c, stack)); if (is_nan(first) || is_nan(last)) return xpath_string(); - else if (first >= s_length + 1) return xpath_string(); + else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); else if (first >= last) return xpath_string(); else if (last < 1) return xpath_string(); size_t pos = first < 1 ? 1 : static_cast<size_t>(first); - size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last); + size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last); assert(1 <= pos && pos <= end && end <= s_length + 1); const char_t* rbegin = s.c_str() + (pos - 1); @@ -10770,34 +10860,37 @@ PUGI__NS_BEGIN if (_rettype == xpath_type_string) return xpath_string::from_const(_data.variable->get_string()); + + // variable needs to be converted to the correct type, this is handled by the fallthrough block below + break; } - // fallthrough default: - { - switch (_rettype) - { - case xpath_type_boolean: - return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); + ; + } - case xpath_type_number: - return convert_number_to_string(eval_number(c, stack), stack.result); + // none of the ast types that return the value directly matched, we need to perform type conversion + switch (_rettype) + { + case xpath_type_boolean: + return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.temp); + case xpath_type_number: + return convert_number_to_string(eval_number(c, stack), stack.result); - xpath_stack swapped_stack = {stack.temp, stack.result}; + case xpath_type_node_set: + { + xpath_allocator_capture cr(stack.temp); - xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); - return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); - } + xpath_stack swapped_stack = {stack.temp, stack.result}; - default: - assert(false && "Wrong expression for return type string"); // unreachable - return xpath_string(); - } + xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); + return ns.empty() ?
xpath_string() : string_value(ns.first(), stack.result); } + + default: + assert(false && "Wrong expression for return type string"); // unreachable + return xpath_string(); } } @@ -10919,13 +11012,18 @@ PUGI__NS_BEGIN return ns; } + + // variable needs to be converted to the correct type, this is handled by the fallthrough block below + break; } - // fallthrough default: - assert(false && "Wrong expression for return type node set"); // unreachable - return xpath_node_set_raw(); + ; } + + // none of the ast types that return the value directly matched, but conversions to node set are invalid + assert(false && "Wrong expression for return type node set"); // unreachable + return xpath_node_set_raw(); } void optimize(xpath_allocator* alloc) @@ -11060,6 +11158,14 @@ PUGI__NS_BEGIN } }; + static const size_t xpath_ast_depth_limit = + #ifdef PUGIXML_XPATH_DEPTH_LIMIT + PUGIXML_XPATH_DEPTH_LIMIT + #else + 1024 + #endif + ; + struct xpath_parser { xpath_allocator* _alloc; @@ -11072,6 +11178,8 @@ PUGI__NS_BEGIN char_t _scratch[32]; + size_t _depth; + xpath_ast_node* error(const char* message) { _result->error = message; @@ -11088,6 +11196,11 @@ PUGI__NS_BEGIN return 0; } + xpath_ast_node* error_rec() + { + return error("Exceeded maximum allowed query depth"); + } + void* alloc_node() { return _alloc->allocate(sizeof(xpath_ast_node)); @@ -11443,6 +11556,8 @@ PUGI__NS_BEGIN return error("Unrecognized function call"); _lexer.next(); + size_t old_depth = _depth; + while (_lexer.current() != lex_close_brace) { if (argc > 0) @@ -11452,6 +11567,9 @@ PUGI__NS_BEGIN _lexer.next(); } + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* n = parse_expression(); if (!n) return 0; @@ -11464,6 +11582,8 @@ PUGI__NS_BEGIN _lexer.next(); + _depth = old_depth; + return parse_function(function, argc, args); } @@ -11480,10 +11600,15 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_primary_expression(); if (!n) return 0; + size_t old_depth = _depth; + while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + if (n->rettype() != xpath_type_node_set) return error("Predicate has to be applied to node set"); @@ -11499,6 +11624,8 @@ PUGI__NS_BEGIN _lexer.next(); } + _depth = old_depth; + return n; } @@ -11650,12 +11777,17 @@ PUGI__NS_BEGIN xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); if (!n) return 0; + size_t old_depth = _depth; + xpath_ast_node* last = 0; while (_lexer.current() == lex_open_square_brace) { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* expr = parse_expression(); if (!expr) return 0; @@ -11672,6 +11804,8 @@ PUGI__NS_BEGIN last = pred; } + _depth = old_depth; + return n; } @@ -11681,11 +11815,16 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_step(set); if (!n) return 0; + size_t old_depth = _depth; + while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) { lexeme_t l = _lexer.current(); _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + if (l == lex_double_slash) { n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); @@ -11696,6 +11835,8 @@ PUGI__NS_BEGIN if (!n) return 0; } + _depth = old_depth; + return n; } @@ -11881,6 +12022,9 @@ PUGI__NS_BEGIN { _lexer.next(); + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* rhs = parse_path_or_unary_expression(); if (!rhs) return 0; @@ -11926,13 +12070,22 @@ PUGI__NS_BEGIN // | 
MultiplicativeExpr 'mod' UnaryExpr xpath_ast_node* parse_expression(int limit = 0) { + size_t old_depth = _depth; + + if (++_depth > xpath_ast_depth_limit) + return error_rec(); + xpath_ast_node* n = parse_path_or_unary_expression(); if (!n) return 0; - return parse_expression_rec(n, limit); + n = parse_expression_rec(n, limit); + + _depth = old_depth; + + return n; } - xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) + xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) { } @@ -11941,6 +12094,8 @@ PUGI__NS_BEGIN xpath_ast_node* n = parse_expression(); if (!n) return 0; + assert(_depth == 0); + // check if there are unparsed tokens left if (_lexer.current() != lex_eof) return error("Incorrect query"); @@ -12840,7 +12995,7 @@ namespace pugi #endif /** - * Copyright (c) 2006-2019 Arseny Kapoulkine + * Copyright (c) 2006-2020 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/lib/pugixml/pugixml.hpp b/lib/pugixml/pugixml.hpp index da8ff36..7e2ce77 100644 --- a/lib/pugixml/pugixml.hpp +++ b/lib/pugixml/pugixml.hpp @@ -1,7 +1,7 @@ /** - * pugixml parser - version 1.10 + * pugixml parser - version 1.11 * -------------------------------------------------------- - * Copyright (C) 2006-2019, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://pugixml.org/ * * This library is distributed under the MIT License. See notice at the end @@ -14,7 +14,7 @@ #ifndef PUGIXML_VERSION // Define version macro; evaluates to major * 1000 + minor * 10 + patch so that it's safe to use in less-than comparisons // Note: pugixml used major * 100 + minor * 10 + patch format up until 1.9 (which had version identifier 190); starting from pugixml 1.10, the minor version number is two digits -# define PUGIXML_VERSION 1100 +# define PUGIXML_VERSION 1110 #endif // Include user configuration file (this can define various configuration macros) @@ -111,6 +111,15 @@ # endif #endif +// If C++ is 2011 or higher, use 'nullptr' +#ifndef PUGIXML_NULL +# if __cplusplus >= 201103 +# define PUGIXML_NULL nullptr +# else +# define PUGIXML_NULL 0 +# endif +#endif + // Character interface macros #ifdef PUGIXML_WCHAR_MODE # define PUGIXML_TEXT(t) L ## t @@ -263,6 +272,9 @@ namespace pugi // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. 
const unsigned int format_default = format_indent; + const int default_double_precision = 17; + const int default_float_precision = 9; + // Forward declarations struct xml_attribute_struct; struct xml_node_struct; @@ -410,7 +422,9 @@ namespace pugi bool set_value(long rhs); bool set_value(unsigned long rhs); bool set_value(double rhs); + bool set_value(double rhs, int precision); bool set_value(float rhs); + bool set_value(float rhs, int precision); bool set_value(bool rhs); #ifdef PUGIXML_HAS_LONG_LONG @@ -576,10 +590,16 @@ namespace pugi bool remove_attribute(const xml_attribute& a); bool remove_attribute(const char_t* name); + // Remove all attributes + bool remove_attributes(); + // Remove specified child bool remove_child(const xml_node& n); bool remove_child(const char_t* name); + // Remove all children + bool remove_children(); + // Parses buffer as an XML document fragment and appends all nodes as children of the current node. // Copies/converts the buffer, so it may be deleted or changed after the function returns. // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory. @@ -650,15 +670,15 @@ namespace pugi #ifndef PUGIXML_NO_XPATH // Select single node by evaluating XPath query. Returns first node from the resulting node set. - xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node select_node(const char_t* query, xpath_variable_set* variables = PUGIXML_NULL) const; xpath_node select_node(const xpath_query& query) const; // Select node set by evaluating XPath query - xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; + xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = PUGIXML_NULL) const; xpath_node_set select_nodes(const xpath_query& query) const; // (deprecated: use select_node instead) Select single node by evaluating XPath query. - PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; + PUGIXML_DEPRECATED xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = PUGIXML_NULL) const; PUGIXML_DEPRECATED xpath_node select_single_node(const xpath_query& query) const; #endif @@ -761,7 +781,9 @@ namespace pugi bool set(long rhs); bool set(unsigned long rhs); bool set(double rhs); + bool set(double rhs, int precision); bool set(float rhs); + bool set(float rhs, int precision); bool set(bool rhs); #ifdef PUGIXML_HAS_LONG_LONG @@ -1199,7 +1221,7 @@ namespace pugi public: // Construct a compiled object from XPath expression. // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors. 
- explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0); + explicit xpath_query(const char_t* query, xpath_variable_set* variables = PUGIXML_NULL); // Constructor xpath_query(); @@ -1452,7 +1474,7 @@ namespace std #endif /** - * Copyright (c) 2006-2019 Arseny Kapoulkine + * Copyright (c) 2006-2020 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation From ad01b454bb7a033db83c12157931d32562272637 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sat, 15 Jan 2022 22:33:50 -0800 Subject: [PATCH 02/12] Update LZMA to 21.07 --- Leanify.vcxproj | 24 +- lib/LZMA/7zTypes.h | 900 ++++--- lib/LZMA/Alloc.c | 918 +++---- lib/LZMA/Alloc.h | 109 +- lib/LZMA/Compiler.h | 76 +- lib/LZMA/CpuArch.c | 478 ++++ lib/LZMA/CpuArch.h | 442 +++ lib/LZMA/LzFind.c | 2755 +++++++++++-------- lib/LZMA/LzFind.h | 257 +- lib/LZMA/LzFindMt.c | 2253 ++++++++++------ lib/LZMA/LzFindMt.h | 210 +- lib/LZMA/LzFindOpt.c | 578 ++++ lib/LZMA/LzHash.h | 91 +- lib/LZMA/LzmaDec.c | 2548 ++++++++++-------- lib/LZMA/LzmaDec.h | 470 ++-- lib/LZMA/LzmaEnc.c | 6141 ++++++++++++++++++++++-------------------- lib/LZMA/LzmaEnc.h | 154 +- lib/LZMA/Precomp.h | 20 +- lib/LZMA/Threads.c | 635 ++++- lib/LZMA/Threads.h | 300 ++- 20 files changed, 11531 insertions(+), 7828 deletions(-) create mode 100644 lib/LZMA/CpuArch.c create mode 100644 lib/LZMA/CpuArch.h create mode 100644 lib/LZMA/LzFindOpt.c diff --git a/Leanify.vcxproj b/Leanify.vcxproj index f0ea0dc..3f9628c 100644 --- a/Leanify.vcxproj +++ b/Leanify.vcxproj @@ -321,10 +321,20 @@ + + + + + + + + + + @@ -354,17 +364,7 @@ - - - - - - - - - - @@ -377,7 +377,9 @@ - + + + diff --git a/lib/LZMA/7zTypes.h b/lib/LZMA/7zTypes.h index 65b3af6..afe2412 100644 --- a/lib/LZMA/7zTypes.h +++ b/lib/LZMA/7zTypes.h @@ -1,375 +1,525 @@ -/* 7zTypes.h -- Basic types -2018-08-04 : Igor Pavlov : Public domain */ - -#ifndef __7Z_TYPES_H -#define __7Z_TYPES_H - -#ifdef _WIN32 -/* #include <windows.h> */ -#endif - -#include <stddef.h> - -#ifndef EXTERN_C_BEGIN -#ifdef __cplusplus -#define EXTERN_C_BEGIN extern "C" { -#define EXTERN_C_END } -#else -#define EXTERN_C_BEGIN -#define EXTERN_C_END -#endif -#endif - -EXTERN_C_BEGIN - -#define SZ_OK 0 - -#define SZ_ERROR_DATA 1 -#define SZ_ERROR_MEM 2 -#define SZ_ERROR_CRC 3 -#define SZ_ERROR_UNSUPPORTED 4 -#define SZ_ERROR_PARAM 5 -#define SZ_ERROR_INPUT_EOF 6 -#define SZ_ERROR_OUTPUT_EOF 7 -#define SZ_ERROR_READ 8 -#define SZ_ERROR_WRITE 9 -#define SZ_ERROR_PROGRESS 10 -#define SZ_ERROR_FAIL 11 -#define SZ_ERROR_THREAD 12 - -#define SZ_ERROR_ARCHIVE 16 -#define SZ_ERROR_NO_ARCHIVE 17 - -typedef int SRes; - - -#ifdef _WIN32 - -/* typedef DWORD WRes; */ -typedef unsigned WRes; -#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) - -#else - -typedef int WRes; -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_WIN32 -#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) - -#endif - - -#ifndef RINOK -#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } -#endif - -typedef unsigned char Byte; -typedef short Int16; -typedef unsigned short UInt16; - -#ifdef _LZMA_UINT32_IS_ULONG -typedef long Int32; -typedef unsigned long UInt32; -#else -typedef int Int32; -typedef unsigned int UInt32; -#endif - -#ifdef _SZ_NO_INT_64 - -/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
- NOTES: Some code will work incorrectly in that case! */ - -typedef long Int64; -typedef unsigned long UInt64; - -#else - -#if defined(_MSC_VER) || defined(__BORLANDC__) -typedef __int64 Int64; -typedef unsigned __int64 UInt64; -#define UINT64_CONST(n) n -#else -typedef long long int Int64; -typedef unsigned long long int UInt64; -#define UINT64_CONST(n) n ## ULL -#endif - -#endif - -#ifdef _LZMA_NO_SYSTEM_SIZE_T -typedef UInt32 SizeT; -#else -typedef size_t SizeT; -#endif - -typedef int BoolInt; -/* typedef BoolInt Bool; */ -#define True 1 -#define False 0 - - -#ifdef _WIN32 -#define MY_STD_CALL __stdcall -#else -#define MY_STD_CALL -#endif - -#ifdef _MSC_VER - -#if _MSC_VER >= 1300 -#define MY_NO_INLINE __declspec(noinline) -#else -#define MY_NO_INLINE -#endif - -#define MY_FORCE_INLINE __forceinline - -#define MY_CDECL __cdecl -#define MY_FAST_CALL __fastcall - -#else - -#define MY_NO_INLINE -#define MY_FORCE_INLINE -#define MY_CDECL -#define MY_FAST_CALL - -/* inline keyword : for C++ / C99 */ - -/* GCC, clang: */ -/* -#if defined (__GNUC__) && (__GNUC__ >= 4) -#define MY_FORCE_INLINE __attribute__((always_inline)) -#define MY_NO_INLINE __attribute__((noinline)) -#endif -*/ - -#endif - - -/* The following interfaces use first parameter as pointer to structure */ - -typedef struct IByteIn IByteIn; -struct IByteIn -{ - Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ -}; -#define IByteIn_Read(p) (p)->Read(p) - - -typedef struct IByteOut IByteOut; -struct IByteOut -{ - void (*Write)(const IByteOut *p, Byte b); -}; -#define IByteOut_Write(p, b) (p)->Write(p, b) - - -typedef struct ISeqInStream ISeqInStream; -struct ISeqInStream -{ - SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) < input(*size)) is allowed */ -}; -#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) - -/* it can return SZ_ERROR_INPUT_EOF */ -SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); -SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); -SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); - - -typedef struct ISeqOutStream ISeqOutStream; -struct ISeqOutStream -{ - size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); - /* Returns: result - the number of actually written bytes. - (result < size) means error */ -}; -#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) - -typedef enum -{ - SZ_SEEK_SET = 0, - SZ_SEEK_CUR = 1, - SZ_SEEK_END = 2 -} ESzSeek; - - -typedef struct ISeekInStream ISeekInStream; -struct ISeekInStream -{ - SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ - SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); -}; -#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) -#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) - - -typedef struct ILookInStream ILookInStream; -struct ILookInStream -{ - SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); - /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. - (output(*size) > input(*size)) is not allowed - (output(*size) < input(*size)) is allowed */ - SRes (*Skip)(const ILookInStream *p, size_t offset); - /* offset must be <= output(*size) of Look */ - - SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); - /* reads directly (without buffer). 
It's same as ISeqInStream::Read */ - SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); -}; - -#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) -#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) -#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) -#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) - - -SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); -SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); - -/* reads via ILookInStream::Read */ -SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); -SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); - - - -typedef struct -{ - ILookInStream vt; - const ISeekInStream *realStream; - - size_t pos; - size_t size; /* it's data size */ - - /* the following variables must be set outside */ - Byte *buf; - size_t bufSize; -} CLookToRead2; - -void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); - -#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } - - -typedef struct -{ - ISeqInStream vt; - const ILookInStream *realStream; -} CSecToLook; - -void SecToLook_CreateVTable(CSecToLook *p); - - - -typedef struct -{ - ISeqInStream vt; - const ILookInStream *realStream; -} CSecToRead; - -void SecToRead_CreateVTable(CSecToRead *p); - - -typedef struct ICompressProgress ICompressProgress; - -struct ICompressProgress -{ - SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); - /* Returns: result. (result != SZ_OK) means break. - Value (UInt64)(Int64)-1 for size means unknown value. */ -}; -#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) - - - -typedef struct ISzAlloc ISzAlloc; -typedef const ISzAlloc * ISzAllocPtr; - -struct ISzAlloc -{ - void *(*Alloc)(ISzAllocPtr p, size_t size); - void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ -}; - -#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) -#define ISzAlloc_Free(p, a) (p)->Free(p, a) - -/* deprecated */ -#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) -#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) - - - - - -#ifndef MY_offsetof - #ifdef offsetof - #define MY_offsetof(type, m) offsetof(type, m) - /* - #define MY_offsetof(type, m) FIELD_OFFSET(type, m) - */ - #else - #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) - #endif -#endif - - - -#ifndef MY_container_of - -/* -#define MY_container_of(ptr, type, m) container_of(ptr, type, m) -#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) -#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) -#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) -*/ - -/* - GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" - GCC 3.4.4 : classes with constructor - GCC 4.8.1 : classes with non-public variable members" -*/ - -#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? 
(ptr) : &((type *)0)->m) - MY_offsetof(type, m))) - - -#endif - -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) - -/* -#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -*/ -#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) - -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) -/* -#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) -*/ - - - -#ifdef _WIN32 - -#define CHAR_PATH_SEPARATOR '\\' -#define WCHAR_PATH_SEPARATOR L'\\' -#define STRING_PATH_SEPARATOR "\\" -#define WSTRING_PATH_SEPARATOR L"\\" - -#else - -#define CHAR_PATH_SEPARATOR '/' -#define WCHAR_PATH_SEPARATOR L'/' -#define STRING_PATH_SEPARATOR "/" -#define WSTRING_PATH_SEPARATOR L"/" - -#endif - -EXTERN_C_END - -#endif +/* 7zTypes.h -- Basic types +2021-12-25 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#ifdef _WIN32 +/* #include <windows.h> */ +#else +#include <errno.h> +#endif + +#include <stddef.h> + +#ifndef EXTERN_C_BEGIN +#ifdef __cplusplus +#define EXTERN_C_BEGIN extern "C" { +#define EXTERN_C_END } +#else +#define EXTERN_C_BEGIN +#define EXTERN_C_END +#endif +#endif + +EXTERN_C_BEGIN + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + + +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + +#ifdef _WIN32 + +/* typedef DWORD WRes; */ +typedef unsigned WRes; +#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) + +// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + +#else // _WIN32 + +// #define ENV_HAVE_LSTAT +typedef int WRes; + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY__FACILITY_ERRNO 0x800 +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ?
((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_PARAMETER EINVAL +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits +typedef long INT_PTR; +typedef unsigned long UINT_PTR; + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ + +#endif + + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang 
on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) + + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! */ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#define UINT64_CONST(n) n +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#define UINT64_CONST(n) n ## ULL +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int BoolInt; +/* typedef BoolInt Bool; */ +#define True 1 +#define False 0 + + +#ifdef _WIN32 +#define MY_STD_CALL __stdcall +#else +#define MY_STD_CALL +#endif + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE __forceinline + +#define MY_CDECL __cdecl +#define MY_FAST_CALL __fastcall + +#else // _MSC_VER + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) +#define MY_NO_INLINE __attribute__((noinline)) +// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define MY_NO_INLINE +#endif + +#define MY_FORCE_INLINE + + +#define MY_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define MY_FAST_CALL __attribute__((fastcall)) +// #define MY_FAST_CALL __attribute__((cdecl)) +#define MY_FAST_CALL +#elif defined(MY_CPU_AMD64) +// #define MY_FAST_CALL __attribute__((ms_abi)) +#define MY_FAST_CALL +#else +#define MY_FAST_CALL +#endif + +#endif // _MSC_VER + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct IByteIn IByteIn; +struct IByteIn +{ + Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ +}; +#define IByteIn_Read(p) (p)->Read(p) + + +typedef struct IByteOut IByteOut; +struct IByteOut +{ + void (*Write)(const IByteOut *p, Byte b); +}; +#define IByteOut_Write(p, b) (p)->Write(p, b) + + +typedef struct ISeqInStream ISeqInStream; +struct ISeqInStream +{ + SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +}; +#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); + + +typedef struct ISeqOutStream ISeqOutStream; +struct ISeqOutStream +{ + size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. 
+ (result < size) means error */ +}; +#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size) + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + + +typedef struct ISeekInStream ISeekInStream; +struct ISeekInStream +{ + SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); +}; +#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +typedef struct ILookInStream ILookInStream; +struct ILookInStream +{ + SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(const ILookInStream *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(const ILookInStream *p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); +}; + +#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) +#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset) +#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size) +#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) + + +SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); + + + +typedef struct +{ + ILookInStream vt; + const ISeekInStream *realStream; + + size_t pos; + size_t size; /* it's data size */ + + /* the following variables must be set outside */ + Byte *buf; + size_t bufSize; +} CLookToRead2; + +void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); + +#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + + + +typedef struct +{ + ISeqInStream vt; + const ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + + +typedef struct ICompressProgress ICompressProgress; + +struct ICompressProgress +{ + SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
*/ +}; +#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) + + + +typedef struct ISzAlloc ISzAlloc; +typedef const ISzAlloc * ISzAllocPtr; + +struct ISzAlloc +{ + void *(*Alloc)(ISzAllocPtr p, size_t size); + void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */ +}; + +#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size) +#define ISzAlloc_Free(p, a) (p)->Free(p, a) + +/* deprecated */ +#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size) +#define IAlloc_Free(p, a) ISzAlloc_Free(p, a) + + + + + +#ifndef MY_offsetof + #ifdef offsetof + #define MY_offsetof(type, m) offsetof(type, m) + /* + #define MY_offsetof(type, m) FIELD_OFFSET(type, m) + */ + #else + #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m)) + #endif +#endif + + + +#ifndef MY_container_of + +/* +#define MY_container_of(ptr, type, m) container_of(ptr, type, m) +#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) +#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) +#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) +*/ + +/* + GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly" + GCC 3.4.4 : classes with constructor + GCC 4.8.1 : classes with non-public variable members" +*/ + +#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) + +#endif + +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) + +/* +#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +*/ +#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) + +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) +/* +#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) +*/ + + +#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) + +#ifdef _WIN32 + +#define CHAR_PATH_SEPARATOR '\\' +#define WCHAR_PATH_SEPARATOR L'\\' +#define STRING_PATH_SEPARATOR "\\" +#define WSTRING_PATH_SEPARATOR L"\\" + +#else + +#define CHAR_PATH_SEPARATOR '/' +#define WCHAR_PATH_SEPARATOR L'/' +#define STRING_PATH_SEPARATOR "/" +#define WSTRING_PATH_SEPARATOR L"/" + +#endif + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/Alloc.c b/lib/LZMA/Alloc.c index bcede4b..142a1ea 100644 --- a/lib/LZMA/Alloc.c +++ b/lib/LZMA/Alloc.c @@ -1,455 +1,463 @@ -/* Alloc.c -- Memory allocation functions -2018-04-27 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include <stdio.h> - -#ifdef _WIN32 -#include <windows.h> -#endif -#include <stdlib.h> - -#include "Alloc.h" - -/* #define _SZ_ALLOC_DEBUG */ - -/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ -#ifdef _SZ_ALLOC_DEBUG - -#include <stdio.h> -int g_allocCount = 0; -int g_allocCountMid = 0; -int g_allocCountBig = 0; - - -#define CONVERT_INT_TO_STR(charType, tempSize) \ - unsigned char temp[tempSize]; unsigned i = 0; \ - while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ - *s++ = (charType)('0' + (unsigned)val); \ - while (i != 0) { i--; *s++ = temp[i]; } \ - *s = 0; - -static void ConvertUInt64ToString(UInt64 val, char *s) -{ - CONVERT_INT_TO_STR(char, 24); -} - -#define GET_HEX_CHAR(t) ((char)(((t < 10) ?
('0' + t) : ('A' + (t - 10))))) - -static void ConvertUInt64ToHex(UInt64 val, char *s) -{ - UInt64 v = val; - unsigned i; - for (i = 1;; i++) - { - v >>= 4; - if (v == 0) - break; - } - s[i] = 0; - do - { - unsigned t = (unsigned)(val & 0xF); - val >>= 4; - s[--i] = GET_HEX_CHAR(t); - } - while (i); -} - -#define DEBUG_OUT_STREAM stderr - -static void Print(const char *s) -{ - fputs(s, DEBUG_OUT_STREAM); -} - -static void PrintAligned(const char *s, size_t align) -{ - size_t len = strlen(s); - for(;;) - { - fputc(' ', DEBUG_OUT_STREAM); - if (len >= align) - break; - ++len; - } - Print(s); -} - -static void PrintLn() -{ - Print("\n"); -} - -static void PrintHex(UInt64 v, size_t align) -{ - char s[32]; - ConvertUInt64ToHex(v, s); - PrintAligned(s, align); -} - -static void PrintDec(UInt64 v, size_t align) -{ - char s[32]; - ConvertUInt64ToString(v, s); - PrintAligned(s, align); -} - -static void PrintAddr(void *p) -{ - PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12); -} - - -#define PRINT_ALLOC(name, cnt, size, ptr) \ - Print(name " "); \ - PrintDec(cnt++, 10); \ - PrintHex(size, 10); \ - PrintAddr(ptr); \ - PrintLn(); - -#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ - Print(name " "); \ - PrintDec(--cnt, 10); \ - PrintAddr(ptr); \ - PrintLn(); } - -#else - -#define PRINT_ALLOC(name, cnt, size, ptr) -#define PRINT_FREE(name, cnt, ptr) -#define Print(s) -#define PrintLn() -#define PrintHex(v, align) -#define PrintDec(v, align) -#define PrintAddr(p) - -#endif - - - -void *MyAlloc(size_t size) -{ - if (size == 0) - return NULL; - #ifdef _SZ_ALLOC_DEBUG - { - void *p = malloc(size); - PRINT_ALLOC("Alloc ", g_allocCount, size, p); - return p; - } - #else - return malloc(size); - #endif -} - -void MyFree(void *address) -{ - PRINT_FREE("Free ", g_allocCount, address); - - free(address); -} - -#ifdef _WIN32 - -void *MidAlloc(size_t size) -{ - if (size == 0) - return NULL; - - PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); - - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); -} - -void MidFree(void *address) -{ - PRINT_FREE("Free-Mid", g_allocCountMid, address); - - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#ifndef MEM_LARGE_PAGES -#undef _7ZIP_LARGE_PAGES -#endif - -#ifdef _7ZIP_LARGE_PAGES -SIZE_T g_LargePageSize = 0; -typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); -#endif - -void SetLargePageSize() -{ - #ifdef _7ZIP_LARGE_PAGES - SIZE_T size; - GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) - GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); - if (!largePageMinimum) - return; - size = largePageMinimum(); - if (size == 0 || (size & (size - 1)) != 0) - return; - g_LargePageSize = size; - #endif -} - - -void *BigAlloc(size_t size) -{ - if (size == 0) - return NULL; - - PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); - - #ifdef _7ZIP_LARGE_PAGES - { - SIZE_T ps = g_LargePageSize; - if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) - { - size_t size2; - ps--; - size2 = (size + ps) & ~ps; - if (size2 >= size) - { - void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - if (res) - return res; - } - } - } - #endif - - return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); -} - -void BigFree(void *address) -{ - PRINT_FREE("Free-Big", g_allocCountBig, address); - - if (!address) - return; - VirtualFree(address, 0, MEM_RELEASE); -} - -#endif - - -static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } -static void SzFree(ISzAllocPtr p, 
void *address) { UNUSED_VAR(p); MyFree(address); } -const ISzAlloc g_Alloc = { SzAlloc, SzFree }; - -static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } -static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } -const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; - -static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } -static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } -const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; - - -/* - uintptr_t : C99 (optional) - : unsupported in VS6 -*/ - -#ifdef _WIN32 - typedef UINT_PTR UIntPtr; -#else - /* - typedef uintptr_t UIntPtr; - */ - typedef ptrdiff_t UIntPtr; -#endif - - -#define ADJUST_ALLOC_SIZE 0 -/* -#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) -*/ -/* - Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if - MyAlloc() can return address that is NOT multiple of sizeof(void *). -*/ - - -/* -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) -*/ -#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) - -#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) - - -#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32) - #define USE_posix_memalign -#endif - -/* - This posix_memalign() is for test purposes only. - We also need special Free() function instead of free(), - if this posix_memalign() is used. -*/ - -/* -static int posix_memalign(void **ptr, size_t align, size_t size) -{ - size_t newSize = size + align; - void *p; - void *pAligned; - *ptr = NULL; - if (newSize < size) - return 12; // ENOMEM - p = MyAlloc(newSize); - if (!p) - return 12; // ENOMEM - pAligned = MY_ALIGN_PTR_UP_PLUS(p, align); - ((void **)pAligned)[-1] = p; - *ptr = pAligned; - return 0; -} -*/ - -/* - ALLOC_ALIGN_SIZE >= sizeof(void *) - ALLOC_ALIGN_SIZE >= cache_line_size -*/ - -#define ALLOC_ALIGN_SIZE ((size_t)1 << 7) - -static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size) -{ - #ifndef USE_posix_memalign - - void *p; - void *pAligned; - size_t newSize; - UNUSED_VAR(pp); - - /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned - block to prevent cache line sharing with another allocated blocks */ - - newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE; - if (newSize < size) - return NULL; - - p = MyAlloc(newSize); - - if (!p) - return NULL; - pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE); - - Print(" size="); PrintHex(size, 8); - Print(" a_size="); PrintHex(newSize, 8); - Print(" ptr="); PrintAddr(p); - Print(" a_ptr="); PrintAddr(pAligned); - PrintLn(); - - ((void **)pAligned)[-1] = p; - - return pAligned; - - #else - - void *p; - UNUSED_VAR(pp); - if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) - return NULL; - - Print(" posix_memalign="); PrintAddr(p); - PrintLn(); - - return p; - - #endif -} - - -static void SzAlignedFree(ISzAllocPtr pp, void *address) -{ - UNUSED_VAR(pp); - #ifndef USE_posix_memalign - if (address) - MyFree(((void **)address)[-1]); - #else - free(address); - #endif -} - - -const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree }; - - - -#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *)) - -/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */ -#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1] -/* -#define REAL_BLOCK_PTR_VAR(p) 
((void **)(p))[-1] -*/ - -static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) -{ - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); - void *adr; - void *pAligned; - size_t newSize; - size_t extra; - size_t alignSize = (size_t)1 << p->numAlignBits; - - if (alignSize < sizeof(void *)) - alignSize = sizeof(void *); - - if (p->offset >= alignSize) - return NULL; - - /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned - block to prevent cache line sharing with another allocated blocks */ - extra = p->offset & (sizeof(void *) - 1); - newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE; - if (newSize < size) - return NULL; - - adr = ISzAlloc_Alloc(p->baseAlloc, newSize); - - if (!adr) - return NULL; - - pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr + - alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset; - - PrintLn(); - Print("- Aligned: "); - Print(" size="); PrintHex(size, 8); - Print(" a_size="); PrintHex(newSize, 8); - Print(" ptr="); PrintAddr(adr); - Print(" a_ptr="); PrintAddr(pAligned); - PrintLn(); - - REAL_BLOCK_PTR_VAR(pAligned) = adr; - - return pAligned; -} - - -static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address) -{ - if (address) - { - CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); - PrintLn(); - Print("- Aligned Free: "); - PrintLn(); - ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address)); - } -} - - -void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p) -{ - p->vt.Alloc = AlignOffsetAlloc_Alloc; - p->vt.Free = AlignOffsetAlloc_Free; -} +/* Alloc.c -- Memory allocation functions +2021-07-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +#ifdef _WIN32 +#include +#endif +#include + +#include "Alloc.h" + +/* #define _SZ_ALLOC_DEBUG */ + +/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef _SZ_ALLOC_DEBUG + +#include +int g_allocCount = 0; +int g_allocCountMid = 0; +int g_allocCountBig = 0; + + +#define CONVERT_INT_TO_STR(charType, tempSize) \ + unsigned char temp[tempSize]; unsigned i = 0; \ + while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ + *s++ = (charType)('0' + (unsigned)val); \ + while (i != 0) { i--; *s++ = temp[i]; } \ + *s = 0; + +static void ConvertUInt64ToString(UInt64 val, char *s) +{ + CONVERT_INT_TO_STR(char, 24); +} + +#define GET_HEX_CHAR(t) ((char)(((t < 10) ? 
('0' + t) : ('A' + (t - 10))))) + +static void ConvertUInt64ToHex(UInt64 val, char *s) +{ + UInt64 v = val; + unsigned i; + for (i = 1;; i++) + { + v >>= 4; + if (v == 0) + break; + } + s[i] = 0; + do + { + unsigned t = (unsigned)(val & 0xF); + val >>= 4; + s[--i] = GET_HEX_CHAR(t); + } + while (i); +} + +#define DEBUG_OUT_STREAM stderr + +static void Print(const char *s) +{ + fputs(s, DEBUG_OUT_STREAM); +} + +static void PrintAligned(const char *s, size_t align) +{ + size_t len = strlen(s); + for(;;) + { + fputc(' ', DEBUG_OUT_STREAM); + if (len >= align) + break; + ++len; + } + Print(s); +} + +static void PrintLn() +{ + Print("\n"); +} + +static void PrintHex(UInt64 v, size_t align) +{ + char s[32]; + ConvertUInt64ToHex(v, s); + PrintAligned(s, align); +} + +static void PrintDec(UInt64 v, size_t align) +{ + char s[32]; + ConvertUInt64ToString(v, s); + PrintAligned(s, align); +} + +static void PrintAddr(void *p) +{ + PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12); +} + + +#define PRINT_ALLOC(name, cnt, size, ptr) \ + Print(name " "); \ + PrintDec(cnt++, 10); \ + PrintHex(size, 10); \ + PrintAddr(ptr); \ + PrintLn(); + +#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ + Print(name " "); \ + PrintDec(--cnt, 10); \ + PrintAddr(ptr); \ + PrintLn(); } + +#else + +#define PRINT_ALLOC(name, cnt, size, ptr) +#define PRINT_FREE(name, cnt, ptr) +#define Print(s) +#define PrintLn() +#define PrintHex(v, align) +#define PrintAddr(p) + +#endif + + + +void *MyAlloc(size_t size) +{ + if (size == 0) + return NULL; + PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); + #ifdef _SZ_ALLOC_DEBUG + { + void *p = malloc(size); + // PRINT_ALLOC("Alloc ", g_allocCount, size, p); + return p; + } + #else + return malloc(size); + #endif +} + +void MyFree(void *address) +{ + PRINT_FREE("Free ", g_allocCount, address); + + free(address); +} + +#ifdef _WIN32 + +void *MidAlloc(size_t size) +{ + if (size == 0) + return NULL; + + PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void MidFree(void *address) +{ + PRINT_FREE("Free-Mid", g_allocCountMid, address); + + if (!address) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifdef _7ZIP_LARGE_PAGES + +#ifdef MEM_LARGE_PAGES + #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES +#else + #define MY__MEM_LARGE_PAGES 0x20000000 +#endif + +extern +SIZE_T g_LargePageSize; +SIZE_T g_LargePageSize = 0; +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); + +#endif // _7ZIP_LARGE_PAGES + +void SetLargePageSize() +{ + #ifdef _7ZIP_LARGE_PAGES + SIZE_T size; + GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) + GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); + if (!largePageMinimum) + return; + size = largePageMinimum(); + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; + #endif +} + + +void *BigAlloc(size_t size) +{ + if (size == 0) + return NULL; + + PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); + + #ifdef _7ZIP_LARGE_PAGES + { + SIZE_T ps = g_LargePageSize; + if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) + { + size_t size2; + ps--; + size2 = (size + ps) & ~ps; + if (size2 >= size) + { + void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); + if (res) + return res; + } + } + } + #endif + + return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +} + +void BigFree(void *address) +{ + PRINT_FREE("Free-Big", g_allocCountBig, address); + + if (!address) + return; + VirtualFree(address, 0, 
MEM_RELEASE); +} + +#endif + + +static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } +const ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +#ifdef _WIN32 +static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } +static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } +static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } +static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; +const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; +#endif + +/* + uintptr_t : C99 (optional) + : unsupported in VS6 +*/ + +#ifdef _WIN32 + typedef UINT_PTR UIntPtr; +#else + /* + typedef uintptr_t UIntPtr; + */ + typedef ptrdiff_t UIntPtr; +#endif + + +#define ADJUST_ALLOC_SIZE 0 +/* +#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1) +*/ +/* + Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if + MyAlloc() can return address that is NOT multiple of sizeof(void *). +*/ + + +/* +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1)))) +*/ +#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1)))) + + +#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) + #define USE_posix_memalign +#endif + +#ifndef USE_posix_memalign +#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align) +#endif + +/* + This posix_memalign() is for test purposes only. + We also need special Free() function instead of free(), + if this posix_memalign() is used. 
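+
+  For illustration, the stash-the-base-pointer scheme that
+  SzAlignedAlloc() / SzAlignedFree() below rely on (hypothetical
+  numbers, assuming ALLOC_ALIGN_SIZE == 128, ADJUST_ALLOC_SIZE == 0,
+  and a MyAlloc() that returns multiples of sizeof(void *)):
+
+    newSize  = size + 128;                    // room to round up + a hidden slot
+    p        = MyAlloc(newSize);              // suppose this returns 0x1008
+    pAligned = MY_ALIGN_PTR_UP_PLUS(p, 128);  // align_down(0x1008 + 128) = 0x1080
+    ((void **)pAligned)[-1] = p;              // 0x1008 is stored at 0x1078
+
+  SzAlignedFree() recovers the base pointer via ((void **)pAligned)[-1]
+  and passes it to MyFree(). Adding the full alignment (not align - 1)
+  leaves at least sizeof(void *) bytes below pAligned for that slot.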
+*/
+
+/*
+static int posix_memalign(void **ptr, size_t align, size_t size)
+{
+  size_t newSize = size + align;
+  void *p;
+  void *pAligned;
+  *ptr = NULL;
+  if (newSize < size)
+    return 12; // ENOMEM
+  p = MyAlloc(newSize);
+  if (!p)
+    return 12; // ENOMEM
+  pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
+  ((void **)pAligned)[-1] = p;
+  *ptr = pAligned;
+  return 0;
+}
+*/
+
+/*
+  ALLOC_ALIGN_SIZE >= sizeof(void *)
+  ALLOC_ALIGN_SIZE >= cache_line_size
+*/
+
+#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
+
+static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+{
+  #ifndef USE_posix_memalign
+
+  void *p;
+  void *pAligned;
+  size_t newSize;
+  UNUSED_VAR(pp);
+
+  /* we can also allocate an additional dummy ALLOC_ALIGN_SIZE bytes after the aligned
+     block to prevent cache-line sharing with other allocated blocks */
+
+  newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
+  if (newSize < size)
+    return NULL;
+
+  p = MyAlloc(newSize);
+
+  if (!p)
+    return NULL;
+  pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
+
+  Print(" size="); PrintHex(size, 8);
+  Print(" a_size="); PrintHex(newSize, 8);
+  Print(" ptr="); PrintAddr(p);
+  Print(" a_ptr="); PrintAddr(pAligned);
+  PrintLn();
+
+  ((void **)pAligned)[-1] = p;
+
+  return pAligned;
+
+  #else
+
+  void *p;
+  UNUSED_VAR(pp);
+  if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
+    return NULL;
+
+  Print(" posix_memalign="); PrintAddr(p);
+  PrintLn();
+
+  return p;
+
+  #endif
+}
+
+
+static void SzAlignedFree(ISzAllocPtr pp, void *address)
+{
+  UNUSED_VAR(pp);
+  #ifndef USE_posix_memalign
+  if (address)
+    MyFree(((void **)address)[-1]);
+  #else
+  free(address);
+  #endif
+}
+
+
+const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
+
+
+
+#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
+
+/* we align ptr to support cases where CAlignOffsetAlloc::offset is not a multiple of sizeof(void *) */
+#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
+/*
+#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
+*/
+
+static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+{
+  CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
+  void *adr;
+  void *pAligned;
+  size_t newSize;
+  size_t extra;
+  size_t alignSize = (size_t)1 << p->numAlignBits;
+
+  if (alignSize < sizeof(void *))
+    alignSize = sizeof(void *);
+
+  if (p->offset >= alignSize)
+    return NULL;
+
+  /* we can also allocate an additional dummy ALLOC_ALIGN_SIZE bytes after the aligned
+     block to prevent cache-line sharing with other allocated blocks */
+  extra = p->offset & (sizeof(void *) - 1);
+  newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
+  if (newSize < size)
+    return NULL;
+
+  adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
+
+  if (!adr)
+    return NULL;
+
+  pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
+      alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
+
+  PrintLn();
+  Print("- Aligned: ");
+  Print(" size="); PrintHex(size, 8);
+  Print(" a_size="); PrintHex(newSize, 8);
+  Print(" ptr="); PrintAddr(adr);
+  Print(" a_ptr="); PrintAddr(pAligned);
+  PrintLn();
+
+  REAL_BLOCK_PTR_VAR(pAligned) = adr;
+
+  return pAligned;
+}
+
+
+static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
+{
+  if (address)
+  {
+    CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
+    PrintLn();
+    Print("- Aligned Free: ");
+    PrintLn();
+    ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
+  }
+}
+
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
+{
+  p->vt.Alloc = AlignOffsetAlloc_Alloc;
+ p->vt.Free = AlignOffsetAlloc_Free; +} diff --git a/lib/LZMA/Alloc.h b/lib/LZMA/Alloc.h index 6482376..59de107 100644 --- a/lib/LZMA/Alloc.h +++ b/lib/LZMA/Alloc.h @@ -1,51 +1,58 @@ -/* Alloc.h -- Memory allocation functions -2018-02-19 : Igor Pavlov : Public domain */ - -#ifndef __COMMON_ALLOC_H -#define __COMMON_ALLOC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -void *MyAlloc(size_t size); -void MyFree(void *address); - -#ifdef _WIN32 - -void SetLargePageSize(); - -void *MidAlloc(size_t size); -void MidFree(void *address); -void *BigAlloc(size_t size); -void BigFree(void *address); - -#else - -#define MidAlloc(size) MyAlloc(size) -#define MidFree(address) MyFree(address) -#define BigAlloc(size) MyAlloc(size) -#define BigFree(address) MyFree(address) - -#endif - -extern const ISzAlloc g_Alloc; -extern const ISzAlloc g_BigAlloc; -extern const ISzAlloc g_MidAlloc; -extern const ISzAlloc g_AlignedAlloc; - - -typedef struct -{ - ISzAlloc vt; - ISzAllocPtr baseAlloc; - unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */ - size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */ -} CAlignOffsetAlloc; - -void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); - - -EXTERN_C_END - -#endif +/* Alloc.h -- Memory allocation functions +2021-07-13 : Igor Pavlov : Public domain */ + +#ifndef __COMMON_ALLOC_H +#define __COMMON_ALLOC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +void *MyAlloc(size_t size); +void MyFree(void *address); + +#ifdef _WIN32 + +void SetLargePageSize(void); + +void *MidAlloc(size_t size); +void MidFree(void *address); +void *BigAlloc(size_t size); +void BigFree(void *address); + +#else + +#define MidAlloc(size) MyAlloc(size) +#define MidFree(address) MyFree(address) +#define BigAlloc(size) MyAlloc(size) +#define BigFree(address) MyFree(address) + +#endif + +extern const ISzAlloc g_Alloc; + +#ifdef _WIN32 +extern const ISzAlloc g_BigAlloc; +extern const ISzAlloc g_MidAlloc; +#else +#define g_BigAlloc g_AlignedAlloc +#define g_MidAlloc g_AlignedAlloc +#endif + +extern const ISzAlloc g_AlignedAlloc; + + +typedef struct +{ + ISzAlloc vt; + ISzAllocPtr baseAlloc; + unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */ + size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */ +} CAlignOffsetAlloc; + +void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p); + + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/Compiler.h b/lib/LZMA/Compiler.h index 0cc409d..eba3742 100644 --- a/lib/LZMA/Compiler.h +++ b/lib/LZMA/Compiler.h @@ -1,33 +1,43 @@ -/* Compiler.h -2017-04-03 : Igor Pavlov : Public domain */ - -#ifndef __7Z_COMPILER_H -#define __7Z_COMPILER_H - -#ifdef _MSC_VER - - #ifdef UNDER_CE - #define RPC_NO_WINDOWS_H - /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ - #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union - #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int - #endif - - #if _MSC_VER >= 1300 - #pragma warning(disable : 4996) // This function or variable may be unsafe - #else - #pragma warning(disable : 4511) // copy constructor could not be generated - #pragma warning(disable : 4512) // assignment operator could not be generated - #pragma warning(disable : 4514) // unreferenced inline function has been removed - #pragma warning(disable : 4702) // unreachable code - #pragma warning(disable : 4710) // not inlined - #pragma warning(disable : 4714) // function 
marked as __forceinline not inlined - #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information - #endif - -#endif - -#define UNUSED_VAR(x) (void)x; -/* #define UNUSED_VAR(x) x=x; */ - -#endif +/* Compiler.h +2021-01-05 : Igor Pavlov : Public domain */ + +#ifndef __7Z_COMPILER_H +#define __7Z_COMPILER_H + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wunused-private-field" + #endif + +#ifdef _MSC_VER + + #ifdef UNDER_CE + #define RPC_NO_WINDOWS_H + /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */ + #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union + #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int + #endif + + #if _MSC_VER >= 1300 + #pragma warning(disable : 4996) // This function or variable may be unsafe + #else + #pragma warning(disable : 4511) // copy constructor could not be generated + #pragma warning(disable : 4512) // assignment operator could not be generated + #pragma warning(disable : 4514) // unreferenced inline function has been removed + #pragma warning(disable : 4702) // unreachable code + #pragma warning(disable : 4710) // not inlined + #pragma warning(disable : 4714) // function marked as __forceinline not inlined + #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information + #endif + + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" + // #pragma clang diagnostic ignored "-Wreserved-id-macro" + #endif + +#endif + +#define UNUSED_VAR(x) (void)x; +/* #define UNUSED_VAR(x) x=x; */ + +#endif diff --git a/lib/LZMA/CpuArch.c b/lib/LZMA/CpuArch.c new file mode 100644 index 0000000..a0e93e8 --- /dev/null +++ b/lib/LZMA/CpuArch.c @@ -0,0 +1,478 @@ +/* CpuArch.c -- CPU specific code +2021-07-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 + +#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) +#define USE_ASM +#endif + +#if !defined(USE_ASM) && _MSC_VER >= 1500 +#include +#endif + +#if defined(USE_ASM) && !defined(MY_CPU_AMD64) +static UInt32 CheckFlag(UInt32 flag) +{ + #ifdef _MSC_VER + __asm pushfd; + __asm pop EAX; + __asm mov EDX, EAX; + __asm xor EAX, flag; + __asm push EAX; + __asm popfd; + __asm pushfd; + __asm pop EAX; + __asm xor EAX, EDX; + __asm push EDX; + __asm popfd; + __asm and flag, EAX; + #else + __asm__ __volatile__ ( + "pushf\n\t" + "pop %%EAX\n\t" + "movl %%EAX,%%EDX\n\t" + "xorl %0,%%EAX\n\t" + "push %%EAX\n\t" + "popf\n\t" + "pushf\n\t" + "pop %%EAX\n\t" + "xorl %%EDX,%%EAX\n\t" + "push %%EDX\n\t" + "popf\n\t" + "andl %%EAX, %0\n\t": + "=c" (flag) : "c" (flag) : + "%eax", "%edx"); + #endif + return flag; +} +#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False; +#else +#define CHECK_CPUID_IS_SUPPORTED +#endif + +#ifndef USE_ASM + #ifdef _MSC_VER + #if _MSC_VER >= 1600 + #define MY__cpuidex __cpuidex + #else + +/* + __cpuid (function == 4) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. 
+ __cpuid() in old MSVC (14.00) doesn't clear ECX + We still can use __cpuid for low (function) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FAST_CALL / NO_INLINE function, + So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + + DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +*/ + +static +MY_NO_INLINE +void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +{ + UNUSED_VAR(subFunction); + __cpuid(CPUInfo, function); +} + + #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) + #pragma message("======== MY__cpuidex_HACK WAS USED ========") + #endif + #else + #define MY__cpuidex(info, func, func2) __cpuid(info, func) + #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") + #endif +#endif + + + + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) +{ + #ifdef USE_ASM + + #ifdef _MSC_VER + + UInt32 a2, b2, c2, d2; + __asm xor EBX, EBX; + __asm xor ECX, ECX; + __asm xor EDX, EDX; + __asm mov EAX, function; + __asm cpuid; + __asm mov a2, EAX; + __asm mov b2, EBX; + __asm mov c2, ECX; + __asm mov d2, EDX; + + *a = a2; + *b = b2; + *c = c2; + *d = d2; + + #else + + __asm__ __volatile__ ( + #if defined(MY_CPU_AMD64) && defined(__PIC__) + "mov %%rbx, %%rdi;" + "cpuid;" + "xchg %%rbx, %%rdi;" + : "=a" (*a) , + "=D" (*b) , + #elif defined(MY_CPU_X86) && defined(__PIC__) + "mov %%ebx, %%edi;" + "cpuid;" + "xchgl %%ebx, %%edi;" + : "=a" (*a) , + "=D" (*b) , + #else + "cpuid" + : "=a" (*a) , + "=b" (*b) , + #endif + "=c" (*c) , + "=d" (*d) + : "0" (function), "c"(0) ) ; + + #endif + + #else + + int CPUInfo[4]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; + + #endif +} + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p) +{ + CHECK_CPUID_IS_SUPPORTED + MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); + MyCPUID(1, &p->ver, &p->b, &p->c, &p->d); + return True; +} + +static const UInt32 kVendors[][3] = +{ + { 0x756E6547, 0x49656E69, 0x6C65746E}, + { 0x68747541, 0x69746E65, 0x444D4163}, + { 0x746E6543, 0x48727561, 0x736C7561} +}; + +int x86cpuid_GetFirm(const Cx86cpuid *p) +{ + unsigned i; + for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) + { + const UInt32 *v = kVendors[i]; + if (v[0] == p->vendor[0] && + v[1] == p->vendor[1] && + v[2] == p->vendor[2]) + return (int)i; + } + return -1; +} + +BoolInt CPU_Is_InOrder() +{ + Cx86cpuid p; + int firm; + UInt32 family, model; + if (!x86cpuid_CheckAndRead(&p)) + return True; + + family = x86cpuid_GetFamily(p.ver); + model = x86cpuid_GetModel(p.ver); + + firm = x86cpuid_GetFirm(&p); + + switch (firm) + { + case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( + /* In-Order Atom CPU */ + model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ + || model == 0x26 /* 45 nm, Z6xx */ + || model == 0x27 /* 32 nm, Z2460 */ + || model == 0x35 /* 32 nm, Z2760 */ + || model == 0x36 /* 32 nm, N2xxx, D2xxx */ + ))); + case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); + case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); + } + return True; +} + +#if 
!defined(MY_CPU_AMD64) && defined(_WIN32) +#include +static BoolInt CPU_Sys_Is_SSE_Supported() +{ + OSVERSIONINFO vi; + vi.dwOSVersionInfoSize = sizeof(vi); + if (!GetVersionEx(&vi)) + return False; + return (vi.dwMajorVersion >= 5); +} +#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; +#else +#define CHECK_SYS_SSE_SUPPORT +#endif + + +static UInt32 X86_CPUID_ECX_Get_Flags() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES() +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3() +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41() +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return False; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + +BoolInt CPU_IsSupported_VAES_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } +} + +BoolInt CPU_IsSupported_PageGB() +{ + Cx86cpuid cpuid; + if (!x86cpuid_CheckAndRead(&cpuid)) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); + if (d[0] < 0x80000001) + return False; + } + { + UInt32 d[4] = { 0 }; + MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]); + return (d[3] >> 26) & 1; + } +} + + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 
1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + +BoolInt CPU_IsSupported_CRC32(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + MY_HWCAP_CHECK_FUNC(NEON) + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + int res = My_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + +#endif diff --git a/lib/LZMA/CpuArch.h b/lib/LZMA/CpuArch.h new file mode 100644 index 0000000..b300a5a --- /dev/null +++ b/lib/LZMA/CpuArch.h @@ -0,0 +1,442 @@ +/* CpuArch.h -- CPU specific code +2021-07-13 : Igor Pavlov : Public domain */ + +#ifndef __CPU_ARCH_H +#define __CPU_ARCH_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* +MY_CPU_LE means that CPU is LITTLE ENDIAN. +MY_CPU_BE means that CPU is BIG ENDIAN. +If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. + +MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. 
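+  (not necessarily with 64-bit pointers: the x32 case further below
+  defines MY_CPU_64BIT together with MY_CPU_SIZEOF_POINTER == 4)
+  A hypothetical client-side branch, to make the next two lines concrete:
+
+    #ifdef MY_CPU_64BIT
+      // e.g. consume 8 input bytes per iteration in one UInt64
+    #else
+      // fall back to 4 bytes per iteration in a UInt32
+    #endif
+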
+ MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) +*/ + +#if defined(_M_X64) \ + || defined(_M_AMD64) \ + || defined(__x86_64__) \ + || defined(__AMD64__) \ + || defined(__amd64__) + #define MY_CPU_AMD64 + #ifdef __ILP32__ + #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#endif + + +#if defined(_M_IX86) \ + || defined(__i386__) + #define MY_CPU_X86 + #define MY_CPU_NAME "x86" + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_ARM64) \ + || defined(__AARCH64EL__) \ + || defined(__AARCH64EB__) \ + || defined(__aarch64__) + #define MY_CPU_ARM64 + #define MY_CPU_NAME "arm64" + #define MY_CPU_64BIT +#endif + + +#if defined(_M_ARM) \ + || defined(_M_ARM_NT) \ + || defined(_M_ARMT) \ + || defined(__arm__) \ + || defined(__thumb__) \ + || defined(__ARMEL__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEL__) \ + || defined(__THUMBEB__) + #define MY_CPU_ARM + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 +#endif + + +#if defined(_M_IA64) \ + || defined(__ia64__) + #define MY_CPU_IA64 + #define MY_CPU_NAME "ia64" + #define MY_CPU_64BIT +#endif + + +#if defined(__mips64) \ + || defined(__mips64__) \ + || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3)) + #define MY_CPU_NAME "mips64" + #define MY_CPU_64BIT +#elif defined(__mips__) + #define MY_CPU_NAME "mips" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) + #ifdef __ILP32__ + #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 + #else + #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 + #endif + #define MY_CPU_64BIT +#else + #define MY_CPU_NAME "ppc" + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif +#endif + + +#if defined(__sparc64__) + #define MY_CPU_NAME "sparc64" + #define MY_CPU_64BIT +#elif defined(__sparc__) + #define MY_CPU_NAME "sparc" + /* #define MY_CPU_32BIT */ +#endif + + +#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64) +#define MY_CPU_X86_OR_AMD64 +#endif + +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + + +#ifdef _WIN32 + + #ifdef MY_CPU_ARM + #define MY_CPU_ARM_LE + #endif + + #ifdef MY_CPU_ARM64 + #define MY_CPU_ARM64_LE + #endif + + #ifdef _M_IA64 + #define MY_CPU_IA64_LE + #endif + +#endif + + +#if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM_LE) \ + || defined(MY_CPU_ARM64_LE) \ + || defined(MY_CPU_IA64_LE) \ + || defined(__LITTLE_ENDIAN__) \ + || defined(__ARMEL__) \ + || defined(__THUMBEL__) \ + || defined(__AARCH64EL__) \ + || defined(__MIPSEL__) \ + || defined(__MIPSEL) \ + || defined(_MIPSEL) \ + || defined(__BFIN__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) + #define MY_CPU_LE +#endif + +#if defined(__BIG_ENDIAN__) \ + || defined(__ARMEB__) \ + || defined(__THUMBEB__) \ + || defined(__AARCH64EB__) \ + || defined(__MIPSEB__) \ + || defined(__MIPSEB) \ + || defined(_MIPSEB) \ + || defined(__m68k__) \ + || defined(__s390__) \ + || defined(__s390x__) \ + || 
defined(__zarch__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define MY_CPU_BE +#endif + + +#if defined(MY_CPU_LE) && defined(MY_CPU_BE) + #error Stop_Compiling_Bad_Endian +#endif + + +#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) + #error Stop_Compiling_Bad_32_64_BIT +#endif + +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + + +#ifndef MY_CPU_NAME + #ifdef MY_CPU_LE + #define MY_CPU_NAME "LE" + #elif defined(MY_CPU_BE) + #define MY_CPU_NAME "BE" + #else + /* + #define MY_CPU_NAME "" + */ + #endif +#endif + + + + + +#ifdef MY_CPU_LE + #if defined(MY_CPU_X86_OR_AMD64) \ + || defined(MY_CPU_ARM64) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) + /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. + So we can't use unaligned 64-bit operations. */ + #define MY_CPU_LE_UNALIGN + #endif +#endif + + +#ifdef MY_CPU_LE_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(const void *)(p)) +#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 +#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#endif + +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#ifdef MY_CPU_LE_UNALIGN_64 +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif + +#else + +#define GetUi16(p) ( (UInt16) ( \ + ((const Byte *)(p))[0] | \ + ((UInt16)((const Byte *)(p))[1] << 8) )) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); } + +#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)_vvv_; \ + _ppp_[1] = (Byte)(_vvv_ >> 8); \ + _ppp_[2] = (Byte)(_vvv_ >> 16); \ + _ppp_[3] = (Byte)(_vvv_ >> 24); } + +#endif + + +#ifndef MY_CPU_LE_UNALIGN_64 + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) + +#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ + SetUi32(_ppp2_ , (UInt32)_vvv2_); \ + SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } + +#endif + + + + +#ifdef __has_builtin + #define MY__has_builtin(x) __has_builtin(x) +#else + #define MY__has_builtin(x) 0 +#endif + +#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) + +/* Note: we use bswap instruction, that is unsupported in 386 cpu */ + +#include + +#pragma intrinsic(_byteswap_ushort) +#pragma intrinsic(_byteswap_ulong) +#pragma 
intrinsic(_byteswap_uint64) + +/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ +#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) + +#elif defined(MY_CPU_LE_UNALIGN) && ( \ + (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ + || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) + +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) + +#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) + +#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ + _ppp_[0] = (Byte)(_vvv_ >> 24); \ + _ppp_[1] = (Byte)(_vvv_ >> 16); \ + _ppp_[2] = (Byte)(_vvv_ >> 8); \ + _ppp_[3] = (Byte)_vvv_; } + +#endif + + +#ifndef GetBe16 + +#define GetBe16(p) ( (UInt16) ( \ + ((UInt16)((const Byte *)(p))[0] << 8) | \ + ((const Byte *)(p))[1] )) + +#endif + + + +#ifdef MY_CPU_X86_OR_AMD64 + +typedef struct +{ + UInt32 maxFunc; + UInt32 vendor[3]; + UInt32 ver; + UInt32 b; + UInt32 c; + UInt32 d; +} Cx86cpuid; + +enum +{ + CPU_FIRM_INTEL, + CPU_FIRM_AMD, + CPU_FIRM_VIA +}; + +void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d); + +BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p); +int x86cpuid_GetFirm(const Cx86cpuid *p); + +#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF)) +#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) +#define x86cpuid_GetStepping(ver) (ver & 0xF) + +BoolInt CPU_Is_InOrder(void); + +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif + +#if defined(__APPLE__) +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); +#endif + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/LzFind.c b/lib/LZMA/LzFind.c index df55e86..a17c06b 100644 --- a/lib/LZMA/LzFind.c +++ b/lib/LZMA/LzFind.c @@ -1,1127 +1,1628 @@ -/* LzFind.c -- Match finder for LZ algorithms -2018-07-08 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -#include "LzFind.h" -#include "LzHash.h" - -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) 
-#define kMaxHistorySize ((UInt32)7 << 29) - -#define kStartMaxLen 3 - -static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) -{ - if (!p->directInput) - { - ISzAlloc_Free(alloc, p->bufferBase); - p->bufferBase = NULL; - } -} - -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ - -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) -{ - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } - if (!p->bufferBase || p->blockSize != blockSize) - { - LzInWindow_Free(p, alloc); - p->blockSize = blockSize; - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); - } - return (p->bufferBase != NULL); -} - -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } - -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } - -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} - -static void MatchFinder_ReadBlock(CMatchFinder *p) -{ - if (p->streamEndWasReached || p->result != SZ_OK) - return; - - /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ - - if (p->directInput) - { - UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); - if (curSize > p->directInputRem) - curSize = (UInt32)p->directInputRem; - p->directInputRem -= curSize; - p->streamPos += curSize; - if (p->directInputRem == 0) - p->streamEndWasReached = 1; - return; - } - - for (;;) - { - Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); - if (size == 0) - return; - - p->result = ISeqInStream_Read(p->stream, dest, &size); - if (p->result != SZ_OK) - return; - if (size == 0) - { - p->streamEndWasReached = 1; - return; - } - p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) - return; - } -} - -void MatchFinder_MoveBlock(CMatchFinder *p) -{ - memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); - p->buffer = p->bufferBase + p->keepSizeBefore; -} - -int MatchFinder_NeedMove(CMatchFinder *p) -{ - if (p->directInput) - return 0; - /* if (p->streamEndWasReached) return 0; */ - return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); -} - -void MatchFinder_ReadIfRequired(CMatchFinder *p) -{ - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} - -static void MatchFinder_SetDefaultSettings(CMatchFinder *p) -{ - p->cutValue = 32; - p->btMode = 1; - p->numHashBytes = 4; - p->bigHash = 0; -} - -#define kCrcPoly 0xEDB88320 - -void MatchFinder_Construct(CMatchFinder *p) -{ - unsigned i; - p->bufferBase = NULL; - p->directInput = 0; - p->hash = NULL; - p->expectedDataSize = (UInt64)(Int64)-1; - MatchFinder_SetDefaultSettings(p); - - for (i = 0; i < 256; i++) - { - UInt32 r = (UInt32)i; - unsigned j; - for (j = 0; j < 8; j++) - r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1))); - p->crc[i] = r; - } -} - -static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->hash); - p->hash = NULL; -} - -void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc) -{ - 
MatchFinder_FreeThisClassMemory(p, alloc); - LzInWindow_Free(p, alloc); -} - -static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) -{ - size_t sizeInBytes = (size_t)num * sizeof(CLzRef); - if (sizeInBytes / sizeof(CLzRef) != num) - return NULL; - return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); -} - -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAllocPtr alloc) -{ - UInt32 sizeReserv; - - if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); - return 0; - } - - sizeReserv = historySize >> 1; - if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; - else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; - - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - - p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - - if (LzInWindow_Create(p, sizeReserv, alloc)) - { - UInt32 newCyclicBufferSize = historySize + 1; - UInt32 hs; - p->matchMaxLen = matchMaxLen; - { - p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else - { - hs = historySize; - if (hs > p->expectedDataSize) - hs = (UInt32)p->expectedDataSize; - if (hs != 0) - hs--; - hs |= (hs >> 1); - hs |= (hs >> 2); - hs |= (hs >> 4); - hs |= (hs >> 8); - hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) - { - if (p->numHashBytes == 3) - hs = (1 << 24) - 1; - else - hs >>= 1; - /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ - } - } - p->hashMask = hs; - hs++; - if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; - if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; - hs += p->fixedHashSize; - } - - { - size_t newSize; - size_t numSons; - p->historySize = historySize; - p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; - - numSons = newCyclicBufferSize; - if (p->btMode) - numSons <<= 1; - newSize = hs + numSons; - - if (p->hash && p->numRefs == newSize) - return 1; - - MatchFinder_FreeThisClassMemory(p, alloc); - p->numRefs = newSize; - p->hash = AllocRefs(newSize, alloc); - - if (p->hash) - { - p->son = p->hash + p->hashSizeSum; - return 1; - } - } - } - - MatchFinder_Free(p, alloc); - return 0; -} - -static void MatchFinder_SetLimits(CMatchFinder *p) -{ - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - - if (limit2 <= p->keepSizeAfter) - { - if (limit2 > 0) - limit2 = 1; - } - else - limit2 -= p->keepSizeAfter; - - if (limit2 < limit) - limit = limit2; - - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; -} - - -void MatchFinder_Init_LowHash(CMatchFinder *p) -{ - size_t i; - CLzRef *items = p->hash; - size_t numItems = p->fixedHashSize; - for (i = 0; i < numItems; i++) - items[i] = kEmptyHashValue; -} - - -void MatchFinder_Init_HighHash(CMatchFinder *p) -{ - size_t i; - CLzRef *items = p->hash + p->fixedHashSize; - size_t numItems = (size_t)p->hashMask + 1; - for (i = 0; i < numItems; i++) - items[i] = kEmptyHashValue; -} - - -void 
MatchFinder_Init_3(CMatchFinder *p, int readData) -{ - p->cyclicBufferPos = 0; - p->buffer = p->bufferBase; - p->pos = - p->streamPos = p->cyclicBufferSize; - p->result = SZ_OK; - p->streamEndWasReached = 0; - - if (readData) - MatchFinder_ReadBlock(p); - - MatchFinder_SetLimits(p); -} - - -void MatchFinder_Init(CMatchFinder *p) -{ - MatchFinder_Init_HighHash(p); - MatchFinder_Init_LowHash(p); - MatchFinder_Init_3(p, True); -} - - -static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) -{ - return (p->pos - p->historySize - 1) & kNormalizeMask; -} - -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) -{ - size_t i; - for (i = 0; i < numItems; i++) - { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; - } -} - -static void MatchFinder_Normalize(CMatchFinder *p) -{ - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->numRefs); - MatchFinder_ReduceOffsets(p, subValue); -} - - -MY_NO_INLINE -static void MatchFinder_CheckLimits(CMatchFinder *p) -{ - if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); - if (p->cyclicBufferPos == p->cyclicBufferSize) - p->cyclicBufferPos = 0; - MatchFinder_SetLimits(p); -} - - -/* - (lenLimit > maxLen) -*/ -MY_FORCE_INLINE -static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, unsigned maxLen) -{ - /* - son[_cyclicBufferPos] = curMatch; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; - { - const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - if (pb[maxLen] == cur[maxLen] && *pb == *cur) - { - UInt32 len = 0; - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - maxLen = len; - *distances++ = len; - *distances++ = delta - 1; - if (len == lenLimit) - return distances; - } - } - } - } - */ - - const Byte *lim = cur + lenLimit; - son[_cyclicBufferPos] = curMatch; - do - { - UInt32 delta = pos - curMatch; - if (delta >= _cyclicBufferSize) - break; - { - ptrdiff_t diff; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)]; - diff = (ptrdiff_t)0 - delta; - if (cur[maxLen] == cur[maxLen + diff]) - { - const Byte *c = cur; - while (*c == c[diff]) - { - if (++c == lim) - { - distances[0] = (UInt32)(lim - cur); - distances[1] = delta - 1; - return distances + 2; - } - } - { - unsigned len = (unsigned)(c - cur); - if (maxLen < len) - { - maxLen = len; - distances[0] = (UInt32)len; - distances[1] = delta - 1; - distances += 2; - } - } - } - } - } - while (--cutValue); - - return distances; -} - - -MY_FORCE_INLINE -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) -{ - CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); - unsigned len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } - { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - unsigned len = (len0 < len1 ? len0 : len1); - UInt32 pair0 = pair[0]; - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - maxLen = (UInt32)len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; - if (len == lenLimit) - { - *ptr1 = pair0; - *ptr0 = pair[1]; - return distances; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) -{ - CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); - unsigned len0 = 0, len1 = 0; - for (;;) - { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } - { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - unsigned len = (len0 < len1 ? 
len0 : len1); - if (pb[len] == cur[len]) - { - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - { - if (len == lenLimit) - { - *ptr1 = pair[0]; - *ptr0 = pair[1]; - return; - } - } - } - if (pb[len] < cur[len]) - { - *ptr1 = curMatch; - ptr1 = pair + 1; - curMatch = *ptr1; - len1 = len; - } - else - { - *ptr0 = curMatch; - ptr0 = pair; - curMatch = *ptr0; - len0 = len; - } - } - } -} - -#define MOVE_POS \ - ++p->cyclicBufferPos; \ - p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); - -#define MOVE_POS_RET MOVE_POS return (UInt32)offset; - -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } - -#define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ - lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ - cur = p->buffer; - -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) - -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue - -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; - -#define SKIP_FOOTER \ - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; - -#define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - d2; \ - const Byte *c = cur + maxLen; \ - const Byte *lim = cur + lenLimit; \ - for (; c != lim; c++) if (*(c + diff) != *c) break; \ - maxLen = (unsigned)(c - cur); } - -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - unsigned offset; - GET_MATCHES_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) -} - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - unsigned offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) -} - -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, d2, pos; - unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(3) - - HASH3_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash[h2]; - - curMatch = (hash + kFix3HashSize)[hv]; - - hash[h2] = pos; - (hash + kFix3HashSize)[hv] = pos; - - maxLen = 2; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - UPDATE_maxLen - distances[0] = (UInt32)maxLen; - distances[1] = d2 - 1; - offset = 2; - if (maxLen == lenLimit) - { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - GET_MATCHES_FOOTER(offset, maxLen) -} - -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - - curMatch = (hash + kFix4HashSize)[hv]; - - hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - 
offset += 2; - d2 = d3; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; - if (maxLen == lenLimit) - { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - if (maxLen < 3) - maxLen = 3; - - GET_MATCHES_FOOTER(offset, maxLen) -} - -/* -static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; - UInt32 *hash; - GET_MATCHES_HEADER(5) - - HASH5_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; - - curMatch = (hash + kFix5HashSize)[hv]; - - hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; - (hash + kFix5HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; - d2 = d3; - } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; - if (maxLen == lenLimit) - { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; - } - } - - if (maxLen < 4) - maxLen = 4; - - GET_MATCHES_FOOTER(offset, maxLen) -} -*/ - -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; - UInt32 *hash; - GET_MATCHES_HEADER(4) - - HASH4_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = (hash + kFix4HashSize)[hv]; - - hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; - if (maxLen == lenLimit) - { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - - if (maxLen < 3) - maxLen = 3; - - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET -} - -/* -static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos - UInt32 *hash; - GET_MATCHES_HEADER(5) - - HASH5_CALC; - - hash = p->hash; - pos = p->pos; - - d2 = pos - hash [h2]; - d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; - - curMatch = (hash + kFix5HashSize)[hv]; - - hash [h2] = pos; - (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; - (hash + kFix5HashSize)[hv] = pos; - - maxLen = 0; - offset = 0; - - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - distances[0] = maxLen 
= 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; - d2 = d3; - } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { - UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; - if (maxLen == lenLimit) - { - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS_RET; - } - } - - if (maxLen < 4) - maxLen = 4; - - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET -} -*/ - -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) -{ - unsigned offset; - GET_MATCHES_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET -} - -static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(2) - HASH2_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2; - UInt32 *hash; - SKIP_HEADER(3) - HASH3_CALC; - hash = p->hash; - curMatch = (hash + kFix3HashSize)[hv]; - hash[h2] = - (hash + kFix3HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) - HASH4_CALC; - hash = p->hash; - curMatch = (hash + kFix4HashSize)[hv]; - hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} - -/* -static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3, h4; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; - curMatch = (hash + kFix5HashSize)[hv]; - hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = - (hash + kFix5HashSize)[hv] = p->pos; - SKIP_FOOTER - } - while (--num != 0); -} -*/ - -static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) - HASH4_CALC; - hash = p->hash; - curMatch = (hash + kFix4HashSize)[hv]; - hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} - -/* -static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - UInt32 h2, h3, h4; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; - curMatch = hash + kFix5HashSize)[hv]; - hash [h2] = - (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = - (hash + kFix5HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); -} -*/ - -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) -{ - do - { - SKIP_HEADER(3) - HASH_ZIP_CALC; - curMatch = 
p->hash[hv];
-    p->hash[hv] = p->pos;
-    p->son[p->cyclicBufferPos] = curMatch;
-    MOVE_POS
-  }
-  while (--num != 0);
-}
-
-void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
-{
-  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
-  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
-  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
-  if (!p->btMode)
-  {
-    /* if (p->numHashBytes <= 4) */
-    {
-      vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
-      vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
-    }
-    /*
-    else
-    {
-      vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
-      vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
-    }
-    */
-  }
-  else if (p->numHashBytes == 2)
-  {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
-  }
-  else if (p->numHashBytes == 3)
-  {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
-  }
-  else /* if (p->numHashBytes == 4) */
-  {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
-  }
-  /*
-  else
-  {
-    vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
-    vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
-  }
-  */
-}
+/* LzFind.c -- Match finder for LZ algorithms
+2021-11-29 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+// #include <stdio.h>
+
+#include "CpuArch.h"
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kBlockMoveAlign (1 << 7) // alignment for memmove()
+#define kBlockSizeAlign (1 << 16) // alignment for block allocation
+#define kBlockSizeReserveMin (1 << 24) // it's 1/256 of a 4 GB dictionary
+
+#define kEmptyHashValue 0
+
+#define kMaxValForNormalize ((UInt32)0)
+// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
+
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#define GET_AVAIL_BYTES(p) \
+  Inline_MatchFinder_GetNumAvailableBytes(p)
+
+
+// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+#define kFix5HashSize kFix4HashSize
+
+/*
+  HASH2_CALC:
+  if (hv) matches, then cur[0] and cur[1] also match
+*/
+#define HASH2_CALC hv = GetUi16(cur);
+
+// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
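/*
  For illustration only (not part of the patch): expanded as plain functions,
  the 2-byte and 3-byte hash macros below amount to the following; crc_table,
  hash_mask and h2_out are stand-in names for p->crc, p->hashMask and the
  local h2.

  static UInt32 Hash2(const Byte *cur)
  {
    return GetUi16(cur);  // hv is just the two bytes themselves
  }

  static UInt32 Hash3(const Byte *cur, const UInt32 *crc_table,
      UInt32 hash_mask, UInt32 *h2_out)
  {
    UInt32 temp = crc_table[cur[0]] ^ cur[1];
    *h2_out = temp & (kHash2Size - 1);                  // short 2-byte hash
    return (temp ^ ((UInt32)cur[2] << 8)) & hash_mask;  // full 3-byte hash
  }
*/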
+
+/*
+  HASH3_CALC:
+  if (cur[0]) and (h2) match, then cur[1] also matches
+  if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
+*/
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
+  /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
+  hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
+
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  if (!p->directInput)
+  {
+    ISzAlloc_Free(alloc, p->bufferBase);
+    p->bufferBase = NULL;
+  }
+}
+
+
+static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc)
+{
+  if (blockSize == 0)
+    return 0;
+  if (!p->bufferBase || p->blockSize != blockSize)
+  {
+    // size_t blockSizeT;
+    LzInWindow_Free(p, alloc);
+    p->blockSize = blockSize;
+    // blockSizeT = blockSize;
+
+    // printf("\nblockSize = 0x%x\n", blockSize);
+    /*
+    #if defined _WIN64
+    // we can allocate 4GiB, but still use UInt32 for (p->blockSize)
+    // we use UInt32 type for (p->blockSize), because
+    // we don't want to wrap over 4 GiB,
+    // when we use (p->streamPos - p->pos) that is UInt32.
+    if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign)
+    {
+      blockSizeT = ((size_t)1 << 32);
+      printf("\nchanged to blockSizeT = 4GiB\n");
+    }
+    #endif
+    */
+
+    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+    // printf("\nbufferBase = %p\n", p->bufferBase);
+    // return 0; // for debug
+  }
+  return (p->bufferBase != NULL);
+}
+
+static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+
+static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
+
+
+MY_NO_INLINE
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return;
+
+  /* We use (p->streamPos - p->pos) value.
+     (p->streamPos < p->pos) is allowed. */
+
+  if (p->directInput)
+  {
+    UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
+    if (curSize > p->directInputRem)
+      curSize = (UInt32)p->directInputRem;
+    p->directInputRem -= curSize;
+    p->streamPos += curSize;
+    if (p->directInputRem == 0)
+      p->streamEndWasReached = 1;
+    return;
+  }
+
+  for (;;)
+  {
+    Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+    size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+    {
+      /* we call ReadBlock() after NeedMove() and MoveBlock().
+         NeedMove() and MoveBlock() provide more than (keepSizeAfter)
+         to the end of (blockSize).
+         So we don't execute this branch in normal code flow.
+         We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock().
+      */
+      // p->result = SZ_ERROR_FAIL; // we can show error here
+      return;
+    }
+
+    // #define kRead 3
+    // if (size > kRead) size = kRead; // for debug
+
+    p->result = ISeqInStream_Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1;
+      return;
+    }
+    p->streamPos += (UInt32)size;
+    if (GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+      return;
+    /* here and in the other (p->keepSizeAfter) checks we keep 1 byte more than was requested by the Create() function
+       (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */
+  }
+
+  // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter)
+}
+
+
+
+MY_NO_INLINE
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
+  const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+  p->buffer = p->bufferBase + keepBefore;
+  memmove(p->bufferBase,
+      p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+      keepBefore + (size_t)GET_AVAIL_BYTES(p));
+}
+
+/* We call MoveBlock() before ReadBlock().
+   So MoveBlock() can be a wasteful operation, if the whole input data
+   can fit in the current block even without calling MoveBlock().
+   In the important case where (dataSize <= historySize), the
+   condition (p->blockSize > dataSize + p->keepSizeAfter) is met,
+   so there is no MoveBlock() in that case.
+*/
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  if (p->directInput)
+    return 0;
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return 0;
+  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (p->keepSizeAfter >= GET_AVAIL_BYTES(p))
+    MatchFinder_ReadBlock(p);
+}
+
+
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->cutValue = 32;
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  unsigned i;
+  p->bufferBase = NULL;
+  p->directInput = 0;
+  p->hash = NULL;
+  p->expectedDataSize = (UInt64)(Int64)-1;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (i = 0; i < 256; i++)
+  {
+    UInt32 r = (UInt32)i;
+    unsigned j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+    p->crc[i] = r;
+  }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->hash);
+  p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  MatchFinder_FreeThisClassMemory(p, alloc);
+  LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+  if (sizeInBytes / sizeof(CLzRef) != num)
+    return NULL;
+  return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+}
+
+#if (kBlockSizeReserveMin < kBlockSizeAlign * 2)
+  #error Stop_Compiling_Bad_Reserve
+#endif
+
+
+
+static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
+{
+  UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter);
+  /*
+  if (historySize > kMaxHistorySize)
+    return 0;
+  */
+  // printf("\nhistorySize == 0x%x\n", historySize);
+
+  if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow
+    return 0;
+
+  {
+    const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign;
+    const UInt32 rem = kBlockSizeMax - blockSize;
+    const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ?
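/* Worked numbers for this reserve computation (illustrative, not from the
   source): the shift keeps reserve at ~1/2 of blockSize below 1 GiB and
   ~1/4 above it, plus small fixed slack. E.g. blockSize = 64 MiB gives
   reserve ~= 32 MiB + 4 KiB + 128 + 64 KiB, while blockSize = 1.5 GiB
   gives reserve ~= 384 MiB + 4 KiB + 128 + 64 KiB. */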
1 : 2)) + + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here + if (blockSize >= kBlockSizeMax + || rem < kBlockSizeReserveMin) // we reject settings that will be slow + return 0; + if (reserve >= rem) + blockSize = kBlockSizeMax; + else + { + blockSize += reserve; + blockSize &= ~(UInt32)(kBlockSizeAlign - 1); + } + } + // printf("\n LzFind_blockSize = %x\n", blockSize); + // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); + return blockSize; +} + + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + /* we need one additional byte in (p->keepSizeBefore), + since we use MoveBlock() after (p->pos++) and before dictionary using */ + // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + + keepAddBufferAfter += matchMaxLen; + /* we need (p->keepSizeAfter >= p->numHashBytes) */ + if (keepAddBufferAfter < p->numHashBytes) + keepAddBufferAfter = p->numHashBytes; + // keepAddBufferAfter -= 2; // for debug + p->keepSizeAfter = keepAddBufferAfter; + + if (p->directInput) + p->blockSize = 0; + if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) + { + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it + UInt32 hs; + p->matchMaxLen = matchMaxLen; + { + // UInt32 hs4; + p->fixedHashSize = 0; + hs = (1 << 16) - 1; + if (p->numHashBytes != 2) + { + hs = historySize; + if (hs > p->expectedDataSize) + hs = (UInt32)p->expectedDataSize; + if (hs != 0) + hs--; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later + hs >>= 1; + if (hs >= (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ + } + + // hs = ((UInt32)1 << 25) - 1; // for test + + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! 
*/ + + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; + } + p->hashMask = hs; + hs++; + + /* + hs4 = (1 << 20); + if (hs4 > hs) + hs4 = hs; + // hs4 = (1 << 16); // for test + p->hash4Mask = hs4 - 1; + */ + + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; + if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; + hs += p->fixedHashSize; + } + + { + size_t newSize; + size_t numSons; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) + + numSons = newCyclicBufferSize; + if (p->btMode) + numSons <<= 1; + newSize = hs + numSons; + + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + + if (p->hash && p->numRefs == newSize) + return 1; + + MatchFinder_FreeThisClassMemory(p, alloc); + p->numRefs = newSize; + p->hash = AllocRefs(newSize, alloc); + + if (p->hash) + { + p->son = p->hash + p->hashSizeSum; + return 1; + } + } + } + + MatchFinder_Free(p, alloc); + return 0; +} + + +static void MatchFinder_SetLimits(CMatchFinder *p) +{ + UInt32 k; + UInt32 n = kMaxValForNormalize - p->pos; + if (n == 0) + n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) + + k = p->cyclicBufferSize - p->cyclicBufferPos; + if (k < n) + n = k; + + k = GET_AVAIL_BYTES(p); + { + const UInt32 ksa = p->keepSizeAfter; + UInt32 mm = p->matchMaxLen; + if (k > ksa) + k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock + else if (k >= mm) + { + // the limitation for (p->lenLimit) update + k -= mm; // optimization : to reduce the number of checks + k++; + // k = 1; // non-optimized version : for debug + } + else + { + mm = k; + if (k != 0) + k = 1; + } + p->lenLimit = mm; + } + if (k < n) + n = k; + + p->posLimit = p->pos + n; +} + + +void MatchFinder_Init_LowHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash; + const size_t numItems = p->fixedHashSize; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_HighHash(CMatchFinder *p) +{ + size_t i; + CLzRef *items = p->hash + p->fixedHashSize; + const size_t numItems = (size_t)p->hashMask + 1; + for (i = 0; i < numItems; i++) + items[i] = kEmptyHashValue; +} + + +void MatchFinder_Init_4(CMatchFinder *p) +{ + p->buffer = p->bufferBase; + { + /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. + the code in CMatchFinderMt expects (pos = 1) */ + p->pos = + p->streamPos = + 1; // it's smallest optimal value. 
do not change it
+        // 0; // for debug
+  }
+  p->result = SZ_OK;
+  p->streamEndWasReached = 0;
+}
+
+
+// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code
+#define CYC_TO_POS_OFFSET 0
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+  MatchFinder_Init_HighHash(p);
+  MatchFinder_Init_LowHash(p);
+  MatchFinder_Init_4(p);
+  // if (readData)
+  MatchFinder_ReadBlock(p);
+
+  /* if we init (cyclicBufferPos = pos), then we can use one variable
+     instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */
+  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos)
+  // p->cyclicBufferPos = 0; // smallest value
+  // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses.
+  MatchFinder_SetLimits(p);
+}
+
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+  #if defined(__clang__) && (__clang_major__ >= 8) \
+    || defined(__GNUC__) && (__GNUC__ >= 8) \
+    || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+    #define USE_SATUR_SUB_128
+    #define USE_AVX2
+    #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+    #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
+  #elif defined(_MSC_VER)
+    #if (_MSC_VER >= 1600)
+      #define USE_SATUR_SUB_128
+      #if (_MSC_VER >= 1900)
+        #define USE_AVX2
+        #include <immintrin.h> // avx
+      #endif
+    #endif
+  #endif
+
+// #elif defined(MY_CPU_ARM_OR_ARM64)
+#elif defined(MY_CPU_ARM64)
+
+  #if defined(__clang__) && (__clang_major__ >= 8) \
+    || defined(__GNUC__) && (__GNUC__ >= 8)
+    #define USE_SATUR_SUB_128
+    #ifdef MY_CPU_ARM64
+      // #define ATTRIB_SSE41 __attribute__((__target__("")))
+    #else
+      // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
+    #endif
+
+  #elif defined(_MSC_VER)
+    #if (_MSC_VER >= 1910)
+      #define USE_SATUR_SUB_128
+    #endif
+  #endif
+
+  #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
+    #include <arm64_neon.h>
+  #else
+    #include <arm_neon.h>
+  #endif
+
+#endif
+
+/*
+#ifndef ATTRIB_SSE41
+  #define ATTRIB_SSE41
+#endif
+#ifndef ATTRIB_AVX2
+  #define ATTRIB_AVX2
+#endif
+*/
+
+#ifdef USE_SATUR_SUB_128
+
+// #define _SHOW_HW_STATUS
+
+#ifdef _SHOW_HW_STATUS
+#include <stdio.h>
+#define _PRF(x) x
+_PRF(;)
+#else
+#define _PRF(x)
+#endif
+
+#ifdef MY_CPU_ARM_OR_ARM64
+
+#ifdef MY_CPU_ARM64
+// #define FORCE_SATUR_SUB_128
+#endif
+
+typedef uint32x4_t v128;
+#define SASUB_128(i) \
+  *(v128 *)(void *)(items + (i) * 4) = \
+  vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
+
+#else
+
+#include <smmintrin.h> // sse4.1
+
+typedef __m128i v128;
+#define SASUB_128(i) \
+  *(v128 *)(void *)(items + (i) * 4) = \
+  _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
+
+#endif
+
+
+
+MY_NO_INLINE
+static
+#ifdef ATTRIB_SSE41
+ATTRIB_SSE41
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+  v128 sub2 =
+    #ifdef MY_CPU_ARM_OR_ARM64
+    vdupq_n_u32(subValue);
+    #else
+    _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+    #endif
+  do
+  {
+    SASUB_128(0)
+    SASUB_128(1)
+    SASUB_128(2)
+    SASUB_128(3)
+    items += 4 * 4;
+  }
+  while (items != lim);
+}
+
+
+
+#ifdef USE_AVX2
+
+#include <immintrin.h> // avx
+
+#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
+
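/*
  For illustration only (not part of the patch): per 32-bit lane, SASUB_128
  and SASUB_256 compute the same saturating update as the scalar SASUB_32
  further below, i.e. items[i] = (items[i] < subValue) ? 0 : items[i] - subValue:

  static UInt32 SaturSub1(UInt32 v, UInt32 subValue)
  {
    UInt32 m = (v > subValue) ? v : subValue;  // max(v, subValue)
    return m - subValue;                       // 0 when v <= subValue
  }
*/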
+MY_NO_INLINE
+static
+#ifdef ATTRIB_AVX2
+ATTRIB_AVX2
+#endif
+void
+MY_FAST_CALL
+LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+  __m256i sub2 = _mm256_set_epi32(
+      (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+      (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+  do
+  {
+    SASUB_256(0)
+    SASUB_256(1)
+    items += 2 * 8;
+  }
+  while (items != lim);
+}
+#endif // USE_AVX2
+
+#ifndef FORCE_SATUR_SUB_128
+typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+    UInt32 subValue, CLzRef *items, const CLzRef *lim);
+static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+#endif // FORCE_SATUR_SUB_128
+
+#endif // USE_SATUR_SUB_128
+
+
+// kEmptyHashValue must be zero
+// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m;
+#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue;
+
+#ifdef FORCE_SATUR_SUB_128
+
+#define DEFAULT_SaturSub LzFind_SaturSub_128
+
+#else
+
+#define DEFAULT_SaturSub LzFind_SaturSub_32
+
+MY_NO_INLINE
+static
+void
+MY_FAST_CALL
+LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+{
+  do
+  {
+    UInt32 v;
+    SASUB_32(0)
+    SASUB_32(1)
+    SASUB_32(2)
+    SASUB_32(3)
+    SASUB_32(4)
+    SASUB_32(5)
+    SASUB_32(6)
+    SASUB_32(7)
+    items += 8;
+  }
+  while (items != lim);
+}
+
+#endif
+
+
+MY_NO_INLINE
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+  #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
+
+  CLzRef *lim;
+
+  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+  {
+    UInt32 v;
+    SASUB_32(0);
+    items++;
+  }
+
+  {
+    #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
+    lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
+    numItems &= K_NORM_ALIGN_MASK;
+    if (items != lim)
+    {
+      #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
+      if (g_LzFind_SaturSub)
+        g_LzFind_SaturSub(subValue, items, lim);
+      else
+      #endif
+        DEFAULT_SaturSub(subValue, items, lim);
+    }
+    items = lim;
+  }
+
+
+  for (; numItems != 0; numItems--)
+  {
+    UInt32 v;
+    SASUB_32(0);
+    items++;
+  }
+}
+
+
+
+// call MatchFinder_CheckLimits() only after (p->pos++) update
+
+MY_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+  if (// !p->streamEndWasReached && p->result == SZ_OK &&
+      p->keepSizeAfter == GET_AVAIL_BYTES(p))
+  {
+    // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p))
+    if (MatchFinder_NeedMove(p))
+      MatchFinder_MoveBlock(p);
+    MatchFinder_ReadBlock(p);
+  }
+
+  if (p->pos == kMaxValForNormalize)
+  if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data.
+    /*
+       if we disable normalization for the last bytes of data, and
+       if (data_size == 4 GiB), we avoid a wasteful normalization,
+       but (pos) will be wrapped over Zero (0) in that case,
+       and we cannot resume normal operation later
+    */
+  {
+    // MatchFinder_Normalize(p);
+    /* after normalization we need (p->pos >= p->historySize + 1); */
+    /* we can reduce subValue to an aligned value, if we want to keep alignment
+       of (p->pos) and (p->buffer) for speculated accesses.
*/ + const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; + // const UInt32 subValue = (1 << 15); // for debug + // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + Inline_MatchFinder_ReduceOffsets(p, subValue); + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + } + + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + + MatchFinder_SetLimits(p); +} + + +/* + (lenLimit > maxLen) +*/ +MY_FORCE_INLINE +static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, unsigned maxLen) +{ + /* + son[_cyclicBufferPos] = curMatch; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return d; + { + const Byte *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = len; + *d++ = delta - 1; + if (len == lenLimit) + return d; + } + } + } + } + */ + + const Byte *lim = cur + lenLimit; + son[_cyclicBufferPos] = curMatch; + + do + { + UInt32 delta; + + if (curMatch == 0) + break; + // if (curMatch2 >= curMatch) return NULL; + delta = pos - curMatch; + if (delta >= _cyclicBufferSize) + break; + { + ptrdiff_t diff; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) + { + const Byte *c = cur; + while (*c == c[diff]) + { + if (++c == lim) + { + d[0] = (UInt32)(lim - cur); + d[1] = delta - 1; + return d + 2; + } + } + { + const unsigned len = (unsigned)(c - cur); + if (maxLen < len) + { + maxLen = len; + d[0] = (UInt32)len; + d[1] = delta - 1; + d += 2; + } + } + } + } + } + while (--cutValue); + + return d; +} + + +MY_FORCE_INLINE +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, UInt32 maxLen) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (cmCheck < curMatch) + do + { + const UInt32 delta = pos - curMatch; + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? 
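/*
  Illustrative note (not in the original source): len0 and len1 record how
  many leading bytes cur already shares with the boundary strings of the
  left and right subtrees, so every node visited next is known to share at
  least min(len0, len1) bytes with cur. For example, len0 == 4 and len1 == 6
  mean the byte compare below can safely start at pb[4] == cur[4] instead
  of pb[0].
*/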
len0 : len1); + const UInt32 pair0 = pair[0]; + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = (UInt32)len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair0; + *ptr0 = pair[1]; + return d; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + // const UInt32 curMatch2 = pair[1]; + // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + // curMatch = curMatch2; + curMatch = pair[1]; + ptr1 = pair + 1; + len1 = len; + } + else + { + *ptr0 = curMatch; + curMatch = pair[0]; + ptr0 = pair; + len0 = len; + } + } + } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return d; +} + + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + unsigned len0 = 0, len1 = 0; + + UInt32 cmCheck; + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (// curMatch >= pos || // failure + cmCheck < curMatch) + do + { + const UInt32 delta = pos - curMatch; + { + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + unsigned len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + curMatch = pair[1]; + ptr1 = pair + 1; + len1 = len; + } + else + { + *ptr0 = curMatch; + curMatch = pair[0]; + ptr0 = pair; + len0 = len; + } + } + } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return; +} + + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } + +#define MOVE_POS_RET MOVE_POS return distances; + +MY_NO_INLINE +static void MatchFinder_MovePos(CMatchFinder *p) +{ + /* we go here at the end of stream data, when (avail < num_hash_bytes) + We don't update sons[cyclicBufferPos << btMode]. + So (sons) record will contain junk. And we cannot resume match searching + to normal operation, even if we will provide more input data in buffer. 
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue + if (p->btMode) + p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue + */ + MOVE_POS; +} + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ + lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) +#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); + +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + distances = func(MF_PARAMS(p), \ + distances, (UInt32)_maxLen_); MOVE_POS_RET; + +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) + +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) + + + +#define UPDATE_maxLen { \ + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ + const Byte *c = cur + maxLen; \ + const Byte *lim = cur + lenLimit; \ + for (; c != lim; c++) if (*(c + diff) != *c) break; \ + maxLen = (unsigned)(c - cur); } + +static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(1) +} + +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_BT(2) +} + + +#define SET_mmm \ + mmm = p->cyclicBufferSize; \ + if (pos < mmm) \ + mmm = pos; + + +static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, d2, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash[h2]; + + curMatch = (hash + kFix3HashSize)[hv]; + + hash[h2] = pos; + (hash + kFix3HashSize)[hv] = pos; + + SET_mmm + + maxLen = 2; + + if (d2 < mmm && *(cur - d2) == *cur) + { + UPDATE_maxLen + distances[0] = (UInt32)maxLen; + distances[1] = d2 - 1; + distances += 2; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET; + } + } + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + SET_mmm + + maxLen = 3; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET + } + break; + } + + 
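/*
  A consumer-side sketch (not part of the SDK): each Bt / Hc GetMatches
  function fills (length, distance - 1) pairs with increasing lengths and
  returns the pointer just past the last pair, so a caller could walk one
  result like this (d_start and d_end are hypothetical names for the buffer
  passed in and the returned pointer):

  for (const UInt32 *m = d_start; m != d_end; m += 2)
  {
    const UInt32 len  = m[0];
    const UInt32 dist = m[1] + 1;   // stored value is (distance - 1)
    // ... pick the best (len, dist) candidate ...
  }
*/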
GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET; + } + break; + } + + GET_MATCHES_FOOTER_BT(maxLen) +} + + +static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, pos; + unsigned maxLen; + UInt32 *hash; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + curMatch = (hash + kFix4HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + (hash + kFix4HashSize)[hv] = pos; + + SET_mmm + + maxLen = 3; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + + UPDATE_maxLen + distances[-2] = (UInt32)maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + break; + } + + GET_MATCHES_FOOTER_HC(maxLen); +} + + +static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; + UInt32 *hash; + GET_MATCHES_HEADER(5) + + HASH5_CALC; + + hash = p->hash; + pos = p->pos; + + d2 = pos - hash [h2]; + d3 = pos - (hash + kFix3HashSize)[h3]; + // d4 = pos - (hash + kFix4HashSize)[h4]; + + curMatch = (hash + kFix5HashSize)[hv]; + + hash [h2] = pos; + (hash + kFix3HashSize)[h3] = pos; + // (hash + kFix4HashSize)[h4] = pos; + (hash + kFix5HashSize)[hv] = pos; + + SET_mmm + + maxLen = 4; + + for (;;) + { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; + UPDATE_maxLen + distances[-2] = maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + break; + } + + GET_MATCHES_FOOTER_HC(maxLen); +} + + 
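/*
  For illustration only (not part of the patch): the Hc* ("hash chain")
  finders above keep a single link per window position, while the Bt*
  finders keep two links per position in a binary tree. Stripped of the
  cyclic-buffer wrap handling, the chain walk in Hc_GetMatchesSpec amounts
  to the following (head and tries are stand-in names):

  UInt32 match = head;                  // newest position with this hash
  UInt32 tries = cutValue;
  while (tries-- != 0 && match != 0 && pos - match < cyclicBufferSize)
  {
    // ... compare bytes at (cur - (pos - match)) with cur ...
    match = son[cyclicBufferPos - (pos - match)];  // previous chain entry
  }
*/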
+UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + GET_MATCHES_FOOTER_HC(2) +} + + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(2) + { + HASH2_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + } + SKIP_FOOTER +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(3) + { + HASH_ZIP_CALC; + curMatch = p->hash[hv]; + p->hash[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(3) + { + UInt32 h2; + UInt32 *hash; + HASH3_CALC; + hash = p->hash; + curMatch = (hash + kFix3HashSize)[hv]; + hash[h2] = + (hash + kFix3HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(4) + { + UInt32 h2, h3; + UInt32 *hash; + HASH4_CALC; + hash = p->hash; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + +static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + SKIP_HEADER(5) + { + UInt32 h2, h3; + UInt32 *hash; + HASH5_CALC; + hash = p->hash; + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = p->pos; + } + SKIP_FOOTER +} + + +#define HC_SKIP_HEADER(minLen) \ + do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ + Byte *cur; \ + UInt32 *hash; \ + UInt32 *son; \ + UInt32 pos = p->pos; \ + UInt32 num2 = num; \ + /* (p->pos == p->posLimit) is not allowed here !!! */ \ + { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + num -= num2; \ + { const UInt32 cycPos = p->cyclicBufferPos; \ + son = p->son + cycPos; \ + p->cyclicBufferPos = cycPos + num2; } \ + cur = p->buffer; \ + hash = p->hash; \ + do { \ + UInt32 curMatch; \ + UInt32 hv; + + +#define HC_SKIP_FOOTER \ + cur++; pos++; *son++ = curMatch; \ + } while (--num2); \ + p->buffer = cur; \ + p->pos = pos; \ + if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ + }} while(num); \ + + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(4) + + UInt32 h2, h3; + HASH4_CALC; + curMatch = (hash + kFix4HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + (hash + kFix4HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(5) + + UInt32 h2, h3; + HASH5_CALC + curMatch = (hash + kFix5HashSize)[hv]; + hash [h2] = + (hash + kFix3HashSize)[h3] = + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = pos; + + HC_SKIP_FOOTER +} + + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + HC_SKIP_HEADER(3) + + HASH_ZIP_CALC; + curMatch = hash[hv]; + hash[hv] = pos; + + HC_SKIP_FOOTER +} + + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) + { + if (p->numHashBytes <= 4) + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; + 
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; + } + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else if (p->numHashBytes == 4) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; + } +} + + + +void LzFindPrepare() +{ + #ifndef FORCE_SATUR_SUB_128 + #ifdef USE_SATUR_SUB_128 + LZFIND_SATUR_SUB_CODE_FUNC f = NULL; + #ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== LzFind NEON") + _PRF(printf("\n=== LzFind NEON\n")); + f = LzFind_SaturSub_128; + } + // f = 0; // for debug + } + #else // MY_CPU_ARM_OR_ARM64 + if (CPU_IsSupported_SSE41()) + { + // #pragma message ("=== LzFind SSE41") + _PRF(printf("\n=== LzFind SSE41\n")); + f = LzFind_SaturSub_128; + + #ifdef USE_AVX2 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== LzFind AVX2") + _PRF(printf("\n=== LzFind AVX2\n")); + f = LzFind_SaturSub_256; + } + #endif + } + #endif // MY_CPU_ARM_OR_ARM64 + g_LzFind_SaturSub = f; + #endif // USE_SATUR_SUB_128 + #endif // FORCE_SATUR_SUB_128 +} diff --git a/lib/LZMA/LzFind.h b/lib/LZMA/LzFind.h index 42c13be..8f9fade 100644 --- a/lib/LZMA/LzFind.h +++ b/lib/LZMA/LzFind.h @@ -1,121 +1,136 @@ -/* LzFind.h -- Match finder for LZ algorithms -2017-06-10 : Igor Pavlov : Public domain */ - -#ifndef __LZ_FIND_H -#define __LZ_FIND_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -typedef UInt32 CLzRef; - -typedef struct _CMatchFinder -{ - Byte *buffer; - UInt32 pos; - UInt32 posLimit; - UInt32 streamPos; - UInt32 lenLimit; - - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ - - Byte streamEndWasReached; - Byte btMode; - Byte bigHash; - Byte directInput; - - UInt32 matchMaxLen; - CLzRef *hash; - CLzRef *son; - UInt32 hashMask; - UInt32 cutValue; - - Byte *bufferBase; - ISeqInStream *stream; - - UInt32 blockSize; - UInt32 keepSizeBefore; - UInt32 keepSizeAfter; - - UInt32 numHashBytes; - size_t directInputRem; - UInt32 historySize; - UInt32 fixedHashSize; - UInt32 hashSizeSum; - SRes result; - UInt32 crc[256]; - size_t numRefs; - - UInt64 expectedDataSize; -} CMatchFinder; - -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) - -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) - -#define Inline_MatchFinder_IsFinishedOK(p) \ - ((p)->streamEndWasReached \ - && (p)->streamPos == (p)->pos \ - && (!(p)->directInput || (p)->directInputRem == 0)) - -int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); -void MatchFinder_MoveBlock(CMatchFinder *p); -void MatchFinder_ReadIfRequired(CMatchFinder *p); - -void MatchFinder_Construct(CMatchFinder *p); - -/* Conditions: - historySize <= 3 GB - keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB -*/ -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAllocPtr alloc); -void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t 
numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); - -UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 maxLen); - -/* -Conditions: - Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. - Mf_GetPointerToCurrentPos_Func's result must be used only before any other function -*/ - -typedef void (*Mf_Init_Func)(void *object); -typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); -typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); -typedef void (*Mf_Skip_Func)(void *object, UInt32); - -typedef struct _IMatchFinder -{ - Mf_Init_Func Init; - Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; - Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; - Mf_GetMatches_Func GetMatches; - Mf_Skip_Func Skip; -} IMatchFinder; - -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); - -void MatchFinder_Init_LowHash(CMatchFinder *p); -void MatchFinder_Init_HighHash(CMatchFinder *p); -void MatchFinder_Init_3(CMatchFinder *p, int readData); -void MatchFinder_Init(CMatchFinder *p); - -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); - -void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); -void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); - -EXTERN_C_END - -#endif +/* LzFind.h -- Match finder for LZ algorithms +2021-07-13 : Igor Pavlov : Public domain */ + +#ifndef __LZ_FIND_H +#define __LZ_FIND_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +typedef UInt32 CLzRef; + +typedef struct _CMatchFinder +{ + Byte *buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). 
Use (UInt32)(streamPos - pos) */ + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + Byte streamEndWasReached; + Byte btMode; + Byte bigHash; + Byte directInput; + + UInt32 matchMaxLen; + CLzRef *hash; + CLzRef *son; + UInt32 hashMask; + UInt32 cutValue; + + Byte *bufferBase; + ISeqInStream *stream; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + size_t directInputRem; + UInt32 historySize; + UInt32 fixedHashSize; + UInt32 hashSizeSum; + SRes result; + UInt32 crc[256]; + size_t numRefs; + + UInt64 expectedDataSize; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos)) + +/* +#define Inline_MatchFinder_IsFinishedOK(p) \ + ((p)->streamEndWasReached \ + && (p)->streamPos == (p)->pos \ + && (!(p)->directInput || (p)->directInputRem == 0)) +*/ + +int MatchFinder_NeedMove(CMatchFinder *p); +/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */ +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +/* +#define Inline_MatchFinder_InitPos(p, val) \ + (p)->pos = (val); \ + (p)->streamPos = (val); +*/ + +#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ + (p)->pos -= (subValue); \ + (p)->streamPos -= (subValue); + + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
+ Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void *object); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); +typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); + +typedef struct _IMatchFinder +{ + Mf_Init_Func Init; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder2; + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); + +void MatchFinder_Init_LowHash(CMatchFinder *p); +void MatchFinder_Init_HighHash(CMatchFinder *p); +void MatchFinder_Init_4(CMatchFinder *p); +void MatchFinder_Init(CMatchFinder *p); + +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +void LzFindPrepare(void); + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/LzFindMt.c b/lib/LZMA/LzFindMt.c index bb0f42c..3865263 100644 --- a/lib/LZMA/LzFindMt.c +++ b/lib/LZMA/LzFindMt.c @@ -1,853 +1,1400 @@ -/* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2018-12-29 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include "LzHash.h" - -#include "LzFindMt.h" - -static void MtSync_Construct(CMtSync *p) -{ - p->wasCreated = False; - p->csWasInitialized = False; - p->csWasEntered = False; - Thread_Construct(&p->thread); - Event_Construct(&p->canStart); - Event_Construct(&p->wasStarted); - Event_Construct(&p->wasStopped); - Semaphore_Construct(&p->freeSemaphore); - Semaphore_Construct(&p->filledSemaphore); -} - -static void MtSync_GetNextBlock(CMtSync *p) -{ - if (p->needStart) - { - p->numProcessedBlocks = 1; - p->needStart = False; - p->stopWriting = False; - p->exit = False; - Event_Reset(&p->wasStarted); - Event_Reset(&p->wasStopped); - - Event_Set(&p->canStart); - Event_Wait(&p->wasStarted); - - // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder); - } - else - { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - p->numProcessedBlocks++; - Semaphore_Release1(&p->freeSemaphore); - } - Semaphore_Wait(&p->filledSemaphore); - CriticalSection_Enter(&p->cs); - p->csWasEntered = True; -} - -/* MtSync_StopWriting must be called if Writing was started */ - -static void MtSync_StopWriting(CMtSync *p) -{ - UInt32 myNumBlocks = p->numProcessedBlocks; - if (!Thread_WasCreated(&p->thread) || p->needStart) - return; - p->stopWriting = True; - if (p->csWasEntered) - { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - } - Semaphore_Release1(&p->freeSemaphore); - - Event_Wait(&p->wasStopped); - - while (myNumBlocks++ != p->numProcessedBlocks) - { - Semaphore_Wait(&p->filledSemaphore); - Semaphore_Release1(&p->freeSemaphore); - } - p->needStart = True; -} - -static void MtSync_Destruct(CMtSync *p) -{ - if (Thread_WasCreated(&p->thread)) - { - MtSync_StopWriting(p); - p->exit = True; - if (p->needStart) - Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); - } - if (p->csWasInitialized) - { - CriticalSection_Delete(&p->cs); - p->csWasInitialized = False; - } - - Event_Close(&p->canStart); - Event_Close(&p->wasStarted); - Event_Close(&p->wasStopped); - 
Semaphore_Close(&p->freeSemaphore); - Semaphore_Close(&p->filledSemaphore); - - p->wasCreated = False; -} - -#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } - -static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) -{ - if (p->wasCreated) - return SZ_OK; - - RINOK_THREAD(CriticalSection_Init(&p->cs)); - p->csWasInitialized = True; - - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - - RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); - RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); - - p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); - p->wasCreated = True; - return SZ_OK; -} - -static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) -{ - SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); - if (res != SZ_OK) - MtSync_Destruct(p); - return res; -} - -void MtSync_Init(CMtSync *p) { p->needStart = True; } - -#define kMtMaxValForNormalize 0xFFFFFFFF - -#define DEF_GetHeads2(name, v, action) \ - static void GetHeads ## name(const Byte *p, UInt32 pos, \ - UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ - { action; for (; numHeads != 0; numHeads--) { \ - const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } - -#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) - -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) -DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) -DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) -DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) -/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ - -static void HashThreadFunc(CMatchFinderMt *mt) -{ - CMtSync *p = &mt->hashSync; - for (;;) - { - UInt32 numProcessedBlocks = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - - MatchFinder_Init_HighHash(mt->MatchFinder); - - for (;;) - { - if (p->exit) - return; - if (p->stopWriting) - { - p->numProcessedBlocks = numProcessedBlocks; - Event_Set(&p->wasStopped); - break; - } - - { - CMatchFinder *mf = mt->MatchFinder; - if (MatchFinder_NeedMove(mf)) - { - CriticalSection_Enter(&mt->btSync.cs); - CriticalSection_Enter(&mt->hashSync.cs); - { - const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf); - ptrdiff_t offset; - MatchFinder_MoveBlock(mf); - offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf); - mt->pointerToCurPos -= offset; - mt->buffer -= offset; - } - CriticalSection_Leave(&mt->btSync.cs); - CriticalSection_Leave(&mt->hashSync.cs); - continue; - } - - Semaphore_Wait(&p->freeSemaphore); - - MatchFinder_ReadIfRequired(mf); - if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) - { - UInt32 subValue = (mf->pos - mf->historySize - 1); - MatchFinder_ReduceOffsets(mf, subValue); - MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); - } - { - UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; - UInt32 num = mf->streamPos - mf->pos; - heads[0] = 2; - heads[1] = num; - if (num >= mf->numHashBytes) - { - num = num - mf->numHashBytes + 1; - if (num > 
kMtHashBlockSize - 2) - num = kMtHashBlockSize - 2; - mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); - heads[0] = 2 + num; - } - mf->pos += num; - mf->buffer += num; - } - } - - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) -{ - MtSync_GetNextBlock(&p->hashSync); - p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; - p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; - p->hashNumAvail = p->hashBuf[p->hashBufPos++]; -} - -#define kEmptyHashValue 0 - -#define MFMT_GM_INLINE - -#ifdef MFMT_GM_INLINE - -/* - we use size_t for _cyclicBufferPos instead of UInt32 - to eliminate "movsx" BUG in old MSVC x64 compiler. -*/ - -MY_NO_INLINE -static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, - size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) -{ - do - { - UInt32 *_distances = ++distances; - UInt32 delta = *hash++; - - CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); - unsigned len0 = 0, len1 = 0; - UInt32 cutValue = _cutValue; - unsigned maxLen = (unsigned)_maxLen; - - /* - if (size > 1) - { - UInt32 delta = *hash; - if (delta < _cyclicBufferSize) - { - UInt32 cyc1 = _cyclicBufferPos + 1; - CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1); - Byte b = *(cur + 1 - delta); - _distances[0] = pair[0]; - _distances[1] = b; - } - } - */ - if (cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - } - else - for(;;) - { - { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - unsigned len = (len0 < len1 ? 
len0 : len1); - UInt32 pair0 = *pair; - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - maxLen = len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; - if (len == lenLimit) - { - UInt32 pair1 = pair[1]; - *ptr1 = pair0; - *ptr0 = pair1; - break; - } - } - } - { - UInt32 curMatch = pos - delta; - // delta = pos - *pair; - // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31]; - if (pb[len] < cur[len]) - { - delta = pos - pair[1]; - *ptr1 = curMatch; - ptr1 = pair + 1; - len1 = len; - } - else - { - delta = pos - *pair; - *ptr0 = curMatch; - ptr0 = pair; - len0 = len; - } - } - } - if (--cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - break; - } - } - pos++; - _cyclicBufferPos++; - cur++; - { - UInt32 num = (UInt32)(distances - _distances); - _distances[-1] = num; - } - } - while (distances < limit && --size != 0); - *posRes = pos; - return distances; -} - -#endif - - - -static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - UInt32 numProcessed = 0; - UInt32 curPos = 2; - UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2 - - distances[1] = p->hashNumAvail; - - while (curPos < limit) - { - if (p->hashBufPos == p->hashBufPosLimit) - { - MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; - if (p->hashNumAvail >= p->numHashBytes) - continue; - distances[0] = curPos + p->hashNumAvail; - distances += curPos; - for (; p->hashNumAvail != 0; p->hashNumAvail--) - *distances++ = 0; - return; - } - { - UInt32 size = p->hashBufPosLimit - p->hashBufPos; - UInt32 lenLimit = p->matchMaxLen; - UInt32 pos = p->pos; - UInt32 cyclicBufferPos = p->cyclicBufferPos; - if (lenLimit >= p->hashNumAvail) - lenLimit = p->hashNumAvail; - { - UInt32 size2 = p->hashNumAvail - lenLimit + 1; - if (size2 < size) - size = size2; - size2 = p->cyclicBufferSize - cyclicBufferPos; - if (size2 < size) - size = size2; - } - - #ifndef MFMT_GM_INLINE - while (curPos < limit && size-- != 0) - { - UInt32 *startDistances = distances + curPos; - UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], - pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - startDistances + 1, p->numHashBytes - 1) - startDistances); - *startDistances = num - 1; - curPos += num; - cyclicBufferPos++; - pos++; - p->buffer++; - } - #else - { - UInt32 posRes; - curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, - distances + limit, - size, &posRes) - distances); - p->hashBufPos += posRes - pos; - cyclicBufferPos += posRes - pos; - p->buffer += posRes - pos; - pos = posRes; - } - #endif - - numProcessed += pos - p->pos; - p->hashNumAvail -= pos - p->pos; - p->pos = pos; - if (cyclicBufferPos == p->cyclicBufferSize) - cyclicBufferPos = 0; - p->cyclicBufferPos = cyclicBufferPos; - } - } - - distances[0] = curPos; -} - -static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) -{ - CMtSync *sync = &p->hashSync; - if (!sync->needStart) - { - CriticalSection_Enter(&sync->cs); - sync->csWasEntered = True; - } - - BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); - - if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) - { - UInt32 subValue = p->pos - p->cyclicBufferSize; - MatchFinder_Normalize3(subValue, 
p->son, (size_t)p->cyclicBufferSize * 2); - p->pos -= subValue; - } - - if (!sync->needStart) - { - CriticalSection_Leave(&sync->cs); - sync->csWasEntered = False; - } -} - -void BtThreadFunc(CMatchFinderMt *mt) -{ - CMtSync *p = &mt->btSync; - for (;;) - { - UInt32 blockIndex = 0; - Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); - for (;;) - { - if (p->exit) - return; - if (p->stopWriting) - { - p->numProcessedBlocks = blockIndex; - MtSync_StopWriting(&mt->hashSync); - Event_Set(&p->wasStopped); - break; - } - Semaphore_Wait(&p->freeSemaphore); - BtFillBlock(mt, blockIndex++); - Semaphore_Release1(&p->filledSemaphore); - } - } -} - -void MatchFinderMt_Construct(CMatchFinderMt *p) -{ - p->hashBuf = NULL; - MtSync_Construct(&p->hashSync); - MtSync_Construct(&p->btSync); -} - -static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->hashBuf); - p->hashBuf = NULL; -} - -void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) -{ - MtSync_Destruct(&p->hashSync); - MtSync_Destruct(&p->btSync); - MatchFinderMt_FreeMem(p, alloc); -} - -#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) -#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) - -static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } -static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) -{ - Byte allocaDummy[0x180]; - unsigned i = 0; - for (i = 0; i < 16; i++) - allocaDummy[i] = (Byte)0; - if (allocaDummy[0] == 0) - BtThreadFunc((CMatchFinderMt *)p); - return 0; -} - -SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) -{ - CMatchFinder *mf = p->MatchFinder; - p->historySize = historySize; - if (kMtBtBlockSize <= matchMaxLen * 4) - return SZ_ERROR_PARAM; - if (!p->hashBuf) - { - p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); - if (!p->hashBuf) - return SZ_ERROR_MEM; - p->btBuf = p->hashBuf + kHashBufferSize; - } - keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); - keepAddBufferAfter += kMtHashBlockSize; - if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) - return SZ_ERROR_MEM; - - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); - return SZ_OK; -} - -/* Call it after ReleaseStream / SetStream */ -static void MatchFinderMt_Init(CMatchFinderMt *p) -{ - CMatchFinder *mf = p->MatchFinder; - - p->btBufPos = - p->btBufPosLimit = 0; - p->hashBufPos = - p->hashBufPosLimit = 0; - - /* Init without data reading. 
We don't want to read data in this thread */ - MatchFinder_Init_3(mf, False); - MatchFinder_Init_LowHash(mf); - - p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); - p->btNumAvailBytes = 0; - p->lzPos = p->historySize + 1; - - p->hash = mf->hash; - p->fixedHashSize = mf->fixedHashSize; - p->crc = mf->crc; - - p->son = mf->son; - p->matchMaxLen = mf->matchMaxLen; - p->numHashBytes = mf->numHashBytes; - p->pos = mf->pos; - p->buffer = mf->buffer; - p->cyclicBufferPos = mf->cyclicBufferPos; - p->cyclicBufferSize = mf->cyclicBufferSize; - p->cutValue = mf->cutValue; -} - -/* ReleaseStream is required to finish multithreading */ -void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) -{ - MtSync_StopWriting(&p->btSync); - /* p->MatchFinder->ReleaseStream(); */ -} - -static void MatchFinderMt_Normalize(CMatchFinderMt *p) -{ - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; -} - -static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) -{ - UInt32 blockIndex; - MtSync_GetNextBlock(&p->btSync); - blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; - if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); -} - -static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) -{ - return p->pointerToCurPos; -} - -#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); - -static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) -{ - GET_NEXT_BLOCK_IF_REQUIRED; - return p->btNumAvailBytes; -} - -static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 h2, curMatch2; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH2_CALC - - curMatch2 = hash[h2]; - hash[h2] = lzPos; - - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; - } - - return distances; -} - -static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 h2, h3, curMatch2, curMatch3; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH3_CALC - - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; - - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; - } - - return distances; -} - -/* -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; - curMatch4 = (hash + kFix4HashSize)[h4]; - - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; - (hash + kFix4HashSize)[h4] = lzPos; - - if (curMatch2 >= matchMinPos && 
cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) - { - distances[0] = 4; - return distances + 2; - } - distances[0] = 3; - distances += 2; - } - - if (curMatch4 >= matchMinPos) - if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] - ) - { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; - } - - return distances; -} -*/ - -#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; - -static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - p->btNumAvailBytes--; - { - UInt32 i; - for (i = 0; i < len; i += 2) - { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances[0] = v0; - distances[1] = v1; - distances += 2; - } - } - INCREASE_LZ_POS - return len; -} - -static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - - if (len == 0) - { - /* change for bt5 ! */ - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); - } - else - { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32 *distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); - do - { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances2[0] = v0; - distances2[1] = v1; - distances2 += 2; - } - while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); - } - INCREASE_LZ_POS - return len; -} - -#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED -#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; -#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); - -static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER2_MT { p->btNumAvailBytes--; - SKIP_FOOTER_MT -} - -static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER_MT(2) - UInt32 h2; - MT_HASH2_CALC - hash[h2] = p->lzPos; - SKIP_FOOTER_MT -} - -static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER_MT(3) - UInt32 h2, h3; - MT_HASH3_CALC - (hash + kFix3HashSize)[h3] = - hash[ h2] = - p->lzPos; - SKIP_FOOTER_MT -} - -/* -static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) -{ - SKIP_HEADER_MT(4) - UInt32 h2, h3, h4; - MT_HASH4_CALC - (hash + kFix4HashSize)[h4] = - (hash + kFix3HashSize)[h3] = - hash[ h2] = - p->lzPos; - SKIP_FOOTER_MT -} -*/ - -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) -{ - vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; - vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; - vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; - vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; - - switch 
(p->MatchFinder->numHashBytes)
-  {
-    case 2:
-      p->GetHeadsFunc = GetHeads2;
-      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
-      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
-      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
-      break;
-    case 3:
-      p->GetHeadsFunc = GetHeads3;
-      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
-      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
-      break;
-    default:
-      /* case 4: */
-      p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
-      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
-      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
-      break;
-    /*
-    default:
-      p->GetHeadsFunc = GetHeads5;
-      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
-      vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
-      break;
-    */
-  }
-}
+/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
+2021-12-21 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+// #include <stdio.h>
+
+#include "CpuArch.h"
+
+#include "LzHash.h"
+#include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+#define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+extern UInt64 g_NumIters_Tree;
+extern UInt64 g_NumIters_Loop;
+extern UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+#define kMtHashBlockSize ((UInt32)1 << 17)
+#define kMtHashNumBlocks (1 << 1)
+
+#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
+
+#define kMtBtBlockSize ((UInt32)1 << 16)
+#define kMtBtNumBlocks (1 << 4)
+
+#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
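
As a side note on the two GET_*_BLOCK_OFFSET macros above: the block counts are
powers of two, so masking the running block index with (count - 1) cycles
through a ring of blocks without a modulo. A minimal standalone sketch of the
same arithmetic (BLOCK_SIZE, NUM_BLOCKS and main are invented for illustration
and are not part of the patch):

#include <stdio.h>

#define BLOCK_SIZE ((unsigned)1 << 17)
#define NUM_BLOCKS ((unsigned)1 << 1)
/* same shape as GET_HASH_BLOCK_OFFSET: mask the index, then scale it */
#define BLOCK_OFFSET(i) (((i) & (NUM_BLOCKS - 1)) * BLOCK_SIZE)

int main(void)
{
  unsigned i;
  for (i = 0; i < 4; i++)
    printf("block %u -> offset %u\n", i, BLOCK_OFFSET(i));
  /* prints offsets 0, 131072, 0, 131072: the ring wraps automatically */
  return 0;
}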
+/*
+  HASH functions:
+  We use raw 8/16 bits from a[1] and a[2],
+  xored with crc(a[0]) and crc(a[3]).
+  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
+  our crc() function provides one-to-one correspondence for low 8-bit values:
+  (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
+*/
+
+#define MF(mt) ((mt)->MatchFinder)
+#define MF_CRC (p->crc)
+
+// #define MF(mt) (&(mt)->MatchFinder)
+// #define MF_CRC (p->MatchFinder.crc)
+
+#define MT_HASH2_CALC \
+  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+/*
+#define MT_HASH3_CALC__NO_2 { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define __MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
+  // (kHash4Size - 1);
+*/
+
+
+MY_NO_INLINE
+static void MtSync_Construct(CMtSync *p)
+{
+  p->affinity = 0;
+  p->wasCreated = False;
+  p->csWasInitialized = False;
+  p->csWasEntered = False;
+  Thread_Construct(&p->thread);
+  Event_Construct(&p->canStart);
+  Event_Construct(&p->wasStopped);
+  Semaphore_Construct(&p->freeSemaphore);
+  Semaphore_Construct(&p->filledSemaphore);
+}
+
+
+#define DEBUG_BUFFER_LOCK // define it to debug lock state
+
+#ifdef DEBUG_BUFFER_LOCK
+#include <stdlib.h>
+#define BUFFER_MUST_BE_LOCKED(p)   if (!(p)->csWasEntered) exit(1);
+#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1);
+#else
+#define BUFFER_MUST_BE_LOCKED(p)
+#define BUFFER_MUST_BE_UNLOCKED(p)
+#endif
+
+#define LOCK_BUFFER(p) { \
+  BUFFER_MUST_BE_UNLOCKED(p); \
+  CriticalSection_Enter(&(p)->cs); \
+  (p)->csWasEntered = True; }
+
+#define UNLOCK_BUFFER(p) { \
+  BUFFER_MUST_BE_LOCKED(p); \
+  CriticalSection_Leave(&(p)->cs); \
+  (p)->csWasEntered = False; }
+
+
+MY_NO_INLINE
+static UInt32 MtSync_GetNextBlock(CMtSync *p)
+{
+  UInt32 numBlocks = 0;
+  if (p->needStart)
+  {
+    BUFFER_MUST_BE_UNLOCKED(p)
+    p->numProcessedBlocks = 1;
+    p->needStart = False;
+    p->stopWriting = False;
+    p->exit = False;
+    Event_Reset(&p->wasStopped);
+    Event_Set(&p->canStart);
+  }
+  else
+  {
+    UNLOCK_BUFFER(p)
+    // we free current block
+    numBlocks = p->numProcessedBlocks++;
+    Semaphore_Release1(&p->freeSemaphore);
+  }
+
+  // buffer is UNLOCKED here
+  Semaphore_Wait(&p->filledSemaphore);
+  LOCK_BUFFER(p);
+  return numBlocks;
+}
+
+
+/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
+
+MY_NO_INLINE
+static void MtSync_StopWriting(CMtSync *p)
+{
+  if (!Thread_WasCreated(&p->thread) || p->needStart)
+    return;
+
+  PRF(printf("\nMtSync_StopWriting %p\n", p));
+
+  if (p->csWasEntered)
+  {
+    /* we don't use buffer in this thread after StopWriting().
+       So we UNLOCK buffer.
+       And we restore default UNLOCKED state for stopped thread */
+    UNLOCK_BUFFER(p)
+  }
+
+  /* We send (p->stopWriting) message and release freeSemaphore
+     to free current block.
+     So the thread will see (p->stopWriting) at some
+     iteration after Wait(freeSemaphore).
+     The thread doesn't need to fill all avail free blocks,
+     so we can get fast thread stop.
+  */
+
+  p->stopWriting = True;
+  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
+
+  PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
+  Event_Wait(&p->wasStopped);
+  PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));
+
+  /* 21.03 : we don't restore semaphore counters here.
+     We will recreate and reinit semaphores in next start */
+
+  p->needStart = True;
+}
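
MtSync_GetNextBlock() and MtSync_StopWriting() above form a bounded-buffer
handshake: the consumer posts freeSemaphore for each block it has finished
with, and shutdown sets stopWriting first and then posts one extra free slot
so the producer is guaranteed to wake up and observe the flag. A portable
model of just that handshake, using POSIX semaphores and an invented producer;
this is an editorial sketch under those assumptions, not the patch's actual
thread code (which relies on its own event/critical-section wrappers):

#include <pthread.h>
#include <semaphore.h>

typedef struct
{
  sem_t freeSem;             /* counts free blocks in the ring */
  sem_t wasStopped;          /* models the wasStopped auto-reset event */
  volatile int stopWriting;  /* sketch only; real code should use an atomic */
} SyncModel;

static void *producer(void *arg)
{
  SyncModel *s = (SyncModel *)arg;
  for (;;)
  {
    sem_wait(&s->freeSem);   /* like Semaphore_Wait(&p->freeSemaphore) */
    if (s->stopWriting)      /* checked only after the wait, as in the patch */
    {
      sem_post(&s->wasStopped);
      return NULL;
    }
    /* ... fill one block here, then post the "filled" semaphore ... */
  }
}

static void stop_writing(SyncModel *s)
{
  s->stopWriting = 1;
  sem_post(&s->freeSem);     /* wake the producer even if the ring is full */
  sem_wait(&s->wasStopped);  /* wait until it has parked */
}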
+
+
+MY_NO_INLINE
+static void MtSync_Destruct(CMtSync *p)
+{
+  PRF(printf("\nMtSync_Destruct %p\n", p));
+
+  if (Thread_WasCreated(&p->thread))
+  {
+    /* we want thread to be in Stopped state before sending EXIT command.
+       note: stop(btSync) will stop (htSync) also */
+    MtSync_StopWriting(p);
+    /* thread in Stopped state here : (p->needStart == true) */
+    p->exit = True;
+    // if (p->needStart)  // it's (true)
+    Event_Set(&p->canStart);       // we send EXIT command to thread
+    Thread_Wait_Close(&p->thread); // we wait thread finishing
+  }
+
+  if (p->csWasInitialized)
+  {
+    CriticalSection_Delete(&p->cs);
+    p->csWasInitialized = False;
+  }
+  p->csWasEntered = False;
+
+  Event_Close(&p->canStart);
+  Event_Close(&p->wasStopped);
+  Semaphore_Close(&p->freeSemaphore);
+  Semaphore_Close(&p->filledSemaphore);
+
+  p->wasCreated = False;
+}
+
+
+// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
+// we want to get real system error codes here instead of SZ_ERROR_THREAD
+#define RINOK_THREAD(x) RINOK(x)
+
+
+// call it before each new file (when new starting is required):
+MY_NO_INLINE
+static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
+{
+  WRes wres;
+  // BUFFER_MUST_BE_UNLOCKED(p)
+  if (!p->needStart || p->csWasEntered)
+    return SZ_ERROR_FAIL;
+  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
+  if (wres == 0)
+    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
+  return MY_SRes_HRESULT_FROM_WRes(wres);
+}
+
+
+static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
+{
+  WRes wres;
+
+  if (p->wasCreated)
+    return SZ_OK;
+
+  RINOK_THREAD(CriticalSection_Init(&p->cs));
+  p->csWasInitialized = True;
+  p->csWasEntered = False;
+
+  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
+  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
+
+  p->needStart = True;
+  p->exit = True;  /* p->exit is unused before (canStart) Event.
+                      But in case of some unexpected code failure we will get fast exit from thread */
+
+  // return ERROR_TOO_MANY_POSTS; // for debug
+  // return EINVAL; // for debug
+
+  if (p->affinity != 0)
+    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
+  else
+    wres = Thread_Create(&p->thread, startAddress, obj);
+
+  RINOK_THREAD(wres);
+  p->wasCreated = True;
+  return SZ_OK;
+}
+
+
+MY_NO_INLINE
+static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
+{
+  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
+  if (wres == 0)
+    return 0;
+  MtSync_Destruct(p);
+  return MY_SRes_HRESULT_FROM_WRes(wres);
+}
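
The creation/init split above mirrors how the finder is driven: MtSync_Create()
builds the long-lived objects once, MtSync_Init() re-arms the semaphores before
every new input, and MtSync_Destruct() parks the thread before sending the exit
command. A condensed usage sketch under those assumptions (run_one_stream and
the consumer loop are invented; the real callers are MatchFinderMt_Create,
MatchFinderMt_InitMt and MatchFinderMt_Destruct later in this file):

static SRes run_one_stream(CMtSync *sync, THREAD_FUNC_TYPE func, void *ctx)
{
  RINOK(MtSync_Create(sync, func, ctx));       /* once: cs, events, thread */
  RINOK(MtSync_Init(sync, kMtHashNumBlocks));  /* per stream: re-arm semaphores */
  /* ... the consumer now calls MtSync_GetNextBlock(sync) in a loop;
     the first call raises canStart, which launches the processing loop ... */
  MtSync_StopWriting(sync);  /* park the worker (Destruct would also do this) */
  MtSync_Destruct(sync);     /* send EXIT, join the thread, close handles */
  return SZ_OK;
}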
+
+
+// ---------- HASH THREAD ----------
+
+#define kMtMaxValForNormalize 0xFFFFFFFF
+// #define kMtMaxValForNormalize ((1 << 21)) // for debug
+// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+
+#ifdef MY_CPU_LE_UNALIGN
+  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
+#else
+  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
+#endif
+
+#define GetHeads_DECL(name) \
+  static void GetHeads ## name(const Byte *p, UInt32 pos, \
+      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
+
+#define GetHeads_LOOP(v) \
+  for (; numHeads != 0; numHeads--) { \
+    const UInt32 value = (v); \
+    p++; \
+    *heads++ = pos - hash[value]; \
+    hash[value] = pos++; }
+
+#define DEF_GetHeads2(name, v, action) \
+  GetHeads_DECL(name) { action \
+  GetHeads_LOOP(v) }
+
+#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
+
+DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
+DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
+// BT3 is not good for crc collisions for big hashMask values.
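
All of the GetHeads* variants above expand to the same loop: for every
position, emit the distance back to the previous position whose leading bytes
hashed to the same value, then record the current position as the new head.
A self-contained sketch of the 2-byte case (get_heads2 and main are invented
for illustration; the real code reads the pair with GetUi16 and takes its
starting position from the match finder):

#include <stdio.h>

typedef unsigned int UInt32;

static void get_heads2(const unsigned char *p, UInt32 pos,
    UInt32 *hash, UInt32 *heads, UInt32 numHeads)
{
  for (; numHeads != 0; numHeads--)
  {
    const UInt32 value = (UInt32)p[0] | ((UInt32)p[1] << 8);
    p++;
    *heads++ = pos - hash[value];  /* distance to previous occurrence */
    hash[value] = pos++;           /* this position becomes the new head */
  }
}

int main(void)
{
  static UInt32 hash[1 << 16];     /* zero-initialized: 0 means "empty" */
  UInt32 heads[7];
  const unsigned char buf[] = "abababab";
  unsigned i;
  get_heads2(buf, 1, hash, heads, 7);  /* positions start at 1, as in LzFind */
  for (i = 0; i < 7; i++)
    printf("%u ", heads[i]);
  /* prints "1 2 2 2 2 2 2": the first two heads equal their own positions
     (no earlier occurrence, table entry was 0); every later 2-byte pair
     repeats at distance 2 */
  printf("\n");
  return 0;
}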
+ +/* +GetHeads_DECL(3b) +{ + UNUSED_VAR(hashMask); + UNUSED_VAR(crc); + { + const Byte *pLim = p + numHeads; + if (numHeads == 0) + return; + pLim--; + while (p < pLim) + { + UInt32 v1 = GetUi32(p); + UInt32 v0 = v1 & 0xFFFFFF; + UInt32 h0, h1; + p += 2; + v1 >>= 8; + h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; + h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; + heads += 2; + } + if (p == pLim) + { + UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); + *heads = pos - hash[v0]; + hash[v0] = pos; + } + } +} +*/ + +/* +GetHeads_DECL(4) +{ + unsigned sh = 0; + UNUSED_VAR(crc) + while ((hashMask & 0x80000000) == 0) + { + hashMask <<= 1; + sh++; + } + GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) +} +#define GetHeads4b GetHeads4 +*/ + +#define USE_GetHeads_LOCAL_CRC + +#ifdef USE_GetHeads_LOCAL_CRC + +GetHeads_DECL(4) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + // crc1[i] = rotlFixed(v, 8) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(4b) +{ + UInt32 crc0[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + crc0[i] = crc[i] & hashMask; + } + GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) +} + +GetHeads_DECL(5) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + UInt32 crc2[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(5b) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) +} + +#else + +DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) +DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) +DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) + +#endif + + +static void HashThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->hashSync; + PRF(printf("\nHashThreadFunc\n")); + + for (;;) + { + UInt32 blockIndex = 0; + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n")); + Event_Wait(&p->canStart); + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n")); + if (p->exit) + { + PRF(printf("\nHashThreadFunc : exit \n")); + return; + } + + MatchFinder_Init_HighHash(MF(mt)); + + for (;;) + { + PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos)); + + { + CMatchFinder *mf = MF(mt); + if (MatchFinder_NeedMove(mf)) + { + CriticalSection_Enter(&mt->btSync.cs); + CriticalSection_Enter(&mt->hashSync.cs); + { + const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf); + ptrdiff_t offset; + MatchFinder_MoveBlock(mf); + offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf); + mt->pointerToCurPos -= offset; + mt->buffer -= offset; + } + CriticalSection_Leave(&mt->hashSync.cs); + CriticalSection_Leave(&mt->btSync.cs); + continue; + } + + Semaphore_Wait(&p->freeSemaphore); + + if (p->exit) // exit is 
unexpected here. But we check it here for some failure case + return; + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + + MatchFinder_ReadIfRequired(mf); + { + UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++); + UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf); + heads[0] = 2; + heads[1] = num; + + /* heads[1] contains the number of avail bytes: + if (avail < mf->numHashBytes) : + { + it means that stream was finished + HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes. + HASH_THREAD doesn't stop, + HASH_THREAD fills only the header (2 numbers) for all next blocks: + {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0} + } + else + { + HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes; + } + */ + + if (num >= mf->numHashBytes) + { + num = num - mf->numHashBytes + 1; + if (num > kMtHashBlockSize - 2) + num = kMtHashBlockSize - 2; + + if (mf->pos > (UInt32)kMtMaxValForNormalize - num) + { + const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + Inline_MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); + } + + heads[0] = 2 + num; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + } + + mf->pos += num; // wrap over zero is allowed at the end of stream + mf->buffer += num; + } + } + + Semaphore_Release1(&p->filledSemaphore); + } // for() processing end + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } // for() thread end +} + + + + +// ---------- BT THREAD ---------- + +/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap. + here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */ +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + +#define MFMT_GM_INLINE + +#ifdef MFMT_GM_INLINE + +/* + we use size_t for (pos) instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler. +*/ + + +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +#endif + + +static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) +{ + UInt32 numProcessed = 0; + UInt32 curPos = 2; + + /* GetMatchesSpec() functions don't create (len = 1) + in [len, dist] match pairs, if (p->numHashBytes >= 2) + Also we suppose here that (matchMaxLen >= 2). 
+ So the following code for (reserve) is not required + UInt32 reserve = (p->matchMaxLen * 2); + const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX + if (reserve < kNumHashBytes_Max - 1) + reserve = kNumHashBytes_Max - 1; + const UInt32 limit = kMtBtBlockSize - (reserve); + */ + + const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + + d[1] = p->hashNumAvail; + + if (p->failure_BT) + { + // printf("\n == 1 BtGetMatches() p->failure_BT\n"); + d[0] = 0; + // d[1] = 0; + return; + } + + while (curPos < limit) + { + if (p->hashBufPos == p->hashBufPosLimit) + { + // MatchFinderMt_GetNextBlock_Hash(p); + UInt32 avail; + { + const UInt32 bi = MtSync_GetNextBlock(&p->hashSync); + const UInt32 k = GET_HASH_BLOCK_OFFSET(bi); + const UInt32 *h = p->hashBuf + k; + avail = h[1]; + p->hashBufPosLimit = k + h[0]; + p->hashNumAvail = avail; + p->hashBufPos = k + 2; + } + + { + /* we must prevent UInt32 overflow for avail total value, + if avail was increased with new hash block */ + UInt32 availSum = numProcessed + avail; + if (availSum < numProcessed) + availSum = (UInt32)(Int32)-1; + d[1] = availSum; + } + + if (avail >= p->numHashBytes) + continue; + + // if (p->hashBufPos != p->hashBufPosLimit) exit(1); + + /* (avail < p->numHashBytes) + It means that stream was finished. + And (avail) - is a number of remaining bytes, + we fill (d) for (avail) bytes for LZ_THREAD (receiver). + but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */ + + /* here we suppose that we have space enough: + (kMtBtBlockSize - curPos >= p->hashNumAvail) */ + p->hashNumAvail = 0; + d[0] = curPos + avail; + d += curPos; + for (; avail != 0; avail--) + *d++ = 0; + return; + } + { + UInt32 size = p->hashBufPosLimit - p->hashBufPos; + UInt32 pos = p->pos; + UInt32 cyclicBufferPos = p->cyclicBufferPos; + UInt32 lenLimit = p->matchMaxLen; + if (lenLimit >= p->hashNumAvail) + lenLimit = p->hashNumAvail; + { + UInt32 size2 = p->hashNumAvail - lenLimit + 1; + if (size2 < size) + size = size2; + size2 = p->cyclicBufferSize - cyclicBufferPos; + if (size2 < size) + size = size2; + } + + if (pos > (UInt32)kMtMaxValForNormalize - size) + { + const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1); + pos -= subValue; + p->pos = pos; + MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); + } + + #ifndef MFMT_GM_INLINE + while (curPos < limit && size-- != 0) + { + UInt32 *startDistances = d + curPos; + UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], + pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + startDistances + 1, p->numHashBytes - 1) - startDistances); + *startDistances = num - 1; + curPos += num; + cyclicBufferPos++; + pos++; + p->buffer++; + } + #else + { + UInt32 posRes = pos; + const UInt32 *d_end; + { + d_end = GetMatchesSpecN_2( + p->buffer + lenLimit - 1, + pos, p->buffer, p->son, p->cutValue, d + curPos, + p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, p->hashBuf + p->hashBufPos + size, + cyclicBufferPos, p->cyclicBufferSize, + &posRes); + } + { + if (!d_end) + { + // printf("\n == 2 BtGetMatches() p->failure_BT\n"); + // internal data failure + p->failure_BT = True; + d[0] = 0; + // d[1] = 0; + return; + } + } + curPos = (UInt32)(d_end - d); + { + const UInt32 processed = posRes - pos; + pos = posRes; + p->hashBufPos += processed; + cyclicBufferPos += processed; + p->buffer += processed; + } + } + #endif + + { + const UInt32 processed = pos - p->pos; + numProcessed 
+= processed; + p->hashNumAvail -= processed; + p->pos = pos; + } + if (cyclicBufferPos == p->cyclicBufferSize) + cyclicBufferPos = 0; + p->cyclicBufferPos = cyclicBufferPos; + } + } + + d[0] = curPos; +} + + +static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) +{ + CMtSync *sync = &p->hashSync; + + BUFFER_MUST_BE_UNLOCKED(sync) + + if (!sync->needStart) + { + LOCK_BUFFER(sync) + } + + BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex)); + + /* We suppose that we have called GetNextBlock() from start. + So buffer is LOCKED */ + + UNLOCK_BUFFER(sync) +} + + +MY_NO_INLINE +static void BtThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->btSync; + for (;;) + { + UInt32 blockIndex = 0; + Event_Wait(&p->canStart); + + for (;;) + { + PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos)); + /* (p->exit == true) is possible after (p->canStart) at first loop iteration + and is unexpected after more Wait(freeSemaphore) iterations */ + if (p->exit) + return; + + Semaphore_Wait(&p->freeSemaphore); + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + + BtFillBlock(mt, blockIndex++); + + Semaphore_Release1(&p->filledSemaphore); + } + + // we stop HASH_THREAD here + MtSync_StopWriting(&mt->hashSync); + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } +} + + +void MatchFinderMt_Construct(CMatchFinderMt *p) +{ + p->hashBuf = NULL; + MtSync_Construct(&p->hashSync); + MtSync_Construct(&p->btSync); +} + +static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->hashBuf); + p->hashBuf = NULL; +} + +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) +{ + /* + HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs. + So we must be sure that HASH_THREAD will not use CriticalSection(s) + after deleting CriticalSection here. + + we call ReleaseStream(p) + that calls StopWriting(btSync) + that calls StopWriting(hashSync), if it's required to stop HASH_THREAD. 
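     (editor's addition to this comment: concretely, the body below calls
     MatchFinderMt_ReleaseStream(p) first and only then the two
     MtSync_Destruct() calls, so both threads are already parked on the
     (canStart) event and cannot touch the critical sections while they
     are being deleted)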
+ after StopWriting() it's safe to destruct MtSync(s) in any order */ + + MatchFinderMt_ReleaseStream(p); + + MtSync_Destruct(&p->btSync); + MtSync_Destruct(&p->hashSync); + + LOG_ITER( + printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n", + (UInt32)(g_NumIters_Tree / 1000), + (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), + (UInt32)(g_NumIters_Loop / 1000), + (UInt32)(g_NumIters_Bytes / 1000) + )); + + MatchFinderMt_FreeMem(p, alloc); +} + + +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + + +static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } +static THREAD_FUNC_DECL BtThreadFunc2(void *p) +{ + Byte allocaDummy[0x180]; + unsigned i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)0; + if (allocaDummy[0] == 0) + BtThreadFunc((CMatchFinderMt *)p); + return 0; +} + + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) +{ + CMatchFinder *mf = MF(p); + p->historySize = historySize; + if (kMtBtBlockSize <= matchMaxLen * 4) + return SZ_ERROR_PARAM; + if (!p->hashBuf) + { + p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32)); + if (!p->hashBuf) + return SZ_ERROR_MEM; + p->btBuf = p->hashBuf + kHashBufferSize; + } + keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); + keepAddBufferAfter += kMtHashBlockSize; + if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) + return SZ_ERROR_MEM; + + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); + return SZ_OK; +} + + +SRes MatchFinderMt_InitMt(CMatchFinderMt *p) +{ + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); + return MtSync_Init(&p->btSync, kMtBtNumBlocks); +} + + +static void MatchFinderMt_Init(CMatchFinderMt *p) +{ + CMatchFinder *mf = MF(p); + + p->btBufPos = + p->btBufPosLimit = NULL; + p->hashBufPos = + p->hashBufPosLimit = 0; + p->hashNumAvail = 0; // 21.03 + + p->failure_BT = False; + + /* Init without data reading. We don't want to read data in this thread */ + MatchFinder_Init_4(mf); + + MatchFinder_Init_LowHash(mf); + + p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); + p->btNumAvailBytes = 0; + p->failure_LZ_BT = False; + // p->failure_LZ_LZ = False; + + p->lzPos = + 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + p->hash = mf->hash; + p->fixedHashSize = mf->fixedHashSize; + // p->hash4Mask = mf->hash4Mask; + p->crc = mf->crc; + // memcpy(p->crc, mf->crc, sizeof(mf->crc)); + + p->son = mf->son; + p->matchMaxLen = mf->matchMaxLen; + p->numHashBytes = mf->numHashBytes; + + /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */ + // mf->streamPos = mf->pos = 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + /* we must init (p->pos = mf->pos) for BT, because + BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */ + p->pos = mf->pos; // do not change it + + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); + p->cyclicBufferSize = mf->cyclicBufferSize; + p->buffer = mf->buffer; + p->cutValue = mf->cutValue; + // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses. 
+} + + +/* ReleaseStream is required to finish multithreading */ +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) +{ + // Sleep(1); // for debug + MtSync_StopWriting(&p->btSync); + // Sleep(200); // for debug + /* p->MatchFinder->ReleaseStream(); */ +} + + +MY_NO_INLINE +static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) +{ + if (p->failure_LZ_BT) + p->btBufPos = p->failureBuf; + else + { + const UInt32 bi = MtSync_GetNextBlock(&p->btSync); + const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi); + { + const UInt32 numItems = bt[0]; + p->btBufPosLimit = bt + numItems; + p->btNumAvailBytes = bt[1]; + p->btBufPos = bt + 2; + if (numItems < 2 || numItems > kMtBtBlockSize) + { + p->failureBuf[0] = 0; + p->btBufPos = p->failureBuf; + p->btBufPosLimit = p->failureBuf + 1; + p->failure_LZ_BT = True; + // p->btNumAvailBytes = 0; + /* we don't want to decrease AvailBytes, that was load before. + that can be unxepected for the code that have loaded anopther value before */ + } + } + + if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize) + { + /* we don't check (lzPos) over exact avail bytes in (btBuf). + (fixedHashSize) is small, so normalization is fast */ + const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + p->lzPos -= subValue; + MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize); + } + } + return p->btNumAvailBytes; +} + + + +static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +{ + return p->pointerToCurPos; +} + + +#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + + +static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +{ + if (p->btBufPos != p->btBufPosLimit) + return p->btNumAvailBytes; + return MatchFinderMt_GetNextBlock_Bt(p); +} + + +// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; } +#define CHECK_FAILURE_LZ(_match_, _pos_) + +static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, c2; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH2_CALC + + c2 = hash[h2]; + hash[h2] = m; + + if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 2; + *d++ = m - c2 - 1; + } + } + + return d; +} + +static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + return d + 2; + } + d[0] = 2; + d += 2; + } + } + + if (c3 >= matchMinPos) + { + CHECK_FAILURE_LZ(c3, m) + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + } + + return d; +} + + +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; + +/* +static +UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + UInt32 matchMinPos; + UInt32 avail = p->btNumAvailBytes - 1; + p->btBufPos = btLim; + + { + p->btNumAvailBytes = avail; + + #define 
BT_HASH_BYTES_MAX 5 + + matchMinPos = p->lzPos; + + if (len != 0) + matchMinPos -= bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 1) - 1) + { + INCREASE_LZ_POS + return d; + } + else + { + const UInt32 hs = p->historySize; + if (matchMinPos > hs) + matchMinPos -= hs; + else + matchMinPos = 1; + } + } + + for (;;) + { + + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + d += 2; + break; + } + // else + { + d[0] = 2; + d += 2; + } + } + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + break; + } + + if (len != 0) + { + do + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while (bt != btLim); + } + INCREASE_LZ_POS + return d; +} +*/ + + +static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH3_CALC + // MT_HASH4_CALC + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + // c4 = (hash + kFix4HashSize)[h4]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + // (hash + kFix4HashSize)[h4] = m; + + #define _USE_H2 + + #ifdef _USE_H2 + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 
4 : 3; + // return d + 2; + + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + return d; + } + d[0] = 2; + d += 2; + } + #endif + + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c3 - 1; + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + } + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + + return d; +} + + +static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + p->btBufPos = btLim; + p->btNumAvailBytes--; + INCREASE_LZ_POS + { + while (bt != btLim) + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + } + return d; +} + + + +static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + UInt32 len = *bt++; + const UInt32 avail = p->btNumAvailBytes - 1; + p->btNumAvailBytes = avail; + p->btBufPos = bt + len; + if (len == 0) + { + #define BT_HASH_BYTES_MAX 5 + if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) + { + UInt32 m = p->lzPos; + if (m > p->historySize) + m -= p->historySize; + else + m = 1; + d = p->MixMatchesFunc(p, m, d); + } + } + else + { + /* + first match pair from BinTree: (match_len, match_dist), + (match_len >= numHashBytes). + MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) + */ + d = p->MixMatchesFunc(p, p->lzPos - bt[1], d); + // if (d) // check for failure + do + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while (len -= 2); + } + INCREASE_LZ_POS + return d; +} + +#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED +#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; +#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); + +static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER2_MT { p->btNumAvailBytes--; + SKIP_FOOTER_MT +} + +static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(2) + UInt32 h2; + MT_HASH2_CALC + hash[h2] = p->lzPos; + SKIP_FOOTER_MT +} + +static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(3) + UInt32 h2, h3; + MT_HASH3_CALC + (hash + kFix3HashSize)[h3] = + hash[ h2] = + p->lzPos; + SKIP_FOOTER_MT +} + +/* +// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip(). +// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream. 
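// (editor's note: in MatchFinderMt_CreateVTable() below, the 5-byte default
// case also installs MatchFinderMt3_Skip, so skipped positions only refresh
// the 2- and 3-byte hash rows; this 4-byte variant is kept for reference)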
+ +static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER_MT(4) + UInt32 h2, h3; // h4 + MT_HASH3_CALC + // MT_HASH4_CALC + // (hash + kFix4HashSize)[h4] = + (hash + kFix3HashSize)[h3] = + hash[ h2] = + p->lzPos; + SKIP_FOOTER_MT +} +*/ + +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + + switch (MF(p)->numHashBytes) + { + case 2: + p->GetHeadsFunc = GetHeads2; + p->MixMatchesFunc = (Mf_Mix_Matches)NULL; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + break; + case 3: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + break; + case 4: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; + + // it's fast inline version of GetMatches() + // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + break; + default: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + vTable->Skip = + (Mf_Skip_Func)MatchFinderMt3_Skip; + // (Mf_Skip_Func)MatchFinderMt4_Skip; + break; + } +} diff --git a/lib/LZMA/LzFindMt.h b/lib/LZMA/LzFindMt.h index ef431e3..ee9a1b6 100644 --- a/lib/LZMA/LzFindMt.h +++ b/lib/LZMA/LzFindMt.h @@ -1,101 +1,109 @@ -/* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2018-07-04 : Igor Pavlov : Public domain */ - -#ifndef __LZ_FIND_MT_H -#define __LZ_FIND_MT_H - -#include "LzFind.h" -#include "Threads.h" - -EXTERN_C_BEGIN - -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - -typedef struct _CMtSync -{ - BoolInt wasCreated; - BoolInt needStart; - BoolInt exit; - BoolInt stopWriting; - - CThread thread; - CAutoResetEvent canStart; - CAutoResetEvent wasStarted; - CAutoResetEvent wasStopped; - CSemaphore freeSemaphore; - CSemaphore filledSemaphore; - BoolInt csWasInitialized; - BoolInt csWasEntered; - CCriticalSection cs; - UInt32 numProcessedBlocks; -} CMtSync; - -typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); - -/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ -#define kMtCacheLineDummy 128 - -typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, - UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); - -typedef struct _CMatchFinderMt -{ - /* LZ */ - const Byte *pointerToCurPos; - UInt32 *btBuf; - UInt32 btBufPos; - UInt32 btBufPosLimit; - UInt32 lzPos; - UInt32 btNumAvailBytes; - - UInt32 *hash; - UInt32 fixedHashSize; - UInt32 historySize; - const UInt32 *crc; - - Mf_Mix_Matches MixMatchesFunc; - - /* LZ + BT */ - CMtSync btSync; - Byte btDummy[kMtCacheLineDummy]; - - /* BT */ - UInt32 *hashBuf; - UInt32 hashBufPos; - UInt32 hashBufPosLimit; - UInt32 hashNumAvail; - - CLzRef *son; - UInt32 matchMaxLen; - UInt32 numHashBytes; - UInt32 pos; - const Byte 
*buffer; - UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be historySize + 1 */ - UInt32 cutValue; - - /* BT + Hash */ - CMtSync hashSync; - /* Byte hashDummy[kMtCacheLineDummy]; */ - - /* Hash */ - Mf_GetHeads GetHeadsFunc; - CMatchFinder *MatchFinder; -} CMatchFinderMt; - -void MatchFinderMt_Construct(CMatchFinderMt *p); -void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); -SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, - UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); -void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); - -EXTERN_C_END - -#endif +/* LzFindMt.h -- multithreaded Match finder for LZ algorithms +2021-07-12 : Igor Pavlov : Public domain */ + +#ifndef __LZ_FIND_MT_H +#define __LZ_FIND_MT_H + +#include "LzFind.h" +#include "Threads.h" + +EXTERN_C_BEGIN + +typedef struct _CMtSync +{ + UInt32 numProcessedBlocks; + CThread thread; + UInt64 affinity; + + BoolInt wasCreated; + BoolInt needStart; + BoolInt csWasInitialized; + BoolInt csWasEntered; + + BoolInt exit; + BoolInt stopWriting; + + CAutoResetEvent canStart; + CAutoResetEvent wasStopped; + CSemaphore freeSemaphore; + CSemaphore filledSemaphore; + CCriticalSection cs; + // UInt32 numBlocks_Sent; +} CMtSync; + +typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ +#define kMtCacheLineDummy 128 + +typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); + +typedef struct _CMatchFinderMt +{ + /* LZ */ + const Byte *pointerToCurPos; + UInt32 *btBuf; + const UInt32 *btBufPos; + const UInt32 *btBufPosLimit; + UInt32 lzPos; + UInt32 btNumAvailBytes; + + UInt32 *hash; + UInt32 fixedHashSize; + // UInt32 hash4Mask; + UInt32 historySize; + const UInt32 *crc; + + Mf_Mix_Matches MixMatchesFunc; + UInt32 failure_LZ_BT; // failure in BT transfered to LZ + // UInt32 failure_LZ_LZ; // failure in LZ tables + UInt32 failureBuf[1]; + // UInt32 crc[256]; + + /* LZ + BT */ + CMtSync btSync; + Byte btDummy[kMtCacheLineDummy]; + + /* BT */ + UInt32 *hashBuf; + UInt32 hashBufPos; + UInt32 hashBufPosLimit; + UInt32 hashNumAvail; + UInt32 failure_BT; + + + CLzRef *son; + UInt32 matchMaxLen; + UInt32 numHashBytes; + UInt32 pos; + const Byte *buffer; + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + UInt32 cutValue; + + /* BT + Hash */ + CMtSync hashSync; + /* Byte hashDummy[kMtCacheLineDummy]; */ + + /* Hash */ + Mf_GetHeads GetHeadsFunc; + CMatchFinder *MatchFinder; + // CMatchFinder MatchFinder; +} CMatchFinderMt; + +// only for Mt part +void MatchFinderMt_Construct(CMatchFinderMt *p); +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable); + +/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */ +SRes MatchFinderMt_InitMt(CMatchFinderMt *p); +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/LzFindOpt.c b/lib/LZMA/LzFindOpt.c new file mode 100644 index 0000000..dbb6ad9 --- /dev/null +++ b/lib/LZMA/LzFindOpt.c @@ -0,0 +1,578 @@ +/* LzFindOpt.c -- 
multithreaded Match finder for LZ algorithms
+2021-07-13 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "LzFind.h"
+
+// #include "LzFindMt.h"
+
+// #define LOG_ITERS
+
+// #define LOG_THREAD
+
+#ifdef LOG_THREAD
+#include <stdio.h>
+#define PRF(x) x
+#else
+// #define PRF(x)
+#endif
+
+#ifdef LOG_ITERS
+#include <stdio.h>
+UInt64 g_NumIters_Tree;
+UInt64 g_NumIters_Loop;
+UInt64 g_NumIters_Bytes;
+#define LOG_ITER(x) x
+#else
+#define LOG_ITER(x)
+#endif
+
+// ---------- BT THREAD ----------
+
+#define USE_SON_PREFETCH
+#define USE_LONG_MATCH_OPT
+
+#define kEmptyHashValue 0
+
+// #define CYC_TO_POS_OFFSET 0
+
+// #define CYC_TO_POS_OFFSET 1 // for debug
+
+/*
+MY_NO_INLINE
+UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
+    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
+{
+  do
+  {
+    UInt32 delta;
+    if (hash == size)
+      break;
+    delta = *hash++;
+
+    if (delta == 0 || delta > (UInt32)pos)
+      return NULL;
+
+    lenLimit++;
+
+    if (delta == (UInt32)pos)
+    {
+      CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
+      *d++ = 0;
+      ptr1[0] = kEmptyHashValue;
+      ptr1[1] = kEmptyHashValue;
+    }
+else
+{
+  UInt32 *_distances = ++d;
+
+  CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
+  CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
+
+  const Byte *len0 = cur, *len1 = cur;
+  UInt32 cutValue = _cutValue;
+  const Byte *maxLen = cur + _maxLen;
+
+  for (LOG_ITER(g_NumIters_Tree++);;)
+  {
+    LOG_ITER(g_NumIters_Loop++);
+    {
+      const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
+      CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
+      const Byte *len = (len0 < len1 ? 
len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + hash++; + pos++; + cur++; + lenLimit++; + { + CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; + #else + const UInt32 p0 = ptr[0 + (diff * 2)]; + const UInt32 p1 = ptr[1 + (diff * 2)]; + ptr[0] = p0; + ptr[1] = p1; + // ptr[0] = ptr[0 + (diff * 2)]; + // ptr[1] = ptr[1 + (diff * 2)]; + #endif + } + // PrintSon(son + 2, pos - 1); + // printf("\npos = %x delta = %x\n", pos, delta); + len++; + *d++ = 2; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + if (delta >= curMatch) + return NULL; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + if (delta >= curMatch) + return NULL; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= pos) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ + +/* define cbs if you use 2 functions. + GetMatchesSpecN_1() : (pos < _cyclicBufferSize) + GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) + + do not define cbs if you use 1 function: + GetMatchesSpecN_2() +*/ + +// #define cbs _cyclicBufferSize + +/* + we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler. 
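  (editor's addition to this comment: the "movsx" issue refers to the 32-bit
  loop variable being re-extended to 64 bits on every address computation such
  as son[(size_t)pos << 1]; keeping pos and _cyclicBufferPos in size_t lets the
  value live in a 64-bit register, so the extension instruction drops out of
  the inner loop)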
+*/ + +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +MY_NO_INLINE +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + lenLimit++; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + + UInt32 cutValue = _cutValue; + const Byte *len0 = cur, *len1 = cur; + const Byte *maxLen = cur + _maxLen; + + // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // SPEC code + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + *d++ = 2; + *d++ = (UInt32)(lenLimit - cur); + *d++ = delta - 1; + cur++; + lenLimit++; + // SPEC + _cyclicBufferPos++; + { + // SPEC code + CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? 
cbs : 0)) << 1); + // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + pos++; + hash++; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } // for() end for long matches + } + #endif + + break; // break from TREE iterations + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + if (delta >= curMatch) + return NULL; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + if (delta >= curMatch) + return NULL; + } + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= cbs) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} + + + +/* +typedef UInt32 uint32plus; // size_t + +UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + UInt32 *_distances = ++d; + uint32plus len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + uint32plus maxLen = _maxLen; + // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + const Byte *pb = cur - delta; + uint32plus len = (len0 < len1 ? 
len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + { + const UInt32 pair1 = pair[1]; + *ptr0 = pair1; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + } + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + for (;;) + { + *d++ = 2; + *d++ = (UInt32)lenLimit; + *d++ = delta - 1; + _cyclicBufferPos++; + { + CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + hash++; + pos++; + cur++; + pb++; + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; + if (pb[len] < cur[len]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + { + if (delta >= curMatch) + return NULL; + delta = (UInt32)pos - delta; + if (delta >= cbs + // delta >= _cyclicBufferSize || delta >= pos + || --cutValue == 0) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ diff --git a/lib/LZMA/LzHash.h b/lib/LZMA/LzHash.h index e7c9423..a682f83 100644 --- a/lib/LZMA/LzHash.h +++ b/lib/LZMA/LzHash.h @@ -1,57 +1,34 @@ -/* LzHash.h -- HASH functions for LZ algorithms -2015-04-12 : Igor Pavlov : Public domain */ - -#ifndef __LZ_HASH_H -#define __LZ_HASH_H - -#define kHash2Size (1 << 10) -#define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) - -#define kFix3HashSize (kHash2Size) -#define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) - -#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); - -#define HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - temp ^= (p->crc[cur[3]] << 5); \ - h4 = temp & (kHash4Size - 1); \ - hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } - -/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = 
p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } - -#endif +/* LzHash.h -- HASH functions for LZ algorithms +2019-10-30 : Igor Pavlov : Public domain */ + +#ifndef __LZ_HASH_H +#define __LZ_HASH_H + +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +// #define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ + +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 + +#endif diff --git a/lib/LZMA/LzmaDec.c b/lib/LZMA/LzmaDec.c index ba3e1dd..80b70a9 100644 --- a/lib/LZMA/LzmaDec.c +++ b/lib/LZMA/LzmaDec.c @@ -1,1185 +1,1363 @@ -/* LzmaDec.c -- LZMA Decoder -2018-07-04 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -/* #include "CpuArch.h" */ -#include "LzmaDec.h" - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 - -#define RC_INIT_SIZE 5 - -#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } - -#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) -#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); -#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); -#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ - { UPDATE_0(p); i = (i + i); A0; } else \ - { UPDATE_1(p); i = (i + i) + 1; A1; } - -#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } - -#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ - { UPDATE_0(p + i); A0; } else \ - { UPDATE_1(p + i); A1; } -#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) -#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) -#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) - -#define TREE_DECODE(probs, limit, i) \ - { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } - -/* #define _LZMA_SIZE_OPT */ - -#ifdef _LZMA_SIZE_OPT -#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) -#else -#define TREE_6_DECODE(probs, i) \ - { i = 1; \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - TREE_GET_BIT(probs, i); \ - i -= 0x40; } -#endif - -#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) -#define MATCHED_LITER_DEC \ - matchByte += matchByte; \ - bit = offs; \ - offs &= matchByte; \ - probLit = prob + (offs + bit + symbol); \ - GET_BIT2(probLit, symbol, offs ^= bit; , ;) - - - -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } - 
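/*
  (editor's note) For readers new to the range coder: a minimal sketch of the
  single-bit decode that the NORMALIZE / IF_BIT_0 / UPDATE_0 / UPDATE_1 macros
  above implement (they are re-introduced unchanged in the new file further
  down). The helper name DecodeBit is illustrative, not part of the SDK:

    static unsigned DecodeBit(CLzmaProb *prob, UInt32 *range, UInt32 *code, const Byte **buf)
    {
      UInt32 ttt = *prob;
      UInt32 bound;
      if (*range < kTopValue)            // NORMALIZE: pull in one more input byte
      {
        *range <<= 8;
        *code = (*code << 8) | *(*buf)++;
      }
      bound = (*range >> kNumBitModelTotalBits) * ttt;
      if (*code < bound)                 // IF_BIT_0: the decoded bit is 0
      {
        *range = bound;                  // UPDATE_0: make 0 more likely next time
        *prob = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
        return 0;
      }
      *range -= bound;                   // UPDATE_1: the decoded bit is 1
      *code -= bound;
      *prob = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));  // make 1 more likely next time
      return 1;
    }
*/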
-#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) -#define UPDATE_0_CHECK range = bound; -#define UPDATE_1_CHECK range -= bound; code -= bound; -#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ - { UPDATE_0_CHECK; i = (i + i); A0; } else \ - { UPDATE_1_CHECK; i = (i + i) + 1; A1; } -#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) -#define TREE_DECODE_CHECK(probs, limit, i) \ - { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } - - -#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ - { UPDATE_0_CHECK; i += m; m += m; } else \ - { UPDATE_1_CHECK; m += m; i += m; } - - -#define kNumPosBitsMax 4 -#define kNumPosStatesMax (1 << kNumPosBitsMax) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define LenLow 0 -#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) -#define kNumLenProbs (LenHigh + kLenNumHighSymbols) - -#define LenChoice LenLow -#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) - -#define kNumStates 12 -#define kNumStates2 16 -#define kNumLitStates 7 - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -#define kNumPosSlotBits 6 -#define kNumLenToPosStates 4 - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) - -#define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) - -/* External ASM code needs same CLzmaProb array layout. So don't change it. */ - -/* (probs_1664) is faster and better for code size at some platforms */ -/* -#ifdef MY_CPU_X86_OR_AMD64 -*/ -#define kStartOffset 1664 -#define GET_PROBS p->probs_1664 -/* -#define GET_PROBS p->probs + kStartOffset -#else -#define kStartOffset 0 -#define GET_PROBS p->probs -#endif -*/ - -#define SpecPos (-kStartOffset) -#define IsRep0Long (SpecPos + kNumFullDistances) -#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) -#define LenCoder (RepLenCoder + kNumLenProbs) -#define IsMatch (LenCoder + kNumLenProbs) -#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) -#define IsRep (Align + kAlignTableSize) -#define IsRepG0 (IsRep + kNumStates) -#define IsRepG1 (IsRepG0 + kNumStates) -#define IsRepG2 (IsRepG1 + kNumStates) -#define PosSlot (IsRepG2 + kNumStates) -#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define NUM_BASE_PROBS (Literal + kStartOffset) - -#if Align != 0 && kStartOffset != 0 - #error Stop_Compiling_Bad_LZMA_kAlign -#endif - -#if NUM_BASE_PROBS != 1984 - #error Stop_Compiling_Bad_LZMA_PROBS -#endif - - -#define LZMA_LIT_SIZE 0x300 - -#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) - - -#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) -#define COMBINED_PS_STATE (posState + state) -#define GET_LEN_STATE (posState) - -#define LZMA_DIC_MIN (1 << 12) - -/* -p->remainLen : shows status of LZMA decoder: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : need init range coder - = kMatchSpecLenStart + 2 : need init range coder and state -*/ - -/* ---------- LZMA_DECODE_REAL ---------- */ -/* -LzmaDec_DecodeReal_3() can be implemented in external ASM file. -3 - is the code compatibility version of that function for check at link time. 
-*/ - -#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 - -/* -LZMA_DECODE_REAL() -In: - RangeCoder is normalized - if (p->dicPos == limit) - { - LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. - So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol - is not END_OF_PAYALOAD_MARKER, then function returns error code. - } - -Processing: - first LZMA symbol will be decoded in any case - All checks for limits are at the end of main loop, - It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), - RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. - -Out: - RangeCoder is normalized - Result: - SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished -*/ - - -#ifdef _LZMA_DEC_OPT - -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); - -#else - -static -int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - CLzmaProb *probs = GET_PROBS; - unsigned state = (unsigned)p->state; - UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; - unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lc = p->prop.lc; - unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); - - Byte *dic = p->dic; - SizeT dicBufSize = p->dicBufSize; - SizeT dicPos = p->dicPos; - - UInt32 processedPos = p->processedPos; - UInt32 checkDicSize = p->checkDicSize; - unsigned len = 0; - - const Byte *buf = p->buf; - UInt32 range = p->range; - UInt32 code = p->code; - - do - { - CLzmaProb *prob; - UInt32 bound; - unsigned ttt; - unsigned posState = CALC_POS_STATE(processedPos, pbMask); - - prob = probs + IsMatch + COMBINED_PS_STATE; - IF_BIT_0(prob) - { - unsigned symbol; - UPDATE_0(prob); - prob = probs + Literal; - if (processedPos != 0 || checkDicSize != 0) - prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); - processedPos++; - - if (state < kNumLitStates) - { - state -= (state < 4) ? state : 3; - symbol = 1; - #ifdef _LZMA_SIZE_OPT - do { NORMAL_LITER_DEC } while (symbol < 0x100); - #else - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - NORMAL_LITER_DEC - #endif - } - else - { - unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; - unsigned offs = 0x100; - state -= (state < 10) ? 3 : 6; - symbol = 1; - #ifdef _LZMA_SIZE_OPT - do - { - unsigned bit; - CLzmaProb *probLit; - MATCHED_LITER_DEC - } - while (symbol < 0x100); - #else - { - unsigned bit; - CLzmaProb *probLit; - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - MATCHED_LITER_DEC - } - #endif - } - - dic[dicPos++] = (Byte)symbol; - continue; - } - - { - UPDATE_1(prob); - prob = probs + IsRep + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - state += kNumStates; - prob = probs + LenCoder; - } - else - { - UPDATE_1(prob); - /* - // that case was checked before with kBadRepCode - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - */ - prob = probs + IsRepG0 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - prob = probs + IsRep0Long + COMBINED_PS_STATE; - IF_BIT_0(prob) - { - UPDATE_0(prob); - dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0)]; - dicPos++; - processedPos++; - state = state < kNumLitStates ? 9 : 11; - continue; - } - UPDATE_1(prob); - } - else - { - UInt32 distance; - UPDATE_1(prob); - prob = probs + IsRepG1 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep1; - } - else - { - UPDATE_1(prob); - prob = probs + IsRepG2 + state; - IF_BIT_0(prob) - { - UPDATE_0(prob); - distance = rep2; - } - else - { - UPDATE_1(prob); - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < kNumLitStates ? 8 : 11; - prob = probs + RepLenCoder; - } - - #ifdef _LZMA_SIZE_OPT - { - unsigned lim, offset; - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + GET_LEN_STATE; - offset = 0; - lim = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - offset = kLenNumLowSymbols; - lim = (1 << kLenNumLowBits); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; - offset = kLenNumLowSymbols * 2; - lim = (1 << kLenNumHighBits); - } - } - TREE_DECODE(probLen, lim, len); - len += offset; - } - #else - { - CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + GET_LEN_STATE; - len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - len -= 8; - } - else - { - UPDATE_1(probLen); - probLen = prob + LenChoice2; - IF_BIT_0(probLen) - { - UPDATE_0(probLen); - probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - len = 1; - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - TREE_GET_BIT(probLen, len); - } - else - { - UPDATE_1(probLen); - probLen = prob + LenHigh; - TREE_DECODE(probLen, (1 << kLenNumHighBits), len); - len += kLenNumLowSymbols * 2; - } - } - } - #endif - - if (state >= kNumStates) - { - UInt32 distance; - prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); - TREE_6_DECODE(prob, distance); - if (distance >= kStartPosModelIndex) - { - unsigned posSlot = (unsigned)distance; - unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); - distance = (2 | (distance & 1)); - if (posSlot < kEndPosModelIndex) - { - distance <<= numDirectBits; - prob = probs + SpecPos; - { - UInt32 m = 1; - distance++; - do - { - REV_BIT_VAR(prob, distance, m); - } - while (--numDirectBits); - distance -= m; - } - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE - range >>= 1; - - { - UInt32 t; - code -= range; - t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ - distance = (distance << 1) + (t + 1); - code += range & t; - } - /* - distance <<= 1; - if (code >= range) - { - code -= range; - distance |= 1; - } - */ - } - while (--numDirectBits); - prob = probs + Align; - distance <<= kNumAlignBits; - { - unsigned i = 1; - REV_BIT_CONST(prob, i, 1); - REV_BIT_CONST(prob, i, 2); - REV_BIT_CONST(prob, i, 4); - REV_BIT_LAST (prob, i, 8); - distance |= i; - } - if (distance == (UInt32)0xFFFFFFFF) - { - len = kMatchSpecLenStart; - state -= kNumStates; - break; - } - } - } - - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - rep0 = distance + 1; - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; - if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - } - - len += kMatchMinLen; - - { - SizeT rem; - unsigned curLen; - SizeT pos; - - if ((rem = limit - dicPos) == 0) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - - curLen = ((rem < len) ? (unsigned)rem : len); - pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); - - processedPos += (UInt32)curLen; - - len -= curLen; - if (curLen <= dicBufSize - pos) - { - Byte *dest = dic + dicPos; - ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; - const Byte *lim = dest + curLen; - dicPos += (SizeT)curLen; - do - *(dest) = (Byte)*(dest + src); - while (++dest != lim); - } - else - { - do - { - dic[dicPos++] = dic[pos]; - if (++pos == dicBufSize) - pos = 0; - } - while (--curLen != 0); - } - } - } - } - while (dicPos < limit && buf < bufLimit); - - NORMALIZE; - - p->buf = buf; - p->range = range; - p->code = code; - p->remainLen = (UInt32)len; - p->dicPos = dicPos; - p->processedPos = processedPos; - p->reps[0] = rep0; - p->reps[1] = rep1; - p->reps[2] = rep2; - p->reps[3] = rep3; - p->state = (UInt32)state; - - return SZ_OK; -} -#endif - -static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) -{ - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) - { - Byte *dic = p->dic; - SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = (unsigned)p->remainLen; - SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ - SizeT rem = limit - dicPos; - if (rem < len) - len = (unsigned)(rem); - - if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) - p->checkDicSize = p->prop.dicSize; - - p->processedPos += (UInt32)len; - p->remainLen -= (UInt32)len; - while (len != 0) - { - len--; - dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? 
dicBufSize : 0)]; - dicPos++; - } - p->dicPos = dicPos; - } -} - - -#define kRange0 0xFFFFFFFF -#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) -#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) -#if kBadRepCode != (0xC0000000 - 0x400) - #error Stop_Compiling_Bad_LZMA_Check -#endif - -static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - do - { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - - if (p->processedPos == 0) - if (p->code >= kBadRepCode) - return SZ_ERROR_DATA; - } - - RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); - - if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) - p->checkDicSize = p->prop.dicSize; - - LzmaDec_WriteRem(p, limit); - } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - return 0; -} - -typedef enum -{ - DUMMY_ERROR, /* unexpected end of input stream */ - DUMMY_LIT, - DUMMY_MATCH, - DUMMY_REP -} ELzmaDummy; - -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) -{ - UInt32 range = p->range; - UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; - const CLzmaProb *probs = GET_PROBS; - unsigned state = (unsigned)p->state; - ELzmaDummy res; - - { - const CLzmaProb *prob; - UInt32 bound; - unsigned ttt; - unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); - - prob = probs + IsMatch + COMBINED_PS_STATE; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK - - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - - prob = probs + Literal; - if (p->checkDicSize != 0 || p->processedPos != 0) - prob += ((UInt32)LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); - - if (state < kNumLitStates) - { - unsigned symbol = 1; - do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); - } - else - { - unsigned matchByte = p->dic[p->dicPos - p->reps[0] + - (p->dicPos < p->reps[0] ? 
p->dicBufSize : 0)]; - unsigned offs = 0x100; - unsigned symbol = 1; - do - { - unsigned bit; - const CLzmaProb *probLit; - matchByte += matchByte; - bit = offs; - offs &= matchByte; - probLit = prob + (offs + bit + symbol); - GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) - } - while (symbol < 0x100); - } - res = DUMMY_LIT; - } - else - { - unsigned len; - UPDATE_1_CHECK; - - prob = probs + IsRep + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - state = 0; - prob = probs + LenCoder; - res = DUMMY_MATCH; - } - else - { - UPDATE_1_CHECK; - res = DUMMY_REP; - prob = probs + IsRepG0 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - prob = probs + IsRep0Long + COMBINED_PS_STATE; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; - } - else - { - UPDATE_1_CHECK; - } - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG1 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - prob = probs + IsRepG2 + state; - IF_BIT_0_CHECK(prob) - { - UPDATE_0_CHECK; - } - else - { - UPDATE_1_CHECK; - } - } - } - state = kNumStates; - prob = probs + RepLenCoder; - } - { - unsigned limit, offset; - const CLzmaProb *probLen = prob + LenChoice; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenLow + GET_LEN_STATE; - offset = 0; - limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenChoice2; - IF_BIT_0_CHECK(probLen) - { - UPDATE_0_CHECK; - probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); - offset = kLenNumLowSymbols; - limit = 1 << kLenNumLowBits; - } - else - { - UPDATE_1_CHECK; - probLen = prob + LenHigh; - offset = kLenNumLowSymbols * 2; - limit = 1 << kLenNumHighBits; - } - } - TREE_DECODE_CHECK(probLen, limit, len); - len += offset; - } - - if (state < 4) - { - unsigned posSlot; - prob = probs + PosSlot + - ((len < kNumLenToPosStates - 1 ? 
len : kNumLenToPosStates - 1) << - kNumPosSlotBits); - TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); - if (posSlot >= kStartPosModelIndex) - { - unsigned numDirectBits = ((posSlot >> 1) - 1); - - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - - if (posSlot < kEndPosModelIndex) - { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); - } - else - { - numDirectBits -= kNumAlignBits; - do - { - NORMALIZE_CHECK - range >>= 1; - code -= range & (((code - range) >> 31) - 1); - /* if (code >= range) code -= range; */ - } - while (--numDirectBits); - prob = probs + Align; - numDirectBits = kNumAlignBits; - } - { - unsigned i = 1; - unsigned m = 1; - do - { - REV_BIT_CHECK(prob, i, m); - } - while (--numDirectBits); - } - } - } - } - } - NORMALIZE_CHECK; - return res; -} - - -void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) -{ - p->remainLen = kMatchSpecLenStart + 1; - p->tempBufSize = 0; - - if (initDic) - { - p->processedPos = 0; - p->checkDicSize = 0; - p->remainLen = kMatchSpecLenStart + 2; - } - if (initState) - p->remainLen = kMatchSpecLenStart + 2; -} - -void LzmaDec_Init(CLzmaDec *p) -{ - p->dicPos = 0; - LzmaDec_InitDicAndState(p, True, True); -} - - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, - ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT inSize = *srcLen; - (*srcLen) = 0; - - *status = LZMA_STATUS_NOT_SPECIFIED; - - if (p->remainLen > kMatchSpecLenStart) - { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize != 0 && p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - if (p->tempBufSize < RC_INIT_SIZE) - { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - p->code = - ((UInt32)p->tempBuf[1] << 24) - | ((UInt32)p->tempBuf[2] << 16) - | ((UInt32)p->tempBuf[3] << 8) - | ((UInt32)p->tempBuf[4]); - p->range = 0xFFFFFFFF; - p->tempBufSize = 0; - - if (p->remainLen > kMatchSpecLenStart + 1) - { - SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); - SizeT i; - CLzmaProb *probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - } - - p->remainLen = 0; - } - - LzmaDec_WriteRem(p, dicLimit); - - while (p->remainLen != kMatchSpecLenStart) - { - int checkEndMarkNow = 0; - - if (p->dicPos >= dicLimit) - { - if (p->remainLen == 0 && p->code == 0) - { - *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; - return SZ_OK; - } - if (finishMode == LZMA_FINISH_ANY) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_OK; - } - if (p->remainLen != 0) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - checkEndMarkNow = 1; - } - - if (p->tempBufSize == 0) - { - SizeT processed; - const Byte *bufLimit; - if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) - { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; - (*srcLen) += inSize; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - bufLimit = src; - } - else - bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; - p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize 
-= processed; - } - else - { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) - { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); - if (dummyRes == DUMMY_ERROR) - { - (*srcLen) += (SizeT)lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) - { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; - } - } - p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; - - { - unsigned kkk = (unsigned)(p->buf - p->tempBuf); - if (rem < kkk) - return SZ_ERROR_FAIL; /* some internal error */ - rem -= kkk; - if (lookAhead < rem) - return SZ_ERROR_FAIL; /* some internal error */ - lookAhead -= rem; - } - (*srcLen) += (SizeT)lookAhead; - src += lookAhead; - inSize -= (SizeT)lookAhead; - p->tempBufSize = 0; - } - } - - if (p->code != 0) - return SZ_ERROR_DATA; - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return SZ_OK; -} - - -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) -{ - SizeT outSize = *destLen; - SizeT inSize = *srcLen; - *srcLen = *destLen = 0; - for (;;) - { - SizeT inSizeCur = inSize, outSizeCur, dicPos; - ELzmaFinishMode curFinishMode; - SRes res; - if (p->dicPos == p->dicBufSize) - p->dicPos = 0; - dicPos = p->dicPos; - if (outSize > p->dicBufSize - dicPos) - { - outSizeCur = p->dicBufSize; - curFinishMode = LZMA_FINISH_ANY; - } - else - { - outSizeCur = dicPos + outSize; - curFinishMode = finishMode; - } - - res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); - src += inSizeCur; - inSize -= inSizeCur; - *srcLen += inSizeCur; - outSizeCur = p->dicPos - dicPos; - memcpy(dest, p->dic + dicPos, outSizeCur); - dest += outSizeCur; - outSize -= outSizeCur; - *destLen += outSizeCur; - if (res != 0) - return res; - if (outSizeCur == 0 || outSize == 0) - return SZ_OK; - } -} - -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->probs); - p->probs = NULL; -} - -static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->dic); - p->dic = NULL; -} - -void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) -{ - LzmaDec_FreeProbs(p, alloc); - LzmaDec_FreeDict(p, alloc); -} - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) -{ - UInt32 dicSize; - Byte d; - - if (size < LZMA_PROPS_SIZE) - return SZ_ERROR_UNSUPPORTED; - else - dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); - - if (dicSize < LZMA_DIC_MIN) - dicSize = LZMA_DIC_MIN; - p->dicSize = dicSize; - - d = data[0]; - if (d >= (9 * 5 * 5)) - return SZ_ERROR_UNSUPPORTED; - - p->lc = (Byte)(d % 9); - d /= 9; - p->pb = (Byte)(d / 5); - p->lp = (Byte)(d % 5); - - return SZ_OK; -} - -static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) -{ - UInt32 numProbs = LzmaProps_GetNumProbs(propNew); - if (!p->probs || numProbs != p->numProbs) - { - LzmaDec_FreeProbs(p, alloc); - p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); - if (!p->probs) - return SZ_ERROR_MEM; - p->probs_1664 = p->probs + 1664; - p->numProbs = numProbs; - } - return SZ_OK; -} - -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, 
ISzAllocPtr alloc) -{ - CLzmaProps propNew; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) -{ - CLzmaProps propNew; - SizeT dicBufSize; - RINOK(LzmaProps_Decode(&propNew, props, propsSize)); - RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); - - { - UInt32 dictSize = propNew.dicSize; - SizeT mask = ((UInt32)1 << 12) - 1; - if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; - else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; - dicBufSize = ((SizeT)dictSize + mask) & ~mask; - if (dicBufSize < dictSize) - dicBufSize = dictSize; - } - - if (!p->dic || dicBufSize != p->dicBufSize) - { - LzmaDec_FreeDict(p, alloc); - p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); - if (!p->dic) - { - LzmaDec_FreeProbs(p, alloc); - return SZ_ERROR_MEM; - } - } - p->dicBufSize = dicBufSize; - p->prop = propNew; - return SZ_OK; -} - -SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, - const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAllocPtr alloc) -{ - CLzmaDec p; - SRes res; - SizeT outSize = *destLen, inSize = *srcLen; - *destLen = *srcLen = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; - if (inSize < RC_INIT_SIZE) - return SZ_ERROR_INPUT_EOF; - LzmaDec_Construct(&p); - RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); - p.dic = dest; - p.dicBufSize = outSize; - LzmaDec_Init(&p); - *srcLen = inSize; - res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); - *destLen = p.dicPos; - if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) - res = SZ_ERROR_INPUT_EOF; - LzmaDec_FreeProbs(&p, alloc); - return res; -} +/* LzmaDec.c -- LZMA Decoder +2021-04-01 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #include "CpuArch.h" */ +#include "LzmaDec.h" + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) + +#define RC_INIT_SIZE 5 + +#ifndef _LZMA_DEC_OPT + +#define kNumMoveBits 5 +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } + +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i); A0; } else \ + { UPDATE_1(p + i); A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + 
TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) +#define MATCHED_LITER_DEC \ + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + +#endif // _LZMA_DEC_OPT + + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK; i += m; m += m; } else \ + { UPDATE_1_CHECK; m += m; i += m; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) + +#define kNumStates 12 +#define kNumStates2 16 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + +/* External ASM code needs same CLzmaProb array layout. So don't change it. 
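   (editor's note) As a cross-check of that layout, the offset constants
   defined below telescope as follows (kNumFullDistances = 1 << 7 = 128,
   kNumLenProbs = 256 + 256 = 512, kNumStates2 << kNumPosBitsMax = 256,
   kAlignTableSize = 16, kStartOffset = 1664):

     SpecPos     = -1664
     IsRep0Long  = -1664 + 128 = -1536
     RepLenCoder = -1536 + 256 = -1280
     LenCoder    = -1280 + 512 =  -768
     IsMatch     =  -768 + 512 =  -256
     Align       =  -256 + 256 =     0   (hence the "Align != 0" guard)
     IsRep / IsRepG0 / IsRepG1 / IsRepG2 = 16 / 28 / 40 / 52
     PosSlot     =    64
     Literal     =    64 + 256 =   320
     NUM_BASE_PROBS = 320 + 1664 = 1984  (hence the "NUM_BASE_PROBS != 1984" guard)

   so the compile-time #error guards pin exactly the array layout the external
   ASM code expects.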
*/ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) + +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign +#endif + +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) + +#define LZMA_DIC_MIN (1 << 12) + +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. + } + +Processing: + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. 
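  (editor's note) Worked numbers for the (p->remainLen) encoding above:
  kMatchSpecLenStart = 2 + 16 + 256 = 274, kMatchSpecLen_Error_Data = 1 << 9 = 512,
  kMatchSpecLen_Error_Fail = 511. So ordinary pending-copy lengths occupy
  [0 ... 273], the finish/init markers are 274 ... 276, 511 flags an internal
  failure, and 512 + [0 ... 273] flags a data error while still carrying the
  length of the offending match in the low bits.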
+ +Out: + RangeCoder is normalized + Result: + SZ_OK - OK + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined +*/ + + +#ifdef _LZMA_DEC_OPT + +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (processedPos != 0 || checkDicSize != 0) + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); + processedPos++; + + if (state < kNumLitStates) + { + state -= (state < 4) ? state : 3; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do { NORMAL_LITER_DEC } while (symbol < 0x100); + #else + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + NORMAL_LITER_DEC + #endif + } + else + { + unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + unsigned offs = 0x100; + state -= (state < 10) ? 3 : 6; + symbol = 1; + #ifdef _LZMA_SIZE_OPT + do + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + } + while (symbol < 0x100); + #else + { + unsigned bit; + CLzmaProb *probLit; + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + MATCHED_LITER_DEC + } + #endif + } + + dic[dicPos++] = (Byte)symbol; + continue; + } + + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0(prob) + { + UPDATE_0(prob); + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 
9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 8 : 11; + prob = probs + RepLenCoder; + } + + #ifdef _LZMA_SIZE_OPT + { + unsigned lim, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + lim = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + lim = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, lim, len); + len += offset; + } + #else + { + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE; + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + len -= 8; + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + len = 1; + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + TREE_GET_BIT(probLen, len); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + TREE_DECODE(probLen, (1 << kLenNumHighBits), len); + len += kLenNumLowSymbols * 2; + } + } + } + #endif + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos; + { + UInt32 m = 1; + distance++; + do + { + REV_BIT_VAR(prob, distance, m); + } + while (--numDirectBits); + distance -= m; + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + REV_BIT_CONST(prob, i, 1); + REV_BIT_CONST(prob, i, 2); + REV_BIT_CONST(prob, i, 4); + REV_BIT_LAST (prob, i, 8); + distance |= i; + } + if (distance == (UInt32)0xFFFFFFFF) + { + len = kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? 
processedPos: checkDicSize)) + { + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; + } + } + + len += kMatchMinLen; + + { + SizeT rem; + unsigned curLen; + SizeT pos; + + if ((rem = limit - dicPos) == 0) + { + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; + } + + curLen = ((rem < len) ? (unsigned)rem : len); + pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); + + processedPos += (UInt32)curLen; + + len -= curLen; + if (curLen <= dicBufSize - pos) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += (SizeT)curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + + NORMALIZE; + + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = (UInt32)state; + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; + return SZ_OK; +} +#endif + + + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; + { + SizeT dicPos = p->dicPos; + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += (UInt32)len; + p->remainLen -= (UInt32)len; + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do + { + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; + dicPos++; + } + while (--len); + p->dicPos = dicPos; + } +} + + +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). 
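  (editor's note) A concrete instance of that limit adjustment, with round
  numbers: if p->prop.dicSize = 1 << 20 and p->processedPos = 900 << 10, then
  rem = 1048576 - 921600 = 126976, so a caller limit more than 126976 bytes
  past p->dicPos is clamped to p->dicPos + rem. The decode therefore stops
  exactly at the dictionary-size boundary, which lets (p->checkDicSize) be set
  at the correct moment, as described next.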
+ +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); + if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + return res; + } +} + + + +typedef enum +{ + DUMMY_INPUT_EOF, /* need more input data */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = *bufOut; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; + ELzmaDummy res; + + for (;;) + { + const CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); + + prob = probs + IsMatch + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += ((UInt32)LZMA_LIT_SIZE * + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + (p->dicPos < p->reps[0] ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + const CLzmaProb *probLit; + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + COMBINED_PS_STATE; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + break; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + const CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + GET_LEN_STATE; + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols * 2; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + unsigned numDirectBits = ((posSlot >> 1) - 1); + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + unsigned m = 1; + do + { + REV_BIT_CHECK(prob, i, m); + } + while (--numDirectBits); + } + } + } + } + break; + } + NORMALIZE_CHECK; + + *bufOut = buf; + return res; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) +{ + p->remainLen = kMatchSpecLenStart + 1; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->remainLen = kMatchSpecLenStart + 2; + } + if (initState) + p->remainLen = kMatchSpecLenStart + 2; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + + +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. 
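  (editor's note) The caller-visible consequence, as a minimal streaming
  sketch (assumes an initialized CLzmaDec named dec; the NextChunk refill
  helper is hypothetical):

    const Byte *src = firstChunk;   // supplied by the caller
    SizeT avail = firstChunkSize;
    for (;;)
    {
      SizeT used = avail;
      SRes res = LzmaDec_DecodeToDic(&dec, dicLimit, src, &used, LZMA_FINISH_END, &status);
      src += used;
      avail -= used;
      if (res != SZ_OK || status != LZMA_STATUS_NEEDS_MORE_INPUT)
        break;
      src = NextChunk(&avail);      // decoder parked the partial symbol in tempBuf
    }

  On LZMA_STATUS_NEEDS_MORE_INPUT the partial lookahead symbol (at most
  LZMA_REQUIRED_INPUT_MAX bytes) is already buffered in p->tempBuf, so the
  caller only presents fresh bytes and never has to rewind src.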
+
+When the decoder reaches dicLimit, it checks the (finishMode) parameter:
+ if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead
+ if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if end_marker is possible at the current position
+
+When the decoder looks ahead and the lookahead symbol is not end_marker, there are two ways:
+ 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
+ 2) Relaxed mode (alternative mode) : the decoder could return SZ_OK, and the caller
+ must check the (status) value. The caller can report the error,
+ if the end of stream is expected, and the (status) is not
+ LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
+*/
+
+
+#define RETURN__NOT_FINISHED__FOR_FINISH \
+ *status = LZMA_STATUS_NOT_FINISHED; \
+ return SZ_ERROR_DATA; // for strict mode
+ // return SZ_OK; // for relaxed mode
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+ ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+ SizeT inSize = *srcLen;
+ (*srcLen) = 0;
+ *status = LZMA_STATUS_NOT_SPECIFIED;
+
+ if (p->remainLen > kMatchSpecLenStart)
+ {
+ if (p->remainLen > kMatchSpecLenStart + 2)
+ return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
+
+ for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+ p->tempBuf[p->tempBufSize++] = *src++;
+ if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
+ return SZ_ERROR_DATA;
+ if (p->tempBufSize < RC_INIT_SIZE)
+ {
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+ p->code =
+ ((UInt32)p->tempBuf[1] << 24)
+ | ((UInt32)p->tempBuf[2] << 16)
+ | ((UInt32)p->tempBuf[3] << 8)
+ | ((UInt32)p->tempBuf[4]);
+
+ if (p->checkDicSize == 0
+ && p->processedPos == 0
+ && p->code >= kBadRepCode)
+ return SZ_ERROR_DATA;
+
+ p->range = 0xFFFFFFFF;
+ p->tempBufSize = 0;
+
+ if (p->remainLen > kMatchSpecLenStart + 1)
+ {
+ SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+ SizeT i;
+ CLzmaProb *probs = p->probs;
+ for (i = 0; i < numProbs; i++)
+ probs[i] = kBitModelTotal >> 1;
+ p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+ p->state = 0;
+ }
+
+ p->remainLen = 0;
+ }
+
+ for (;;)
+ {
+ if (p->remainLen == kMatchSpecLenStart)
+ {
+ if (p->code != 0)
+ return SZ_ERROR_DATA;
+ *status = LZMA_STATUS_FINISHED_WITH_MARK;
+ return SZ_OK;
+ }
+
+ LzmaDec_WriteRem(p, dicLimit);
+
+ {
+ // (p->remainLen == 0 || p->dicPos == dicLimit)
+
+ int checkEndMarkNow = 0;
+
+ if (p->dicPos >= dicLimit)
+ {
+ if (p->remainLen == 0 && p->code == 0)
+ {
+ *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+ return SZ_OK;
+ }
+ if (finishMode == LZMA_FINISH_ANY)
+ {
+ *status = LZMA_STATUS_NOT_FINISHED;
+ return SZ_OK;
+ }
+ if (p->remainLen != 0)
+ {
+ RETURN__NOT_FINISHED__FOR_FINISH;
+ }
+ checkEndMarkNow = 1;
+ }
+
+ // (p->remainLen == 0)
+
+ if (p->tempBufSize == 0)
+ {
+ const Byte *bufLimit;
+ int dummyProcessed = -1;
+
+ if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+ {
+ const Byte *bufOut = src + inSize;
+
+ ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
+
+ if (dummyRes == DUMMY_INPUT_EOF)
+ {
+ size_t i;
+ if (inSize >= LZMA_REQUIRED_INPUT_MAX)
+ break;
+ (*srcLen) += inSize;
+ p->tempBufSize = (unsigned)inSize;
+ for (i = 0; i < inSize; i++)
+ p->tempBuf[i] = src[i];
+ *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+ return SZ_OK;
+ }
+
+ dummyProcessed = (int)(bufOut - src);
+ if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
+ break;
+
+ if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
+ {
+ unsigned i;
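+ /*
+ Editorial sketch (not part of the upstream sources): a typical caller loop for
+ LzmaDec_DecodeToDic in the strict mode described above. It assumes the whole
+ compressed stream is already in (inBuf, inAvail), a 5-byte properties block in
+ props[], a hypothetical write_out() sink, and the SDK's g_Alloc allocator:
+
+   CLzmaDec dec;
+   ELzmaStatus status;
+   SRes res;
+   LzmaDec_Construct(&dec);
+   res = LzmaDec_Allocate(&dec, props, LZMA_PROPS_SIZE, &g_Alloc);
+   if (res != SZ_OK)
+     return res;
+   LzmaDec_Init(&dec);
+   for (;;)
+   {
+     SizeT srcLen = inAvail;
+     SizeT dicPos;
+     if (dec.dicPos == dec.dicBufSize)
+       dec.dicPos = 0;                    // the caller must wrap dicPos manually
+     dicPos = dec.dicPos;
+     res = LzmaDec_DecodeToDic(&dec, dec.dicBufSize, inBuf, &srcLen,
+         LZMA_FINISH_ANY, &status);
+     inBuf += srcLen;
+     inAvail -= srcLen;
+     write_out(dec.dic + dicPos, dec.dicPos - dicPos);   // drain the new bytes
+     if (res != SZ_OK || status == LZMA_STATUS_FINISHED_WITH_MARK)
+       break;
+     if (status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK && inAvail == 0)
+       break;                             // markless stream may end here
+     if (status == LZMA_STATUS_NEEDS_MORE_INPUT && inAvail == 0)
+     {
+       res = SZ_ERROR_INPUT_EOF;          // truncated stream
+       break;
+     }
+   }
+   LzmaDec_Free(&dec, &g_Alloc);
+ */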
+ (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; + } + + bufLimit = src; + // we will decode only one iteration + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + + p->buf = src; + + { + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) + { + if (processed > inSize) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; + } + } + + p->buf = p->tempBuf; + + { + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } + } + } + } + } + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; +} + + + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, 
src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->probs); + p->probs = NULL; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->dic); + p->dic = NULL; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = (Byte)(d % 9); + d /= 9; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (!p->probs || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); + if (!p->probs) + return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + + { + UInt32 dictSize = propNew.dicSize; + SizeT mask = ((UInt32)1 << 12) - 1; + if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; + else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; + dicBufSize = ((SizeT)dictSize + mask) & ~mask; + if (dicBufSize < dictSize) + dicBufSize = dictSize; + } + + if (!p->dic || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize); + if (!p->dic) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc) +{ + CLzmaDec p; + SRes res; + SizeT outSize = *destLen, inSize = *srcLen; + *destLen = *srcLen = 0; + *status = LZMA_STATUS_NOT_SPECIFIED; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + LzmaDec_Construct(&p); + RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); + p.dic = dest; + p.dicBufSize = outSize; + LzmaDec_Init(&p); + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + *destLen = p.dicPos; + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = 
SZ_ERROR_INPUT_EOF; + LzmaDec_FreeProbs(&p, alloc); + return res; +} diff --git a/lib/LZMA/LzmaDec.h b/lib/LZMA/LzmaDec.h index 1f0927a..6194b7d 100644 --- a/lib/LZMA/LzmaDec.h +++ b/lib/LZMA/LzmaDec.h @@ -1,234 +1,236 @@ -/* LzmaDec.h -- LZMA Decoder -2018-04-21 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_DEC_H -#define __LZMA_DEC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -/* #define _LZMA_PROB32 */ -/* _LZMA_PROB32 can increase the speed on some CPUs, - but memory usage for CLzmaDec::probs will be doubled in that case */ - -typedef -#ifdef _LZMA_PROB32 - UInt32 -#else - UInt16 -#endif - CLzmaProb; - - -/* ---------- LZMA Properties ---------- */ - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaProps -{ - Byte lc; - Byte lp; - Byte pb; - Byte _pad_; - UInt32 dicSize; -} CLzmaProps; - -/* LzmaProps_Decode - decodes properties -Returns: - SZ_OK - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); - - -/* ---------- LZMA Decoder state ---------- */ - -/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. - Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ - -#define LZMA_REQUIRED_INPUT_MAX 20 - -typedef struct -{ - /* Don't change this structure. ASM code can use it. */ - CLzmaProps prop; - CLzmaProb *probs; - CLzmaProb *probs_1664; - Byte *dic; - SizeT dicBufSize; - SizeT dicPos; - const Byte *buf; - UInt32 range; - UInt32 code; - UInt32 processedPos; - UInt32 checkDicSize; - UInt32 reps[4]; - UInt32 state; - UInt32 remainLen; - - UInt32 numProbs; - unsigned tempBufSize; - Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; -} CLzmaDec; - -#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } - -void LzmaDec_Init(CLzmaDec *p); - -/* There are two types of LZMA streams: - - Stream with end mark. That end mark adds about 6 bytes to compressed size. - - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ - -typedef enum -{ - LZMA_FINISH_ANY, /* finish at any point */ - LZMA_FINISH_END /* block must be finished at the end */ -} ELzmaFinishMode; - -/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! - - You must use LZMA_FINISH_END, when you know that current output buffer - covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. - - If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, - and output value of destLen will be less than output buffer size limit. - You can check status result also. - - You can use multiple checks to test data integrity after full decompression: - 1) Check Result and "status" variable. - 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. - 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. - You must use correct finish mode in that case. */ - -typedef enum -{ - LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ - LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ - LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ - LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ -} ELzmaStatus; - -/* ELzmaStatus is used only as output value for function call */ - - -/* ---------- Interfaces ---------- */ - -/* There are 3 levels of interfaces: - 1) Dictionary Interface - 2) Buffer Interface - 3) One Call Interface - You can select any of these interfaces, but don't mix functions from different - groups for same object. */ - - -/* There are two variants to allocate state for Dictionary Interface: - 1) LzmaDec_Allocate / LzmaDec_Free - 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs - You can use variant 2, if you set dictionary buffer manually. - For Buffer Interface you must always use variant 1. - -LzmaDec_Allocate* can return: - SZ_OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties -*/ - -SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); -void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); - -SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); -void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); - -/* ---------- Dictionary Interface ---------- */ - -/* You can use it, if you want to eliminate the overhead for data copying from - dictionary to some other external buffer. - You must work with CLzmaDec variables directly in this interface. - - STEPS: - LzmaDec_Construct() - LzmaDec_Allocate() - for (each new stream) - { - LzmaDec_Init() - while (it needs more decompression) - { - LzmaDec_DecodeToDic() - use data from CLzmaDec::dic and update CLzmaDec::dicPos - } - } - LzmaDec_Free() -*/ - -/* LzmaDec_DecodeToDic - - The decoding to internal dictionary buffer (CLzmaDec::dic). - You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! - -finishMode: - It has meaning only if the decoding reaches output limit (dicLimit). - LZMA_FINISH_ANY - Decode just dicLimit bytes. - LZMA_FINISH_END - Stream must be finished after dicLimit. - -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_NEEDS_MORE_INPUT - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error -*/ - -SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -/* ---------- Buffer Interface ---------- */ - -/* It's zlib-like interface. - See LzmaDec_DecodeToDic description for information about STEPS and return results, - but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need - to work with CLzmaDec variables manually. - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). -*/ - -SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, - const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); - - -/* ---------- One Call Interface ---------- */ - -/* LzmaDecode - -finishMode: - It has meaning only if the decoding reaches output limit (*destLen). - LZMA_FINISH_ANY - Decode just destLen bytes. - LZMA_FINISH_END - Stream must be finished after (*destLen). 
- -Returns: - SZ_OK - status: - LZMA_STATUS_FINISHED_WITH_MARK - LZMA_STATUS_NOT_FINISHED - LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK - SZ_ERROR_DATA - Data error - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_UNSUPPORTED - Unsupported properties - SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). -*/ - -SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, - const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, - ELzmaStatus *status, ISzAllocPtr alloc); - -EXTERN_C_END - -#endif +/* LzmaDec.h -- LZMA Decoder +2020-03-19 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_DEC_H +#define __LZMA_DEC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +typedef +#ifdef _LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. + Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + /* Don't change this structure. ASM code can use it. */ + CLzmaProps prop; + CLzmaProb *probs; + CLzmaProb *probs_1664; + Byte *dic; + SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; + UInt32 processedPos; + UInt32 checkDicSize; + UInt32 reps[4]; + UInt32 state; + UInt32 remainLen; + + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Construct() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! + +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). 
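+
+Example (editorial sketch, not from the upstream docs): decoding a buffer that
+starts with a 5-byte properties block followed by the compressed payload, with
+the SDK's g_Alloc allocator; the classic default properties byte 0x5D encodes
+lc=3, lp=0, pb=2.
+
+  SizeT destLen = outSize;
+  SizeT srcLen = inSize - LZMA_PROPS_SIZE;
+  ELzmaStatus status;
+  SRes res = LzmaDecode(dest, &destLen, src + LZMA_PROPS_SIZE, &srcLen,
+      src, LZMA_PROPS_SIZE, LZMA_FINISH_END, &status, &g_Alloc);
+  if (res == SZ_OK && status != LZMA_STATUS_FINISHED_WITH_MARK
+      && status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
+    res = SZ_ERROR_DATA;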
+ +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure +*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAllocPtr alloc); + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/LzmaEnc.c b/lib/LZMA/LzmaEnc.c index d78dbe1..5705506 100644 --- a/lib/LZMA/LzmaEnc.c +++ b/lib/LZMA/LzmaEnc.c @@ -1,2976 +1,3165 @@ -/* LzmaEnc.c -- LZMA Encoder -2018-12-29: Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include - -/* #define SHOW_STAT */ -/* #define SHOW_STAT2 */ - -#if defined(SHOW_STAT) || defined(SHOW_STAT2) -#include -#endif - -#include "LzmaEnc.h" - -#include "LzFind.h" -#ifndef _7ZIP_ST -#include "LzFindMt.h" -#endif - -#ifdef SHOW_STAT -static unsigned g_STAT_OFFSET = 0; -#endif - -#define kLzmaMaxHistorySize ((UInt32)3 << 29) -/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ - -#define kNumTopBits 24 -#define kTopValue ((UInt32)1 << kNumTopBits) - -#define kNumBitModelTotalBits 11 -#define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 -#define kProbInitValue (kBitModelTotal >> 1) - -#define kNumMoveReducingBits 4 -#define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) - -#define REP_LEN_COUNT 64 - -void LzmaEncProps_Init(CLzmaEncProps *p) -{ - p->level = 5; - p->dictSize = p->mc = 0; - p->reduceSize = (UInt64)(Int64)-1; - p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; - p->writeEndMark = 0; -} - -void LzmaEncProps_Normalize(CLzmaEncProps *p) -{ - int level = p->level; - if (level < 0) level = 5; - p->level = level; - - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); - if (p->dictSize > p->reduceSize) - { - unsigned i; - UInt32 reduceSize = (UInt32)p->reduceSize; - for (i = 11; i <= 30; i++) - { - if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } - if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } - } - } - - if (p->lc < 0) p->lc = 3; - if (p->lp < 0) p->lp = 0; - if (p->pb < 0) p->pb = 2; - - if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); - if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); - if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); - - if (p->numThreads < 0) - p->numThreads = - #ifndef _7ZIP_ST - ((p->btMode && p->algo) ? 
2 : 1); - #else - 1; - #endif -} - -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) -{ - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - return props.dictSize; -} - -#if (_MSC_VER >= 1400) -/* BSR code is fast for some new CPUs */ -/* #define LZMA_LOG_BSR */ -#endif - -#ifdef LZMA_LOG_BSR - -#define kDicLogSizeMaxCompress 32 - -#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } - -static unsigned GetPosSlot1(UInt32 pos) -{ - unsigned res; - BSR2_RET(pos, res); - return res; -} -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } - -#else - -#define kNumLogBits (9 + sizeof(size_t) / 2) -/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ - -#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) - -static void LzmaEnc_FastPosInit(Byte *g_FastPos) -{ - unsigned slot; - g_FastPos[0] = 0; - g_FastPos[1] = 1; - g_FastPos += 2; - - for (slot = 2; slot < kNumLogBits * 2; slot++) - { - size_t k = ((size_t)1 << ((slot >> 1) - 1)); - size_t j; - for (j = 0; j < k; j++) - g_FastPos[j] = (Byte)slot; - g_FastPos += k; - } -} - -/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ -/* -#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } -*/ - -/* -#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ - (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } -*/ - -#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ - res = p->g_FastPos[pos >> zz] + (zz * 2); } - -/* -#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? 
\ - p->g_FastPos[pos >> 6] + 12 : \ - p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } -*/ - -#define GetPosSlot1(pos) p->g_FastPos[pos] -#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } - -#endif - - -#define LZMA_NUM_REPS 4 - -typedef UInt16 CState; -typedef UInt16 CExtra; - -typedef struct -{ - UInt32 price; - CState state; - CExtra extra; - // 0 : normal - // 1 : LIT : MATCH - // > 1 : MATCH (extra-1) : LIT : REP0 (len) - UInt32 len; - UInt32 dist; - UInt32 reps[LZMA_NUM_REPS]; -} COptimal; - - -// 18.06 -#define kNumOpts (1 << 11) -#define kPackReserve (kNumOpts * 8) -// #define kNumOpts (1 << 12) -// #define kPackReserve (1 + kNumOpts * 2) - -#define kNumLenToPosStates 4 -#define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 -#define kDicLogSizeMax 32 -#define kDistTableSizeMax (kDicLogSizeMax * 2) - -#define kNumAlignBits 4 -#define kAlignTableSize (1 << kNumAlignBits) -#define kAlignMask (kAlignTableSize - 1) - -#define kStartPosModelIndex 4 -#define kEndPosModelIndex 14 -#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) - -typedef -#ifdef _LZMA_PROB32 - UInt32 -#else - UInt16 -#endif - CLzmaProb; - -#define LZMA_PB_MAX 4 -#define LZMA_LC_MAX 8 -#define LZMA_LP_MAX 4 - -#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - -#define kLenNumLowBits 3 -#define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumHighBits 8 -#define kLenNumHighSymbols (1 << kLenNumHighBits) -#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) - -#define LZMA_MATCH_LEN_MIN 2 -#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) - -#define kNumStates 12 - - -typedef struct -{ - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; - CLzmaProb high[kLenNumHighSymbols]; -} CLenEnc; - - -typedef struct -{ - unsigned tableSize; - UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; - // UInt32 prices2[kLenNumSymbolsTotal]; -} CLenPriceEnc; - -#define GET_PRICE_LEN(p, posState, len) \ - ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) - -/* -#define GET_PRICE_LEN(p, posState, len) \ - ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) -*/ - -typedef struct -{ - UInt32 range; - unsigned cache; - UInt64 low; - UInt64 cacheSize; - Byte *buf; - Byte *bufLim; - Byte *bufBase; - ISeqOutStream *outStream; - UInt64 processed; - SRes res; -} CRangeEnc; - - -typedef struct -{ - CLzmaProb *litProbs; - - unsigned state; - UInt32 reps[LZMA_NUM_REPS]; - - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances]; - - CLenEnc lenProbs; - CLenEnc repLenProbs; - -} CSaveState; - - -typedef UInt32 CProbPrice; - - -typedef struct -{ - void *matchFinderObj; - IMatchFinder matchFinder; - - unsigned optCur; - unsigned optEnd; - - unsigned longestMatchLen; - unsigned numPairs; - UInt32 numAvail; - - unsigned state; - unsigned numFastBytes; - unsigned additionalOffset; - UInt32 
reps[LZMA_NUM_REPS]; - unsigned lpMask, pbMask; - CLzmaProb *litProbs; - CRangeEnc rc; - - UInt32 backRes; - - unsigned lc, lp, pb; - unsigned lclp; - - BoolInt fastMode; - BoolInt writeEndMark; - BoolInt finished; - BoolInt multiThread; - BoolInt needInit; - // BoolInt _maxMode; - - UInt64 nowPos64; - - unsigned matchPriceCount; - // unsigned alignPriceCount; - int repLenEncCounter; - - unsigned distTableSize; - - UInt32 dictSize; - SRes result; - - #ifndef _7ZIP_ST - BoolInt mtMode; - // begin of CMatchFinderMt is used in LZ thread - CMatchFinderMt matchFinderMt; - // end of CMatchFinderMt is used in BT and HASH threads - #endif - - CMatchFinder matchFinderBase; - - #ifndef _7ZIP_ST - Byte pad[128]; - #endif - - // LZ thread - CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; - - UInt32 alignPrices[kAlignTableSize]; - UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; - UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; - CLzmaProb isRep[kNumStates]; - CLzmaProb isRepG0[kNumStates]; - CLzmaProb isRepG1[kNumStates]; - CLzmaProb isRepG2[kNumStates]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances]; - - CLenEnc lenProbs; - CLenEnc repLenProbs; - - #ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; - #endif - - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; - - COptimal opt[kNumOpts]; - - CSaveState saveState; - - #ifndef _7ZIP_ST - Byte pad2[128]; - #endif -} CLzmaEnc; - - - -#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); - -void LzmaEnc_SaveState(CLzmaEncHandle pp) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CSaveState *dest = &p->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); -} - - -void LzmaEnc_RestoreState(CLzmaEncHandle pp) -{ - CLzmaEnc *dest = (CLzmaEnc *)pp; - const CSaveState *p = &dest->saveState; - - dest->state = p->state; - - dest->lenProbs = p->lenProbs; - dest->repLenProbs = p->repLenProbs; - - COPY_ARR(dest, p, reps); - - COPY_ARR(dest, p, posAlignEncoder); - COPY_ARR(dest, p, isRep); - COPY_ARR(dest, p, isRepG0); - COPY_ARR(dest, p, isRepG1); - COPY_ARR(dest, p, isRepG2); - COPY_ARR(dest, p, isMatch); - COPY_ARR(dest, p, isRep0Long); - COPY_ARR(dest, p, posSlotEncoder); - COPY_ARR(dest, p, posEncoders); - - memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); -} - - - -SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - CLzmaEncProps props = *props2; - LzmaEncProps_Normalize(&props); - - if (props.lc > LZMA_LC_MAX - || props.lp > LZMA_LP_MAX - || props.pb > LZMA_PB_MAX - || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) - || props.dictSize > kLzmaMaxHistorySize) - return SZ_ERROR_PARAM; - - p->dictSize = props.dictSize; - { - unsigned fb = props.fb; - if (fb < 5) - fb 
= 5; - if (fb > LZMA_MATCH_LEN_MAX) - fb = LZMA_MATCH_LEN_MAX; - p->numFastBytes = fb; - } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; - p->fastMode = (props.algo == 0); - // p->_maxMode = True; - p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); - { - unsigned numHashBytes = 4; - if (props.btMode) - { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; - } - p->matchFinderBase.numHashBytes = numHashBytes; - } - - p->matchFinderBase.cutValue = props.mc; - - p->writeEndMark = props.writeEndMark; - - #ifndef _7ZIP_ST - /* - if (newMultiThread != _multiThread) - { - ReleaseMatchFinder(); - _multiThread = newMultiThread; - } - */ - p->multiThread = (props.numThreads > 1); - #endif - - return SZ_OK; -} - - -void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.expectedDataSize = expectedDataSiize; -} - - -#define kState_Start 0 -#define kState_LitAfterMatch 4 -#define kState_LitAfterRep 5 -#define kState_MatchAfterLit 7 -#define kState_RepAfterLit 8 - -static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsLitState(s) ((s) < 7) -#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) -#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) - -#define kInfinityPrice (1 << 30) - -static void RangeEnc_Construct(CRangeEnc *p) -{ - p->outStream = NULL; - p->bufBase = NULL; -} - -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) -#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) - -#define RC_BUF_SIZE (1 << 16) - -static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) -{ - if (!p->bufBase) - { - p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); - if (!p->bufBase) - return 0; - p->bufLim = p->bufBase + RC_BUF_SIZE; - } - return 1; -} - -static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->bufBase); - p->bufBase = 0; -} - -static void RangeEnc_Init(CRangeEnc *p) -{ - /* Stream.Init(); */ - p->range = 0xFFFFFFFF; - p->cache = 0; - p->low = 0; - p->cacheSize = 0; - - p->buf = p->bufBase; - - p->processed = 0; - p->res = SZ_OK; -} - -MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) -{ - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; - p->processed += num; - p->buf = p->bufBase; -} - -MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) -{ - UInt32 low = (UInt32)p->low; - unsigned high = (unsigned)(p->low >> 32); - p->low = (UInt32)(low << 8); - if (low < (UInt32)0xFF000000 || high != 0) - { - { - Byte *buf = p->buf; - *buf++ = (Byte)(p->cache + high); - p->cache = (unsigned)(low >> 24); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); - if (p->cacheSize == 0) - return; - } - high += 0xFF; - for (;;) - { - Byte *buf = p->buf; - *buf++ = (Byte)(high); - p->buf = buf; - if (buf == p->bufLim) - RangeEnc_FlushStream(p); - 
if (--p->cacheSize == 0) - return; - } - } - p->cacheSize++; -} - -static void RangeEnc_FlushData(CRangeEnc *p) -{ - int i; - for (i = 0; i < 5; i++) - RangeEnc_ShiftLow(p); -} - -#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } - -#define RC_BIT_PRE(p, prob) \ - ttt = *(prob); \ - newBound = (range >> kNumBitModelTotalBits) * ttt; - -// #define _LZMA_ENC_USE_BRANCH - -#ifdef _LZMA_ENC_USE_BRANCH - -#define RC_BIT(p, prob, bit) { \ - RC_BIT_PRE(p, prob) \ - if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ - else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ - *(prob) = (CLzmaProb)ttt; \ - RC_NORM(p) \ - } - -#else - -#define RC_BIT(p, prob, bit) { \ - UInt32 mask; \ - RC_BIT_PRE(p, prob) \ - mask = 0 - (UInt32)bit; \ - range &= mask; \ - mask &= newBound; \ - range -= mask; \ - (p)->low += mask; \ - mask = (UInt32)bit - 1; \ - range += newBound & mask; \ - mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ - mask += ((1 << kNumMoveBits) - 1); \ - ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ - *(prob) = (CLzmaProb)ttt; \ - RC_NORM(p) \ - } - -#endif - - - - -#define RC_BIT_0_BASE(p, prob) \ - range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); - -#define RC_BIT_1_BASE(p, prob) \ - range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ - -#define RC_BIT_0(p, prob) \ - RC_BIT_0_BASE(p, prob) \ - RC_NORM(p) - -#define RC_BIT_1(p, prob) \ - RC_BIT_1_BASE(p, prob) \ - RC_NORM(p) - -static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) -{ - UInt32 range, ttt, newBound; - range = p->range; - RC_BIT_PRE(p, prob) - RC_BIT_0(p, prob) - p->range = range; -} - -static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) -{ - UInt32 range = p->range; - sym |= 0x100; - do - { - UInt32 ttt, newBound; - // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); - CLzmaProb *prob = probs + (sym >> 8); - UInt32 bit = (sym >> 7) & 1; - sym <<= 1; - RC_BIT(p, prob, bit); - } - while (sym < 0x10000); - p->range = range; -} - -static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte) -{ - UInt32 range = p->range; - UInt32 offs = 0x100; - sym |= 0x100; - do - { - UInt32 ttt, newBound; - CLzmaProb *prob; - UInt32 bit; - matchByte <<= 1; - // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1); - prob = probs + (offs + (matchByte & offs) + (sym >> 8)); - bit = (sym >> 7) & 1; - sym <<= 1; - offs &= ~(matchByte ^ sym); - RC_BIT(p, prob, bit); - } - while (sym < 0x10000); - p->range = range; -} - - - -static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) -{ - UInt32 i; - for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) - { - const unsigned kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); - unsigned bitCount = 0; - unsigned j; - for (j = 0; j < kCyclesBits; j++) - { - w = w * w; - bitCount <<= 1; - while (w >= ((UInt32)1 << 16)) - { - w >>= 1; - bitCount++; - } - } - ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); - // printf("\n%3d: %5d", i, ProbPrices[i]); - } -} - - -#define GET_PRICE(prob, bit) \ - p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; - -#define GET_PRICEa(prob, bit) \ - ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> 
kNumMoveReducingBits]; - -#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - -#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] - - -static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) -{ - UInt32 price = 0; - sym |= 0x100; - do - { - unsigned bit = sym & 1; - sym >>= 1; - price += GET_PRICEa(probs[sym], bit); - } - while (sym >= 2); - return price; -} - - -static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) -{ - UInt32 price = 0; - UInt32 offs = 0x100; - sym |= 0x100; - do - { - matchByte <<= 1; - price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); - sym <<= 1; - offs &= ~(matchByte ^ sym); - } - while (sym < 0x10000); - return price; -} - - -static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym) -{ - UInt32 range = rc->range; - unsigned m = 1; - do - { - UInt32 ttt, newBound; - unsigned bit = sym & 1; - // RangeEnc_EncodeBit(rc, probs + m, bit); - sym >>= 1; - RC_BIT(rc, probs + m, bit); - m = (m << 1) | bit; - } - while (--numBits); - rc->range = range; -} - - - -static void LenEnc_Init(CLenEnc *p) -{ - unsigned i; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) - p->low[i] = kProbInitValue; - for (i = 0; i < kLenNumHighSymbols; i++) - p->high[i] = kProbInitValue; -} - -static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState) -{ - UInt32 range, ttt, newBound; - CLzmaProb *probs = p->low; - range = rc->range; - RC_BIT_PRE(rc, probs); - if (sym >= kLenNumLowSymbols) - { - RC_BIT_1(rc, probs); - probs += kLenNumLowSymbols; - RC_BIT_PRE(rc, probs); - if (sym >= kLenNumLowSymbols * 2) - { - RC_BIT_1(rc, probs); - rc->range = range; - // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); - LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); - return; - } - sym -= kLenNumLowSymbols; - } - - // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); - { - unsigned m; - unsigned bit; - RC_BIT_0(rc, probs); - probs += (posState << (1 + kLenNumLowBits)); - bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; - bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; - bit = sym & 1; RC_BIT(rc, probs + m, bit); - rc->range = range; - } -} - -static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) -{ - unsigned i; - for (i = 0; i < 8; i += 2) - { - UInt32 price = startPrice; - UInt32 prob; - price += GET_PRICEa(probs[1 ], (i >> 2)); - price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); - prob = probs[4 + (i >> 1)]; - prices[i ] = price + GET_PRICEa_0(prob); - prices[i + 1] = price + GET_PRICEa_1(prob); - } -} - - -MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( - CLenPriceEnc *p, - unsigned numPosStates, - const CLenEnc *enc, - const CProbPrice *ProbPrices) -{ - UInt32 b; - - { - unsigned prob = enc->low[0]; - UInt32 a, c; - unsigned posState; - b = GET_PRICEa_1(prob); - a = GET_PRICEa_0(prob); - c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); - for (posState = 0; posState < numPosStates; posState++) - { - UInt32 *prices = p->prices[posState]; - const CLzmaProb *probs = 
enc->low + (posState << (1 + kLenNumLowBits)); - SetPrices_3(probs, a, prices, ProbPrices); - SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); - } - } - - /* - { - unsigned i; - UInt32 b; - a = GET_PRICEa_0(enc->low[0]); - for (i = 0; i < kLenNumLowSymbols; i++) - p->prices2[i] = a; - a = GET_PRICEa_1(enc->low[0]); - b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); - for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++) - p->prices2[i] = b; - a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); - } - */ - - // p->counter = numSymbols; - // p->counter = 64; - - { - unsigned i = p->tableSize; - - if (i > kLenNumLowSymbols * 2) - { - const CLzmaProb *probs = enc->high; - UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2; - i -= kLenNumLowSymbols * 2 - 1; - i >>= 1; - b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); - do - { - /* - p->prices2[i] = a + - // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); - LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices); - */ - // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); - unsigned sym = --i + (1 << (kLenNumHighBits - 1)); - UInt32 price = b; - do - { - unsigned bit = sym & 1; - sym >>= 1; - price += GET_PRICEa(probs[sym], bit); - } - while (sym >= 2); - - { - unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; - prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); - prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); - } - } - while (i); - - { - unsigned posState; - size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); - for (posState = 1; posState < numPosStates; posState++) - memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); - } - } - } -} - -/* - #ifdef SHOW_STAT - g_STAT_OFFSET += num; - printf("\n MovePos %u", num); - #endif -*/ - -#define MOVE_POS(p, num) { \ - p->additionalOffset += (num); \ - p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } - - -static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) -{ - unsigned numPairs; - - p->additionalOffset++; - p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); - *numPairsRes = numPairs; - - #ifdef SHOW_STAT - printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); - g_STAT_OFFSET++; - { - unsigned i; - for (i = 0; i < numPairs; i += 2) - printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); - } - #endif - - if (numPairs == 0) - return 0; - { - unsigned len = p->matches[(size_t)numPairs - 2]; - if (len != p->numFastBytes) - return len; - { - UInt32 numAvail = p->numAvail; - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - { - const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; - const Byte *lim = p1 + numAvail; - for (; p2 != lim && *p2 == p2[dif]; p2++) - {} - return (unsigned)(p2 - p1); - } - } - } -} - -#define MARK_LIT ((UInt32)(Int32)-1) - -#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } -#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } -#define IsShortRep(p) ((p)->dist == 0) - - -#define GetPrice_ShortRep(p, state, posState) \ - ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) - -#define GetPrice_Rep_0(p, state, posState) ( \ - GET_PRICE_1(p->isMatch[state][posState]) 
\ - + GET_PRICE_1(p->isRep0Long[state][posState])) \ - + GET_PRICE_1(p->isRep[state]) \ - + GET_PRICE_0(p->isRepG0[state]) - -MY_FORCE_INLINE -static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) -{ - UInt32 price; - UInt32 prob = p->isRepG0[state]; - if (repIndex == 0) - { - price = GET_PRICE_0(prob); - price += GET_PRICE_1(p->isRep0Long[state][posState]); - } - else - { - price = GET_PRICE_1(prob); - prob = p->isRepG1[state]; - if (repIndex == 1) - price += GET_PRICE_0(prob); - else - { - price += GET_PRICE_1(prob); - price += GET_PRICE(p->isRepG2[state], repIndex - 2); - } - } - return price; -} - - -static unsigned Backward(CLzmaEnc *p, unsigned cur) -{ - unsigned wr = cur + 1; - p->optEnd = wr; - - for (;;) - { - UInt32 dist = p->opt[cur].dist; - unsigned len = (unsigned)p->opt[cur].len; - unsigned extra = (unsigned)p->opt[cur].extra; - cur -= len; - - if (extra) - { - wr--; - p->opt[wr].len = (UInt32)len; - cur -= extra; - len = extra; - if (extra == 1) - { - p->opt[wr].dist = dist; - dist = MARK_LIT; - } - else - { - p->opt[wr].dist = 0; - len--; - wr--; - p->opt[wr].dist = MARK_LIT; - p->opt[wr].len = 1; - } - } - - if (cur == 0) - { - p->backRes = dist; - p->optCur = wr; - return len; - } - - wr--; - p->opt[wr].dist = dist; - p->opt[wr].len = (UInt32)len; - } -} - - - -#define LIT_PROBS(pos, prevByte) \ - (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) - - -static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) -{ - unsigned last, cur; - UInt32 reps[LZMA_NUM_REPS]; - unsigned repLens[LZMA_NUM_REPS]; - UInt32 *matches; - - { - UInt32 numAvail; - unsigned numPairs, mainLen, repMaxIndex, i, posState; - UInt32 matchPrice, repMatchPrice; - const Byte *data; - Byte curByte, matchByte; - - p->optCur = p->optEnd = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLen; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) - { - p->backRes = MARK_LIT; - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - unsigned len; - const Byte *data2; - reps[i] = p->reps[i]; - data2 = data - reps[i]; - if (data[0] != data2[0] || data[1] != data2[1]) - { - repLens[i] = 0; - continue; - } - for (len = 2; len < numAvail && data[len] == data2[len]; len++) - {} - repLens[i] = len; - if (len > repLens[repMaxIndex]) - repMaxIndex = i; - } - - if (repLens[repMaxIndex] >= p->numFastBytes) - { - unsigned len; - p->backRes = (UInt32)repMaxIndex; - len = repLens[repMaxIndex]; - MOVE_POS(p, len - 1) - return len; - } - - matches = p->matches; - - if (mainLen >= p->numFastBytes) - { - p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; - MOVE_POS(p, mainLen - 1) - return mainLen; - } - - curByte = *data; - matchByte = *(data - reps[0]); - - last = repLens[repMaxIndex]; - if (last <= mainLen) - last = mainLen; - - if (last < 2 && curByte != matchByte) - { - p->backRes = MARK_LIT; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsLitState(p->state) ? 
- LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAs_Lit(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - // 18.06 - if (matchByte == curByte && repLens[0] == 0) - { - UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) - { - p->opt[1].price = shortRepPrice; - MakeAs_ShortRep(&p->opt[1]); - } - if (last < 2) - { - p->backRes = p->opt[1].dist; - return 1; - } - } - - p->opt[1].len = 1; - - p->opt[0].reps[0] = reps[0]; - p->opt[0].reps[1] = reps[1]; - p->opt[0].reps[2] = reps[2]; - p->opt[0].reps[3] = reps[3]; - - // ---------- REP ---------- - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - unsigned repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); - do - { - UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen); - COptimal *opt = &p->opt[repLen]; - if (price2 < opt->price) - { - opt->price = price2; - opt->len = (UInt32)repLen; - opt->dist = (UInt32)i; - opt->extra = 0; - } - } - while (--repLen >= 2); - } - - - // ---------- MATCH ---------- - { - unsigned len = repLens[0] + 1; - if (len <= mainLen) - { - unsigned offs = 0; - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - if (len < 2) - len = 2; - else - while (len > matches[offs]) - offs += 2; - - for (; ; len++) - { - COptimal *opt; - UInt32 dist = matches[(size_t)offs + 1]; - UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); - unsigned lenToPosState = GetLenToPosState(len); - - if (dist < kNumFullDistances) - price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; - else - { - unsigned slot; - GetPosSlot2(dist, slot); - price += p->alignPrices[dist & kAlignMask]; - price += p->posSlotPrices[lenToPosState][slot]; - } - - opt = &p->opt[len]; - - if (price < opt->price) - { - opt->price = price; - opt->len = (UInt32)len; - opt->dist = dist + LZMA_NUM_REPS; - opt->extra = 0; - } - - if (len == matches[offs]) - { - offs += 2; - if (offs == numPairs) - break; - } - } - } - } - - - cur = 0; - - #ifdef SHOW_STAT2 - /* if (position >= 0) */ - { - unsigned i; - printf("\n pos = %4X", position); - for (i = cur; i <= last; i++) - printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); - } - #endif - } - - - - // ---------- Optimal Parsing ---------- - - for (;;) - { - unsigned numAvail; - UInt32 numAvailFull; - unsigned newLen, numPairs, prev, state, posState, startLen; - UInt32 litPrice, matchPrice, repMatchPrice; - BoolInt nextIsLit; - Byte curByte, matchByte; - const Byte *data; - COptimal *curOpt, *nextOpt; - - if (++cur == last) - break; - - // 18.06 - if (cur >= kNumOpts - 64) - { - unsigned j, best; - UInt32 price = p->opt[cur].price; - best = cur; - for (j = cur + 1; j <= last; j++) - { - UInt32 price2 = p->opt[j].price; - if (price >= price2) - { - price = price2; - best = j; - } - } - { - unsigned delta = best - cur; - if (delta != 0) - { - MOVE_POS(p, delta); - } - } - cur = best; - break; - } - - newLen = ReadMatchDistances(p, &numPairs); - - if (newLen >= p->numFastBytes) - { - p->numPairs = numPairs; - p->longestMatchLen = newLen; - break; - } - - curOpt = &p->opt[cur]; - - position++; - - // we need that check here, if skip_items in p->opt are possible - /* - if (curOpt->price >= kInfinityPrice) - continue; - 
*/ - - prev = cur - curOpt->len; - - if (curOpt->len == 1) - { - state = (unsigned)p->opt[prev].state; - if (IsShortRep(curOpt)) - state = kShortRepNextStates[state]; - else - state = kLiteralNextStates[state]; - } - else - { - const COptimal *prevOpt; - UInt32 b0; - UInt32 dist = curOpt->dist; - - if (curOpt->extra) - { - prev -= (unsigned)curOpt->extra; - state = kState_RepAfterLit; - if (curOpt->extra == 1) - state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit); - } - else - { - state = (unsigned)p->opt[prev].state; - if (dist < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - - prevOpt = &p->opt[prev]; - b0 = prevOpt->reps[0]; - - if (dist < LZMA_NUM_REPS) - { - if (dist == 0) - { - reps[0] = b0; - reps[1] = prevOpt->reps[1]; - reps[2] = prevOpt->reps[2]; - reps[3] = prevOpt->reps[3]; - } - else - { - reps[1] = b0; - b0 = prevOpt->reps[1]; - if (dist == 1) - { - reps[0] = b0; - reps[2] = prevOpt->reps[2]; - reps[3] = prevOpt->reps[3]; - } - else - { - reps[2] = b0; - reps[0] = prevOpt->reps[dist]; - reps[3] = prevOpt->reps[dist ^ 1]; - } - } - } - else - { - reps[0] = (dist - LZMA_NUM_REPS + 1); - reps[1] = b0; - reps[2] = prevOpt->reps[1]; - reps[3] = prevOpt->reps[2]; - } - } - - curOpt->state = (CState)state; - curOpt->reps[0] = reps[0]; - curOpt->reps[1] = reps[1]; - curOpt->reps[2] = reps[2]; - curOpt->reps[3] = reps[3]; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - curByte = *data; - matchByte = *(data - reps[0]); - - posState = (position & p->pbMask); - - /* - The order of Price checks: - < LIT - <= SHORT_REP - < LIT : REP_0 - < REP [ : LIT : REP_0 ] - < MATCH [ : LIT : REP_0 ] - */ - - { - UInt32 curPrice = curOpt->price; - unsigned prob = p->isMatch[state][posState]; - matchPrice = curPrice + GET_PRICE_1(prob); - litPrice = curPrice + GET_PRICE_0(prob); - } - - nextOpt = &p->opt[(size_t)cur + 1]; - nextIsLit = False; - - // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice) - // 18.new.06 - if (nextOpt->price < kInfinityPrice - // && !IsLitState(state) - && matchByte == curByte - || litPrice > nextOpt->price - ) - litPrice = 0; - else - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - litPrice += (!IsLitState(state) ? 
- LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - - if (litPrice < nextOpt->price) - { - nextOpt->price = litPrice; - nextOpt->len = 1; - MakeAs_Lit(nextOpt); - nextIsLit = True; - } - } - - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - - numAvailFull = p->numAvail; - { - unsigned temp = kNumOpts - 1 - cur; - if (numAvailFull > temp) - numAvailFull = (UInt32)temp; - } - - // 18.06 - // ---------- SHORT_REP ---------- - if (IsLitState(state)) // 18.new - if (matchByte == curByte) - if (repMatchPrice < nextOpt->price) // 18.new - // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1)) - if ( - // nextOpt->price >= kInfinityPrice || - nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt - || (nextOpt->dist != 0 - // && nextOpt->extra <= 1 // 17.old - ) - ) - { - UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); - // if (shortRepPrice <= nextOpt->price) // 17.old - if (shortRepPrice < nextOpt->price) // 18.new - { - nextOpt->price = shortRepPrice; - nextOpt->len = 1; - MakeAs_ShortRep(nextOpt); - nextIsLit = False; - } - } - - if (numAvailFull < 2) - continue; - numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - - // numAvail <= p->numFastBytes - - // ---------- LIT : REP_0 ---------- - - if (!nextIsLit - && litPrice != 0 // 18.new - && matchByte != curByte - && numAvailFull > 2) - { - const Byte *data2 = data - reps[0]; - if (data[1] == data2[1] && data[2] == data2[2]) - { - unsigned len; - unsigned limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; - for (len = 3; len < limit && data[len] == data2[len]; len++) - {} - - { - unsigned state2 = kLiteralNextStates[state]; - unsigned posState2 = (position + 1) & p->pbMask; - UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); - { - unsigned offset = cur + len; - - if (last < offset) - last = offset; - - // do - { - UInt32 price2; - COptimal *opt; - len--; - // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); - price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len); - - opt = &p->opt[offset]; - // offset--; - if (price2 < opt->price) - { - opt->price = price2; - opt->len = (UInt32)len; - opt->dist = 0; - opt->extra = 1; - } - } - // while (len >= 3); - } - } - } - } - - startLen = 2; /* speed optimization */ - - { - // ---------- REP ---------- - unsigned repIndex = 0; // 17.old - // unsigned repIndex = IsLitState(state) ? 
0 : 1; // 18.notused - for (; repIndex < LZMA_NUM_REPS; repIndex++) - { - unsigned len; - UInt32 price; - const Byte *data2 = data - reps[repIndex]; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - - for (len = 2; len < numAvail && data[len] == data2[len]; len++) - {} - - // if (len < startLen) continue; // 18.new: speed optimization - - { - unsigned offset = cur + len; - if (last < offset) - last = offset; - } - { - unsigned len2 = len; - price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); - do - { - UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2); - COptimal *opt = &p->opt[cur + len2]; - if (price2 < opt->price) - { - opt->price = price2; - opt->len = (UInt32)len2; - opt->dist = (UInt32)repIndex; - opt->extra = 0; - } - } - while (--len2 >= 2); - } - - if (repIndex == 0) startLen = len + 1; // 17.old - // startLen = len + 1; // 18.new - - /* if (_maxMode) */ - { - // ---------- REP : LIT : REP_0 ---------- - // numFastBytes + 1 + numFastBytes - - unsigned len2 = len + 1; - unsigned limit = len2 + p->numFastBytes; - if (limit > numAvailFull) - limit = numAvailFull; - - len2 += 2; - if (len2 <= limit) - if (data[len2 - 2] == data2[len2 - 2]) - if (data[len2 - 1] == data2[len2 - 1]) - { - unsigned state2 = kRepNextStates[state]; - unsigned posState2 = (position + len) & p->pbMask; - price += GET_PRICE_LEN(&p->repLenEnc, posState, len) - + GET_PRICE_0(p->isMatch[state2][posState2]) - + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), - data[len], data2[len], p->ProbPrices); - - // state2 = kLiteralNextStates[state2]; - state2 = kState_LitAfterRep; - posState2 = (posState2 + 1) & p->pbMask; - - - price += GetPrice_Rep_0(p, state2, posState2); - - for (; len2 < limit && data[len2] == data2[len2]; len2++) - {} - - len2 -= len; - // if (len2 >= 3) - { - { - unsigned offset = cur + len + len2; - - if (last < offset) - last = offset; - // do - { - UInt32 price2; - COptimal *opt; - len2--; - // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); - price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); - - opt = &p->opt[offset]; - // offset--; - if (price2 < opt->price) - { - opt->price = price2; - opt->len = (UInt32)len2; - opt->extra = (CExtra)(len + 1); - opt->dist = (UInt32)repIndex; - } - } - // while (len2 >= 3); - } - } - } - } - } - } - - - // ---------- MATCH ---------- - /* for (unsigned len = 2; len <= newLen; len++) */ - if (newLen > numAvail) - { - newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); - matches[numPairs] = (UInt32)newLen; - numPairs += 2; - } - - // startLen = 2; /* speed optimization */ - - if (newLen >= startLen) - { - UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 dist; - unsigned offs, posSlot, len; - - { - unsigned offset = cur + newLen; - if (last < offset) - last = offset; - } - - offs = 0; - while (startLen > matches[offs]) - offs += 2; - dist = matches[(size_t)offs + 1]; - - // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); - - for (len = /*2*/ startLen; ; len++) - { - UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); - { - COptimal *opt; - unsigned lenNorm = len - 2; - lenNorm = GetLenToPosState2(lenNorm); - if (dist < kNumFullDistances) - price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)]; - else - price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask]; - - opt = &p->opt[cur + len]; - if (price < opt->price) - { - 
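/* [editorial annotation, not part of the 7-Zip source] This price test is
   the core relaxation step of the optimal parser: p->opt[k].price holds the
   cheapest known cost, in 1/16-bit price units (see ProbPrices), of encoding
   the next k input bytes, and every candidate step (literal, short rep, rep,
   normal match) tries to lower the entry for the position it can reach.
   Backward() later walks the winning predecessors to replay the chosen
   decisions in encoding order. */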
opt->price = price; - opt->len = (UInt32)len; - opt->dist = dist + LZMA_NUM_REPS; - opt->extra = 0; - } - } - - if (len == matches[offs]) - { - // if (p->_maxMode) { - // MATCH : LIT : REP_0 - - const Byte *data2 = data - dist - 1; - unsigned len2 = len + 1; - unsigned limit = len2 + p->numFastBytes; - if (limit > numAvailFull) - limit = numAvailFull; - - len2 += 2; - if (len2 <= limit) - if (data[len2 - 2] == data2[len2 - 2]) - if (data[len2 - 1] == data2[len2 - 1]) - { - for (; len2 < limit && data[len2] == data2[len2]; len2++) - {} - - len2 -= len; - - // if (len2 >= 3) - { - unsigned state2 = kMatchNextStates[state]; - unsigned posState2 = (position + len) & p->pbMask; - unsigned offset; - price += GET_PRICE_0(p->isMatch[state2][posState2]); - price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), - data[len], data2[len], p->ProbPrices); - - // state2 = kLiteralNextStates[state2]; - state2 = kState_LitAfterMatch; - - posState2 = (posState2 + 1) & p->pbMask; - price += GetPrice_Rep_0(p, state2, posState2); - - offset = cur + len + len2; - - if (last < offset) - last = offset; - // do - { - UInt32 price2; - COptimal *opt; - len2--; - // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); - price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); - opt = &p->opt[offset]; - // offset--; - if (price2 < opt->price) - { - opt->price = price2; - opt->len = (UInt32)len2; - opt->extra = (CExtra)(len + 1); - opt->dist = dist + LZMA_NUM_REPS; - } - } - // while (len2 >= 3); - } - - } - - offs += 2; - if (offs == numPairs) - break; - dist = matches[(size_t)offs + 1]; - // if (dist >= kNumFullDistances) - GetPosSlot2(dist, posSlot); - } - } - } - } - - do - p->opt[last].price = kInfinityPrice; - while (--last); - - return Backward(p, cur); -} - - - -#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) - - - -static unsigned GetOptimumFast(CLzmaEnc *p) -{ - UInt32 numAvail, mainDist; - unsigned mainLen, numPairs, repIndex, repLen, i; - const Byte *data; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLen; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - p->backRes = MARK_LIT; - if (numAvail < 2) - return 1; - // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repLen = repIndex = 0; - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - unsigned len; - const Byte *data2 = data - p->reps[i]; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (len = 2; len < numAvail && data[len] == data2[len]; len++) - {} - if (len >= p->numFastBytes) - { - p->backRes = (UInt32)i; - MOVE_POS(p, len - 1) - return len; - } - if (len > repLen) - { - repIndex = i; - repLen = len; - } - } - - if (mainLen >= p->numFastBytes) - { - p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; - MOVE_POS(p, mainLen - 1) - return mainLen; - } - - mainDist = 0; /* for GCC */ - - if (mainLen >= 2) - { - mainDist = p->matches[(size_t)numPairs - 1]; - while (numPairs > 2) - { - UInt32 dist2; - if (mainLen != p->matches[(size_t)numPairs - 4] + 1) - break; - dist2 = p->matches[(size_t)numPairs - 3]; - if (!ChangePair(dist2, mainDist)) - break; - numPairs -= 2; - mainLen--; - mainDist = dist2; - } - if (mainLen == 2 && mainDist >= 0x80) - mainLen = 1; - } - - if (repLen >= 2) - if ( repLen + 1 >= mainLen - || (repLen + 2 >= 
mainLen && mainDist >= (1 << 9)) - || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) - { - p->backRes = (UInt32)repIndex; - MOVE_POS(p, repLen - 1) - return repLen; - } - - if (mainLen < 2 || numAvail <= 2) - return 1; - - { - unsigned len1 = ReadMatchDistances(p, &p->numPairs); - p->longestMatchLen = len1; - - if (len1 >= 2) - { - UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; - if ( (len1 >= mainLen && newDist < mainDist) - || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) - || (len1 > mainLen + 1) - || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) - return 1; - } - } - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - unsigned len, limit; - const Byte *data2 = data - p->reps[i]; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - limit = mainLen - 1; - for (len = 2;; len++) - { - if (len >= limit) - return 1; - if (data[len] != data2[len]) - break; - } - } - - p->backRes = mainDist + LZMA_NUM_REPS; - if (mainLen != 2) - { - MOVE_POS(p, mainLen - 2) - } - return mainLen; -} - - - - -static void WriteEndMarker(CLzmaEnc *p, unsigned posState) -{ - UInt32 range; - range = p->rc.range; - { - UInt32 ttt, newBound; - CLzmaProb *prob = &p->isMatch[p->state][posState]; - RC_BIT_PRE(&p->rc, prob) - RC_BIT_1(&p->rc, prob) - prob = &p->isRep[p->state]; - RC_BIT_PRE(&p->rc, prob) - RC_BIT_0(&p->rc, prob) - } - p->state = kMatchNextStates[p->state]; - - p->rc.range = range; - LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); - range = p->rc.range; - - { - // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); - CLzmaProb *probs = p->posSlotEncoder[0]; - unsigned m = 1; - do - { - UInt32 ttt, newBound; - RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); - m = (m << 1) + 1; - } - while (m < (1 << kNumPosSlotBits)); - } - { - // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; - unsigned numBits = 30 - kNumAlignBits; - do - { - range >>= 1; - p->rc.low += range; - RC_NORM(&p->rc) - } - while (--numBits); - } - - { - // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); - CLzmaProb *probs = p->posAlignEncoder; - unsigned m = 1; - do - { - UInt32 ttt, newBound; - RC_BIT_PRE(p, probs + m) - RC_BIT_1(&p->rc, probs + m); - m = (m << 1) + 1; - } - while (m < kAlignTableSize); - } - p->rc.range = range; -} - - -static SRes CheckErrors(CLzmaEnc *p) -{ - if (p->result != SZ_OK) - return p->result; - if (p->rc.res != SZ_OK) - p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) - p->result = SZ_ERROR_READ; - if (p->result != SZ_OK) - p->finished = True; - return p->result; -} - - -MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) -{ - /* ReleaseMFStream(); */ - p->finished = True; - if (p->writeEndMark) - WriteEndMarker(p, nowPos & p->pbMask); - RangeEnc_FlushData(&p->rc); - RangeEnc_FlushStream(&p->rc); - return CheckErrors(p); -} - - -MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) -{ - unsigned i; - const CProbPrice *ProbPrices = p->ProbPrices; - const CLzmaProb *probs = p->posAlignEncoder; - // p->alignPriceCount = 0; - for (i = 0; i < kAlignTableSize / 2; i++) - { - UInt32 price = 0; - unsigned sym = i; - unsigned m = 1; - unsigned bit; - UInt32 prob; - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; - bit = 
sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; - prob = probs[m]; - p->alignPrices[i ] = price + GET_PRICEa_0(prob); - p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); - // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); - } -} - - -MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) -{ - // int y; for (y = 0; y < 100; y++) { - - UInt32 tempPrices[kNumFullDistances]; - unsigned i, lps; - - const CProbPrice *ProbPrices = p->ProbPrices; - p->matchPriceCount = 0; - - for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) - { - unsigned posSlot = GetPosSlot1(i); - unsigned footerBits = (posSlot >> 1) - 1; - unsigned base = ((2 | (posSlot & 1)) << footerBits); - const CLzmaProb *probs = p->posEncoders + (size_t)base * 2; - // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); - UInt32 price = 0; - unsigned m = 1; - unsigned sym = i; - unsigned offset = (unsigned)1 << footerBits; - base += i; - - if (footerBits) - do - { - unsigned bit = sym & 1; - sym >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) + bit; - } - while (--footerBits); - - { - unsigned prob = probs[m]; - tempPrices[base ] = price + GET_PRICEa_0(prob); - tempPrices[base + offset] = price + GET_PRICEa_1(prob); - } - } - - for (lps = 0; lps < kNumLenToPosStates; lps++) - { - unsigned slot; - unsigned distTableSize2 = (p->distTableSize + 1) >> 1; - UInt32 *posSlotPrices = p->posSlotPrices[lps]; - const CLzmaProb *probs = p->posSlotEncoder[lps]; - - for (slot = 0; slot < distTableSize2; slot++) - { - // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); - UInt32 price; - unsigned bit; - unsigned sym = slot + (1 << (kNumPosSlotBits - 1)); - unsigned prob; - bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit); - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); - bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); - prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))]; - posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob); - posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob); - } - - { - UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits); - for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++) - { - posSlotPrices[(size_t)slot * 2 ] += delta; - posSlotPrices[(size_t)slot * 2 + 1] += delta; - delta += ((UInt32)1 << kNumBitPriceShiftBits); - } - } - - { - UInt32 *dp = p->distancesPrices[lps]; - - dp[0] = posSlotPrices[0]; - dp[1] = posSlotPrices[1]; - dp[2] = posSlotPrices[2]; - dp[3] = posSlotPrices[3]; - - for (i = 4; i < kNumFullDistances; i += 2) - { - UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; - dp[i ] = slotPrice + tempPrices[i]; - dp[i + 1] = slotPrice + tempPrices[i + 1]; - } - } - } - // } -} - - - -void LzmaEnc_Construct(CLzmaEnc *p) -{ - RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); - - #ifndef _7ZIP_ST - MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; - #endif - - { - CLzmaEncProps props; - LzmaEncProps_Init(&props); - LzmaEnc_SetProps(p, &props); - } - - #ifndef LZMA_LOG_BSR - LzmaEnc_FastPosInit(p->g_FastPos); - #endif - - LzmaEnc_InitPriceTables(p->ProbPrices); - p->litProbs = NULL; - 
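A note on the table that LzmaEnc_InitPriceTables fills (its implementation appears in the new version of this file further below): ProbPrices[] is an integer approximation of bit costs. A minimal floating-point model of one entry, offered as a sketch for intuition rather than code from this patch:

    #include <math.h>

    /* Approximate price, in 1/16-bit units (kNumBitPriceShiftBits == 4), of
       coding a zero-bit whose probability is prob out of kBitModelTotal
       (2048). The real table derives nearly the same value with four integer
       squarings instead of calling log2(). */
    static unsigned BitPrice0_Ref(unsigned prob /* 1..2047 */)
    {
      return (unsigned)(-log2((double)prob / 2048.0) * 16.0 + 0.5);
    }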
p->saveState.litProbs = NULL; - -} - -CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) -{ - void *p; - p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); - if (p) - LzmaEnc_Construct((CLzmaEnc *)p); - return p; -} - -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) -{ - ISzAlloc_Free(alloc, p->litProbs); - ISzAlloc_Free(alloc, p->saveState.litProbs); - p->litProbs = NULL; - p->saveState.litProbs = NULL; -} - -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - #ifndef _7ZIP_ST - MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); - #endif - - MatchFinder_Free(&p->matchFinderBase, allocBig); - LzmaEnc_FreeLits(p, alloc); - RangeEnc_Free(&p->rc, alloc); -} - -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); - ISzAlloc_Free(alloc, p); -} - - -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) -{ - UInt32 nowPos32, startPos32; - if (p->needInit) - { - p->matchFinder.Init(p->matchFinderObj); - p->needInit = 0; - } - - if (p->finished) - return p->result; - RINOK(CheckErrors(p)); - - nowPos32 = (UInt32)p->nowPos64; - startPos32 = nowPos32; - - if (p->nowPos64 == 0) - { - unsigned numPairs; - Byte curByte; - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - return Flush(p, nowPos32); - ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); - // p->state = kLiteralNextStates[p->state]; - curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); - LitEnc_Encode(&p->rc, p->litProbs, curByte); - p->additionalOffset--; - nowPos32++; - } - - if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) - - for (;;) - { - UInt32 dist; - unsigned len, posState; - UInt32 range, ttt, newBound; - CLzmaProb *probs; - - if (p->fastMode) - len = GetOptimumFast(p); - else - { - unsigned oci = p->optCur; - if (p->optEnd == oci) - len = GetOptimum(p, nowPos32); - else - { - const COptimal *opt = &p->opt[oci]; - len = opt->len; - p->backRes = opt->dist; - p->optCur = oci + 1; - } - } - - posState = (unsigned)nowPos32 & p->pbMask; - range = p->rc.range; - probs = &p->isMatch[p->state][posState]; - - RC_BIT_PRE(&p->rc, probs) - - dist = p->backRes; - - #ifdef SHOW_STAT2 - printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); - #endif - - if (dist == MARK_LIT) - { - Byte curByte; - const Byte *data; - unsigned state; - - RC_BIT_0(&p->rc, probs); - p->rc.range = range; - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - probs = LIT_PROBS(nowPos32, *(data - 1)); - curByte = *data; - state = p->state; - p->state = kLiteralNextStates[state]; - if (IsLitState(state)) - LitEnc_Encode(&p->rc, probs, curByte); - else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); - } - else - { - RC_BIT_1(&p->rc, probs); - probs = &p->isRep[p->state]; - RC_BIT_PRE(&p->rc, probs) - - if (dist < LZMA_NUM_REPS) - { - RC_BIT_1(&p->rc, probs); - probs = &p->isRepG0[p->state]; - RC_BIT_PRE(&p->rc, probs) - if (dist == 0) - { - RC_BIT_0(&p->rc, probs); - probs = &p->isRep0Long[p->state][posState]; - RC_BIT_PRE(&p->rc, probs) - if (len != 1) - { - RC_BIT_1_BASE(&p->rc, probs); - } - else - { - RC_BIT_0_BASE(&p->rc, probs); - p->state = kShortRepNextStates[p->state]; - } - } - else - { - RC_BIT_1(&p->rc, probs); - probs = &p->isRepG1[p->state]; - RC_BIT_PRE(&p->rc, probs) - if (dist == 1) - { - RC_BIT_0_BASE(&p->rc, probs); - dist = 
p->reps[1]; - } - else - { - RC_BIT_1(&p->rc, probs); - probs = &p->isRepG2[p->state]; - RC_BIT_PRE(&p->rc, probs) - if (dist == 2) - { - RC_BIT_0_BASE(&p->rc, probs); - dist = p->reps[2]; - } - else - { - RC_BIT_1_BASE(&p->rc, probs); - dist = p->reps[3]; - p->reps[3] = p->reps[2]; - } - p->reps[2] = p->reps[1]; - } - p->reps[1] = p->reps[0]; - p->reps[0] = dist; - } - - RC_NORM(&p->rc) - - p->rc.range = range; - - if (len != 1) - { - LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); - --p->repLenEncCounter; - p->state = kRepNextStates[p->state]; - } - } - else - { - unsigned posSlot; - RC_BIT_0(&p->rc, probs); - p->rc.range = range; - p->state = kMatchNextStates[p->state]; - - LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); - // --p->lenEnc.counter; - - dist -= LZMA_NUM_REPS; - p->reps[3] = p->reps[2]; - p->reps[2] = p->reps[1]; - p->reps[1] = p->reps[0]; - p->reps[0] = dist + 1; - - p->matchPriceCount++; - GetPosSlot(dist, posSlot); - // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); - { - UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); - range = p->rc.range; - probs = p->posSlotEncoder[GetLenToPosState(len)]; - do - { - CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); - UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; - sym <<= 1; - RC_BIT(&p->rc, prob, bit); - } - while (sym < (1 << kNumPosSlotBits * 2)); - p->rc.range = range; - } - - if (dist >= kStartPosModelIndex) - { - unsigned footerBits = ((posSlot >> 1) - 1); - - if (dist < kNumFullDistances) - { - unsigned base = ((2 | (posSlot & 1)) << footerBits); - RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */)); - } - else - { - UInt32 pos2 = (dist | 0xF) << (32 - footerBits); - range = p->rc.range; - // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - /* - do - { - range >>= 1; - p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); - RC_NORM(&p->rc) - } - while (footerBits > kNumAlignBits); - */ - do - { - range >>= 1; - p->rc.low += range & (0 - (pos2 >> 31)); - pos2 += pos2; - RC_NORM(&p->rc) - } - while (pos2 != 0xF0000000); - - - // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - - { - unsigned m = 1; - unsigned bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; - bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); - p->rc.range = range; - // p->alignPriceCount++; - } - } - } - } - } - - nowPos32 += (UInt32)len; - p->additionalOffset -= len; - - if (p->additionalOffset == 0) - { - UInt32 processed; - - if (!p->fastMode) - { - /* - if (p->alignPriceCount >= 16) // kAlignTableSize - FillAlignPrices(p); - if (p->matchPriceCount >= 128) - FillDistancesPrices(p); - if (p->lenEnc.counter <= 0) - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - */ - if (p->matchPriceCount >= 64) - { - FillAlignPrices(p); - // { int y; for (y = 0; y < 100; y++) { - FillDistancesPrices(p); - // }} - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - } - if (p->repLenEncCounter <= 0) - { - p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); - } - } - - 
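For orientation in this function: the match branch above splits a 0-based distance into a 6-bit pos slot plus footerBits of payload (adaptive probabilities below kNumFullDistances; direct bits plus a reverse-coded 4-bit align field above it), and the block just before this point rebuilds the corresponding price tables roughly every 64 matches. A standalone sketch of that decomposition, using a hypothetical helper name but consistent with GetPosSlot as defined later in the file:

    static void SplitDistance(UInt32 dist /* 0-based */, unsigned *slot,
                              unsigned *footerBits, UInt32 *reduced)
    {
      if (dist < 4) { *slot = (unsigned)dist; *footerBits = 0; *reduced = 0; return; }
      {
        unsigned n = 31;                 /* index of the top set bit */
        while (((dist >> n) & 1) == 0) n--;
        *slot = (n << 1) + (unsigned)((dist >> (n - 1)) & 1);  /* 2*n + next bit */
        *footerBits = n - 1;                                   /* == (*slot >> 1) - 1 */
        *reduced = (UInt32)(dist & (((UInt32)1 << (n - 1)) - 1)); /* dist minus slot base */
      }
    }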
if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) - break; - processed = nowPos32 - startPos32; - - if (maxPackSize) - { - if (processed + kNumOpts + 300 >= maxUnpackSize - || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) - break; - } - else if (processed >= (1 << 17)) - { - p->nowPos64 += nowPos32 - startPos32; - return CheckErrors(p); - } - } - } - - p->nowPos64 += nowPos32 - startPos32; - return Flush(p, nowPos32); -} - - - -#define kBigHashDicLimit ((UInt32)1 << 24) - -static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - UInt32 beforeSize = kNumOpts; - if (!RangeEnc_Alloc(&p->rc, alloc)) - return SZ_ERROR_MEM; - - #ifndef _7ZIP_ST - p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); - #endif - - { - unsigned lclp = p->lc + p->lp; - if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) - { - LzmaEnc_FreeLits(p, alloc); - p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); - if (!p->litProbs || !p->saveState.litProbs) - { - LzmaEnc_FreeLits(p, alloc); - return SZ_ERROR_MEM; - } - p->lclp = lclp; - } - } - - p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); - - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; - - #ifndef _7ZIP_ST - if (p->mtMode) - { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, - LZMA_MATCH_LEN_MAX - + 1 /* 18.04 */ - , allocBig)); - p->matchFinderObj = &p->matchFinderMt; - p->matchFinderBase.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 
1 : 0); - MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); - } - else - #endif - { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) - return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); - } - - return SZ_OK; -} - -void LzmaEnc_Init(CLzmaEnc *p) -{ - unsigned i; - p->state = 0; - p->reps[0] = - p->reps[1] = - p->reps[2] = - p->reps[3] = 1; - - RangeEnc_Init(&p->rc); - - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - - for (i = 0; i < kNumStates; i++) - { - unsigned j; - for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) - { - p->isMatch[i][j] = kProbInitValue; - p->isRep0Long[i][j] = kProbInitValue; - } - p->isRep[i] = kProbInitValue; - p->isRepG0[i] = kProbInitValue; - p->isRepG1[i] = kProbInitValue; - p->isRepG2[i] = kProbInitValue; - } - - { - for (i = 0; i < kNumLenToPosStates; i++) - { - CLzmaProb *probs = p->posSlotEncoder[i]; - unsigned j; - for (j = 0; j < (1 << kNumPosSlotBits); j++) - probs[j] = kProbInitValue; - } - } - { - for (i = 0; i < kNumFullDistances; i++) - p->posEncoders[i] = kProbInitValue; - } - - { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - UInt32 k; - CLzmaProb *probs = p->litProbs; - for (k = 0; k < num; k++) - probs[k] = kProbInitValue; - } - - - LenEnc_Init(&p->lenProbs); - LenEnc_Init(&p->repLenProbs); - - p->optEnd = 0; - p->optCur = 0; - - { - for (i = 0; i < kNumOpts; i++) - p->opt[i].price = kInfinityPrice; - } - - p->additionalOffset = 0; - - p->pbMask = (1 << p->pb) - 1; - p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); -} - - -void LzmaEnc_InitPrices(CLzmaEnc *p) -{ - if (!p->fastMode) - { - FillDistancesPrices(p); - FillAlignPrices(p); - } - - p->lenEnc.tableSize = - p->repLenEnc.tableSize = - p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - - p->repLenEncCounter = REP_LEN_COUNT; - - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); -} - -static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - unsigned i; - for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) - if (p->dictSize <= ((UInt32)1 << i)) - break; - p->distTableSize = i * 2; - - p->finished = False; - p->result = SZ_OK; - RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - p->nowPos64 = 0; - return SZ_OK; -} - -static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; - p->needInit = 1; - p->rc.outStream = outStream; - return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); -} - -SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, - ISeqInStream *inStream, UInt32 keepWindowSize, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; - p->needInit = 1; - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) -{ - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; -} - -SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, - UInt32 keepWindowSize, ISzAllocPtr 
alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - LzmaEnc_SetInputBuf(p, src, srcLen); - p->needInit = 1; - - LzmaEnc_SetDataSize(pp, srcLen); - return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); -} - -void LzmaEnc_Finish(CLzmaEncHandle pp) -{ - #ifndef _7ZIP_ST - CLzmaEnc *p = (CLzmaEnc *)pp; - if (p->mtMode) - MatchFinderMt_ReleaseStream(&p->matchFinderMt); - #else - UNUSED_VAR(pp); - #endif -} - - -typedef struct -{ - ISeqOutStream vt; - Byte *data; - SizeT rem; - BoolInt overflow; -} CLzmaEnc_SeqOutStreamBuf; - -static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) -{ - CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); - if (p->rem < size) - { - size = p->rem; - p->overflow = True; - } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; - return size; -} - - -UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); -} - - -const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) -{ - const CLzmaEnc *p = (CLzmaEnc *)pp; - return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; -} - - -SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, - Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - UInt64 nowPos64; - SRes res; - CLzmaEnc_SeqOutStreamBuf outStream; - - outStream.vt.Write = SeqOutStreamBuf_Write; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = False; - p->finished = False; - p->result = SZ_OK; - - if (reInit) - LzmaEnc_Init(p); - LzmaEnc_InitPrices(p); - - nowPos64 = p->nowPos64; - RangeEnc_Init(&p->rc); - p->rc.outStream = &outStream.vt; - - if (desiredPackSize == 0) - return SZ_ERROR_OUTPUT_EOF; - - res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); - - *unpackSize = (UInt32)(p->nowPos64 - nowPos64); - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - - return res; -} - - -static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) -{ - SRes res = SZ_OK; - - #ifndef _7ZIP_ST - Byte allocaDummy[0x300]; - allocaDummy[0] = 0; - allocaDummy[1] = allocaDummy[0]; - #endif - - for (;;) - { - res = LzmaEnc_CodeOneBlock(p, 0, 0); - if (res != SZ_OK || p->finished) - break; - if (progress) - { - res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); - if (res != SZ_OK) - { - res = SZ_ERROR_PROGRESS; - break; - } - } - } - - LzmaEnc_Finish(p); - - /* - if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) - res = SZ_ERROR_FAIL; - } - */ - - return res; -} - - -SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, - ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); - return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); -} - - -SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) -{ - CLzmaEnc *p = (CLzmaEnc *)pp; - unsigned i; - UInt32 dictSize = p->dictSize; - if (*size < LZMA_PROPS_SIZE) - return SZ_ERROR_PARAM; - *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - if (dictSize >= ((UInt32)1 << 22)) - { - UInt32 kDictMask = ((UInt32)1 << 20) - 1; - if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) - dictSize = (dictSize + kDictMask) & ~kDictMask; - 
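For reference, the five property bytes written by this function are the packed byte computed above, props[0] = (pb * 5 + lp) * 9 + lc, followed by the rounded dictionary size in little-endian order (the loop just below). The matching unpacking step on the decoder side looks like this (illustrative helper, not from this patch):

    static void LzmaProps_Unpack(unsigned b, unsigned *lc, unsigned *lp, unsigned *pb)
    {
      *lc = b % 9;  b /= 9;   /* literal context bits,  0..8 */
      *lp = b % 5;            /* literal position bits, 0..4 */
      *pb = b / 5;            /* position bits,         0..4 */
    }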
} - else for (i = 11; i <= 30; i++) - { - if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } - } - - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; -} - - -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) -{ - return ((CLzmaEnc *)pp)->writeEndMark; -} - - -SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - SRes res; - CLzmaEnc *p = (CLzmaEnc *)pp; - - CLzmaEnc_SeqOutStreamBuf outStream; - - outStream.vt.Write = SeqOutStreamBuf_Write; - outStream.data = dest; - outStream.rem = *destLen; - outStream.overflow = False; - - p->writeEndMark = writeEndMark; - p->rc.outStream = &outStream.vt; - - res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); - - if (res == SZ_OK) - { - res = LzmaEnc_Encode2(p, progress); - if (res == SZ_OK && p->nowPos64 != srcLen) - res = SZ_ERROR_FAIL; - } - - *destLen -= outStream.rem; - if (outStream.overflow) - return SZ_ERROR_OUTPUT_EOF; - return res; -} - - -SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) -{ - CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); - SRes res; - if (!p) - return SZ_ERROR_MEM; - - res = LzmaEnc_SetProps(p, props); - if (res == SZ_OK) - { - res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); - if (res == SZ_OK) - res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, - writeEndMark, progress, alloc, allocBig); - } - - LzmaEnc_Destroy(p, alloc, allocBig); - return res; -} +/* LzmaEnc.c -- LZMA Encoder +2021-11-18: Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "CpuArch.h" +#include "LzmaEnc.h" + +#include "LzFind.h" +#ifndef _7ZIP_ST +#include "LzFindMt.h" +#endif + +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); +void LzmaEnc_Finish(CLzmaEncHandle pp); +void LzmaEnc_SaveState(CLzmaEncHandle pp); +void LzmaEnc_RestoreState(CLzmaEncHandle pp); + +#ifdef SHOW_STAT +static unsigned g_STAT_OFFSET = 0; +#endif + +/* for good normalization speed we still reserve 256 MB before 4 GB range */ +#define kLzmaMaxHistorySize ((UInt32)15 << 28) + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +// #define kBitPrice (1 << kNumBitPriceShiftBits) + +#define REP_LEN_COUNT 64 + +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->reduceSize = (UInt64)(Int64)-1; 
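In this initializer, 0 and -1 act as "unset" markers that LzmaEncProps_Normalize (defined next) resolves from level. A minimal usage sketch under the defaults:

    CLzmaEncProps props;
    UInt32 dict;
    LzmaEncProps_Init(&props);               /* level defaults to 5, all else unset */
    dict = LzmaEncProps_GetDictSize(&props); /* 1 << 24 (16 MB): Normalize maps
                                                dictSize == 0 to 1 << (level + 19)
                                                for levels 4..6 */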
+ p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; + p->affinity = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) level = 5; + p->level = level; + + if (p->dictSize == 0) + p->dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); + + if (p->dictSize > p->reduceSize) + { + UInt32 v = (UInt32)p->reduceSize; + const UInt32 kReduceMin = ((UInt32)1 << 12); + if (v < kReduceMin) + v = kReduceMin; + if (p->dictSize > v) + p->dictSize = v; + } + + if (p->lc < 0) p->lc = 3; + if (p->lp < 0) p->lp = 0; + if (p->pb < 0) p->pb = 2; + + if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); + if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); + + if (p->numThreads < 0) + p->numThreads = + #ifndef _7ZIP_ST + ((p->btMode && p->algo) ? 2 : 1); + #else + 1; + #endif +} + +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +{ + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + + +/* +x86/x64: + +BSR: + IF (SRC == 0) ZF = 1, DEST is undefined; + AMD : DEST is unchanged; + IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit + BSR is slow in some processors + +LZCNT: + IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64) + IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits + IF (DEST == 0) ZF = 1; + +LZCNT works only in new processors starting from Haswell. +if LZCNT is not supported by processor, then it's executed as BSR. +LZCNT can be faster than BSR, if supported. +*/ + +// #define LZMA_LOG_BSR + +#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */ + + #if (defined(__clang__) && (__clang_major__ >= 6)) \ + || (defined(__GNUC__) && (__GNUC__ >= 6)) + #define LZMA_LOG_BSR + #elif defined(_MSC_VER) && (_MSC_VER >= 1300) + // #if defined(MY_CPU_ARM_OR_ARM64) + #define LZMA_LOG_BSR + // #endif + #endif +#endif + +// #include + +#ifdef LZMA_LOG_BSR + +#if defined(__clang__) \ + || defined(__GNUC__) + +/* + C code: : (30 - __builtin_clz(x)) + gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31) + clang10 for x64 : 31 + (bsr(x) xor -32) +*/ + + #define MY_clz(x) ((unsigned)__builtin_clz(x)) + // __lzcnt32 + // __builtin_ia32_lzcnt_u32 + +#else // #if defined(_MSC_VER) + + #ifdef MY_CPU_ARM_OR_ARM64 + + #define MY_clz _CountLeadingZeros + + #else // if defined(MY_CPU_X86_OR_AMD64) + + // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU) + // _BitScanReverse code is not optimal for some MSVC compilers + #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \ + res = (zz + zz) + (pos >> zz); } + + #endif // MY_CPU_X86_OR_AMD64 + +#endif // _MSC_VER + + +#ifndef BSR2_RET + + #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \ + res = (zz + zz) + (pos >> zz); } + +#endif + + +unsigned GetPosSlot1(UInt32 pos); +unsigned GetPosSlot1(UInt32 pos) +{ + unsigned res; + BSR2_RET(pos, res); + return res; +} +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } + + +#else // ! 
LZMA_LOG_BSR + +#define kNumLogBits (11 + sizeof(size_t) / 8 * 3) + +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) + +static void LzmaEnc_FastPosInit(Byte *g_FastPos) +{ + unsigned slot; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + g_FastPos += 2; + + for (slot = 2; slot < kNumLogBits * 2; slot++) + { + size_t k = ((size_t)1 << ((slot >> 1) - 1)); + size_t j; + for (j = 0; j < k; j++) + g_FastPos[j] = (Byte)slot; + g_FastPos += k; + } +} + +/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +/* +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } +*/ + +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ + res = p->g_FastPos[pos >> zz] + (zz * 2); } + +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } + +#endif // LZMA_LOG_BSR + + +#define LZMA_NUM_REPS 4 + +typedef UInt16 CState; +typedef UInt16 CExtra; + +typedef struct +{ + UInt32 price; + CState state; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; +} COptimal; + + +// 18.06 +#define kNumOpts (1 << 11) +#define kPackReserve (kNumOpts * 8) +// #define kNumOpts (1 << 12) +// #define kPackReserve (1 + kNumOpts * 2) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +// #define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +typedef +#ifdef _LZMA_PROB32 + UInt32 +#else + UInt16 +#endif + CLzmaProb; + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) + +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 + + +typedef struct +{ + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; + CLzmaProb high[kLenNumHighSymbols]; +} CLenEnc; + + +typedef struct +{ + unsigned tableSize; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2]; + // UInt32 prices2[kLenNumSymbolsTotal]; +} CLenPriceEnc; + +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN]) + +/* +#define GET_PRICE_LEN(p, posState, len) \ + ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & 
(kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9))) +*/ + +typedef struct +{ + UInt32 range; + unsigned cache; + UInt64 low; + UInt64 cacheSize; + Byte *buf; + Byte *bufLim; + Byte *bufBase; + ISeqOutStream *outStream; + UInt64 processed; + SRes res; +} CRangeEnc; + + +typedef struct +{ + CLzmaProb *litProbs; + + unsigned state; + UInt32 reps[LZMA_NUM_REPS]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + +} CSaveState; + + +typedef UInt32 CProbPrice; + + +typedef struct +{ + void *matchFinderObj; + IMatchFinder2 matchFinder; + + unsigned optCur; + unsigned optEnd; + + unsigned longestMatchLen; + unsigned numPairs; + UInt32 numAvail; + + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; + + UInt32 backRes; + + unsigned lc, lp, pb; + unsigned lclp; + + BoolInt fastMode; + BoolInt writeEndMark; + BoolInt finished; + BoolInt multiThread; + BoolInt needInit; + // BoolInt _maxMode; + + UInt64 nowPos64; + + unsigned matchPriceCount; + // unsigned alignPriceCount; + int repLenEncCounter; + + unsigned distTableSize; + + UInt32 dictSize; + SRes result; + + #ifndef _7ZIP_ST + BoolInt mtMode; + // begin of CMatchFinderMt is used in LZ thread + CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads + // #else + // CMatchFinder matchFinderBase; + #endif + CMatchFinder matchFinderBase; + + + // we suppose that we have 8-bytes alignment after CMatchFinder + + #ifndef _7ZIP_ST + Byte pad[128]; + #endif + + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + + // we want {len , dist} pairs to be 8-bytes aligned in matches array + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2]; + + // we want 8-bytes alignment here + UInt32 alignPrices[kAlignTableSize]; + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances]; + + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + COptimal opt[kNumOpts]; + + CSaveState saveState; + + // BoolInt mf_Failure; + #ifndef _7ZIP_ST + Byte pad2[128]; + #endif +} CLzmaEnc; + + +#define MFB (p->matchFinderBase) +/* +#ifndef _7ZIP_ST +#define MFB (p->matchFinderMt.MatchFinder) +#endif +*/ + +#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); + +void LzmaEnc_SaveState(CLzmaEncHandle pp) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CSaveState *dest = &p->saveState; + + dest->state = p->state; + + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, 
p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); +} + + +void LzmaEnc_RestoreState(CLzmaEncHandle pp) +{ + CLzmaEnc *dest = (CLzmaEnc *)pp; + const CSaveState *p = &dest->saveState; + + dest->state = p->state; + + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); +} + + + +SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + + if (props.lc > LZMA_LC_MAX + || props.lp > LZMA_LP_MAX + || props.pb > LZMA_PB_MAX) + return SZ_ERROR_PARAM; + + + if (props.dictSize > kLzmaMaxHistorySize) + props.dictSize = kLzmaMaxHistorySize; + + #ifndef LZMA_LOG_BSR + { + const UInt64 dict64 = props.dictSize; + if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress)) + return SZ_ERROR_PARAM; + } + #endif + + p->dictSize = props.dictSize; + { + unsigned fb = (unsigned)props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; + p->fastMode = (props.algo == 0); + // p->_maxMode = True; + MFB.btMode = (Byte)(props.btMode ? 1 : 0); + { + unsigned numHashBytes = 4; + if (props.btMode) + { + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; + } + if (props.numHashBytes >= 5) numHashBytes = 5; + + MFB.numHashBytes = numHashBytes; + } + + MFB.cutValue = props.mc; + + p->writeEndMark = (BoolInt)props.writeEndMark; + + #ifndef _7ZIP_ST + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; + #endif + + return SZ_OK; +} + + +void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + MFB.expectedDataSize = expectedDataSiize; +} + + +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 + +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len) - 2 : kNumLenToPosStates - 1) + +#define kInfinityPrice (1 << 30) + +static void RangeEnc_Construct(CRangeEnc *p) +{ + p->outStream = NULL; + p->bufBase = NULL; +} + +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) + +#define RC_BUF_SIZE (1 << 16) + +static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) +{ + if (!p->bufBase) + { + p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE); + if (!p->bufBase) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} + +static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->bufBase); + p->bufBase = NULL; +} + +static void RangeEnc_Init(CRangeEnc *p) +{ + p->range = 0xFFFFFFFF; + p->cache = 0; + p->low = 0; + p->cacheSize = 0; + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} + +MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) +{ + const size_t num = (size_t)(p->buf - p->bufBase); + if (p->res == SZ_OK) + { + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + } + p->processed += num; + p->buf = p->bufBase; +} + +MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +{ + UInt32 low = (UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) + { + { + Byte *buf = p->buf; + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; + } + } + p->cacheSize++; +} + +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} + +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } + +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define _LZMA_ENC_USE_BRANCH + +#ifdef _LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, bit) { \ + RC_BIT_PRE(p, prob) \ + if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#else + +#define RC_BIT(p, prob, bit) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)bit; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)bit - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) +{ + UInt32 range, ttt, newBound; + range 
= p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; +} + +static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym) +{ + UInt32 range = p->range; + sym |= 0x100; + do + { + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1); + CLzmaProb *prob = probs + (sym >> 8); + UInt32 bit = (sym >> 7) & 1; + sym <<= 1; + RC_BIT(p, prob, bit); + } + while (sym < 0x10000); + p->range = range; +} + +static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UInt32 matchByte) +{ + UInt32 range = p->range; + UInt32 offs = 0x100; + sym |= 0x100; + do + { + UInt32 ttt, newBound; + CLzmaProb *prob; + UInt32 bit; + matchByte <<= 1; + // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (sym >> 8)), (sym >> 7) & 1); + prob = probs + (offs + (matchByte & offs) + (sym >> 8)); + bit = (sym >> 7) & 1; + sym <<= 1; + offs &= ~(matchByte ^ sym); + RC_BIT(p, prob, bit); + } + while (sym < 0x10000); + p->range = range; +} + + + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) +{ + UInt32 i; + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) + { + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) + { + w >>= 1; + bitCount++; + } + } + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); + } +} + + +#define GET_PRICE(prob, bit) \ + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICEa(prob, bit) \ + ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + + +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + sym |= 0x100; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + return price; +} + + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices) +{ + UInt32 price = 0; + UInt32 offs = 0x100; + sym |= 0x100; + do + { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1); + sym <<= 1; + offs &= ~(matchByte ^ sym); + } + while (sym < 0x10000); + return price; +} + + +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, unsigned sym) +{ + UInt32 range = rc->range; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + unsigned bit = sym & 1; + // RangeEnc_EncodeBit(rc, probs + m, bit); + sym >>= 1; + RC_BIT(rc, probs + m, bit); + m = (m << 1) | bit; + } + while (--numBits); + rc->range = range; +} + + + +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} + +static void LenEnc_Encode(CLenEnc *p, 
CRangeEnc *rc, unsigned sym, unsigned posState) +{ + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols) + { + RC_BIT_1(rc, probs); + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs); + if (sym >= kLenNumLowSymbols * 2) + { + RC_BIT_1(rc, probs); + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); + return; + } + sym -= kLenNumLowSymbols; + } + + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym); + { + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs); + probs += (posState << (1 + kLenNumLowBits)); + bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; + bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; + bit = sym & 1; RC_BIT(rc, probs + m, bit); + rc->range = range; + } +} + +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) +{ + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } +} + + +MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( + CLenPriceEnc *p, + unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) +{ + UInt32 b; + + { + unsigned prob = enc->low[0]; + UInt32 a, c; + unsigned posState; + b = GET_PRICEa_1(prob); + a = GET_PRICEa_0(prob); + c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (posState = 0; posState < numPosStates; posState++) + { + UInt32 *prices = p->prices[posState]; + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, a, prices, ProbPrices); + SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices); + } + } + + /* + { + unsigned i; + UInt32 b; + a = GET_PRICEa_0(enc->low[0]); + for (i = 0; i < kLenNumLowSymbols; i++) + p->prices2[i] = a; + a = GET_PRICEa_1(enc->low[0]); + b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]); + for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++) + p->prices2[i] = b; + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + */ + + // p->counter = numSymbols; + // p->counter = 64; + + { + unsigned i = p->tableSize; + + if (i > kLenNumLowSymbols * 2) + { + const CLzmaProb *probs = enc->high; + UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2; + i -= kLenNumLowSymbols * 2 - 1; + i >>= 1; + b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + do + { + /* + p->prices2[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices); + */ + // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices); + unsigned sym = --i + (1 << (kLenNumHighBits - 1)); + UInt32 price = b; + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[sym], bit); + } + while (sym >= 2); + + { + unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))]; + prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob); + prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob); + } + } + while (i); + + { + unsigned posState; + size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]); + for (posState = 1; posState < 
numPosStates; posState++) + memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num); + } + } + } +} + +/* + #ifdef SHOW_STAT + g_STAT_OFFSET += num; + printf("\n MovePos %u", num); + #endif +*/ + +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); } + + +static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) +{ + unsigned numPairs; + + p->additionalOffset++; + p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); + { + const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; } + numPairs = (unsigned)(d - p->matches); + } + *numPairsRes = numPairs; + + #ifdef SHOW_STAT + printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); + g_STAT_OFFSET++; + { + unsigned i; + for (i = 0; i < numPairs; i += 2) + printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); + } + #endif + + if (numPairs == 0) + return 0; + { + const unsigned len = p->matches[(size_t)numPairs - 2]; + if (len != p->numFastBytes) + return len; + { + UInt32 numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { + const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + const Byte *p2 = p1 + len; + const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; + const Byte *lim = p1 + numAvail; + for (; p2 != lim && *p2 == p2[dif]; p2++) + {} + return (unsigned)(p2 - p1); + } + } + } +} + +#define MARK_LIT ((UInt32)(Int32)-1) + +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) + + +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + +MY_FORCE_INLINE +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) +{ + UInt32 price; + UInt32 prob = p->isRepG0[state]; + if (repIndex == 0) + { + price = GET_PRICE_0(prob); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; + if (repIndex == 1) + price += GET_PRICE_0(prob); + else + { + price += GET_PRICE_1(prob); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} + + +static unsigned Backward(CLzmaEnc *p, unsigned cur) +{ + unsigned wr = cur + 1; + p->optEnd = wr; + + for (;;) + { + UInt32 dist = p->opt[cur].dist; + unsigned len = (unsigned)p->opt[cur].len; + unsigned extra = (unsigned)p->opt[cur].extra; + cur -= len; + + if (extra) + { + wr--; + p->opt[wr].len = (UInt32)len; + cur -= extra; + len = extra; + if (extra == 1) + { + p->opt[wr].dist = dist; + dist = MARK_LIT; + } + else + { + p->opt[wr].dist = 0; + len--; + wr--; + p->opt[wr].dist = MARK_LIT; + p->opt[wr].len = 1; + } + } + + if (cur == 0) + { + p->backRes = dist; + p->optCur = wr; + return len; + } + + wr--; + p->opt[wr].dist = dist; + p->opt[wr].len = (UInt32)len; + } +} + + + +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) + + +static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) +{ + 
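+  /* Summary note: GetOptimum runs a forward dynamic program over the
+     next (up to kNumOpts) positions. p->opt[k] caches the cheapest
+     known way to encode the next k bytes; in effect the loop relaxes
+       opt[j + len].price = min(opt[j + len].price,
+                                opt[j].price + price_of(choice, len))
+     over literals, short reps, rep0..rep3 and normal matches, and
+     Backward() then replays the cheapest chain. (The real recurrence
+     also threads encoder state and rep-distance history per node.) */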
unsigned last, cur; + UInt32 reps[LZMA_NUM_REPS]; + unsigned repLens[LZMA_NUM_REPS]; + UInt32 *matches; + + { + UInt32 numAvail; + unsigned numPairs, mainLen, repMaxIndex, i, posState; + UInt32 matchPrice, repMatchPrice; + const Byte *data; + Byte curByte, matchByte; + + p->optCur = p->optEnd = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLen; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) + { + p->backRes = MARK_LIT; + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + repLens[i] = len; + if (len > repLens[repMaxIndex]) + repMaxIndex = i; + if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization + break; + } + + if (repLens[repMaxIndex] >= p->numFastBytes) + { + unsigned len; + p->backRes = (UInt32)repMaxIndex; + len = repLens[repMaxIndex]; + MOVE_POS(p, len - 1) + return len; + } + + matches = p->matches; + #define MATCHES matches + // #define MATCHES p->matches + + if (mainLen >= p->numFastBytes) + { + p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + curByte = *data; + matchByte = *(data - reps[0]); + + last = repLens[repMaxIndex]; + if (last <= mainLen) + last = mainLen; + + if (last < 2 && curByte != matchByte) + { + p->backRes = MARK_LIT; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsLitState(p->state) ? 
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAs_Lit(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + // 18.06 + if (matchByte == curByte && repLens[0] == 0) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) + { + p->opt[1].price = shortRepPrice; + MakeAs_ShortRep(&p->opt[1]); + } + if (last < 2) + { + p->backRes = p->opt[1].dist; + return 1; + } + } + + p->opt[1].len = 1; + + p->opt[0].reps[0] = reps[0]; + p->opt[0].reps[1] = reps[1]; + p->opt[0].reps[2] = reps[2]; + p->opt[0].reps[3] = reps[3]; + + // ---------- REP ---------- + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); + do + { + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, repLen); + COptimal *opt = &p->opt[repLen]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)repLen; + opt->dist = (UInt32)i; + opt->extra = 0; + } + } + while (--repLen >= 2); + } + + + // ---------- MATCH ---------- + { + unsigned len = repLens[0] + 1; + if (len <= mainLen) + { + unsigned offs = 0; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + if (len < 2) + len = 2; + else + while (len > MATCHES[offs]) + offs += 2; + + for (; ; len++) + { + COptimal *opt; + UInt32 dist = MATCHES[(size_t)offs + 1]; + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); + unsigned lenToPosState = GetLenToPosState(len); + + if (dist < kNumFullDistances) + price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + { + unsigned slot; + GetPosSlot2(dist, slot); + price += p->alignPrices[dist & kAlignMask]; + price += p->posSlotPrices[lenToPosState][slot]; + } + + opt = &p->opt[len]; + + if (price < opt->price) + { + opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + + if (len == MATCHES[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + } + + + cur = 0; + + #ifdef SHOW_STAT2 + /* if (position >= 0) */ + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= last; i++) + printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); + } + #endif + } + + + + // ---------- Optimal Parsing ---------- + + for (;;) + { + unsigned numAvail; + UInt32 numAvailFull; + unsigned newLen, numPairs, prev, state, posState, startLen; + UInt32 litPrice, matchPrice, repMatchPrice; + BoolInt nextIsLit; + Byte curByte, matchByte; + const Byte *data; + COptimal *curOpt, *nextOpt; + + if (++cur == last) + break; + + // 18.06 + if (cur >= kNumOpts - 64) + { + unsigned j, best; + UInt32 price = p->opt[cur].price; + best = cur; + for (j = cur + 1; j <= last; j++) + { + UInt32 price2 = p->opt[j].price; + if (price >= price2) + { + price = price2; + best = j; + } + } + { + unsigned delta = best - cur; + if (delta != 0) + { + MOVE_POS(p, delta); + } + } + cur = best; + break; + } + + newLen = ReadMatchDistances(p, &numPairs); + + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; + p->longestMatchLen = newLen; + break; + } + + curOpt = &p->opt[cur]; + + position++; + + // we need that check here, if skip_items in p->opt are possible + /* + if (curOpt->price >= kInfinityPrice) + continue; + 
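+  (note: p->opt prices start at kInfinityPrice -- see LzmaEnc_Init and
+  the reset loop before Backward() -- so this check would just skip
+  nodes that no parse has reached yet)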
*/ + + prev = cur - curOpt->len; + + if (curOpt->len == 1) + { + state = (unsigned)p->opt[prev].state; + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + const COptimal *prevOpt; + UInt32 b0; + UInt32 dist = curOpt->dist; + + if (curOpt->extra) + { + prev -= (unsigned)curOpt->extra; + state = kState_RepAfterLit; + if (curOpt->extra == 1) + state = (dist < LZMA_NUM_REPS ? kState_RepAfterLit : kState_MatchAfterLit); + } + else + { + state = (unsigned)p->opt[prev].state; + if (dist < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + + prevOpt = &p->opt[prev]; + b0 = prevOpt->reps[0]; + + if (dist < LZMA_NUM_REPS) + { + if (dist == 0) + { + reps[0] = b0; + reps[1] = prevOpt->reps[1]; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[1] = b0; + b0 = prevOpt->reps[1]; + if (dist == 1) + { + reps[0] = b0; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[2] = b0; + reps[0] = prevOpt->reps[dist]; + reps[3] = prevOpt->reps[dist ^ 1]; + } + } + } + else + { + reps[0] = (dist - LZMA_NUM_REPS + 1); + reps[1] = b0; + reps[2] = prevOpt->reps[1]; + reps[3] = prevOpt->reps[2]; + } + } + + curOpt->state = (CState)state; + curOpt->reps[0] = reps[0]; + curOpt->reps[1] = reps[1]; + curOpt->reps[2] = reps[2]; + curOpt->reps[3] = reps[3]; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; + matchByte = *(data - reps[0]); + + posState = (position & p->pbMask); + + /* + The order of Price checks: + < LIT + <= SHORT_REP + < LIT : REP_0 + < REP [ : LIT : REP_0 ] + < MATCH [ : LIT : REP_0 ] + */ + + { + UInt32 curPrice = curOpt->price; + unsigned prob = p->isMatch[state][posState]; + matchPrice = curPrice + GET_PRICE_1(prob); + litPrice = curPrice + GET_PRICE_0(prob); + } + + nextOpt = &p->opt[(size_t)cur + 1]; + nextIsLit = False; + + // here we can allow skip_items in p->opt, if we don't check (nextOpt->price < kInfinityPrice) + // 18.new.06 + if ((nextOpt->price < kInfinityPrice + // && !IsLitState(state) + && matchByte == curByte) + || litPrice > nextOpt->price + ) + litPrice = 0; + else + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + litPrice += (!IsLitState(state) ? 
+ LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + + if (litPrice < nextOpt->price) + { + nextOpt->price = litPrice; + nextOpt->len = 1; + MakeAs_Lit(nextOpt); + nextIsLit = True; + } + } + + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + numAvailFull = p->numAvail; + { + unsigned temp = kNumOpts - 1 - cur; + if (numAvailFull > temp) + numAvailFull = (UInt32)temp; + } + + // 18.06 + // ---------- SHORT_REP ---------- + if (IsLitState(state)) // 18.new + if (matchByte == curByte) + if (repMatchPrice < nextOpt->price) // 18.new + // if (numAvailFull < 2 || data[1] != *(data - reps[0] + 1)) + if ( + // nextOpt->price >= kInfinityPrice || + nextOpt->len < 2 // we can check nextOpt->len, if skip items are not allowed in p->opt + || (nextOpt->dist != 0 + // && nextOpt->extra <= 1 // 17.old + ) + ) + { + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); + // if (shortRepPrice <= nextOpt->price) // 17.old + if (shortRepPrice < nextOpt->price) // 18.new + { + nextOpt->price = shortRepPrice; + nextOpt->len = 1; + MakeAs_ShortRep(nextOpt); + nextIsLit = False; + } + } + + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + // numAvail <= p->numFastBytes + + // ---------- LIT : REP_0 ---------- + + if (!nextIsLit + && litPrice != 0 // 18.new + && matchByte != curByte + && numAvailFull > 2) + { + const Byte *data2 = data - reps[0]; + if (data[1] == data2[1] && data[2] == data2[2]) + { + unsigned len; + unsigned limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + for (len = 3; len < limit && data[len] == data2[len]; len++) + {} + + { + unsigned state2 = kLiteralNextStates[state]; + unsigned posState2 = (position + 1) & p->pbMask; + UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); + { + unsigned offset = cur + len; + + if (last < offset) + last = offset; + + // do + { + UInt32 price2; + COptimal *opt; + len--; + // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len; + opt->dist = 0; + opt->extra = 1; + } + } + // while (len >= 3); + } + } + } + } + + startLen = 2; /* speed optimization */ + + { + // ---------- REP ---------- + unsigned repIndex = 0; // 17.old + // unsigned repIndex = IsLitState(state) ? 
0 : 1; // 18.notused + for (; repIndex < LZMA_NUM_REPS; repIndex++) + { + unsigned len; + UInt32 price; + const Byte *data2 = data - reps[repIndex]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + + // if (len < startLen) continue; // 18.new: speed optimization + + { + unsigned offset = cur + len; + if (last < offset) + last = offset; + } + { + unsigned len2 = len; + price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); + do + { + UInt32 price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState, len2); + COptimal *opt = &p->opt[cur + len2]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->dist = (UInt32)repIndex; + opt->extra = 0; + } + } + while (--len2 >= 2); + } + + if (repIndex == 0) startLen = len + 1; // 17.old + // startLen = len + 1; // 18.new + + /* if (_maxMode) */ + { + // ---------- REP : LIT : REP_0 ---------- + // numFastBytes + 1 + numFastBytes + + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; + if (limit > numAvailFull) + limit = numAvailFull; + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) + { + unsigned state2 = kRepNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + price += GET_PRICE_LEN(&p->repLenEnc, posState, len) + + GET_PRICE_0(p->isMatch[state2][posState2]) + + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterRep; + posState2 = (posState2 + 1) & p->pbMask; + + + price += GetPrice_Rep_0(p, state2, posState2); + + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + // if (len2 >= 3) + { + { + unsigned offset = cur + len + len2; + + if (last < offset) + last = offset; + // do + { + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = (UInt32)repIndex; + } + } + // while (len2 >= 3); + } + } + } + } + } + } + + + // ---------- MATCH ---------- + /* for (unsigned len = 2; len <= newLen; len++) */ + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2); + MATCHES[numPairs] = (UInt32)newLen; + numPairs += 2; + } + + // startLen = 2; /* speed optimization */ + + if (newLen >= startLen) + { + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + UInt32 dist; + unsigned offs, posSlot, len; + + { + unsigned offset = cur + newLen; + if (last < offset) + last = offset; + } + + offs = 0; + while (startLen > MATCHES[offs]) + offs += 2; + dist = MATCHES[(size_t)offs + 1]; + + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); + + for (len = /*2*/ startLen; ; len++) + { + UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); + { + COptimal *opt; + unsigned lenNorm = len - 2; + lenNorm = GetLenToPosState2(lenNorm); + if (dist < kNumFullDistances) + price += p->distancesPrices[lenNorm][dist & (kNumFullDistances - 1)]; + else + price += p->posSlotPrices[lenNorm][posSlot] + p->alignPrices[dist & kAlignMask]; + + opt = &p->opt[cur + len]; + if (price < opt->price) + { + 
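+            // relaxation: this match is the cheapest known way to
+            // reach position cur + len, so record it as that node's
+            // best predecessor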
opt->price = price; + opt->len = (UInt32)len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + } + + if (len == MATCHES[offs]) + { + // if (p->_maxMode) { + // MATCH : LIT : REP_0 + + const Byte *data2 = data - dist - 1; + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; + if (limit > numAvailFull) + limit = numAvailFull; + + len2 += 2; + if (len2 <= limit) + if (data[len2 - 2] == data2[len2 - 2]) + if (data[len2 - 1] == data2[len2 - 1]) + { + for (; len2 < limit && data[len2] == data2[len2]; len2++) + {} + + len2 -= len; + + // if (len2 >= 3) + { + unsigned state2 = kMatchNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + unsigned offset; + price += GET_PRICE_0(p->isMatch[state2][posState2]); + price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterMatch; + + posState2 = (posState2 + 1) & p->pbMask; + price += GetPrice_Rep_0(p, state2, posState2); + + offset = cur + len + len2; + + if (last < offset) + last = offset; + // do + { + UInt32 price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + GET_PRICE_LEN(&p->repLenEnc, posState2, len2); + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = (UInt32)len2; + opt->extra = (CExtra)(len + 1); + opt->dist = dist + LZMA_NUM_REPS; + } + } + // while (len2 >= 3); + } + + } + + offs += 2; + if (offs == numPairs) + break; + dist = MATCHES[(size_t)offs + 1]; + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); + } + } + } + } + + do + p->opt[last].price = kInfinityPrice; + while (--last); + + return Backward(p, cur); +} + + + +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + + + +static unsigned GetOptimumFast(CLzmaEnc *p) +{ + UInt32 numAvail, mainDist; + unsigned mainLen, numPairs, repIndex, repLen, i; + const Byte *data; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLen; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + p->backRes = MARK_LIT; + if (numAvail < 2) + return 1; + // if (mainLen < 2 && p->state == 0) return 1; // 18.06.notused + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repLen = repIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len; + const Byte *data2 = data - p->reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++) + {} + if (len >= p->numFastBytes) + { + p->backRes = (UInt32)i; + MOVE_POS(p, len - 1) + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + if (mainLen >= p->numFastBytes) + { + p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + mainDist = 0; /* for GCC */ + + if (mainLen >= 2) + { + mainDist = p->matches[(size_t)numPairs - 1]; + while (numPairs > 2) + { + UInt32 dist2; + if (mainLen != p->matches[(size_t)numPairs - 4] + 1) + break; + dist2 = p->matches[(size_t)numPairs - 3]; + if (!ChangePair(dist2, mainDist)) + break; + numPairs -= 2; + mainLen--; + mainDist = dist2; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2) + if ( repLen + 1 >= mainLen + || (repLen + 2 >= 
mainLen && mainDist >= (1 << 9)) + || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) + { + p->backRes = (UInt32)repIndex; + MOVE_POS(p, repLen - 1) + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + { + unsigned len1 = ReadMatchDistances(p, &p->numPairs); + p->longestMatchLen = len1; + + if (len1 >= 2) + { + UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; + if ( (len1 >= mainLen && newDist < mainDist) + || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) + || (len1 > mainLen + 1) + || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) + return 1; + } + } + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len, limit; + const Byte *data2 = data - p->reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + limit = mainLen - 1; + for (len = 2;; len++) + { + if (len >= limit) + return 1; + if (data[len] != data2[len]) + break; + } + } + + p->backRes = mainDist + LZMA_NUM_REPS; + if (mainLen != 2) + { + MOVE_POS(p, mainLen - 2) + } + return mainLen; +} + + + + +static void WriteEndMarker(CLzmaEnc *p, unsigned posState) +{ + UInt32 range; + range = p->rc.range; + { + UInt32 ttt, newBound; + CLzmaProb *prob = &p->isMatch[p->state][posState]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_1(&p->rc, prob) + prob = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_0(&p->rc, prob) + } + p->state = kMatchNextStates[p->state]; + + p->rc.range = range; + LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); + range = p->rc.range; + + { + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); + CLzmaProb *probs = p->posSlotEncoder[0]; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < (1 << kNumPosSlotBits)); + } + { + // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; + unsigned numBits = 30 - kNumAlignBits; + do + { + range >>= 1; + p->rc.low += range; + RC_NORM(&p->rc) + } + while (--numBits); + } + + { + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + CLzmaProb *probs = p->posAlignEncoder; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < kAlignTableSize); + } + p->rc.range = range; +} + + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + + #ifndef _7ZIP_ST + if ( + // p->mf_Failure || + (p->mtMode && + ( // p->matchFinderMt.failure_LZ_LZ || + p->matchFinderMt.failure_LZ_BT)) + ) + { + p->result = MY_HRES_ERROR__INTERNAL_ERROR; + // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); + } + #endif + + if (MFB.result != SZ_OK) + p->result = SZ_ERROR_READ; + + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + + +MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +{ + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + + +MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) +{ + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; + // p->alignPriceCount = 0; + for (i = 0; i < kAlignTableSize / 2; i++) + { + UInt32 
price = 0; + unsigned sym = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } +} + + +MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) +{ + // int y; for (y = 0; y < 100; y++) { + + UInt32 tempPrices[kNumFullDistances]; + unsigned i, lps; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + + for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) + { + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = (posSlot >> 1) - 1; + unsigned base = ((2 | (posSlot & 1)) << footerBits); + const CLzmaProb *probs = p->posEncoders + (size_t)base * 2; + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + UInt32 price = 0; + unsigned m = 1; + unsigned sym = i; + unsigned offset = (unsigned)1 << footerBits; + base += i; + + if (footerBits) + do + { + unsigned bit = sym & 1; + sym >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + + { + unsigned prob = probs[m]; + tempPrices[base ] = price + GET_PRICEa_0(prob); + tempPrices[base + offset] = price + GET_PRICEa_1(prob); + } + } + + for (lps = 0; lps < kNumLenToPosStates; lps++) + { + unsigned slot; + unsigned distTableSize2 = (p->distTableSize + 1) >> 1; + UInt32 *posSlotPrices = p->posSlotPrices[lps]; + const CLzmaProb *probs = p->posSlotEncoder[lps]; + + for (slot = 0; slot < distTableSize2; slot++) + { + // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); + UInt32 price; + unsigned bit; + unsigned sym = slot + (1 << (kNumPosSlotBits - 1)); + unsigned prob; + bit = sym & 1; sym >>= 1; price = GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit); + prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[(size_t)slot * 2 ] = price + GET_PRICEa_0(prob); + posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob); + } + + { + UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++) + { + posSlotPrices[(size_t)slot * 2 ] += delta; + posSlotPrices[(size_t)slot * 2 + 1] += delta; + delta += ((UInt32)1 << kNumBitPriceShiftBits); + } + } + + { + UInt32 *dp = p->distancesPrices[lps]; + + dp[0] = posSlotPrices[0]; + dp[1] = posSlotPrices[1]; + dp[2] = posSlotPrices[2]; + dp[3] = posSlotPrices[3]; + + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + dp[i ] = slotPrice + tempPrices[i]; + dp[i + 1] = slotPrice + tempPrices[i + 1]; + } + } + } + // } +} + + + +static void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&MFB); + + #ifndef _7ZIP_ST + p->matchFinderMt.MatchFinder = &MFB; + MatchFinderMt_Construct(&p->matchFinderMt); + #endif + + { + 
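+  /* seed the encoder with default properties so a freshly
+     constructed object is consistent even before the caller
+     invokes LzmaEnc_SetProps() with its own settings */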
CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + + #ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); + #endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) +{ + void *p; + p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc)); + if (p) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +{ + ISzAlloc_Free(alloc, p->litProbs); + ISzAlloc_Free(alloc, p->saveState.litProbs); + p->litProbs = NULL; + p->saveState.litProbs = NULL; +} + +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + #ifndef _7ZIP_ST + MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); + #endif + + MatchFinder_Free(&MFB, allocBig); + LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + ISzAlloc_Free(alloc, p); +} + + +MY_NO_INLINE +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) +{ + UInt32 nowPos32, startPos32; + if (p->needInit) + { + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + } + #endif + p->matchFinder.Init(p->matchFinderObj); + p->needInit = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)); + + nowPos32 = (UInt32)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { + unsigned numPairs; + Byte curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); + // p->state = kLiteralNextStates[p->state]; + curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + + for (;;) + { + UInt32 dist; + unsigned len, posState; + UInt32 range, ttt, newBound; + CLzmaProb *probs; + + if (p->fastMode) + len = GetOptimumFast(p); + else + { + unsigned oci = p->optCur; + if (p->optEnd == oci) + len = GetOptimum(p, nowPos32); + else + { + const COptimal *opt = &p->opt[oci]; + len = opt->len; + p->backRes = opt->dist; + p->optCur = oci + 1; + } + } + + posState = (unsigned)nowPos32 & p->pbMask; + range = p->rc.range; + probs = &p->isMatch[p->state][posState]; + + RC_BIT_PRE(&p->rc, probs) + + dist = p->backRes; + + #ifdef SHOW_STAT2 + printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); + #endif + + if (dist == MARK_LIT) + { + Byte curByte; + const Byte *data; + unsigned state; + + RC_BIT_0(&p->rc, probs); + p->rc.range = range; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + probs = LIT_PROBS(nowPos32, *(data - 1)); + curByte = *data; + state = p->state; + p->state = kLiteralNextStates[state]; + if (IsLitState(state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); + } + else + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, probs) + + if (dist < LZMA_NUM_REPS) + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG0[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 0) + { + RC_BIT_0(&p->rc, probs); + probs = &p->isRep0Long[p->state][posState]; + 
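+              // isRep0Long picks between a 1-byte "short rep"
+              // (bit 0, see the len == 1 branch below) and a normal
+              // rep0 match with an encoded length (bit 1)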
RC_BIT_PRE(&p->rc, probs) + if (len != 1) + { + RC_BIT_1_BASE(&p->rc, probs); + } + else + { + RC_BIT_0_BASE(&p->rc, probs); + p->state = kShortRepNextStates[p->state]; + } + } + else + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG1[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 1) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[1]; + } + else + { + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG2[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 2) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[2]; + } + else + { + RC_BIT_1_BASE(&p->rc, probs); + dist = p->reps[3]; + p->reps[3] = p->reps[2]; + } + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = dist; + } + + RC_NORM(&p->rc) + + p->rc.range = range; + + if (len != 1) + { + LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + --p->repLenEncCounter; + p->state = kRepNextStates[p->state]; + } + } + else + { + unsigned posSlot; + RC_BIT_0(&p->rc, probs); + p->rc.range = range; + p->state = kMatchNextStates[p->state]; + + LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + // --p->lenEnc.counter; + + dist -= LZMA_NUM_REPS; + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + p->reps[1] = p->reps[0]; + p->reps[0] = dist + 1; + + p->matchPriceCount++; + GetPosSlot(dist, posSlot); + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); + { + UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); + range = p->rc.range; + probs = p->posSlotEncoder[GetLenToPosState(len)]; + do + { + CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); + UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; + sym <<= 1; + RC_BIT(&p->rc, prob, bit); + } + while (sym < (1 << kNumPosSlotBits * 2)); + p->rc.range = range; + } + + if (dist >= kStartPosModelIndex) + { + unsigned footerBits = ((posSlot >> 1) - 1); + + if (dist < kNumFullDistances) + { + unsigned base = ((2 | (posSlot & 1)) << footerBits); + RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, (unsigned)(dist /* - base */)); + } + else + { + UInt32 pos2 = (dist | 0xF) << (32 - footerBits); + range = p->rc.range; + // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + /* + do + { + range >>= 1; + p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); + RC_NORM(&p->rc) + } + while (footerBits > kNumAlignBits); + */ + do + { + range >>= 1; + p->rc.low += range & (0 - (pos2 >> 31)); + pos2 += pos2; + RC_NORM(&p->rc) + } + while (pos2 != 0xF0000000); + + + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + + { + unsigned m = 1; + unsigned bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + p->rc.range = range; + // p->alignPriceCount++; + } + } + } + } + } + + nowPos32 += (UInt32)len; + p->additionalOffset -= len; + + if (p->additionalOffset == 0) + { + UInt32 processed; + + if (!p->fastMode) + { + /* + if (p->alignPriceCount >= 16) // kAlignTableSize + FillAlignPrices(p); + if (p->matchPriceCount >= 128) + FillDistancesPrices(p); + if (p->lenEnc.counter <= 0) + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + */ + if (p->matchPriceCount >= 64) + { + 
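+          // refresh price tables lazily: rebuilding them after every
+          // symbol would dominate encode time, so updates are batched
+          // (here, roughly once per 64 encoded matches)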
FillAlignPrices(p); + // { int y; for (y = 0; y < 100; y++) { + FillDistancesPrices(p); + // }} + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + } + if (p->repLenEncCounter <= 0) + { + p->repLenEncCounter = REP_LEN_COUNT; + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); + } + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + + if (maxPackSize) + { + if (processed + kNumOpts + 300 >= maxUnpackSize + || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) + break; + } + else if (processed >= (1 << 17)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + + + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + UInt32 beforeSize = kNumOpts; + UInt32 dictSize; + + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + + #ifndef _7ZIP_ST + p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); + #endif + + { + unsigned lclp = p->lc + p->lp; + if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp) + { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb)); + if (!p->litProbs || !p->saveState.litProbs) + { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + + + dictSize = p->dictSize; + if (dictSize == ((UInt32)2 << 30) || + dictSize == ((UInt32)3 << 30)) + { + /* 21.03 : here we reduce the dictionary for 2 reasons: + 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary. + 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases, + where data size is aligned for 1 GB: 5/6/8 GB. + That reducing must be >= 1 for such corner cases. */ + dictSize -= 1; + } + + if (beforeSize + dictSize < keepWindowSize) + beforeSize = keepWindowSize - dictSize; + + /* in worst case we can look ahead for + max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes. + we send larger value for (keepAfter) to MantchFinder_Create(): + (numFastBytes + LZMA_MATCH_LEN_MAX + 1) + */ + + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ + , allocBig)); + p->matchFinderObj = &p->matchFinderMt; + MFB.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 
1 : 0); + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else + #endif + { + if (!MatchFinder_Create(&MFB, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */ + , allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &MFB; + MatchFinder_CreateVTable(&MFB, &p->matchFinder); + } + + return SZ_OK; +} + +static void LzmaEnc_Init(CLzmaEnc *p) +{ + unsigned i; + p->state = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; + + RangeEnc_Init(&p->rc); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + for (i = 0; i < kNumStates; i++) + { + unsigned j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; + unsigned j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances; i++) + p->posEncoders[i] = kProbInitValue; + } + + { + UInt32 num = (UInt32)0x300 << (p->lp + p->lc); + UInt32 k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } + + + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; + + { + for (i = 0; i < kNumOpts; i++) + p->opt[i].price = kInfinityPrice; + } + + p->additionalOffset = 0; + + p->pbMask = ((unsigned)1 << p->pb) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); + + // p->mf_Failure = False; +} + + +static void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + + p->repLenEncCounter = REP_LEN_COUNT; + + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + unsigned i; + for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) + if (p->dictSize <= ((UInt32)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + p->nowPos64 = 0; + return SZ_OK; +} + +static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + MFB.stream = inStream; + p->needInit = 1; + p->rc.outStream = outStream; + return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, + ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + MFB.stream = inStream; + p->needInit = 1; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) +{ + MFB.directInput = 1; + MFB.bufferBase = (Byte *)src; + MFB.directInputRem = srcLen; +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) 
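+/* (direct-input variant: the match finder reads straight from the
+   caller's (src, srcLen) buffer with no ISeqInStream callback;
+   see LzmaEnc_SetInputBuf above and LzmaEnc_MemEncode below) */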
+{ + CLzmaEnc *p = (CLzmaEnc *)pp; + LzmaEnc_SetInputBuf(p, src, srcLen); + p->needInit = 1; + + LzmaEnc_SetDataSize(pp, srcLen); + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +void LzmaEnc_Finish(CLzmaEncHandle pp) +{ + #ifndef _7ZIP_ST + CLzmaEnc *p = (CLzmaEnc *)pp; + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); + #else + UNUSED_VAR(pp); + #endif +} + + +typedef struct +{ + ISeqOutStream vt; + Byte *data; + SizeT rem; + BoolInt overflow; +} CLzmaEnc_SeqOutStreamBuf; + +static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) +{ + CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + if (size != 0) + { + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + } + return size; +} + + +/* +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} +*/ + +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + + +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + UInt64 nowPos64; + SRes res; + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + + nowPos64 = p->nowPos64; + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.vt; + + if (desiredPackSize == 0) + return SZ_ERROR_OUTPUT_EOF; + + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + + +MY_NO_INLINE +static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) +{ + SRes res = SZ_OK; + + #ifndef _7ZIP_ST + Byte allocaDummy[0x300]; + allocaDummy[0] = 0; + allocaDummy[1] = allocaDummy[0]; + #endif + + for (;;) + { + res = LzmaEnc_CodeOneBlock(p, 0, 0); + if (res != SZ_OK || p->finished) + break; + if (progress) + { + res = ICompressProgress_Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) + { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + + LzmaEnc_Finish(p); + + /* + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) + res = SZ_ERROR_FAIL; + } + */ + + return res; +} + + +SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, + ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); + return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); +} + + +SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +{ + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + { + const CLzmaEnc *p = (const CLzmaEnc *)pp; + const UInt32 dictSize = p->dictSize; + UInt32 v; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + // we write aligned dictionary value to properties for lzma decoder + if (dictSize >= ((UInt32)1 << 21)) + { + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; + v 
= (dictSize + kDictMask) & ~kDictMask; + if (v < dictSize) + v = dictSize; + } + else + { + unsigned i = 11 * 2; + do + { + v = (UInt32)(2 + (i & 1)) << (i >> 1); + i++; + } + while (v < dictSize); + } + + SetUi32(props + 1, v); + return SZ_OK; + } +} + + +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) +{ + return (unsigned)((CLzmaEnc *)pp)->writeEndMark; +} + + +SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + SRes res; + CLzmaEnc *p = (CLzmaEnc *)pp; + + CLzmaEnc_SeqOutStreamBuf outStream; + + outStream.vt.Write = SeqOutStreamBuf_Write; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + p->rc.outStream = &outStream.vt; + + res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); + + if (res == SZ_OK) + { + res = LzmaEnc_Encode2(p, progress); + if (res == SZ_OK && p->nowPos64 != srcLen) + res = SZ_ERROR_FAIL; + } + + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + SRes res; + if (!p) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) + { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, + writeEndMark, progress, alloc, allocBig); + } + + LzmaEnc_Destroy(p, alloc, allocBig); + return res; +} + + +/* +#ifndef _7ZIP_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/lib/LZMA/LzmaEnc.h b/lib/LZMA/LzmaEnc.h index 9194ee5..26757ba 100644 --- a/lib/LZMA/LzmaEnc.h +++ b/lib/LZMA/LzmaEnc.h @@ -1,76 +1,78 @@ -/* LzmaEnc.h -- LZMA Encoder -2017-07-27 : Igor Pavlov : Public domain */ - -#ifndef __LZMA_ENC_H -#define __LZMA_ENC_H - -#include "7zTypes.h" - -EXTERN_C_BEGIN - -#define LZMA_PROPS_SIZE 5 - -typedef struct _CLzmaEncProps -{ - int level; /* 0 <= level <= 9 */ - UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version - (1 << 12) <= dictSize <= (3 << 29) for 64-bit version - default = (1 << 24) */ - int lc; /* 0 <= lc <= 8, default = 3 */ - int lp; /* 0 <= lp <= 4, default = 0 */ - int pb; /* 0 <= pb <= 4, default = 2 */ - int algo; /* 0 - fast, 1 - normal, default = 1 */ - int fb; /* 5 <= fb <= 273, default = 32 */ - int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ - int numHashBytes; /* 2, 3 or 4, default = 4 */ - UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ - unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ - int numThreads; /* 1 or 2, default = 2 */ - - UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. 
- Encoder uses this value to reduce dictionary size */ -} CLzmaEncProps; - -void LzmaEncProps_Init(CLzmaEncProps *p); -void LzmaEncProps_Normalize(CLzmaEncProps *p); -UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); - - -/* ---------- CLzmaEncHandle Interface ---------- */ - -/* LzmaEnc* functions can return the following exit codes: -SRes: - SZ_OK - OK - SZ_ERROR_MEM - Memory allocation error - SZ_ERROR_PARAM - Incorrect paramater in props - SZ_ERROR_WRITE - ISeqOutStream write callback error - SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output - SZ_ERROR_PROGRESS - some break from progress callback - SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) -*/ - -typedef void * CLzmaEncHandle; - -CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); -void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); - -SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); -void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); -SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); -unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); - -SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); -SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); - - -/* ---------- One Call Interface ---------- */ - -SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, - const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, - ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); - -EXTERN_C_END - -#endif +/* LzmaEnc.h -- LZMA Encoder +2019-10-30 : Igor Pavlov : Public domain */ + +#ifndef __LZMA_ENC_H +#define __LZMA_ENC_H + +#include "7zTypes.h" + +EXTERN_C_BEGIN + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps +{ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (3 << 29) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ + + UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. 
+ Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc* functions can return the following exit codes: +SRes: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect paramater in props + SZ_ERROR_WRITE - ISeqOutStream write callback error + SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output + SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) +*/ + +typedef void * CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); + +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + + +/* ---------- One Call Interface ---------- */ + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); + +EXTERN_C_END + +#endif diff --git a/lib/LZMA/Precomp.h b/lib/LZMA/Precomp.h index e8ff8b4..edb5814 100644 --- a/lib/LZMA/Precomp.h +++ b/lib/LZMA/Precomp.h @@ -1,10 +1,10 @@ -/* Precomp.h -- StdAfx -2013-11-12 : Igor Pavlov : Public domain */ - -#ifndef __7Z_PRECOMP_H -#define __7Z_PRECOMP_H - -#include "Compiler.h" -/* #include "7zTypes.h" */ - -#endif +/* Precomp.h -- StdAfx +2013-11-12 : Igor Pavlov : Public domain */ + +#ifndef __7Z_PRECOMP_H +#define __7Z_PRECOMP_H + +#include "Compiler.h" +/* #include "7zTypes.h" */ + +#endif diff --git a/lib/LZMA/Threads.c b/lib/LZMA/Threads.c index 930ad27..6eb45b0 100644 --- a/lib/LZMA/Threads.c +++ b/lib/LZMA/Threads.c @@ -1,95 +1,540 @@ -/* Threads.c -- multithreading library -2017-06-26 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#ifndef UNDER_CE -#include -#endif - -#include "Threads.h" - -static WRes GetError() -{ - DWORD res = GetLastError(); - return res ? (WRes)res : 1; -} - -static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); } -static WRes BOOLToWRes(BOOL v) { return v ? 
diff --git a/lib/LZMA/Precomp.h b/lib/LZMA/Precomp.h
index e8ff8b4..edb5814 100644
--- a/lib/LZMA/Precomp.h
+++ b/lib/LZMA/Precomp.h
@@ -1,10 +1,10 @@
-/* Precomp.h -- StdAfx
-2013-11-12 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_PRECOMP_H
-#define __7Z_PRECOMP_H
-
-#include "Compiler.h"
-/* #include "7zTypes.h" */
-
-#endif
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/lib/LZMA/Threads.c b/lib/LZMA/Threads.c
index 930ad27..6eb45b0 100644
--- a/lib/LZMA/Threads.c
+++ b/lib/LZMA/Threads.c
@@ -1,95 +1,540 @@
-/* Threads.c -- multithreading library
-2017-06-26 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#ifndef UNDER_CE
-#include <process.h>
-#endif
-
-#include "Threads.h"
-
-static WRes GetError()
-{
-  DWORD res = GetLastError();
-  return res ? (WRes)res : 1;
-}
-
-static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
-static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
-
-WRes HandlePtr_Close(HANDLE *p)
-{
-  if (*p != NULL)
-  {
-    if (!CloseHandle(*p))
-      return GetError();
-    *p = NULL;
-  }
-  return 0;
-}
-
-WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
-
-WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
-{
-  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
-
-  #ifdef UNDER_CE
-
-  DWORD threadId;
-  *p = CreateThread(0, 0, func, param, 0, &threadId);
-
-  #else
-
-  unsigned threadId;
-  *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
-
-  #endif
-
-  /* maybe we must use errno here, but probably GetLastError() is also OK. */
-  return HandleToWRes(*p);
-}
-
-static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
-{
-  *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
-  return HandleToWRes(*p);
-}
-
-WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); }
-WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); }
-
-WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }
-WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }
-WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); }
-WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); }
-
-
-WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
-{
-  *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
-  return HandleToWRes(*p);
-}
-
-static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
-  { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
-WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
-  { return Semaphore_Release(p, (LONG)num, NULL); }
-WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
-
-WRes CriticalSection_Init(CCriticalSection *p)
-{
-  /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
-  #ifdef _MSC_VER
-  __try
-  #endif
-  {
-    InitializeCriticalSection(p);
-    /* InitializeCriticalSectionAndSpinCount(p, 0); */
-  }
-  #ifdef _MSC_VER
-  __except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
-  #endif
-  return 0;
-}
+/* Threads.c -- multithreading library
+2021-12-21 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#ifdef _WIN32
+
+#ifndef USE_THREADS_CreateThread
+#include <process.h>
+#endif
+
+#include "Threads.h"
+
+static WRes GetError()
+{
+  DWORD res = GetLastError();
+  return res ? (WRes)res : 1;
+}
+
+static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
+static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
+
+WRes HandlePtr_Close(HANDLE *p)
+{
+  if (*p != NULL)
+  {
+    if (!CloseHandle(*p))
+      return GetError();
+    *p = NULL;
+  }
+  return 0;
+}
+
+WRes Handle_WaitObject(HANDLE h)
+{
+  DWORD dw = WaitForSingleObject(h, INFINITE);
+  /*
+    (dw) result:
+    WAIT_OBJECT_0  // 0
+    WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
+    WAIT_TIMEOUT   // 0x00000102 : is compatible with Win32 Error space
+    WAIT_FAILED    // 0xFFFFFFFF
+  */
+  if (dw == WAIT_FAILED)
+  {
+    dw = GetLastError();
+    if (dw == 0)
+      return WAIT_FAILED;
+  }
+  return (WRes)dw;
+}
+
+#define Thread_Wait(p) Handle_WaitObject(*(p))
+
+WRes Thread_Wait_Close(CThread *p)
+{
+  WRes res = Thread_Wait(p);
+  WRes res2 = Thread_Close(p);
+  return (res != 0 ?
res : res2); +} + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + + #ifdef USE_THREADS_CreateThread + + DWORD threadId; + *p = CreateThread(NULL, 0, func, param, 0, &threadId); + + #else + + unsigned threadId; + *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); + + #endif + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return HandleToWRes(*p); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + #ifdef USE_THREADS_CreateThread + + UNUSED_VAR(affinity) + return Thread_Create(p, func, param); + + #else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + { + // DWORD_PTR prevMask = + SetThreadAffinityMask(h, (DWORD_PTR)affinity); + /* + if (prevMask == 0) + { + // affinity change is non-critical error, so we can ignore it + // wres = GetError(); + } + */ + } + { + DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return wres; + + #endif +} + + +static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) +{ + *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL); + return HandleToWRes(*p); +} + +WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); } +WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); } + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); } + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore() + *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL); + return HandleToWRes(*p); +} + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // if (Semaphore_IsCreated(p)) + { + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + } + return Semaphore_Create(p, initCount, maxCount); +} + +static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount) + { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); } +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num) + { return Semaphore_Release(p, (LONG)num, NULL); } +WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); } + +WRes CriticalSection_Init(CCriticalSection *p) +{ + /* InitializeCriticalSection() can raise exception: + Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception + Windows Vista+ : no exceptions */ + #ifdef _MSC_VER + __try + #endif + { + InitializeCriticalSection(p); + /* InitializeCriticalSectionAndSpinCount(p, 0); */ + } + #ifdef _MSC_VER + __except (EXCEPTION_EXECUTE_HANDLER) { return 
ERROR_NOT_ENOUGH_MEMORY; }
+  #endif
+  return 0;
+}
+
+
+
+
+#else // _WIN32
+
+// ---------- POSIX ----------
+
+#ifndef __APPLE__
+#ifndef _7ZIP_AFFINITY_DISABLE
+// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
+#define _GNU_SOURCE
+#endif
+#endif
+
+#include "Threads.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+// #include <sched.h>
+#endif
+
+
+// #include <stdio.h>
+// #define PRF(p) p
+#define PRF(p)
+
+#define Print(s) PRF(printf("\n%s\n", s))
+
+// #include
+
+WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
+{
+  // new thread in POSIX probably inherits affinity from parent thread
+  Print("Thread_Create_With_CpuSet");
+
+  pthread_attr_t attr;
+  int ret;
+  // int ret2;
+
+  p->_created = 0;
+
+  RINOK(pthread_attr_init(&attr));
+
+  ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+  if (!ret)
+  {
+    if (cpuSet)
+    {
+      #ifdef _7ZIP_AFFINITY_SUPPORTED
+
+      /*
+      printf("\n affinity :");
+      unsigned i;
+      for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
+      {
+        Byte b = *((const Byte *)cpuSet + i);
+        char temp[32];
+        #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+        temp[0] = GET_HEX_CHAR((b & 0xF));
+        temp[1] = GET_HEX_CHAR((b >> 4));
+        // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
+        // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
+        temp[2] = 0;
+        printf("%s", temp);
+      }
+      printf("\n");
+      */
+
+      // ret2 =
+      pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
+      /*
+      if (ret2)
+      {
+        // affinity change is non-critical error, so we can ignore it
+        // ret = ret2;
+      }
+      */
+      #endif
+    }
+
+    ret = pthread_create(&p->_tid, &attr, func, param);
+
+    if (!ret)
+    {
+      p->_created = 1;
+      /*
+      if (cpuSet)
+      {
+        // ret2 =
+        pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
+        // if (ret2) ret = ret2;
+      }
+      */
+    }
+  }
+  // ret2 =
+  pthread_attr_destroy(&attr);
+  // if (ret2 != 0) ret = ret2;
+  return ret;
+}
+
+
+WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
+{
+  return Thread_Create_With_CpuSet(p, func, param, NULL);
+}
+
+
+WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
+{
+  Print("Thread_Create_WithAffinity");
+  CCpuSet cs;
+  unsigned i;
+  CpuSet_Zero(&cs);
+  for (i = 0; i < sizeof(affinity) * 8; i++)
+  {
+    if (affinity == 0)
+      break;
+    if (affinity & 1)
+    {
+      CpuSet_Set(&cs, i);
+    }
+    affinity >>= 1;
+  }
+  return Thread_Create_With_CpuSet(p, func, param, &cs);
+}
+
+
+WRes Thread_Close(CThread *p)
+{
+  // Print("Thread_Close");
+  int ret;
+  if (!p->_created)
+    return 0;
+
+  ret = pthread_detach(p->_tid);
+  p->_tid = 0;
+  p->_created = 0;
+  return ret;
+}
+
+
+WRes Thread_Wait_Close(CThread *p)
+{
+  // Print("Thread_Wait_Close");
+  void *thread_return;
+  int ret;
+  if (!p->_created)
+    return EINVAL;
+
+  ret = pthread_join(p->_tid, &thread_return);
+  // probably we can't use that (_tid) after pthread_join(), so we close thread here
+  p->_created = 0;
+  p->_tid = 0;
+  return ret;
+}
+
+
+
+static WRes Event_Create(CEvent *p, int manualReset, int signaled)
+{
+  RINOK(pthread_mutex_init(&p->_mutex, NULL));
+  RINOK(pthread_cond_init(&p->_cond, NULL));
+  p->_manual_reset = manualReset;
+  p->_state = (signaled ?
True : False); + p->_created = 1; + return 0; +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) + { return Event_Create(p, True, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) + { return Event_Create(p, False, signaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + + +WRes Event_Set(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = True; + int res1 = pthread_cond_broadcast(&p->_cond); + int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); +} + +WRes Event_Reset(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = False; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Wait(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_state == False) + { + // ETIMEDOUT + // ret = + pthread_cond_wait(&p->_cond, &p->_mutex); + // if (ret != 0) break; + } + if (p->_manual_reset == False) + { + p->_state = False; + } + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Close(CEvent *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_count = initCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (Semaphore_IsCreated(p)) + { + /* + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + */ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + // return EINVAL; // for debug + p->_count = initCount; + p->_maxCount = maxCount; + return 0; + } + return Semaphore_Create(p, initCount, maxCount); +} + + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int ret; + + if (releaseCount < 1) + return EINVAL; + + RINOK(pthread_mutex_lock(&p->_mutex)); + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + ret = ERROR_TOO_MANY_POSTS; // EINVAL; + else + { + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + } + RINOK(pthread_mutex_unlock(&p->_mutex)); + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Semaphore_Close(CSemaphore *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? 
res1 : res2);
+  }
+}
+
+
+
+WRes CriticalSection_Init(CCriticalSection *p)
+{
+  // Print("CriticalSection_Init");
+  if (!p)
+    return EINTR;
+  return pthread_mutex_init(&p->_mutex, NULL);
+}
+
+void CriticalSection_Enter(CCriticalSection *p)
+{
+  // Print("CriticalSection_Enter");
+  if (p)
+  {
+    // int ret =
+    pthread_mutex_lock(&p->_mutex);
+  }
+}
+
+void CriticalSection_Leave(CCriticalSection *p)
+{
+  // Print("CriticalSection_Leave");
+  if (p)
+  {
+    // int ret =
+    pthread_mutex_unlock(&p->_mutex);
+  }
+}
+
+void CriticalSection_Delete(CCriticalSection *p)
+{
+  // Print("CriticalSection_Delete");
+  if (p)
+  {
+    // int ret =
+    pthread_mutex_destroy(&p->_mutex);
+  }
+}
+
+LONG InterlockedIncrement(LONG volatile *addend)
+{
+  // Print("InterlockedIncrement");
+  #ifdef USE_HACK_UNSAFE_ATOMIC
+  LONG val = *addend + 1;
+  *addend = val;
+  return val;
+  #else
+  return __sync_add_and_fetch(addend, 1);
+  #endif
+}
+
+#endif // _WIN32
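With both backends now in place, callers are meant to drive the same API on either side of the #ifdef _WIN32. A minimal sketch of the intended lifecycle (not part of the patch; Worker, ExampleRun, g_done and the payload are made-up names):

#include "Threads.h"

static CAutoResetEvent g_done;

static THREAD_FUNC_DECL Worker(void *param)
{
  *(int *)param *= 2;   /* stand-in for real work */
  Event_Set(&g_done);   /* wake the thread blocked in Event_Wait() */
  return 0;
}

int ExampleRun(void)
{
  CThread t;
  int value = 21;
  if (AutoResetEvent_CreateNotSignaled(&g_done) != 0)
    return 1;
  if (Thread_Create(&t, Worker, &value) != 0)
    return 1;
  Event_Wait(&g_done);    /* auto-reset event: waiting consumes the signal */
  Thread_Wait_Close(&t);  /* joins the thread and releases it on both backends */
  Event_Close(&g_done);
  return value == 42 ? 0 : 1;
}

Note that the bare Thread_Wait macro is commented out of the new header below; Thread_Wait_Close is the supported way to join, which is why the sketch never waits on the thread handle directly.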
diff --git a/lib/LZMA/Threads.h b/lib/LZMA/Threads.h
index e53ace4..e9493af 100644
--- a/lib/LZMA/Threads.h
+++ b/lib/LZMA/Threads.h
@@ -1,68 +1,232 @@
-/* Threads.h -- multithreading library
-2017-06-18 : Igor Pavlov : Public domain */
-
-#ifndef __7Z_THREADS_H
-#define __7Z_THREADS_H
-
-#ifdef _WIN32
-#include <windows.h>
-#endif
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-WRes HandlePtr_Close(HANDLE *h);
-WRes Handle_WaitObject(HANDLE h);
-
-typedef HANDLE CThread;
-#define Thread_Construct(p) *(p) = NULL
-#define Thread_WasCreated(p) (*(p) != NULL)
-#define Thread_Close(p) HandlePtr_Close(p)
-#define Thread_Wait(p) Handle_WaitObject(*(p))
-
-typedef
-#ifdef UNDER_CE
-  DWORD
-#else
-  unsigned
-#endif
-  THREAD_FUNC_RET_TYPE;
-
-#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
-#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
-typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
-WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
-
-typedef HANDLE CEvent;
-typedef CEvent CAutoResetEvent;
-typedef CEvent CManualResetEvent;
-#define Event_Construct(p) *(p) = NULL
-#define Event_IsCreated(p) (*(p) != NULL)
-#define Event_Close(p) HandlePtr_Close(p)
-#define Event_Wait(p) Handle_WaitObject(*(p))
-WRes Event_Set(CEvent *p);
-WRes Event_Reset(CEvent *p);
-WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
-WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
-WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
-WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
-
-typedef HANDLE CSemaphore;
-#define Semaphore_Construct(p) *(p) = NULL
-#define Semaphore_IsCreated(p) (*(p) != NULL)
-#define Semaphore_Close(p) HandlePtr_Close(p)
-#define Semaphore_Wait(p) Handle_WaitObject(*(p))
-WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
-WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
-WRes Semaphore_Release1(CSemaphore *p);
-
-typedef CRITICAL_SECTION CCriticalSection;
-WRes CriticalSection_Init(CCriticalSection *p);
-#define CriticalSection_Delete(p) DeleteCriticalSection(p)
-#define CriticalSection_Enter(p) EnterCriticalSection(p)
-#define CriticalSection_Leave(p) LeaveCriticalSection(p)
-
-EXTERN_C_END
-
-#endif
+/* Threads.h -- multithreading library
+2021-12-21 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_THREADS_H
+#define __7Z_THREADS_H
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+
+#if defined(__linux__)
+#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
+#ifndef _7ZIP_AFFINITY_DISABLE
+#define _7ZIP_AFFINITY_SUPPORTED
+// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
+// #define _GNU_SOURCE
+#endif
+#endif
+#endif
+
+#include <pthread.h>
+
+#endif
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#ifdef _WIN32
+
+WRes HandlePtr_Close(HANDLE *h);
+WRes Handle_WaitObject(HANDLE h);
+
+typedef HANDLE CThread;
+
+#define Thread_Construct(p) { *(p) = NULL; }
+#define Thread_WasCreated(p) (*(p) != NULL)
+#define Thread_Close(p) HandlePtr_Close(p)
+// #define Thread_Wait(p) Handle_WaitObject(*(p))
+
+#ifdef UNDER_CE
+  // if (USE_THREADS_CreateThread is defined), we use CreateThread()
+  // if (USE_THREADS_CreateThread is not defined), we use _beginthreadex()
+  #define USE_THREADS_CreateThread
+#endif
+
+typedef
+  #ifdef USE_THREADS_CreateThread
+  DWORD
+  #else
+  unsigned
+  #endif
+  THREAD_FUNC_RET_TYPE;
+
+typedef DWORD_PTR CAffinityMask;
+typedef DWORD_PTR CCpuSet;
+
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
+
+#else // _WIN32
+
+typedef struct _CThread
+{
+  pthread_t _tid;
+  int _created;
+} CThread;
+
+#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
+#define Thread_WasCreated(p) ((p)->_created != 0)
+WRes Thread_Close(CThread *p);
+// #define Thread_Wait Thread_Wait_Close
+
+typedef void * THREAD_FUNC_RET_TYPE;
+
+typedef UInt64 CAffinityMask;
+
+#ifdef _7ZIP_AFFINITY_SUPPORTED
+
+typedef cpu_set_t CCpuSet;
+#define CpuSet_Zero(p) CPU_ZERO(p)
+#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
+#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
+
+#else
+
+typedef UInt64 CCpuSet;
+#define CpuSet_Zero(p) { *(p) = 0; }
+#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
+#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
+
+#endif
+
+
+#endif // _WIN32
+
+
+#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
+
+#if defined(_WIN32) && defined(__GNUC__)
+/* GCC compiler for x86 32-bit uses the rule:
+   the stack is 16-byte aligned before CALL instruction for function calling.
+   But only root function main() contains instructions that
+   set 16-byte alignment for stack pointer. And other functions
+   just keep alignment, if it was set in some parent function.
+
+   The problem:
+   if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(),
+   the root function of thread doesn't set 16-byte alignment.
+   And stack frames in all child functions also will be unaligned in that case.
+
+   Here we set (force_align_arg_pointer) attribute for root function of new thread.
+   Do we need (force_align_arg_pointer) also for other systems?
*/ + + #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer)) + // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions +#else + #define THREAD_FUNC_ATTRIB_ALIGN_ARG +#endif + +#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); +WRes Thread_Wait_Close(CThread *p); + +#ifdef _WIN32 +#define Thread_Create_With_CpuSet(p, func, param, cs) \ + Thread_Create_With_Affinity(p, func, param, *cs) +#else +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); +#endif + + +#ifdef _WIN32 + +typedef HANDLE CEvent; +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; +#define Event_Construct(p) *(p) = NULL +#define Event_IsCreated(p) (*(p) != NULL) +#define Event_Close(p) HandlePtr_Close(p) +#define Event_Wait(p) Handle_WaitObject(*(p)) +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); + +typedef HANDLE CSemaphore; +#define Semaphore_Construct(p) *(p) = NULL +#define Semaphore_IsCreated(p) (*(p) != NULL) +#define Semaphore_Close(p) HandlePtr_Close(p) +#define Semaphore_Wait(p) Handle_WaitObject(*(p)) +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +WRes Semaphore_Release1(CSemaphore *p); + +typedef CRITICAL_SECTION CCriticalSection; +WRes CriticalSection_Init(CCriticalSection *p); +#define CriticalSection_Delete(p) DeleteCriticalSection(p) +#define CriticalSection_Enter(p) EnterCriticalSection(p) +#define CriticalSection_Leave(p) LeaveCriticalSection(p) + + +#else // _WIN32 + +typedef struct _CEvent +{ + int _created; + int _manual_reset; + int _state; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(p) (p)->_created = 0 +#define Event_IsCreated(p) ((p)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes Event_Wait(CEvent *p); +WRes Event_Close(CEvent *p); + + +typedef struct _CSemaphore +{ + int _created; + UInt32 _count; + UInt32 _maxCount; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 +#define Semaphore_IsCreated(p) ((p)->_created) + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef struct _CCriticalSection +{ + 
pthread_mutex_t _mutex; +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +void CriticalSection_Delete(CCriticalSection *cs); +void CriticalSection_Enter(CCriticalSection *cs); +void CriticalSection_Leave(CCriticalSection *cs); + +LONG InterlockedIncrement(LONG volatile *addend); + +#endif // _WIN32 + +EXTERN_C_END + +#endif From 9eb7ca352facc0be1256ba202b8709fb9fa94c3d Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sat, 15 Jan 2022 22:49:09 -0800 Subject: [PATCH 03/12] GitHub Actions: remove ubuntu 16.04 --- .github/workflows/ci.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57448de..97617c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,6 @@ jobs: strategy: matrix: os: - - ubuntu-16.04 - ubuntu-18.04 - ubuntu-20.04 - macos-10.15 @@ -17,11 +16,6 @@ jobs: - { cc: gcc, cxx: g++ } - { cc: clang, cxx: clang++ } - exclude: - # gcc 5 doesn't support C++17 - - os: ubuntu-16.04 - compiler: { cc: gcc, cxx: g++ } - fail-fast: false env: From 14665217f4caa0a05dd46c5e64eeb6f660000b06 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sat, 15 Jan 2022 23:55:01 -0800 Subject: [PATCH 04/12] workaround to fix clang build on windows use of undeclared identifier '__m256i' --- lib/LZMA/LzFind.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/LZMA/LzFind.c b/lib/LZMA/LzFind.c index a17c06b..8469407 100644 --- a/lib/LZMA/LzFind.c +++ b/lib/LZMA/LzFind.c @@ -507,14 +507,14 @@ void MatchFinder_Init(CMatchFinder *p) #ifdef MY_CPU_X86_OR_AMD64 - #if defined(__clang__) && (__clang_major__ >= 8) \ + #if defined(__clang__) && (__clang_major__ >= 8) && !defined(_WIN32) \ || defined(__GNUC__) && (__GNUC__ >= 8) \ || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) #define USE_SATUR_SUB_128 #define USE_AVX2 #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) - #elif defined(_MSC_VER) + #elif defined(_MSC_VER) && !defined(__clang__) #if (_MSC_VER >= 1600) #define USE_SATUR_SUB_128 #if (_MSC_VER >= 1900) From b5f2efccde6f57c4dfecc2006e75444621763fd4 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sun, 16 Jan 2022 00:08:56 -0800 Subject: [PATCH 05/12] Enable LZMA multithread for Linux and Mac --- Makefile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f9bd3c2..910e3e0 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ LEANIFY_SRC := leanify.cpp main.cpp utils.cpp $(wildcard formats/*.cpp) -LZMA_OBJ := lib/LZMA/Alloc.o lib/LZMA/LzFind.o lib/LZMA/LzmaDec.o lib/LZMA/LzmaEnc.o +LZMA_OBJ := lib/LZMA/Alloc.o lib/LZMA/LzFind.o lib/LZMA/LzFindMt.o lib/LZMA/LzFindOpt.o lib/LZMA/LzmaDec.o lib/LZMA/LzmaEnc.o lib/LZMA/Threads.o MOZJPEG_OBJ := lib/mozjpeg/jaricom.o lib/mozjpeg/jcapimin.o lib/mozjpeg/jcarith.o lib/mozjpeg/jcext.o lib/mozjpeg/jchuff.o lib/mozjpeg/jcmarker.o lib/mozjpeg/jcmaster.o lib/mozjpeg/jcomapi.o lib/mozjpeg/jcparam.o lib/mozjpeg/jcphuff.o lib/mozjpeg/jctrans.o lib/mozjpeg/jdapimin.o lib/mozjpeg/jdarith.o lib/mozjpeg/jdatadst.o lib/mozjpeg/jdatasrc.o lib/mozjpeg/jdcoefct.o lib/mozjpeg/jdhuff.o lib/mozjpeg/jdinput.o lib/mozjpeg/jdmarker.o lib/mozjpeg/jdphuff.o lib/mozjpeg/jdtrans.o lib/mozjpeg/jerror.o lib/mozjpeg/jmemmgr.o lib/mozjpeg/jmemnobs.o lib/mozjpeg/jsimd_none.o lib/mozjpeg/jutils.o PUGIXML_OBJ := lib/pugixml/pugixml.o ZOPFLI_OBJ := lib/zopfli/hash.o lib/zopfli/squeeze.o lib/zopfli/gzip_container.o lib/zopfli/katajainen.o 
lib/zopfli/zopfli_lib.o lib/zopfli/cache.o lib/zopfli/zlib_container.o lib/zopfli/util.o lib/zopfli/tree.o lib/zopfli/deflate.o lib/zopfli/blocksplitter.o lib/zopfli/lz77.o @@ -29,13 +29,10 @@ else LDFLAGS += -s endif -# Multithread in LZMA SDK is only supported on Windows ifeq ($(SYSTEM), Windows) LEANIFY_SRC += fileio_win.cpp - LZMA_OBJ += lib/LZMA/LzFindMt.o lib/LZMA/Threads.o else LEANIFY_SRC += fileio_linux.cpp - LZMA_CFLAGS := -D _7ZIP_ST endif .PHONY: leanify clean @@ -43,7 +40,7 @@ endif leanify: $(LEANIFY_SRC) $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OBJ) $(ZOPFLIPNG_OBJ) $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) $(LDLIBS) -o $@ -$(LZMA_OBJ): CFLAGS := $(filter-out -Wextra,$(CFLAGS)) $(LZMA_CFLAGS) -Wno-parentheses +$(LZMA_OBJ): CFLAGS := $(filter-out -Wextra,$(CFLAGS)) $(MOZJPEG_OBJ): CFLAGS := $(filter-out -Wextra,$(CFLAGS)) From 5ec6c24ed1d508633e5f0561844ba62022e18a7e Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Sun, 30 Jan 2022 22:12:15 -0800 Subject: [PATCH 06/12] SWF: reserve LZMA_PROPS_SIZE fix #80 --- formats/swf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/formats/swf.cpp b/formats/swf.cpp index 5f5300c..a00efc9 100644 --- a/formats/swf.cpp +++ b/formats/swf.cpp @@ -41,7 +41,7 @@ size_t GetRECTSize(uint8_t* rect) { bool LZMACompress(const uint8_t* src, size_t src_len, vector* out) { // Reserve enough space. - out->resize(src_len + src_len / 8); + out->resize(src_len + src_len / 8 + LZMA_PROPS_SIZE); CLzmaEncProps props; LzmaEncProps_Init(&props); From feaa230ba49b86d4ed6bf176c94ad1cbc2a000ee Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Tue, 1 Feb 2022 22:47:57 -0800 Subject: [PATCH 07/12] Create SECURITY.md fix #81 --- SECURITY.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..923569c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please report security vulnerabilities to jayxon@gmail.com From 72fd9d2d3a2bde23bdaaf77142fdcd87bc858362 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Fri, 4 Feb 2022 01:39:24 -0800 Subject: [PATCH 08/12] SWF: check boundary fix #80 --- formats/swf.cpp | 57 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/formats/swf.cpp b/formats/swf.cpp index a00efc9..14bdd00 100644 --- a/formats/swf.cpp +++ b/formats/swf.cpp @@ -81,8 +81,18 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { if (is_fast && compression != 'F') return Format::Leanify(size_leanified); + if (size_ < 8) { + cerr << "SWF file too small!" << endl; + return Format::Leanify(size_leanified); + } + uint8_t* in_buffer = fp_ + 8; - uint32_t in_len = *(uint32_t*)(fp_ + 4) - 8; + uint32_t in_len = *(uint32_t*)(fp_ + 4); + if (in_len < 9) { + cerr << "invalid file size in header: " << in_len << endl; + return Format::Leanify(size_leanified); + } + in_len -= 8; // if SWF is compressed, decompress it first if (compression == 'C') { @@ -101,6 +111,10 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { } else if (compression == 'Z') { // LZMA VerbosePrint("SWF is compressed with LZMA."); + if (size_ < 4 + 12 + LZMA_PROPS_SIZE) { + cerr << "SWF file too small!" 
<< endl; + return Format::Leanify(size_leanified); + } // | 4 bytes | 4 bytes | 4 bytes | 5 bytes | n bytes | 6 bytes | // | 'ZWS' + version | scriptLen | compressedLen | LZMA props | LZMA data | LZMA end marker | uint8_t* dst_buffer = new uint8_t[in_len]; @@ -114,17 +128,23 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { in_buffer = dst_buffer; } else { VerbosePrint("SWF is not compressed."); + if (in_len + 8 > size_) { + cerr << "SWF file too small!" << endl; + return Format::Leanify(size_leanified); + } } // parsing SWF tags uint8_t* p = in_buffer + GetRECTSize(in_buffer); // skip FrameSize which is a RECT p += 4; // skip FrameRate(2 Byte) + FrameCount(2 Byte) = 4 Byte size_t tag_size_leanified = 0; - do { + while (p + 2 <= in_buffer + in_len) { uint16_t tag_type = *(uint16_t*)p >> 6; uint32_t tag_length = *p & 0x3F; size_t tag_header_length = 2; if (tag_length == 0x3F) { + if (p + 6 > in_buffer + in_len) + break; tag_length = *(uint32_t*)(p + 2); tag_header_length += 4; } @@ -132,13 +152,26 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { memmove(p - tag_size_leanified, p, tag_header_length); p += tag_header_length; + if (tag_length > in_len || p - in_buffer + tag_length > in_len) { + VerbosePrint("SWF tag too long: ", tag_length); + break; + } + switch (tag_type) { // DefineBitsLossless case 20: // DefineBitsLossless2 case 36: { - size_t header_size = 7 + (p[3] == 3); VerbosePrint("DefineBitsLossless tag found."); + if (3 > tag_length) { + VerbosePrint("SWF tag too short: ", tag_length); + break; + } + size_t header_size = 7 + (p[3] == 3); + if (header_size > tag_length) { + VerbosePrint("SWF tag too short: ", tag_length); + break; + } memmove(p - tag_size_leanified, p, header_size); // recompress Zlib bitmap data @@ -151,6 +184,10 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { // DefineBitsJPEG2 case 21: { VerbosePrint("DefineBitsJPEG2 tag found."); + if (2 > tag_length) { + VerbosePrint("SWF tag too short: ", tag_length); + break; + } // copy id *(uint16_t*)(p - tag_size_leanified) = *(uint16_t*)p; @@ -165,13 +202,21 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { case 35: // DefineBitsJPEG4 case 90: { + size_t header_size = tag_type == 90 ? 8 : 6; + VerbosePrint("DefineBitsJPEG", header_size / 2, " tag found."); + if (header_size > tag_length) { + VerbosePrint("SWF tag too short: ", tag_length); + break; + } // copy id *(uint16_t*)(p - tag_size_leanified) = *(uint16_t*)p; uint32_t img_size = *(uint32_t*)(p + 2); - size_t header_size = tag_type == 90 ? 
8 : 6; + if (img_size > tag_length || header_size + img_size > tag_length) { + VerbosePrint("SWF tag too short: ", tag_length); + break; + } - VerbosePrint("DefineBitsJPEG", header_size / 2, " tag found."); // Leanify embedded image size_t new_img_size = LeanifyFile(p + header_size, img_size, tag_size_leanified); *(uint32_t*)(p + 2 - tag_size_leanified) = new_img_size; @@ -198,7 +243,7 @@ size_t Swf::Leanify(size_t size_leanified /*= 0*/) { memmove(p - tag_size_leanified, p, tag_length); } p += tag_length; - } while (p < in_buffer + in_len); + } VerbosePrint("Uncompressed SWF tags leanified: ", tag_size_leanified); in_len -= tag_size_leanified; From 8b7acdb9801784cfa320057bc8168a6f6b212688 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Fri, 4 Feb 2022 22:16:08 -0800 Subject: [PATCH 09/12] RDB: check boundary fix #82 --- formats/rdb.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/formats/rdb.cpp b/formats/rdb.cpp index 4a97152..c3e8687 100644 --- a/formats/rdb.cpp +++ b/formats/rdb.cpp @@ -23,21 +23,28 @@ size_t Rdb::Leanify(size_t size_leanified /*= 0*/) { } depth++; - uint8_t* p_read; + uint8_t* p_read = fp_; size_t rdb_size_leanified = 0; - // header - p_read = fp_; - fp_ -= size_leanified; - // total number of files including directory uint32_t file_num = *(uint32_t*)(p_read + 0x10); uint64_t index_offset = *(uint64_t*)(p_read + 0x14); + if (index_offset > size_) { + cerr << "index offset out of range: " << index_offset << endl; + return Format::Leanify(size_leanified); + } + uint64_t content_offset = index_offset + *(uint64_t*)(p_read + 0x1C); + if (content_offset < index_offset || content_offset > size_) { + cerr << "content offset out of range: " << content_offset << endl; + return Format::Leanify(size_leanified); + } + // move header and indexes + fp_ -= size_leanified; memmove(fp_, p_read, (size_t)content_offset); uint8_t* p_index = fp_ + index_offset; @@ -50,12 +57,26 @@ size_t Rdb::Leanify(size_t size_leanified /*= 0*/) { // note that on Linux wchar_t is 4 bytes instead of 2 // so I can't use wcslen // p_index += (wcslen(file_name) + 1) * 2; - while (*(uint16_t*)p_index) { + while (p_index + 2 < fp_ + content_offset && *(uint16_t*)p_index) { p_index += 2; } p_index += 2; + size_t remaining_size = fp_ + size_leanified + size_ - p_read; + if (p_index + 8 > fp_ + content_offset) { + cerr << "index is overlapping with content" << endl; + memmove(p_read - rdb_size_leanified - size_leanified, p_read, remaining_size); + p_read += remaining_size; + break; + } + uint64_t file_size = *(uint64_t*)(p_index + 8); + if (file_size > static_cast(remaining_size)) { + cerr << "file size out of range: " << file_size << endl; + memmove(p_read - rdb_size_leanified - size_leanified, p_read, remaining_size); + p_read += remaining_size; + break; + } *(uint64_t*)p_index -= rdb_size_leanified; From 39f3d22676732b2376d5e2833240feba625071f3 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Fri, 4 Feb 2022 22:27:18 -0800 Subject: [PATCH 10/12] Makefile: add asan target --- Makefile | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 910e3e0..e794ddb 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ -LEANIFY_SRC := leanify.cpp main.cpp utils.cpp $(wildcard formats/*.cpp) +LEANIFY_OBJ := leanify.o main.o utils.o $(patsubst %.cpp,%.o,$(wildcard formats/*.cpp)) LZMA_OBJ := lib/LZMA/Alloc.o lib/LZMA/LzFind.o lib/LZMA/LzFindMt.o lib/LZMA/LzFindOpt.o lib/LZMA/LzmaDec.o lib/LZMA/LzmaEnc.o 
lib/LZMA/Threads.o MOZJPEG_OBJ := lib/mozjpeg/jaricom.o lib/mozjpeg/jcapimin.o lib/mozjpeg/jcarith.o lib/mozjpeg/jcext.o lib/mozjpeg/jchuff.o lib/mozjpeg/jcmarker.o lib/mozjpeg/jcmaster.o lib/mozjpeg/jcomapi.o lib/mozjpeg/jcparam.o lib/mozjpeg/jcphuff.o lib/mozjpeg/jctrans.o lib/mozjpeg/jdapimin.o lib/mozjpeg/jdarith.o lib/mozjpeg/jdatadst.o lib/mozjpeg/jdatasrc.o lib/mozjpeg/jdcoefct.o lib/mozjpeg/jdhuff.o lib/mozjpeg/jdinput.o lib/mozjpeg/jdmarker.o lib/mozjpeg/jdphuff.o lib/mozjpeg/jdtrans.o lib/mozjpeg/jerror.o lib/mozjpeg/jmemmgr.o lib/mozjpeg/jmemnobs.o lib/mozjpeg/jsimd_none.o lib/mozjpeg/jutils.o PUGIXML_OBJ := lib/pugixml/pugixml.o ZOPFLI_OBJ := lib/zopfli/hash.o lib/zopfli/squeeze.o lib/zopfli/gzip_container.o lib/zopfli/katajainen.o lib/zopfli/zopfli_lib.o lib/zopfli/cache.o lib/zopfli/zlib_container.o lib/zopfli/util.o lib/zopfli/tree.o lib/zopfli/deflate.o lib/zopfli/blocksplitter.o lib/zopfli/lz77.o ZOPFLIPNG_OBJ := lib/zopflipng/lodepng/lodepng.o lib/zopflipng/lodepng/lodepng_util.o lib/zopflipng/zopflipng_lib.o -CFLAGS += -Wall -Wextra -Wno-unused-parameter -Werror -O3 -msse2 -mfpmath=sse -flto +CFLAGS += -Wall -Werror -O3 -msse2 -mfpmath=sse -flto CPPFLAGS += -I./lib CXXFLAGS += $(CFLAGS) -std=c++17 -fno-rtti LDFLAGS += -flto -lpthread @@ -30,21 +30,23 @@ else endif ifeq ($(SYSTEM), Windows) - LEANIFY_SRC += fileio_win.cpp + LEANIFY_OBJ += fileio_win.o else - LEANIFY_SRC += fileio_linux.cpp + LEANIFY_OBJ += fileio_linux.o endif -.PHONY: leanify clean +.PHONY: leanify asan clean -leanify: $(LEANIFY_SRC) $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OBJ) $(ZOPFLIPNG_OBJ) - $(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) $(LDLIBS) -o $@ +leanify: $(LEANIFY_OBJ) $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OBJ) $(ZOPFLIPNG_OBJ) + $(CXX) $^ $(LDFLAGS) $(LDLIBS) -o $@ -$(LZMA_OBJ): CFLAGS := $(filter-out -Wextra,$(CFLAGS)) - -$(MOZJPEG_OBJ): CFLAGS := $(filter-out -Wextra,$(CFLAGS)) +$(LEANIFY_OBJ): CFLAGS += -Wextra -Wno-unused-parameter $(ZOPFLI_OBJ): CFLAGS += -Wno-unused-function +asan: CFLAGS += -g -fsanitize=address -fno-omit-frame-pointer +asan: LDFLAGS := -fsanitize=address $(filter-out -s,$(LDFLAGS)) +asan: leanify + clean: - rm -f $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OBJ) $(ZOPFLIPNG_OBJ) leanify + rm -f $(LEANIFY_OBJ) $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OBJ) $(ZOPFLIPNG_OBJ) leanify From e3cedecacbfa705b7d831c32d4c9f194bc74e607 Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Fri, 4 Feb 2022 23:17:04 -0800 Subject: [PATCH 11/12] Makefile: stop using gold linker --- Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Makefile b/Makefile index e794ddb..7b79b40 100644 --- a/Makefile +++ b/Makefile @@ -17,11 +17,6 @@ else SYSTEM := $(shell uname -s) endif -# Gold linker only supports Linux -ifeq ($(SYSTEM), Linux) - LDFLAGS += -fuse-ld=gold -endif - ifeq ($(SYSTEM), Darwin) LDLIBS += -liconv else From 66d25e47613062117d8abbfef9e77fcab2abd57d Mon Sep 17 00:00:00 2001 From: Sen Jiang Date: Thu, 9 Jun 2022 02:04:33 -0700 Subject: [PATCH 12/12] LZMA: silence dangling-pointer warning fix #84 --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 7b79b40..a160d71 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,8 @@ leanify: $(LEANIFY_OBJ) $(LZMA_OBJ) $(MOZJPEG_OBJ) $(PUGIXML_OBJ) $(ZOPFLI_OB $(LEANIFY_OBJ): CFLAGS += -Wextra -Wno-unused-parameter +$(LZMA_OBJ): CFLAGS += -Wno-unknown-warning-option -Wno-dangling-pointer + $(ZOPFLI_OBJ): CFLAGS += -Wno-unused-function asan: CFLAGS += -g 
-fsanitize=address -fno-omit-frame-pointer