From 313454dfae37ac775f791d3b73587a6201eccc76 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Tue, 27 Feb 2018 23:09:09 -0800 Subject: [PATCH 1/7] "init" --- paddle/fluid/CMakeLists.txt | 1 + paddle/fluid/recordio/CMakeLists.txt | 2 + paddle/fluid/recordio/chunk.h | 119 ++++++++++++++++++++++++++ paddle/fluid/recordio/header.cc | 81 ++++++++++++++++++ paddle/fluid/recordio/header.h | 66 ++++++++++++++ paddle/fluid/recordio/header_test.cc | 45 ++++++++++ paddle/fluid/recordio/range_scanner.h | 69 +++++++++++++++ paddle/fluid/recordio/scanner.h | 44 ++++++++++ paddle/fluid/recordio/writer.cc | 45 ++++++++++ paddle/fluid/recordio/writer.h | 56 ++++++++++++ 10 files changed, 528 insertions(+) create mode 100644 paddle/fluid/recordio/CMakeLists.txt create mode 100644 paddle/fluid/recordio/chunk.h create mode 100644 paddle/fluid/recordio/header.cc create mode 100644 paddle/fluid/recordio/header.h create mode 100644 paddle/fluid/recordio/header_test.cc create mode 100644 paddle/fluid/recordio/range_scanner.h create mode 100644 paddle/fluid/recordio/scanner.h create mode 100644 paddle/fluid/recordio/writer.cc create mode 100644 paddle/fluid/recordio/writer.h diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index 7405ef17d3e01..d725763b01d59 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -5,3 +5,4 @@ add_subdirectory(operators) add_subdirectory(pybind) add_subdirectory(inference) add_subdirectory(string) +add_subdirectory(recordio) diff --git a/paddle/fluid/recordio/CMakeLists.txt b/paddle/fluid/recordio/CMakeLists.txt new file mode 100644 index 0000000000000..37c3214ff8673 --- /dev/null +++ b/paddle/fluid/recordio/CMakeLists.txt @@ -0,0 +1,2 @@ +cc_library(header SRCS header.cc) +cc_test(header_test SRCS header_test.cc DEPS header) diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h new file mode 100644 index 0000000000000..77c0ae81b7420 --- /dev/null +++ b/paddle/fluid/recordio/chunk.h @@ -0,0 
+1,119 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +// Chunk +// a chunk contains the Header and optionally compressed records. +class Chunk { +public: + Chunk() = default; + void Add(const char* record, size_t length); + void Add(const std::string&); + + bool Dump(std::ostream& os, Compressor ct); + void Parse(std::istream& iss, int64_t offset); + const std::string Record(int i) { return records_[i]; } + +private: + std::vector records_; + size_t num_bytes_; +}; + +size_t CompressData(const std::stringstream& ss, Compressor ct, char* buffer); + +uint32_t DeflateData(char* buffer, uint32_t size, Compressor c); + +// implementation +void Chunk::Add(const std::string& s) { + num_bytes_ += s.size() * sizeof(char); + records_.emplace_back(std::move(s)); + // records_.resize(records_.size()+1); + // records_[records_.size()-1] = s; +} + +void Chunk::Add(const char* record, size_t length) { + Add(std::string(record, length)); +} + +bool Chunk::Dump(std::ostream& os, Compressor ct) { + if (records_.size() == 0) return false; + + // TODO(dzhwinter): + // we pack the string with same size buffer, + // then compress with another buffer. + // Here can be optimized if it is the bottle-neck. 
+ std::ostringstream oss; + for (auto& record : records_) { + unsigned len = record.size(); + oss << len; + oss << record; + // os.write(std::to_string(len).c_str(), sizeof(unsigned)); + // os.write(record.c_str(), record.size()); + } + std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); + size_t compressed = CompressData(oss.str(), ct, buffer.get()); + + // TODO(dzhwinter): crc32 checksum + size_t checksum = compressed; + + Header hdr(records_.size(), checksum, ct, compressed); + + return true; +} + +void Chunk::Parse(std::istream& iss, int64_t offset) { + iss.seekg(offset, iss.beg); + Header hdr; + hdr.Parse(iss); + + std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); + iss.read(buffer.get(), static_cast(hdr.CompressSize())); + // TODO(dzhwinter): checksum + uint32_t deflated_size = + DeflateData(buffer.get(), hdr.CompressSize(), hdr.CompressType()); + std::istringstream deflated(std::string(buffer.get(), deflated_size)); + for (size_t i = 0; i < hdr.NumRecords(); ++i) { + uint32_t rs; + deflated >> rs; + std::string record(rs, '\0'); + deflated.read(&record[0], rs); + records_.emplace_back(record); + num_bytes_ += record.size(); + } +} + +uint32_t DeflateData(char* buffer, uint32_t size, Compressor c) { + uint32_t deflated_size = 0; + std::string uncompressed; + switch (c) { + case Compressor::kNoCompress: + deflated_size = size; + break; + case Compressor::kSnappy: + // snappy::Uncompress(buffer, size, &uncompressed); + // deflated_size = uncompressed.size(); + // memcpy(buffer, uncompressed.data(), uncompressed.size() * + // sizeof(char)); + break; + } + return deflated_size; +} diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc new file mode 100644 index 0000000000000..c82d05c3a2573 --- /dev/null +++ b/paddle/fluid/recordio/header.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/recordio/header.h" + +namespace paddle { +namespace recordio { + +Header::Header() + : num_records_(0), + checksum_(0), + compressor_(Compressor::kNoCompress), + compress_size_(0) {} + +Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs) + : num_records_(num), checksum_(sum), compressor_(c), compress_size_(cs) {} + +void Header::Parse(std::istream& iss) { + iss.read(reinterpret_cast(&num_records_), sizeof(uint32_t)); + iss.read(reinterpret_cast(&checksum_), sizeof(uint32_t)); + iss.read(reinterpret_cast(&compressor_), sizeof(uint32_t)); + iss.read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +} + +void Header::Write(std::ostream& os) { + os.write(reinterpret_cast(&num_records_), sizeof(uint32_t)); + os.write(reinterpret_cast(&checksum_), sizeof(uint32_t)); + os.write(reinterpret_cast(&compressor_), sizeof(uint32_t)); + os.write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +} + +// std::ostream& operator << (std::ostream& os, Header h) { +// os << h.num_records_ +// << h.checksum_ +// << static_cast(h.compressor_) +// << h.compress_size_; +// return os; +// } + +std::ostream& operator<<(std::ostream& os, Header h) { + os << h.NumRecords() << h.Checksum() + << static_cast(h.CompressType()) << h.CompressSize(); + return os; +} + +// bool operator==(Header l, Header r) { +// return num_records_ == rhs.NumRecords() && +// checksum_ == rhs.Checksum() && 
+// compressor_ == rhs.CompressType() && +// compress_size_ == rhs.CompressSize(); +// } + +bool operator==(Header l, Header r) { + return l.NumRecords() == r.NumRecords() && l.Checksum() == r.Checksum() && + l.CompressType() == r.CompressType() && + l.CompressSize() == r.CompressSize(); +} + +// size_t CompressData(const std::string& os, Compressor ct, char* buffer) { +// size_t compress_size = 0; + +// // std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); +// // std::string compressed; +// compress_size =os.size(); +// memcpy(buffer, os.c_str(), compress_size); +// return compress_size; +// } + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/header.h b/paddle/fluid/recordio/header.h new file mode 100644 index 0000000000000..92c040617dba2 --- /dev/null +++ b/paddle/fluid/recordio/header.h @@ -0,0 +1,66 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace paddle { +namespace recordio { + +// Default ChunkSize +constexpr size_t kDefaultMaxChunkSize = 32 * 1024 * 1024; +// MagicNumber for memory checking +constexpr uint32_t kMagicNumber = 0x01020304; + +enum class Compressor { + // NoCompression means writing raw chunk data into files. + // With other choices, chunks are compressed before written. + kNoCompress = 0, + // Snappy had been the default compressing algorithm widely + // used in Google. 
It compromises between speech and + // compression ratio. + kSnappy = 1, + // Gzip is a well-known compression algorithm. It is + // recommmended only you are looking for compression ratio. + kGzip = 2, +}; + +// Header is the metadata of Chunk +class Header { +public: + Header(); + Header(uint32_t num, uint32_t sum, Compressor ct, uint32_t cs); + + void Write(std::ostream& os); + void Parse(std::istream& iss); + + uint32_t NumRecords() const { return num_records_; } + uint32_t Checksum() const { return checksum_; } + Compressor CompressType() const { return compressor_; } + uint32_t CompressSize() const { return compress_size_; } + +private: + uint32_t num_records_; + uint32_t checksum_; + Compressor compressor_; + uint32_t compress_size_; +}; + +// Allow Header Loggable +std::ostream& operator<<(std::ostream& os, Header h); +bool operator==(Header l, Header r); + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc new file mode 100644 index 0000000000000..ae8201ab00a2e --- /dev/null +++ b/paddle/fluid/recordio/header_test.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/header.h" + +#include + +#include "gtest/gtest.h" + +using namespace recordio; + +TEST(Recordio, ChunkHead) { + Header hdr(0, 1, Compressor::kGzip, 3); + std::ostringstream oss; + hdr.Write(oss); + + std::istringstream iss(oss.str()); + Header hdr2; + hdr2.Parse(iss); + + std::ostringstream oss2; + hdr2.Write(oss2); + EXPECT_STREQ(oss2.str().c_str(), oss.str().c_str()); +} + +TEST(Recordio, Stream) { + Header hdr(0, 1, static_cast(2), 3); + std::ostringstream oss1; + hdr.Write(oss1); + + std::ostringstream oss2; + oss2 << hdr; + EXPECT_STREQ(oss2.str().c_str(), oss1.str().c_str()); +} diff --git a/paddle/fluid/recordio/range_scanner.h b/paddle/fluid/recordio/range_scanner.h new file mode 100644 index 0000000000000..44b1b49abc2bf --- /dev/null +++ b/paddle/fluid/recordio/range_scanner.h @@ -0,0 +1,69 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +class Index { +public: + int NumRecords() { return num_records_; } + + // Locate returns the index of chunk that contains the given record, + // and the record index within the chunk. It returns (-1, -1) if the + // record is out of range. 
+ void Locate(int record_idx, std::pair* out) { + size_t sum = 0; + for (size_t i = 0; i < chunk_lens_.size(); ++i) { + sum += chunk_lens_[i]; + if (static_cast(record_idx) < sum) { + out->first = i; + out->second = record_idx - sum + chunk_lens_[i]; + return; + } + } + // out->swap(std::make_pair(-1, -1)); + out->first = -1; + out->second = -1; + } + +private: + std::vector chunk_offsets_; + std::vector chunk_lens_; + int num_records_; + std::vector chunk_records_; +}; + +// RangeScanner +// creates a scanner that sequencially reads records in the +// range [start, start+len). If start < 0, it scans from the +// beginning. If len < 0, it scans till the end of file. +class RangeScanner { +public: + RangeScanner(std::istream is, Index idx, int start, int end); + bool Scan(); + const std::string Record(); + +private: + std::istream stream_; + Index index_; + int start_, end_, cur_; + int chunk_index_; + std::unique_ptr chunk_; +}; diff --git a/paddle/fluid/recordio/scanner.h b/paddle/fluid/recordio/scanner.h new file mode 100644 index 0000000000000..8bcdea3c6fe59 --- /dev/null +++ b/paddle/fluid/recordio/scanner.h @@ -0,0 +1,44 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +// Scanner + +class Scanner { +public: + Scanner(const char* paths); + const std::string Record(); + bool Scan(); + void Close(); + +private: + bool NextFile(); + int Err() { return err_; } + +private: + std::vector paths_; + FILE* cur_file_; + RangeScanner* cur_scanner_; + int path_idx_; + bool end_; + int err_; +}; diff --git a/paddle/fluid/recordio/writer.cc b/paddle/fluid/recordio/writer.cc new file mode 100644 index 0000000000000..9383199889d5e --- /dev/null +++ b/paddle/fluid/recordio/writer.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/writer.h" + +namespace paddle { +namespace recordio { + +Writer::Writer(std::ostream& os) + : stream_(os.rdbuf()), max_chunk_size_(0), compressor_(0) {} + +Writer::Writer(std::ostream& os, int maxChunkSize, int compressor) + : stream_(os.rdbuf()), + max_chunk_size_(maxChunkSize), + compressor_(compressor) { + // clear rdstate + stream_.clear(); + chunk_.reset(new Chunk); +} + +size_t Writer::Write(const std::string& buf) {} + +size_t Writer::Write(const char* buf, size_t length) { + // std::string s(buf, length); + Write(std::string(buf, length)); +} + +void Writer::Close() { + stream_.flush(); + stream_.setstate(std::ios::eofbit); +} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/writer.h b/paddle/fluid/recordio/writer.h new file mode 100644 index 0000000000000..49b86a6a28a81 --- /dev/null +++ b/paddle/fluid/recordio/writer.h @@ -0,0 +1,56 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include + +#include "paddle/fluid/platform/macros.h" // for DISABLE COPY ASSIGN +#include "paddle/fluid/recordio/header.h" + +namespace paddle { +namespace recordio { + +// Writer creates a RecordIO file. +class Writer { +public: + Writer(std::ostream& os); + Writer(std::ostream& os, int maxChunkSize, int c); + + // Writes a record. It returns an error if Close has been called. 
+ size_t Write(const char* buf, size_t length); + size_t Write(const std::string& buf); + size_t Write(std::string&& buf); + + // Close flushes the current chunk and makes the writer invalid. + void Close(); + +private: + // Set rdstate to mark a closed writer + std::ostream stream_; + std::unique_ptr chunk_; + // total records size, excluding metadata, before compression. + int max_chunk_size_; + int compressor_; + DISABLE_COPY_AND_ASSIGN(Writer); +}; + +template +Writer& operator<<(const T& val) { + stream_ << val; + return *this; +} + +} // namespace recordio +} // namespace paddle From af5dcda481fc5823bdcfedd4b56034fb5a461109 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 4 Mar 2018 22:43:36 +0800 Subject: [PATCH 2/7] "add testing" --- paddle/fluid/recordio/header_test.cc | 11 +---------- paddle/fluid/recordio/writer.cc | 7 ++++--- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index ae8201ab00a2e..991ea05ec14e6 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -32,14 +32,5 @@ TEST(Recordio, ChunkHead) { std::ostringstream oss2; hdr2.Write(oss2); EXPECT_STREQ(oss2.str().c_str(), oss.str().c_str()); -} - -TEST(Recordio, Stream) { - Header hdr(0, 1, static_cast(2), 3); - std::ostringstream oss1; - hdr.Write(oss1); - - std::ostringstream oss2; - oss2 << hdr; - EXPECT_STREQ(oss2.str().c_str(), oss1.str().c_str()); + EXPECT_EQ(hdr == hdr2); } diff --git a/paddle/fluid/recordio/writer.cc b/paddle/fluid/recordio/writer.cc index 9383199889d5e..08d3d2c5778b4 100644 --- a/paddle/fluid/recordio/writer.cc +++ b/paddle/fluid/recordio/writer.cc @@ -29,13 +29,14 @@ Writer::Writer(std::ostream& os, int maxChunkSize, int compressor) chunk_.reset(new Chunk); } -size_t Writer::Write(const std::string& buf) {} +size_t Writer::Write(const std::string& buf) { return Write(std::string(buf)); } size_t Writer::Write(const char* buf, size_t 
length) { - // std::string s(buf, length); - Write(std::string(buf, length)); + return Write(std::string(buf, length)); } +size_t Writer::Write(std::string&& buf) {} + void Writer::Close() { stream_.flush(); stream_.setstate(std::ios::eofbit); From 69c79911086a66ce946a0c381653236aa68db449 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 5 Mar 2018 21:54:39 +0800 Subject: [PATCH 3/7] "add snappy library" --- cmake/external/snappy.cmake | 57 ++++++++++++++++++++++++++++ paddle/fluid/recordio/CMakeLists.txt | 2 + paddle/fluid/recordio/chunk.h | 2 + paddle/fluid/recordio/filesys.h | 24 ++++++++++++ paddle/fluid/recordio/header_test.cc | 4 +- paddle/fluid/recordio/io.cc | 53 ++++++++++++++++++++++++++ paddle/fluid/recordio/io.h | 53 ++++++++++++++++++++++++++ paddle/fluid/recordio/io_test.cc | 36 ++++++++++++++++++ paddle/fluid/recordio/scanner.h | 3 +- paddle/fluid/recordio/writer.cc | 35 ++++++++++------- paddle/fluid/recordio/writer.h | 24 +++++------- paddle/fluid/recordio/writer_test.cc | 21 ++++++++++ 12 files changed, 283 insertions(+), 31 deletions(-) create mode 100644 cmake/external/snappy.cmake create mode 100644 paddle/fluid/recordio/filesys.h create mode 100644 paddle/fluid/recordio/io.cc create mode 100644 paddle/fluid/recordio/io.h create mode 100644 paddle/fluid/recordio/io_test.cc create mode 100644 paddle/fluid/recordio/writer_test.cc diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake new file mode 100644 index 0000000000000..2c109727cfa64 --- /dev/null +++ b/cmake/external/snappy.cmake @@ -0,0 +1,57 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +IF(MOBILE_INFERENCE) + return() +ENDIF() + +include (ExternalProject) + +# NOTE: snappy is needed when linking with recordio + +SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy) +SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) +SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE) + +ExternalProject_Add( + extern_snappy + GIT_REPOSITORY "https://github.com/google/snappy" + GIT_TAG "1.1.7" + PREFIX ${SNAPPY_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_COMMAND make -j8 + INSTALL_COMMAND make install +) + +add_library(snappy STATIC IMPORTED GLOBAL) +set_property(TARGET snappy PROPERTY IMPORTED_LOCATION + "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") + +include_directories(${SNAPPY_INCLUDE_DIR}) +add_dependencies(snappy extern_snappy) diff --git a/paddle/fluid/recordio/CMakeLists.txt b/paddle/fluid/recordio/CMakeLists.txt index 37c3214ff8673..86b4583c7b8be 100644 --- 
a/paddle/fluid/recordio/CMakeLists.txt +++ b/paddle/fluid/recordio/CMakeLists.txt @@ -1,2 +1,4 @@ cc_library(header SRCS header.cc) cc_test(header_test SRCS header_test.cc DEPS header) +cc_library(io SRCS io.cc DEPS stringpiece) +cc_test(io_test SRCS io_test.cc DEPS io) diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h index 77c0ae81b7420..48626b92fed93 100644 --- a/paddle/fluid/recordio/chunk.h +++ b/paddle/fluid/recordio/chunk.h @@ -32,9 +32,11 @@ class Chunk { bool Dump(std::ostream& os, Compressor ct); void Parse(std::istream& iss, int64_t offset); const std::string Record(int i) { return records_[i]; } + size_t NumBytes() { return num_bytes_; } private: std::vector records_; + // sum of record lengths in bytes. size_t num_bytes_; }; diff --git a/paddle/fluid/recordio/filesys.h b/paddle/fluid/recordio/filesys.h new file mode 100644 index 0000000000000..b21702bf3a0dc --- /dev/null +++ b/paddle/fluid/recordio/filesys.h @@ -0,0 +1,24 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include + +class DefaultFileSys { +public: +private: +}; diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index 991ea05ec14e6..322f63190a594 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -18,7 +18,7 @@ #include "gtest/gtest.h" -using namespace recordio; +using namespace paddle::recordio; TEST(Recordio, ChunkHead) { Header hdr(0, 1, Compressor::kGzip, 3); @@ -32,5 +32,5 @@ TEST(Recordio, ChunkHead) { std::ostringstream oss2; hdr2.Write(oss2); EXPECT_STREQ(oss2.str().c_str(), oss.str().c_str()); - EXPECT_EQ(hdr == hdr2); + EXPECT_TRUE(hdr == hdr2); } diff --git a/paddle/fluid/recordio/io.cc b/paddle/fluid/recordio/io.cc new file mode 100644 index 0000000000000..2c82d1d42d9b2 --- /dev/null +++ b/paddle/fluid/recordio/io.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/io.h" +#include "paddle/fluid/string/piece.h" + +namespace paddle { +namespace recordio { +Stream* Stream::Open(const char* filename, const char* mode) { + // Create IOStream for different filesystems + // HDFS: hdfs://tmp/file.txt + // Default: /tmp/file.txt + FILE* fp = nullptr; + if (string::HasPrefix(string::Piece(filename), string::Piece("/"))) { + fp = fopen(filename, mode); + } + return new FileStream(fp); +} + +size_t FileStream::Read(void* ptr, size_t size) { + return fread(ptr, 1, size, fp_); +} + +void FileStream::Write(const void* ptr, size_t size) { + size_t real = fwrite(ptr, 1, size, fp_); + PADDLE_ENFORCE(real == size, "FileStream write incomplete."); +} + +size_t FileStream::Tell() { return ftell(fp_); } +void FileStream::Seek(size_t p) { fseek(fp_, static_cast(p), SEEK_SET); } + +bool FileStream::Eof() { return feof(fp_); } + +void FileStream::Close() { + if (fp_ != nullptr) { + fclose(fp_); + fp_ = nullptr; + } +} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/io.h b/paddle/fluid/recordio/io.h new file mode 100644 index 0000000000000..ff647b95d8ebf --- /dev/null +++ b/paddle/fluid/recordio/io.h @@ -0,0 +1,53 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace recordio { + +// Stream abstract object for read and write +class Stream { +public: + virtual ~Stream() {} + virtual size_t Read(void* ptr, size_t size); + virtual void Write(const void* ptr, size_t size); + virtual size_t Tell(); + virtual void Seek(); + // Create Stream Instance + static Stream* Open(const char* filename, const char* mode); +}; + +// FileStream +class FileStream : public Stream { +public: + explicit FileStream(FILE* fp) : fp_(fp) {} + ~FileStream() { this->Close(); } + size_t Read(void* ptr, size_t size); + void Write(const void* ptr, size_t size); + size_t Tell(); + void Seek(size_t p); + bool Eof(); + void Close(); + +private: + FILE* fp_; +}; + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/io_test.cc b/paddle/fluid/recordio/io_test.cc new file mode 100644 index 0000000000000..b2e5733ffed52 --- /dev/null +++ b/paddle/fluid/recordio/io_test.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/io.h" + +#include "gtest/gtest.h" + +using namespace paddle::recordio; + +TEST(FileStream, IO) { + { + // Write + Stream* fs = Stream::Open("/tmp/record_0", "rw"); + fs->Write("hello", 6); + delete fs; + } + { + // Read + Stream* fs = Stream::Open("/tmp/record_0", "r+"); + char buf[10]; + fs->Read(&buf, 6); + EXPECT_STREQ(buf, "hello"); + delete fs; + } +} diff --git a/paddle/fluid/recordio/scanner.h b/paddle/fluid/recordio/scanner.h index 8bcdea3c6fe59..dc09bd5fdda34 100644 --- a/paddle/fluid/recordio/scanner.h +++ b/paddle/fluid/recordio/scanner.h @@ -21,8 +21,9 @@ #include #include -// Scanner +class RangeScanner; +// Scanner is a scanner for multiple recordio files. class Scanner { public: Scanner(const char* paths); diff --git a/paddle/fluid/recordio/writer.cc b/paddle/fluid/recordio/writer.cc index 08d3d2c5778b4..acb84fb8e8d3d 100644 --- a/paddle/fluid/recordio/writer.cc +++ b/paddle/fluid/recordio/writer.cc @@ -17,29 +17,36 @@ namespace paddle { namespace recordio { -Writer::Writer(std::ostream& os) - : stream_(os.rdbuf()), max_chunk_size_(0), compressor_(0) {} +Writer::Writer(Stream* fo) : stream_(fo), max_chunk_size_(0), compressor_(0) {} -Writer::Writer(std::ostream& os, int maxChunkSize, int compressor) - : stream_(os.rdbuf()), +Writer::Writer(Stream* fo, int maxChunkSize, int compressor) + : stream_(fo), max_chunk_size_(maxChunkSize), - compressor_(compressor) { - // clear rdstate - stream_.clear(); + compressor_(static_cast(compressor)) { chunk_.reset(new Chunk); } -size_t Writer::Write(const std::string& buf) { return Write(std::string(buf)); } - -size_t Writer::Write(const char* buf, size_t length) { - return Write(std::string(buf, length)); +size_t Writer::Write(const std::string& record) { + if (stream_ == nullptr) { + LOG(WARNING) << "Cannot write since writer had been closed."; + return 0; + } + if ((record.size() + chunk_->NumBytes()) > max_chunk_size_) { + chunk_->Dump(stream_, compressor_); + } + 
chunk_->Add(record); + return record.size(); } -size_t Writer::Write(std::string&& buf) {} +// size_t Writer::Write(const char* buf, size_t length) { +// return Write(std::string(buf, length)); +// } + +// size_t Writer::Write(std::string&& buf) {} void Writer::Close() { - stream_.flush(); - stream_.setstate(std::ios::eofbit); + chunk_->Dump(stream_, compressor_); + stream_ = nullptr; } } // namespace recordio diff --git a/paddle/fluid/recordio/writer.h b/paddle/fluid/recordio/writer.h index 49b86a6a28a81..250d59813cbaf 100644 --- a/paddle/fluid/recordio/writer.h +++ b/paddle/fluid/recordio/writer.h @@ -16,8 +16,9 @@ #include #include -#include "paddle/fluid/platform/macros.h" // for DISABLE COPY ASSIGN +#include "paddle/fluid/platform/macros.h" // DISABLE_COPY_ASSIGN #include "paddle/fluid/recordio/header.h" +#include "paddle/fluid/recordio/io.h" namespace paddle { namespace recordio { @@ -25,32 +26,27 @@ namespace recordio { // Writer creates a RecordIO file. class Writer { public: - Writer(std::ostream& os); - Writer(std::ostream& os, int maxChunkSize, int c); + Writer(Stream* fo); + Writer(Stream* fo, int maxChunkSize, int c); // Writes a record. It returns an error if Close has been called. size_t Write(const char* buf, size_t length); - size_t Write(const std::string& buf); - size_t Write(std::string&& buf); // Close flushes the current chunk and makes the writer invalid. void Close(); private: - // Set rdstate to mark a closed writer - std::ostream stream_; + // Set nullptr to mark a closed writer + Stream* stream_; + // Chunk for store object std::unique_ptr chunk_; // total records size, excluding metadata, before compression. 
int max_chunk_size_; - int compressor_; + // Compressor used for chunk + Compressor compressor_; + DISABLE_COPY_AND_ASSIGN(Writer); }; -template -Writer& operator<<(const T& val) { - stream_ << val; - return *this; -} - } // namespace recordio } // namespace paddle diff --git a/paddle/fluid/recordio/writer_test.cc b/paddle/fluid/recordio/writer_test.cc new file mode 100644 index 0000000000000..1ba32bf2df523 --- /dev/null +++ b/paddle/fluid/recordio/writer_test.cc @@ -0,0 +1,21 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "paddle/fluid/recordio/writer.h" + +#include "gtest/gtest.h" + +using namespace paddle::recordio; + +TEST(Writer, Normal) { Stream } From 7016979cd55b519e404657f0a3a4015c0208f074 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 5 Mar 2018 21:55:27 +0800 Subject: [PATCH 4/7] "add crc32 encoder" --- paddle/fluid/recordio/CMakeLists.txt | 1 + paddle/fluid/recordio/chunk.cc | 105 ++ paddle/fluid/recordio/chunk.h | 113 +- paddle/fluid/recordio/chunk_test.cc | 23 + paddle/fluid/recordio/crc32.h | 33 + paddle/fluid/recordio/detail/crc.h | 1899 ++++++++++++++++++++++++++ paddle/fluid/recordio/header.cc | 40 +- paddle/fluid/recordio/header.h | 6 +- paddle/fluid/recordio/header_test.cc | 7 +- paddle/fluid/recordio/io.cc | 4 +- paddle/fluid/recordio/io.h | 13 +- paddle/fluid/recordio/io_test.cc | 2 +- paddle/fluid/recordio/writer.cc | 8 +- paddle/fluid/recordio/writer.h | 2 - paddle/fluid/recordio/writer_test.cc | 2 +- 15 files changed, 2114 insertions(+), 144 deletions(-) create mode 100644 paddle/fluid/recordio/chunk.cc create mode 100644 paddle/fluid/recordio/chunk_test.cc create mode 100644 paddle/fluid/recordio/crc32.h create mode 100644 paddle/fluid/recordio/detail/crc.h diff --git a/paddle/fluid/recordio/CMakeLists.txt b/paddle/fluid/recordio/CMakeLists.txt index 86b4583c7b8be..5d55709b4bbff 100644 --- a/paddle/fluid/recordio/CMakeLists.txt +++ b/paddle/fluid/recordio/CMakeLists.txt @@ -2,3 +2,4 @@ cc_library(header SRCS header.cc) cc_test(header_test SRCS header_test.cc DEPS header) cc_library(io SRCS io.cc DEPS stringpiece) cc_test(io_test SRCS io_test.cc DEPS io) +cc_library(chunk SRCS chunk.cc DEPS snappy) diff --git a/paddle/fluid/recordio/chunk.cc b/paddle/fluid/recordio/chunk.cc new file mode 100644 index 0000000000000..1ab2c7dd55828 --- /dev/null +++ b/paddle/fluid/recordio/chunk.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/recordio/chunk.h" + +#include +#include +#include + +#include "snappy.h" + +#include "paddle/fluid/recordio/crc32.h" + +namespace paddle { +namespace recordio { + +void Chunk::Add(const char* record, size_t length) { + records_.emplace_after(std::move(s)); + num_bytes_ += s.size() * sizeof(char); +} + +bool Chunk::Dump(Stream* fo, Compressor ct) { + // NOTE(dzhwinter): don't check records.numBytes instead, because + // empty records are allowed. 
+ if (records_.size() == 0) return false; + + // pack the record into consecutive memory for compress + std::ostringstream os; + for (auto& record : records_) { + os.write(record.size(), sizeof(size_t)); + os.write(record.data(), static_cast(record.size())); + } + + std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); + size_t compressed = + CompressData(os.str().c_str(), num_bytes_, ct, buffer.get()); + uint32_t checksum = Crc32(buffer.get(), compressed); + Header hdr(records_.size(), checksum, ct, static_cast(compressed)); + hdr.Write(fo); + fo->Write(buffer.get(), compressed); + return true; +} + +void Chunk::Parse(Stream* fi, size_t offset) { + fi->Seek(offset); + Header hdr; + hdr.Parse(fi); + + std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); + fi->Read(buffer.get(), static_cast(hdr.CompressSize())); + uint32_t deflated_size = + DeflateData(buffer.get(), hdr.CompressSize(), hdr.CompressType()); + std::istringstream deflated(std::string(buffer.get(), deflated_size)); + for (size_t i = 0; i < hdr.NumRecords(); ++i) { + uint32_t rs; + deflated >> rs; + std::string record(rs, '\0'); + deflated.read(&record[0], rs); + records_.emplace_back(record); + num_bytes_ += record.size(); + } +} + +size_t CompressData(const char* in, + size_t in_length, + Compressor ct, + char* out) { + size_t compressd_size = 0; + switch (ct) { + case Compressor::kNoCompress: + // no compression: copy the input through unchanged + memcpy(out, in, in_length); + compressd_size = in_length; + break; + case Compressor::kSnappy: + snappy::RawCompress(in, in_length, out, &compressd_size); + break; + } + return compressd_size; +} + +void DeflateData(const char* in, size_t in_length, Compressor ct, char* out) { + switch (ct) { + case Compressor::kNoCompress: + memcpy(out, in, in_length); + break; + case Compressor::kSnappy: + snappy::RawUncompress(in, in_length, out); + break; + } +} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h index
48626b92fed93..975604df3ca56 100644 --- a/paddle/fluid/recordio/chunk.h +++ b/paddle/fluid/recordio/chunk.h @@ -13,109 +13,36 @@ // limitations under the License. #pragma once - -#include -#include -#include +#include #include -#include -#include -// Chunk -// a chunk contains the Header and optionally compressed records. +#include "paddle/fluid/recordio/header.h" +#include "paddle/fluid/recordio/io.h" + +namespace paddle { +namespace recordio { + +// A Chunk contains the Header and optionally compressed records. class Chunk { public: - Chunk() = default; - void Add(const char* record, size_t length); - void Add(const std::string&); - - bool Dump(std::ostream& os, Compressor ct); - void Parse(std::istream& iss, int64_t offset); - const std::string Record(int i) { return records_[i]; } + Chunk() {} + void Add(const char* record, size_t size); + // dump the chunk into w, and clears the chunk and makes it ready for + // the next add invocation. + bool Dump(Stream* fo, Compressor ct); + void Parse(Stream* fi, size_t offset); size_t NumBytes() { return num_bytes_; } private: - std::vector records_; + std::forward_list records_; // sum of record lengths in bytes. size_t num_bytes_; + DISABLE_COPY_AND_ASSIGN(Chunk); }; -size_t CompressData(const std::stringstream& ss, Compressor ct, char* buffer); - -uint32_t DeflateData(char* buffer, uint32_t size, Compressor c); - -// implementation -void Chunk::Add(const std::string& s) { - num_bytes_ += s.size() * sizeof(char); - records_.emplace_back(std::move(s)); - // records_.resize(records_.size()+1); - // records_[records_.size()-1] = s; -} - -void Chunk::Add(const char* record, size_t length) { - Add(std::string(record, length)); -} - -bool Chunk::Dump(std::ostream& os, Compressor ct) { - if (records_.size() == 0) return false; - - // TODO(dzhwinter): - // we pack the string with same size buffer, - // then compress with another buffer. - // Here can be optimized if it is the bottle-neck. 
- std::ostringstream oss; - for (auto& record : records_) { - unsigned len = record.size(); - oss << len; - oss << record; - // os.write(std::to_string(len).c_str(), sizeof(unsigned)); - // os.write(record.c_str(), record.size()); - } - std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); - size_t compressed = CompressData(oss.str(), ct, buffer.get()); - - // TODO(dzhwinter): crc32 checksum - size_t checksum = compressed; - - Header hdr(records_.size(), checksum, ct, compressed); - - return true; -} - -void Chunk::Parse(std::istream& iss, int64_t offset) { - iss.seekg(offset, iss.beg); - Header hdr; - hdr.Parse(iss); +size_t CompressData(const char* in, size_t in_length, Compressor ct, char* out); - std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); - iss.read(buffer.get(), static_cast(hdr.CompressSize())); - // TODO(dzhwinter): checksum - uint32_t deflated_size = - DeflateData(buffer.get(), hdr.CompressSize(), hdr.CompressType()); - std::istringstream deflated(std::string(buffer.get(), deflated_size)); - for (size_t i = 0; i < hdr.NumRecords(); ++i) { - uint32_t rs; - deflated >> rs; - std::string record(rs, '\0'); - deflated.read(&record[0], rs); - records_.emplace_back(record); - num_bytes_ += record.size(); - } -} +void DeflateData(const char* in, size_t in_length, Compressor ct, char* out); -uint32_t DeflateData(char* buffer, uint32_t size, Compressor c) { - uint32_t deflated_size = 0; - std::string uncompressed; - switch (c) { - case Compressor::kNoCompress: - deflated_size = size; - break; - case Compressor::kSnappy: - // snappy::Uncompress(buffer, size, &uncompressed); - // deflated_size = uncompressed.size(); - // memcpy(buffer, uncompressed.data(), uncompressed.size() * - // sizeof(char)); - break; - } - return deflated_size; -} +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/chunk_test.cc b/paddle/fluid/recordio/chunk_test.cc new file mode 100644 index 0000000000000..8aec47c234d82 --- /dev/null +++ 
b/paddle/fluid/recordio/chunk_test.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/recordio/chunk.h" + +#include + +#include "gtest/gtest.h" + +using namespace paddle::recordio; + +TEST(Chunk, SaveLoad) {} diff --git a/paddle/fluid/recordio/crc32.h b/paddle/fluid/recordio/crc32.h new file mode 100644 index 0000000000000..77b430356f81b --- /dev/null +++ b/paddle/fluid/recordio/crc32.h @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// A wrapper on crc library https://github.com/d-bahr/CRCpp +#include + +#include "paddle/fluid/recordio/detail/crc.h" + +namespace paddle { +namespace recordio { + +// Checksum of data[0, size) using the CRC::CRC_32 parameters. Usage: +// char data[] = "hello,world"; +// crc = Crc32(data, 12); +// Assert_EQ(crc, 68a85159); + +inline uint32_t Crc32(const char* data, size_t size) { + return CRC::Calculate(data, size, CRC::CRC_32()); +} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/detail/crc.h b/paddle/fluid/recordio/detail/crc.h new file mode 100644 index 0000000000000..ef8390c34a445 --- /dev/null +++ b/paddle/fluid/recordio/detail/crc.h @@ -0,0 +1,1899 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + @file CRC.h + @author Daniel Bahr + @version 0.2.0.6 + @copyright + @parblock + CRC++ + Copyright (c) 2016, Daniel Bahr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright notice, + this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, + this list of conditions and the following disclaimer in the + documentation + and/or other materials provided with the distribution.
+ + * Neither the name of CRC++ nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + @endparblock +*/ + +/* + CRC++ can be configured by setting various #defines before #including this + header file: + + #define crcpp_uint8 - Specifies the type + used to store CRCs that have a width of 8 bits or less. + This type is not used + in CRC calculations. Defaults to ::std::uint8_t. + #define crcpp_uint16 - Specifies the type + used to store CRCs that have a width between 9 and 16 bits (inclusive). + This type is not used + in CRC calculations. Defaults to ::std::uint16_t. + #define crcpp_uint32 - Specifies the type + used to store CRCs that have a width between 17 and 32 bits (inclusive). + This type is not used + in CRC calculations. Defaults to ::std::uint32_t. + #define crcpp_uint64 - Specifies the type + used to store CRCs that have a width between 33 and 64 bits (inclusive). + This type is not used + in CRC calculations. Defaults to ::std::uint64_t. + #define crcpp_size - This type is used for + loop iteration and function signatures only. Defaults to ::std::size_t. 
+ #define CRCPP_USE_NAMESPACE - Define to place all + CRC++ code within the ::CRCPP namespace. + #define CRCPP_BRANCHLESS - Define to enable a + branchless CRC implementation. The branchless implementation uses a single + integer + multiplication in the + bit-by-bit calculation instead of a small conditional. The branchless + implementation + may be faster on + processor architectures which support single-instruction integer + multiplication. + #define CRCPP_USE_CPP11 - Define to enables + C++11 features (move semantics, constexpr, static_assert, etc.). + #define CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - Define to include + definitions for little-used CRCs. +*/ + +#ifndef CRCPP_CRC_H_ +#define CRCPP_CRC_H_ + +#include // Includes CHAR_BIT +#ifdef CRCPP_USE_CPP11 +#include // Includes ::std::size_t +#include // Includes ::std::uint8_t, ::std::uint16_t, ::std::uint32_t, ::std::uint64_t +#else +#include // Includes size_t +#include // Includes uint8_t, uint16_t, uint32_t, uint64_t +#endif +#include // Includes ::std::numeric_limits +#include // Includes ::std::move + +#ifndef crcpp_uint8 +#ifdef CRCPP_USE_CPP11 +/// @brief Unsigned 8-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint8 ::std::uint8_t +#else +/// @brief Unsigned 8-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint8 uint8_t +#endif +#endif + +#ifndef crcpp_uint16 +#ifdef CRCPP_USE_CPP11 +/// @brief Unsigned 16-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint16 ::std::uint16_t +#else +/// @brief Unsigned 16-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint16 uint16_t +#endif +#endif + +#ifndef crcpp_uint32 +#ifdef CRCPP_USE_CPP11 +/// @brief Unsigned 32-bit integer definition, used primarily for parameter +/// definitions. 
+#define crcpp_uint32 ::std::uint32_t +#else +/// @brief Unsigned 32-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint32 uint32_t +#endif +#endif + +#ifndef crcpp_uint64 +#ifdef CRCPP_USE_CPP11 +/// @brief Unsigned 64-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint64 ::std::uint64_t +#else +/// @brief Unsigned 64-bit integer definition, used primarily for parameter +/// definitions. +#define crcpp_uint64 uint64_t +#endif +#endif + +#ifndef crcpp_size +#ifdef CRCPP_USE_CPP11 +/// @brief Unsigned size definition, used for specifying data sizes. +#define crcpp_size ::std::size_t +#else +/// @brief Unsigned size definition, used for specifying data sizes. +#define crcpp_size size_t +#endif +#endif + +#ifdef CRCPP_USE_CPP11 +/// @brief Compile-time expression definition. +#define crcpp_constexpr constexpr +#else +/// @brief Compile-time expression definition. +#define crcpp_constexpr const +#endif + +#ifdef CRCPP_USE_NAMESPACE +namespace CRCPP { +#endif + +/** + @brief Static class for computing CRCs. + @note This class supports computation of full and multi-part CRCs, using a + bit-by-bit algorithm or a + byte-by-byte lookup table. The CRCs are calculated using as many + optimizations as is reasonable. + If compiling with C++11, the constexpr keyword is used liberally so that + many calculations are + performed at compile-time instead of at runtime. +*/ +class CRC { +public: + // Forward declaration + template + struct Table; + + /** + @brief CRC parameters. + */ + template + struct Parameters { + CRCType polynomial; ///< CRC polynomial + CRCType initialValue; ///< Initial CRC value + CRCType finalXOR; ///< Value to XOR with the final CRC + bool reflectInput; ///< true to reflect all input bytes + bool reflectOutput; ///< true to reflect the output CRC (reflection occurs + /// before the final XOR) + + Table MakeTable() const; + }; + + /** + @brief CRC lookup table. 
After construction, the CRC parameters are fixed. + @note A CRC table can be used for multiple CRC calculations. + */ + template + struct Table { + // Constructors are intentionally NOT marked explicit. + Table(const Parameters &parameters); + +#ifdef CRCPP_USE_CPP11 + Table(Parameters &&parameters); +#endif + + const Parameters &GetParameters() const; + + const CRCType *GetTable() const; + + CRCType operator[](unsigned char index) const; + + private: + void InitTable(); + + Parameters + parameters; ///< CRC parameters used to construct the table + CRCType table[1 << CHAR_BIT]; ///< CRC lookup table + }; + + // The number of bits in CRCType must be at least as large as CRCWidth. + // CRCType must be an unsigned integer type or a custom type with operator + // overloads. + template + static CRCType Calculate(const void *data, + crcpp_size size, + const Parameters &parameters); + + template + static CRCType Calculate(const void *data, + crcpp_size size, + const Parameters &parameters, + CRCType crc); + + template + static CRCType Calculate(const void *data, + crcpp_size size, + const Table &lookupTable); + + template + static CRCType Calculate(const void *data, + crcpp_size size, + const Table &lookupTable, + CRCType crc); + +// Common CRCs up to 64 bits.
+// Note: Check values are the computed CRCs when given an ASCII input of +// "123456789" (without null terminator) +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_4_ITU(); + static const Parameters &CRC_5_EPC(); + static const Parameters &CRC_5_ITU(); + static const Parameters &CRC_5_USB(); + static const Parameters &CRC_6_CDMA2000A(); + static const Parameters &CRC_6_CDMA2000B(); + static const Parameters &CRC_6_ITU(); + static const Parameters &CRC_7(); +#endif + static const Parameters &CRC_8(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_8_EBU(); + static const Parameters &CRC_8_MAXIM(); + static const Parameters &CRC_8_WCDMA(); + static const Parameters &CRC_10(); + static const Parameters &CRC_10_CDMA2000(); + static const Parameters &CRC_11(); + static const Parameters &CRC_12_CDMA2000(); + static const Parameters &CRC_12_DECT(); + static const Parameters &CRC_12_UMTS(); + static const Parameters &CRC_13_BBC(); + static const Parameters &CRC_15(); + static const Parameters &CRC_15_MPT1327(); +#endif + static const Parameters &CRC_16_ARC(); + static const Parameters &CRC_16_BUYPASS(); + static const Parameters &CRC_16_CCITTFALSE(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_16_CDMA2000(); + static const Parameters &CRC_16_DECTR(); + static const Parameters &CRC_16_DECTX(); + static const Parameters &CRC_16_DNP(); +#endif + static const Parameters &CRC_16_GENIBUS(); + static const Parameters &CRC_16_KERMIT(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_16_MAXIM(); + static const Parameters &CRC_16_MODBUS(); + static const Parameters &CRC_16_T10DIF(); + static const Parameters &CRC_16_USB(); +#endif + static const Parameters &CRC_16_X25(); + static const Parameters &CRC_16_XMODEM(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_17_CAN(); + static const Parameters &CRC_21_CAN(); + static const 
Parameters &CRC_24(); + static const Parameters &CRC_24_FLEXRAYA(); + static const Parameters &CRC_24_FLEXRAYB(); + static const Parameters &CRC_30(); +#endif + static const Parameters &CRC_32(); + static const Parameters &CRC_32_BZIP2(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_32_C(); +#endif + static const Parameters &CRC_32_MPEG2(); + static const Parameters &CRC_32_POSIX(); +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + static const Parameters &CRC_32_Q(); + static const Parameters &CRC_40_GSM(); + static const Parameters &CRC_64(); +#endif + +#ifdef CRCPP_USE_CPP11 + CRC() = delete; + CRC(const CRC &other) = delete; + CRC &operator=(const CRC &other) = delete; + CRC(CRC &&other) = delete; + CRC &operator=(CRC &&other) = delete; +#endif + +private: +#ifndef CRCPP_USE_CPP11 + CRC(); + CRC(const CRC &other); + CRC &operator=(const CRC &other); +#endif + + template + static IntegerType Reflect(IntegerType value, crcpp_uint16 numBits); + + template + static CRCType Finalize(CRCType remainder, + CRCType finalXOR, + bool reflectOutput); + + template + static CRCType UndoFinalize(CRCType remainder, + CRCType finalXOR, + bool reflectOutput); + + template + static CRCType CalculateRemainder( + const void *data, + crcpp_size size, + const Parameters &parameters, + CRCType remainder); + + template + static CRCType CalculateRemainder(const void *data, + crcpp_size size, + const Table &lookupTable, + CRCType remainder); + + template + static crcpp_constexpr IntegerType BoundedConstexprValue(IntegerType x); +}; + +/** + @brief Returns a CRC lookup table construct using these CRC parameters. + @note This function primarily exists to allow use of the auto keyword + instead of instantiating + a table directly, since template parameters are not inferred in + constructors.
+ @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC lookup table +*/ +template +inline CRC::Table +CRC::Parameters::MakeTable() const { + // This should take advantage of RVO and optimize out the copy. + return CRC::Table(*this); +} + +/** + @brief Constructs a CRC table from a set of CRC parameters + @param[in] parameters CRC parameters + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC +*/ +template +inline CRC::Table::Table( + const Parameters &parameters) + : parameters(parameters) { + InitTable(); +} + +#ifdef CRCPP_USE_CPP11 +/** + @brief Constructs a CRC table from a set of CRC parameters + @param[in] parameters CRC parameters + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC +*/ +template +inline CRC::Table::Table( + Parameters &&parameters) + : parameters(::std::move(parameters)) { + InitTable(); +} +#endif + +/** + @brief Gets the CRC parameters used to construct the CRC table + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC parameters +*/ +template +inline const CRC::Parameters + &CRC::Table::GetParameters() const { + return parameters; +} + +/** + @brief Gets the CRC table + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC table +*/ +template +inline const CRCType *CRC::Table::GetTable() const { + return table; +} + +/** + @brief Gets an entry in the CRC table + @param[in] index Index into the CRC table + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC table entry +*/ +template +inline CRCType CRC::Table::operator[]( + unsigned char index) const { + return table[index]; +} + +/** + @brief Initializes a CRC table.
+ @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC +*/ +template +inline void CRC::Table::InitTable() { + // For masking off the bits for the CRC (in the event that the number of bits + // in CRCType is larger than CRCWidth) + static crcpp_constexpr CRCType BIT_MASK( + (CRCType(1) << (CRCWidth - CRCType(1))) | + ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1))); + + static crcpp_constexpr CRCType SHIFT( + CRC::BoundedConstexprValue(CHAR_BIT - CRCWidth)); + + CRCType crc; + unsigned char byte = 0; + + // Loop over each dividend (each possible number storable in an unsigned char) + do { + crc = CRC::CalculateRemainder( + &byte, sizeof(byte), parameters, CRCType(0)); + + // This mask might not be necessary; all unit tests pass with this line + // commented out, + // but that might just be a coincidence based on the CRC parameters used for + // testing. + // In any case, this is harmless to leave in and only adds a single machine + // instruction per loop iteration. + crc &= BIT_MASK; + + if (!parameters.reflectInput && CRCWidth < CHAR_BIT) { + // Undo the special operation at the end of the CalculateRemainder() + // function for non-reflected CRCs < CHAR_BIT. + crc <<= SHIFT; + } + + table[byte] = crc; + } while (++byte); +} + +/** + @brief Computes a CRC. + @param[in] data Data over which CRC will be computed + @param[in] size Size of the data + @param[in] parameters CRC parameters + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC +*/ +template +inline CRCType CRC::Calculate(const void *data, + crcpp_size size, + const Parameters &parameters) { + CRCType remainder = + CalculateRemainder(data, size, parameters, parameters.initialValue); + + // No need to mask the remainder here; the mask will be applied in the + // Finalize() function.
+ + return Finalize( + remainder, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); +} +/** + @brief Appends additional data to a previous CRC calculation. + @note This function can be used to compute multi-part CRCs. + @param[in] data Data over which CRC will be computed + @param[in] size Size of the data + @param[in] parameters CRC parameters + @param[in] crc CRC from a previous calculation + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC +*/ +template +inline CRCType CRC::Calculate(const void *data, + crcpp_size size, + const Parameters &parameters, + CRCType crc) { + CRCType remainder = UndoFinalize( + crc, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); + + remainder = CalculateRemainder(data, size, parameters, remainder); + + // No need to mask the remainder here; the mask will be applied in the + // Finalize() function. + + return Finalize( + remainder, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); +} + +/** + @brief Computes a CRC via a lookup table. + @param[in] data Data over which CRC will be computed + @param[in] size Size of the data + @param[in] lookupTable CRC lookup table + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC +*/ +template +inline CRCType CRC::Calculate(const void *data, + crcpp_size size, + const Table &lookupTable) { + const Parameters &parameters = lookupTable.GetParameters(); + + CRCType remainder = + CalculateRemainder(data, size, lookupTable, parameters.initialValue); + + // No need to mask the remainder here; the mask will be applied in the + // Finalize() function. + + return Finalize( + remainder, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); +} + +/** + @brief Appends additional data to a previous CRC calculation using a lookup + table.
+ @note This function can be used to compute multi-part CRCs. + @param[in] data Data over which CRC will be computed + @param[in] size Size of the data + @param[in] lookupTable CRC lookup table + @param[in] crc CRC from a previous calculation + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC +*/ +template +inline CRCType CRC::Calculate(const void *data, + crcpp_size size, + const Table &lookupTable, + CRCType crc) { + const Parameters &parameters = lookupTable.GetParameters(); + + CRCType remainder = UndoFinalize( + crc, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); + + remainder = CalculateRemainder(data, size, lookupTable, remainder); + + // No need to mask the remainder here; the mask will be applied in the + // Finalize() function. + + return Finalize( + remainder, + parameters.finalXOR, + parameters.reflectInput != parameters.reflectOutput); +} + +/** + @brief Reflects (i.e. reverses the bits within) an integer value. + @param[in] value Value to reflect + @param[in] numBits Number of bits in the integer which will be reflected + @tparam IntegerType Integer type of the value being reflected + @return Reflected value +*/ +template +inline IntegerType CRC::Reflect(IntegerType value, crcpp_uint16 numBits) { + IntegerType reversedValue(0); + + for (crcpp_uint16 i = 0; i < numBits; ++i) { + reversedValue = (reversedValue << 1) | (value & 1); + value >>= 1; + } + + return reversedValue; +} + +/** + @brief Computes the final reflection and XOR of a CRC remainder.
+ @param[in] remainder CRC remainder to reflect and XOR + @param[in] finalXOR Final value to XOR with the remainder + @param[in] reflectOutput true to reflect each byte of the remainder before + the XOR + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return Final CRC +*/ +template +inline CRCType CRC::Finalize(CRCType remainder, + CRCType finalXOR, + bool reflectOutput) { + // For masking off the bits for the CRC (in the event that the number of bits + // in CRCType is larger than CRCWidth) + static crcpp_constexpr CRCType BIT_MASK = + (CRCType(1) << (CRCWidth - CRCType(1))) | + ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1)); + + if (reflectOutput) { + remainder = Reflect(remainder, CRCWidth); + } + + return (remainder ^ finalXOR) & BIT_MASK; +} + +/** + @brief Undoes the process of computing the final reflection and XOR of a CRC + remainder. + @note This function allows for computation of multi-part CRCs + @note Calling UndoFinalize() followed by Finalize() (or vice versa) will + always return the original remainder value: + + CRCType x = ...; + CRCType y = Finalize(x, finalXOR, reflectOutput); + CRCType z = UndoFinalize(y, finalXOR, reflectOutput); + assert(x == z); + + @param[in] crc Reflected and XORed CRC + @param[in] finalXOR Final value XORed with the remainder + @param[in] reflectOutput true if the remainder is to be reflected + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return Un-finalized CRC remainder +*/ +template +inline CRCType CRC::UndoFinalize(CRCType crc, + CRCType finalXOR, + bool reflectOutput) { + // For masking off the bits for the CRC (in the event that the number of bits + // in CRCType is larger than CRCWidth) + static crcpp_constexpr CRCType BIT_MASK = + (CRCType(1) << (CRCWidth - CRCType(1))) | + ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1)); + + crc = (crc & BIT_MASK) ^ finalXOR; + + if (reflectOutput) { 
+ crc = Reflect(crc, CRCWidth); + } + + return crc; +} + +/** + @brief Computes a CRC remainder. + @param[in] data Data over which the remainder will be computed + @param[in] size Size of the data + @param[in] parameters CRC parameters + @param[in] remainder Running CRC remainder. Can be an initial value or the + result of a previous CRC remainder calculation. + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC remainder +*/ +template +inline CRCType CRC::CalculateRemainder( + const void *data, + crcpp_size size, + const Parameters &parameters, + CRCType remainder) { +#ifdef CRCPP_USE_CPP11 + // This static_assert is put here because this function will always be + // compiled in no matter what + // the template parameters are and whether or not a table lookup or bit-by-bit + // algorithm is used. + static_assert(::std::numeric_limits::digits >= CRCWidth, + "CRCType is too small to contain a CRC of width CRCWidth."); +#else + // Catching this compile-time error is very important. Sadly, the compiler + // error will be very cryptic, but it's + // better than nothing. + enum { + static_assert_failed_CRCType_is_too_small_to_contain_a_CRC_of_width_CRCWidth = + 1 / (::std::numeric_limits::digits >= CRCWidth ? 1 : 0) + }; +#endif + + const unsigned char *current = reinterpret_cast(data); + + // Slightly different implementations based on the parameters. The current + // implementations try to eliminate as much + // computation from the inner loop (looping over each bit) as possible. + if (parameters.reflectInput) { + CRCType polynomial = CRC::Reflect(parameters.polynomial, CRCWidth); + while (size--) { + remainder ^= *current++; + + // An optimizing compiler might choose to unroll this loop. + for (crcpp_size i = 0; i < CHAR_BIT; ++i) { +#ifdef CRCPP_BRANCHLESS + // Clever way to avoid a branch at the expense of a multiplication.
This + // code is equivalent to the following: + // if (remainder & 1) + // remainder = (remainder >> 1) ^ polynomial; + // else + // remainder >>= 1; + remainder = (remainder >> 1) ^ ((remainder & 1) * polynomial); +#else + remainder = (remainder & 1) ? ((remainder >> 1) ^ polynomial) + : (remainder >> 1); +#endif + } + } + } else if (CRCWidth >= CHAR_BIT) { + static crcpp_constexpr CRCType CRC_WIDTH_MINUS_ONE(CRCWidth - CRCType(1)); +#ifndef CRCPP_BRANCHLESS + static crcpp_constexpr CRCType CRC_HIGHEST_BIT_MASK(CRCType(1) + << CRC_WIDTH_MINUS_ONE); +#endif + static crcpp_constexpr CRCType SHIFT( + BoundedConstexprValue(CRCWidth - CHAR_BIT)); + + while (size--) { + remainder ^= (static_cast(*current++) << SHIFT); + + // An optimizing compiler might choose to unroll this loop. + for (crcpp_size i = 0; i < CHAR_BIT; ++i) { +#ifdef CRCPP_BRANCHLESS + // Clever way to avoid a branch at the expense of a multiplication. This + // code is equivalent to the following: + // if (remainder & CRC_HIGHEST_BIT_MASK) + // remainder = (remainder << 1) ^ parameters.polynomial; + // else + // remainder <<= 1; + remainder = + (remainder << 1) ^ + (((remainder >> CRC_WIDTH_MINUS_ONE) & 1) * parameters.polynomial); +#else + remainder = (remainder & CRC_HIGHEST_BIT_MASK) + ? ((remainder << 1) ^ parameters.polynomial) + : (remainder << 1); +#endif + } + } + } else { + static crcpp_constexpr CRCType CHAR_BIT_MINUS_ONE(CHAR_BIT - 1); +#ifndef CRCPP_BRANCHLESS + static crcpp_constexpr CRCType CHAR_BIT_HIGHEST_BIT_MASK( + CRCType(1) << CHAR_BIT_MINUS_ONE); +#endif + static crcpp_constexpr CRCType SHIFT( + BoundedConstexprValue(CHAR_BIT - CRCWidth)); + + CRCType polynomial = parameters.polynomial << SHIFT; + remainder <<= SHIFT; + + while (size--) { + remainder ^= *current++; + + // An optimizing compiler might choose to unroll this loop. + for (crcpp_size i = 0; i < CHAR_BIT; ++i) { +#ifdef CRCPP_BRANCHLESS + // Clever way to avoid a branch at the expense of a multiplication. 
This + // code is equivalent to the following: + // if (remainder & CHAR_BIT_HIGHEST_BIT_MASK) + // remainder = (remainder << 1) ^ polynomial; + // else + // remainder <<= 1; + remainder = (remainder << 1) ^ + (((remainder >> CHAR_BIT_MINUS_ONE) & 1) * polynomial); +#else + remainder = (remainder & CHAR_BIT_HIGHEST_BIT_MASK) + ? ((remainder << 1) ^ polynomial) + : (remainder << 1); +#endif + } + } + + remainder >>= SHIFT; + } + + return remainder; +} + +/** + @brief Computes a CRC remainder using lookup table. + @param[in] data Data over which the remainder will be computed + @param[in] size Size of the data + @param[in] lookupTable CRC lookup table + @param[in] remainder Running CRC remainder. Can be an initial value or the + result of a previous CRC remainder calculation. + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return CRC remainder +*/ +template +inline CRCType CRC::CalculateRemainder( + const void *data, + crcpp_size size, + const Table &lookupTable, + CRCType remainder) { + const unsigned char *current = reinterpret_cast(data); + + if (lookupTable.GetParameters().reflectInput) { + while (size--) { +#if defined(WIN32) || defined(_WIN32) || defined(WINCE) +// Disable warning about data loss when doing (remainder >> CHAR_BIT) when +// remainder is one byte long. The algorithm is still correct in this case, +// though it's possible that one additional machine instruction will be +// executed. 
+#pragma warning(push) +#pragma warning(disable : 4333) +#endif + remainder = + (remainder >> CHAR_BIT) ^ + lookupTable[static_cast(remainder ^ *current++)]; +#if defined(WIN32) || defined(_WIN32) || defined(WINCE) +#pragma warning(pop) +#endif + } + } else if (CRCWidth >= CHAR_BIT) { + static crcpp_constexpr CRCType SHIFT( + BoundedConstexprValue(CRCWidth - CHAR_BIT)); + + while (size--) { + remainder = (remainder << CHAR_BIT) ^ + lookupTable[static_cast((remainder >> SHIFT) ^ + *current++)]; + } + } else { + static crcpp_constexpr CRCType SHIFT( + BoundedConstexprValue(CHAR_BIT - CRCWidth)); + + remainder <<= SHIFT; + + while (size--) { + // Note: no need to mask here since remainder is guaranteed to fit in a + // single byte. + remainder = + lookupTable[static_cast(remainder ^ *current++)]; + } + + remainder >>= SHIFT; + } + + return remainder; +} + +/** + @brief Function to force a compile-time expression to be >= 0. + @note This function is used to avoid compiler warnings because all constexpr + values are evaluated + in a function even in a branch that will never be executed. This also means + we don't need pragmas + to get rid of warnings, but it still can be computed at compile-time. + Win-win! + @param[in] x Compile-time expression to bound + @tparam CRCType Integer type for storing the CRC result + @tparam CRCWidth Number of bits in the CRC + @return Non-negative compile-time expression +*/ +template +inline crcpp_constexpr IntegerType CRC::BoundedConstexprValue(IntegerType x) { + return (x < IntegerType(0)) ? IntegerType(0) : x; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-4 ITU. + @note The parameters are static and are delayed-constructed to reduce memory + footprint.
+ @note CRC-4 ITU has the following parameters and check value: + - polynomial = 0x3 + - initial value = 0x0 + - final XOR = 0x0 + - reflect input = true + - reflect output = true + - check value = 0x7 + @return CRC-4 ITU parameters +*/ +inline const CRC::Parameters &CRC::CRC_4_ITU() { + static const Parameters parameters = { + 0x3, 0x0, 0x0, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-5 EPC. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-5 EPC has the following parameters and check value: + - polynomial = 0x09 + - initial value = 0x09 + - final XOR = 0x00 + - reflect input = false + - reflect output = false + - check value = 0x00 + @return CRC-5 EPC parameters +*/ +inline const CRC::Parameters &CRC::CRC_5_EPC() { + static const Parameters parameters = { + 0x09, 0x09, 0x00, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-5 ITU. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-5 ITU has the following parameters and check value: + - polynomial = 0x15 + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = true + - reflect output = true + - check value = 0x07 + @return CRC-5 ITU parameters +*/ +inline const CRC::Parameters &CRC::CRC_5_ITU() { + static const Parameters parameters = { + 0x15, 0x00, 0x00, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-5 USB. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-5 USB has the following parameters and check value: + - polynomial = 0x05 + - initial value = 0x1F + - final XOR = 0x1F + - reflect input = true + - reflect output = true + - check value = 0x19 + @return CRC-5 USB parameters +*/ +inline const CRC::Parameters &CRC::CRC_5_USB() { + static const Parameters parameters = { + 0x05, 0x1F, 0x1F, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-6 CDMA2000-A. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-6 CDMA2000-A has the following parameters and check value: + - polynomial = 0x27 + - initial value = 0x3F + - final XOR = 0x00 + - reflect input = false + - reflect output = false + - check value = 0x0D + @return CRC-6 CDMA2000-A parameters +*/ +inline const CRC::Parameters &CRC::CRC_6_CDMA2000A() { + static const Parameters parameters = { + 0x27, 0x3F, 0x00, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-6 CDMA2000-B. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-6 CDMA2000-B has the following parameters and check value: + - polynomial = 0x07 + - initial value = 0x3F + - final XOR = 0x00 + - reflect input = false + - reflect output = false + - check value = 0x3B + @return CRC-6 CDMA2000-B parameters +*/ +inline const CRC::Parameters &CRC::CRC_6_CDMA2000B() { + static const Parameters parameters = { + 0x07, 0x3F, 0x00, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-6 ITU. + @note The parameters are static and are delayed-constructed to reduce memory + footprint.
+ @note CRC-6 ITU has the following parameters and check value: + - polynomial = 0x03 + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = true + - reflect output = true + - check value = 0x06 + @return CRC-6 ITU parameters +*/ +inline const CRC::Parameters &CRC::CRC_6_ITU() { + static const Parameters parameters = { + 0x03, 0x00, 0x00, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-7 JEDEC. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-7 JEDEC has the following parameters and check value: + - polynomial = 0x09 + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = false + - reflect output = false + - check value = 0x75 + @return CRC-7 JEDEC parameters +*/ +inline const CRC::Parameters &CRC::CRC_7() { + static const Parameters parameters = { + 0x09, 0x00, 0x00, false, false}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +/** + @brief Returns a set of parameters for CRC-8 SMBus. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-8 SMBus has the following parameters and check value: + - polynomial = 0x07 + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = false + - reflect output = false + - check value = 0xF4 + @return CRC-8 SMBus parameters +*/ +inline const CRC::Parameters &CRC::CRC_8() { + static const Parameters parameters = { + 0x07, 0x00, 0x00, false, false}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-8 EBU (aka CRC-8 AES). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-8 EBU has the following parameters and check value: + - polynomial = 0x1D + - initial value = 0xFF + - final XOR = 0x00 + - reflect input = true + - reflect output = true + - check value = 0x97 + @return CRC-8 EBU parameters +*/ +inline const CRC::Parameters &CRC::CRC_8_EBU() { + static const Parameters parameters = { + 0x1D, 0xFF, 0x00, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-8 MAXIM (aka CRC-8 DOW-CRC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-8 MAXIM has the following parameters and check value: + - polynomial = 0x31 + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = true + - reflect output = true + - check value = 0xA1 + @return CRC-8 MAXIM parameters +*/ +inline const CRC::Parameters &CRC::CRC_8_MAXIM() { + static const Parameters parameters = { + 0x31, 0x00, 0x00, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-8 WCDMA. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-8 WCDMA has the following parameters and check value: + - polynomial = 0x9B + - initial value = 0x00 + - final XOR = 0x00 + - reflect input = true + - reflect output = true + - check value = 0x25 + @return CRC-8 WCDMA parameters +*/ +inline const CRC::Parameters &CRC::CRC_8_WCDMA() { + static const Parameters parameters = { + 0x9B, 0x00, 0x00, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-10 ITU. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-10 ITU has the following parameters and check value: + - polynomial = 0x233 + - initial value = 0x000 + - final XOR = 0x000 + - reflect input = false + - reflect output = false + - check value = 0x199 + @return CRC-10 ITU parameters +*/ +inline const CRC::Parameters &CRC::CRC_10() { + static const Parameters parameters = { + 0x233, 0x000, 0x000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-10 CDMA2000. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-10 CDMA2000 has the following parameters and check value: + - polynomial = 0x3D9 + - initial value = 0x3FF + - final XOR = 0x000 + - reflect input = false + - reflect output = false + - check value = 0x233 + @return CRC-10 CDMA2000 parameters +*/ +inline const CRC::Parameters &CRC::CRC_10_CDMA2000() { + static const Parameters parameters = { + 0x3D9, 0x3FF, 0x000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-11 FlexRay. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-11 FlexRay has the following parameters and check value: + - polynomial = 0x385 + - initial value = 0x01A + - final XOR = 0x000 + - reflect input = false + - reflect output = false + - check value = 0x5A3 + @return CRC-11 FlexRay parameters +*/ +inline const CRC::Parameters &CRC::CRC_11() { + static const Parameters parameters = { + 0x385, 0x01A, 0x000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-12 CDMA2000. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-12 CDMA2000 has the following parameters and check value: + - polynomial = 0xF13 + - initial value = 0xFFF + - final XOR = 0x000 + - reflect input = false + - reflect output = false + - check value = 0xD4D + @return CRC-12 CDMA2000 parameters +*/ +inline const CRC::Parameters &CRC::CRC_12_CDMA2000() { + static const Parameters parameters = { + 0xF13, 0xFFF, 0x000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-12 DECT (aka CRC-12 X-CRC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-12 DECT has the following parameters and check value: + - polynomial = 0x80F + - initial value = 0x000 + - final XOR = 0x000 + - reflect input = false + - reflect output = false + - check value = 0xF5B + @return CRC-12 DECT parameters +*/ +inline const CRC::Parameters &CRC::CRC_12_DECT() { + static const Parameters parameters = { + 0x80F, 0x000, 0x000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-12 UMTS (aka CRC-12 3GPP). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-12 UMTS has the following parameters and check value: + - polynomial = 0x80F + - initial value = 0x000 + - final XOR = 0x000 + - reflect input = false + - reflect output = true + - check value = 0xDAF + @return CRC-12 UMTS parameters +*/ +inline const CRC::Parameters &CRC::CRC_12_UMTS() { + static const Parameters parameters = { + 0x80F, 0x000, 0x000, false, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-13 BBC. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-13 BBC has the following parameters and check value: + - polynomial = 0x1CF5 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x04FA + @return CRC-13 BBC parameters +*/ +inline const CRC::Parameters &CRC::CRC_13_BBC() { + static const Parameters parameters = { + 0x1CF5, 0x0000, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-15 CAN. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-15 CAN has the following parameters and check value: + - polynomial = 0x4599 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x059E + @return CRC-15 CAN parameters +*/ +inline const CRC::Parameters &CRC::CRC_15() { + static const Parameters parameters = { + 0x4599, 0x0000, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-15 MPT1327. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-15 MPT1327 has the following parameters and check value: + - polynomial = 0x6815 + - initial value = 0x0000 + - final XOR = 0x0001 + - reflect input = false + - reflect output = false + - check value = 0x2566 + @return CRC-15 MPT1327 parameters +*/ +inline const CRC::Parameters &CRC::CRC_15_MPT1327() { + static const Parameters parameters = { + 0x6815, 0x0000, 0x0001, false, false}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +/** + @brief Returns a set of parameters for CRC-16 ARC (aka CRC-16 IBM, CRC-16 + LHA). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-16 ARC has the following parameters and check value: + - polynomial = 0x8005 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = true + - reflect output = true + - check value = 0xBB3D + @return CRC-16 ARC parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_ARC() { + static const Parameters parameters = { + 0x8005, 0x0000, 0x0000, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 BUYPASS (aka CRC-16 VERIFONE, + CRC-16 UMTS). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 BUYPASS has the following parameters and check value: + - polynomial = 0x8005 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0xFEE8 + @return CRC-16 BUYPASS parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_BUYPASS() { + static const Parameters parameters = { + 0x8005, 0x0000, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 CCITT FALSE. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 CCITT FALSE has the following parameters and check value: + - polynomial = 0x1021 + - initial value = 0xFFFF + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x29B1 + @return CRC-16 CCITT FALSE parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_CCITTFALSE() { + static const Parameters parameters = { + 0x1021, 0xFFFF, 0x0000, false, false}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-16 CDMA2000. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-16 CDMA2000 has the following parameters and check value: + - polynomial = 0xC867 + - initial value = 0xFFFF + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x4C06 + @return CRC-16 CDMA2000 parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_CDMA2000() { + static const Parameters parameters = { + 0xC867, 0xFFFF, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 DECT-R (aka CRC-16 R-CRC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 DECT-R has the following parameters and check value: + - polynomial = 0x0589 + - initial value = 0x0000 + - final XOR = 0x0001 + - reflect input = false + - reflect output = false + - check value = 0x007E + @return CRC-16 DECT-R parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_DECTR() { + static const Parameters parameters = { + 0x0589, 0x0000, 0x0001, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 DECT-X (aka CRC-16 X-CRC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 DECT-X has the following parameters and check value: + - polynomial = 0x0589 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x007F + @return CRC-16 DECT-X parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_DECTX() { + static const Parameters parameters = { + 0x0589, 0x0000, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 DNP. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-16 DNP has the following parameters and check value: + - polynomial = 0x3D65 + - initial value = 0x0000 + - final XOR = 0xFFFF + - reflect input = true + - reflect output = true + - check value = 0xEA82 + @return CRC-16 DNP parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_DNP() { + static const Parameters parameters = { + 0x3D65, 0x0000, 0xFFFF, true, true}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +/** + @brief Returns a set of parameters for CRC-16 GENIBUS (aka CRC-16 EPC, + CRC-16 I-CODE, CRC-16 DARC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 GENIBUS has the following parameters and check value: + - polynomial = 0x1021 + - initial value = 0xFFFF + - final XOR = 0xFFFF + - reflect input = false + - reflect output = false + - check value = 0xD64E + @return CRC-16 GENIBUS parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_GENIBUS() { + static const Parameters parameters = { + 0x1021, 0xFFFF, 0xFFFF, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 KERMIT (aka CRC-16 CCITT, + CRC-16 CCITT-TRUE). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 KERMIT has the following parameters and check value: + - polynomial = 0x1021 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = true + - reflect output = true + - check value = 0x2189 + @return CRC-16 KERMIT parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_KERMIT() { + static const Parameters parameters = { + 0x1021, 0x0000, 0x0000, true, true}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-16 MAXIM. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-16 MAXIM has the following parameters and check value: + - polynomial = 0x8005 + - initial value = 0x0000 + - final XOR = 0xFFFF + - reflect input = true + - reflect output = true + - check value = 0x44C2 + @return CRC-16 MAXIM parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_MAXIM() { + static const Parameters parameters = { + 0x8005, 0x0000, 0xFFFF, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 MODBUS. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 MODBUS has the following parameters and check value: + - polynomial = 0x8005 + - initial value = 0xFFFF + - final XOR = 0x0000 + - reflect input = true + - reflect output = true + - check value = 0x4B37 + @return CRC-16 MODBUS parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_MODBUS() { + static const Parameters parameters = { + 0x8005, 0xFFFF, 0x0000, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 T10-DIF. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 T10-DIF has the following parameters and check value: + - polynomial = 0x8BB7 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0xD0DB + @return CRC-16 T10-DIF parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_T10DIF() { + static const Parameters parameters = { + 0x8BB7, 0x0000, 0x0000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 USB. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-16 USB has the following parameters and check value: + - polynomial = 0x8005 + - initial value = 0xFFFF + - final XOR = 0xFFFF + - reflect input = true + - reflect output = true + - check value = 0xB4C8 + @return CRC-16 USB parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_USB() { + static const Parameters parameters = { + 0x8005, 0xFFFF, 0xFFFF, true, true}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +/** + @brief Returns a set of parameters for CRC-16 X-25 (aka CRC-16 IBM-SDLC, + CRC-16 ISO-HDLC, CRC-16 B). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 X-25 has the following parameters and check value: + - polynomial = 0x1021 + - initial value = 0xFFFF + - final XOR = 0xFFFF + - reflect input = true + - reflect output = true + - check value = 0x906E + @return CRC-16 X-25 parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_X25() { + static const Parameters parameters = { + 0x1021, 0xFFFF, 0xFFFF, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-16 XMODEM (aka CRC-16 ZMODEM, + CRC-16 ACORN, CRC-16 LTE). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-16 XMODEM has the following parameters and check value: + - polynomial = 0x1021 + - initial value = 0x0000 + - final XOR = 0x0000 + - reflect input = false + - reflect output = false + - check value = 0x31C3 + @return CRC-16 XMODEM parameters +*/ +inline const CRC::Parameters &CRC::CRC_16_XMODEM() { + static const Parameters parameters = { + 0x1021, 0x0000, 0x0000, false, false}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-17 CAN. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-17 CAN has the following parameters and check value: + - polynomial = 0x1685B + - initial value = 0x00000 + - final XOR = 0x00000 + - reflect input = false + - reflect output = false + - check value = 0x04F03 + @return CRC-17 CAN parameters +*/ +inline const CRC::Parameters &CRC::CRC_17_CAN() { + static const Parameters parameters = { + 0x1685B, 0x00000, 0x00000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-21 CAN. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-21 CAN has the following parameters and check value: + - polynomial = 0x102899 + - initial value = 0x000000 + - final XOR = 0x000000 + - reflect input = false + - reflect output = false + - check value = 0x0ED841 + @return CRC-21 CAN parameters +*/ +inline const CRC::Parameters &CRC::CRC_21_CAN() { + static const Parameters parameters = { + 0x102899, 0x000000, 0x000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-24 OPENPGP. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-24 OPENPGP has the following parameters and check value: + - polynomial = 0x864CFB + - initial value = 0xB704CE + - final XOR = 0x000000 + - reflect input = false + - reflect output = false + - check value = 0x21CF02 + @return CRC-24 OPENPGP parameters +*/ +inline const CRC::Parameters &CRC::CRC_24() { + static const Parameters parameters = { + 0x864CFB, 0xB704CE, 0x000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-24 FlexRay-A. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-24 FlexRay-A has the following parameters and check value: + - polynomial = 0x5D6DCB + - initial value = 0xFEDCBA + - final XOR = 0x000000 + - reflect input = false + - reflect output = false + - check value = 0x7979BD + @return CRC-24 FlexRay-A parameters +*/ +inline const CRC::Parameters &CRC::CRC_24_FLEXRAYA() { + static const Parameters parameters = { + 0x5D6DCB, 0xFEDCBA, 0x000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-24 FlexRay-B. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-24 FlexRay-B has the following parameters and check value: + - polynomial = 0x5D6DCB + - initial value = 0xABCDEF + - final XOR = 0x000000 + - reflect input = false + - reflect output = false + - check value = 0x1F23B8 + @return CRC-24 FlexRay-B parameters +*/ +inline const CRC::Parameters &CRC::CRC_24_FLEXRAYB() { + static const Parameters parameters = { + 0x5D6DCB, 0xABCDEF, 0x000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-30 CDMA. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-30 CDMA has the following parameters and check value: + - polynomial = 0x2030B9C7 + - initial value = 0x3FFFFFFF + - final XOR = 0x00000000 + - reflect input = false + - reflect output = false + - check value = 0x3B3CB540 + @return CRC-30 CDMA parameters +*/ +inline const CRC::Parameters &CRC::CRC_30() { + static const Parameters parameters = { + 0x2030B9C7, 0x3FFFFFFF, 0x00000000, false, false}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +/** + @brief Returns a set of parameters for CRC-32 (aka CRC-32 ADCCP, CRC-32 + PKZip). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-32 has the following parameters and check value: + - polynomial = 0x04C11DB7 + - initial value = 0xFFFFFFFF + - final XOR = 0xFFFFFFFF + - reflect input = true + - reflect output = true + - check value = 0xCBF43926 + @return CRC-32 parameters +*/ +inline const CRC::Parameters &CRC::CRC_32() { + static const Parameters parameters = { + 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, true, true}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-32 BZIP2 (aka CRC-32 AAL5, CRC-32 + DECT-B, CRC-32 B-CRC). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-32 BZIP2 has the following parameters and check value: + - polynomial = 0x04C11DB7 + - initial value = 0xFFFFFFFF + - final XOR = 0xFFFFFFFF + - reflect input = false + - reflect output = false + - check value = 0xFC891918 + @return CRC-32 BZIP2 parameters +*/ +inline const CRC::Parameters &CRC::CRC_32_BZIP2() { + static const Parameters parameters = { + 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, false, false}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-32 C (aka CRC-32 ISCSI, CRC-32 + Castagnoli, CRC-32 Interlaken). + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-32 C has the following parameters and check value: + - polynomial = 0x1EDC6F41 + - initial value = 0xFFFFFFFF + - final XOR = 0xFFFFFFFF + - reflect input = true + - reflect output = true + - check value = 0xE3069283 + @return CRC-32 C parameters +*/ +inline const CRC::Parameters &CRC::CRC_32_C() { + static const Parameters parameters = { + 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true}; + return parameters; +} +#endif + +/** + @brief Returns a set of parameters for CRC-32 MPEG-2. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-32 MPEG-2 has the following parameters and check value: + - polynomial = 0x04C11DB7 + - initial value = 0xFFFFFFFF + - final XOR = 0x00000000 + - reflect input = false + - reflect output = false + - check value = 0x0376E6E7 + @return CRC-32 MPEG-2 parameters +*/ +inline const CRC::Parameters &CRC::CRC_32_MPEG2() { + static const Parameters parameters = { + 0x04C11DB7, 0xFFFFFFFF, 0x00000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-32 POSIX. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-32 POSIX has the following parameters and check value: + - polynomial = 0x04C11DB7 + - initial value = 0x00000000 + - final XOR = 0xFFFFFFFF + - reflect input = false + - reflect output = false + - check value = 0x765E7680 + @return CRC-32 POSIX parameters +*/ +inline const CRC::Parameters &CRC::CRC_32_POSIX() { + static const Parameters parameters = { + 0x04C11DB7, 0x00000000, 0xFFFFFFFF, false, false}; + return parameters; +} + +#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS +/** + @brief Returns a set of parameters for CRC-32 Q. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-32 Q has the following parameters and check value: + - polynomial = 0x814141AB + - initial value = 0x00000000 + - final XOR = 0x00000000 + - reflect input = false + - reflect output = false + - check value = 0x3010BF7F + @return CRC-32 Q parameters +*/ +inline const CRC::Parameters &CRC::CRC_32_Q() { + static const Parameters parameters = { + 0x814141AB, 0x00000000, 0x00000000, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-40 GSM. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. 
+ @note CRC-40 GSM has the following parameters and check value: + - polynomial = 0x0004820009 + - initial value = 0x0000000000 + - final XOR = 0xFFFFFFFFFF + - reflect input = false + - reflect output = false + - check value = 0xD4164FC646 + @return CRC-40 GSM parameters +*/ +inline const CRC::Parameters &CRC::CRC_40_GSM() { + static const Parameters parameters = { + 0x0004820009, 0x0000000000, 0xFFFFFFFFFF, false, false}; + return parameters; +} + +/** + @brief Returns a set of parameters for CRC-64 ECMA. + @note The parameters are static and are delayed-constructed to reduce memory + footprint. + @note CRC-64 ECMA has the following parameters and check value: + - polynomial = 0x42F0E1EBA9EA3693 + - initial value = 0x0000000000000000 + - final XOR = 0x0000000000000000 + - reflect input = false + - reflect output = false + - check value = 0x6C40DF5F0B497347 + @return CRC-64 ECMA parameters +*/ +inline const CRC::Parameters &CRC::CRC_64() { + static const Parameters parameters = { + 0x42F0E1EBA9EA3693, 0x0000000000000000, 0x0000000000000000, false, false}; + return parameters; +} +#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS + +#ifdef CRCPP_USE_NAMESPACE +} +#endif + +#endif // CRCPP_CRC_H_ diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc index c82d05c3a2573..4e35e62d0ab9a 100644 --- a/paddle/fluid/recordio/header.cc +++ b/paddle/fluid/recordio/header.cc @@ -26,18 +26,18 @@ Header::Header() Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs) : num_records_(num), checksum_(sum), compressor_(c), compress_size_(cs) {} -void Header::Parse(std::istream& iss) { - iss.read(reinterpret_cast(&num_records_), sizeof(uint32_t)); - iss.read(reinterpret_cast(&checksum_), sizeof(uint32_t)); - iss.read(reinterpret_cast(&compressor_), sizeof(uint32_t)); - iss.read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +void Header::Parse(Stream* iss) { + iss.Read(reinterpret_cast(&num_records_), sizeof(uint32_t)); + 
iss.Read(reinterpret_cast(&checksum_), sizeof(uint32_t)); + iss.Read(reinterpret_cast(&compressor_), sizeof(uint32_t)); + iss.Read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } -void Header::Write(std::ostream& os) { - os.write(reinterpret_cast(&num_records_), sizeof(uint32_t)); - os.write(reinterpret_cast(&checksum_), sizeof(uint32_t)); - os.write(reinterpret_cast(&compressor_), sizeof(uint32_t)); - os.write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +void Header::Write(Stream* os) { + os.Write(reinterpret_cast(&num_records_), sizeof(uint32_t)); + os.Write(reinterpret_cast(&checksum_), sizeof(uint32_t)); + os.Write(reinterpret_cast(&compressor_), sizeof(uint32_t)); + os.Write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } // std::ostream& operator << (std::ostream& os, Header h) { @@ -54,28 +54,8 @@ std::ostream& operator<<(std::ostream& os, Header h) { return os; } -// bool operator==(Header l, Header r) { -// return num_records_ == rhs.NumRecords() && -// checksum_ == rhs.Checksum() && -// compressor_ == rhs.CompressType() && -// compress_size_ == rhs.CompressSize(); -// } - bool operator==(Header l, Header r) { return l.NumRecords() == r.NumRecords() && l.Checksum() == r.Checksum() && l.CompressType() == r.CompressType() && l.CompressSize() == r.CompressSize(); } - -// size_t CompressData(const std::string& os, Compressor ct, char* buffer) { -// size_t compress_size = 0; - -// // std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); -// // std::string compressed; -// compress_size =os.size(); -// memcpy(buffer, os.c_str(), compress_size); -// return compress_size; -// } - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/header.h b/paddle/fluid/recordio/header.h index 92c040617dba2..21e23f0a25619 100644 --- a/paddle/fluid/recordio/header.h +++ b/paddle/fluid/recordio/header.h @@ -16,6 +16,8 @@ #include +#include "paddle/fluid/recordio/io.h" + namespace paddle { namespace recordio { @@ -43,8 +45,8 
@@ class Header { Header(); Header(uint32_t num, uint32_t sum, Compressor ct, uint32_t cs); - void Write(std::ostream& os); - void Parse(std::istream& iss); + void Write(Stream* os); + void Parse(Stream* iss); uint32_t NumRecords() const { return num_records_; } uint32_t Checksum() const { return checksum_; } diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index 322f63190a594..df52d7feef258 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -22,15 +22,12 @@ using namespace paddle::recordio; TEST(Recordio, ChunkHead) { Header hdr(0, 1, Compressor::kGzip, 3); - std::ostringstream oss; + Stream* oss = Stream::Open("/tmp/record_1", "w"); hdr.Write(oss); - std::istringstream iss(oss.str()); + Stream* iss = Stream::Open("/tmp/record_1", "r"); Header hdr2; hdr2.Parse(iss); - std::ostringstream oss2; - hdr2.Write(oss2); - EXPECT_STREQ(oss2.str().c_str(), oss.str().c_str()); EXPECT_TRUE(hdr == hdr2); } diff --git a/paddle/fluid/recordio/io.cc b/paddle/fluid/recordio/io.cc index 2c82d1d42d9b2..e5571ddf5d08e 100644 --- a/paddle/fluid/recordio/io.cc +++ b/paddle/fluid/recordio/io.cc @@ -15,6 +15,8 @@ #include "paddle/fluid/recordio/io.h" #include "paddle/fluid/string/piece.h" +#include + namespace paddle { namespace recordio { Stream* Stream::Open(const char* filename, const char* mode) { @@ -38,7 +40,7 @@ void FileStream::Write(const void* ptr, size_t size) { } size_t FileStream::Tell() { return ftell(fp_); } -void FileStream::Seek(size_t p) { fseek(fp_, static_cast(p), SEEK_SET); } +void FileStream::Seek(size_t p) { fseek(fp_, p, SEEK_SET); } bool FileStream::Eof() { return feof(fp_); } diff --git a/paddle/fluid/recordio/io.h b/paddle/fluid/recordio/io.h index ff647b95d8ebf..dedfed787d517 100644 --- a/paddle/fluid/recordio/io.h +++ b/paddle/fluid/recordio/io.h @@ -16,19 +16,21 @@ #include #include + #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/macros.h" // 
DISABLE_COPY_ASSIGN namespace paddle { namespace recordio { -// Stream abstract object for read and write +// Seekable Stream Interface for read and write class Stream { public: virtual ~Stream() {} - virtual size_t Read(void* ptr, size_t size); - virtual void Write(const void* ptr, size_t size); - virtual size_t Tell(); - virtual void Seek(); + virtual size_t Read(void* ptr, size_t size) = 0; + virtual void Write(const void* ptr, size_t size) = 0; + virtual size_t Tell() = 0; + virtual void Seek(size_t p) = 0; // Create Stream Instance static Stream* Open(const char* filename, const char* mode); }; @@ -47,6 +49,7 @@ class FileStream : public Stream { private: FILE* fp_; + DISABLE_COPY_AND_ASSIGN(FileStream); }; } // namespace recordio diff --git a/paddle/fluid/recordio/io_test.cc b/paddle/fluid/recordio/io_test.cc index b2e5733ffed52..8311494787637 100644 --- a/paddle/fluid/recordio/io_test.cc +++ b/paddle/fluid/recordio/io_test.cc @@ -21,7 +21,7 @@ using namespace paddle::recordio; TEST(FileStream, IO) { { // Write - Stream* fs = Stream::Open("/tmp/record_0", "rw"); + Stream* fs = Stream::Open("/tmp/record_0", "w"); fs->Write("hello", 6); delete fs; } diff --git a/paddle/fluid/recordio/writer.cc b/paddle/fluid/recordio/writer.cc index acb84fb8e8d3d..b2b0dd1017171 100644 --- a/paddle/fluid/recordio/writer.cc +++ b/paddle/fluid/recordio/writer.cc @@ -26,16 +26,16 @@ Writer::Writer(Stream* fo, int maxChunkSize, int compressor) chunk_.reset(new Chunk); } -size_t Writer::Write(const std::string& record) { +size_t Writer::Write(const char* buf, size_t length) { if (stream_ == nullptr) { LOG(WARNING) << "Cannot write since writer had been closed."; return 0; } - if ((record.size() + chunk_->NumBytes()) > max_chunk_size_) { + if ((length + chunk_->NumBytes()) > max_chunk_size_) { chunk_->Dump(stream_, compressor_); } - chunk_->Add(record); - return record.size(); + chunk_->Add(buf, length); + return length; } // size_t Writer::Write(const char* buf, size_t length) { diff 
--git a/paddle/fluid/recordio/writer.h b/paddle/fluid/recordio/writer.h index 250d59813cbaf..d610450c53083 100644 --- a/paddle/fluid/recordio/writer.h +++ b/paddle/fluid/recordio/writer.h @@ -16,7 +16,6 @@ #include #include -#include "paddle/fluid/platform/macros.h" // DISABLE_COPY_ASSIGN #include "paddle/fluid/recordio/header.h" #include "paddle/fluid/recordio/io.h" @@ -44,7 +43,6 @@ class Writer { int max_chunk_size_; // Compressor used for chuck Compressor compressor_; - DISABLE_COPY_AND_ASSIGN(Writer); }; diff --git a/paddle/fluid/recordio/writer_test.cc b/paddle/fluid/recordio/writer_test.cc index 1ba32bf2df523..7c7f823c8d848 100644 --- a/paddle/fluid/recordio/writer_test.cc +++ b/paddle/fluid/recordio/writer_test.cc @@ -18,4 +18,4 @@ using namespace paddle::recordio; -TEST(Writer, Normal) { Stream } +TEST(Writer, Normal) {} From 7364348d04587f5f9c7d267a2610c56d5a831433 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 6 Mar 2018 00:21:37 +0800 Subject: [PATCH 5/7] "move from recordio repo to paddle" --- CMakeLists.txt | 1 + paddle/fluid/recordio/chunk.cc | 25 +++++++---- paddle/fluid/recordio/chunk.h | 2 +- paddle/fluid/recordio/chunk_test.cc | 34 ++++++++++++++- paddle/fluid/recordio/header.cc | 27 +++++------- paddle/fluid/recordio/header_test.cc | 10 ++--- paddle/fluid/recordio/range_scanner.cc | 46 ++++++++++++++++++++ paddle/fluid/recordio/range_scanner.h | 30 +++++++++---- paddle/fluid/recordio/scanner.cc | 58 ++++++++++++++++++++++++++ paddle/fluid/recordio/scanner.h | 17 ++++---- paddle/fluid/recordio/scanner_test.cc | 21 ++++++++++ paddle/fluid/recordio/writer_test.cc | 10 ++++- 12 files changed, 231 insertions(+), 50 deletions(-) create mode 100644 paddle/fluid/recordio/range_scanner.cc create mode 100644 paddle/fluid/recordio/scanner.cc create mode 100644 paddle/fluid/recordio/scanner_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 469af0f7859b9..0e9a2a8e7548a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,6 +144,7 @@ 
include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 include(external/cares) include(external/grpc) +include(external/snappy) # download snappy include(cudnn) # set cudnn libraries, must before configure include(cupti) diff --git a/paddle/fluid/recordio/chunk.cc b/paddle/fluid/recordio/chunk.cc index 1ab2c7dd55828..f498c64b0824f 100644 --- a/paddle/fluid/recordio/chunk.cc +++ b/paddle/fluid/recordio/chunk.cc @@ -26,7 +26,7 @@ namespace paddle { namespace recordio { void Chunk::Add(const char* record, size_t length) { - records_.emplace_after(std::move(s)); + records_.emplace_after(std::string(record, length)); num_bytes_ += s.size() * sizeof(char); } @@ -42,13 +42,16 @@ bool Chunk::Dump(Stream* fo, Compressor ct) { os.write(record.data(), static_cast(record.size())); } - std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); + std::unique_ptr buffer(new char[num_bytes_]); size_t compressed = CompressData(os.str().c_str(), num_bytes_, ct, buffer.get()); uint32_t checksum = Crc32(buffer.get(), compressed); Header hdr(records_.size(), checksum, ct, static_cast(compressed)); hdr.Write(fo); fo.Write(buffer.get(), compressed); + // clear the content + records_.clear(); + num_bytes_ = 0; return true; } @@ -57,14 +60,18 @@ void Chunk::Parse(Stream* fi, size_t offset) { Header hdr; hdr.Parse(fi); - std::unique_ptr buffer(new char[kDefaultMaxChunkSize]); - fi->Read(buffer.get(), static_cast(hdr.CompressSize())); - uint32_t deflated_size = - DeflateData(buffer.get(), hdr.CompressSize(), hdr.CompressType()); - std::istringstream deflated(std::string(buffer.get(), deflated_size)); + size_t size = static_cast(hdr.CompressSize()); + std::unique_ptr buffer(new char[size]); + fi->Read(buffer.get(), size); + size_t deflated_size = 0; + snappy::GetUncompressedLength(buffer.get(), size, &deflated_size); + std::unique_ptr deflated_buffer(new char[deflated_size]); + DeflateData(buffer.get(), size, hdr.CompressType(), deflated_buffer.get()); + 
std::istringstream deflated( + std::string(deflated_buffer.get(), deflated_size)); for (size_t i = 0; i < hdr.NumRecords(); ++i) { - uint32_t rs; - deflated >> rs; + size_t rs; + deflated.read(&rs, sizeof(size_t)); std::string record(rs, '\0'); deflated.read(&record[0], rs); records_.emplace_back(record); diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h index 975604df3ca56..a36c71cf4cce2 100644 --- a/paddle/fluid/recordio/chunk.h +++ b/paddle/fluid/recordio/chunk.h @@ -25,7 +25,7 @@ namespace recordio { // A Chunk contains the Header and optionally compressed records. class Chunk { public: - Chunk() {} + Chunk() : num_bytes_(0) {} void Add(const char* record, size_t size); // dump the chunk into w, and clears the chunk and makes it ready for // the next add invocation. diff --git a/paddle/fluid/recordio/chunk_test.cc b/paddle/fluid/recordio/chunk_test.cc index 8aec47c234d82..938e101fcd161 100644 --- a/paddle/fluid/recordio/chunk_test.cc +++ b/paddle/fluid/recordio/chunk_test.cc @@ -20,4 +20,36 @@ using namespace paddle::recordio; -TEST(Chunk, SaveLoad) {} +TEST(Chunk, SaveLoad) { + Chunk ch; + ch.Add("12345", 6); + ch.Add("123", 4); + { + Stream* fs = Stream::Open("/tmp/record_11", "w"); + ch.Dump(fs, Compressor::kNoCompress); + EXPECT_EQ(ch.NumBytes(), 0); + } + { + Stream* fs = Stream::Open("/tmp/record_11", "r"); + ch.Parse(fs, 0); + EXPECT_EQ(ch.NumBytes(), 10); + } +} + +TEST(Chunk, Compressor) { + Chunk ch; + ch.Add("12345", 6); + ch.Add("123", 4); + ch.Add("123", 4); + ch.Add("123", 4); + { + Stream* fs = Stream::Open("/tmp/record_12", "w"); + ch.Dump(fs, Compressor::kSnappy); + EXPECT_EQ(ch.NumBytes(), 0); + } + { + Stream* fs = Stream::Open("/tmp/record_12", "r"); + ch.Parse(fs, 0); + EXPECT_EQ(ch.NumBytes(), 10); + } +} diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc index 4e35e62d0ab9a..31ee410bfb074 100644 --- a/paddle/fluid/recordio/header.cc +++ b/paddle/fluid/recordio/header.cc @@ -27,27 +27,19 
@@ Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs) : num_records_(num), checksum_(sum), compressor_(c), compress_size_(cs) {} void Header::Parse(Stream* iss) { - iss.Read(reinterpret_cast(&num_records_), sizeof(uint32_t)); - iss.Read(reinterpret_cast(&checksum_), sizeof(uint32_t)); - iss.Read(reinterpret_cast(&compressor_), sizeof(uint32_t)); - iss.Read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); + iss->Read(reinterpret_cast(&num_records_), sizeof(uint32_t)); + iss->Read(reinterpret_cast(&checksum_), sizeof(uint32_t)); + iss->Read(reinterpret_cast(&compressor_), sizeof(uint32_t)); + iss->Read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } void Header::Write(Stream* os) { - os.Write(reinterpret_cast(&num_records_), sizeof(uint32_t)); - os.Write(reinterpret_cast(&checksum_), sizeof(uint32_t)); - os.Write(reinterpret_cast(&compressor_), sizeof(uint32_t)); - os.Write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); + os->Write(reinterpret_cast(&num_records_), sizeof(uint32_t)); + os->Write(reinterpret_cast(&checksum_), sizeof(uint32_t)); + os->Write(reinterpret_cast(&compressor_), sizeof(uint32_t)); + os->Write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } -// std::ostream& operator << (std::ostream& os, Header h) { -// os << h.num_records_ -// << h.checksum_ -// << static_cast(h.compressor_) -// << h.compress_size_; -// return os; -// } - std::ostream& operator<<(std::ostream& os, Header h) { os << h.NumRecords() << h.Checksum() << static_cast(h.CompressType()) << h.CompressSize(); @@ -59,3 +51,6 @@ bool operator==(Header l, Header r) { l.CompressType() == r.CompressType() && l.CompressSize() == r.CompressSize(); } + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index df52d7feef258..12e8f14ced426 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -23,11 +23,11 @@ using namespace 
paddle::recordio; TEST(Recordio, ChunkHead) { Header hdr(0, 1, Compressor::kGzip, 3); Stream* oss = Stream::Open("/tmp/record_1", "w"); - hdr.Write(oss); + hdr->Write(oss); - Stream* iss = Stream::Open("/tmp/record_1", "r"); - Header hdr2; - hdr2.Parse(iss); + // Stream* iss = Stream::Open("/tmp/record_1", "r"); + // Header hdr2; + // hdr2.Parse(iss); - EXPECT_TRUE(hdr == hdr2); + // EXPECT_TRUE(hdr == hdr2); } diff --git a/paddle/fluid/recordio/range_scanner.cc b/paddle/fluid/recordio/range_scanner.cc new file mode 100644 index 0000000000000..4c0e80e2f8888 --- /dev/null +++ b/paddle/fluid/recordio/range_scanner.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/range_scanner.h" + +namespace paddle { +namespace recordio { + +Index Index::ChunkIndex(int i) { Index idx; } + +RangeScanner::RangeScanner(std::istream is, Index idx, int start, int len) + : stream_(is.rdbuf()), index_(idx) { + if (start < 0) { + start = 0; + } + if (len < 0 || start + len >= idx.NumRecords()) { + len = idx.NumRecords() - start; + } + + start_ = start; + end_ = start + len; + cur_ = start - 1; + chunk_index_ = -1; + // chunk_->reset(new Chunk()); +} + +bool RangeScanner::Scan() {} + +const std::string RangeScanner::Record() { + // int i = index_.Locate(cur_); + // return chunk_->Record(i); +} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/range_scanner.h b/paddle/fluid/recordio/range_scanner.h index 44b1b49abc2bf..000a328d77478 100644 --- a/paddle/fluid/recordio/range_scanner.h +++ b/paddle/fluid/recordio/range_scanner.h @@ -14,16 +14,23 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include "paddle/fluid/recordio/io.h" +namespace paddle { +namespace recordio { + +// Index consists offsets and sizes of the consequetive chunks in a RecordIO +// file. +// +// Index supports Gob. Every field in the Index needs to be exported +// for the correct encoding and decoding using Gob. class Index { public: int NumRecords() { return num_records_; } + // NumChunks returns the total number of chunks in a RecordIO file. + int NumChunks() { return chunk_lens_.size(); } + // ChunkIndex return the Index of i-th Chunk. + int ChunkIndex(int i); // Locate returns the index of chunk that contains the given record, // and the record index within the chunk. It returns (-1, -1) if the @@ -44,9 +51,13 @@ class Index { } private: + // the offset of each chunk in a file. std::vector chunk_offsets_; + // the length of each chunk in a file. std::vector chunk_lens_; + // the numer of all records in a file. int num_records_; + // the number of records in chunks. 
std::vector chunk_records_; }; @@ -56,14 +67,17 @@ class Index { // beginning. If len < 0, it scans till the end of file. class RangeScanner { public: - RangeScanner(std::istream is, Index idx, int start, int end); + RangeScanner(Stream* fi, Index idx, int start, int end); bool Scan(); const std::string Record(); private: - std::istream stream_; + Stream* fi; Index index_; int start_, end_, cur_; int chunk_index_; std::unique_ptr chunk_; }; + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/scanner.cc b/paddle/fluid/recordio/scanner.cc new file mode 100644 index 0000000000000..d5464ae9d8df8 --- /dev/null +++ b/paddle/fluid/recordio/scanner.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/chunk.h" + +#include // glob + +namespace paddle { +namespace recordio { + +Scanner::Scanner(const char* paths) + : cur_file_(nullptr), path_idx_(0), end_(false) { + glob_t glob_result; + glob(paths, GLOB_TILDE, NULL, &glob_result); + + for (size_t i = 0; i < glob_result.gl_pathc; ++i) { + paths_.emplace_back(std::string(glob_result.gl_pathv[i])); + } + globfree(&glob_result); +} + +bool Scanner::Scan() { + if (err_ == -1 || end_ == true) { + return false; + } + if (cur_scanner_ == nullptr) { + if (!NextFile()) { + end_ = true; + return false; + } + if (err_ == -1) { + return false; + } + } + if (!cur_scanner_->Scan()) { + if (err_ == -1) { + return false; + } + } + + return true; +} + +bool Scanner::NextFile() {} + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/scanner.h b/paddle/fluid/recordio/scanner.h index dc09bd5fdda34..76a3448839f91 100644 --- a/paddle/fluid/recordio/scanner.h +++ b/paddle/fluid/recordio/scanner.h @@ -14,12 +14,10 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include "paddle/fluid/recordio/io.h" + +namespace paddle { +namespace recordio { class RangeScanner; @@ -30,16 +28,17 @@ class Scanner { const std::string Record(); bool Scan(); void Close(); - -private: bool NextFile(); int Err() { return err_; } private: std::vector paths_; - FILE* cur_file_; + Stream* cur_file_; RangeScanner* cur_scanner_; int path_idx_; bool end_; int err_; }; + +} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/scanner_test.cc b/paddle/fluid/recordio/scanner_test.cc new file mode 100644 index 0000000000000..7191500de7750 --- /dev/null +++ b/paddle/fluid/recordio/scanner_test.cc @@ -0,0 +1,21 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/recordio/scanner.h" + +#include "gtest/gtest.h" + +using namespace paddle::recordio; + +TEST(Scanner, Normal) { Scanner s("/tmp/record_*"); } diff --git a/paddle/fluid/recordio/writer_test.cc b/paddle/fluid/recordio/writer_test.cc index 7c7f823c8d848..094815be2c022 100644 --- a/paddle/fluid/recordio/writer_test.cc +++ b/paddle/fluid/recordio/writer_test.cc @@ -18,4 +18,12 @@ using namespace paddle::recordio; -TEST(Writer, Normal) {} +TEST(Writer, Normal) { + Stream* fs = Stream::Open("/tmp/record_21", "w"); + Writer w(fs); + w.Write("123", 4); + + // test exception + w.Close(); + EXPECT_ANY_THROW(w.Write("123", 4)); +} From fe18341585e1cc1f9ecca18e9c5ec612aea8ef81 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Tue, 6 Mar 2018 09:37:04 +0800 Subject: [PATCH 6/7] "seperate internal library and exported library" --- paddle/fluid/recordio/CMakeLists.txt | 13 ++++- paddle/fluid/recordio/chunk.h | 3 +- paddle/fluid/recordio/header_test.cc | 18 ++++--- paddle/fluid/recordio/range_scanner.cc | 53 ++++++++++++++++--- paddle/fluid/recordio/range_scanner.h | 32 ++++++----- paddle/fluid/recordio/range_scanner_test.cc | 23 ++++++++ .../fluid/recordio/{filesys.h => recordio.cc} | 14 ++--- paddle/fluid/recordio/recordio.h | 20 +++++++ paddle/fluid/recordio/scanner.cc | 28 ++++++---- 9 files changed, 153 insertions(+), 51 deletions(-) create mode 100644 paddle/fluid/recordio/range_scanner_test.cc rename paddle/fluid/recordio/{filesys.h => recordio.cc} (79%) create mode 100644 paddle/fluid/recordio/recordio.h diff --git 
a/paddle/fluid/recordio/CMakeLists.txt b/paddle/fluid/recordio/CMakeLists.txt index 5d55709b4bbff..46188e0a5b024 100644 --- a/paddle/fluid/recordio/CMakeLists.txt +++ b/paddle/fluid/recordio/CMakeLists.txt @@ -1,5 +1,14 @@ -cc_library(header SRCS header.cc) -cc_test(header_test SRCS header_test.cc DEPS header) +# internal library. cc_library(io SRCS io.cc DEPS stringpiece) cc_test(io_test SRCS io_test.cc DEPS io) +cc_library(header SRCS header.cc DEPS io) +cc_test(header_test SRCS header_test.cc DEPS header) cc_library(chunk SRCS chunk.cc DEPS snappy) +cc_test(chunk_test SRCS chunk_test.cc DEPS chunk) +cc_library(range_scanner SRCS range_scanner.cc DEPS io chunk) +cc_test(range_scanner_test SRCS range_scanner_test.cc DEPS range_scanner) +cc_library(scanner SRCS scanner.cc DEPS range_scanner) +cc_test(scanner_test SRCS scanner_test.cc DEPS scanner) +# exported library. +cc_library(recordio SRCS recordio.cc DEPS scanner chunk header) +cc_test(recordio_test SRCS recordio_test.cc DEPS scanner) diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h index a36c71cf4cce2..661364cd5d801 100644 --- a/paddle/fluid/recordio/chunk.h +++ b/paddle/fluid/recordio/chunk.h @@ -32,9 +32,10 @@ class Chunk { bool Dump(Stream* fo, Compressor ct); void Parse(Stream* fi, size_t offset); size_t NumBytes() { return num_bytes_; } + const std::string Record(int i) { return records_[i]; } private: - std::forward_list records_; + std::forward_list records_; // sum of record lengths in bytes. 
size_t num_bytes_; DISABLE_COPY_AND_ASSIGN(Chunk); diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index 12e8f14ced426..d6ab267329016 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -22,12 +22,18 @@ using namespace paddle::recordio; TEST(Recordio, ChunkHead) { Header hdr(0, 1, Compressor::kGzip, 3); - Stream* oss = Stream::Open("/tmp/record_1", "w"); - hdr->Write(oss); + { + Stream* oss = Stream::Open("/tmp/record_1", "w"); + hdr.Write(oss); + delete oss; + } - // Stream* iss = Stream::Open("/tmp/record_1", "r"); - // Header hdr2; - // hdr2.Parse(iss); + Header hdr2; + { + Stream* iss = Stream::Open("/tmp/record_1", "r"); + hdr2.Parse(iss); + delete iss; + } - // EXPECT_TRUE(hdr == hdr2); + EXPECT_TRUE(hdr == hdr2); } diff --git a/paddle/fluid/recordio/range_scanner.cc b/paddle/fluid/recordio/range_scanner.cc index 4c0e80e2f8888..faf5078ba90ec 100644 --- a/paddle/fluid/recordio/range_scanner.cc +++ b/paddle/fluid/recordio/range_scanner.cc @@ -17,10 +17,37 @@ namespace paddle { namespace recordio { +void Index::LoadIndex(FileStream* fi) { + int64_t offset = 0; + while (!fi->Eof()) { + Header hdr; + hdr.Parse(fi); + chunk_offsets_.push_back(offset); + chunk_lens_.push_back(hdr.NumRecords()); + chunk_records_.push_back(hdr.NumRecords()); + num_records_ += hdr.NumRecords(); + offset += hdr.CompressSize(); + } +} + Index Index::ChunkIndex(int i) { Index idx; } -RangeScanner::RangeScanner(std::istream is, Index idx, int start, int len) - : stream_(is.rdbuf()), index_(idx) { +std::pair Index::Locate(int record_idx) { + std::pair range(-1, -1); + int sum = 0; + for (size_t i = 0; i < chunk_lens_.size(); ++i) { + int len = static_cast(chunk_lens_[i]); + sum += len; + if (record_idx < sum) { + range.first = static_cast(i); + range.second = record_idx - sum + len; + } + } + return range; +} + +RangeScanner::RangeScanner(Stream* fi, Index idx, int start, int len) + : stream_(fi), 
index_(idx) { if (start < 0) { start = 0; } @@ -30,16 +57,28 @@ RangeScanner::RangeScanner(std::istream is, Index idx, int start, int len) start_ = start; end_ = start + len; - cur_ = start - 1; + cur_ = start - 1; // The intial status required by Scan chunk_index_ = -1; - // chunk_->reset(new Chunk()); + chunk_.reset(new Chunk); } -bool RangeScanner::Scan() {} +bool RangeScanner::Scan() { + ++cur_; + if (cur_ >= end_) { + return false; + } else { + auto cursor = index_.Locate(cur_); + if (chunk_index_ != cursor.first) { + chunk_index_ = cursor.first; + chunk_->Parse(fi, index_.ChunkOffsets[chunk_index_]); + } + } + return true; +} const std::string RangeScanner::Record() { - // int i = index_.Locate(cur_); - // return chunk_->Record(i); + auto cursor = index_.Locate(cur_); + return chunk_->Record(cursor.second); } } // namespace recordio diff --git a/paddle/fluid/recordio/range_scanner.h b/paddle/fluid/recordio/range_scanner.h index 000a328d77478..043fd8091e8e1 100644 --- a/paddle/fluid/recordio/range_scanner.h +++ b/paddle/fluid/recordio/range_scanner.h @@ -14,6 +14,9 @@ #pragma once +#include + +#include "paddle/fluid/recordio/chunk.h" #include "paddle/fluid/recordio/io.h" namespace paddle { @@ -26,29 +29,22 @@ namespace recordio { // for the correct encoding and decoding using Gob. class Index { public: + Index() : num_records_(0) {} + // LoadIndex scans the file and parse chunkOffsets, chunkLens, and len. + void LoadIndex(Stream* fi); + // NumRecords returns the total number of all records in a RecordIO file. int NumRecords() { return num_records_; } // NumChunks returns the total number of chunks in a RecordIO file. int NumChunks() { return chunk_lens_.size(); } // ChunkIndex return the Index of i-th Chunk. int ChunkIndex(int i); + int64_t ChunkOffsets(int i) { return chunk_offsets_[i]; } + // Locate returns the index of chunk that contains the given record, // and the record index within the chunk. It returns (-1, -1) if the // record is out of range. 
- void Locate(int record_idx, std::pair* out) { - size_t sum = 0; - for (size_t i = 0; i < chunk_lens_.size(); ++i) { - sum += chunk_lens_[i]; - if (static_cast(record_idx) < sum) { - out->first = i; - out->second = record_idx - sum + chunk_lens_[i]; - return; - } - } - // out->swap(std::make_pair(-1, -1)); - out->first = -1; - out->second = -1; - } + std::pair Locate(int record_idx); private: // the offset of each chunk in a file. @@ -62,12 +58,14 @@ class Index { }; // RangeScanner -// creates a scanner that sequencially reads records in the -// range [start, start+len). If start < 0, it scans from the -// beginning. If len < 0, it scans till the end of file. class RangeScanner { public: + // creates a scanner that sequencially reads records in the + // range [start, start+len). If start < 0, it scans from the + // beginning. If len < 0, it scans till the end of file. RangeScanner(Stream* fi, Index idx, int start, int end); + // Scan moves the cursor forward for one record and loads the chunk + // containing the record if not yet. bool Scan(); const std::string Record(); diff --git a/paddle/fluid/recordio/range_scanner_test.cc b/paddle/fluid/recordio/range_scanner_test.cc new file mode 100644 index 0000000000000..e365efc48b6aa --- /dev/null +++ b/paddle/fluid/recordio/range_scanner_test.cc @@ -0,0 +1,23 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/recordio/range_scanner.h" + +#include "gtest/gtest.h" + +using namespace paddle::recordio; + +TEST(RangeScanner, Recordio) { + Stream* fo = Stream::Open("/tmp/record_range", "w"); +} diff --git a/paddle/fluid/recordio/filesys.h b/paddle/fluid/recordio/recordio.cc similarity index 79% rename from paddle/fluid/recordio/filesys.h rename to paddle/fluid/recordio/recordio.cc index b21702bf3a0dc..f8ed1fedf6399 100644 --- a/paddle/fluid/recordio/filesys.h +++ b/paddle/fluid/recordio/recordio.cc @@ -12,13 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#pragma once +#include "paddle/fluid/recordio/io.h" +#include "paddle/fluid/string/piece.h" -#include -#include -#include - -class DefaultFileSys { -public: -private: -}; +namespace paddle { +namespace recordio {} // namespace recordio +} // namespace paddle diff --git a/paddle/fluid/recordio/recordio.h b/paddle/fluid/recordio/recordio.h new file mode 100644 index 0000000000000..39ae953ce1a10 --- /dev/null +++ b/paddle/fluid/recordio/recordio.h @@ -0,0 +1,20 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include "paddle/fluid/recordio/chunk.h" +#include "paddle/fluid/recordio/header.h" +#include "paddle/fluid/recordio/io.h" +#include "paddle/fluid/recordio/scanner.h" +#include "paddle/fluid/recordio/writer.h" diff --git a/paddle/fluid/recordio/scanner.cc b/paddle/fluid/recordio/scanner.cc index d5464ae9d8df8..45cf472e9d070 100644 --- a/paddle/fluid/recordio/scanner.cc +++ b/paddle/fluid/recordio/scanner.cc @@ -31,7 +31,7 @@ Scanner::Scanner(const char* paths) } bool Scanner::Scan() { - if (err_ == -1 || end_ == true) { + if (end_ == true) { return false; } if (cur_scanner_ == nullptr) { @@ -39,20 +39,30 @@ bool Scanner::Scan() { end_ = true; return false; } - if (err_ == -1) { - return false; - } } if (!cur_scanner_->Scan()) { - if (err_ == -1) { - return false; - } + end_ = true; + cur_file_ = nullptr; + return false; } - return true; } -bool Scanner::NextFile() {} +bool Scanner::NextFile() { + if (path_idx_ >= paths_.size()) { + return false; + } + std::string path = paths_[path_idx_]; + ++path_idx_; + cur_file_ = Stream::Open(path); + if (cur_file_ == nullptr) { + return false; + } + Index idx; + idx.LoadIndex(cur_file_); + cur_scanner_ = RangeScanner(cur_file_, idx, 0, -1); + return true; +} } // namespace recordio } // namespace paddle From 9dc69582de383a34a81ccea9f9ffa33172aa9219 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 6 Mar 2018 15:07:26 +0800 Subject: [PATCH 7/7] Make recordio simple --- CMakeLists.txt | 1 + cmake/external/snappystream.cmake | 58 + paddle/fluid/recordio/CMakeLists.txt | 14 +- paddle/fluid/recordio/chunk.cc | 156 +- paddle/fluid/recordio/chunk.h | 21 +- paddle/fluid/recordio/chunk_test.cc | 46 +- paddle/fluid/recordio/crc32.h | 33 - paddle/fluid/recordio/detail/crc.h | 1899 ------------------- paddle/fluid/recordio/header.cc | 20 +- paddle/fluid/recordio/header.h | 8 +- paddle/fluid/recordio/header_test.cc | 16 +- paddle/fluid/recordio/io.cc | 55 - paddle/fluid/recordio/io.h | 56 - paddle/fluid/recordio/io_test.cc 
| 36 - paddle/fluid/recordio/range_scanner.cc | 85 - paddle/fluid/recordio/range_scanner.h | 81 - paddle/fluid/recordio/range_scanner_test.cc | 23 - paddle/fluid/recordio/recordio.cc | 20 - paddle/fluid/recordio/recordio.h | 20 - paddle/fluid/recordio/scanner.cc | 68 - paddle/fluid/recordio/scanner.h | 44 - paddle/fluid/recordio/scanner_test.cc | 21 - paddle/fluid/recordio/writer.cc | 53 - paddle/fluid/recordio/writer.h | 50 - paddle/fluid/recordio/writer_test.cc | 29 - 25 files changed, 202 insertions(+), 2711 deletions(-) create mode 100644 cmake/external/snappystream.cmake delete mode 100644 paddle/fluid/recordio/crc32.h delete mode 100644 paddle/fluid/recordio/detail/crc.h delete mode 100644 paddle/fluid/recordio/io.cc delete mode 100644 paddle/fluid/recordio/io.h delete mode 100644 paddle/fluid/recordio/io_test.cc delete mode 100644 paddle/fluid/recordio/range_scanner.cc delete mode 100644 paddle/fluid/recordio/range_scanner.h delete mode 100644 paddle/fluid/recordio/range_scanner_test.cc delete mode 100644 paddle/fluid/recordio/recordio.cc delete mode 100644 paddle/fluid/recordio/recordio.h delete mode 100644 paddle/fluid/recordio/scanner.cc delete mode 100644 paddle/fluid/recordio/scanner.h delete mode 100644 paddle/fluid/recordio/scanner_test.cc delete mode 100644 paddle/fluid/recordio/writer.cc delete mode 100644 paddle/fluid/recordio/writer.h delete mode 100644 paddle/fluid/recordio/writer_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e9a2a8e7548a..c86889c05c8cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,6 +145,7 @@ include(external/pybind11) # download pybind11 include(external/cares) include(external/grpc) include(external/snappy) # download snappy +include(external/snappystream) include(cudnn) # set cudnn libraries, must before configure include(cupti) diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake new file mode 100644 index 0000000000000..5377a0b046a79 --- /dev/null +++ 
b/cmake/external/snappystream.cmake @@ -0,0 +1,58 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +IF(MOBILE_INFERENCE) + return() +ENDIF() + +include (ExternalProject) + +# NOTE: snappy is needed when linking with recordio + +SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream) +SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream) +SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." 
FORCE) + +ExternalProject_Add( + extern_snappystream + GIT_REPOSITORY "https://github.com/hoxnox/snappystream.git" + GIT_TAG "0.2.8" + PREFIX ${SNAPPYSTREAM_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DSNAPPY_ROOT=${SNAPPY_INSTALL_DIR} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPYSTREAM_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPYSTREAM_INSTALL_DIR}/lib + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_COMMAND make -j8 + INSTALL_COMMAND make install + DEPENDS snappy +) + +add_library(snappystream STATIC IMPORTED GLOBAL) +set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION + "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a") + +include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) +add_dependencies(snappystream extern_snappystream) diff --git a/paddle/fluid/recordio/CMakeLists.txt b/paddle/fluid/recordio/CMakeLists.txt index 46188e0a5b024..e1e7c2cdb3d0c 100644 --- a/paddle/fluid/recordio/CMakeLists.txt +++ b/paddle/fluid/recordio/CMakeLists.txt @@ -1,14 +1,6 @@ # internal library. 
-cc_library(io SRCS io.cc DEPS stringpiece) -cc_test(io_test SRCS io_test.cc DEPS io) -cc_library(header SRCS header.cc DEPS io) +cc_library(header SRCS header.cc) cc_test(header_test SRCS header_test.cc DEPS header) -cc_library(chunk SRCS chunk.cc DEPS snappy) +cc_library(chunk SRCS chunk.cc DEPS snappystream snappy header zlib) cc_test(chunk_test SRCS chunk_test.cc DEPS chunk) -cc_library(range_scanner SRCS range_scanner.cc DEPS io chunk) -cc_test(range_scanner_test SRCS range_scanner_test.cc DEPS range_scanner) -cc_library(scanner SRCS scanner.cc DEPS range_scanner) -cc_test(scanner_test SRCS scanner_test.cc DEPS scanner) -# exported library. -cc_library(recordio SRCS recordio.cc DEPS scanner chunk header) -cc_test(recordio_test SRCS recordio_test.cc DEPS scanner) +cc_library(recordio DEPS chunk header) diff --git a/paddle/fluid/recordio/chunk.cc b/paddle/fluid/recordio/chunk.cc index f498c64b0824f..587fd375c38ca 100644 --- a/paddle/fluid/recordio/chunk.cc +++ b/paddle/fluid/recordio/chunk.cc @@ -14,97 +14,119 @@ #include "paddle/fluid/recordio/chunk.h" -#include +#include #include -#include - -#include "snappy.h" - -#include "paddle/fluid/recordio/crc32.h" +#include "paddle/fluid/platform/enforce.h" +#include "snappystream.hpp" +#include "zlib.h" namespace paddle { namespace recordio { +constexpr size_t kMaxBufSize = 1024; -void Chunk::Add(const char* record, size_t length) { - records_.emplace_after(std::string(record, length)); - num_bytes_ += s.size() * sizeof(char); +template +static void ReadStreamByBuf(std::istream& in, int limit, Callback callback) { + char buf[kMaxBufSize]; + std::streamsize actual_size; + size_t counter = 0; + do { + auto actual_max = + limit > 0 ? 
std::min(limit - counter, kMaxBufSize) : kMaxBufSize; + actual_size = in.readsome(buf, actual_max); + if (actual_size == 0) { + break; + } + callback(buf, actual_size); + if (limit > 0) { + counter += actual_size; + } + } while (actual_size == kMaxBufSize); } -bool Chunk::Dump(Stream* fo, Compressor ct) { +static void PipeStream(std::istream& in, std::ostream& os) { + ReadStreamByBuf( + in, -1, [&os](const char* buf, size_t len) { os.write(buf, len); }); +} +static uint32_t Crc32Stream(std::istream& in, int limit = -1) { + auto crc = crc32(0, nullptr, 0); + ReadStreamByBuf(in, limit, [&crc](const char* buf, size_t len) { + crc = crc32(crc, reinterpret_cast(buf), len); + }); + return crc; +} + +bool Chunk::Write(std::ostream& os, Compressor ct) const { // NOTE(dzhwinter): don't check records.numBytes instead, because // empty records are allowed. - if (records_.size() == 0) return false; + if (records_.empty()) { + return false; + } + std::stringstream sout; + std::unique_ptr compressed_stream; + switch (ct) { + case Compressor::kNoCompress: + break; + case Compressor::kSnappy: + compressed_stream.reset(new snappy::oSnappyStream(sout)); + break; + default: + PADDLE_THROW("Not implemented"); + } + + std::ostream& buf_stream = compressed_stream ? 
*compressed_stream : sout; - // pack the record into consecutive memory for compress - std::ostringstream os; for (auto& record : records_) { - os.write(record.size(), sizeof(size_t)); - os.write(record.data(), static_cast(record.size())); + size_t sz = record.size(); + buf_stream.write(reinterpret_cast(&sz), sizeof(uint32_t)) + .write(record.data(), record.size()); } - std::unique_ptr buffer(new char[num_bytes_]); - size_t compressed = - CompressData(os.str().c_str(), num_bytes_, ct, buffer.get()); - uint32_t checksum = Crc32(buffer.get(), compressed); - Header hdr(records_.size(), checksum, ct, static_cast(compressed)); - hdr.Write(fo); - fo.Write(buffer.get(), compressed); - // clear the content - records_.clear(); - num_bytes_ = 0; + if (compressed_stream) { + compressed_stream.reset(); + } + + auto end_pos = sout.tellg(); + sout.seekg(0, std::ios::beg); + uint32_t len = static_cast(end_pos - sout.tellg()); + uint32_t crc = Crc32Stream(sout); + sout.seekg(0, std::ios::beg); + + Header hdr(static_cast(records_.size()), crc, ct, len); + hdr.Write(os); + PipeStream(sout, os); return true; } -void Chunk::Parse(Stream* fi, size_t offset) { - fi->Seek(offset); +void Chunk::Parse(std::istream& sin) { Header hdr; - hdr.Parse(fi); - - size_t size = static_cast(hdr.CompressSize()); - std::unique_ptr buffer(new char[size]); - fi->Read(buffer.get(), size); - size_t deflated_size = 0; - snappy::GetUncompressedLength(buffer.get(), size, &deflated_size); - std::unique_ptr deflated_buffer(new char[deflated_size]); - DeflateData(buffer.get(), size, hdr.CompressType(), deflated_buffer.get()); - std::istringstream deflated( - std::string(deflated_buffer.get(), deflated_size)); - for (size_t i = 0; i < hdr.NumRecords(); ++i) { - size_t rs; - deflated.read(&rs, sizeof(size_t)); - std::string record(rs, '\0'); - deflated.read(&record[0], rs); - records_.emplace_back(record); - num_bytes_ += record.size(); - } -} + hdr.Parse(sin); + auto beg_pos = sin.tellg(); + auto crc = 
Crc32Stream(sin, hdr.CompressSize()); + PADDLE_ENFORCE_EQ(hdr.Checksum(), crc); -size_t CompressData(const char* in, - size_t in_length, - Compressor ct, - char* out) { - size_t compressd_size = 0; - switch (ct) { + Clear(); + + sin.seekg(beg_pos, std::ios::beg); + std::unique_ptr compressed_stream; + switch (hdr.CompressType()) { case Compressor::kNoCompress: - // do nothing - memcpy(out, in, in_length); - compressd_size = in_length; break; case Compressor::kSnappy: - snappy::RawCompress(in, in_length, out, &compressd_size); + compressed_stream.reset(new snappy::iSnappyStream(sin)); break; + default: + PADDLE_THROW("Not implemented"); } - return compressd_size; -} -void DeflateData(const char* in, size_t in_length, Compressor ct, char* out) { - switch (c) { - case Compressor::kNoCompress: - memcpy(out, in, in_length); - break; - case Compressor::kSnappy: - snappy::RawUncompress(in, in_length, out); - break; + std::istream& stream = compressed_stream ? *compressed_stream : sin; + + for (uint32_t i = 0; i < hdr.NumRecords(); ++i) { + uint32_t rec_len; + stream.read(reinterpret_cast(&rec_len), sizeof(uint32_t)); + std::string buf; + buf.resize(rec_len); + stream.read(&buf[0], rec_len); + Add(buf); } } diff --git a/paddle/fluid/recordio/chunk.h b/paddle/fluid/recordio/chunk.h index 661364cd5d801..0ba9c63abbe72 100644 --- a/paddle/fluid/recordio/chunk.h +++ b/paddle/fluid/recordio/chunk.h @@ -13,11 +13,11 @@ // limitations under the License. #pragma once -#include #include +#include +#include "paddle/fluid/platform/macros.h" #include "paddle/fluid/recordio/header.h" -#include "paddle/fluid/recordio/io.h" namespace paddle { namespace recordio { @@ -26,16 +26,23 @@ namespace recordio { class Chunk { public: Chunk() : num_bytes_(0) {} - void Add(const char* record, size_t size); + void Add(std::string buf) { + records_.push_back(buf); + num_bytes_ += buf.size(); + } // dump the chunk into w, and clears the chunk and makes it ready for // the next add invocation. 
- bool Dump(Stream* fo, Compressor ct); - void Parse(Stream* fi, size_t offset); + bool Write(std::ostream& fo, Compressor ct) const; + void Clear() { + records_.clear(); + num_bytes_ = 0; + } + void Parse(std::istream& sin); size_t NumBytes() { return num_bytes_; } - const std::string Record(int i) { return records_[i]; } + const std::string& Record(int i) const { return records_[i]; } private: - std::forward_list records_; + std::vector records_; // sum of record lengths in bytes. size_t num_bytes_; DISABLE_COPY_AND_ASSIGN(Chunk); diff --git a/paddle/fluid/recordio/chunk_test.cc b/paddle/fluid/recordio/chunk_test.cc index 938e101fcd161..a67ba32ed6ab8 100644 --- a/paddle/fluid/recordio/chunk_test.cc +++ b/paddle/fluid/recordio/chunk_test.cc @@ -22,34 +22,28 @@ using namespace paddle::recordio; TEST(Chunk, SaveLoad) { Chunk ch; - ch.Add("12345", 6); - ch.Add("123", 4); - { - Stream* fs = Stream::Open("/tmp/record_11", "w"); - ch.Dump(fs, Compressor::kNoCompress); - EXPECT_EQ(ch.NumBytes(), 0); - } - { - Stream* fs = Stream::Open("/tmp/record_11", "r"); - ch.Parse(fs, 0); - EXPECT_EQ(ch.NumBytes(), 10); - } + ch.Add(std::string("12345", 6)); + ch.Add(std::string("123", 4)); + std::stringstream ss; + ch.Write(ss, Compressor::kNoCompress); + ch.Clear(); + ch.Parse(ss); + ASSERT_EQ(ch.NumBytes(), 10U); } TEST(Chunk, Compressor) { Chunk ch; - ch.Add("12345", 6); - ch.Add("123", 4); - ch.Add("123", 4); - ch.Add("123", 4); - { - Stream* fs = Stream::Open("/tmp/record_12", "w"); - ch.Dump(fs, Compressor::kSnappy); - EXPECT_EQ(ch.NumBytes(), 0); - } - { - Stream* fs = Stream::Open("/tmp/record_12", "r"); - ch.Parse(fs, 0); - EXPECT_EQ(ch.NumBytes(), 10); - } + ch.Add(std::string("12345", 6)); + ch.Add(std::string("123", 4)); + ch.Add(std::string("123", 4)); + ch.Add(std::string("123", 4)); + std::stringstream ss; + ch.Write(ss, Compressor::kSnappy); + std::stringstream ss2; + ch.Write(ss2, Compressor::kNoCompress); + ASSERT_LE(ss.tellp(), ss2.tellp()); // Compress should 
contain less data; + + ch.Clear(); + ch.Parse(ss); + ASSERT_EQ(ch.NumBytes(), 18); } diff --git a/paddle/fluid/recordio/crc32.h b/paddle/fluid/recordio/crc32.h deleted file mode 100644 index 77b430356f81b..0000000000000 --- a/paddle/fluid/recordio/crc32.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// A wrapper on crc library https://github.com/d-bahr/CRCpp -#include - -#include "paddle/fluid/recordio/detail/crc.h" - -namespace paddle { -namespace recordio { - -// usage -// char data[] = "hello,world"; -// crc = Crc32(data, 12); -// Assert_EQ(crc, 68a85159); - -uint32_t Crc32(const char* data, size_t size) { - return CRC::Calculate(data, size, CRC::CRC_32()) -} - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/detail/crc.h b/paddle/fluid/recordio/detail/crc.h deleted file mode 100644 index ef8390c34a445..0000000000000 --- a/paddle/fluid/recordio/detail/crc.h +++ /dev/null @@ -1,1899 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - @file CRC.h - @author Daniel Bahr - @version 0.2.0.6 - @copyright - @parblock - CRC++ - Copyright (c) 2016, Daniel Bahr - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright notice, - this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, - this list of conditions and the following disclaimer in the - documentation - and/or other materials provided with the distribution. - - * Neither the name of CRC++ nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF - THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - @endparblock -*/ - -/* - CRC++ can be configured by setting various #defines before #including this - header file: - - #define crcpp_uint8 - Specifies the type - used to store CRCs that have a width of 8 bits or less. - This type is not used - in CRC calculations. Defaults to ::std::uint8_t. - #define crcpp_uint16 - Specifies the type - used to store CRCs that have a width between 9 and 16 bits (inclusive). - This type is not used - in CRC calculations. Defaults to ::std::uint16_t. - #define crcpp_uint32 - Specifies the type - used to store CRCs that have a width between 17 and 32 bits (inclusive). - This type is not used - in CRC calculations. Defaults to ::std::uint32_t. - #define crcpp_uint64 - Specifies the type - used to store CRCs that have a width between 33 and 64 bits (inclusive). - This type is not used - in CRC calculations. Defaults to ::std::uint64_t. - #define crcpp_size - This type is used for - loop iteration and function signatures only. Defaults to ::std::size_t. - #define CRCPP_USE_NAMESPACE - Define to place all - CRC++ code within the ::CRCPP namespace. - #define CRCPP_BRANCHLESS - Define to enable a - branchless CRC implementation. The branchless implementation uses a single - integer - multiplication in the - bit-by-bit calculation instead of a small conditional. The branchless - implementation - may be faster on - processor architectures which support single-instruction integer - multiplication. 
- #define CRCPP_USE_CPP11 - Define to enables - C++11 features (move semantics, constexpr, static_assert, etc.). - #define CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - Define to include - definitions for little-used CRCs. -*/ - -#ifndef CRCPP_CRC_H_ -#define CRCPP_CRC_H_ - -#include // Includes CHAR_BIT -#ifdef CRCPP_USE_CPP11 -#include // Includes ::std::size_t -#include // Includes ::std::uint8_t, ::std::uint16_t, ::std::uint32_t, ::std::uint64_t -#else -#include // Includes size_t -#include // Includes uint8_t, uint16_t, uint32_t, uint64_t -#endif -#include // Includes ::std::numeric_limits -#include // Includes ::std::move - -#ifndef crcpp_uint8 -#ifdef CRCPP_USE_CPP11 -/// @brief Unsigned 8-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint8 ::std::uint8_t -#else -/// @brief Unsigned 8-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint8 uint8_t -#endif -#endif - -#ifndef crcpp_uint16 -#ifdef CRCPP_USE_CPP11 -/// @brief Unsigned 16-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint16 ::std::uint16_t -#else -/// @brief Unsigned 16-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint16 uint16_t -#endif -#endif - -#ifndef crcpp_uint32 -#ifdef CRCPP_USE_CPP11 -/// @brief Unsigned 32-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint32 ::std::uint32_t -#else -/// @brief Unsigned 32-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint32 uint32_t -#endif -#endif - -#ifndef crcpp_uint64 -#ifdef CRCPP_USE_CPP11 -/// @brief Unsigned 64-bit integer definition, used primarily for parameter -/// definitions. -#define crcpp_uint64 ::std::uint64_t -#else -/// @brief Unsigned 64-bit integer definition, used primarily for parameter -/// definitions. 
-#define crcpp_uint64 uint64_t -#endif -#endif - -#ifndef crcpp_size -#ifdef CRCPP_USE_CPP11 -/// @brief Unsigned size definition, used for specifying data sizes. -#define crcpp_size ::std::size_t -#else -/// @brief Unsigned size definition, used for specifying data sizes. -#define crcpp_size size_t -#endif -#endif - -#ifdef CRCPP_USE_CPP11 -/// @brief Compile-time expression definition. -#define crcpp_constexpr constexpr -#else -/// @brief Compile-time expression definition. -#define crcpp_constexpr const -#endif - -#ifdef CRCPP_USE_NAMESPACE -namespace CRCPP { -#endif - -/** - @brief Static class for computing CRCs. - @note This class supports computation of full and multi-part CRCs, using a - bit-by-bit algorithm or a - byte-by-byte lookup table. The CRCs are calculated using as many - optimizations as is reasonable. - If compiling with C++11, the constexpr keyword is used liberally so that - many calculations are - performed at compile-time instead of at runtime. -*/ -class CRC { -public: - // Forward declaration - template - struct Table; - - /** - @brief CRC parameters. - */ - template - struct Parameters { - CRCType polynomial; ///< CRC polynomial - CRCType initialValue; ///< Initial CRC value - CRCType finalXOR; ///< Value to XOR with the final CRC - bool reflectInput; ///< true to reflect all input bytes - bool reflectOutput; ///< true to reflect the output CRC (reflection occurs - /// before the final XOR) - - Table MakeTable() const; - }; - - /** - @brief CRC lookup table. After construction, the CRC parameters are fixed. - @note A CRC table can be used for multiple CRC calculations. - */ - template - struct Table { - // Constructors are intentionally NOT marked explicit. 
- Table(const Parameters ¶meters); - -#ifdef CRCPP_USE_CPP11 - Table(Parameters &¶meters); -#endif - - const Parameters &GetParameters() const; - - const CRCType *GetTable() const; - - CRCType operator[](unsigned char index) const; - - private: - void InitTable(); - - Parameters - parameters; ///< CRC parameters used to construct the table - CRCType table[1 << CHAR_BIT]; ///< CRC lookup table - }; - - // The number of bits in CRCType must be at least as large as CRCWidth. - // CRCType must be an unsigned integer type or a custom type with operator - // overloads. - template - static CRCType Calculate(const void *data, - crcpp_size size, - const Parameters ¶meters); - - template - static CRCType Calculate(const void *data, - crcpp_size size, - const Parameters ¶meters, - CRCType crc); - - template - static CRCType Calculate(const void *data, - crcpp_size size, - const Table &lookupTable); - - template - static CRCType Calculate(const void *data, - crcpp_size size, - const Table &lookupTable, - CRCType crc); - -// Common CRCs up to 64 bits. 
-// Note: Check values are the computed CRCs when given an ASCII input of -// "123456789" (without null terminator) -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_4_ITU(); - static const Parameters &CRC_5_EPC(); - static const Parameters &CRC_5_ITU(); - static const Parameters &CRC_5_USB(); - static const Parameters &CRC_6_CDMA2000A(); - static const Parameters &CRC_6_CDMA2000B(); - static const Parameters &CRC_6_ITU(); - static const Parameters &CRC_7(); -#endif - static const Parameters &CRC_8(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_8_EBU(); - static const Parameters &CRC_8_MAXIM(); - static const Parameters &CRC_8_WCDMA(); - static const Parameters &CRC_10(); - static const Parameters &CRC_10_CDMA2000(); - static const Parameters &CRC_11(); - static const Parameters &CRC_12_CDMA2000(); - static const Parameters &CRC_12_DECT(); - static const Parameters &CRC_12_UMTS(); - static const Parameters &CRC_13_BBC(); - static const Parameters &CRC_15(); - static const Parameters &CRC_15_MPT1327(); -#endif - static const Parameters &CRC_16_ARC(); - static const Parameters &CRC_16_BUYPASS(); - static const Parameters &CRC_16_CCITTFALSE(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_16_CDMA2000(); - static const Parameters &CRC_16_DECTR(); - static const Parameters &CRC_16_DECTX(); - static const Parameters &CRC_16_DNP(); -#endif - static const Parameters &CRC_16_GENIBUS(); - static const Parameters &CRC_16_KERMIT(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_16_MAXIM(); - static const Parameters &CRC_16_MODBUS(); - static const Parameters &CRC_16_T10DIF(); - static const Parameters &CRC_16_USB(); -#endif - static const Parameters &CRC_16_X25(); - static const Parameters &CRC_16_XMODEM(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_17_CAN(); - static const Parameters &CRC_21_CAN(); - static const 
Parameters &CRC_24(); - static const Parameters &CRC_24_FLEXRAYA(); - static const Parameters &CRC_24_FLEXRAYB(); - static const Parameters &CRC_30(); -#endif - static const Parameters &CRC_32(); - static const Parameters &CRC_32_BZIP2(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_32_C(); -#endif - static const Parameters &CRC_32_MPEG2(); - static const Parameters &CRC_32_POSIX(); -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - static const Parameters &CRC_32_Q(); - static const Parameters &CRC_40_GSM(); - static const Parameters &CRC_64(); -#endif - -#ifdef CRCPP_USE_CPP11 - CRC() = delete; - CRC(const CRC &other) = delete; - CRC &operator=(const CRC &other) = delete; - CRC(CRC &&other) = delete; - CRC &operator=(CRC &&other) = delete; -#endif - -private: -#ifndef CRCPP_USE_CPP11 - CRC(); - CRC(const CRC &other); - CRC &operator=(const CRC &other); -#endif - - template - static IntegerType Reflect(IntegerType value, crcpp_uint16 numBits); - - template - static CRCType Finalize(CRCType remainder, - CRCType finalXOR, - bool reflectOutput); - - template - static CRCType UndoFinalize(CRCType remainder, - CRCType finalXOR, - bool reflectOutput); - - template - static CRCType CalculateRemainder( - const void *data, - crcpp_size size, - const Parameters ¶meters, - CRCType remainder); - - template - static CRCType CalculateRemainder(const void *data, - crcpp_size size, - const Table &lookupTable, - CRCType remainder); - - template - static crcpp_constexpr IntegerType BoundedConstexprValue(IntegerType x); -}; - -/** - @brief Returns a CRC lookup table construct using these CRC parameters. - @note This function primarily exists to allow use of the auto keyword - instead of instantiating - a table directly, since template parameters are not inferred in - constructors. 
- @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC lookup table -*/ -template -inline CRC::Table -CRC::Parameters::MakeTable() const { - // This should take advantage of RVO and optimize out the copy. - return CRC::Table(*this); -} - -/** - @brief Constructs a CRC table from a set of CRC parameters - @param[in] parameters CRC parameters - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC -*/ -template -inline CRC::Table::Table( - const Parameters ¶meters) - : parameters(parameters) { - InitTable(); -} - -#ifdef CRCPP_USE_CPP11 -/** - @brief Constructs a CRC table from a set of CRC parameters - @param[in] parameters CRC parameters - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC -*/ -template -inline CRC::Table::Table( - Parameters &¶meters) - : parameters(::std::move(parameters)) { - InitTable(); -} -#endif - -/** - @brief Gets the CRC parameters used to construct the CRC table - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC parameters -*/ -template -inline const CRC::Parameters - &CRC::Table::GetParameters() const { - return parameters; -} - -/** - @brief Gets the CRC table - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC table -*/ -template -inline const CRCType *CRC::Table::GetTable() const { - return table; -} - -/** - @brief Gets an entry in the CRC table - @param[in] index Index into the CRC table - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC table entry -*/ -template -inline CRCType CRC::Table::operator[]( - unsigned char index) const { - return table[index]; -} - -/** - @brief Initializes a CRC table. 
- @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC -*/ -template -inline void CRC::Table::InitTable() { - // For masking off the bits for the CRC (in the event that the number of bits - // in CRCType is larger than CRCWidth) - static crcpp_constexpr CRCType BIT_MASK( - (CRCType(1) << (CRCWidth - CRCType(1))) | - ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1))); - - static crcpp_constexpr CRCType SHIFT( - CRC::BoundedConstexprValue(CHAR_BIT - CRCWidth)); - - CRCType crc; - unsigned char byte = 0; - - // Loop over each dividend (each possible number storable in an unsigned char) - do { - crc = CRC::CalculateRemainder( - &byte, sizeof(byte), parameters, CRCType(0)); - - // This mask might not be necessary; all unit tests pass with this line - // commented out, - // but that might just be a coincidence based on the CRC parameters used for - // testing. - // In any case, this is harmless to leave in and only adds a single machine - // instruction per loop iteration. - crc &= BIT_MASK; - - if (!parameters.reflectInput && CRCWidth < CHAR_BIT) { - // Undo the special operation at the end of the CalculateRemainder() - // function for non-reflected CRCs < CHAR_BIT. - crc <<= SHIFT; - } - - table[byte] = crc; - } while (++byte); -} - -/** - @brief Computes a CRC. - @param[in] data Data over which CRC will be computed - @param[in] size Size of the data - @param[in] parameters CRC parameters - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC -*/ -template -inline CRCType CRC::Calculate(const void *data, - crcpp_size size, - const Parameters ¶meters) { - CRCType remainder = - CalculateRemainder(data, size, parameters, parameters.initialValue); - - // No need to mask the remainder here; the mask will be applied in the - // Finalize() function. 
- - return Finalize( - remainder, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); -} -/** - @brief Appends additional data to a previous CRC calculation. - @note This function can be used to compute multi-part CRCs. - @param[in] data Data over which CRC will be computed - @param[in] size Size of the data - @param[in] parameters CRC parameters - @param[in] crc CRC from a previous calculation - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC -*/ -template -inline CRCType CRC::Calculate(const void *data, - crcpp_size size, - const Parameters ¶meters, - CRCType crc) { - CRCType remainder = UndoFinalize( - crc, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); - - remainder = CalculateRemainder(data, size, parameters, remainder); - - // No need to mask the remainder here; the mask will be applied in the - // Finalize() function. - - return Finalize( - remainder, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); -} - -/** - @brief Computes a CRC via a lookup table. - @param[in] data Data over which CRC will be computed - @param[in] size Size of the data - @param[in] lookupTable CRC lookup table - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC -*/ -template -inline CRCType CRC::Calculate(const void *data, - crcpp_size size, - const Table &lookupTable) { - const Parameters ¶meters = lookupTable.GetParameters(); - - CRCType remainder = - CalculateRemainder(data, size, lookupTable, parameters.initialValue); - - // No need to mask the remainder here; the mask will be applied in the - // Finalize() function. - - return Finalize( - remainder, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); -} - -/** - @brief Appends additional data to a previous CRC calculation using a lookup - table. 
- @note This function can be used to compute multi-part CRCs. - @param[in] data Data over which CRC will be computed - @param[in] size Size of the data - @param[in] lookupTable CRC lookup table - @param[in] crc CRC from a previous calculation - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC -*/ -template -inline CRCType CRC::Calculate(const void *data, - crcpp_size size, - const Table &lookupTable, - CRCType crc) { - const Parameters ¶meters = lookupTable.GetParameters(); - - CRCType remainder = UndoFinalize( - crc, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); - - remainder = CalculateRemainder(data, size, lookupTable, remainder); - - // No need to mask the remainder here; the mask will be applied in the - // Finalize() function. - - return Finalize( - remainder, - parameters.finalXOR, - parameters.reflectInput != parameters.reflectOutput); -} - -/** - @brief Reflects (i.e. reverses the bits within) an integer value. - @param[in] value Value to reflect - @param[in] numBits Number of bits in the integer which will be reflected - @tparam IntegerType Integer type of the value being reflected - @return Reflected value -*/ -template -inline IntegerType CRC::Reflect(IntegerType value, crcpp_uint16 numBits) { - IntegerType reversedValue(0); - - for (crcpp_uint16 i = 0; i < numBits; ++i) { - reversedValue = (reversedValue << 1) | (value & 1); - value >>= 1; - } - - return reversedValue; -} - -/** - @brief Computes the final reflection and XOR of a CRC remainder. 
- @param[in] remainder CRC remainder to reflect and XOR - @param[in] finalXOR Final value to XOR with the remainder - @param[in] reflectOutput true to reflect each byte of the remainder before - the XOR - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return Final CRC -*/ -template -inline CRCType CRC::Finalize(CRCType remainder, - CRCType finalXOR, - bool reflectOutput) { - // For masking off the bits for the CRC (in the event that the number of bits - // in CRCType is larger than CRCWidth) - static crcpp_constexpr CRCType BIT_MASK = - (CRCType(1) << (CRCWidth - CRCType(1))) | - ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1)); - - if (reflectOutput) { - remainder = Reflect(remainder, CRCWidth); - } - - return (remainder ^ finalXOR) & BIT_MASK; -} - -/** - @brief Undoes the process of computing the final reflection and XOR of a CRC - remainder. - @note This function allows for computation of multi-part CRCs - @note Calling UndoFinalize() followed by Finalize() (or vice versa) will - always return the original remainder value: - - CRCType x = ...; - CRCType y = Finalize(x, finalXOR, reflectOutput); - CRCType z = UndoFinalize(y, finalXOR, reflectOutput); - assert(x == z); - - @param[in] crc Reflected and XORed CRC - @param[in] finalXOR Final value XORed with the remainder - @param[in] reflectOutput true if the remainder is to be reflected - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return Un-finalized CRC remainder -*/ -template -inline CRCType CRC::UndoFinalize(CRCType crc, - CRCType finalXOR, - bool reflectOutput) { - // For masking off the bits for the CRC (in the event that the number of bits - // in CRCType is larger than CRCWidth) - static crcpp_constexpr CRCType BIT_MASK = - (CRCType(1) << (CRCWidth - CRCType(1))) | - ((CRCType(1) << (CRCWidth - CRCType(1))) - CRCType(1)); - - crc = (crc & BIT_MASK) ^ finalXOR; - - if (reflectOutput) { 
- crc = Reflect(crc, CRCWidth); - } - - return crc; -} - -/** - @brief Computes a CRC remainder. - @param[in] data Data over which the remainder will be computed - @param[in] size Size of the data - @param[in] parameters CRC parameters - @param[in] remainder Running CRC remainder. Can be an initial value or the - result of a previous CRC remainder calculation. - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC remainder -*/ -template -inline CRCType CRC::CalculateRemainder( - const void *data, - crcpp_size size, - const Parameters ¶meters, - CRCType remainder) { -#ifdef CRCPP_USE_CPP11 - // This static_assert is put here because this function will always be - // compiled in no matter what - // the template parameters are and whether or not a table lookup or bit-by-bit - // algorithm is used. - static_assert(::std::numeric_limits::digits >= CRCWidth, - "CRCType is too small to contain a CRC of width CRCWidth."); -#else - // Catching this compile-time error is very important. Sadly, the compiler - // error will be very cryptic, but it's - // better than nothing. - enum { - static_assert_failed_CRCType_is_too_small_to_contain_a_CRC_of_width_CRCWidth = - 1 / (::std::numeric_limits::digits >= CRCWidth ? 1 : 0) - }; -#endif - - const unsigned char *current = reinterpret_cast(data); - - // Slightly different implementations based on the parameters. The current - // implementations try to eliminate as much - // computation from the inner loop (looping over each bit) as possible. - if (parameters.reflectInput) { - CRCType polynomial = CRC::Reflect(parameters.polynomial, CRCWidth); - while (size--) { - remainder ^= *current++; - - // An optimizing compiler might choose to unroll this loop. - for (crcpp_size i = 0; i < CHAR_BIT; ++i) { -#ifdef CRCPP_BRANCHLESS - // Clever way to avoid a branch at the expense of a multiplication. 
This - // code is equivalent to the following: - // if (remainder & 1) - // remainder = (remainder >> 1) ^ polynomial; - // else - // remainder >>= 1; - remainder = (remainder >> 1) ^ ((remainder & 1) * polynomial); -#else - remainder = (remainder & 1) ? ((remainder >> 1) ^ polynomial) - : (remainder >> 1); -#endif - } - } - } else if (CRCWidth >= CHAR_BIT) { - static crcpp_constexpr CRCType CRC_WIDTH_MINUS_ONE(CRCWidth - CRCType(1)); -#ifndef CRCPP_BRANCHLESS - static crcpp_constexpr CRCType CRC_HIGHEST_BIT_MASK(CRCType(1) - << CRC_WIDTH_MINUS_ONE); -#endif - static crcpp_constexpr CRCType SHIFT( - BoundedConstexprValue(CRCWidth - CHAR_BIT)); - - while (size--) { - remainder ^= (static_cast(*current++) << SHIFT); - - // An optimizing compiler might choose to unroll this loop. - for (crcpp_size i = 0; i < CHAR_BIT; ++i) { -#ifdef CRCPP_BRANCHLESS - // Clever way to avoid a branch at the expense of a multiplication. This - // code is equivalent to the following: - // if (remainder & CRC_HIGHEST_BIT_MASK) - // remainder = (remainder << 1) ^ parameters.polynomial; - // else - // remainder <<= 1; - remainder = - (remainder << 1) ^ - (((remainder >> CRC_WIDTH_MINUS_ONE) & 1) * parameters.polynomial); -#else - remainder = (remainder & CRC_HIGHEST_BIT_MASK) - ? ((remainder << 1) ^ parameters.polynomial) - : (remainder << 1); -#endif - } - } - } else { - static crcpp_constexpr CRCType CHAR_BIT_MINUS_ONE(CHAR_BIT - 1); -#ifndef CRCPP_BRANCHLESS - static crcpp_constexpr CRCType CHAR_BIT_HIGHEST_BIT_MASK( - CRCType(1) << CHAR_BIT_MINUS_ONE); -#endif - static crcpp_constexpr CRCType SHIFT( - BoundedConstexprValue(CHAR_BIT - CRCWidth)); - - CRCType polynomial = parameters.polynomial << SHIFT; - remainder <<= SHIFT; - - while (size--) { - remainder ^= *current++; - - // An optimizing compiler might choose to unroll this loop. - for (crcpp_size i = 0; i < CHAR_BIT; ++i) { -#ifdef CRCPP_BRANCHLESS - // Clever way to avoid a branch at the expense of a multiplication. 
This - // code is equivalent to the following: - // if (remainder & CHAR_BIT_HIGHEST_BIT_MASK) - // remainder = (remainder << 1) ^ polynomial; - // else - // remainder <<= 1; - remainder = (remainder << 1) ^ - (((remainder >> CHAR_BIT_MINUS_ONE) & 1) * polynomial); -#else - remainder = (remainder & CHAR_BIT_HIGHEST_BIT_MASK) - ? ((remainder << 1) ^ polynomial) - : (remainder << 1); -#endif - } - } - - remainder >>= SHIFT; - } - - return remainder; -} - -/** - @brief Computes a CRC remainder using lookup table. - @param[in] data Data over which the remainder will be computed - @param[in] size Size of the data - @param[in] lookupTable CRC lookup table - @param[in] remainder Running CRC remainder. Can be an initial value or the - result of a previous CRC remainder calculation. - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return CRC remainder -*/ -template -inline CRCType CRC::CalculateRemainder( - const void *data, - crcpp_size size, - const Table &lookupTable, - CRCType remainder) { - const unsigned char *current = reinterpret_cast(data); - - if (lookupTable.GetParameters().reflectInput) { - while (size--) { -#if defined(WIN32) || defined(_WIN32) || defined(WINCE) -// Disable warning about data loss when doing (remainder >> CHAR_BIT) when -// remainder is one byte long. The algorithm is still correct in this case, -// though it's possible that one additional machine instruction will be -// executed. 
-#pragma warning(push) -#pragma warning(disable : 4333) -#endif - remainder = - (remainder >> CHAR_BIT) ^ - lookupTable[static_cast(remainder ^ *current++)]; -#if defined(WIN32) || defined(_WIN32) || defined(WINCE) -#pragma warning(pop) -#endif - } - } else if (CRCWidth >= CHAR_BIT) { - static crcpp_constexpr CRCType SHIFT( - BoundedConstexprValue(CRCWidth - CHAR_BIT)); - - while (size--) { - remainder = (remainder << CHAR_BIT) ^ - lookupTable[static_cast((remainder >> SHIFT) ^ - *current++)]; - } - } else { - static crcpp_constexpr CRCType SHIFT( - BoundedConstexprValue(CHAR_BIT - CRCWidth)); - - remainder <<= SHIFT; - - while (size--) { - // Note: no need to mask here since remainder is guaranteed to fit in a - // single byte. - remainder = - lookupTable[static_cast(remainder ^ *current++)]; - } - - remainder >>= SHIFT; - } - - return remainder; -} - -/** - @brief Function to force a compile-time expression to be >= 0. - @note This function is used to avoid compiler warnings because all constexpr - values are evaluated - in a function even in a branch will never be executed. This also means - we don't need pragmas - to get rid of warnings, but it still can be computed at compile-time. - Win-win! - @param[in] x Compile-time expression to bound - @tparam CRCType Integer type for storing the CRC result - @tparam CRCWidth Number of bits in the CRC - @return Non-negative compile-time expression -*/ -template -inline crcpp_constexpr IntegerType CRC::BoundedConstexprValue(IntegerType x) { - return (x < IntegerType(0)) ? IntegerType(0) : x; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-4 ITU. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-4 ITU has the following parameters and check value: - - polynomial = 0x3 - - initial value = 0x0 - - final XOR = 0x0 - - reflect input = true - - reflect output = true - - check value = 0x7 - @return CRC-4 ITU parameters -*/ -inline const CRC::Parameters &CRC::CRC_4_ITU() { - static const Parameters parameters = { - 0x3, 0x0, 0x0, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-5 EPC. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-5 EPC has the following parameters and check value: - - polynomial = 0x09 - - initial value = 0x09 - - final XOR = 0x00 - - reflect input = false - - reflect output = false - - check value = 0x00 - @return CRC-5 EPC parameters -*/ -inline const CRC::Parameters &CRC::CRC_5_EPC() { - static const Parameters parameters = { - 0x09, 0x09, 0x00, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-5 ITU. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-5 ITU has the following parameters and check value: - - polynomial = 0x15 - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = true - - reflect output = true - - check value = 0x07 - @return CRC-5 ITU parameters -*/ -inline const CRC::Parameters &CRC::CRC_5_ITU() { - static const Parameters parameters = { - 0x15, 0x00, 0x00, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-5 USB. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-5 USB has the following parameters and check value: - - polynomial = 0x05 - - initial value = 0x1F - - final XOR = 0x1F - - reflect input = true - - reflect output = true - - check value = 0x19 - @return CRC-5 USB parameters -*/ -inline const CRC::Parameters &CRC::CRC_5_USB() { - static const Parameters parameters = { - 0x05, 0x1F, 0x1F, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-6 CDMA2000-A. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-6 CDMA2000-A has the following parameters and check value: - - polynomial = 0x27 - - initial value = 0x3F - - final XOR = 0x00 - - reflect input = false - - reflect output = false - - check value = 0x0D - @return CRC-6 CDMA2000-A parameters -*/ -inline const CRC::Parameters &CRC::CRC_6_CDMA2000A() { - static const Parameters parameters = { - 0x27, 0x3F, 0x00, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-6 CDMA2000-B. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-6 CDMA2000-A has the following parameters and check value: - - polynomial = 0x07 - - initial value = 0x3F - - final XOR = 0x00 - - reflect input = false - - reflect output = false - - check value = 0x3B - @return CRC-6 CDMA2000-B parameters -*/ -inline const CRC::Parameters &CRC::CRC_6_CDMA2000B() { - static const Parameters parameters = { - 0x07, 0x3F, 0x00, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-6 ITU. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-6 ITU has the following parameters and check value: - - polynomial = 0x03 - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = true - - reflect output = true - - check value = 0x06 - @return CRC-6 ITU parameters -*/ -inline const CRC::Parameters &CRC::CRC_6_ITU() { - static const Parameters parameters = { - 0x03, 0x00, 0x00, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-7 JEDEC. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-7 JEDEC has the following parameters and check value: - - polynomial = 0x09 - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = false - - reflect output = false - - check value = 0x75 - @return CRC-7 JEDEC parameters -*/ -inline const CRC::Parameters &CRC::CRC_7() { - static const Parameters parameters = { - 0x09, 0x00, 0x00, false, false}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -/** - @brief Returns a set of parameters for CRC-8 SMBus. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-8 SMBus has the following parameters and check value: - - polynomial = 0x07 - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = false - - reflect output = false - - check value = 0xF4 - @return CRC-8 SMBus parameters -*/ -inline const CRC::Parameters &CRC::CRC_8() { - static const Parameters parameters = { - 0x07, 0x00, 0x00, false, false}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-8 EBU (aka CRC-8 AES). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-8 EBU has the following parameters and check value: - - polynomial = 0x1D - - initial value = 0xFF - - final XOR = 0x00 - - reflect input = true - - reflect output = true - - check value = 0x97 - @return CRC-8 EBU parameters -*/ -inline const CRC::Parameters &CRC::CRC_8_EBU() { - static const Parameters parameters = { - 0x1D, 0xFF, 0x00, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-8 MAXIM (aka CRC-8 DOW-CRC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-8 MAXIM has the following parameters and check value: - - polynomial = 0x31 - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = true - - reflect output = true - - check value = 0xA1 - @return CRC-8 MAXIM parameters -*/ -inline const CRC::Parameters &CRC::CRC_8_MAXIM() { - static const Parameters parameters = { - 0x31, 0x00, 0x00, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-8 WCDMA. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-8 WCDMA has the following parameters and check value: - - polynomial = 0x9B - - initial value = 0x00 - - final XOR = 0x00 - - reflect input = true - - reflect output = true - - check value = 0x25 - @return CRC-8 WCDMA parameters -*/ -inline const CRC::Parameters &CRC::CRC_8_WCDMA() { - static const Parameters parameters = { - 0x9B, 0x00, 0x00, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-10 ITU. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-10 ITU has the following parameters and check value: - - polynomial = 0x233 - - initial value = 0x000 - - final XOR = 0x000 - - reflect input = false - - reflect output = false - - check value = 0x199 - @return CRC-10 ITU parameters -*/ -inline const CRC::Parameters &CRC::CRC_10() { - static const Parameters parameters = { - 0x233, 0x000, 0x000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-10 CDMA2000. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-10 CDMA2000 has the following parameters and check value: - - polynomial = 0x3D9 - - initial value = 0x3FF - - final XOR = 0x000 - - reflect input = false - - reflect output = false - - check value = 0x233 - @return CRC-10 CDMA2000 parameters -*/ -inline const CRC::Parameters &CRC::CRC_10_CDMA2000() { - static const Parameters parameters = { - 0x3D9, 0x3FF, 0x000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-11 FlexRay. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-11 FlexRay has the following parameters and check value: - - polynomial = 0x385 - - initial value = 0x01A - - final XOR = 0x000 - - reflect input = false - - reflect output = false - - check value = 0x5A3 - @return CRC-11 FlexRay parameters -*/ -inline const CRC::Parameters &CRC::CRC_11() { - static const Parameters parameters = { - 0x385, 0x01A, 0x000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-12 CDMA2000. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-12 CDMA2000 has the following parameters and check value: - - polynomial = 0xF13 - - initial value = 0xFFF - - final XOR = 0x000 - - reflect input = false - - reflect output = false - - check value = 0xD4D - @return CRC-12 CDMA2000 parameters -*/ -inline const CRC::Parameters &CRC::CRC_12_CDMA2000() { - static const Parameters parameters = { - 0xF13, 0xFFF, 0x000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-12 DECT (aka CRC-12 X-CRC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-12 DECT has the following parameters and check value: - - polynomial = 0x80F - - initial value = 0x000 - - final XOR = 0x000 - - reflect input = false - - reflect output = false - - check value = 0xF5B - @return CRC-12 DECT parameters -*/ -inline const CRC::Parameters &CRC::CRC_12_DECT() { - static const Parameters parameters = { - 0x80F, 0x000, 0x000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-12 UMTS (aka CRC-12 3GPP). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-12 UMTS has the following parameters and check value: - - polynomial = 0x80F - - initial value = 0x000 - - final XOR = 0x000 - - reflect input = false - - reflect output = true - - check value = 0xDAF - @return CRC-12 UMTS parameters -*/ -inline const CRC::Parameters &CRC::CRC_12_UMTS() { - static const Parameters parameters = { - 0x80F, 0x000, 0x000, false, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-13 BBC. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-13 BBC has the following parameters and check value: - - polynomial = 0x1CF5 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x04FA - @return CRC-13 BBC parameters -*/ -inline const CRC::Parameters &CRC::CRC_13_BBC() { - static const Parameters parameters = { - 0x1CF5, 0x0000, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-15 CAN. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-15 CAN has the following parameters and check value: - - polynomial = 0x4599 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x059E - @return CRC-15 CAN parameters -*/ -inline const CRC::Parameters &CRC::CRC_15() { - static const Parameters parameters = { - 0x4599, 0x0000, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-15 MPT1327. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-15 MPT1327 has the following parameters and check value: - - polynomial = 0x6815 - - initial value = 0x0000 - - final XOR = 0x0001 - - reflect input = false - - reflect output = false - - check value = 0x2566 - @return CRC-15 MPT1327 parameters -*/ -inline const CRC::Parameters &CRC::CRC_15_MPT1327() { - static const Parameters parameters = { - 0x6815, 0x0000, 0x0001, false, false}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -/** - @brief Returns a set of parameters for CRC-16 ARC (aka CRC-16 IBM, CRC-16 - LHA). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-16 ARC has the following parameters and check value: - - polynomial = 0x8005 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = true - - reflect output = true - - check value = 0xBB3D - @return CRC-16 ARC parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_ARC() { - static const Parameters parameters = { - 0x8005, 0x0000, 0x0000, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 BUYPASS (aka CRC-16 VERIFONE, - CRC-16 UMTS). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 BUYPASS has the following parameters and check value: - - polynomial = 0x8005 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0xFEE8 - @return CRC-16 BUYPASS parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_BUYPASS() { - static const Parameters parameters = { - 0x8005, 0x0000, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 CCITT FALSE. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 CCITT FALSE has the following parameters and check value: - - polynomial = 0x1021 - - initial value = 0xFFFF - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x29B1 - @return CRC-16 CCITT FALSE parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_CCITTFALSE() { - static const Parameters parameters = { - 0x1021, 0xFFFF, 0x0000, false, false}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-16 CDMA2000. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-16 CDMA2000 has the following parameters and check value: - - polynomial = 0xC867 - - initial value = 0xFFFF - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x4C06 - @return CRC-16 CDMA2000 parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_CDMA2000() { - static const Parameters parameters = { - 0xC867, 0xFFFF, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 DECT-R (aka CRC-16 R-CRC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 DECT-R has the following parameters and check value: - - polynomial = 0x0589 - - initial value = 0x0000 - - final XOR = 0x0001 - - reflect input = false - - reflect output = false - - check value = 0x007E - @return CRC-16 DECT-R parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_DECTR() { - static const Parameters parameters = { - 0x0589, 0x0000, 0x0001, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 DECT-X (aka CRC-16 X-CRC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 DECT-X has the following parameters and check value: - - polynomial = 0x0589 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x007F - @return CRC-16 DECT-X parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_DECTX() { - static const Parameters parameters = { - 0x0589, 0x0000, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 DNP. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-16 DNP has the following parameters and check value: - - polynomial = 0x3D65 - - initial value = 0x0000 - - final XOR = 0xFFFF - - reflect input = true - - reflect output = true - - check value = 0xEA82 - @return CRC-16 DNP parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_DNP() { - static const Parameters parameters = { - 0x3D65, 0x0000, 0xFFFF, true, true}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -/** - @brief Returns a set of parameters for CRC-16 GENIBUS (aka CRC-16 EPC, - CRC-16 I-CODE, CRC-16 DARC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 GENIBUS has the following parameters and check value: - - polynomial = 0x1021 - - initial value = 0xFFFF - - final XOR = 0xFFFF - - reflect input = false - - reflect output = false - - check value = 0xD64E - @return CRC-16 GENIBUS parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_GENIBUS() { - static const Parameters parameters = { - 0x1021, 0xFFFF, 0xFFFF, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 KERMIT (aka CRC-16 CCITT, - CRC-16 CCITT-TRUE). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 KERMIT has the following parameters and check value: - - polynomial = 0x1021 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = true - - reflect output = true - - check value = 0x2189 - @return CRC-16 KERMIT parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_KERMIT() { - static const Parameters parameters = { - 0x1021, 0x0000, 0x0000, true, true}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-16 MAXIM. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-16 MAXIM has the following parameters and check value: - - polynomial = 0x8005 - - initial value = 0x0000 - - final XOR = 0xFFFF - - reflect input = true - - reflect output = true - - check value = 0x44C2 - @return CRC-16 MAXIM parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_MAXIM() { - static const Parameters parameters = { - 0x8005, 0x0000, 0xFFFF, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 MODBUS. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 MODBUS has the following parameters and check value: - - polynomial = 0x8005 - - initial value = 0xFFFF - - final XOR = 0x0000 - - reflect input = true - - reflect output = true - - check value = 0x4B37 - @return CRC-16 MODBUS parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_MODBUS() { - static const Parameters parameters = { - 0x8005, 0xFFFF, 0x0000, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 T10-DIF. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 T10-DIF has the following parameters and check value: - - polynomial = 0x8BB7 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0xD0DB - @return CRC-16 T10-DIF parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_T10DIF() { - static const Parameters parameters = { - 0x8BB7, 0x0000, 0x0000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 USB. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-16 USB has the following parameters and check value: - - polynomial = 0x8005 - - initial value = 0xFFFF - - final XOR = 0xFFFF - - reflect input = true - - reflect output = true - - check value = 0xB4C8 - @return CRC-16 USB parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_USB() { - static const Parameters parameters = { - 0x8005, 0xFFFF, 0xFFFF, true, true}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -/** - @brief Returns a set of parameters for CRC-16 X-25 (aka CRC-16 IBM-SDLC, - CRC-16 ISO-HDLC, CRC-16 B). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 X-25 has the following parameters and check value: - - polynomial = 0x1021 - - initial value = 0xFFFF - - final XOR = 0xFFFF - - reflect input = true - - reflect output = true - - check value = 0x906E - @return CRC-16 X-25 parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_X25() { - static const Parameters parameters = { - 0x1021, 0xFFFF, 0xFFFF, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-16 XMODEM (aka CRC-16 ZMODEM, - CRC-16 ACORN, CRC-16 LTE). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-16 XMODEM has the following parameters and check value: - - polynomial = 0x1021 - - initial value = 0x0000 - - final XOR = 0x0000 - - reflect input = false - - reflect output = false - - check value = 0x31C3 - @return CRC-16 XMODEM parameters -*/ -inline const CRC::Parameters &CRC::CRC_16_XMODEM() { - static const Parameters parameters = { - 0x1021, 0x0000, 0x0000, false, false}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-17 CAN. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-17 CAN has the following parameters and check value: - - polynomial = 0x1685B - - initial value = 0x00000 - - final XOR = 0x00000 - - reflect input = false - - reflect output = false - - check value = 0x04F03 - @return CRC-17 CAN parameters -*/ -inline const CRC::Parameters &CRC::CRC_17_CAN() { - static const Parameters parameters = { - 0x1685B, 0x00000, 0x00000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-21 CAN. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-21 CAN has the following parameters and check value: - - polynomial = 0x102899 - - initial value = 0x000000 - - final XOR = 0x000000 - - reflect input = false - - reflect output = false - - check value = 0x0ED841 - @return CRC-21 CAN parameters -*/ -inline const CRC::Parameters &CRC::CRC_21_CAN() { - static const Parameters parameters = { - 0x102899, 0x000000, 0x000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-24 OPENPGP. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-24 OPENPGP has the following parameters and check value: - - polynomial = 0x864CFB - - initial value = 0xB704CE - - final XOR = 0x000000 - - reflect input = false - - reflect output = false - - check value = 0x21CF02 - @return CRC-24 OPENPGP parameters -*/ -inline const CRC::Parameters &CRC::CRC_24() { - static const Parameters parameters = { - 0x864CFB, 0xB704CE, 0x000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-24 FlexRay-A. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-24 FlexRay-A has the following parameters and check value: - - polynomial = 0x5D6DCB - - initial value = 0xFEDCBA - - final XOR = 0x000000 - - reflect input = false - - reflect output = false - - check value = 0x7979BD - @return CRC-24 FlexRay-A parameters -*/ -inline const CRC::Parameters &CRC::CRC_24_FLEXRAYA() { - static const Parameters parameters = { - 0x5D6DCB, 0xFEDCBA, 0x000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-24 FlexRay-B. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-24 FlexRay-B has the following parameters and check value: - - polynomial = 0x5D6DCB - - initial value = 0xABCDEF - - final XOR = 0x000000 - - reflect input = false - - reflect output = false - - check value = 0x1F23B8 - @return CRC-24 FlexRay-B parameters -*/ -inline const CRC::Parameters &CRC::CRC_24_FLEXRAYB() { - static const Parameters parameters = { - 0x5D6DCB, 0xABCDEF, 0x000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-30 CDMA. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-30 CDMA has the following parameters and check value: - - polynomial = 0x2030B9C7 - - initial value = 0x3FFFFFFF - - final XOR = 0x00000000 - - reflect input = false - - reflect output = false - - check value = 0x3B3CB540 - @return CRC-30 CDMA parameters -*/ -inline const CRC::Parameters &CRC::CRC_30() { - static const Parameters parameters = { - 0x2030B9C7, 0x3FFFFFFF, 0x00000000, false, false}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -/** - @brief Returns a set of parameters for CRC-32 (aka CRC-32 ADCCP, CRC-32 - PKZip). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-32 has the following parameters and check value: - - polynomial = 0x04C11DB7 - - initial value = 0xFFFFFFFF - - final XOR = 0xFFFFFFFF - - reflect input = true - - reflect output = true - - check value = 0xCBF43926 - @return CRC-32 parameters -*/ -inline const CRC::Parameters &CRC::CRC_32() { - static const Parameters parameters = { - 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, true, true}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-32 BZIP2 (aka CRC-32 AAL5, CRC-32 - DECT-B, CRC-32 B-CRC). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-32 BZIP2 has the following parameters and check value: - - polynomial = 0x04C11DB7 - - initial value = 0xFFFFFFFF - - final XOR = 0xFFFFFFFF - - reflect input = false - - reflect output = false - - check value = 0xFC891918 - @return CRC-32 BZIP2 parameters -*/ -inline const CRC::Parameters &CRC::CRC_32_BZIP2() { - static const Parameters parameters = { - 0x04C11DB7, 0xFFFFFFFF, 0xFFFFFFFF, false, false}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-32 C (aka CRC-32 ISCSI, CRC-32 - Castagnoli, CRC-32 Interlaken). - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-32 C has the following parameters and check value: - - polynomial = 0x1EDC6F41 - - initial value = 0xFFFFFFFF - - final XOR = 0xFFFFFFFF - - reflect input = true - - reflect output = true - - check value = 0xE3069283 - @return CRC-32 C parameters -*/ -inline const CRC::Parameters &CRC::CRC_32_C() { - static const Parameters parameters = { - 0x1EDC6F41, 0xFFFFFFFF, 0xFFFFFFFF, true, true}; - return parameters; -} -#endif - -/** - @brief Returns a set of parameters for CRC-32 MPEG-2. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-32 MPEG-2 has the following parameters and check value: - - polynomial = 0x04C11DB7 - - initial value = 0xFFFFFFFF - - final XOR = 0x00000000 - - reflect input = false - - reflect output = false - - check value = 0x0376E6E7 - @return CRC-32 MPEG-2 parameters -*/ -inline const CRC::Parameters &CRC::CRC_32_MPEG2() { - static const Parameters parameters = { - 0x04C11DB7, 0xFFFFFFFF, 0x00000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-32 POSIX. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-32 POSIX has the following parameters and check value: - - polynomial = 0x04C11DB7 - - initial value = 0x00000000 - - final XOR = 0xFFFFFFFF - - reflect input = false - - reflect output = false - - check value = 0x765E7680 - @return CRC-32 POSIX parameters -*/ -inline const CRC::Parameters &CRC::CRC_32_POSIX() { - static const Parameters parameters = { - 0x04C11DB7, 0x00000000, 0xFFFFFFFF, false, false}; - return parameters; -} - -#ifdef CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS -/** - @brief Returns a set of parameters for CRC-32 Q. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-32 Q has the following parameters and check value: - - polynomial = 0x814141AB - - initial value = 0x00000000 - - final XOR = 0x00000000 - - reflect input = false - - reflect output = false - - check value = 0x3010BF7F - @return CRC-32 Q parameters -*/ -inline const CRC::Parameters &CRC::CRC_32_Q() { - static const Parameters parameters = { - 0x814141AB, 0x00000000, 0x00000000, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-40 GSM. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. 
- @note CRC-40 GSM has the following parameters and check value: - - polynomial = 0x0004820009 - - initial value = 0x0000000000 - - final XOR = 0xFFFFFFFFFF - - reflect input = false - - reflect output = false - - check value = 0xD4164FC646 - @return CRC-40 GSM parameters -*/ -inline const CRC::Parameters &CRC::CRC_40_GSM() { - static const Parameters parameters = { - 0x0004820009, 0x0000000000, 0xFFFFFFFFFF, false, false}; - return parameters; -} - -/** - @brief Returns a set of parameters for CRC-64 ECMA. - @note The parameters are static and are delayed-constructed to reduce memory - footprint. - @note CRC-64 ECMA has the following parameters and check value: - - polynomial = 0x42F0E1EBA9EA3693 - - initial value = 0x0000000000000000 - - final XOR = 0x0000000000000000 - - reflect input = false - - reflect output = false - - check value = 0x6C40DF5F0B497347 - @return CRC-64 ECMA parameters -*/ -inline const CRC::Parameters &CRC::CRC_64() { - static const Parameters parameters = { - 0x42F0E1EBA9EA3693, 0x0000000000000000, 0x0000000000000000, false, false}; - return parameters; -} -#endif // CRCPP_INCLUDE_ESOTERIC_CRC_DEFINITIONS - -#ifdef CRCPP_USE_NAMESPACE -} -#endif - -#endif // CRCPP_CRC_H_ diff --git a/paddle/fluid/recordio/header.cc b/paddle/fluid/recordio/header.cc index 31ee410bfb074..3641caaa89810 100644 --- a/paddle/fluid/recordio/header.cc +++ b/paddle/fluid/recordio/header.cc @@ -26,18 +26,18 @@ Header::Header() Header::Header(uint32_t num, uint32_t sum, Compressor c, uint32_t cs) : num_records_(num), checksum_(sum), compressor_(c), compress_size_(cs) {} -void Header::Parse(Stream* iss) { - iss->Read(reinterpret_cast(&num_records_), sizeof(uint32_t)); - iss->Read(reinterpret_cast(&checksum_), sizeof(uint32_t)); - iss->Read(reinterpret_cast(&compressor_), sizeof(uint32_t)); - iss->Read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +void Header::Parse(std::istream& is) { + is.read(reinterpret_cast(&num_records_), sizeof(uint32_t)) + 
.read(reinterpret_cast(&checksum_), sizeof(uint32_t)) + .read(reinterpret_cast(&compressor_), sizeof(uint32_t)) + .read(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } -void Header::Write(Stream* os) { - os->Write(reinterpret_cast(&num_records_), sizeof(uint32_t)); - os->Write(reinterpret_cast(&checksum_), sizeof(uint32_t)); - os->Write(reinterpret_cast(&compressor_), sizeof(uint32_t)); - os->Write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); +void Header::Write(std::ostream& os) const { + os.write(reinterpret_cast(&num_records_), sizeof(uint32_t)) + .write(reinterpret_cast(&checksum_), sizeof(uint32_t)) + .write(reinterpret_cast(&compressor_), sizeof(uint32_t)) + .write(reinterpret_cast(&compress_size_), sizeof(uint32_t)); } std::ostream& operator<<(std::ostream& os, Header h) { diff --git a/paddle/fluid/recordio/header.h b/paddle/fluid/recordio/header.h index 21e23f0a25619..cbd52642a668d 100644 --- a/paddle/fluid/recordio/header.h +++ b/paddle/fluid/recordio/header.h @@ -16,8 +16,6 @@ #include -#include "paddle/fluid/recordio/io.h" - namespace paddle { namespace recordio { @@ -26,7 +24,7 @@ constexpr size_t kDefaultMaxChunkSize = 32 * 1024 * 1024; // MagicNumber for memory checking constexpr uint32_t kMagicNumber = 0x01020304; -enum class Compressor { +enum class Compressor : uint32_t { // NoCompression means writing raw chunk data into files. // With other choices, chunks are compressed before written. 
kNoCompress = 0, @@ -45,8 +43,8 @@ class Header { Header(); Header(uint32_t num, uint32_t sum, Compressor ct, uint32_t cs); - void Write(Stream* os); - void Parse(Stream* iss); + void Write(std::ostream& os) const; + void Parse(std::istream& is); uint32_t NumRecords() const { return num_records_; } uint32_t Checksum() const { return checksum_; } diff --git a/paddle/fluid/recordio/header_test.cc b/paddle/fluid/recordio/header_test.cc index d6ab267329016..a7d627c3eb4a7 100644 --- a/paddle/fluid/recordio/header_test.cc +++ b/paddle/fluid/recordio/header_test.cc @@ -22,18 +22,10 @@ using namespace paddle::recordio; TEST(Recordio, ChunkHead) { Header hdr(0, 1, Compressor::kGzip, 3); - { - Stream* oss = Stream::Open("/tmp/record_1", "w"); - hdr.Write(oss); - delete oss; - } - + std::stringstream ss; + hdr.Write(ss); + ss.seekg(0, std::ios::beg); Header hdr2; - { - Stream* iss = Stream::Open("/tmp/record_1", "r"); - hdr2.Parse(iss); - delete iss; - } - + hdr2.Parse(ss); EXPECT_TRUE(hdr == hdr2); } diff --git a/paddle/fluid/recordio/io.cc b/paddle/fluid/recordio/io.cc deleted file mode 100644 index e5571ddf5d08e..0000000000000 --- a/paddle/fluid/recordio/io.cc +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/recordio/io.h" -#include "paddle/fluid/string/piece.h" - -#include - -namespace paddle { -namespace recordio { -Stream* Stream::Open(const char* filename, const char* mode) { - // Create IOStream for different filesystems - // HDFS: hdfs://tmp/file.txt - // Default: /tmp/file.txt - FILE* fp = nullptr; - if (string::HasPrefix(string::Piece(filename), string::Piece("/"))) { - fp = fopen(filename, mode); - } - return new FileStream(fp); -} - -size_t FileStream::Read(void* ptr, size_t size) { - return fread(ptr, 1, size, fp_); -} - -void FileStream::Write(const void* ptr, size_t size) { - size_t real = fwrite(ptr, 1, size, fp_); - PADDLE_ENFORCE(real == size, "FileStream write incomplete."); -} - -size_t FileStream::Tell() { return ftell(fp_); } -void FileStream::Seek(size_t p) { fseek(fp_, p, SEEK_SET); } - -bool FileStream::Eof() { return feof(fp_); } - -void FileStream::Close() { - if (fp_ != nullptr) { - fclose(fp_); - fp_ = nullptr; - } -} - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/io.h b/paddle/fluid/recordio/io.h deleted file mode 100644 index dedfed787d517..0000000000000 --- a/paddle/fluid/recordio/io.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include - -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/macros.h" // DISABLE_COPY_ASSIGN - -namespace paddle { -namespace recordio { - -// Seekable Stream Interface for read and write -class Stream { -public: - virtual ~Stream() {} - virtual size_t Read(void* ptr, size_t size) = 0; - virtual void Write(const void* ptr, size_t size) = 0; - virtual size_t Tell() = 0; - virtual void Seek(size_t p) = 0; - // Create Stream Instance - static Stream* Open(const char* filename, const char* mode); -}; - -// FileStream -class FileStream : public Stream { -public: - explicit FileStream(FILE* fp) : fp_(fp) {} - ~FileStream() { this->Close(); } - size_t Read(void* ptr, size_t size); - void Write(const void* ptr, size_t size); - size_t Tell(); - void Seek(size_t p); - bool Eof(); - void Close(); - -private: - FILE* fp_; - DISABLE_COPY_AND_ASSIGN(FileStream); -}; - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/io_test.cc b/paddle/fluid/recordio/io_test.cc deleted file mode 100644 index 8311494787637..0000000000000 --- a/paddle/fluid/recordio/io_test.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/recordio/io.h" - -#include "gtest/gtest.h" - -using namespace paddle::recordio; - -TEST(FileStream, IO) { - { - // Write - Stream* fs = Stream::Open("/tmp/record_0", "w"); - fs->Write("hello", 6); - delete fs; - } - { - // Read - Stream* fs = Stream::Open("/tmp/record_0", "r+"); - char buf[10]; - fs->Read(&buf, 6); - EXPECT_STREQ(buf, "hello"); - delete fs; - } -} diff --git a/paddle/fluid/recordio/range_scanner.cc b/paddle/fluid/recordio/range_scanner.cc deleted file mode 100644 index faf5078ba90ec..0000000000000 --- a/paddle/fluid/recordio/range_scanner.cc +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/recordio/range_scanner.h" - -namespace paddle { -namespace recordio { - -void Index::LoadIndex(FileStream* fi) { - int64_t offset = 0; - while (!fi->Eof()) { - Header hdr; - hdr.Parse(fi); - chunk_offsets_.push_back(offset); - chunk_lens_.push_back(hdr.NumRecords()); - chunk_records_.push_back(hdr.NumRecords()); - num_records_ += hdr.NumRecords(); - offset += hdr.CompressSize(); - } -} - -Index Index::ChunkIndex(int i) { Index idx; } - -std::pair Index::Locate(int record_idx) { - std::pair range(-1, -1); - int sum = 0; - for (size_t i = 0; i < chunk_lens_.size(); ++i) { - int len = static_cast(chunk_lens_[i]); - sum += len; - if (record_idx < sum) { - range.first = static_cast(i); - range.second = record_idx - sum + len; - } - } - return range; -} - -RangeScanner::RangeScanner(Stream* fi, Index idx, int start, int len) - : stream_(fi), index_(idx) { - if (start < 0) { - start = 0; - } - if (len < 0 || start + len >= idx.NumRecords()) { - len = idx.NumRecords() - start; - } - - start_ = start; - end_ = start + len; - cur_ = start - 1; // The intial status required by Scan - chunk_index_ = -1; - chunk_.reset(new Chunk); -} - -bool RangeScanner::Scan() { - ++cur_; - if (cur_ >= end_) { - return false; - } else { - auto cursor = index_.Locate(cur_); - if (chunk_index_ != cursor.first) { - chunk_index_ = cursor.first; - chunk_->Parse(fi, index_.ChunkOffsets[chunk_index_]); - } - } - return true; -} - -const std::string RangeScanner::Record() { - auto cursor = index_.Locate(cur_); - return chunk_->Record(cursor.second); -} - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/range_scanner.h b/paddle/fluid/recordio/range_scanner.h deleted file mode 100644 index 043fd8091e8e1..0000000000000 --- a/paddle/fluid/recordio/range_scanner.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/fluid/recordio/chunk.h" -#include "paddle/fluid/recordio/io.h" - -namespace paddle { -namespace recordio { - -// Index consists offsets and sizes of the consequetive chunks in a RecordIO -// file. -// -// Index supports Gob. Every field in the Index needs to be exported -// for the correct encoding and decoding using Gob. -class Index { -public: - Index() : num_records_(0) {} - // LoadIndex scans the file and parse chunkOffsets, chunkLens, and len. - void LoadIndex(Stream* fi); - // NumRecords returns the total number of all records in a RecordIO file. - int NumRecords() { return num_records_; } - // NumChunks returns the total number of chunks in a RecordIO file. - int NumChunks() { return chunk_lens_.size(); } - // ChunkIndex return the Index of i-th Chunk. - int ChunkIndex(int i); - - int64_t ChunkOffsets(int i) { return chunk_offsets_[i]; } - - // Locate returns the index of chunk that contains the given record, - // and the record index within the chunk. It returns (-1, -1) if the - // record is out of range. - std::pair Locate(int record_idx); - -private: - // the offset of each chunk in a file. - std::vector chunk_offsets_; - // the length of each chunk in a file. - std::vector chunk_lens_; - // the numer of all records in a file. - int num_records_; - // the number of records in chunks. 
- std::vector chunk_records_; -}; - -// RangeScanner -class RangeScanner { -public: - // creates a scanner that sequencially reads records in the - // range [start, start+len). If start < 0, it scans from the - // beginning. If len < 0, it scans till the end of file. - RangeScanner(Stream* fi, Index idx, int start, int end); - // Scan moves the cursor forward for one record and loads the chunk - // containing the record if not yet. - bool Scan(); - const std::string Record(); - -private: - Stream* fi; - Index index_; - int start_, end_, cur_; - int chunk_index_; - std::unique_ptr chunk_; -}; - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/range_scanner_test.cc b/paddle/fluid/recordio/range_scanner_test.cc deleted file mode 100644 index e365efc48b6aa..0000000000000 --- a/paddle/fluid/recordio/range_scanner_test.cc +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/recordio/range_scanner.h" - -#include "gtest/gtest.h" - -using namespace paddle::recordio; - -TEST(RangeScanner, Recordio) { - Stream* fo = Stream::Open("/tmp/record_range", "w"); -} diff --git a/paddle/fluid/recordio/recordio.cc b/paddle/fluid/recordio/recordio.cc deleted file mode 100644 index f8ed1fedf6399..0000000000000 --- a/paddle/fluid/recordio/recordio.cc +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/recordio/io.h" -#include "paddle/fluid/string/piece.h" - -namespace paddle { -namespace recordio {} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/recordio.h b/paddle/fluid/recordio/recordio.h deleted file mode 100644 index 39ae953ce1a10..0000000000000 --- a/paddle/fluid/recordio/recordio.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-#pragma once - -#include "paddle/fluid/recordio/chunk.h" -#include "paddle/fluid/recordio/header.h" -#include "paddle/fluid/recordio/io.h" -#include "paddle/fluid/recordio/scanner.h" -#include "paddle/fluid/recordio/writer.h" diff --git a/paddle/fluid/recordio/scanner.cc b/paddle/fluid/recordio/scanner.cc deleted file mode 100644 index 45cf472e9d070..0000000000000 --- a/paddle/fluid/recordio/scanner.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/recordio/chunk.h" - -#include // glob - -namespace paddle { -namespace recordio { - -Scanner::Scanner(const char* paths) - : cur_file_(nullptr), path_idx_(0), end_(false) { - glob_t glob_result; - glob(paths, GLOB_TILDE, NULL, &glob_result); - - for (size_t i = 0; i < glob_result.gl_pathc; ++i) { - paths_.emplace_back(std::string(glob_result.gl_pathv[i])); - } - globfree(&glob_result); -} - -bool Scanner::Scan() { - if (end_ == true) { - return false; - } - if (cur_scanner_ == nullptr) { - if (!NextFile()) { - end_ = true; - return false; - } - } - if (!cur_scanner_->Scan()) { - end_ = true; - cur_file_ = nullptr; - return false; - } - return true; -} - -bool Scanner::NextFile() { - if (path_idx_ >= paths_.size()) { - return false; - } - std::string path = paths_[path_idx_]; - ++path_idx_; - cur_file_ = Stream::Open(path); - if (cur_file_ == nullptr) { - return false; - } - Index idx; - idx.LoadIndex(cur_file_); - cur_scanner_ = RangeScanner(cur_file_, idx, 0, -1); - return true; -} - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/scanner.h b/paddle/fluid/recordio/scanner.h deleted file mode 100644 index 76a3448839f91..0000000000000 --- a/paddle/fluid/recordio/scanner.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include "paddle/fluid/recordio/io.h" - -namespace paddle { -namespace recordio { - -class RangeScanner; - -// Scanner is a scanner for multiple recordio files. -class Scanner { -public: - Scanner(const char* paths); - const std::string Record(); - bool Scan(); - void Close(); - bool NextFile(); - int Err() { return err_; } - -private: - std::vector paths_; - Stream* cur_file_; - RangeScanner* cur_scanner_; - int path_idx_; - bool end_; - int err_; -}; - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/scanner_test.cc b/paddle/fluid/recordio/scanner_test.cc deleted file mode 100644 index 7191500de7750..0000000000000 --- a/paddle/fluid/recordio/scanner_test.cc +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/recordio/scanner.h" - -#include "gtest/gtest.h" - -using namespace paddle::recordio; - -TEST(Scanner, Normal) { Scanner s("/tmp/record_*"); } diff --git a/paddle/fluid/recordio/writer.cc b/paddle/fluid/recordio/writer.cc deleted file mode 100644 index b2b0dd1017171..0000000000000 --- a/paddle/fluid/recordio/writer.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/recordio/writer.h" - -namespace paddle { -namespace recordio { - -Writer::Writer(Stream* fo) : stream_(fo), max_chunk_size_(0), compressor_(0) {} - -Writer::Writer(Stream* fo, int maxChunkSize, int compressor) - : stream_(fo), - max_chunk_size_(maxChunkSize), - compressor_(static_cast(compressor)) { - chunk_.reset(new Chunk); -} - -size_t Writer::Write(const char* buf, size_t length) { - if (stream_ == nullptr) { - LOG(WARNING) << "Cannot write since writer had been closed."; - return 0; - } - if ((length + chunk_->NumBytes()) > max_chunk_size_) { - chunk_->Dump(stream_, compressor_); - } - chunk_->Add(buf, length); - return length; -} - -// size_t Writer::Write(const char* buf, size_t length) { -// return Write(std::string(buf, length)); -// } - -// size_t Writer::Write(std::string&& buf) {} - -void Writer::Close() { - chunk_->Dump(stream_, compressor_); - stream_ = nullptr; -} - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/writer.h b/paddle/fluid/recordio/writer.h deleted file mode 100644 index d610450c53083..0000000000000 --- a/paddle/fluid/recordio/writer.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include - -#include "paddle/fluid/recordio/header.h" -#include "paddle/fluid/recordio/io.h" - -namespace paddle { -namespace recordio { - -// Writer creates a RecordIO file. -class Writer { -public: - Writer(Stream* fo); - Writer(Stream* fo, int maxChunkSize, int c); - - // Writes a record. It returns an error if Close has been called. - size_t Write(const char* buf, size_t length); - - // Close flushes the current chunk and makes the writer invalid. - void Close(); - -private: - // Set nullptr to mark a closed writer - Stream* stream_; - // Chunk for store object - std::unique_ptr chunk_; - // total records size, excluding metadata, before compression. - int max_chunk_size_; - // Compressor used for chuck - Compressor compressor_; - DISABLE_COPY_AND_ASSIGN(Writer); -}; - -} // namespace recordio -} // namespace paddle diff --git a/paddle/fluid/recordio/writer_test.cc b/paddle/fluid/recordio/writer_test.cc deleted file mode 100644 index 094815be2c022..0000000000000 --- a/paddle/fluid/recordio/writer_test.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/recordio/writer.h" - -#include "gtest/gtest.h" - -using namespace paddle::recordio; - -TEST(Writer, Normal) { - Stream* fs = Stream::Open("/tmp/record_21", "w"); - Writer w(fs); - w.Write("123", 4); - - // test exception - w.Close(); - EXPECT_ANY_THROW(w.Write("123", 4)); -}