Skip to content

Commit

Permalink
reindex, log, test: fixes bitcoin#21379
Browse files Browse the repository at this point in the history
This fixes a blk file size calculation made during reindex that results in increased blk file malformity.
The fix is to avoid double counting the size of the serialization header during reindex.
This adds a unit test to reproduce the bug before the fix and to ensure that it does not recur.
These changes include a log message change also so as to not be as alarming. This is a common and recoverable
data corruption. These messages can now be filtered by the debug log reindex category.
  • Loading branch information
mruddy authored and LarryRuane committed Jul 27, 2022
1 parent 59ac8ba commit e8b90d4
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 6 deletions.
1 change: 1 addition & 0 deletions src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ BITCOIN_TESTS =\
test/blockencodings_tests.cpp \
test/blockfilter_index_tests.cpp \
test/blockfilter_tests.cpp \
test/blockmanager_tests.cpp \
test/bloom_tests.cpp \
test/bswap_tests.cpp \
test/checkqueue_tests.cpp \
Expand Down
15 changes: 10 additions & 5 deletions src/node/blockstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,11 @@ fs::path GetBlockPosFilename(const FlatFilePos& pos)

bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, CChain& active_chain, uint64_t nTime, bool fKnown)
{
if (!fKnown) {
// Since this block is not already on disk, an extra 8 bytes will need
// to be written to disk for the serialization header.
nAddSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
}
LOCK(cs_LastBlockFile);

unsigned int nFile = fKnown ? pos.nFile : m_last_blockfile;
Expand Down Expand Up @@ -788,19 +793,19 @@ bool ReadRawBlockFromDisk(std::vector<uint8_t>& block, const FlatFilePos& pos, c
return true;
}

/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp)
{
unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
const unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
FlatFilePos blockPos;
if (dbp != nullptr) {
const bool position_known{dbp != nullptr};
if (position_known) {
blockPos = *dbp;
}
if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) {
if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) {
error("%s: FindBlockPos failed", __func__);
return FlatFilePos();
}
if (dbp == nullptr) {
if (!position_known) {
if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) {
AbortNode("Failed to write block");
return FlatFilePos();
Expand Down
4 changes: 4 additions & 0 deletions src/node/blockstorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** The maximum size of a blk?????.dat file (since 0.8) */
static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB

/** Size of header written by WriteBlockToDisk before a serialized CBlock */
static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE = CMessageHeader::MESSAGE_START_SIZE + sizeof(unsigned int);

extern std::atomic_bool fImporting;
extern std::atomic_bool fReindex;
/** Pruning-related variables and constants */
Expand Down Expand Up @@ -171,6 +174,7 @@ class BlockManager
bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex* pindex, const CChainParams& chainparams)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);

/** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp);

/** Calculate the amount of disk space the block & undo files currently use */
Expand Down
43 changes: 43 additions & 0 deletions src/test/blockmanager_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (c) 2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <chainparams.h>
#include <node/blockstorage.h>
#include <node/context.h>
#include <validation.h>

#include <boost/test/unit_test.hpp>
#include <test/util/setup_common.h>

using node::BlockManager;
using node::BLOCK_SERIALIZATION_HEADER_SIZE;

// use BasicTestingSetup here for the data directory configuration, setup, and cleanup
BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup)

BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
{
const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)};
BlockManager blockman {};
CChain chain {};
// simulate adding a genesis block normally
FlatFilePos pos{blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr)};
BOOST_CHECK_EQUAL(pos.nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// simulate what happens during reindex
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
// the block is found at offset 8 because there is an 8 byte serialization header
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
pos = blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos);
BOOST_CHECK_EQUAL(pos.nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// now simulate what happens after reindex for the first new block processed
// the actual block contents don't matter, just that it's a block.
// verify that the write position is at offset 0x12d.
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
pos = blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr);
BOOST_CHECK_EQUAL(pos.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(params->GenesisBlock(), CLIENT_VERSION) + BLOCK_SERIALIZATION_HEADER_SIZE);
}

BOOST_AUTO_TEST_SUITE_END()
13 changes: 12 additions & 1 deletion src/validation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4378,7 +4378,18 @@ void CChainState::LoadExternalBlockFile(FILE* fileIn, FlatFilePos* dbp)
}
}
} catch (const std::exception& e) {
LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what());
// historical bugs added extra data to the block files that does not deserialize cleanly.
// commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process.
// the code that reads the block files deals with invalid data by simply ignoring it.
// it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly
// and passes all of the other block validation checks dealing with POW and the merkle root, etc...
// we merely note with this informational log message when unexpected data is encountered.
// we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but
// less likely scenarios. we don't have enough information to tell a difference here.
// the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator
// may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and
// perhaps ordered, block files for later reindexing.
LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what());
}
}
} catch (const std::runtime_error& e) {
Expand Down

0 comments on commit e8b90d4

Please sign in to comment.