Skip to content

Commit

Permalink
Replaced GetHandle()/FindNextActive() with iterator.
Browse files Browse the repository at this point in the history
Motivations for this change include:
1. Eliminating the pattern where an exception is used to detect the end of a slice.
2. Making the iterator thread safe.
  • Loading branch information
MikeHopcroft committed May 21, 2018
1 parent 393e690 commit cf21399
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 68 deletions.
39 changes: 25 additions & 14 deletions inc/BitFunnel/Index/IShard.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@

#include <cstddef> // ptrdiff_t return value.
#include <iosfwd> // std::ostream parameter.

#include "BitFunnel/BitFunnelTypes.h" // DocIndex return value.
#include "BitFunnel/IInterface.h" // Base class.
#include "BitFunnel/Index/RowId.h" // RowId parameter.
#include "BitFunnel/Noncopyable.h" // Base class.


namespace BitFunnel
Expand Down Expand Up @@ -63,20 +63,31 @@ namespace BitFunnel

virtual void TemporaryWriteAllSlices(IFileManager& fileManager) const = 0;

// Get the document handle for the nth document in a shard
// Warning: this method may not be thread-safe.
// If the document's slice is recycled, the returned handle
// could point to a different document than intended.
virtual DocumentHandle GetHandle(size_t docNumber) const = 0;

// Find next active document in shard, looking first at docNumber
// - Return true if found and modify docNumber to position of active document
// - Return false if no more active documents
// Warning: this method may not be thread-safe.
// If the document's slice is recycled, the altered docNumber
// may no longer be valid or could reference a different document than intended.
virtual bool FindNextActive(size_t& docNumber) const = 0;

//
// DocumentHandle iterator
//
// DESIGN NOTE: Can't use the C++ <iterator> pattern because
// iterators cannot be abstract base classes (they must be copyable)
// and IShard cannot know the implementation of an iterator of its
// subclass. Chose to implement a pattern that is similar to
// the C++ iterator pattern. In this approach, const_iterator is an
// abstract base class which is provided via and std::unique_ptr
// from GetIterator().
class const_iterator : public IInterface, NonCopyable
{
public:
virtual const_iterator& operator++() = 0;
virtual DocumentHandle operator*() const = 0;
virtual bool AtEnd() const = 0;
};

// Returns an iterator to the DocumentHandles corresponding to
// documents currently active in the index. This method is
// thread safe with respect to document addition and deletion.
// WARNING: this iterator holds a Token which will prevent
// recycling. Be sure to release iterator when finished.
virtual std::unique_ptr<const_iterator> GetIterator() = 0;

// Returns an std::vector containing the bit densities for each row in
// the RowTable with the specified rank. Bit densities are computed
Expand Down
108 changes: 73 additions & 35 deletions src/Index/src/Shard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ namespace BitFunnel
}
catch (std::exception e)
{
// LogB(Logging::Error, "LoadSliceBuffer", "Error reading slice buffer data from stream");
// LogB(Logging::Error, "LoadSliceBuffer", "Error reading slice buffer data from stream");
m_sliceBufferAllocator.Release(buffer);
throw e;
throw e;
}

return buffer;
Expand Down Expand Up @@ -511,61 +511,99 @@ namespace BitFunnel
}


DocumentHandle Shard::GetHandle(size_t docNumber) const
//*************************************************************************
//
// DocumentHandle iterator.
//
//*************************************************************************
Shard::ConstIterator::ConstIterator(Shard const & shard)
: m_token(shard.m_tokenManager.RequestToken()),
m_sliceBuffers(shard.m_sliceBuffers),
m_sliceCapacity(shard.m_sliceCapacity),
m_sliceIndex(0),
m_slice(nullptr),
m_sliceOffset(0)
{
// Hold a token to ensure that m_sliceBuffers won't be recycled.
auto token = m_tokenManager.RequestToken();

size_t sliceIndex = docNumber / GetSliceCapacity();
size_t docSliceIndex = docNumber % GetSliceCapacity();

if (sliceIndex < (*m_sliceBuffers).size())
{
Slice* slice = Slice::GetSliceFromBuffer((*m_sliceBuffers)[sliceIndex],
GetSlicePtrOffset());
return DocumentHandleInternal::DocumentHandleInternal(slice, docSliceIndex);
}
else
{
throw RecoverableError("Invalid document number");
}
// Advance to first active document, if not already at one.
EnsureActive();
}


bool Shard::FindNextActive(size_t& docNumber) const
void Shard::ConstIterator::EnsureActive()
{
// Hold a token to ensure that m_sliceBuffers won't be recycled.
auto token = m_tokenManager.RequestToken();

try
for (; !AtEnd(); ++m_sliceIndex)
{
while (1)
m_slice = Slice::GetSliceFromBuffer(
(*m_sliceBuffers)[m_sliceIndex],
GetSlicePtrOffset());

for (; m_sliceOffset < m_sliceCapacity; ++m_sliceOffset)
{
DocumentHandleInternal handle = GetHandle(docNumber);
DocumentHandleInternal handle(m_slice, m_sliceOffset);
if (handle.IsActive())
{
return true;
return;
}
++docNumber;
}
}
catch (RecoverableError e)
}


bool Shard::ConstIterator::AtEnd() const
{
return m_sliceIndex >= m_sliceBuffers->size();
}


Shard::const_iterator& Shard::ConstIterator::operator++()
{
if (AtEnd())
{
RecoverableError
error("Shard::ConstIterator::operator++: iterator beyond end.");
throw error;
}

++m_sliceOffset;
EnsureActive();
return *this;
}


DocumentHandle Shard::ConstIterator::operator*() const
{
if (AtEnd())
{
return false;
RecoverableError
error("Shard::ConstIterator::operator*: iterator beyond end.");
throw error;
}

return DocumentHandleInternal(m_slice, m_sliceOffset);
}


std::unique_ptr<IShard::const_iterator> Shard::GetIterator()
{
std::unique_ptr<IShard::const_iterator> it(new ConstIterator(*this));
return std::move(it);
}


//*************************************************************************
//
// GetDensities
//
//*************************************************************************
std::vector<double> Shard::GetDensities(Rank rank) const
{
// Hold a token to ensure that m_sliceBuffers won't be recycled.
auto token = m_tokenManager.RequestToken();

// m_sliceBuffers can change at any time, but we can safely grab a copy
// because
// 1. m_sliceBuffers is std::atomic.
// 2. no m_sliceBuffers value observed while holding token can be
// recycled.
// m_sliceBuffers can change at any time, but we can safely grab the
// pointer because
// 1. m_sliceBuffers is std::atomic
// 2. token guarantees that m_sliceBuffers pointer cannot be recycled.
std::vector<void*> const & buffers = *m_sliceBuffers;

RowTableDescriptor const & rowTable = m_rowTables[rank];
Expand Down
64 changes: 50 additions & 14 deletions src/Index/src/Shard.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "BitFunnel/BitFunnelTypes.h" // ShardId parameter, embedded.
#include "BitFunnel/Index/IShard.h" // Base class.
#include "BitFunnel/Index/Token.h" // Token embedded.
#include "BitFunnel/NonCopyable.h" // Base class.
#include "BitFunnel/Term.h" // Term parameter.
#include "DocTableDescriptor.h" // Required for embedded std::unique_ptr.
Expand Down Expand Up @@ -115,27 +116,20 @@ namespace BitFunnel
virtual void TemporaryWriteAllSlices(IFileManager& fileManager) const override;


// Get the document handle for the nth document in a shard
// Warning: this method may not be thread-safe.
// If the document's slice is recycled, the returned handle
// could point to a different document than intended.
virtual DocumentHandle GetHandle(size_t docNumber) const override;

// Find next active document in shard, looking first at docNumber
// - Return true if found and modify docNumber to position of active document
// - Return false if no more active documents
// Warning: this method may not be thread-safe.
// If the document's slice is recycled, the altered docNumber
// may no longer be valid or could reference a different document than intended.
virtual bool FindNextActive(size_t& docNumber) const override;

// Returns an iterator to the DocumentHandles corresponding to
// documents currently active in the index. This method is
// thread safe with respect to document addition and deletion.
// WARNING: this iterator holds a Token which will prevent
// recycling. Be sure to release iterator when finished.
virtual std::unique_ptr<const_iterator> GetIterator() override;

// Returns an std::vector containing the bit densities for each row in
// the RowTable with the specified rank. Bit densities are computed
// over all slices, for those columns that correspond to active
// documents.
virtual std::vector<double>
GetDensities(Rank rank) const override;

//
// Shard exclusive members.
//
Expand Down Expand Up @@ -338,5 +332,47 @@ namespace BitFunnel

std::unique_ptr<DocumentFrequencyTableBuilder> m_docFrequencyTableBuilder;
std::mutex m_temporaryFrequencyTableMutex;

//
// DocumentHandle iterator
//
class ConstIterator : public IShard::const_iterator
{
public:
ConstIterator(Shard const & shard);

//
// IShard::const_iterator methods
//

virtual const_iterator& operator++() override;
virtual DocumentHandle operator*() const override;
virtual bool AtEnd() const override;

private:
// If the current document is not active, advance until an active
// document is found or we reach the end of the shard.
void EnsureActive();

// DESIGN NOTE: class is NonCopyable because Token is NonCopyable.
// WARNING: m_token must be declared before m_sliceBuffers to ensure
// that the constructor will be holding the token before initializing
// m_sliceBuffers. If m_sliceBuffers were to be initialized before
// m_token, there is a possibility that m_sliceBuffers could be
// recycled.
Token m_token;

// Holds a snapshot of the shard's m_sliceBuffers vector.
// m_token ensures this snapshot won't be recycled.
std::vector<void*>* m_sliceBuffers;

const DocIndex m_sliceCapacity;

// State of the iteration is specified by m_sliceIndex and
// m_sliceOffset. The member m_slice is a convenience variable.
size_t m_sliceIndex;
Slice* m_slice;
size_t m_sliceOffset;
};
};
}
10 changes: 5 additions & 5 deletions tools/BitFunnel/src/ShowCommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,14 @@ namespace BitFunnel
output
<< "Shard: " << shardId << std::endl;

// Gather ids for first 64 documents in shard
// Gather DocIds for first 64 documents in shard
IShard& shard = ingestor.GetShard(shardId);
std::vector<DocId> ids;
size_t docNumber = 0;
for (int i = 0; i <= 64 && shard.FindNextActive(docNumber); ++i)
auto itPtr = shard.GetIterator();
auto & it = *itPtr;
for (size_t i = 0; i < 64 && !it.AtEnd(); ++it, ++i)
{
ids.push_back(shard.GetHandle(docNumber).GetDocId());
++docNumber;
ids.push_back((*it).GetDocId());
}

// Print out 100s digit of DocId.
Expand Down

0 comments on commit cf21399

Please sign in to comment.