Skip to content

Commit

Permalink
Several changes to mruset bitcoin#6064
Browse files Browse the repository at this point in the history
  • Loading branch information
Cryptoslave committed Mar 8, 2016
1 parent 22aa691 commit fc20c94
Show file tree
Hide file tree
Showing 8 changed files with 220 additions and 52 deletions.
84 changes: 68 additions & 16 deletions src/bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,33 @@ const uint32_t MAX_BLOOM_FILTER_SIZE = 36000; // bytes
const uint32_t MAX_HASH_FUNCS = 50;

CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
/**
* The ideal size for a bloom filter with a given number of elements and false positive rate is:
* - nElements * log(fp rate) / ln(2)^2
* We ignore filter parameters which will create a bloom filter larger than the protocol limits
*/
vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
/**
* The ideal number of hash functions is filter size * ln(2) / number of elements
* Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
* See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
*/
isFull(false),
isEmpty(false),
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn),
nFlags(nFlagsIn)
/**
* The ideal size for a bloom filter with a given number of elements and false positive rate is:
* - nElements * log(fp rate) / ln(2)^2
* We ignore filter parameters which will create a bloom filter larger than the protocol limits
*/
vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
/**
* The ideal number of hash functions is filter size * ln(2) / number of elements
* Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
* See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
*/
isFull(false),
isEmpty(false),
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn),
nFlags(nFlagsIn)
{
}

/**
 * Private constructor used by CRollingBloomFilter.
 *
 * Uses the same sizing formulas as the public constructor, but the results are
 * not clamped to MAX_BLOOM_FILTER_SIZE / MAX_HASH_FUNCS. The filter starts out
 * flagged as empty and always uses BLOOM_UPDATE_NONE.
 *
 * @param nElements number of elements the filter is dimensioned for
 * @param nFPRate   target false-positive rate
 * @param nTweakIn  tweak value stored in nTweak
 */
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
    // Bit count is -nElements * log(nFPRate) / ln(2)^2, truncated, then turned into whole bytes.
    vData(static_cast<unsigned int>(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
    isFull(false),
    isEmpty(true),
    // Hash count is bits / nElements * ln(2). This reads vData.size(), so it
    // assumes vData is declared (and thus constructed) before nHashFuncs.
    nHashFuncs(static_cast<unsigned int>(vData.size() * 8 / nElements * LN2)),
    nTweak(nTweakIn),
    nFlags(BLOOM_UPDATE_NONE)
{
}

Expand Down Expand Up @@ -203,3 +214,44 @@ void CBloomFilter::UpdateEmptyFull()
isFull = full;
isEmpty = empty;
}

/**
 * Build a rolling filter that keeps track of the last nElements insertions.
 *
 * Two bloom filters, each dimensioned for 2 * nElements entries, are filled
 * together and cleared in a staggered fashion (one of them every nElements
 * insertions), so at least one always contains the last nElements keys.
 *
 * @param nElements number of most recent insertions to remember
 * @param fpRate    false-positive rate passed to both underlying filters
 * @param nTweak    tweak passed to both underlying filters
 */
CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
    nBloomSize(nElements * 2),
    nInsertions(0),
    b1(nElements * 2, fpRate, nTweak),
    b2(nElements * 2, fpRate, nTweak)
{
}

/**
 * Add vKey to the rolling set.
 *
 * The key goes into both filters. b1 is wiped at the start of each cycle of
 * nBloomSize insertions and b2 is wiped at the half-way point.
 */
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
{
    const unsigned int nHalfCycle = nBloomSize / 2;

    // Staggered reset: at most one of the two filters is cleared per call.
    if (nInsertions == 0)
        b1.clear();
    else if (nInsertions == nHalfCycle)
        b2.clear();

    b1.insert(vKey);
    b2.insert(vKey);

    // Advance the position within the cycle, wrapping at nBloomSize.
    ++nInsertions;
    if (nInsertions == nBloomSize)
        nInsertions = 0;
}

bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
{
if (nInsertions < nBloomSize / 2) {
return b2.contains(vKey);
}
return b1.contains(vKey);
}

/** Forget everything: restart the insertion cycle and clear both underlying filters. */
void CRollingBloomFilter::clear()
{
    nInsertions = 0;
    b2.clear();
    b1.clear();
}

29 changes: 29 additions & 0 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ class CBloomFilter

unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;

// Private constructor for CRollingBloomFilter, no restrictions on size
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
friend class CRollingBloomFilter;

public:
/**
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
Expand Down Expand Up @@ -96,4 +100,29 @@ class CBloomFilter
//! Checks for empty and full filters to avoid wasting cpu
void UpdateEmptyFull();
};

/**
 * CRollingBloomFilter is a probabilistic "most recently inserted" set.
 *
 * It is constructed with the number of items to keep track of (N), a
 * false-positive rate and a tweak for the underlying bloom filters.
 *
 * contains(item) always returns true if item was among the last N keys passed
 * to insert(), but it may also return true for items that were never inserted.
 */
class CRollingBloomFilter
{
public:
    CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);

    //! Record vKey as the most recent insertion.
    void insert(const std::vector<unsigned char>& vKey);

    //! Probabilistic membership test; false positives are possible.
    bool contains(const std::vector<unsigned char>& vKey) const;

    //! Clear both underlying filters and restart the insertion count.
    void clear();

private:
    //! Length of one insertion cycle (2 * nElements).
    unsigned int nBloomSize;
    //! Position within the current cycle, in the range [0, nBloomSize).
    unsigned int nInsertions;
    //! Filter cleared at the start of each cycle.
    CBloomFilter b1;
    //! Filter cleared at the half-way point of each cycle.
    CBloomFilter b2;
};


#endif /* ANONCOIN_BLOOM_H */
15 changes: 8 additions & 7 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4508,7 +4508,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
{
LOCK(cs_vNodes);
//! Use deterministic randomness to send to the same nodes for 24 hours
//! at a time so the setAddrKnowns of the chosen nodes prevent repeats
//! at a time so the addrKnowns of the chosen nodes prevent repeats
static uint256 hashSalt;
if (hashSalt == 0)
hashSalt = GetRandHash();
Expand Down Expand Up @@ -5324,9 +5324,9 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
LOCK(cs_vNodes);
BOOST_FOREACH(CNode* pnode, vNodes)
{
// Periodically clear setAddrKnown to allow refresh broadcasts
// Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast)
pnode->setAddrKnown.clear();
pnode->addrKnown.clear();

// Rebroadcast our address
AdvertizeLocal(pnode);
Expand All @@ -5344,10 +5344,10 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
vAddr.reserve(pto->vAddrToSend.size());
BOOST_FOREACH(const CAddress& addr, pto->vAddrToSend)
{
// returns true if wasn't already contained in the set
if (pto->setAddrKnown.insert(addr).second)
{
vAddr.push_back(addr);
if (!pto->addrKnown.contains(addr.GetKey()))
{
pto->addrKnown.insert(addr.GetKey());
vAddr.push_back(addr);
//! I2P addresses are MUCH larger than IP addresses: a trickle set to 1K is over 1/2 megabyte of payload,
//! over 33x larger per addr. So let's reduce that amount, down to what the max addrman will return
//! or 1000, whichever is less. Any more than 1K, and various nodes will start marking ours as misbehaving.
Expand Down Expand Up @@ -5664,3 +5664,4 @@ int64_t GetMinFee(const CTransaction& tx, unsigned int nBytes, bool fAllowFree,
nMinFee = MAX_MONEY;
return nMinFee;
}

46 changes: 23 additions & 23 deletions src/mruset.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
// Copyright (c) 2012 The Bitcoin developers
// Copyright (c) 2013-2015 The Anoncoin Core developers
// Copyright (c) 2012-2015 The Bitcoin developers
// Copyright (c) 2013-2016 The Anoncoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#ifndef ANONCOIN_MRUSET_H
#define ANONCOIN_MRUSET_H

#include <deque>

#include <set>
#include <vector>
#include <utility>

/** STL-like set container that only keeps the most recent N elements. */
Expand All @@ -24,11 +25,13 @@ template <typename T> class mruset

protected:
std::set<T> set;
std::deque<T> queue;
size_type nMaxSize;
std::vector<iterator> order;
size_type first_used;
size_type first_unused;
const size_type nMaxSize;

public:
mruset(size_type nMaxSizeIn = 0) { nMaxSize = nMaxSizeIn; }
mruset(size_type nMaxSizeIn = 1) : nMaxSize(nMaxSizeIn) { clear(); }
iterator begin() const { return set.begin(); }
iterator end() const { return set.end(); }
reverse_iterator rbegin() const { return set.rbegin(); }
Expand All @@ -37,7 +40,13 @@ template <typename T> class mruset
bool empty() const { return set.empty(); }
iterator find(const key_type& k) const { return set.find(k); }
size_type count(const key_type& k) const { return set.count(k); }
void clear() { set.clear(); queue.clear(); }
void clear()
{
set.clear();
order.assign(nMaxSize, set.end());
first_used = 0;
first_unused = 0;
}
bool inline friend operator==(const mruset<T>& a, const mruset<T>& b) { return a.set == b.set; }
bool inline friend operator==(const mruset<T>& a, const std::set<T>& b) { return a.set == b; }
bool inline friend operator<(const mruset<T>& a, const mruset<T>& b) { return a.set < b.set; }
Expand All @@ -46,27 +55,18 @@ template <typename T> class mruset
std::pair<iterator, bool> ret = set.insert(x);
if (ret.second)
{
if (nMaxSize && queue.size() == nMaxSize)
{
set.erase(queue.front());
queue.pop_front();
if (set.size() == nMaxSize + 1) {
set.erase(order[first_used]);
order[first_used] = set.end();
if (++first_used == nMaxSize) first_used = 0;
}
queue.push_back(x);
order[first_unused] = ret.first;
if (++first_unused == nMaxSize) first_unused = 0;
}
return ret;
}
size_type max_size() const { return nMaxSize; }
size_type max_size(size_type s)
{
if (s)
while (queue.size() > s)
{
set.erase(queue.front());
queue.pop_front();
}
nMaxSize = s;
return nMaxSize;
}

};

#endif
6 changes: 4 additions & 2 deletions src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2459,7 +2459,10 @@ unsigned int ReceiveFloodSize() { return 1000*GetArg("-maxreceivebuffer", 5*1000
unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }

//! As I2P addrs are MUCH larger than IP addresses, we limit addrKnown (the rolling bloom filter of recently seen addresses) to 1250 elements, to have a smaller memory profile per node.
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) : ssSend(SER_NETWORK, INIT_PROTO_VERSION), setAddrKnown(1250)
CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fInboundIn) :
ssSend(SER_NETWORK, INIT_PROTO_VERSION),
addrKnown(1250, 0.001, insecure_rand()),
setInventoryKnown(SendBufferSize() / 1000)
{
//! Protocol 70009 changes the node creation process so it is deterministic.
//! Every node starts out with an IP only stream type, except for I2P addresses, they are set immediately to a full size address space.
Expand Down Expand Up @@ -2502,7 +2505,6 @@ CNode::CNode(SOCKET hSocketIn, CAddress addrIn, std::string addrNameIn, bool fIn
nStartingHeight = -1;
fGetAddr = false;
fRelayTxes = false;
setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = new CBloomFilter();
nPingNonceSent = 0;
nPingUsecStart = 0;
Expand Down
11 changes: 8 additions & 3 deletions src/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ class CNode

// flood relay
std::vector<CAddress> vAddrToSend;
mruset<CAddress> setAddrKnown;
CRollingBloomFilter addrKnown;
bool fGetAddr;
std::set<uint256> setKnown;

Expand Down Expand Up @@ -409,16 +409,21 @@ class CNode

void AddAddressKnown(const CAddress& addr)
{
setAddrKnown.insert(addr);
addrKnown.insert(addr.GetKey());
}

void PushAddress(const CAddress& addr)
{
// Known checking here is only to save space from duplicates.
// SendMessages will filter it again for knowns that were added
// after addresses were pushed.
if (addr.IsValid() && !setAddrKnown.count(addr))
if (addr.IsValid() && !addrKnown.contains(addr.GetKey())) {
if (vAddrToSend.size() >= MAX_ADDR_TO_SEND) {
vAddrToSend[insecure_rand() % vAddrToSend.size()] = addr;
} else {
vAddrToSend.push_back(addr);
}
}
}


Expand Down
Loading

0 comments on commit fc20c94

Please sign in to comment.