
bitfield optimization of phase2 #120

Merged: 11 commits, Nov 9, 2020
4 changes: 2 additions & 2 deletions .github/workflows/build-test-cplusplus.yml
@@ -31,8 +31,8 @@ jobs:
cd build
cmake ../
cmake --build . -- -j 6
-ctest -j 6
-valgrind --leak-check=full --show-leak-kinds=all --errors-for-leak-kinds=all ctest -j 6
+ctest -j 6 --output-on-failure
+valgrind --leak-check=full --show-leak-kinds=all --errors-for-leak-kinds=all ctest -j 6 --output-on-failure

- name: cmake, RunTests with address- and undefined sanitizer on Ubuntu
if: startsWith(matrix.os, 'ubuntu')
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -42,11 +42,11 @@ FetchContent_Declare(
FetchContent_MakeAvailable(pybind11-src)

IF (CMAKE_BUILD_TYPE STREQUAL "RELEASE")
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mtune=native")
Contributor:

@arvidn isn't this a potential problem if you want to create binaries to distribute?

Contributor Author:

it would be, if we built such a binary distribution on an exotic machine. The problem with not tuning at all is that GCC doesn't even think it has access to popcnt, which the bitfield_index relies on.

+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -mtune=native")
ELSE()
-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Og")
-set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Og")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0")
ENDIF()

IF (CMAKE_BUILD_TYPE STREQUAL "ASAN")
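On the popcnt concern raised in the review comments above: the new bitfield code guards the intrinsic per compiler rather than assuming the instruction exists. A minimal standalone sketch of that guard (a restatement for orientation, not part of the diff; the helper name popcount64 is made up here):

```cpp
#include <cstdint>

#ifdef _MSC_VER
#include <intrin.h>  // __popcnt64
#endif

// Count set bits in a 64-bit word. GCC/Clang lower the builtin to a
// single POPCNT instruction when the target CPU is known to support it,
// and otherwise emit a portable fallback.
inline int popcount64(uint64_t x)
{
#ifdef _MSC_VER
    return static_cast<int>(__popcnt64(x));
#else
    return __builtin_popcountll(x);
#endif
}
```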
92 changes: 92 additions & 0 deletions src/bitfield.hpp
@@ -0,0 +1,92 @@
// Copyright 2020 Chia Network Inc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <utility>

#ifdef _MSC_VER
#include <intrin.h>  // __popcnt64
#endif

struct bitfield
{
    explicit bitfield(int64_t size)
        : buffer_(new uint64_t[(size + 63) / 64])
        , size_((size + 63) / 64)
    {
        clear();
    }

    void set(int64_t const bit)
    {
        assert(bit / 64 < size_);
        buffer_[bit / 64] |= uint64_t(1) << (bit % 64);
    }

    bool get(int64_t const bit) const
    {
        assert(bit / 64 < size_);
        return (buffer_[bit / 64] & (uint64_t(1) << (bit % 64))) != 0;
    }

    void clear()
    {
        std::memset(buffer_.get(), 0, size_ * 8);
    }

    // size in bits, always a multiple of 64
    int64_t size() const { return size_ * 64; }

    void swap(bitfield& rhs)
    {
        using std::swap;
        swap(buffer_, rhs.buffer_);
        swap(size_, rhs.size_);
    }

    // number of set bits in the half-open range [start_bit, end_bit).
    // start_bit must be 64-bit aligned
    int64_t count(int64_t const start_bit, int64_t const end_bit) const
    {
        assert((start_bit % 64) == 0);
        assert(start_bit <= end_bit);

        uint64_t const* start = buffer_.get() + start_bit / 64;
        uint64_t const* end = buffer_.get() + end_bit / 64;
        int64_t ret = 0;
        while (start != end) {
#ifdef _MSC_VER
            ret += __popcnt64(*start);
#else
            ret += __builtin_popcountl(*start);
#endif
            ++start;
        }
        // count the remaining bits in the final, partial word
        int const tail = end_bit % 64;
        if (tail > 0) {
            uint64_t const mask = (uint64_t(1) << tail) - 1;
#ifdef _MSC_VER
            ret += __popcnt64(*end & mask);
#else
            ret += __builtin_popcountl(*end & mask);
#endif
        }
        return ret;
    }

    void free_memory()
    {
        buffer_.reset();
        size_ = 0;
    }

private:
    std::unique_ptr<uint64_t[]> buffer_;

    // number of 64-bit words
    int64_t size_;
};
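For orientation, a minimal usage sketch of the bitfield above (not part of the diff): sizes are given in bits, storage is rounded up to whole 64-bit words, and count() takes a half-open bit range whose start must be 64-bit aligned.

```cpp
#include "bitfield.hpp"
#include <cassert>

int main()
{
    bitfield b(100);                // rounds up to two 64-bit words, zeroed
    b.set(3);
    b.set(70);
    assert(b.get(3) && !b.get(4));
    assert(b.size() == 128);        // reported size is in whole words
    assert(b.count(0, 64) == 1);    // bit 3 in the first word
    assert(b.count(64, 128) == 1);  // bit 70 in the second word
    b.free_memory();                // drop the buffer once it is no longer needed
}
```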
63 changes: 63 additions & 0 deletions src/bitfield_index.hpp
@@ -0,0 +1,63 @@
// Copyright 2020 Chia Network Inc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

#include "bitfield.hpp"

struct bitfield_index
{
    // cache the number of set bits every kIndexBucket bits.
    // For a bitfield of size 2^32, this means a 4 MiB index
    static inline const int64_t kIndexBucket = 8 * 1024;

    bitfield_index(bitfield const& b) : bitfield_(b)
    {
        uint64_t counter = 0;
        index_.reserve(bitfield_.size() / kIndexBucket);
Contributor:

> index_.reserve(bitfield_.size() / kIndexBucket);

Should this be index_.reserve((bitfield_.size() / kIndexBucket) + 1);, or is the index not used for the last bucket, where the bucket size < kIndexBucket?
Contributor Author:

no, I think this is right. This provides an index for the number of set bits at the start of every kIndexBucket bits. So it rounds down.

        for (int64_t idx = 0; idx < int64_t(bitfield_.size()); idx += kIndexBucket) {
            index_.push_back(counter);
            int64_t const left = std::min(int64_t(bitfield_.size()) - idx, kIndexBucket);
            counter += bitfield_.count(idx, idx + left);
        }
    }

    // Returns the rank of pos (the number of set bits before it) and the
    // number of set bits in the half-open range [pos, pos + offset).
    std::pair<uint64_t, uint64_t> lookup(uint64_t pos, uint64_t offset) const
    {
        uint64_t const bucket = pos / kIndexBucket;

        assert(bucket < index_.size());
        assert(pos < uint64_t(bitfield_.size()));
        assert(pos + offset < uint64_t(bitfield_.size()));
        assert(bitfield_.get(pos) && bitfield_.get(pos + offset));

        // set bits before the start of this bucket, from the index
        uint64_t const base = index_[bucket];

        // round pos down to a 64-bit boundary, since bitfield::count()
        // requires an aligned start bit
        int64_t const aligned_pos = pos & ~uint64_t(63);

        uint64_t const aligned_pos_count = bitfield_.count(bucket * kIndexBucket, aligned_pos);
        uint64_t const offset_count = aligned_pos_count + bitfield_.count(aligned_pos, pos + offset);
        uint64_t const pos_count = aligned_pos_count + bitfield_.count(aligned_pos, pos);

        assert(offset_count >= pos_count);

        return { base + pos_count, offset_count - pos_count };
    }

private:
    bitfield const& bitfield_;
    std::vector<uint64_t> index_;
};
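Again for orientation (not part of the diff), a minimal sketch of lookup(pos, offset): both pos and pos + offset must point at set bits, and the result pairs the rank of pos with the count of set bits in [pos, pos + offset).

```cpp
#include "bitfield.hpp"
#include "bitfield_index.hpp"
#include <cassert>

int main()
{
    bitfield b(16 * 1024);            // spans two full index buckets
    b.set(10);
    b.set(100);
    b.set(9000);
    bitfield_index idx(b);

    // pos = 100, pos + offset = 9000: both bits are set, as lookup asserts.
    auto const result = idx.lookup(100, 8900);
    assert(result.first == 1);        // only bit 10 precedes bit 100
    assert(result.second == 1);       // set bits in [100, 9000): just bit 100
}
```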
