Skip to content

Commit

Permalink
Merge branch 'master' into doc
Browse files Browse the repository at this point in the history
  • Loading branch information
tpoisonooo committed May 18, 2019
2 parents 701eacd + 9dacd3a commit 1f9f677
Show file tree
Hide file tree
Showing 21 changed files with 308 additions and 98 deletions.
7 changes: 7 additions & 0 deletions .daq_pm/configs/net_test_debug
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_net_test_debug
target net_test
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug
binary ~/adb_push_and_run.sh tests/net_test
7 changes: 7 additions & 0 deletions .daq_pm/configs/run_net_debug
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_arm64_no_test_debug
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug -DBNN_BUILD_TEST=OFF -DBNN_BUILD_BENCHMARK=OFF -DBNN_CHECK_CONSISTENCY=ON
# binary ~/adb_push_and_run.sh binaries/run /data/local/tmp/model_imagenet.dab 144 145
binary ~/adb_push_and_run.sh binaries/run /data/local/tmp/simple5.dab output --v=6
6 changes: 6 additions & 0 deletions .daq_pm/configs/run_net_new_bitpack
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# This is a configuration file for [project_manager.vim](https://github.com/daquexian/project_manager.vim)
name binary-nn
type cpp
build_dir build_arm64_no_test
cmake_options -DCMAKE_TOOLCHAIN_FILE=~/Android/Sdk/ndk-bundle/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-28 -DANDROID_ABI=arm64-v8a -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Release -DBNN_BUILD_TEST=OFF -DBNN_BUILD_BENCHMARK=OFF
binary ~/adb_push_and_run.sh binaries/run /data/local/tmp/br24g2.daq 242 && adb pull /data/local/tmp/mat.txt
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ project(dabnn CXX)
message(STATUS "CMake Version: ${CMAKE_VERSION}")

if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "")
message("No build type, set to Release")
message(STATUS "No build type, set to Release")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
endif()
message("Build type: ${CMAKE_BUILD_TYPE}")

set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

Expand Down Expand Up @@ -41,6 +39,8 @@ endif()
if (${BNN_BUILD_ANDROID})
set(CMAKE_CXX_STANDARD 17)

set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")

if (${BNN_BUILD_TEST})
include(cmake/gtest.cmake)
configure_gtest()
Expand Down
16 changes: 16 additions & 0 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@
#include <dabnn/mat.h>
#include <dabnn/net.h>

// Benchmark: times pack_mat_64 on a pair of mats constructed with
// dimensions (1, 32, 32, 128) -- a smaller workload than BM_pack_mat_64.
// NOTE(review): the trailing `0` constructor argument is passed through
// unchanged; its meaning is defined by bnn::Mat -- confirm there.
static void BM_pack_mat_64_small(benchmark::State &state) {
    const bnn::Mat float_input(1, 32, 32, 128, bnn::DataType::Float, 0);
    bnn::Mat packed_output(1, 32, 32, 128, bnn::DataType::Bit, 0);

    // Only the packing call sits inside the timed loop; both mats are
    // built once up front.
    for (auto _ : state) {
        pack_mat_64(float_input, packed_output);
    }
}

// Benchmark: times pack_mat_128 on a pair of mats constructed with
// dimensions (1, 32, 32, 128) -- a smaller workload than BM_pack_mat_128.
// NOTE(review): the trailing `0` constructor argument is passed through
// unchanged; its meaning is defined by bnn::Mat -- confirm there.
static void BM_pack_mat_128_small(benchmark::State &state) {
    const bnn::Mat float_input(1, 32, 32, 128, bnn::DataType::Float, 0);
    bnn::Mat packed_output(1, 32, 32, 128, bnn::DataType::Bit, 0);

    // Only the packing call sits inside the timed loop; both mats are
    // built once up front.
    for (auto _ : state) {
        pack_mat_128(float_input, packed_output);
    }
}

static void BM_pack_mat_64(benchmark::State &state) {
const bnn::Mat a(1, 64, 64, 128, bnn::DataType::Float);
bnn::Mat b(1, 64, 64, 128, bnn::DataType::Bit);
Expand Down
56 changes: 17 additions & 39 deletions binaries/run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
#include <algorithm>
#include <chrono>

#include <common/argh.h>
#include <common/flatbuffers_helper.h>
#include <dabnn/net.h>

int main(int argc, char **argv) {
(void)argc;
argh::parser cmdl(argc, argv);
google::InitGoogleLogging(argv[0]);
FLAGS_v = 1;
cmdl("v", 1) >> FLAGS_v;
FLAGS_alsologtostderr = true;
// FLAGS_logbuflevel = -1;

float *input = new float[3 * 224 * 224];
FORZ(i, 3 * 224 * 224) { input[i] = 1; }

// const std::string blob_name = "125";
auto net1 = bnn::Net::create();
net1->optimize = true;
net1->run_fconv = true;
Expand All @@ -31,48 +31,26 @@ int main(int argc, char **argv) {
FORZ(i, N) {
LOG(INFO) << "------";
net1->run(input);
// LOG(INFO) << "hh";
}
const auto t2 = Clock::now();
css blob_name = argv[2];
LOG(INFO) << "Fetching blob: " << blob_name;
const auto &blob1 = net1->get_blob(blob_name);
LOG(INFO) << blob1->total();
if (blob1->data_type == bnn::DataType::Float) {
blob1->dump("/data/local/tmp/mat.txt");
}
FORZ(i, std::min(static_cast<int>(blob1->total()), 10)) {

for (int i = 2; i < cmdl.size(); i++) {
css blob_name = argv[i];
LOG(INFO) << "Fetching blob: " << blob_name;
const auto &blob1 = net1->get_blob(blob_name);
LOG(INFO) << static_cast<float *>(blob1->data)[0];
if (blob1->data_type == bnn::DataType::Float) {
LOG(INFO) << static_cast<float *>(blob1->data)[i];
} else {
LOG(INFO) << binrep(static_cast<uint64_t *>(blob1->data)[i]);
blob1->dump("/data/local/tmp/mat_" + blob_name + ".txt");
}
FORZ(j, std::min(static_cast<int>(blob1->total()), 10)) {
if (blob1->data_type == bnn::DataType::Float) {
LOG(INFO) << blob_name << ": " << static_cast<float *>(blob1->data)[j];
} else {
LOG(INFO) << blob_name << ": " << binrep(static_cast<uint64_t *>(blob1->data) + j, 64, true);
}
}
}
LOG(INFO) << "Time: "
<< 1.f *
std::chrono::duration_cast<std::chrono::nanoseconds>(t2 -
t1)
.count() /
N / 1000000000;
#ifdef BNN_BENCHMARK
net1->print_time();
#endif

/*
bnn::Net net2;
net2.model_ = model;
net2.prepare();
LOG(INFO) << "-----";
net2.optimize = false;
net2.run(input);
const auto &blob2 = net2.get_blob(blob_name);
LOG(INFO) << blob2->total();
FORZ(i, std::min(static_cast<int>(blob2->total()), 10)) {
LOG(INFO) << static_cast<float *>(blob2->data)[i];
}
const bool eq = (*blob1 == *blob2);
BNN_ASSERT(eq, "");
*/
}
2 changes: 1 addition & 1 deletion ci/build_aar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ if [[ "$OSTYPE" == "darwin"* ]]; then
echo "The system is Mac OS X, alias sed to gsed"
export PATH="/usr/local/opt/gnu-sed/libexec/gnubin:$PATH"
echo "Output of sed -v:"
sed -v
sed --version
fi

MY_ANDROID_HOME="${ANDROID_HOME:-$HOME/Android/Sdk}"
Expand Down
2 changes: 1 addition & 1 deletion common/baseline.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <bitset>

#include <common/helper.h>
#include <dabnn/bitpack.h>
#include <common/common_bitpack.h>
#include <dabnn/mat.h>

inline int bitcount(uint64_t x) {
Expand Down
20 changes: 20 additions & 0 deletions common/common_bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <cstdint>

#include <common/helper.h>
#include <dabnn/mat.h>

inline void pack_128_fallback(const float *float_ptr, void *binary_ptr,
size_t size) {
Expand Down Expand Up @@ -183,4 +184,23 @@ inline void pack_64_bitfield(const float *fptr, uint64_t *buf) {
*buf = u.u64;
}

/**
 * Packs a float mat into a binary mat, 64 floats per uint64_t word.
 *
 * For every (n, h) row of float_mat, the w * c floats of that row are fed
 * to pack_64_bitfield in consecutive groups of 64, and each resulting word
 * is written to the matching row of binary_mat.
 *
 * Preconditions (checked with BNN_ASSERT):
 *  - float_mat.w * float_mat.c is positive and a multiple of 64;
 *  - float_mat.c is a multiple of 64 and float_mat.c / 64 == binary_mat.c.
 *
 * @param float_mat   source mat, read as float
 * @param binary_mat  destination mat, written as uint64_t
 */
inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    BNN_ASSERT(
        float_mat.w * float_mat.c > 0 && float_mat.w * float_mat.c % 64 == 0,
        float_mat.w * float_mat.c);
    BNN_ASSERT(float_mat.c / 64 == binary_mat.c && float_mat.c % 64 == 0, "");

    // Number of 64-float groups (== output words) in one (n, h) row.
    const auto words_per_row = float_mat.w * float_mat.c / 64;

    FORZ(n, float_mat.n) {
        FORZ(h, float_mat.h) {
            const auto *row_src = float_mat.point<float>(n, h, 0);
            auto *row_dst = binary_mat.point<uint64_t>(n, h, 0);
            FORZ(word, words_per_row) {
                // Group `word` starts 64 floats after the previous one and
                // produces exactly one output word.
                pack_64_bitfield(row_src + word * 64, row_dst + word);
            }
        }
    }
}

#endif /* COMMON_BITPACK_H */
30 changes: 13 additions & 17 deletions common/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ inline float random_float() {
static std::normal_distribution<float> distr;

float rand_float = distr(eng) / 10;
if (rand_float == 0) {
return random_float();
}
// LOG(INFO) << "Random float: " << rand_float;

return rand_float;
Expand Down Expand Up @@ -113,27 +116,20 @@ inline void fill_rand_uint64(uint64_t *data, size_t num) {
FORZ(i, num) { *(data + i) = random_uint64(); }
}

/**
 * Returns the binary representation of the object `a`.
 *
 * The bytes of `a` are visited in memory order; each one is rendered as
 * CHAR_BIT binary digits followed by a single space, and the whole string
 * is terminated by a newline.
 */
template <typename T>
std::string binrep(const T &a) {
    const char *bytes = reinterpret_cast<const char *>(&a);

    std::stringstream out;
    for (size_t idx = 0; idx < sizeof(a); ++idx) {
        out << std::bitset<CHAR_BIT>(bytes[idx]) << ' ';
    }
    out << '\n';

    return out.str();
}

template <typename T>
std::string binrep(const T &a, const size_t size) {
const char *beg = reinterpret_cast<const char *>(&a);
/**
 * Returns the binary representation of a raw memory region.
 *
 * Each byte is rendered as CHAR_BIT binary digits followed by a single
 * space. No trailing newline is appended.
 *
 * @param a        start of the region to dump
 * @param size     length of the region in BYTES (not bits, not elements)
 * @param reverse  when true, bytes are emitted from the last one to the
 *                 first, which makes multi-byte integers stored on
 *                 little-endian machines human-readable (most significant
 *                 byte first); when false, bytes are emitted in memory order
 * @return the formatted string; empty when size is 0
 */
inline std::string binrep(const void *a, const size_t size, bool reverse) {
    // Read through unsigned char so every byte converts to the bitset
    // without any sign extension.
    const auto *bytes = static_cast<const unsigned char *>(a);

    std::stringstream ss;
    for (size_t i = 0; i < size; ++i) {
        // Walk the region exactly once, in the direction selected by
        // `reverse`.
        const size_t idx = reverse ? size - 1 - i : i;
        ss << std::bitset<CHAR_BIT>(bytes[idx]) << ' ';
    }
    return ss.str();
}

Expand Down
105 changes: 87 additions & 18 deletions dabnn/bitpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,85 @@
#include <glog/logging.h>
#include "mat.h"

inline void pack_mat_64(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(
float_mat.w * float_mat.c > 0 && float_mat.w * float_mat.c % 64 == 0,
float_mat.w * float_mat.c);
BNN_ASSERT(float_mat.c / 64 == binary_mat.c && float_mat.c % 64 == 0, "");

FORZ(n, float_mat.n) {
FORZ(h, float_mat.h) {
auto *fptr = float_mat.point<float>(n, h, 0);
auto *bptr = binary_mat.point<uint64_t>(n, h, 0);
FORZ(i, float_mat.w * float_mat.c / 64) {
pack_64_bitfield(fptr, bptr);
fptr += 64;
bptr++;
}
}
}
inline void pack_128_2(const float *float_ptr, void *binary_ptr, size_t size) {
size_t nn_size = size >> 7;

asm volatile(
"0: \n"
"prfm pldl1keep, [%0] \n"
"ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [%0], #64 \n"
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%0], #64 \n"
"sri v0.4s, v4.4s, #1 \n"
"sri v1.4s, v5.4s, #1 \n"
"sri v2.4s, v6.4s, #1 \n"
"sri v3.4s, v7.4s, #1 \n"

"ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0], #64 \n"
"prfm pldl1keep, [%0, #64] \n"
"ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0], #64 \n"
"sri v8.4s, v12.4s, #1 \n"
"sri v9.4s, v13.4s, #1 \n"
"sri v10.4s, v14.4s, #1 \n"
"sri v11.4s, v15.4s, #1 \n"

"subs %2, %2, #1 \n"

"ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%0], #64 \n"
"prfm pldl1keep, [%0, #64] \n"
"ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%0], #64 \n"

"sri v0.4s, v8.4s, #2 \n"
"sri v1.4s, v9.4s, #2 \n"
"sri v2.4s, v10.4s, #2 \n"
"sri v3.4s, v11.4s, #2 \n"

"sri v16.4s, v20.4s, #1 \n"
"sri v17.4s, v21.4s, #1 \n"
"sri v18.4s, v22.4s, #1 \n"
"sri v19.4s, v23.4s, #1 \n"

"ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%0], #64 \n"
"prfm pldl1keep, [%0, #64] \n"
"ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%0], #64 \n"
"sri v8.4s, v12.4s, #1 \n"
"sri v9.4s, v13.4s, #1 \n"
"sri v10.4s, v14.4s, #1 \n"
"sri v11.4s, v15.4s, #1 \n"

"sri v16.4s, v8.4s, #2 \n"
"sri v17.4s, v9.4s, #2 \n"
"sri v18.4s, v10.4s, #2 \n"
"sri v19.4s, v11.4s, #2 \n"

"sri v0.4s, v16.4s, #4 \n"
"sri v1.4s, v17.4s, #4 \n"
"sri v2.4s, v18.4s, #4 \n"
"sri v3.4s, v19.4s, #4 \n"

"sri v0.4s, v1.4s, #8 \n"
"sri v2.4s, v3.4s, #8 \n"
"sri v0.4s, v2.4s, #16 \n"

// Bit-packing with sign bit is introduced after the first version
// of dabnn is published. Sign bit will be 1 when x < 0, 0 when x > 0,
// which is different with the way we used before --- set bit to 1 if
// x > 0 or 0 if x < 0
// So for the compatibility we add a "not" instruction here.
// Maybe we can save this instruction by introducing "version" for
// dabnn model and force users to upgrade.
// Note: If this line is removed, the padding value of binary convolution
// should also be changed from 0 (-1 in xnor) to -1 (1 in xnor)
"not v0.16b, v0.16b \n"

"st1 {v0.4s}, [%1], #16 \n"
"bne 0b \n"
: "+r"(float_ptr), // %0
"+r"(binary_ptr), // %1
"+r"(nn_size) // %2
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
"v19", "v20", "v21", "v22", "v23", "x0");
}

inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
Expand Down Expand Up @@ -123,6 +185,13 @@ inline void pack_128(const float *float_ptr, void *binary_ptr, size_t size) {
"x0");
}

/**
 * Packs a whole float mat into a binary mat with pack_128_2.
 *
 * The data buffer of float_mat is handed to pack_128_2 as one flat array of
 * float_mat.total() floats; the result is written to binary_mat.data.
 *
 * @param float_mat   source mat whose data is read as float
 * @param binary_mat  destination mat; must not be empty (checked by assert)
 */
inline void pack_mat_128_2(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
    assert(!binary_mat.empty());

    const float *const src = static_cast<float *>(float_mat.data);
    const auto float_count = float_mat.total();
    pack_128_2(src, binary_mat.data, float_count);
}

inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
assert(!binary_mat.empty());

Expand All @@ -133,7 +202,7 @@ inline void pack_mat_128(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
inline void pack_mat(const bnn::Mat &float_mat, bnn::Mat &binary_mat) {
BNN_ASSERT(float_mat.c % 64 == 0, float_mat.c);
if (float_mat.c % 128 == 0) {
pack_mat_128(float_mat, binary_mat);
pack_mat_128_2(float_mat, binary_mat);
} else {
pack_mat_64(float_mat, binary_mat);
}
Expand Down
Loading

0 comments on commit 1f9f677

Please sign in to comment.