From a530848dad2817dc592e0167f29274677c0884e0 Mon Sep 17 00:00:00 2001
From: "F. Huizinga" <folkerthuizinga@gmail.com>
Date: Sun, 15 Jul 2018 21:17:23 +0200
Subject: [PATCH] gzipped protobuf compressed neural networks (#166)

* gzipped protobuf compressed neural networks

* remove debug info and bump version (corresponds with tf)

* clang-format

* clang-format style=Google

* make protobuf a submodule

* add protobuf.wrap

* update submodule for appveyor

* update submodule for appveyor try 2

* update submodule using https

* fix pr comments

* add clang-format and fix format in loader

* store clangformat delta only

* update submodule

* various fixes

* check for v1 weights and conform google style

* temporary protobuf wrap until windows fixes are upstreamed

* add compiler to docker for circleci

* protobuf compiler for circleci

* protobuf may be installed without protoc

* [CircleCI] Install protobuf dependency on the docker image (#172)

* [CircleCI] Install protobuf dependency on the docker image

* [CircleCI] Add a step to pull git submodules

* [CircleCI] Use protoc version 3.5.1

* [CircleCI] Don't install protobuf deps from Ubuntu repos

* [CircleCI] Use the docker image with the latest changes

* pr review fixes

* update lczero-common

* check buffer size < 2

* moar pr comments
---
 .circleci/Dockerfile      |   9 ++-
 .circleci/config.yml      |   7 +-
 .clang-format             |   6 ++
 .gitmodules               |   3 +
 appveyor.yml              |   1 +
 libs/lczero-common        |   1 +
 meson.build               |  12 ++++
 src/neural/loader.cc      | 142 +++++++++++++++++++++++++++++---------
 src/neural/loader.h       |   5 +-
 src/version.inc           |   2 +-
 subprojects/protobuf.wrap |  10 +++
 11 files changed, 161 insertions(+), 37 deletions(-)
 create mode 100644 .clang-format
 create mode 100644 .gitmodules
 create mode 160000 libs/lczero-common
 create mode 100644 subprojects/protobuf.wrap

diff --git a/.circleci/Dockerfile b/.circleci/Dockerfile
index 73e519d195..ddf8f8001a 100644
--- a/.circleci/Dockerfile
+++ b/.circleci/Dockerfile
@@ -1,6 +1,11 @@
-FROM floopcz/tensorflow_cc:ubuntu-shared-cuda:0.0.1
+FROM floopcz/tensorflow_cc:ubuntu-shared-cuda
 
 RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && apt-get update && apt-get install -y intel-mkl-64bit-2018.2-046
-RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev libprotobuf-dev git ssh tar gzip ca-certificates sudo
+RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev git ssh tar gzip ca-certificates sudo
 RUN pip3 install meson
 RUN ln -s /usr/include/ /usr/include/openblas
+
+# Install the prebuilt protoc 3.5.1 binary and its include directory; one layer, no leftovers.
+RUN curl -OL https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip && \
+    unzip protoc-3.5.1-linux-x86_64.zip -d protoc3 && mv protoc3/bin/* /usr/local/bin/ && \
+    mv protoc3/include/* /usr/local/include/ && rm -rf protoc3 protoc-3.5.1-linux-x86_64.zip
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 90bf2a4c0c..d55b7b7af1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -2,9 +2,14 @@ version: 2
 jobs:
   build:
     docker:
-      - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.1
+      - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.4
     steps:
       - checkout
+      - run:  # check out the submodule commits pinned by this revision
+          name: "Pull Submodules"
+          command: |
+            git submodule init
+            git submodule update
       - run:
           name: Build clang version
           command: CC=clang-6.0 CXX=clang++-6.0 ./build.sh
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000000..ed4f2a778c
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,6 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+DerivePointerAlignment: false
+...
+
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000000..6575e63266
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "libs/lczero-common"]
+	path = libs/lczero-common
+	url = https://github.com/LeelaChessZero/lczero-common.git
diff --git a/appveyor.yml b/appveyor.yml
index 4220264e45..e71c6702ba 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -46,6 +46,7 @@ cache:
   - C:\Python36\Lib\site-packages
   - 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2'
 before_build:
+- cmd: git submodule update --init --recursive
 - cmd: meson.py build --backend vs2015 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BLAS% -Dcudnn=%CUDA% -Dcudnn_include="%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\lib\x64" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\lib\x64" -Ddefault_library=static
 build:
   project: build/lc0.sln
diff --git a/libs/lczero-common b/libs/lczero-common
new file mode 160000
index 0000000000..ab563d1949
--- /dev/null
+++ b/libs/lczero-common
@@ -0,0 +1 @@
+Subproject commit ab563d1949d382fe1b3d07c6aa1e5cb1f82da55f
diff --git a/meson.build b/meson.build
index 6187e1c6d4..9eb908514d 100644
--- a/meson.build
+++ b/meson.build
@@ -39,6 +39,18 @@ files = []
 includes = []
 has_backends = false
 
+deps += dependency('protobuf', fallback : ['protobuf', 'protobuf_dep'], required: true)
+protoc = find_program('protoc', required : false)  # use an installed protoc if there is one
+if not protoc.found()
+  message('protoc will be built from the subproject')
+  protoc = subproject('protobuf').get_variable('protoc')
+endif
+# Generator that runs protoc to emit <name>.pb.cc and <name>.pb.h for each input .proto.
+gen = generator(protoc, output: ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
+  arguments : ['--proto_path=@CURRENT_SOURCE_DIR@/libs/lczero-common', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+# Add the code generated from net.proto (lczero-common submodule) to |files|.
+files += gen.process('libs/lczero-common/proto/net.proto', 
+  preserve_path_from : meson.current_source_dir() + '/libs/lczero-common/')
 
 #############################################################################
 ## Main files
diff --git a/src/neural/loader.cc b/src/neural/loader.cc
index f0d432637e..929dd26bb2 100644
--- a/src/neural/loader.cc
+++ b/src/neural/loader.cc
@@ -24,20 +24,39 @@
 #include <fstream>
 #include <iostream>
 #include <sstream>
+#include <string>
 #include "utils/commandline.h"
 #include "utils/exception.h"
 #include "utils/filesystem.h"
+#include "proto/net.pb.h"
+#include "version.inc"
+
+
 
 namespace lczero {
 
-FloatVectors LoadFloatsFromFile(const std::string& filename) {
+namespace {
+void PopulateLastIntoVector(FloatVectors* vecs, Weights::Vec* out) {
+  *out = std::move(vecs->back());  // Move the last vector of |vecs| into |out|
+  vecs->pop_back();                // and drop the moved-from element.
+}
+// Fills |block| from the last four vectors of |vecs|, taking bn_stddivs first.
+void PopulateConvBlockWeights(FloatVectors* vecs, Weights::ConvBlock* block) {
+  PopulateLastIntoVector(vecs, &block->bn_stddivs);
+  PopulateLastIntoVector(vecs, &block->bn_means);
+  PopulateLastIntoVector(vecs, &block->biases);
+  PopulateLastIntoVector(vecs, &block->weights);
+}
+
+std::string DecompressGzip(const std::string& filename) {
   const int kStartingSize = 8 * 1024 * 1024;  // 8M
-  std::vector<char> buffer(kStartingSize);
+  std::string buffer;
+  buffer.resize(kStartingSize);
   int bytes_read = 0;
 
   // Read whole file into a buffer.
   gzFile file = gzopen(filename.c_str(), "rb");
-  if (!file) throw Exception("Cannot read weights from " + filename);
+  if (!file) throw lczero::Exception("Cannot read weights from " + filename);
   while (true) {
     int sz = gzread(file, &buffer[bytes_read], buffer.size() - bytes_read);
     if (sz == static_cast<int>(buffer.size()) - bytes_read) {
@@ -46,25 +65,92 @@ FloatVectors LoadFloatsFromFile(const std::string& filename) {
     } else {
       bytes_read += sz;
       buffer.resize(bytes_read);
-      // Add newline in the end for the case it was not there.
-      buffer.push_back('\n');
       break;
     }
   }
   gzclose(file);
 
+  return buffer;
+}
+
+// Maps each 16-bit value q in layer.params() to min + q / 65535 * (max - min).
+FloatVector DenormLayer(const pblczero::Weights_Layer& layer) {
+  const auto& raw = layer.params();
+  const auto* quantized = reinterpret_cast<const std::uint16_t*>(raw.data());
+  const int count = raw.length() / 2;
+  FloatVector out(count);
+  for (int idx = 0; idx < count; ++idx) {
+    out[idx] = quantized[idx] / float(0xffff);
+    out[idx] *= layer.max_val() - layer.min_val();
+    out[idx] += layer.min_val();
+  }
+  return out;
+}
+
+// Appends the weights, biases, bn_means and bn_stddivs of |conv|, in that order.
+void DenormConvBlock(const pblczero::Weights_ConvBlock& conv, FloatVectors* vecs) {
+  for (const auto* layer :
+       {&conv.weights(), &conv.biases(), &conv.bn_means(), &conv.bn_stddivs()})
+    vecs->emplace_back(DenormLayer(*layer));
+}
+
+} // namespace 
+
+
+// Decodes a serialized pblczero::Net from |buffer| into float vectors, one per
+// layer. Throws Exception if the data cannot be parsed, needs a newer lc0 or
+// uses an encoding other than LINEAR16.
+FloatVectors LoadFloatsFromPbFile(const std::string& buffer) {
+  pblczero::Net net;
+  if (!net.ParseFromString(buffer)) throw Exception("Weight file invalid");
+  // Compare (major, minor, patch) lexicographically: testing each field on its
+  // own would wrongly reject e.g. weights for 0.14.5 when running lc0 0.15.1.
+  const auto& need = net.min_version();
+  bool too_new = need.patch() > LC0_VERSION_PATCH;
+  if (need.minor() != LC0_VERSION_MINOR)
+    too_new = need.minor() > LC0_VERSION_MINOR;
+  if (need.major() != LC0_VERSION_MAJOR)
+    too_new = need.major() > LC0_VERSION_MAJOR;
+  if (too_new)
+    throw Exception("Weights require at least lc0 version: " +
+                    std::to_string(need.major()) + "." +
+                    std::to_string(need.minor()) + "." +
+                    std::to_string(need.patch()));
+  if (net.format().weights_encoding() != pblczero::Format::LINEAR16)
+    throw Exception("Invalid weight encoding");
+
+  const auto& w = net.weights();
+  FloatVectors vecs;
+  DenormConvBlock(w.input(), &vecs);
+  for (int i = 0, n = w.residual_size(); i < n; i++) {
+    DenormConvBlock(w.residual(i).conv1(), &vecs);
+    DenormConvBlock(w.residual(i).conv2(), &vecs);
+  }
+  DenormConvBlock(w.policy(), &vecs);
+  vecs.emplace_back(DenormLayer(w.ip_pol_w()));
+  vecs.emplace_back(DenormLayer(w.ip_pol_b()));
+  DenormConvBlock(w.value(), &vecs);
+  vecs.emplace_back(DenormLayer(w.ip1_val_w()));
+  vecs.emplace_back(DenormLayer(w.ip1_val_b()));
+  vecs.emplace_back(DenormLayer(w.ip2_val_w()));
+  vecs.emplace_back(DenormLayer(w.ip2_val_b()));
+  return vecs;
+}
+
+FloatVectors LoadFloatsFromFile(std::string* buffer) {
   // Parse buffer.
   FloatVectors result;
   FloatVector line;
+  (*buffer) += "\n";
   size_t start = 0;
-  for (size_t i = 0; i < buffer.size(); ++i) {
-    char& c = buffer[i];
+  for (size_t i = 0; i < buffer->size(); ++i) {
+    char& c = (*buffer)[i];
     const bool is_newline = (c == '\n' || c == '\r');
     if (!std::isspace(c)) continue;
     if (start < i) {
       // If previous character was not space too.
       c = '\0';
-      line.push_back(std::atof(&buffer[start]));
+      line.push_back(std::atof(&(*buffer)[start]));
     }
     if (is_newline && !line.empty()) {
       result.emplace_back();
@@ -73,30 +159,22 @@ FloatVectors LoadFloatsFromFile(const std::string& filename) {
     start = i + 1;
   }
 
+  result.erase(result.begin());
   return result;
 }
 
-namespace {
-void PopulateLastIntoVector(FloatVectors* vecs, Weights::Vec* out) {
-  *out = std::move(vecs->back());
-  vecs->pop_back();
-}
-
-void PopulateConvBlockWeights(FloatVectors* vecs, Weights::ConvBlock* block) {
-  PopulateLastIntoVector(vecs, &block->bn_stddivs);
-  PopulateLastIntoVector(vecs, &block->bn_means);
-  PopulateLastIntoVector(vecs, &block->biases);
-  PopulateLastIntoVector(vecs, &block->weights);
-}
-}  // namespace
-
 Weights LoadWeightsFromFile(const std::string& filename) {
-  FloatVectors vecs = LoadFloatsFromFile(filename);
-
-  if (vecs.size() <= 19)
-    throw Exception("Weights file " + filename +
-                    " should have at least 19 lines");
-  if (vecs[0][0] != 2) throw Exception("Weights version 2 expected");
+  FloatVectors vecs;
+  auto buffer = DecompressGzip(filename);
+
+  if (buffer.size() < 2)
+    throw Exception("Weight file invalid");
+  else if (buffer[0] == '1' && buffer[1] == '\n')
+    throw Exception("Weight file no longer supported");
+  else if (buffer[0] == '2' && buffer[1] == '\n')
+    vecs = LoadFloatsFromFile(&buffer);
+  else
+    vecs = LoadFloatsFromPbFile(buffer);
 
   Weights result;
   // Populating backwards.
@@ -111,10 +189,10 @@ Weights LoadWeightsFromFile(const std::string& filename) {
   PopulateConvBlockWeights(&vecs, &result.policy);
 
   // Version, Input + all the residual should be left.
-  if ((vecs.size() - 5) % 8 != 0)
+  if ((vecs.size() - 4) % 8 != 0)
     throw Exception("Bad number of lines in weights file");
 
-  const int num_residual = (vecs.size() - 5) / 8;
+  const int num_residual = (vecs.size() - 4) / 8;
   result.residual.resize(num_residual);
   for (int i = num_residual - 1; i >= 0; --i) {
     PopulateConvBlockWeights(&vecs, &result.residual[i].conv2);
@@ -126,13 +204,13 @@ Weights LoadWeightsFromFile(const std::string& filename) {
 }
 
 std::string DiscoveryWeightsFile() {
-  const int kMinFileSize = 10000000;  // 10 MB
+  const int kMinFileSize = 500000;  // 500 KB
 
   std::string root_path = CommandLine::BinaryDirectory();
 
   // Open all files in <binary dir> amd <binary dir>/networks,
   // ones which are >= kMinFileSize are candidates.
-  std::vector<std::pair<time_t, std::string>> time_and_filename;
+  std::vector<std::pair<time_t, std::string> > time_and_filename;
   for (const auto& path : {"", "/networks"}) {
     for (const auto& file : GetFileList(root_path + path)) {
       const std::string filename = root_path + path + "/" + file;
diff --git a/src/neural/loader.h b/src/neural/loader.h
index 29fc50e5fe..29eff4feb8 100644
--- a/src/neural/loader.h
+++ b/src/neural/loader.h
@@ -28,8 +28,11 @@ namespace lczero {
 using FloatVector = std::vector<float>;
 using FloatVectors = std::vector<FloatVector>;
 
+// Read from protobuf.
+FloatVectors LoadFloatsFromPbFile(const std::string& buffer);
+
 // Read space separated file of floats and return it as a vector of vectors.
-FloatVectors LoadFloatsFromFile(const std::string& filename);
+FloatVectors LoadFloatsFromFile(std::string* buffer);
 
 // Read v2 weights file and fill the weights structure.
 Weights LoadWeightsFromFile(const std::string& filename);
diff --git a/src/version.inc b/src/version.inc
index cfdcbca717..ba95d8dfa4 100644
--- a/src/version.inc
+++ b/src/version.inc
@@ -1,3 +1,3 @@
 #define LC0_VERSION_MAJOR 0
-#define LC0_VERSION_MINOR 14
+#define LC0_VERSION_MINOR 15
 #define LC0_VERSION_PATCH 1
diff --git a/subprojects/protobuf.wrap b/subprojects/protobuf.wrap
new file mode 100644
index 0000000000..6106ce39b5
--- /dev/null
+++ b/subprojects/protobuf.wrap
@@ -0,0 +1,10 @@
+[wrap-file]
+directory = protobuf-3.5.1
+
+source_url = https://github.com/google/protobuf/releases/download/v3.5.1/protobuf-all-3.5.1.tar.gz
+source_filename = protobuf-all-3.5.1.tar.gz
+source_hash = 72d43863f58567a9ea2054671fdb667867f9cf7865df623c7be630978ff97dff
+
+patch_url = https://github.com/borg323/protobuf/releases/download/3.5.1-2w/protobuf-3.5.1-2w-wrap.zip
+patch_filename = protobuf-3.5.1-2w-wrap.zip
+patch_hash = 5185ae7252941e252b075d3f845768296b079516f9f6feb0bd3ae63de7e9a52e