Merge c96953f into 66a8420

JamesYang007 · Aug 4, 2020 · 021011b · 021011b
2 parents 66a8420 + c96953f
commit 021011b
Show file tree

Hide file tree

Showing 96 changed files with 3,204 additions and 5,033 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.7)
 project("autoppl"
-        VERSION 0.8
+        VERSION 1.1
         DESCRIPTION "C++ template library for probabilistic programming."
         LANGUAGES C CXX)
 
@@ -32,15 +32,11 @@ target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17)
 # Add AutoPPL header-only library to variable
 set(AUTOPPL_LIBS ${PROJECT_NAME})
 
-# Find Armadillo C++
-find_package(Armadillo CONFIG REQUIRED
-    HINTS ${CMAKE_CURRENT_SOURCE_DIR}/lib/armadillo)
-if (Armadillo_FOUND)
-    message(STATUS "Found Armadillo config at: ${Armadillo_DIR}")
-    add_compile_options(-DARMA_DONT_USE_WRAPPER)
-    set(AUTOPPL_INCLUDE_DIRS ${AUTOPPL_INCLUDE_DIRS} ${ARMADILLO_INCLUDE_DIRS})
-    set(AUTOPPL_LIBS ${AUTOPPL_LIBS} ${ARMADILLO_LIBRARIES})
-endif()
+# Find Eigen3
+find_package(Eigen3 3.3 CONFIG REQUIRED NO_MODULE
+    HINTS ${CMAKE_CURRENT_SOURCE_DIR}/lib/FastAD/libs/eigen-3.3.7/build/share)
+    set(AUTOPPL_LIBS ${AUTOPPL_LIBS} Eigen3::Eigen)
+message(STATUS "Eigen3 found at ${EIGEN3_INCLUDE_DIR}")
 
 # Find FastAD
 find_package(FastAD CONFIG REQUIRED

diff --git a/README.md b/README.md
@@ -303,10 +303,10 @@ and perform NUTS.
 
 The runtimes have similar log-like behavior,
 but it is clear that STAN (dotted lines) takes far longer
-in both sampling and warmup times.
+in both sampling and warmup times by a factor of about 5.
 As for ESS/s, upon comparing by colors (corresponding to a parameter)
 between dotted (STAN) and solid (AutoPPL) lines,
-we see that AutoPPL has uniformly larger ESS/s.
+we see that AutoPPL has uniformly larger ESS/s by a factor of 5 as well.
 This difference quickly becomes larger as sample size grows.
 From these plots and that sampling results were identical
 show that the drastic difference in ESS/s is simply from faster
@@ -324,19 +324,12 @@ The full code can be found
 [here](benchmark/regression_autoppl.cpp):
 
 ```cpp
-auto X = ppl::make_data_view<ppl::mat>(X_data);
-auto y = ppl::make_data_view<ppl::vec>(y_data); 
+DataView<double, mat> X(X_data.memptr(), X_data.n_rows, X_data.n_cols);
+DataView<double, vec> y(y_data.memptr(), y_data.n_rows);
 ppl::Param<double, ppl::vec> w(3);
 ppl::Param<double> b;
 ppl::Param<double> s;
 
-arma::mat storage(num_samples, w.size() + b.size() + s.size());
-for (size_t i = 0; i < w.size(); ++i) {
-    w.storage(i) = storage.colptr(i);
-}
-b.storage() = storage.colptr(w.size());
-s.storage() = storage.colptr(w.size() + b.size());
-
 auto model = (s |= ppl::uniform(0.5, 8.),
               b |= ppl::normal(0., 5.),
               w |= ppl::normal(0., 5.),
@@ -345,6 +338,7 @@ auto model = (s |= ppl::uniform(0.5, 8.),
 NUTSConfig<> config;
 config.warmup = num_samples;
 config.n_samples = num_samples;
+
 auto res = ppl::nuts(model, config);
 ```
 
@@ -377,11 +371,12 @@ means, standard deviation, and ESS.
 The runtimes have a similar linear trend,
 and it is clear that STAN (dotted lines) takes far longer
 in both sampling and warmup times.
+Comparing the sampling times, for example, we see about 20 times improvement.
 The ESS/s for `l1` and `l2` overlap completely (red and blue) in both STAN and AutoPPL
 and this is expected since they are symmetric in the model specification.
 With the exception of the two smallest sample sizes (100, 500),
 ESS/s is fairly constant as sample size varies.
-It is quite evident that AutoPPL (solid) has a larger ESS/s by a significant factor.
+It is quite evident that AutoPPL (solid) has a larger ESS/s by a factor of 20.
 
 The reason for this difference is in how we handle expressions
 where data vector elements are iid (independent and identically distributed).
@@ -391,26 +386,23 @@ However, it is worth noting that this optimization does not apply when
 the data are simply independent but not identically distributed
 (as in the [linear regression](#benchmarks-bayesian-linear-regression) case),
 or when the variable is a parameter, not data.
+Nonetheless, our AD is extremely fast due to vectorization and is in general faster than STAN.
 
 The following is the AutoPPL code without data generation.
 The full code can be found 
 [here](benchmark/normal_two_prior_distribution.cpp).
 
 ```cpp
-ppl::Param<double> l1, l2, s;
+ppl::Data<double, ppl::vec> y(n_data);
+ppl::Param<double> lambda1, lambda2, sigma;
 
 auto model = (
-    s |= ppl::uniform(0.0, 20.0),
-    l1 |= ppl::normal(0.0, 10.0),
-    l2 |= ppl::normal(0.0, 10.0),
-    y |= ppl::normal(l1 + l2, s)
+    sigma |= ppl::uniform(0.0, 20.0),
+    lambda1 |= ppl::normal(0.0, 10.0),
+    lambda2 |= ppl::normal(0.0, 10.0),
+    y |= ppl::normal(lambda1 + lambda2, sigma)
 );
 
-arma::mat storage(n_samples, 3);
-l1.storage() = storage.colptr(0);
-l2.storage() = storage.colptr(1);
-s.storage() = storage.colptr(2);
-
 ppl::NUTSConfig<> config;
 config.n_samples = n_samples;
 config.warmup = n_samples;
@@ -439,11 +431,11 @@ auto res = ppl::nuts(model, config);
 ## Third Party Tools
 
 Many thanks to the following third party tools used in this project:
-- [Armadillo](http://arma.sourceforge.net/): matrix library used for inference algorithms.
 - [Clang](https://clang.llvm.org/): one of the main compilers used.
 - [CMake](https://cmake.org/): build system.
 - [Coveralls](https://coveralls.io/): check test coverage.
 - [Cpp Coveralls](https://github.com/eddyxu/cpp-coveralls): check test coverage specifically for C++ code.
+- [Eigen](http://eigen.tuxfamily.org/index.php?title=Main_Page): matrix library.
 - [FastAD](https://github.com/JamesYang007/FastAD): automatic differentiation library.
 - [GCC](https://gcc.gnu.org/): one of the main compilers used.
 - [Google Benchmark](https://github.com/google/benchmark): benchmark library algorithms.

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
@@ -1,3 +1,10 @@
+add_executable(mh_bayes_net ${CMAKE_CURRENT_SOURCE_DIR}/mh_bayes_net.cpp)
+target_include_directories(mh_bayes_net PRIVATE ${GBENCH_DIR}/include ${AUTOPPL_INCLUDE_DIRS})
+target_link_libraries(mh_bayes_net benchmark benchmark_main ${AUTOPPL_LIBS})
+if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+	target_link_libraries(mh_bayes_net pthread)
+endif()
+
 add_executable(normal_two_prior_distribution ${CMAKE_CURRENT_SOURCE_DIR}/normal_two_prior_distribution.cpp)
 target_include_directories(normal_two_prior_distribution PRIVATE ${GBENCH_DIR}/include ${AUTOPPL_INCLUDE_DIRS})
 target_link_libraries(normal_two_prior_distribution benchmark benchmark_main ${AUTOPPL_LIBS})

diff --git a/benchmark/benchmark_utils.hpp b/benchmark/benchmark_utils.hpp
@@ -1,15 +1,49 @@
 #pragma once
 #include <numeric>
+#include <iostream>
+#include <Eigen/Dense>
+#include <autoppl/math/ess.hpp>
 
 namespace ppl {
 
-template <class ArrayType>
-double sample_average(const ArrayType& storage) {
-    double sum = std::accumulate(
-        storage.begin(),
-        storage.end(),
-        0.);
-    return sum / (storage.size());
+template <class Derived>
+inline auto mean(const Eigen::MatrixBase<Derived>& m)
+{ return m.colwise().mean(); }
+
+template <class Derived>
+inline auto sd(const Eigen::MatrixBase<Derived>& m)
+{
+    assert(m.rows() > 1);
+    auto var = (m.rowwise() - ppl::mean(m))
+                    .colwise().squaredNorm() / (m.rows() - 1);
+    return var.array().sqrt().matrix();
+}
+
+inline void summary(const std::string& header,
+                    const Eigen::MatrixXd& m,
+                    double warmup_time,
+                    double sampling_time)
+{
+    std::cout << "Warmup: " << warmup_time << std::endl;
+    std::cout << "Sampling: " << sampling_time << std::endl;
+
+    std::cout << header << std::endl;
+
+    Eigen::MatrixXd mean = ppl::mean(m);
+    std::cout << "Mean:\n"
+              << mean << std::endl; 
+
+    Eigen::MatrixXd sd = ppl::sd(m);
+    std::cout << "SD:\n"
+              << sd << std::endl; 
+
+    Eigen::MatrixXd ess = ppl::math::ess(m);
+    std::cout << "ESS:\n"
+              << ess << std::endl;
+
+    Eigen::MatrixXd ess_per_s = ess / sampling_time;
+    std::cout << "ESS/s:\n"
+              << ess_per_s << std::endl;
 }
 
 } // namespace ppl
diff --git a/benchmark/mh_bayes_net.cpp b/benchmark/mh_bayes_net.cpp
@@ -0,0 +1,60 @@
+#include <random>
+#include <benchmark/benchmark.h>
+#include "benchmark_utils.hpp"
+#include <autoppl/expression/expr_builder.hpp>
+#include <autoppl/expression/variable/data.hpp>
+#include <autoppl/expression/variable/param.hpp>
+#include <autoppl/mcmc/mh/mh.hpp>
+
+namespace ppl {
+
+static void BM_MHBayesNet(benchmark::State& state) {
+    size_t n_samples = state.range(0);
+    constexpr size_t n_data = 1000;
+
+	std::bernoulli_distribution b(0.341);
+	std::mt19937 gen(0);
+	ppl::Data<util::disc_param_t, ppl::vec> y(n_data);
+
+	ppl::Param<double> p1, p2, m1, m2, M1, M2;
+    ppl::Param<int> w;
+	auto model = (
+        m1 |= ppl::uniform(0., 1.),
+        m2 |= ppl::uniform(0., 1.),
+        M1 |= ppl::uniform(0., 1.),
+        M2 |= ppl::uniform(0., 1.),
+        p1 |= ppl::uniform(m1, M1),
+        p2 |= ppl::uniform(m2, M2),
+        w |= ppl::bernoulli(0.3 * p1),
+        y |= ppl::bernoulli(w * p1 + (1-w) * p2)
+    );
+
+	for (size_t i = 0; i < n_data; ++i) {
+		y.get()(i) = b(gen);
+	}
+
+    ppl::MCMCResult res;
+
+    ppl::MHConfig config;
+    config.warmup = n_samples;
+    config.samples = n_samples;
+
+	for (auto _ : state) {
+		res = ppl::mh(model, config);
+	}
+
+    ppl::summary("m1, m2, M1, M2, p1, p2", 
+                 res.cont_samples,
+                 res.warmup_time,
+                 res.sampling_time);
+    ppl::summary("w", 
+                 res.disc_samples.cast<double>(),
+                 res.warmup_time, 
+                 res.sampling_time);
+}
+
+BENCHMARK(BM_MHBayesNet)->Arg(100000)
+                        ->Arg(200000)
+                        ->Arg(300000)
+                        ;
+} // namespace ppl
diff --git a/benchmark/normal_two_prior_distribution.cpp b/benchmark/normal_two_prior_distribution.cpp
@@ -1,10 +1,9 @@
-#include <vector>
 #include <benchmark/benchmark.h>
+#include "benchmark_utils.hpp"
 #include <autoppl/expression/variable/param.hpp>
 #include <autoppl/mcmc/hmc/nuts/nuts.hpp>
 #include <autoppl/expression/variable/data.hpp>
 #include <autoppl/expression/expr_builder.hpp>
-#include <autoppl/math/ess.hpp>
 
 namespace ppl {
 
@@ -14,7 +13,7 @@ static void BM_NormalTwoPrior(benchmark::State& state) {
 
 	std::normal_distribution n(0.0, 1.0);
 	std::mt19937 gen(0);
-	ppl::Data<double, ppl::vec> y;
+	ppl::Data<double, ppl::vec> y(n_data);
 
 	ppl::Param<double> lambda1, lambda2, sigma;
 	auto model = (
@@ -25,38 +24,21 @@ static void BM_NormalTwoPrior(benchmark::State& state) {
 	);
 
 	for (size_t i = 0; i < n_data; ++i) {
-		y.push_back(n(gen));
+		y.get()(i) = n(gen);
 	}
 
-    arma::mat storage(n_samples, 3);
-	lambda1.storage() = storage.colptr(0);
-	lambda2.storage() = storage.colptr(1);
-    sigma.storage() = storage.colptr(2);
-
     ppl::NUTSConfig<> config;
-    config.n_samples = n_samples;
+    config.samples = n_samples;
     config.warmup = n_samples;
 
-    ppl::NUTSResult res;
+    ppl::MCMCResult res;
 
 	for (auto _ : state) {
 		res = ppl::nuts(model, config);
 	}
 
-    std::cout << "Warmup: " << res.warmup_time << std::endl;
-    std::cout << "Sampling: " << res.sampling_time << std::endl;
-
-    arma::mat mean = arma::mean(storage, 0);
-    mean.print("Mean: l1, l2, s");
-
-    arma::mat stddev = arma::stddev(storage, 0);
-    stddev.print("Stddev: l1, l2, s");
-
-    arma::mat ess = ppl::math::ess(storage);
-    ess.print("ESS: l1, l2, s");
-
-    arma::vec ess_per_s = ess / res.sampling_time;
-    ess_per_s.print("ESS/s: w[0], w[1], w[2], b, s");
+    ppl::summary("s, l1, l2", res.cont_samples,
+                 res.warmup_time, res.sampling_time);
 }
 
 BENCHMARK(BM_NormalTwoPrior)->Arg(100)