Skip to content

Commit

Permalink
Merge 6874b33 into 13c9118
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesYang007 committed Jul 17, 2020
2 parents 13c9118 + 6874b33 commit 8f671f3
Show file tree
Hide file tree
Showing 104 changed files with 7,438 additions and 2,916 deletions.
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ project("autoppl"
LANGUAGES C CXX)

option(AUTOPPL_ENABLE_TEST "Enable unit tests to be built." ON)
# TODO: later when we make benchmarks, this should be ON
option(AUTOPPL_ENABLE_BENCHMARK "Enable benchmarks to be built." OFF)
option(AUTOPPL_ENABLE_TEST_COVERAGE "Build with test coverage (AUTOPPL_ENABLE_TEST must be ON)" OFF)
option(AUTOPPL_ENABLE_EXAMPLE "Enable compilation of examples." OFF)
Expand Down Expand Up @@ -72,5 +71,5 @@ endif()

# Compile examples if enabled
if (AUTOPPL_ENABLE_EXAMPLE)
add_subdirectory(${PROJECT_SOURCE_DIR}/docs/example ${PROJECT_BINARY_DIR}/example)
add_subdirectory(${PROJECT_SOURCE_DIR}/example ${PROJECT_BINARY_DIR}/example)
endif()
22 changes: 15 additions & 7 deletions benchmark/normal_two_prior_distribution.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include <chrono>
#include <array>
#include <benchmark/benchmark.h>
#include <autoppl/autoppl.hpp>
#include "benchmark_utils.hpp"
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/expr_builder.hpp>

namespace ppl {

Expand All @@ -11,7 +15,7 @@ static void BM_NormalTwoPrior(benchmark::State& state) {

std::normal_distribution n(0.0, 1.0);
std::mt19937 gen(0);
ppl::Data<double> y;
ppl::Data<double, ppl::vec> y;

ppl::Param<double> lambda1, lambda2, sigma;
auto model = (
Expand All @@ -22,16 +26,20 @@ static void BM_NormalTwoPrior(benchmark::State& state) {
);

for (size_t i = 0; i < n_data; ++i) {
y.observe(n(gen));
y.push_back(n(gen));
}

std::array<double, n_samples> l1_storage, l2_storage, s_storage;
lambda1.set_storage(l1_storage.data());
lambda2.set_storage(l2_storage.data());
sigma.set_storage(s_storage.data());
lambda1.storage() = l1_storage.data();
lambda2.storage() = l2_storage.data();
sigma.storage() = s_storage.data();

ppl::NUTSConfig<> config;
config.n_samples = n_samples;
config.warmup = n_samples;

for (auto _ : state) {
ppl::nuts(model);
ppl::nuts(model, config);
}

std::cout << "l1: " << sample_average(l1_storage) << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion benchmark/normal_two_prior_distribution_stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

stan_file = 'normal_two_prior_distribution_stan.stan'
sm = CmdStanModel(stan_file=stan_file)
fit = sm.sample(data=cool_dat, chains=4, cores=1,
fit = sm.sample(data=cool_dat, chains=1, cores=1,
iter_warmup=1000, iter_sampling=1000, thin=1,
max_treedepth=10, metric='diag', adapt_engaged=True,
output_dir='.')
Expand Down
127 changes: 55 additions & 72 deletions benchmark/regression_autoppl.cpp
Original file line number Diff line number Diff line change
@@ -1,91 +1,74 @@
#include <chrono>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <array>
#include <sstream>
#include <unordered_map>

#include <autoppl/variable.hpp>
#include <autoppl/expr_builder.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/expression/expr_builder.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>

#include "benchmark_utils.hpp"

#include <benchmark/benchmark.h>
#include <autoppl/math/ess.hpp>

namespace ppl {

/// Population standard deviation of the values held in a container.
///
/// @tparam ArrayType  container type exposing begin(), end() and size()
/// @param  v          values to summarise
/// @return square root of the mean squared deviation from the arithmetic
///         mean; the divisor is v.size() (not v.size() - 1)
template <class ArrayType>
inline double stddev(const ArrayType& v)
{
    const double center = std::accumulate(v.begin(), v.end(), 0.) / v.size();

    // Sum of squared deviations, folded left to right into a double.
    const double sq_dev_sum = std::accumulate(
        v.begin(), v.end(), 0.,
        [center](double running, const auto& elem) -> double {
            const auto delta = elem - center;
            return running + delta * delta;
        });

    return std::sqrt(sq_dev_sum / v.size());
}

static void BM_Regression(benchmark::State& state) {
size_t num_samples = state.range(0);

std::array<std::string, 4> headers = {"Life expectancy", "Alcohol", "HIV/AIDS", "GDP"};

std::unordered_map<std::string, ppl::Data<double>> data;
std::unordered_map<std::string, ppl::Param<double>> params;
std::array<std::vector<double>, 4> storage;

// Read in data
std::fstream fin;
fin.open("life-clean.csv", std::ios::in);
std::string line;
double value;
while (std::getline(fin, line, '\n')) {
auto it = headers.begin();
std::stringstream s(line);
while (s >> value) {
data[*it].observe(value);
++it;
}
}

// resize each storage and bind with param
int i = 0;
for (auto it = headers.begin(); it != headers.end(); ++it, ++i) {
storage[i].resize(num_samples);
params[*it].set_storage(storage[i].data());
// load data
std::string datapath = "/Users/jhyang/sandbox/autoppl/build/benchmark/life-clean.csv";
arma::mat data;
data.load(datapath);
arma::mat X_data = data.tail_cols(data.n_cols-1);
arma::vec y_data = data.col(0); // life expectancy

// create data and param tags
auto X = ppl::make_data_view<ppl::mat>(X_data);
auto y = ppl::make_data_view<ppl::vec>(y_data);
ppl::Param<double, ppl::vec> w(3);
ppl::Param<double> b;
ppl::Param<double> s;

// create and bind sample storage
arma::mat storage(num_samples, w.size() + b.size() + s.size());

for (size_t i = 0; i < w.size(); ++i) {
w.storage(i) = storage.colptr(i);
}

auto model = (params["Alcohol"] |= ppl::normal(0., 5.),
params["HIV/AIDS"] |= ppl::normal(0., 5.),
params["GDP"] |= ppl::normal(0., 5.),
params["Life expectancy"] |= ppl::normal(0., 5.),

data["Life expectancy"] |= ppl::normal(
params["Alcohol"] * data["Alcohol"] +
params["HIV/AIDS"] * data["HIV/AIDS"] +
params["GDP"] * data["GDP"] + params["Life expectancy"], 5.0));
b.storage() = storage.colptr(w.size());
s.storage() = storage.colptr(w.size() + b.size());

// define model
auto model = (s |= ppl::uniform(0.5, 8.),
b |= ppl::normal(0., 5.),
w |= ppl::normal(0., 5.),
y |= ppl::normal(ppl::dot(X, w) + b, s * s + 2.));

NUTSConfig<> config = {
.warmup = num_samples,
.n_samples = num_samples
};
// perform NUTS sampling
NUTSConfig<> config;
config.warmup = num_samples;
config.n_samples = num_samples;

for (auto _ : state) {
ppl::nuts(model, config);
}

std::cout << "Bias: " << sample_average(storage[0]) << std::endl;
std::cout << "Alcohol w: " << sample_average(storage[1]) << std::endl;
std::cout << "HIV/AIDS w: " << sample_average(storage[2]) << std::endl;
std::cout << "GDP: " << sample_average(storage[3]) << std::endl;

std::cout << "Bias: " << stddev(storage[0]) << std::endl;
std::cout << "Alcohol w: " << stddev(storage[1]) << std::endl;
std::cout << "HIV/AIDS w: " << stddev(storage[2]) << std::endl;
std::cout << "GDP: " << stddev(storage[3]) << std::endl;
arma::cube out(storage.n_rows,
storage.n_cols,
1);
out.slice(0) = storage;
arma::vec ess_res = math::ess(out);
ess_res.print("ESS");

// print mean and stddev results
std::cout << "Bias: " << arma::mean(storage.col(3)) << std::endl;
std::cout << "Alcohol: " << arma::mean(storage.col(0)) << std::endl;
std::cout << "HIV/AIDS: " << arma::mean(storage.col(1)) << std::endl;
std::cout << "GDP: " << arma::mean(storage.col(2)) << std::endl;
std::cout << "s: " << arma::mean(storage.col(4)) << std::endl;

std::cout << "Bias: " << arma::stddev(storage.col(3)) << std::endl;
std::cout << "Alcohol w: " << arma::stddev(storage.col(0)) << std::endl;
std::cout << "HIV/AIDS w: " << arma::stddev(storage.col(1)) << std::endl;
std::cout << "GDP: " << arma::stddev(storage.col(2)) << std::endl;
std::cout << "s: " << arma::stddev(storage.col(4)) << std::endl;
}

BENCHMARK(BM_Regression)->Arg(100)->Arg(500)->Arg(1000)->Arg(5000)->Arg(10000)->Arg(50000)->Arg(100000);
Expand Down
17 changes: 9 additions & 8 deletions benchmark/regression_autoppl_2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#include <sstream>
#include <unordered_map>

#include <autoppl/variable.hpp>
#include <autoppl/expr_builder.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/expression/expr_builder.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>

#include "benchmark_utils.hpp"
Expand All @@ -23,7 +24,7 @@ static void BM_Regression(benchmark::State& state) {

std::array<std::string, 4> headers = {"b", "x1", "x2", "x3"};

std::unordered_map<std::string, ppl::Data<double>> data;
std::unordered_map<std::string, ppl::Data<double, ppl::vec>> data;
std::unordered_map<std::string, ppl::Param<double>> params;
std::array<std::vector<double>, 4> storage;

Expand All @@ -37,17 +38,17 @@ static void BM_Regression(benchmark::State& state) {
double x1 = n1(gen);
double x2 = n2(gen);
double x3 = n3(gen);
data[headers[1]].observe(x1);
data[headers[2]].observe(x2);
data[headers[3]].observe(x3);
data["y"].observe(x1 * 1.4 + x2 * 2. + x3 * 0.32 + eps(gen));
data[headers[1]].push_back(x1);
data[headers[2]].push_back(x2);
data[headers[3]].push_back(x3);
data["y"].push_back(x1 * 1.4 + x2 * 2. + x3 * 0.32 + eps(gen));
}

// resize each storage and bind with param
int i = 0;
for (auto it = headers.begin(); it != headers.end(); ++it, ++i) {
storage[i].resize(num_samples);
params[*it].set_storage(storage[i].data());
params[*it].storage() = storage[i].data();
}

auto model = (params["b"] |= ppl::normal(0., 5.),
Expand Down
4 changes: 3 additions & 1 deletion benchmark/regression_stan.stan
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ data {
parameters {
real alpha;
vector[K] beta;
real s;
}
model {
s ~ uniform(0.5, 8);
alpha ~ normal(0, 5);
beta ~ normal(0, 5);
y ~ normal(alpha + x * beta, 5);
y ~ normal(alpha + x * beta, s * s + 2);
}

0 comments on commit 8f671f3

Please sign in to comment.