Skip to content

Commit

Permalink
Merge e936992 into 13c9118
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesYang007 committed Jul 12, 2020
2 parents 13c9118 + e936992 commit 3db0279
Show file tree
Hide file tree
Showing 78 changed files with 4,398 additions and 2,449 deletions.
16 changes: 12 additions & 4 deletions benchmark/normal_two_prior_distribution.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include <chrono>
#include <array>
#include <benchmark/benchmark.h>
#include <autoppl/autoppl.hpp>
#include "benchmark_utils.hpp"
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/expr_builder.hpp>

namespace ppl {

Expand All @@ -11,7 +15,7 @@ static void BM_NormalTwoPrior(benchmark::State& state) {

std::normal_distribution n(0.0, 1.0);
std::mt19937 gen(0);
ppl::Data<double> y;
ppl::Data<double, ppl::vec> y;

ppl::Param<double> lambda1, lambda2, sigma;
auto model = (
Expand All @@ -22,16 +26,20 @@ static void BM_NormalTwoPrior(benchmark::State& state) {
);

for (size_t i = 0; i < n_data; ++i) {
y.observe(n(gen));
y.push_back(n(gen));
}

std::array<double, n_samples> l1_storage, l2_storage, s_storage;
lambda1.set_storage(l1_storage.data());
lambda2.set_storage(l2_storage.data());
sigma.set_storage(s_storage.data());

ppl::NUTSConfig<> config;
config.n_samples = n_samples;
config.warmup = n_samples;

for (auto _ : state) {
ppl::nuts(model);
ppl::nuts(model, config);
}

std::cout << "l1: " << sample_average(l1_storage) << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion benchmark/normal_two_prior_distribution_stan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

stan_file = 'normal_two_prior_distribution_stan.stan'
sm = CmdStanModel(stan_file=stan_file)
fit = sm.sample(data=cool_dat, chains=4, cores=1,
fit = sm.sample(data=cool_dat, chains=1, cores=1,
iter_warmup=1000, iter_sampling=1000, thin=1,
max_treedepth=10, metric='diag', adapt_engaged=True,
output_dir='.')
Expand Down
9 changes: 5 additions & 4 deletions benchmark/regression_autoppl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#include <sstream>
#include <unordered_map>

#include <autoppl/variable.hpp>
#include <autoppl/expr_builder.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/expression/expr_builder.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>

#include "benchmark_utils.hpp"
Expand All @@ -34,7 +35,7 @@ static void BM_Regression(benchmark::State& state) {

std::array<std::string, 4> headers = {"Life expectancy", "Alcohol", "HIV/AIDS", "GDP"};

std::unordered_map<std::string, ppl::Data<double>> data;
std::unordered_map<std::string, ppl::Data<double, ppl::vec>> data;
std::unordered_map<std::string, ppl::Param<double>> params;
std::array<std::vector<double>, 4> storage;

Expand All @@ -47,7 +48,7 @@ static void BM_Regression(benchmark::State& state) {
auto it = headers.begin();
std::stringstream s(line);
while (s >> value) {
data[*it].observe(value);
data[*it].push_back(value);
++it;
}
}
Expand Down
15 changes: 8 additions & 7 deletions benchmark/regression_autoppl_2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#include <sstream>
#include <unordered_map>

#include <autoppl/variable.hpp>
#include <autoppl/expr_builder.hpp>
#include <autoppl/expression/variable/data.hpp>
#include <autoppl/expression/variable/param.hpp>
#include <autoppl/expression/expr_builder.hpp>
#include <autoppl/mcmc/hmc/nuts/nuts.hpp>

#include "benchmark_utils.hpp"
Expand All @@ -23,7 +24,7 @@ static void BM_Regression(benchmark::State& state) {

std::array<std::string, 4> headers = {"b", "x1", "x2", "x3"};

std::unordered_map<std::string, ppl::Data<double>> data;
std::unordered_map<std::string, ppl::Data<double, ppl::vec>> data;
std::unordered_map<std::string, ppl::Param<double>> params;
std::array<std::vector<double>, 4> storage;

Expand All @@ -37,10 +38,10 @@ static void BM_Regression(benchmark::State& state) {
double x1 = n1(gen);
double x2 = n2(gen);
double x3 = n3(gen);
data[headers[1]].observe(x1);
data[headers[2]].observe(x2);
data[headers[3]].observe(x3);
data["y"].observe(x1 * 1.4 + x2 * 2. + x3 * 0.32 + eps(gen));
data[headers[1]].push_back(x1);
data[headers[2]].push_back(x2);
data[headers[3]].push_back(x3);
data["y"].push_back(x1 * 1.4 + x2 * 2. + x3 * 0.32 + eps(gen));
}

// resize each storage and bind with param
Expand Down
157 changes: 157 additions & 0 deletions docs/design/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Design Overview

## Example

```cpp
DataView<std::vector<double>, ppl::vec> x(raw_x);
// Data<double, ppl::vec> x({...}); // another option
Param<double> l1;
ParamFixed<double, 3> l2;
// Param<double, ppl::vec> l2(3); // another option
auto model = (
l1 |= normal(0., 1.),
l2 |= normal(l1, 2.),
x |= normal(l2[0] * l2[1] - l2[2], 1.)
);
l1.storage(ptr);
l2.storage(ptr, i);
ppl::nuts(model);
```
- `l1` is a scalar that is standard normally distributed
- `l2` is a vector of size 3 whose elements are each independently ~ N(l1, 2)
- `x` is a vector of data ~ N(l2[0]*l2[1]-l2[2], 1.)
- `l2` is subscriptable
## Variable
The variable concept is really only satisfied by Param, ParamView, Data, DataView, or the like.
Every first variable has a unique ID or views a unique ID.
This gives us a way to tell when variables referenced
in the model point to the "same" entity.
This can be useful when checking correct construction of model such as:
- no variable gets assigned a distribution more than once
- no variable gets assigned a distribution that references the same variable
- no distribution uses variables that reference variables below it
### Param
A Param should be a variable expression and also a variable.
The model will only be built using ParamView since Param may own values
that the model should only view.
If Param is multi-dimensional (vec, mat), its size must be known
at construction and cannot change afterwards,
because the model may otherwise keep referencing the old size values.
Logically, a parameter denoted by a symbol was defined from fathoming a model.
If it is immediately used in a different model, it's most likely that the parameter
represents the same kind of quantity, but is assigned a different distribution.
## Concepts
### model_expr
Implements:
```cpp
template <class F>
void traverse(F&& elt_f); // + const version
template <class F1, class F2>
void traverse(F1&& elt_f, F2&& combine_f); // + const version
/*...*/ pdf() const;
/*...*/ log_pdf() const;
template <class MapType, class VecType>
/*...*/ ad_log_pdf(const MapType& map,
const VecType& vars) const;
```

- map is expected to be a hashmap of:
```
addresses of unique parameters (const void*) ->
begin idx of corresponding vector of vars
```
- Ex.
```
(mu |= normal(0,1), s |= normal(0,1), x |= normal(mu, s))
addr(mu) -> 0
addr(s) -> 1
AD Var vec: [v1, v2]
```

## Expression Nodes

The core of AutoPPL is how we construct expressions.
These expressions and their interaction define a language to express model construction.

#### Glue Node

```
glue_node = (model_expr, model_expr);
```

##### Sketch of Interface

```cpp
struct GlueNode
{
traverse(elt_f)
traverse(elt_f, combine_f)
pdf()
log_pdf()
ad_log_pdf(map, vars)
};
```
Example:
```cpp
// apply log_pdf to each element and add the results together
double lgpdf = model.traverse(log_pdf, add);
// apply ad_log_pdf to get AD expr and add them all
// if ad_log_pdf or add requires extra parameters, lambdafy them:
// [&](auto& elt) {return ad_log_pdf(elt, other_params...);}
auto ad_expr = model.traverse(ad_log_pdf, add);
// get each "unique quantity" and add them to the mapping
model.traverse(update_map);
```

#### Eq Node

```
eq_node = (quantity_expr |= dist_expr);
```

An eq expression relates a quantity with a distribution.
While the arguments can be generalized further,
we're most motivated by the case where the quantity is a parameter/data
of either variable/vector/mat (vvm) form and dist_expr is a distribution such as the normal distribution.

##### Sketch of Interface

```cpp
struct EqNode
{
traverse(eq_f);
traverse(eq_f, combine_f);
pdf();
log_pdf();
ad_log_pdf(map, vars);
get_variable();
get_distribution();
};
```
- map is the mapping from addresses of params/data to the corresponding
index in a vector of AD vectors.
- Ex.
```
mu |= normal(0,1), x |= normal(mu, 1)
addr(mu) -> 0
addr(x) -> 1
AD Var vec: [v1, v2]
```
2 changes: 1 addition & 1 deletion docs/example/normal_posterior_mean_stddev.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ int main()
{
std::array<double, 1000> mu_samples, sigma_samples;

ppl::Data<double> x {1.0, 1.5, 1.7, 1.2, 1.5};
ppl::Data<double, ppl::vec> x {1.0, 1.5, 1.7, 1.2, 1.5};
ppl::Param<double> mu {mu_samples.data()};
ppl::Param<double> sigma {sigma_samples.data()};

Expand Down
7 changes: 3 additions & 4 deletions include/autoppl/autoppl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
#include "expression/distribution/normal.hpp"
#include "expression/model/eq_node.hpp"
#include "expression/model/glue_node.hpp"
#include "expression/model/model_utils.hpp"
#include "expression/variable/binop.hpp"
#include "expression/variable/data.hpp"
#include "expression/variable/param.hpp"
#include "expression/variable/constant.hpp"
#include "expression/variable/variable_viewer.hpp"
#include "expression/expr_builder.hpp"
#include "mcmc/mh.hpp"
#include "mcmc/hmc/nuts/nuts.hpp"
#include "expr_builder.hpp"
#include "variable.hpp"
Loading

0 comments on commit 3db0279

Please sign in to comment.