Skip to content

Commit

Permalink
Merge 4771a1b into 8bb020d
Browse files Browse the repository at this point in the history
  • Loading branch information
jenchen1398 committed Apr 21, 2020
2 parents 8bb020d + 4771a1b commit 5b58fb8
Show file tree
Hide file tree
Showing 43 changed files with 2,515 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
coveralls
--root ../../
--build-root ./
--include autoppl/include
--include include
--exclude lib
--gcov 'gcov-7'
--gcov-options '\-lp'
Expand Down
18 changes: 16 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ option(AUTOPPL_ENABLE_BENCHMARK "Enable benchmarks to be built." OFF)
option(AUTOPPL_ENABLE_TEST_COVERAGE "Build with test coverage (AUTOPPL_ENABLE_TEST must be ON)" OFF)

# Automate the choosing of config
# if CMAKE_BUILD_TYPE not defined
if (NOT CMAKE_BUILD_TYPE)
# if binary directory ends with "release", use release mode
if (${PROJECT_BINARY_DIR} MATCHES "release$")
Expand All @@ -22,6 +21,16 @@ if (NOT CMAKE_BUILD_TYPE)
endif()
message(STATUS "Compiling in ${CMAKE_BUILD_TYPE} mode")

# Add this library as interface (header-only)
add_library(${PROJECT_NAME} INTERFACE)

target_include_directories(${PROJECT_NAME}
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)

# Set C++17 standard for project target
target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17)

# Configure tests
if (AUTOPPL_ENABLE_TEST)
include(CTest) # enable memcheck
Expand All @@ -35,6 +44,11 @@ if (AUTOPPL_ENABLE_TEST)
add_subdirectory(${PROJECT_SOURCE_DIR}/test ${PROJECT_BINARY_DIR}/test)
endif()

# TODO: add src dir if needed
#set(AUTOPPL_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/src)
#file(GLOB_RECURSE AUTOPPL_SOURCE_FILES RELATIVE src LIST_DIRECTORIES false *.cpp)
#set(AUTOPPL_SOURCE_FILES ${AUTOPPL_SOURCE_DIR}/autoppl.cpp)
#set(AUTOPPL_HEADER_FILES ${AUTOPPL_INCLUDE_DIR}/autoppl.h)

# Add subdirectories
add_subdirectory(${PROJECT_SOURCE_DIR}/autoppl ${PROJECT_BINARY_DIR}/autoppl)
add_subdirectory(${PROJECT_SOURCE_DIR}/lib ${PROJECT_BINARY_DIR}/lib)
15 changes: 0 additions & 15 deletions autoppl/CMakeLists.txt

This file was deleted.

11 changes: 0 additions & 11 deletions autoppl/include/autoppl.hpp

This file was deleted.

32 changes: 32 additions & 0 deletions doc/design/model_design2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Y ~ W.x + epsilon
Y ~ N(W.x, sigma^2)

Parameter<double> X {4.0}; // observed
Parameter<double> Y {5.0}; // observed
Parameter<double> W; // hidden
Model m1 = Model( // Model class defines a distribution over existing Parameters.
W |= Uniform(-10, 10), // linear regression
Y |= Normal(W * X, 3), // overload multiplication to build a graph from W * X
);

Model m2 = Model(
W |= Normal(0, 1), // ridge regression instead
Y |= Normal(W * X, 3),
);
m1.sample(1000);

(3*x).pdf(10) => x.pdf(10 / 3) / 3   // for continuous x: change of variables divides by |d(3x)/dx| = 3

X.observe(3); // observe more data

// P(Y, W | X) = P(Y | W, X) P(W | X) which is doable for multiple samples, just need to
// assert len(Y) == len(X) and then multiply out over all pairs of (X, Y) values.

// P(Y | X) => this is a fine distribution, but I can't talk about P(Y, X) or P(X | Y) until I put a prior on X.
// I don't have a joint distribution yet.

// Some issues:
// how do we do (x ** 2).pdf(5)? This is pretty damn hard for non-bijective functions, need to integrate?
//
106 changes: 106 additions & 0 deletions doc/design/model_inttest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "gtest/gtest.h"
#include <autoppl/expression/model.hpp>
#include <autoppl/expression/rv_tag.hpp>
#include <autoppl/expression/uniform.hpp>

namespace ppl {

// Plain aggregate pairing an operand `v` with an index operand `i`.
// NOTE(review): presumably the deferred (expression-tree) form of `v[i]`
// used by the `mu[y]` syntax sketched in the test below — confirm intent.
template <typename Container, typename Index>
struct BracketNode
{
    Container v;   // operand being subscripted
    Index i;       // subscript operand
};

// Design sketch of a container of random-variable tags that can be indexed
// by another random variable. Not compilable as written: the subscript
// parameter is unnamed, `return rv` has no terminating semicolon, and
// `rv_tags` is not a name declared in this file.
struct myvector
{

// Subscript taking a random-variable tag as the index.
// NOTE(review): presumably meant to return the element selected by the
// sampled value of the index — confirm before implementing.
rv_tag<double> operator[](rv_tag)
{
return rv
}
std::vector<rv_tags> v; // 3 things
};

// Factory for a Normal distribution expression.
//
// @param mu   mean argument, forwarded to Normal's constructor
// @param sig  second argument, forwarded to Normal's constructor
// @return     the constructed Normal<MuType, SigType>
//
// The original sketch built the object but never returned it, so the
// deduced return type was `void` and `x |= normal(...)` had nothing to bind.
template <class MuType, class SigType>
auto normal(const MuType& mu, const SigType& sig)
{
    return Normal<MuType, SigType>(mu, sig);
}

// Design sketch of the intended user-facing modelling API, written in the
// shape of a gtest case. It is NOT compilable as written: it uses `...`
// placeholders and names that are never declared in this function
// (x_1, x_2, y, w, mat, i), and several declarations are repeated.
TEST(dummy, dummy_test)
{
double x_data = 2.3; // 1-sample data

std::vector<double> sampled_theta_1(100);
std::vector<double> sampled_theta_2(100);

// NOTE(review): `ptr` is never referenced again in this sketch.
double* ptr;
rv_tag<double, ...> x;
rv_tag<double> theta_1(sampled_theta_1.data());
rv_tag<double> theta_2(sampled_theta_2.data());

// Idea: bind each tag in `v` to a row of some matrix `mat` as sample storage.
std::vector<rv_tag<double>> v;
std::for_each(..., ... , [](){v[i].set_sample_storage(&mat.row(i));});

x.observe(x_data);

x_1.observe(...);
x_2.observe(...);

// Model with a random index: x_1 and x_2 pick their mean via mu[y].
auto model = (
mu |= uniform(-10000, 10000),
y |= uniform({1,2,3}) // NOTE(review): no comma after this term, unlike its neighbours — confirm
x_1 |= normal(mu[y], 1),
x_2 |= normal(mu[y], 1),
);

x.observe(...);

// NOTE(review): `x` was already declared above and `mu` is used above;
// this line redeclares them.
rv_tag<double> var, mu, x;
auto normal_model = (
var |= normal(0,1),
mu |= normal(1,5),
x |= normal(mu, var)
);

std::vector<double> var_storage(1000);
std::vector<double> mu_storage(1000);

var.set_storage(var_storage.data());
mu.set_storage(mu_storage.data());

// NOTE(review): passes `model`, not the `normal_model` defined just above —
// confirm which was intended.
metropolis_hastings(model, 1000, 400);

// Unfinished placeholder for a Gaussian-mixture model.
auto gmm_model = (
mu |=
);

std::vector<rv_tag<double>> vec(model.param_num);
model.bind_storage(vec.begin(), vec.end(), ...);
model.pdf();

metropolis_hastings(model, 100);

std::vector<double> sampled_theta_1_again(1000);
std::vector<double> sampled_theta_2_again(1000);

theta_1.set_storage(sampled_theta_1_again.data());
theta_2.set_storage(sampled_theta_2_again.data());

metropolis_hastings(model, 1000);







// Linear-regression style model. NOTE(review): redeclares `model`, the
// closing `)` has no `;`, and `modeli` below is presumably a typo for `model`.
auto model = (
w |= normal(0,1),
y |= normal(w*x, 1)
)
metropolis_hastings(modeli)
}

}
123 changes: 123 additions & 0 deletions include/autoppl/algorithm/mh.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#pragma once
#include <chrono>
#include <random>
#include <algorithm>
#include <vector>
#include <autoppl/util/traits.hpp>
#include <autoppl/variable.hpp>
#include <autoppl/expression/distribution/base.hpp>

/*
* Assumptions:
* - every variable referenced in model is of type Variable<double>
*/

namespace ppl {
namespace details {

// Per-parameter scratch slot used by mh_posterior.
struct MHData
{
    // Despite the name, mh_posterior stores the parameter's value from
    // *before* the proposal here (`params_it->next = curr`) so it can be
    // written back when the proposal is rejected.
    // Zero-initialised so a default-constructed MHData never holds an
    // indeterminate value.
    double next = 0.0;
    // TODO: maybe keep an array for batch sampling?
};

} // namespace details

/*
* Metropolis-Hastings algorithm to sample from posterior distribution.
* The posterior distribution is a constant multiple of model.pdf().
* Any variables that model references which are in state "parameter"
* is sampled and in state "data" are not.
* So, model.pdf() is proportional to p(parameters... | data...).
*
* User must ensure that they allocated at least as large as n_sample
* in the storage associated with every parameter referenced in model.
*/
template <class ModelType>
inline void mh_posterior(ModelType& model,
double n_sample,
double stddev = 1.0,
double seed = std::chrono::duration_cast<
std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch()
).count()
)
{
using data_t = details::MHData;

// set-up auxiliary tools
std::mt19937 gen(seed);
std::uniform_real_distribution unif_sampler(0., 1.);

// get number of parameters to sample
size_t n_params = 0.;
auto get_n_params = [&](auto& eq_node) {
auto& var = eq_node.get_variable();
using var_t = std::decay_t<decltype(var)>;
using state_t = typename util::var_traits<var_t>::state_t;
n_params += (var.get_state() == state_t::parameter);
};
model.traverse(get_n_params);

// vector of parameter-related data with candidate
std::vector<data_t> params(n_params);
double curr_log_pdf = model.log_pdf();
auto params_it = params.begin();

for (size_t iter = 0; iter < n_sample; ++iter) {

double log_alpha = -curr_log_pdf;

// generate next candidates and place them in parameter
// variables as next values; update log_alpha
// The old values are temporary stored in the params vector.
auto get_candidate = [=, &gen](auto& eq_node) mutable {
auto& var = eq_node.get_variable();
using var_t = std::decay_t<decltype(var)>;
using state_t = typename util::var_traits<var_t>::state_t;

if (var.get_state() == state_t::parameter) {
auto curr = var.get_value();
std::normal_distribution norm_sampler(curr, stddev);

// sample new candidate, place old value in params,
// fill next candidate in var, and update log_alpha
auto cand = norm_sampler(gen);
params_it->next = curr;
var.set_value(cand);

++params_it;
}
};
model.traverse(get_candidate);

// compute next candidate log pdf and update log_alpha
double cand_log_pdf = model.log_pdf();
log_alpha += cand_log_pdf;
bool accept = (std::log(unif_sampler(gen)) <= log_alpha);

// If accept, "current" sample for next iteration is already in the variables
// so simply append to storage.
// Otherwise, "current" sample for next iteration must be moved back from
// params vector into variables.
auto add_to_storage = [params_it, iter, accept](auto& eq_node) mutable {
auto& var = eq_node.get_variable();
using var_t = std::decay_t<decltype(var)>;
using state_t = typename util::var_traits<var_t>::state_t;
if (var.get_state() == state_t::parameter) {
if (!accept) {
var.set_value(params_it->next);
++params_it;
}
auto storage = var.get_storage();
storage[iter] = var.get_value();
}
};
model.traverse(add_to_storage);

// update current log pdf for next iteration
if (accept) curr_log_pdf = cand_log_pdf;
}
}

} // namespace ppl
2 changes: 2 additions & 0 deletions include/autoppl/autoppl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#pragma once
// TODO: export all headers later!
48 changes: 48 additions & 0 deletions include/autoppl/distribution/discrete.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#pragma once
#include <cassert>
#include <cmath>
#include <initializer_list>
#include <numeric>
#include <random>
#include <vector>

namespace ppl {

// TODO: change name to DiscreteDist and make class template.
// Discrete should be a function that creates this kind of object.

/*
 * Finite discrete distribution over the indices 0, 1, ..., k-1, where
 * index i has probability weights[i] / sum(weights).
 *
 * The weights do not need to be normalised. The sum of the weights is
 * computed once at construction and reused by pdf() and log_pdf().
 * NOTE(review): weights are presumably non-negative with a positive sum;
 * this is not checked here.
 */
template <typename weight_type>
struct Discrete
{
    using value_t = int;
    using dist_value_t = double;

    // Builds the distribution from a non-empty list of weights
    // (non-emptiness is asserted).
    Discrete(std::initializer_list<weight_type> weights)
        : weights_{weights}
        , total_{std::accumulate(weights_.begin(), weights_.end(), 0.0)}
    { assert(weights.size() > 0); }

    // Draws an index in [0, k) with probability proportional to its weight.
    template <class GeneratorType>
    value_t sample(GeneratorType& gen) const
    {
        std::discrete_distribution<value_t> dist(weights_.begin(), weights_.end());
        return dist(gen);
    }

    // Probability of index i. Precondition (asserted): 0 <= i < k.
    dist_value_t pdf(value_t i) const
    {
        assert(in_range(i));
        return weights(i) / total_;
    }

    // Natural log of pdf(i). Precondition (asserted): 0 <= i < k.
    dist_value_t log_pdf(value_t i) const
    {
        assert(in_range(i));
        return std::log(pdf(i));
    }

    // Raw (unnormalised) weight of index i, converted to dist_value_t.
    // No bounds check is performed here.
    inline dist_value_t weights(value_t i) const
    {
        using size_type = typename std::vector<weight_type>::size_type;
        return static_cast<dist_value_t>(weights_[static_cast<size_type>(i)]);
    }

private:
    // True when i is a valid index into the weight table.
    bool in_range(value_t i) const
    {
        return i >= 0 && i < static_cast<value_t>(weights_.size());
    }

    std::vector<weight_type> weights_;
    dist_value_t total_;   // sum of weights_, fixed at construction
};

} // namespace ppl

Loading

0 comments on commit 5b58fb8

Please sign in to comment.