Skip to content

Commit

Permalink
James.yang/regression benchmark tidy (#36)
Browse files Browse the repository at this point in the history
* Add regression for pymc3

* Add compiletime and runtime plots and outputs mean/stddev
  • Loading branch information
JamesYang007 committed Jun 7, 2020
1 parent 005a2f2 commit 931fd0e
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 1 deletion.
19 changes: 18 additions & 1 deletion benchmark/regression_autoppl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@

namespace ppl {

/**
 * Computes the population standard deviation of the values in v, i.e. the
 * sum of squared deviations from the mean is divided by v.size() (not by
 * v.size() - 1).
 *
 * @tparam ArrayType  container providing begin(), end() and size() whose
 *                    elements can be accumulated into a double.
 * @param  v          values to summarize.
 * @return the population standard deviation of v, or 0 when v is empty.
 */
template <class ArrayType>
inline double stddev(const ArrayType& v)
{
    // An empty range would otherwise evaluate 0.0 / 0 and propagate NaN.
    if (v.size() == 0) {
        return 0.;
    }

    const double count = static_cast<double>(v.size());
    const double mean = std::accumulate(v.begin(), v.end(), 0.) / count;

    double var = 0.;
    for (const auto& x : v) {
        const double diff = x - mean;
        var += diff * diff;
    }
    return std::sqrt(var / count);
}

static void BM_Regression(benchmark::State& state) {
size_t num_samples = state.range(0);

Expand Down Expand Up @@ -65,10 +77,15 @@ static void BM_Regression(benchmark::State& state) {
ppl::nuts(model, config);
}

std::cout << "Bias: " << sample_average(storage[0]) << std::endl;
std::cout << "Alcohol w: " << sample_average(storage[1]) << std::endl;
std::cout << "HIV/AIDS w: " << sample_average(storage[2]) << std::endl;
std::cout << "GDP: " << sample_average(storage[3]) << std::endl;
std::cout << "Bias: " << sample_average(storage[0]) << std::endl;

std::cout << "Bias: " << stddev(storage[0]) << std::endl;
std::cout << "Alcohol w: " << stddev(storage[1]) << std::endl;
std::cout << "HIV/AIDS w: " << stddev(storage[2]) << std::endl;
std::cout << "GDP: " << stddev(storage[3]) << std::endl;
}

BENCHMARK(BM_Regression)->Arg(100)->Arg(500)->Arg(1000)->Arg(5000)->Arg(10000)->Arg(50000)->Arg(100000);
Expand Down
24 changes: 24 additions & 0 deletions benchmark/regression_pymc3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pymc3 as pm
import pandas as pd
import numpy as np

# Read the space-delimited data set; column names are supplied explicitly.
columns = ['le', 'alc', 'hiv', 'gdp']
df = pd.read_csv("life-clean.csv", names=columns, delimiter=' ')

# Predictor matrix (alc, hiv, gdp) and response vector (le).
X = np.array(df[['alc', 'hiv', 'gdp']])
y = np.array(df['le'])

n_features = X.shape[1]

with pm.Model() as basic_model:
    # Scalar intercept with a Normal(0, 5) prior.
    a = pm.Normal('a', mu=0, sigma=5)

    # Row vector of regression weights, one per predictor column.
    prior_mean = np.zeros(n_features)
    prior_cov = 5 * np.identity(n_features)
    b = pm.MvNormal('b', mu=prior_mean, cov=prior_cov,
                    shape=(1, n_features))

    # Likelihood of the observed response around the linear predictor.
    linear_predictor = pm.math.dot(X, b.T) + a
    y_data = pm.Normal('y_data', mu=linear_predictor,
                       sigma=5, observed=y)

    # One chain on one core: 1000 tuning steps, then 1000 draws.
    trace = pm.sample(draws=1000, tune=1000, n_init=1000,
                      chains=1, cores=1)

print(trace)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
54 changes: 54 additions & 0 deletions docs/figures/regression_benchmark_plot/figplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import matplotlib.pyplot as plt
import numpy as np

# N values
n_range = np.array([
    1e2, 5e2, 1e3, 5e3, 1e4, 5e4, 1e5
])

# AutoPPL benchmark in seconds
autoppl_res = np.array([
    0.184537766,
    0.792546206,
    1.273571554,
    3.687687061,
    6.339291371,
    16.626929522,
    23.583710674
])

# STAN benchmark in seconds
stan_res = np.array([
    0.520629,
    0.698193,
    2.45387,
    9.30525,
    13.3364,
    35.1523,
    53.9648
])

# Figure 1: runtime against number of samples, saved as runtime.png.
plt.figure()
plt.plot(n_range, autoppl_res, '-',
         marker='o', color='blue',
         label='autoppl', alpha=0.5)
plt.plot(n_range, stan_res, '-',
         marker='o', color='red',
         label='stan', alpha=0.5)
plt.title('Regression Benchmark')
plt.xlabel('Number of Samples Drawn')
plt.ylabel('Time (s)')
plt.legend()
plt.savefig('runtime.png')
plt.show()

# Figure 2: compilation time in seconds, saved as compiletime.png.
# A new figure is started explicitly: with a non-interactive backend
# plt.show() leaves the previous figure current, so the bars would otherwise
# be drawn on top of the runtime curves.
plt.figure()
width = 0.3  # bar width
eps = 0.15   # extra offset of each bar centre away from x = 0
plt.bar([-width/2-eps, width/2+eps], [4.62, 9.98808],
        width=width,
        color=['blue', 'red'],
        tick_label=['autoppl', 'stan'],
        alpha=0.5)
plt.title('Compilation Time')
plt.ylabel('Time (s)')
plt.savefig('compiletime.png')
plt.show()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 931fd0e

Please sign in to comment.