James.yang/regression benchmark tidy (#36)

* Add regression for pymc3
* Add compiletime and runtime plots and outputs mean/stddev
JamesYang007 · Jun 7, 2020 · 931fd0e · 931fd0e
1 parent 005a2f2
commit 931fd0e
Show file tree

Hide file tree

Showing 5 changed files with 96 additions and 1 deletion.
diff --git a/benchmark/regression_autoppl.cpp b/benchmark/regression_autoppl.cpp
@@ -17,6 +17,18 @@
 
 namespace ppl {
 
+template <class ArrayType> // ArrayType must offer begin(), end(), size() and range-for iteration
+inline double stddev(const ArrayType& v) // population standard deviation (divides by N, not N-1) of v's elements
+{
+    double mean = std::accumulate(v.begin(), v.end(), 0.)/v.size(); // the 0. seed makes the sum accumulate as double
+    double var = 0.; // running sum of squared deviations from the mean
+    for (auto x : v) { // each element is copied into x
+        auto diff = (x - mean);
+        var += diff * diff;
+    }
+    return std::sqrt(var/(v.size())); // NOTE(review): an empty v makes both divisions 0/0, so the result is presumably NaN — confirm callers never pass an empty container
+}
+
 static void BM_Regression(benchmark::State& state) {
     size_t num_samples = state.range(0);
 
@@ -65,10 +77,15 @@ static void BM_Regression(benchmark::State& state) {
 		ppl::nuts(model, config);
     }
 
+	std::cout << "Bias: " << sample_average(storage[0]) << std::endl;
 	std::cout << "Alcohol w: " << sample_average(storage[1]) << std::endl;
 	std::cout << "HIV/AIDS w: " << sample_average(storage[2]) << std::endl;
 	std::cout << "GDP: " << sample_average(storage[3]) << std::endl;
-	std::cout << "Bias: " << sample_average(storage[0]) << std::endl;
+
+	std::cout << "Bias: " << stddev(storage[0]) << std::endl;
+	std::cout << "Alcohol w: " << stddev(storage[1]) << std::endl;
+	std::cout << "HIV/AIDS w: " << stddev(storage[2]) << std::endl;
+	std::cout << "GDP: " << stddev(storage[3]) << std::endl;
 }
 
 BENCHMARK(BM_Regression)->Arg(100)->Arg(500)->Arg(1000)->Arg(5000)->Arg(10000)->Arg(50000)->Arg(100000);

diff --git a/benchmark/regression_pymc3.py b/benchmark/regression_pymc3.py
@@ -0,0 +1,24 @@
+import pymc3 as pm  # script: fit a Bayesian linear regression on life-clean.csv with PyMC3 and print the sampling result
+import pandas as pd
+import numpy as np
+
+df = pd.read_csv("life-clean.csv", names=['le', 'alc', 'hiv', 'gdp'], delimiter=' ')  # space-delimited file, path relative to the working directory; columns labelled here
+X, y = np.array(df[['alc', 'hiv', 'gdp']]), np.array(df['le'])  # X: the alc/hiv/gdp predictor columns; y: the le response column
+
+basic_model = pm.Model()
+
+n_cols = np.size(X, 1)  # number of predictor columns (extent of X along axis 1)
+
+with basic_model:
+    a = pm.Normal('a', mu=0, sigma=5)  # prior on the additive (intercept) term
+    b = pm.MvNormal('b', mu=np.zeros(n_cols),  # prior on the weights, one entry per predictor column
+                    cov=5*np.identity(n_cols),  # NOTE(review): cov=5*I is a variance of 5 (sd ~2.24) while `a` uses sigma=5 — confirm the scale difference is intended
+                    shape=(1,n_cols))
+    y_data = pm.Normal('y_data', mu=(pm.math.dot(X, b.T) + a),  # NOTE(review): mu presumably has shape (N, 1) while y is (N,) — confirm this does not broadcast to (N, N)
+                       sigma=5, observed=y)
+
+with basic_model:
+    data = pm.sample(draws=1000, n_init=1000,
+                     chains=1, cores=1, tune=1000)  # one chain on one core, requesting 1000 tuning steps and 1000 draws
+
+print(data)  # prints whatever pm.sample returned; NOTE(review): presumably the trace object's repr rather than summary statistics — confirm
diff --git a/docs/figures/regression_benchmark_plot/compiletime.png b/docs/figures/regression_benchmark_plot/compiletime.png
diff --git a/docs/figures/regression_benchmark_plot/figplot.py b/docs/figures/regression_benchmark_plot/figplot.py
@@ -0,0 +1,54 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+# N values
+n_range = np.array([
+    1e2, 5e2, 1e3, 5e3, 1e4, 5e4, 1e5
+])
+
+# AutoPPL benchmark in seconds
+autoppl_res = np.array([
+    0.184537766,
+    0.792546206,
+    1.273571554,
+    3.687687061,
+    6.339291371,
+    16.626929522,
+    23.583710674
+])
+
+# STAN benchmark in seconds
+stan_res = np.array([
+    0.520629,
+    0.698193,
+    2.45387,
+    9.30525,
+    13.3364,
+    35.1523,
+    53.9648
+])
+
+plt.plot(n_range, autoppl_res, '-',
+         marker='o', color='blue',
+         label='autoppl', alpha=0.5)
+plt.plot(n_range, stan_res, '-',
+         marker='o', color='red',
+         label='stan', alpha=0.5)
+plt.title('Regression Benchmark')
+plt.xlabel('Number of Samples Drawn')
+plt.ylabel('Time (s)')
+plt.legend()
+plt.savefig('runtime.png')
+plt.show()
+
+width=0.3
+eps=0.15
+plt.bar([-width/2-eps, width/2+eps], [4.62, 9.98808],
+        width=0.3,
+        color=['blue', 'red'],
+        tick_label=['autoppl', 'stan'],
+        alpha=0.5)
+plt.title('Compilation Time')
+plt.ylabel('Time (s)')
+plt.savefig('compiletime.png')
+plt.show()
diff --git a/docs/figures/regression_benchmark_plot/runtime.png b/docs/figures/regression_benchmark_plot/runtime.png