In [4]:
import numpy as np
from pymc3 import * 
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
#The following cell creates the two models with noise based on a target. One should note that the first model has more noise than the second model so one would expect model 1 to perform worse than model 2


size = 200
true_intercept = 1
true_slope = 2
x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
model1 = true_regression_line + np.random.normal(scale=.5, size=size) #Noisy
model2 = true_regression_line + np.random.normal(scale=.2, size=size) #Less Noisy

In [6]:
# Let us see what the MAE looks like
print(mean_absolute_error(true_regression_line,model1))
print(mean_absolute_error(true_regression_line,model2))


0.3854364032632698
0.15433572310989543


In [7]:
# As expected the noisier model does worse
# Now let us look at the straight average

print(mean_absolute_error(true_regression_line,model1*.5+model2*.5))



0.20716890052028014


In [15]:
# As one can see this isn't as good as our top model
# Now comes the cool part. We are going to use MCMC to draw samples from our data and get stats on how we can obtain a model that gets the best out of our raw models.

# Important: Please look at the documentation here (https://pymc-devs.github.io/pymc3/index.html) for details
data = dict(x1=model1, x2=model2, y=true_regression_line)
with Model() as model:
    # specify glm and pass in data. The resulting linear model, its likelihood and 
    # and all its parameters are automatically added to our model.
glm('y ~ x1 + x2', data)
pymc3.glm.GLM('y ~ x1 + x2', data)
    pm.glm.GLM.from_formula('y ~ x1 + x2', data)
    step = NUTS() # Instantiate MCMC sampling algorithm
    trace = sample(2000, step, progressbar=False)

IndentationError: expected an indented block (<ipython-input-15-8a5029c38dfe>, line 9)