In [1]:
%matplotlib inline
import pymc3 as pm
import theano.tensor as T
import theano
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons



In [2]:
# Generate the two-moons toy data set and standardize the features
# before splitting into training and test halves.
X, Y = make_moons(noise=0.2, random_state=0, n_samples=1000)
X = scale(X)
# Seed the split as well: without it the train/test partition (and every
# result computed from it) changes on each kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5,
                                                    random_state=0)

In [3]:
# Regenerate the moons data. The raw output has to be standardized again,
# otherwise X would no longer match the scaled X_train / X_test derived from it
# and the later plot of X would show a different scale than the model sees.
X, Y = make_moons(noise=0.2, random_state=0, n_samples=1000)
X = scale(X)

In [None]:
# Peek at the first row of the training inputs
X_train[0]

In [None]:
fig, ax = plt.subplots()
# One scatter call per class: class 0 keeps the default colour, class 1 is red
for cls, legend_text, style in ((0, 'Class 0', {}), (1, 'Class 1', {'color': 'r'})):
    members = Y == cls
    ax.scatter(X[members, 0], X[members, 1], label=legend_text, **style)
sns.despine()
ax.legend()
ax.set(title='Toy binary classification data set', xlabel='X', ylabel='Y');

In [4]:
# Wrap the training arrays in Theano shared variables so their contents can be
# swapped later (via set_value) without rebuilding the model.
ann_input, ann_output = (theano.shared(arr) for arr in (X_train, Y_train))

In [5]:
# Number of units per hidden layer; sets the shapes of the weight matrices
# and of their initial values.
n_hidden = 5

In [18]:
# Initialize random weights between each layer
init_1 = np.random.randn(8,10 )
init_2 = np.random.randn(n_hidden, n_hidden)
init_out = np.random.randn(n_hidden)

In [20]:
# Show the initial values drawn for the hidden-to-output weights
init_out 

array([-1.04277823,  0.10065864,  0.01561205, -1.67738659,  1.91435268])

In [19]:
# Show the initial values drawn for the input-to-hidden weights
init_1

array([[ 0.33045612,  1.81021977,  0.36590668,  0.913741  ,  0.21131216,
        -0.17567177, -1.41976488, -1.13383153, -1.35333642, -1.53368899],
       [-0.06055043,  0.78861707, -0.51237781,  0.37811175,  0.23941783,
         0.08465236,  0.42902274, -0.71825737,  0.2030229 , -1.09891637],
       [-0.4255115 ,  1.34997591, -0.91701389,  0.11425821,  1.58871078,
         1.63347052,  0.74299525, -1.54570153, -0.03656181, -0.55119914],
       [ 0.65764739,  1.01088028, -0.48456784,  1.93815755,  0.68325589,
         0.93943992, -0.11291378,  0.99569054,  2.16322231, -0.68494741],
       [ 1.72354675, -0.59817945,  0.13235702,  0.76575874, -0.78029397,
        -0.14007675, -1.32716169, -1.63800656,  0.69881559,  0.48133482],
       [ 1.36779317, -0.18702758, -0.45020651, -0.02277323,  0.65984291,
         1.12560053,  0.01651371,  0.68663271, -0.82834581,  1.43887149],
       [ 0.68027741,  0.18861499,  0.92109976,  0.76324714,  0.7343257 ,
         1.80330056,  0.75104483,  0.19191977

In [16]:
# Size of the second axis of X, used as the first dimension of the input weights
X.shape[1]

2

In [7]:
with pm.Model() as neural_network:
    # Input -> hidden layer weights: Normal(0, 1) prior with one row per
    # input column of X and one column per hidden unit, started at init_1.
    weights_in_1 = pm.Normal(
        'w_in_1', mu=0, sd=1,
        testval=init_1,
        shape=(X.shape[1], n_hidden),
    )
    

In [15]:
# Display the input-layer weight variable created in the model
weights_in_1

<bound method FreeRV.all of w_in_1>

In [None]:
# Remaining layers of the network. These statements register variables on
# `neural_network`, so they must run inside its model context; as a bare
# indented fragment this cell could not even be parsed.
with neural_network:
    # Weights from 1st to 2nd layer
    weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                            shape=(n_hidden, n_hidden),
                            testval=init_2)
    # Weights from hidden layer to output
    weights_2_out = pm.Normal('w_2_out', 0, sd=1,
                              shape=(n_hidden,),
                              testval=init_out)

    # Build neural-network using tanh activation function
    act_1 = T.tanh(T.dot(ann_input,
                         weights_in_1))
    act_2 = T.tanh(T.dot(act_1,
                         weights_1_2))
    # Sigmoid squashes the output into (0, 1) so it can act as a probability
    act_out = T.nnet.sigmoid(T.dot(act_2,
                                   weights_2_out))

    # Binary classification -> Bernoulli likelihood
    out = pm.Bernoulli('out',
                       act_out,
                       observed=ann_output)

In [None]:
%%time
with neural_network:
    # Run ADVI which returns posterior means, standard deviations, and the evidence lower bound (ELBO)
    # n=50000 is passed through to advi; %%time reports how long the fit took.
    v_params = pm.variational.advi(n=50000)


In [None]:
with neural_network:
    # Draw 5000 samples from the fitted variational parameters to build a trace
    trace = pm.variational.sample_vp(v_params, draws=5000)

In [None]:
# Replace shared variables with testing set
ann_input.set_value(X_test)
ann_output.set_value(Y_test)

# Creater posterior predictive samples
ppc = pm.sample_ppc(trace, model=neural_network, samples=500)

# Use probability of > 0.5 to assume prediction of class 1
pred = ppc['out'].mean(axis=0) > 0.5

In [None]:
fig, ax = plt.subplots()
# One scatter call per predicted label: 0 keeps the default colour, 1 is red
for label, style in ((0, {}), (1, {'color': 'r'})):
    selected = pred == label
    ax.scatter(X_test[selected, 0], X_test[selected, 1], **style)
sns.despine()
ax.set(xlabel='X', ylabel='Y', title='Predicted labels in testing set');

In [None]:
# Share of test points whose predicted label equals the true label, as a percentage
print('Accuracy = {}%'.format((Y_test == pred).mean() * 100))