In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import pymc3 as pm
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import scale
from theano import shared
import theano.tensor as T
from pymc3 import *
import warnings
warnings.filterwarnings('ignore')

  from ._conv import register_converters as _register_converters


In [3]:
#Importing dataset
df = pd.read_csv('breast-cancer-wisconsin.csv')
df.drop(['id'],1,inplace=True)

# Convert '?' to NaN
df[df == '?'] = np.nan
# Drop missing values and print shape of new DataFrame
df = df.dropna()

X = scale(np.array(df.drop(['class'],1)))
y = np.array(df['class'])/2-1

#Split Data
X_tr, X_te, y_tr, y_te = train_test_split(X,y,test_size=0.2, random_state=42)

#Sharedvariable
model_input = shared(X_tr)
model_output= shared(y_tr)

In [4]:
#Generate Model
logistic_model = pm.Model()
with logistic_model:
    # Priors for unknown model parameters    
    alpha = pm.Normal("alpha", mu=0,sd=1)
    betas = pm.Normal("betas", mu=0, sd=1, shape=X.shape[1])
    
    # Expected value of outcome
    p = pm.invlogit(alpha + T.dot(model_input,betas))
    

    # Likelihood (sampling distribution of observations)
    y = pm.Bernoulli('y', p, observed=model_output)

In [5]:
#infering parameters
with logistic_model:
    advi=pm.ADVI()
    approx = advi.fit(n=10000,more_replacements={
        model_input:pm.Minibatch(X_tr),
        model_output:pm.Minibatch(y_tr)
    
     }
        )

Average Loss = 19.025: 100%|█████████████████████████████████████████████████████| 10000/10000 [09:17<00:00, 17.93it/s]
Finished [100%]: Average Loss = 19.03


In [None]:
#Replace shared variable with testing set
model_input.set_value(X_te)
model_output.set_value(y_te)

# Creater posterior predictive samples
trace = approx.sample(draws=10000)
ppc = pm.sample_ppc(trace,model=logistic_model,samples=1000)
pred = ppc['y'].mean(axis=0) > 0.5

print('Accuracy = {}%'.format((y_te == pred).mean() * 100))

 56%|███████████████████████████████████████████▍                                  | 557/1000 [00:02<00:02, 186.75it/s]

In [None]:
print(pm.summary(trace))
pm.plots.traceplot(trace)
plt.show()