In [None]:
import pymc3 as pm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import theano.tensor as tt

%matplotlib inline

In [None]:
# Read the raw records. The file is parsed with header=None, so column
# names are assigned by hand immediately afterwards.
data = pd.read_csv('datasets/antiseptic-amputation.csv', header=None)
data.columns = [
    'subject',
    'year',
    'antiseptic',
    'limb',
    'outcome',
]
data = data.set_index('subject')

# Re-base `year` so that the earliest year present in the data becomes 0.
earliest_year = data['year'].min()
data['year'] = data['year'].sub(earliest_year)
data.head()

The logistic function is defined as

$$ p = \frac{1}{1 + e^{-k}}$$

Here, the $k$ term refers to:

$$ k = \beta_{1}x_{1} + \beta_{2}x_{2} + \dots + \beta_{n}x_{n} $$

Therefore, we will write the model as follows:

In [None]:
with pm.Model() as model:
    # One coefficient per predictor, in the order: year, antiseptic, limb.
    # NOTE(review): `sd` is a standard deviation, so 100**2 means sd = 10,000.
    # If a *variance* of 100**2 was intended this should be sd=100 -- confirm.
    betas = pm.Normal('betas', mu=0, sd=100**2, shape=(3,1))

    # Linear predictor k = sum_i beta_i * x_i (this model has no intercept).
    # Plain NumPy arrays are passed so the symbolic graph does not depend on
    # implicit pandas -> tensor conversion.
    x = (
        betas[0] * data['year'].values
        + betas[1] * data['antiseptic'].values
        + betas[2] * data['limb'].values
    )

    # Logistic link. pm.math.invlogit builds the expression from Theano ops
    # instead of applying a NumPy ufunc to a symbolic tensor, and with its
    # default `eps` it keeps p strictly inside (0, 1), so the Bernoulli
    # log-likelihood stays finite even when |x| is very large.
    prob = pm.math.invlogit(x)
    prob_like = pm.Bernoulli('likelihood', p=prob, observed=data['outcome'])

In [None]:
with model:
    # Draw 2000 posterior samples. The fixed seed makes the result repeatable
    # under Restart Kernel -> Run All.
    trace = pm.sample(draws=2000, random_seed=42)

In [None]:
# Plot the sampled values in `trace` for a visual check of the sampler output.
pm.traceplot(trace)

Posterior predictive check.

In [None]:
# Simulate 500 posterior-predictive data sets from the fitted model.
# The fixed seed keeps the simulated outcomes (and everything derived from
# them below) repeatable between runs.
ppc = pm.sample_ppc(trace, model=model, samples=500, random_seed=42)

In [None]:
# Average the simulated 'likelihood' values over axis 0
# (presumably the posterior-predictive draw axis, leaving one value per subject -- TODO confirm).
ppc['likelihood'].mean(axis=0)

In [None]:
# Turn the axis-0 mean of the simulated outcomes into hard 0/1 predictions by
# rounding to the nearest integer (ties go to the even value, as with np.rint).
preds = ppc['likelihood'].mean(axis=0).round().astype(int)

In [None]:
# Show the observed outcomes as a plain array, for comparison with `preds`.
data['outcome'].values

In [None]:
from sklearn.metrics import accuracy_score

# In-sample accuracy of the rounded posterior-predictive predictions.
# scikit-learn's signature is accuracy_score(y_true, y_pred), so the observed
# labels go first. Accuracy is symmetric, so the value itself is unchanged.
accuracy_score(data['outcome'], preds)

In [None]:
# Forest plot of the sampled coefficients. The labels follow the order in which
# the betas multiply the predictors in the model: year, antiseptic, limb.
pm.forestplot(trace, ylabels=['year', 'antiseptic', 'limb'])

The use of antiseptics, as we can see, has the greatest effect on survival.