In [1]:
import pandas as pd
import io
import pymc3 as pm

In [2]:
with open('german.data', 'r') as f:
    data_str = f.read()


In [3]:
# reading file 
data = io.StringIO(data_str)
df = pd.read_csv(data,sep=' ', header=None)
df.columns = ['status', 'months', 'credit', 'purpose', 'amount', 'savings', 'employment', 'rate', 'sex', 'other', 'residence',
            'property', 'age', 'plan', 'housing', 'nb_credits', 'job', 'liability', 'phone', 'foreign', 'target']

In [4]:
df.head()

Unnamed: 0,status,months,credit,purpose,amount,savings,employment,rate,sex,other,...,property,age,plan,housing,nb_credits,job,liability,phone,foreign,target
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,1
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,2
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,1
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,1
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,2


In [8]:

lower=-10**6
higher=10**6
with pm.Model() as model:
    # define priors
    beta_0=pm.Uniform('beta_0', lower=lower, upper= higher)
    beta_age=pm.Uniform('beta_age', lower, higher)

    # define proba of belonging to class 1 
    p = pm.Deterministic('p', pm.math.sigmoid(beta_0+beta_age*df['age']))

with model:
    #fit the data 
    observed=pm.Bernoulli("occupancy", p, observed=df['target'])
    start=pm.find_MAP()
    step=pm.Metropolis()
    
    #samples from posterior distribution 
    trace=pm.sample(25000, step=step, start=start)
    burned_trace=trace[15000:]
                      


SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'beta_0_interval__': array(0.), 'beta_age_interval__': array(0.)}

Initial evaluation results:
beta_0_interval__     -1.39
beta_age_interval__   -1.39
occupancy              -inf
Name: Log-probability of test_point, dtype: float64

In [6]:
# Bayesian Logistic Regression 
with pm.Model() as logistic_model:
    
    pm.glm.GLM.from_formula(
        "target ~ sex + credit + rate + property", df, family=pm.glm.families.Binomial()
    )
    trace = pm.sample(1000, tune=1000, init="adapt_diag")


SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'Intercept': array(0.), 'sex[T.A92]': array(0.), 'sex[T.A93]': array(0.), 'sex[T.A94]': array(0.), 'credit[T.A31]': array(0.), 'credit[T.A32]': array(0.), 'credit[T.A33]': array(0.), 'credit[T.A34]': array(0.), 'property[T.A122]': array(0.), 'property[T.A123]': array(0.), 'property[T.A124]': array(0.), 'rate': array(0.)}

Initial evaluation results:
Intercept           0.00
sex[T.A92]         -7.83
sex[T.A93]         -7.83
sex[T.A94]         -7.83
credit[T.A31]      -7.83
credit[T.A32]      -7.83
credit[T.A33]      -7.83
credit[T.A34]      -7.83
property[T.A122]   -7.83
property[T.A123]   -7.83
property[T.A124]   -7.83
rate               -7.83
y                   -inf
Name: Log-probability of test_point, dtype: float64