In [60]:
import os
import pystan
import pickle
from hashlib import md5
import numpy as np

# To run the model, call main() as shown below (arguments: data file, model file,
# n_iter, n_chains, n_warmup):
# run: test = main('hierarchical_normal_data.txt', 'hierarchical_model.txt', 5000, 10, 2500)
# Note: use help(test) to get information about the fit object that main() returns.
# Relevant filenames: model_code --> hierarchical_model.txt, data --> hierarchical_normal_data.txt

def read_in_data(file_name):
    """Parse a whitespace-delimited data file into a dict of lists.

    Every non-blank line has the form ``name v1 v2 ...``: the first token is
    used as the dictionary key and the remaining tokens become the values.
    Values on the first two data lines are converted with ``int``; values on
    all later lines are converted with ``float``.

    Blank (or whitespace-only) lines are ignored and do not count towards the
    "first two lines" rule.

    Parameters
    ----------
    file_name : str
        Path of the data file to read.

    Returns
    -------
    dict
        Maps each line's first token to the list of its converted values.

    Raises
    ------
    ValueError
        If a value token cannot be converted to the expected numeric type.
    """
    data = dict()
    with open(file_name) as f:
        # A blank line would split into an empty list and fail on the name
        # lookup, so such lines are dropped up front.
        rows = [line.split() for line in f if line.strip()]
    for i, tokens in enumerate(rows):
        # The first two data lines hold integers, everything after holds floats.
        convert = int if i <= 1 else float
        data[tokens[0]] = [convert(x) for x in tokens[1:]]
    return data

def read_in_model_code(file_name):
    """Return the full text of the model-code file.

    Parameters
    ----------
    file_name : str
        Path of the file containing the model code.

    Returns
    -------
    str
        The complete contents of the file.
    """
    # Read-only mode is sufficient (no write permission needed) and the
    # context manager guarantees the handle is closed.
    with open(file_name, 'r') as f:
        return f.read()

def StanModel_cache(model_code, model_name=None, **kwargs):
    """Return a compiled StanModel, reusing a pickled copy when one exists.

    The cache file lives in the current working directory and is named after
    the MD5 digest of ``model_code`` (plus ``model_name`` when given), so a
    change to the model code results in a different cache file.

    Parameters
    ----------
    model_code : str
        Stan model code.
    model_name : str, optional
        Only used as part of the cache file name.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The model object, either unpickled from the cache file or freshly
    compiled with ``pystan.StanModel`` and then written to the cache file.
    """
    # UTF-8 produces the same digest as ASCII for pure-ASCII code (existing
    # cache files stay valid) but does not fail on non-ASCII characters.
    code_hash = md5(model_code.encode('utf-8')).hexdigest()
    if model_name is None:
        cache_fn = 'cached-model-{}.pkl'.format(code_hash)
    else:
        cache_fn = 'cached-{}-{}.pkl'.format(model_name, code_hash)
    try:
        # NOTE(security): unpickling executes arbitrary code; only use cache
        # files from a trusted location.
        with open(cache_fn, 'rb') as f:
            sm = pickle.load(f)
    except Exception:
        # Missing or unreadable cache: compile the model and store it for
        # future use. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        sm = pystan.StanModel(model_code=model_code)
        with open(cache_fn, 'wb') as f:
            pickle.dump(sm, f)
    else:
        print("Using cached StanModel")
    return sm


def initialize_model(model_code):
    """Return the model object that StanModel_cache yields for `model_code`."""
    return StanModel_cache(model_code=model_code)

def run_model(stan_model, data, n_iter, n_chains, n_warmup):
    """Call `stan_model.sampling` and return whatever it returns.

    The arguments are forwarded as keywords: `data` as ``data``, `n_iter` as
    ``iter``, `n_chains` as ``chains`` and `n_warmup` as ``warmup``.
    """
    sampling_args = dict(data=data, iter=n_iter, chains=n_chains, warmup=n_warmup)
    return stan_model.sampling(**sampling_args)

def write_samples_to_csv(fit_object):
    """Write the draws of every chain to ``output/chain_<k>.csv``.

    The ``output`` directory is created when missing. Each file starts with a
    header line built from ``fit_object.sim['fnames_oi']`` followed by one row
    per draw, formatted with eight decimals. The last name and the last
    column are left out (presumably the log-density entry -- confirm).

    Returns None.
    """
    # make sure the target directory is there
    out_dir = 'output'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # draws array, indexed as [draw, chain, parameter]
    draws = fit_object.extract(permuted = False)

    # header line: all names but the last, separated by ', '
    column_names = fit_object.sim['fnames_oi'][:-1]
    header = ', '.join(column_names).rstrip(', ') + ' \n'

    # one csv file per chain, numbered from 1
    n_chains = np.shape(draws)[1]
    for chain_no in range(1, n_chains + 1):
        target = 'output/chain_' + str(chain_no) + '.csv'
        with open(target, 'wb') as out:
            out.write(header.encode())
            np.savetxt(out, draws[:, chain_no - 1, :-1], delimiter=",", fmt = '%1.8f')
    return

def main(data_file, model_file, n_iter, n_chains, n_warmup):
    """Run the whole pipeline and return the fit object.

    Reads the data and the model code, obtains the model object, runs the
    sampler with the given iteration, chain and warmup counts, and writes the
    samples to csv files named chain_[*].csv.
    """
    stan_data = read_in_data(data_file)
    stan_code = read_in_model_code(model_file)
    compiled = initialize_model(stan_code)
    result = run_model(compiled, stan_data, n_iter, n_chains, n_warmup)
    write_samples_to_csv(result)
    return result

In [61]:
# Full pipeline run with n_iter=100000, n_chains=1 and n_warmup=0.
fit = main('hierarchical_normal_data.txt', 'hierarchical_model.txt', 100000, 1, 0)

ValueError: Failed to parse Stan model 'anon_model_e5039be6565bf91c47383a888ed725b9'. Error message:
SYNTAX ERROR, MESSAGE(S) FROM PARSER:

  error in 'unkown file name' at line 8, column 5
  -------------------------------------------------
     6:     real C[N[3]]; // values group C
     7:     real D[N[4]]; // values group D
     8:     print(A)
            ^
     9: }
  -------------------------------------------------

PARSER EXPECTED: <one of the following:
  a variable declaration, beginning with type,
      (int, real, vector, row_vector, matrix, unit_vector,
       simplex, ordered, positive_ordered,
       corr_matrix, cov_matrix,
       cholesky_corr, cholesky_cov
  or '}' to close variable declarations>


In [55]:
# Show the text summary of the fit object.
print(fit)

Inference for Stan model: anon_model_234a5a24c057848f5f861d67cf76916e.
1 chains, each with iter=100000; warmup=0; thin=1; 
post-warmup draws per chain=100000, total post-warmup draws=100000.

            mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
mu         -0.77 1.1e-161.1e-16  -0.77  -0.77  -0.77  -0.77  -0.77      1    nan
tau         3.98 2.7e-152.7e-15   3.98   3.98   3.98   3.98   3.98      1    nan
sigma       4.19 8.9e-168.9e-16   4.19   4.19   4.19   4.19   4.19      1    nan
theta[0]    1.49 2.2e-162.2e-16   1.49   1.49   1.49   1.49   1.49      1    nan
theta[1]    1.76 2.2e-162.2e-16   1.76   1.76   1.76   1.76   1.76      1    nan
theta[2]   -0.19     0.0    0.0  -0.19  -0.19  -0.19  -0.19  -0.19      1    nan
theta[3]   -0.77     0.0    0.0  -0.77  -0.77  -0.77  -0.77  -0.77      1    nan
log_tau     1.38 2.2e-162.2e-16   1.38   1.38   1.38   1.38   1.38      1    nan
log_sigma   1.43 2.2e-162.2e-16   1.43   1.43   1.43   1.43   1.43      1    na

In [46]:
# Extract the draws twice: once with permuted=False and once with the
# default arguments, to compare the two return formats.
samples1 = fit.extract(permuted = False)
samples2 = fit.extract()

  """Entry point for launching an IPython kernel.


In [47]:
# Print both extraction results in full.
print(samples1)
print(samples2)

[[[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01   8.86599651e-01   1.15168133e+00   1.80092460e+00
     1.08319097e+00  -5.55161870e+03]]

 [[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01   8.86599651e-01   1.15168133e+00   1.80092460e+00
     1.08319097e+00  -5.55161870e+03]]

 [[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01   8.86599651e-01   1.15168133e+00   1.80092460e+00
     1.08319097e+00  -5.55161870e+03]]

 [[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01   8.86599651e-01   1.15168133e+00   1.80092460e+00
     1.08319097e+00  -5.55161870e+03]]

 [[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01   8.86599651e-01   1.15168133e+00   1.80092460e+00
     1.08319097e+00  -5.55161870e+03]]

 [[  1.53550616e+00   6.05524357e+00   2.95409093e+00   5.35658977e-01
     1.97200608e-01

In [48]:
# Shape of the array returned by extract(permuted=False).
samples1.shape

(10, 1, 10)

In [49]:
# NOTE(review): `samples` is not assigned in any visible cell (only
# `samples1` and `samples2` are) -- presumably left over from an earlier
# kernel state; confirm which array is meant before re-running.
samples[:,1,1]

array([ 2.08810073,  2.08810073,  2.08810073, ...,  2.08810073,
        2.08810073,  2.08810073])

In [50]:
samples2['mu']

array([ 1.53550616,  1.53550616,  1.53550616,  1.53550616,  1.53550616,
        1.53550616,  1.53550616,  1.53550616,  1.53550616,  1.53550616])