<a href="https://colab.research.google.com/github/CobaMasa/RStan2PyStan/blob/master/chapter9_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 9-1: Vectorizing data & code

In [1]:
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import pystan
import time

  import pandas.util.testing as tm


In [2]:
# Vectorize the Stan code used in chapter 4.4.
# Model: mu[n] = a + b*X[n],  Y[n] ~ normal(mu[n], sigma)
# NOTE(review): the path is relative and presumably needs Google Drive mounted
# in the Colab runtime -- confirm before running on a fresh kernel.
input_path = 'drive/My Drive/Colab/RStanBook/chap04/input/data-salary.txt'
data = pd.read_csv(filepath_or_buffer=input_path)

# Report the frame's dimensions and column labels, then preview the first rows.
print('shape: ', data.shape, 'columns: ', data.columns)
data.head()

shape:  (20, 2) columns:  Index(['X', 'Y'], dtype='object')


Unnamed: 0,X,Y
0,24,472
1,24,403
2,26,454
3,32,575
4,33,546


In [3]:
# Colab-only: upload files into the runtime. The return value is not used here.
from google.colab import files
files.upload()

# Data handed to the Stan models for the chapter 4.4 regression:
#   N -- number of rows in `data`
#   X, Y -- the corresponding columns as NumPy arrays
StanData = dict(
    N=len(data),
    X=data['X'].values,
    Y=data['Y'].values,
)

Saving model8-4.stan to model8-4 (1).stan
Saving model8-4b.stan to model8-4b (1).stan


In [4]:
# Compile both Stan programs and time each compilation separately.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock and can jump if the system time
# is adjusted mid-measurement.
s1 = time.perf_counter()
# Non-vectorized variant (going by the variable name); `file=` is passed by
# keyword to match the StanModel calls used later in this notebook.
unvect_model = pystan.StanModel(file='model9-1.stan')
e1 = time.perf_counter()

s2 = time.perf_counter()
# Vectorized variant (going by the variable name).
vect_model = pystan.StanModel(file='model9-1b.stan')
e2 = time.perf_counter()

# Elapsed compilation time of each model, in seconds.
print('unvect: {:.4f}'.format(e1-s1))
print('vect: {:.4f}'.format(e2-s2))

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_75b92934939593df10f7fede123dd62c NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_25964948ae1aa1d8ddfacbeadf434fe6 NOW.


unvect: 67.1610
vect: 64.4728


In [5]:
# Fixed seed so the MCMC draws (and the summaries shown below) are repeatable
# when the notebook is re-run on the same setup, and so both models are
# sampled from the same seed when their timings are compared.
SAMPLING_SEED = 1234

# Time the sampling of each compiled model. perf_counter() is a monotonic,
# high-resolution clock intended for measuring short intervals like these.
s1 = time.perf_counter()
sm_unvect = unvect_model.sampling(data=StanData, seed=SAMPLING_SEED)
e1 = time.perf_counter()

s2 = time.perf_counter()
sm_vect = vect_model.sampling(data=StanData, seed=SAMPLING_SEED)
e2 = time.perf_counter()

# Elapsed sampling time of each model, in seconds.
print('unvect: {:.4f}s'.format(e1-s1))
print('vect: {:.4f}s'.format(e2-s2))
sm_unvect # see the result of the unvectorized model.

unvect: 0.7082s
vect: 0.5987s


Inference for Stan model: anon_model_75b92934939593df10f7fede123dd62c.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      -118.6    1.96  72.76 -258.9 -167.0 -119.2 -71.09   26.3   1375   1.01
b       21.88    0.04   1.62  18.57  20.82  21.89  22.97  24.96   1365    1.0
s_Y     85.22    0.38  15.19  61.06  74.44  83.11  93.59 120.37   1601    1.0
mu[1]  406.45    0.93  36.62  334.1 382.54 406.01  430.5 477.58   1544    1.0
mu[2]  406.45    0.93  36.62  334.1 382.54 406.01  430.5 477.58   1544    1.0
mu[3]  450.21    0.85   33.9 383.31 428.38 449.73 472.64 515.61   1594    1.0
mu[4]  581.49    0.61  26.45 530.32 564.12  581.5 598.74 633.88   1889    1.0
mu[5]  603.37    0.57  25.36 554.81 586.71  603.2 619.95 653.65   1976    1.0
mu[6]  647.12     0.5  23.36 602.55  631.7  647.0 662.42 693.17   2216    1.0
mu[7]  712.76     0.4  20.97 67

In [6]:
sm_vect # see the result of the vectorized model.

Inference for Stan model: anon_model_25964948ae1aa1d8ddfacbeadf434fe6.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

         mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a      -119.2    2.01  73.43 -263.4 -169.9 -118.9 -70.89  27.02   1340    1.0
b       21.89    0.04   1.63  18.62   20.8  21.87  23.01  25.02   1320    1.0
s_Y     84.58    0.37  15.27  60.66  73.49  82.95  93.27 121.47   1666    1.0
mu[1]  406.16    0.95  37.15 333.92 380.62 406.01 430.79 479.39   1519    1.0
mu[2]  406.16    0.95  37.15 333.92 380.62 406.01 430.79 479.39   1519    1.0
mu[3]  449.94    0.87  34.41 383.23 426.54 449.74 472.91 517.83   1568    1.0
mu[4]  581.28    0.63  26.87 529.25 563.25 581.29 598.99 634.47   1842    1.0
mu[5]  603.17    0.59  25.76 553.68 585.83 602.98 620.18 653.85   1914    1.0
mu[6]  646.95    0.52  23.73  601.7 630.88 647.03 662.56 695.18   2105    1.0
mu[7]  712.62    0.42  21.25 67

## Vectorizing the 8-4 model (multiple hierarchical model considering each group with global effects)

In [7]:
# Load the chapter 8 salary data used by the model 8-4 programs.
# NOTE(review): relative path, presumably under a mounted Google Drive -- confirm.
salary2_path = 'drive/My Drive/Colab/RStanBook/chap08/input/data-salary-2.txt'
salary2 = pd.read_csv(salary2_path)

# Report the frame's dimensions and column labels, then preview the first rows.
print('shape: ', salary2.shape, 'columns: ', salary2.columns)
salary2.head()

shape:  (40, 3) columns:  Index(['X', 'Y', 'KID'], dtype='object')


Unnamed: 0,X,Y,KID
0,7,457,1
1,10,482,1
2,16,518,1
3,25,535,1
4,5,427,1


In [0]:
# Inputs for the model 8-4 Stan programs, built from `salary2`.
# NOTE: this rebinds StanData, which earlier held the chapter 4.4 data, so the
# 9-1 sampling cell must not be re-run after this one without rebuilding it.
n_obs = len(salary2)                        # number of rows
n_groups = len(salary2['KID'].unique())     # number of distinct KID values
StanData = {
    'N': n_obs,
    'K': n_groups,
    'KID': salary2['KID'].values,
    'X': salary2['X'].values,
    'Y': salary2['Y'].values,
}

In [9]:
# make models
unvct = pystan.StanModel(file='model8-4.stan')
vct = pystan.StanModel(file='model8-4b.stan')
s1 = time.time()
sm_unvect = unvct.sampling(data=StanData)
e1 = time.time()
s2 = time.time()
sm_vect = vct.sampling(data=StanData)
e2 = time.time()

print('unvect: {:.4f}, vect: {:.4f}'.format(e1-s1, e2-s2))

sm_unvect

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_ed4aa2422ca3f8ebea834c0a59021df0 NOW.
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_d09ef6f031b92391d2fb6e6f08a5f576 NOW.


unvect: 2.8019, vect: 2.0937


Inference for Stan model: anon_model_ed4aa2422ca3f8ebea834c0a59021df0.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a0   387.73    9.38 181.73 202.89 337.75 367.52 414.17 723.09    375   1.01
b0    11.57    0.46   9.37  -6.34   8.99  12.23  15.09  24.83    411   1.01
a[1] 383.61    0.46  15.43 351.76 373.45 383.83  393.9  413.6   1110    1.0
a[2] 335.17    0.42  17.32 301.23 323.37 334.94 346.93 369.29   1700    1.0
a[3] 322.88    0.89  33.74 254.65 300.56 323.54 345.88 386.34   1437    1.0
a[4] 482.09    5.38 137.71 300.46 377.01 448.62 562.74 813.16    655    1.0
b[1]   7.73    0.03   0.95   5.85   7.09   7.71   8.35   9.66   1156    1.0
b[2]  19.37    0.03   1.28  16.79  18.52  19.39  20.25  21.83   1610    1.0
b[3]  12.05    0.04   1.66   8.92  10.93  12.03  13.14  15.45   1485    1.0
b[4]   9.77    0.22   5.55  -3.65   6.52  11.13   1

In [10]:
# Show the sampling result of the model compiled from model8-4b.stan.
sm_vect

Inference for Stan model: anon_model_d09ef6f031b92391d2fb6e6f08a5f576.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

       mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
a0   384.55    4.86 138.56 155.07 337.99 369.01 418.61  705.2    813   1.01
b0    12.43    0.25   7.71  -2.72   9.43  12.45  15.36  29.17    949   1.01
a[1] 383.98    0.34  14.83  354.8 374.02 384.27 394.15 412.26   1877    1.0
a[2] 334.31    0.39  17.62 300.66 322.36 333.94 345.95 370.38   2087    1.0
a[3] 323.27    0.78  33.06 258.02 301.36 324.01  346.0 384.97   1811    1.0
a[4] 488.68     5.0 138.35 303.75  381.3  457.1 572.81 815.85    765    1.0
b[1]   7.71    0.02   0.92   5.96   7.07   7.69   8.33   9.52   1970    1.0
b[2]  19.44    0.03    1.3  16.84  18.58  19.45  20.33   21.9   2094    1.0
b[3]  12.03    0.04   1.62   8.98   10.9  12.01  13.09  15.25   1870    1.0
b[4]   9.48     0.2   5.58  -3.65   6.08  10.75  13