In [130]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [131]:
from crowdnalysis.cmdstan import StanMultinomialOptimizeConsensus, StanMultinomial2OptimizeConsensus, StanDSOptimizeConsensus
from crowdnalysis.dawid_skene import DawidSkene
from scipy.stats import entropy
import numpy as np
import json


In [132]:
ds = DawidSkene()
stan_ds = StanDSOptimizeConsensus()

In [133]:
def generate_pi(k):
    _pi = np.zeros((k,k))
    for _k in range(k):
        _pi[_k] = np.random.dirichlet(np.ones(k), size=1)[0]
        while np.max(np.delete(_pi[_k],_k)) >= _pi[_k,_k]:
            _pi[_k] = np.random.dirichlet(np.ones(k), size=1)[0]
    return _pi    

In [134]:
generate_pi(3)

array([[0.46736819, 0.22769117, 0.30494064],
       [0.21631339, 0.49268635, 0.29100026],
       [0.10186822, 0.32644013, 0.57169165]])

In [135]:
def generate_pis(w, k):
    _pi = np.zeros((w,k,k))
    for _w in range(w):
        _pi[_w] = generate_pi(k)
    return _pi


In [160]:
t = 1000
num_annotations_per_task = 3
w = 3
real_tau = np.array([0.1, 0.3, 0.6])
k = len(real_tau)
real_pi = generate_pis(w,k)
real_parameters = {'tau':real_tau, 'pi':real_pi}
print(real_parameters)
t_C, m = ds.sample(t, num_annotations_per_task, parameters = real_parameters)

{'tau': array([0.1, 0.3, 0.6]), 'pi': array([[[0.44268365, 0.21193542, 0.34538093],
        [0.07360469, 0.72213728, 0.20425803],
        [0.24710476, 0.18548518, 0.56741006]],

       [[0.92352178, 0.07142854, 0.00504968],
        [0.17957777, 0.43737379, 0.38304843],
        [0.12472126, 0.1725317 , 0.70274704]],

       [[0.57154893, 0.11365933, 0.31479174],
        [0.38163352, 0.52749367, 0.09087281],
        [0.19561201, 0.21761136, 0.58677663]]])}


In [161]:
m

array([[  0,   1,   0],
       [  0,   1,   2],
       [  0,   0,   2],
       ...,
       [999,   0,   0],
       [999,   0,   2],
       [999,   0,   2]], dtype=int32)

In [162]:
learned_t_C, learned_parameters = ds.m_fit_and_compute_consensus(m, t, k, w)

In [163]:

learned_t_C_stan, learned_parameters_stan = stan_ds.m_fit_and_compute_consensus(m, t, k, w)

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /home/cerquide/prj/crowdnalysis/src/crowdnalysis/cmdstan/DS.fit_and_consensus
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


In [164]:

def evaluate_ds_model(real_parameters, learned_parameters, w, k):
    d = {}
    d["tau_kl"] = entropy(learned_parameters["tau"],qk=real_parameters["tau"], base=2)
    d["pi_kl"] = np.zeros((w, k))
    for _w in range(w):
        for _k in range(k):
             d["pi_kl"][_w,_k] = entropy(learned_parameters["pi"][_w,_k],qk=real_parameters["pi"][_w,_k], base=2)
    return d
            

In [165]:
r = evaluate_ds_model(real_parameters, learned_parameters, w, k)
print (r)
print(np.sum(r["pi_kl"]))

{'tau_kl': 1.4023017860661549e-05, 'pi_kl': array([[0.00066697, 0.00304294, 0.00035794],
       [0.0123967 , 0.00014149, 0.00029228],
       [0.02590072, 0.00416851, 0.00035567]])}
0.0473232369172014


In [166]:
r = evaluate_ds_model(real_parameters, learned_parameters_stan, w, k)
print (r)
print(np.sum(r["pi_kl"]))

{'tau_kl': 0.00021803203833904063, 'pi_kl': array([[0.00172084, 0.00414033, 0.00047698],
       [0.00559494, 0.00012064, 0.00022978],
       [0.02120355, 0.0031295 , 0.00033395]])}
0.036950500206009707


In [159]:
r = evaluate_ds_model(real_parameters, real_parameters, w, k)
print (r)
print(np.sum(r["pi_kl"]))

{'tau_kl': 0.0, 'pi_kl': array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])}
0.0


In [77]:

def json_dump(fp, m, w, t, k):
    t_A = m[:,0]+1
    w_A = m[:,1]
    ann = m[:,2]+1
    a = m.shape[0]
    
    json_data = {'w': w,
             't': t,
             'a': a,
             'k': k,
             't_A': t_A.tolist(),
             'w_A': w_A.tolist(),
             'ann': ann.tolist()}
    json.dump(json_data, fp, indent=2)


In [52]:
with open('DS.json', 'w') as outfile:
    json_dump(outfile, m, w, t, k)

In [26]:
print(a)
print(m[9980:10000], t_A[9980:])

5000
[] []


In [27]:

sm2oc = StanMultinomial2OptimizeConsensus()
t_C, d = sm2oc.m_fit_and_compute_consensus(m, t, k, w)
d

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /home/cerquide/prj/crowdnalysis/src/crowdnalysis/cmdstan/Multinomial2.fit_and_consensus
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


{'tau': array([0.232108, 0.307849, 0.460043]),
 'eta': array([[0.0235701, 0.574529 ],
        [1.36155  , 0.995963 ],
        [0.717564 , 0.681768 ]]),
 'pi': array([[0.393751, 0.384579, 0.22167 ],
        [0.157639, 0.615146, 0.227215],
        [0.244745, 0.253665, 0.50159 ]]),
 'log_p_t_C': array([[-7.89146, -6.59551, -5.58984],
        [-6.19144, -6.33072, -7.70673],
        [-6.71883, -8.68824, -7.06076],
        ...,
        [-6.19144, -6.33072, -7.70673],
        [-7.86789, -7.95706, -5.62563],
        [-7.31693, -6.9611 , -6.3074 ]]),
 't_C': array([[0.0682834, 0.249539 , 0.682178 ],
        [0.478531 , 0.416314 , 0.105155 ],
        [0.540559 , 0.0754293, 0.384012 ],
        ...,
        [0.478531 , 0.416314 , 0.105155 ],
        [0.0882671, 0.0807367, 0.830996 ],
        [0.193361 , 0.275997 , 0.530643 ]])}

In [28]:
t_C, d = smoc.m_fit_and_compute_consensus(m, t, k, w)
d

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /home/cerquide/prj/crowdnalysis/src/crowdnalysis/cmdstan/Multinomial.fit_and_consensus
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


{'tau': array([0.0298387, 0.528094 , 0.442067 ]),
 'pi': array([[0.679218, 0.214762, 0.10602 ],
        [0.263376, 0.264884, 0.47174 ],
        [0.210426, 0.562761, 0.226813]]),
 'log_p_t_C': array([[-13.3208 ,  -5.54939,  -6.41698],
        [ -8.90025,  -7.29222,  -5.65824],
        [ -8.45474,  -6.72079,  -7.55069],
        ...,
        [ -8.90025,  -7.29222,  -5.65824],
        [-12.1694 ,  -5.5551 ,  -7.4007 ],
        [-11.4635 ,  -6.13223,  -6.49197]]),
 't_C': array([[2.96840e-04, 7.04035e-01, 2.95668e-01],
        [3.16675e-02, 1.58115e-01, 8.10218e-01],
        [1.09498e-01, 6.20086e-01, 2.70415e-01],
        ...,
        [3.16675e-02, 1.58115e-01, 8.10218e-01],
        [1.15684e-03, 8.62611e-01, 1.36232e-01],
        [2.84144e-03, 5.87304e-01, 4.09855e-01]])}

In [48]:
m[:,1]=0
a = ds.m_fit_and_compute_consensus(m, t, k, 1)
a


(array([[0.47629916, 0.07265595, 0.45104489],
        [0.49253281, 0.30742989, 0.2000373 ],
        [0.18053211, 0.01019056, 0.80927733],
        [0.30813743, 0.01148725, 0.68037531],
        [0.47343758, 0.01165636, 0.51490606],
        [0.47629916, 0.07265595, 0.45104489],
        [0.77143334, 0.05132708, 0.17723958],
        [0.47629916, 0.07265595, 0.45104489],
        [0.28233425, 0.00169881, 0.71596694],
        [0.47629916, 0.07265595, 0.45104489],
        [0.63808478, 0.0642833 , 0.29763192],
        [0.64434392, 0.01047723, 0.34517886],
        [0.77143334, 0.05132708, 0.17723958],
        [0.36105569, 0.34123747, 0.29770684],
        [0.47629916, 0.07265595, 0.45104489],
        [0.62024004, 0.25568139, 0.12407857],
        [0.1915386 , 0.74072739, 0.06773401],
        [0.1915386 , 0.74072739, 0.06773401],
        [0.36105569, 0.34123747, 0.29770684],
        [0.47343758, 0.01165636, 0.51490606],
        [0.30813743, 0.01148725, 0.68037531],
        [0.64434392, 0.01047723, 0

In [49]:
d = smoc.m_fit(m, t, k, w, t_C)
d

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /home/cerquide/prj/crowdnalysis/src/crowdnalysis/cmdstan/Multinomial.fit
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


{'w': 20, 't': 100, 'a': 500, 'k': 3, 't_A': array([  1,   1,   1,   1,   1,   2,   2,   2,   2,   2,   3,   3,   3,
         3,   3,   4,   4,   4,   4,   4,   5,   5,   5,   5,   5,   6,
         6,   6,   6,   6,   7,   7,   7,   7,   7,   8,   8,   8,   8,
         8,   9,   9,   9,   9,   9,  10,  10,  10,  10,  10,  11,  11,
        11,  11,  11,  12,  12,  12,  12,  12,  13,  13,  13,  13,  13,
        14,  14,  14,  14,  14,  15,  15,  15,  15,  15,  16,  16,  16,
        16,  16,  17,  17,  17,  17,  17,  18,  18,  18,  18,  18,  19,
        19,  19,  19,  19,  20,  20,  20,  20,  20,  21,  21,  21,  21,
        21,  22,  22,  22,  22,  22,  23,  23,  23,  23,  23,  24,  24,
        24,  24,  24,  25,  25,  25,  25,  25,  26,  26,  26,  26,  26,
        27,  27,  27,  27,  27,  28,  28,  28,  28,  28,  29,  29,  29,
        29,  29,  30,  30,  30,  30,  30,  31,  31,  31,  31,  31,  32,
        32,  32,  32,  32,  33,  33,  33,  33,  33,  34,  34,  34,  34,
        34,  35,  3

{'tau': array([0.00140936, 0.104596  , 0.893995  ]),
 'pi': array([[9.99992e-01, 3.75113e-06, 3.75113e-06],
        [1.14160e-02, 9.36988e-01, 5.15958e-02],
        [2.40188e-01, 3.19697e-01, 4.40114e-01]])}

In [50]:
d2 = smoc.m_compute_consensus(m, t, k, w, data=d)
d2

INFO:cmdstanpy:found newer exe file, not recompiling
INFO:cmdstanpy:compiled model file: /home/cerquide/prj/crowdnalysis/src/crowdnalysis/cmdstan/Multinomial.consensus
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


{'w': 20, 't': 100, 'a': 500, 'k': 3, 't_A': array([  1,   1,   1,   1,   1,   2,   2,   2,   2,   2,   3,   3,   3,
         3,   3,   4,   4,   4,   4,   4,   5,   5,   5,   5,   5,   6,
         6,   6,   6,   6,   7,   7,   7,   7,   7,   8,   8,   8,   8,
         8,   9,   9,   9,   9,   9,  10,  10,  10,  10,  10,  11,  11,
        11,  11,  11,  12,  12,  12,  12,  12,  13,  13,  13,  13,  13,
        14,  14,  14,  14,  14,  15,  15,  15,  15,  15,  16,  16,  16,
        16,  16,  17,  17,  17,  17,  17,  18,  18,  18,  18,  18,  19,
        19,  19,  19,  19,  20,  20,  20,  20,  20,  21,  21,  21,  21,
        21,  22,  22,  22,  22,  22,  23,  23,  23,  23,  23,  24,  24,
        24,  24,  24,  25,  25,  25,  25,  25,  26,  26,  26,  26,  26,
        27,  27,  27,  27,  27,  28,  28,  28,  28,  28,  29,  29,  29,
        29,  29,  30,  30,  30,  30,  30,  31,  31,  31,  31,  31,  32,
        32,  32,  32,  32,  33,  33,  33,  33,  33,  34,  34,  34,  34,
        34,  35,  3

(array([[6.55976e-23, 6.56065e-04, 9.99344e-01],
        [8.89057e-23, 1.61476e-02, 9.83852e-01],
        [9.76045e-29, 6.47655e-05, 9.99935e-01],
        [4.76799e-23, 2.62588e-05, 9.99974e-01],
        [2.32911e-17, 1.06462e-05, 9.99989e-01],
        [6.55976e-23, 6.56065e-04, 9.99344e-01],
        [1.56611e-11, 1.07898e-04, 9.99892e-01],
        [6.55976e-23, 6.56065e-04, 9.99344e-01],
        [1.69187e-17, 4.25846e-07, 1.00000e+00],
        [6.55976e-23, 6.56065e-04, 9.99344e-01],
        [3.20557e-17, 2.66091e-04, 9.99734e-01],
        [1.13774e-11, 4.31629e-06, 9.99996e-01],
        [1.56611e-11, 1.07898e-04, 9.99892e-01],
        [1.77794e-28, 3.89072e-02, 9.61093e-01],
        [6.55976e-23, 6.56065e-04, 9.99344e-01],
        [4.38498e-17, 6.61015e-03, 9.93390e-01],
        [1.26571e-28, 5.03000e-01, 4.97000e-01],
        [1.26571e-28, 5.03000e-01, 4.97000e-01],
        [1.77794e-28, 3.89072e-02, 9.61093e-01],
        [2.32911e-17, 1.06462e-05, 9.99989e-01],
        [4.76799e-23