In [1]:
import numpy as np

In [2]:
import import_ipynb
from fakeENS import fakeENS
from spectralEM import spectralEM
from scipy.stats import entropy

importing Jupyter notebook from fakeENS.ipynb
importing Jupyter notebook from spectralEM.ipynb


In [3]:
def str_arr(arr):
    return " ".join("%.2f"%x for x in arr)
#--------------------------------------------
def str_mat(mat):
    return "\n".join(str_arr(arr) for arr in mat) 

In [4]:
num_classifier = 5
num_class = 3

ph_const = 0.1
ph_true = np.random.rand(num_class); 
ph_true = (ph_const + ph_true)/sum(ph_const + ph_true)

siz = int(1e4)
real_labels = np.random.choice(range(num_class), p = ph_true, size = siz)
ph_emp = np.bincount(real_labels)/ float(len(real_labels))

E = fakeENS(num_classifier, num_class, real_labels)
C_real = E.getConfMat()

In [5]:
S = spectralEM(num_classifier, num_class, 
               maxiter = 100, num_init = 200, thres_const = 5e-4, disp = False)

print 'ph_true:%s \nph_emp:%s'%(str_arr(ph_true), str_arr(ph_emp))

group = S.group 
groupConfMat = {i: np.mean([C_real[j] for j in range(num_classifier) if group[i][j]], axis = 0)  for i in range(3)}
kappa = np.min([np.min([[groupConfMat[j][l,l] - groupConfMat[j][l,c] for c in range(num_class) if c !=l] 
                      for l in range(num_class)]) for j in range(3)])


barD = np.min([[np.mean([entropy(C_real[i][l,:], C_real[j][l,:]) 
                         for i in range(num_classifier)]) 
                for c in range(num_class) if c !=l] 
               for l in range(num_class)])

for i in range(3): print "Group %d:\n%s"%(i,str_mat(groupConfMat[i]));
print '\nkappa:%.3f, barD:%.3f'%(kappa, barD)

print 'Initial eigenvectors:\n%s'%str_mat(S.v_arr)

ph_true:0.40 0.28 0.33 
ph_emp:0.40 0.28 0.32
Group 0:
0.52 0.25 0.23
0.23 0.49 0.29
0.17 0.25 0.57
Group 1:
0.64 0.16 0.20
0.22 0.56 0.23
0.05 0.37 0.59
Group 2:
0.58 0.28 0.14
0.12 0.55 0.32
0.13 0.14 0.73

kappa:0.202, barD:0.060
Initial eigenvectors:
-0.02 -0.99 0.34
0.08 0.16 0.62
1.00 -0.01 -0.71


In [6]:
E.reshuffle()
S.reset()

update_period = 500
est_period = 2*update_period
new_data = []
disp = True
errp_best = 100


for i in range(siz):
    schedule = (i+1)*np.ones(num_classifier).astype(int)
    labels = E.classify(schedule) - 1
    #print 'index:', i, 'real label:',real_labels[i], 'classifier labels:', labels
    new_data += [labels.astype(int)]
    if np.mod(i, update_period) == 0 and i>0: 
        S.update(new_data); 
        #-----------------------
        if np.mod(i, est_period) == 0 and i >0: 
            S.updateParamsSpectral('check')
        #-----------------------
        S.updateParamsEM(new_data, 10);
        new_data = []
    #------------------
        errp = np.linalg.norm(S.ph_est_avg - ph_true, 1)
        err = [np.linalg.norm(C_real[k]- S.conf_mat[k], 1) for k in range(num_classifier)]
        if errp_best > errp:
            ph_est_best = S.ph_est_avg.copy()
            conf_mat_best = S.conf_mat.copy()
            errp_best = errp
        #-----------------------------------------
        if disp:
            print '---------------------------'
            print 'Num samples:', S.num_data
            print 'Total L1 error in ph true: %.3f'%errp
            print 'Total L1 error in Conf Matrices: %.3f'%np.sum(err)
            print '---------------------------'
#------------------------------------------------------

---------------------------
Num samples: 501
Total L1 error in ph true: 0.128
Total L1 error in Conf Matrices: 2.330
---------------------------
---------------------------
Num samples: 1001
Total L1 error in ph true: 0.327
Total L1 error in Conf Matrices: 2.644
---------------------------
---------------------------
Num samples: 1501
Total L1 error in ph true: 0.327
Total L1 error in Conf Matrices: 1.693
---------------------------
---------------------------
Num samples: 2001
Total L1 error in ph true: 0.464
Total L1 error in Conf Matrices: 5.513
---------------------------
---------------------------
Num samples: 2501
Total L1 error in ph true: 0.464
Total L1 error in Conf Matrices: 5.434
---------------------------
---------------------------
Num samples: 3001
Total L1 error in ph true: 0.565
Total L1 error in Conf Matrices: 4.694
---------------------------
---------------------------
Num samples: 3501
Total L1 error in ph true: 0.565
Total L1 error in Conf Matrices: 4.430
-------

In [7]:
print 'ph_est:%s \nph_true:%s \nph_emp:%s'%(str_arr(S.ph_est_avg), str_arr(ph_true), str_arr(ph_emp))

for k in range(num_classifier):
    print '---------------------------'
    print 'Classifier:%d'%k
    print 'Confusion Matrix Final Estimate'
    print np.round(S.conf_mat[k],3)
    print 'Confusion Matrix Real'
    print np.round(C_real[k],3)
print '---------------------------'

ph_est:0.40 0.13 0.47 
ph_true:0.40 0.28 0.33 
ph_emp:0.40 0.28 0.32
---------------------------
Classifier:0
Confusion Matrix Final Estimate
[[0.501 0.26  0.239]
 [0.384 0.488 0.128]
 [0.066 0.376 0.558]]
Confusion Matrix Real
[[0.518 0.251 0.231]
 [0.258 0.602 0.139]
 [0.077 0.319 0.604]]
---------------------------
Classifier:1
Confusion Matrix Final Estimate
[[0.744 0.071 0.185]
 [0.458 0.369 0.174]
 [0.017 0.33  0.653]]
Confusion Matrix Real
[[0.758 0.076 0.166]
 [0.172 0.511 0.317]
 [0.019 0.412 0.569]]
---------------------------
Classifier:2
Confusion Matrix Final Estimate
[[0.579 0.366 0.055]
 [0.353 0.416 0.23 ]
 [0.081 0.151 0.768]]
Confusion Matrix Real
[[0.563 0.408 0.029]
 [0.235 0.468 0.297]
 [0.07  0.033 0.898]]
---------------------------
Classifier:3
Confusion Matrix Final Estimate
[[0.536 0.215 0.249]
 [0.32  0.397 0.283]
 [0.187 0.287 0.526]]
Confusion Matrix Real
[[0.516 0.253 0.231]
 [0.228 0.487 0.285]
 [0.172 0.254 0.574]]
---------------------------
Classifier:

In [8]:
print 'ph_est:%s \nph_true:%s \nph_emp:%s'%(str_arr(ph_est_best), str_arr(ph_true), str_arr(ph_emp))
for k in range(num_classifier):
    print '---------------------------'
    print 'Classifier:%d'%k
    print 'Confusion Matrix Best Estimate'
    print np.round(conf_mat_best[k],3)
    print 'Confusion Matrix Real'
    print np.round(C_real[k],3)
print '---------------------------'

ph_est:0.33 0.29 0.38 
ph_true:0.40 0.28 0.33 
ph_emp:0.40 0.28 0.32
---------------------------
Classifier:0
Confusion Matrix Best Estimate
[[0.519 0.265 0.215]
 [0.262 0.509 0.229]
 [0.138 0.304 0.558]]
Confusion Matrix Real
[[0.518 0.251 0.231]
 [0.258 0.602 0.139]
 [0.077 0.319 0.604]]
---------------------------
Classifier:1
Confusion Matrix Best Estimate
[[1.    0.    0.   ]
 [0.258 0.357 0.385]
 [0.126 0.374 0.5  ]]
Confusion Matrix Real
[[0.758 0.076 0.166]
 [0.172 0.511 0.317]
 [0.019 0.412 0.569]]
---------------------------
Classifier:2
Confusion Matrix Best Estimate
[[0.578 0.422 0.   ]
 [0.362 0.341 0.297]
 [0.    0.    1.   ]]
Confusion Matrix Real
[[0.563 0.408 0.029]
 [0.235 0.468 0.297]
 [0.07  0.033 0.898]]
---------------------------
Classifier:3
Confusion Matrix Best Estimate
[[0.663 0.228 0.108]
 [0.281 0.379 0.34 ]
 [0.184 0.25  0.566]]
Confusion Matrix Real
[[0.516 0.253 0.231]
 [0.228 0.487 0.285]
 [0.172 0.254 0.574]]
---------------------------
Classifier:4
Co