# Two-class case 

In [1]:
import numpy as np
from sklearn import metrics
from sklearn.preprocessing import label_binarize

# binary case, balanced

# prepare data
y_true = np.array([0,0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
print('*'*20 + ' binary case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[1,0]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

# binary case, in balanced

# prepare data
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.hstack([np.hstack([y_score[:2]]*5),y_score[2:]])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
print('*'*20 + ' binary case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[1,0]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** binary case balanced ********************
cfm: 
 [[1 1]
 [0 2]]
auc:  0.75
f1:  0.666666666667
accuracy:  0.75
******************** binary case unbalanced ********************
cfm: 
 [[ 1  1]
 [ 0 10]]
auc:  0.75
f1:  0.666666666667
accuracy:  0.916666666667


In [2]:
# binary case, balanced

# prepare data
y_true = np.array([0,0, 1, 1])
y_score = np.array([0.1, 0.6, 0.35, 0.8])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
print('*'*20 + ' binary case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[1,0]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

# binary case, in balanced

# prepare data
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.hstack([np.hstack([y_score[:2]]*5),y_score[2:]])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=1)
print('*'*20 + ' binary case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[1,0]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** binary case balanced ********************
cfm: 
 [[1 1]
 [1 1]]
auc:  0.75
f1:  0.5
accuracy:  0.5
******************** binary case unbalanced ********************
cfm: 
 [[1 1]
 [5 5]]
auc:  0.75
f1:  0.25
accuracy:  0.5


In [3]:
# we flip the definition of postive/negative
# binary case, balanced

# prepare data
y_true = np.array([0,0, 1, 1])
y_score = np.array([0.1, 0.6, 0.35, 0.8])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=0)
print('*'*20 + ' binary case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

# binary case, in balanced

# prepare data
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.hstack([np.hstack([y_score[:2]]*5),y_score[2:]])
y_pred = np.zeros(y_score.shape)
y_pred[y_score>.5] = 1

fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score, pos_label=0)
print('*'*20 + ' binary case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1]))
print('auc: ', metrics.auc(fpr, tpr))
print('f1: ', metrics.f1_score(y_true,y_pred))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** binary case balanced ********************
cfm: 
 [[1 1]
 [1 1]]
auc:  0.25
f1:  0.5
accuracy:  0.5
******************** binary case unbalanced ********************
cfm: 
 [[5 5]
 [1 1]]
auc:  0.25
f1:  0.25
accuracy:  0.5


# Three-class case

## class ratio: 5:1:1

In [4]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.5, 0.4, 0.1],
                    [0.8,0.1,0.1],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*5),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[2 0 0]
 [0 2 0]
 [0 1 1]]
auc:  [1.0, 1.0, 1.0]
micro auc:  0.944444444444
macro auc:  1.0
f1 :  [ 1.          0.8         0.66666667]
micro f1:  0.833333333333
macro f1:  0.822222222222
weighted f1:  0.822222222222
accuracy:  0.833333333333
******************** three-class case unbalanced ********************
cfm: 
 [[10  0  0]
 [ 0  2  0]
 [ 0  1  1]]
auc:  [1.0, 1.0, 1.0]
micro auc:  0.979591836735
macro auc:  1.0
f1 :  [ 1.          0.8         0.66666667]
micro f1:  0.928571428571
macro f1:  0.822222222222
weighted f1:  0.92380952381
accuracy:  0.928571428571


In [5]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.3, 0.4, 0.3],
                    [0.8,0.1,0.1],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*5),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[1 1 0]
 [0 2 0]
 [0 1 1]]
auc:  [0.75, 1.0, 0.9375]
micro auc:  0.875
macro auc:  0.895833333333
f1 :  [ 0.66666667  0.66666667  0.66666667]
micro f1:  0.666666666667
macro f1:  0.666666666667
weighted f1:  0.666666666667
accuracy:  0.666666666667
******************** three-class case unbalanced ********************
cfm: 
 [[5 5 0]
 [0 2 0]
 [0 1 1]]
auc:  [0.75, 1.0, 0.89583333333333326]
micro auc:  0.839285714286
macro auc:  0.881944444444
f1 :  [ 0.66666667  0.4         0.66666667]
micro f1:  0.571428571429
macro f1:  0.577777777778
weighted f1:  0.628571428571
accuracy:  0.571428571429


In [6]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.3, 0.4, 0.3],
                    [0.4,0.1,0.5],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*5),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*5),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[0 1 1]
 [0 2 0]
 [0 1 1]]
auc:  [0.6875, 1.0, 0.8125]
micro auc:  0.805555555556
macro auc:  0.833333333333
f1 :  [ 0.          0.66666667  0.5       ]
micro f1:  0.5
macro f1:  0.388888888889
weighted f1:  0.388888888889
accuracy:  0.5
******************** three-class case unbalanced ********************
cfm: 
 [[0 5 5]
 [0 2 0]
 [0 1 1]]
auc:  [0.6875, 1.0, 0.6875]
micro auc:  0.647959183673
macro auc:  0.791666666667
f1 :  [ 0.    0.4   0.25]
micro f1:  0.214285714286
macro f1:  0.216666666667
weighted f1:  0.0928571428571
accuracy:  0.214285714286


  'precision', 'predicted', average, warn_for)


## class ratio: 10:1:1

In [7]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.5, 0.4, 0.1],
                    [0.8,0.1,0.1],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*10),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*10),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[2 0 0]
 [0 2 0]
 [0 1 1]]
auc:  [1.0, 1.0, 1.0]
micro auc:  0.944444444444
macro auc:  1.0
f1 :  [ 1.          0.8         0.66666667]
micro f1:  0.833333333333
macro f1:  0.822222222222
weighted f1:  0.822222222222
accuracy:  0.833333333333
******************** three-class case unbalanced ********************
cfm: 
 [[20  0  0]
 [ 0  2  0]
 [ 0  1  1]]
auc:  [1.0, 1.0, 1.0]
micro auc:  0.988715277778
macro auc:  1.0
f1 :  [ 1.          0.8         0.66666667]
micro f1:  0.958333333333
macro f1:  0.822222222222
weighted f1:  0.955555555556
accuracy:  0.958333333333


In [8]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.3, 0.4, 0.3],
                    [0.8,0.1,0.1],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*10),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*10),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[1 1 0]
 [0 2 0]
 [0 1 1]]
auc:  [0.75, 1.0, 0.9375]
micro auc:  0.875
macro auc:  0.895833333333
f1 :  [ 0.66666667  0.66666667  0.66666667]
micro f1:  0.666666666667
macro f1:  0.666666666667
weighted f1:  0.666666666667
accuracy:  0.666666666667
******************** three-class case unbalanced ********************
cfm: 
 [[10 10  0]
 [ 0  2  0]
 [ 0  1  1]]
auc:  [0.75, 1.0, 0.88636363636363624]
micro auc:  0.828125
macro auc:  0.878787878788
f1 :  [ 0.66666667  0.26666667  0.66666667]
micro f1:  0.541666666667
macro f1:  0.533333333333
weighted f1:  0.633333333333
accuracy:  0.541666666667


In [9]:
# three-class case, balanced

# prepare data
y_true = np.array([0,0, 1, 1,2, 2])
y_score = np.array([[0.3, 0.4, 0.3],
                    [0.4,0.5,0.1],
                    [0.3,0.6,0.1],
                    [0.4,0.5,0.1],
                    [0,0.1,0.9],
                    [0.3,0.4,0.3]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])


print('*'*20 + ' three-class case balanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))
# three-class case, in balanced

# prepare data
# duplicate class 0 five times
y_true = np.hstack([np.hstack([y_true[:2]]*10),y_true[2:]])
y_score =  np.vstack([np.vstack([y_score[:2,:]]*10),y_score[2:,:]])
y_pred = np.argmax(y_score,1)

y_true_roc = label_binarize(y_true, classes=[0,1,2])

    
print('*'*20 + ' three-class case unbalanced '+ '*'*20)
print('cfm: \n', metrics.confusion_matrix(y_true,y_pred,labels=[0,1,2]))
aucs = []
for i in range(3):
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score[:,i], pos_label=i)
    aucs.append(metrics.auc(fpr, tpr))
print('auc: ', aucs)
# Compute micro-average ROC curve and ROC area
fpr_micro, tpr_micro, _ = metrics.roc_curve(y_true_roc.ravel(), y_score.ravel())
print('micro auc: ', metrics.auc(fpr_micro, tpr_micro))
print('macro auc: ', np.mean(aucs))
print('f1 : ', metrics.f1_score(y_true,y_pred,average=None))
print('micro f1: ', metrics.f1_score(y_true,y_pred,average='micro'))
print('macro f1: ', metrics.f1_score(y_true,y_pred,average='macro'))
print('weighted f1: ', metrics.f1_score(y_true,y_pred,average='weighted'))
print('accuracy: ', metrics.accuracy_score(y_true,y_pred))

******************** three-class case balanced ********************
cfm: 
 [[0 2 0]
 [0 2 0]
 [0 1 1]]
auc:  [0.6875, 0.9375, 0.9375]
micro auc:  0.805555555556
macro auc:  0.854166666667
f1 :  [ 0.          0.57142857  0.66666667]
micro f1:  0.5
macro f1:  0.412698412698
weighted f1:  0.412698412698
accuracy:  0.5
******************** three-class case unbalanced ********************
cfm: 
 [[ 0 20  0]
 [ 0  2  0]
 [ 0  1  1]]
auc:  [0.6875, 0.88636363636363624, 0.88636363636363624]
micro auc:  0.589409722222
macro auc:  0.820075757576
f1 :  [ 0.          0.16        0.66666667]
micro f1:  0.125
macro f1:  0.275555555556
weighted f1:  0.0688888888889
accuracy:  0.125


  'precision', 'predicted', average, warn_for)
