# Uncertainty Losses Iteration #2

In [1]:
import os
import sys

sys.path.append(os.path.abspath('..'))

In [2]:
seed = 2018

In [3]:
import random

import numpy as np
from copy import copy
import skml
from skml.problem_transformation import ProbabilisticClassifierChain
from skml.datasets import sample_down_label_space
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.base import clone
from sklearn.externals import joblib
import arff
import os
import time

from lib.experimental_framework import load_data
random.seed(2018)

In [4]:
DATA_SETS = ['scene', 'emotions', 'yeast-10', 'mediamill-10', 'enron-10', 'medical-10', 'slashdot-10',
            'ohsumed-10', 'tmc2007-500-10', 'imdb-10']

In [None]:
for ds in DATA_SETS:
    tic = time.time()
    data = load_data(ds)
    toc = time.time()
    print("{}: {:.4f} s".format(ds, toc - tic))

In [5]:
def uncertain_hamming_loss(y, y_pred, omega=1.0):
    N, L = y_pred.shape
    cumsum = 0
    
    # place '?' if not already done
    np.place(y_pred, 
             mask=np.logical_and(y_pred > 1/3, y_pred < 2/3),
             vals=np.nan)
    u = np.isnan(y_pred).sum()
    hl = ((y_pred>= .5) != y.astype(float)).sum()
    print("hl", hl / (N * L))
    return (hl + (u * omega)) / (N * L)

In [6]:
def evaluate(y_test, y_pred, y_pred_pp):
    print("----------")

    print("hamming loss: ")
    print(hamming_loss(y_test, y_pred))

    print("accuracy:")
    print(accuracy_score(y_test, y_pred))

    print("f1 score:")
    print("micro")
    print(f1_score(y_test, y_pred, average='micro'))
    print("macro")
    print(f1_score(y_test, y_pred, average='macro'))

    print("precision:")
    print("micro")
    print(precision_score(y_test, y_pred, average='micro'))
    print("macro")
    print(precision_score(y_test, y_pred, average='macro'))

    print("recall:")
    print("micro")
    print(recall_score(y_test, y_pred, average='micro'))
    print("macro")
    print(recall_score(y_test, y_pred, average='macro'))
    
    print("#--")
    print("-> hamming loss:")
    print(hamming_loss(y_test, (y_pred_pp >= .5)))
    print("-> uncertain hamming loss:")
    print(uncertain_hamming_loss(y_test, y_pred_pp))
    print("---")

In [None]:
splits = {}

for ds in DATA_SETS:
    tic = time.time()
    data = load_data(ds)

    try:
        # FIXME: ohsumed cannot be unpacked like the others
        if len(data) == 1:
            data = data[0]
            
        assert len(data) == 2

        if data[1].shape[1] > 10:
            X, y = data
            y = sample_down_label_space(y, 10)
        else:
            X, y = data
        
        X = X.toarray()
        y = y.toarray()

        if X.shape[0] >= 10000:
            # 66% split
            X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                                test_size=0.33,
                                                                random_state=seed)
            clf = ProbabilisticClassifierChain(LogisticRegression())
            pcc = clone(clf)

            pcc.fit(X_train, y_train)
            y_pred = pcc.predict(X_test)
            y_pred_pp = pcc.predict_proba(X_test)
            y_pred_pp = y_pred_pp.reshape(y_pred_pp.shape[0], y_pred_pp.shape[2])
            print(ds)
            evaluate(y_test, y_pred, y_pred_pp)

        else:
            kf = KFold(random_state=seed)
            results = []

            for train_index, test_index in kf.split(X):
                clf = ProbabilisticClassifierChain(LogisticRegression())
                X_train = X[train_index]
                X_test = X[test_index]
                y_train = y[train_index]
                y_test = y[test_index]

                # FIXME:
                # idxs.append((train_index, test_index))
                pcc = clone(clf)

                pcc.fit(X_train, y_train)
                y_pred = pcc.predict(X_test)
                y_pred_pp = pcc.predict_proba(X_test)
                y_pred_pp = y_pred_pp.reshape(y_pred_pp.shape[0], y_pred_pp.shape[2])

                # FIXME:
                # results.append((y_pred, y_pred_pp))
                print(ds)
                evaluate(y_test, y_pred, y_pred_pp)
    except Exception as e:
        print(e)

    toc = time.time()
    print("{}: {:.4f} s".format(ds, toc - tic))

scene
----------
hamming loss: 
0.10107928601079286
accuracy:
0.6562889165628891
f1 score:
micro
0.7150380339379755
macro
0.6276118090224297
precision:
micro
0.7256532066508313
macro
0.6672593950134255
recall:
micro
0.7047289504036909
macro
0.6786993781672516
#--
-> hamming loss:
0.8233706932337069
-> uncertain hamming loss:
hl 0.8208800332088003
0.8812785388127854
---


  # Remove the CWD from sys.path while we load stuff.


scene
----------
hamming loss: 
0.12489609310058188
accuracy:
0.600997506234414
f1 score:
micro
0.6386049308478653
macro
0.5017062723228454
precision:
micro
0.6436363636363637
macro
0.5314905358176077
recall:
micro
0.6336515513126492
macro
0.5592795813338908
#--
-> hamming loss:
0.8243973399833749
-> uncertain hamming loss:
hl 0.806525353283458
0.8767664172901081
---


  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  # Remove the CWD from sys.path while we load stuff.


scene
----------
hamming loss: 
0.11845386533665836
accuracy:
0.600997506234414
f1 score:
micro
0.6650998824911869
macro
0.5136778823344651
precision:
micro
0.6885644768856448
macro
0.5513059909629657
recall:
micro
0.6431818181818182
macro
0.5628043519372148
#--
-> hamming loss:
0.8123441396508728
-> uncertain hamming loss:
hl 0.801330008312552
0.8740648379052369
---
scene: 102.5374 s


  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  # Remove the CWD from sys.path while we load stuff.


emotions
----------
hamming loss: 
0.22895622895622897
accuracy:
0.2676767676767677
f1 score:
micro
0.6158192090395481
macro
0.5985015187665419
precision:
micro
0.6055555555555555
macro
0.589624374202382
recall:
micro
0.6264367816091954
macro
0.6250433265129983
#--
-> hamming loss:
0.696969696969697
-> uncertain hamming loss:
hl 0.6818181818181818
0.8703703703703703
---


  # Remove the CWD from sys.path while we load stuff.


emotions
----------
hamming loss: 
0.20454545454545456
accuracy:
0.31313131313131315
f1 score:
micro
0.6840052015604681
macro
0.6765761650674958
precision:
micro
0.6591478696741855
macro
0.6626219485578969
recall:
micro
0.7108108108108108
macro
0.7016075974177568
#--
-> hamming loss:
0.6978114478114478
-> uncertain hamming loss:
hl 0.6919191919191919
0.8627946127946128
---


  # Remove the CWD from sys.path while we load stuff.


emotions
----------
hamming loss: 
0.22504230118443316
accuracy:
0.3299492385786802
f1 score:
micro
0.6641414141414143
macro
0.6557884371760067
precision:
micro
0.654228855721393
macro
0.6513304218336976
recall:
micro
0.6743589743589744
macro
0.6708924403368504
#--
-> hamming loss:
0.6717428087986463
-> uncertain hamming loss:
hl 0.6692047377326565
0.8384094754653131

  # Remove the CWD from sys.path while we load stuff.



---
emotions: 24.7487 s
yeast-10
----------
hamming loss: 
0.26240694789081886
accuracy:
0.24937965260545905
f1 score:
micro
0.6665615639287403
macro
0.5236647356237308
precision:
micro
0.6689873417721519
macro
0.6197282508072887
recall:
micro
0.6641533144831919
macro
0.5322258002509063
#--
-> hamming loss:
0.6021091811414392
-> uncertain hamming loss:
hl 0.6090570719602978
0.7326302729528535
---


  # Remove the CWD from sys.path while we load stuff.


yeast-10
----------
hamming loss: 
0.2617866004962779
accuracy:
0.2555831265508685
f1 score:
micro
0.6690715181932245
macro
0.5410550818455078
precision:
micro
0.6620111731843575
macro
0.5831046342848512
recall:
micro
0.676284083703234
macro
0.5487947153689905
#--
-> hamming loss:
0.6096774193548387
-> uncertain hamming loss:
hl 0.6207196029776675
0.745409429280397
---


  # Remove the CWD from sys.path while we load stuff.


yeast-10
----------
hamming loss: 
0.2645962732919255
accuracy:
0.24099378881987576
f1 score:
micro
0.6615824594852241
macro
0.5185915134703674
precision:
micro
0.6622137404580153
macro
0.5955808883368467
recall:
micro
0.660952380952381
macro
0.5298387710499571
#--
-> hamming loss:
0.6074534161490683
-> uncertain hamming loss:


  # Remove the CWD from sys.path while we load stuff.


hl 0.613416149068323
0.7366459627329193
---
yeast-10: 2576.5715 s
mediamill-10
----------
hamming loss: 
0.184672187715666
accuracy:
0.21642512077294687
f1 score:
micro
0.6681959651319951
macro
0.501578406512312
precision:
micro
0.7324326528393182
macro
0.5980286591128774
recall:
micro
0.6143182854537164
macro
0.46242688693576983
#--
-> hamming loss:
0.692760524499655
-> uncertain hamming loss:
hl 0.681456176673568
0.8345065562456867
---
mediamill-10: 15598.5825 s


  # Remove the CWD from sys.path while we load stuff.


enron-10
----------
hamming loss: 
0.1829225352112676
accuracy:
0.2323943661971831
f1 score:
micro
0.5384273656152821
macro
0.435764750175122
precision:
micro
0.6325678496868476
macro
0.5184902831165241
recall:
micro
0.46867749419953597
macro
0.3843972259928678
#--
-> hamming loss:
0.7764084507042254
-> uncertain hamming loss:
hl 0.7767605633802817
0.8644366197183099
---


  # Remove the CWD from sys.path while we load stuff.


enron-10
----------
hamming loss: 
0.19647266313932982
accuracy:
0.12698412698412698
f1 score:
micro
0.5883222468588322
macro
0.4741030925483677
precision:
micro
0.6209048361934477
macro
0.5381684414008323
recall:
micro
0.5589887640449438
macro
0.44449943537594755
#--
-> hamming loss:
0.745679012345679
-> uncertain hamming loss:
hl 0.735978835978836
0.827689594356261
---


  # Remove the CWD from sys.path while we load stuff.


enron-10
----------
hamming loss: 
0.20582010582010582
accuracy:
0.13756613756613756
f1 score:
micro
0.5794594594594595
macro
0.44526990126114957
precision:
micro
0.6296006264682851
macro
0.5400536539191063
recall:
micro
0.5367156208277704
macro
0.4039492063738136
#--
-> hamming loss:
0.7342151675485009
-> uncertain hamming loss:
hl 0.7313932980599648
0.8225749559082892
---
enron-10: 1898.1929 s


  # Remove the CWD from sys.path while we load stuff.


medical-10
----------
hamming loss: 
0.024539877300613498
accuracy:
0.8006134969325154
f1 score:
micro
0.8566308243727598
macro
0.8102258489151094
precision:
micro
0.8819188191881919
macro
0.8988155963368843
recall:
micro
0.8327526132404182
macro
0.7740713464162173
#--
-> hamming loss:
0.9122699386503067
-> uncertain hamming loss:
hl 0.9214723926380368
0.9475460122699386
---


  # Remove the CWD from sys.path while we load stuff.


medical-10
----------
hamming loss: 
0.026993865030674847
accuracy:
0.7760736196319018
f1 score:
micro
0.8422939068100358
macro
0.8224583319906646
precision:
micro
0.8576642335766423
macro
0.8888894579112089
recall:
micro
0.8274647887323944
macro
0.7819771827759443
#--
-> hamming loss:
0.9131901840490797
-> uncertain hamming loss:
hl 0.9220858895705522
0.950920245398773
---


  # Remove the CWD from sys.path while we load stuff.


medical-10
----------
hamming loss: 
0.023312883435582823
accuracy:
0.8159509202453987
f1 score:
micro
0.8707482993197279
macro
0.8599695627560433
precision:
micro
0.8827586206896552
macro
0.8888586508368291
recall:
micro
0.8590604026845637
macro
0.8421959808678734
#--
-> hamming loss:
0.9079754601226994
-> uncertain hamming loss:
hl 0.9159509202453988
0.9447852760736196
---
medical-10: 1108.5368 s


  # Remove the CWD from sys.path while we load stuff.


slashdot-10
----------
hamming loss: 
0.06788263283108643
accuracy:
0.5337034099920698
f1 score:
micro
0.6014897579143389
macro
0.5146763278526892
precision:
micro
0.7044711014176663
macro
0.6514087779255074
recall:
micro
0.5247766043866775
macro
0.44793392484000993
#--
-> hamming loss:
0.901268834258525
-> uncertain hamming loss:
hl 0.8993655828707375
0.9406819984139572
---


  # Remove the CWD from sys.path while we load stuff.


slashdot-10
----------
hamming loss: 
0.06708961141950832
accuracy:
0.5392545598731165
f1 score:
micro
0.618575293056808
macro
0.5191657380837792
precision:
micro
0.7251585623678647
macro
0.6661207743469837
recall:
micro
0.539308176100629
macro
0.4502203829420616
#--
-> hamming loss:
0.8963521015067407
-> uncertain hamming loss:
hl 0.8935765265662173
0.9351308485329104
---


  # Remove the CWD from sys.path while we load stuff.


slashdot-10
----------
hamming loss: 
0.07626984126984127
accuracy:
0.48333333333333334
f1 score:
micro
0.5447655139744197
macro
0.4817306711310329
precision:
micro
0.6410256410256411
macro
0.6578566334808098
recall:
micro
0.47364085667215816
macro
0.42093866501103266
#--
-> hamming loss:
0.9015873015873016
-> uncertain hamming loss:
hl 0.8967460317460317
0.9408730158730159
---
slashdot-10: 4243.5204 s


  # Remove the CWD from sys.path while we load stuff.


ohsumed-10
----------
hamming loss: 
0.09697628888405482
accuracy:
0.3630628670872308
f1 score:
micro
0.5613931523022433
macro
0.5472196469440009
precision:
micro
0.6295233892321271
macro
0.6166979397061072
recall:
micro
0.5065696022727273
macro
0.49373552063452186
#--
-> hamming loss:
0.8762236241026756
-> uncertain hamming loss:
hl 0.8610180552534261
0.9226234500761367
---
ohsumed-10: 5141.6709 s


  # Remove the CWD from sys.path while we load stuff.


tmc2007-500-10
----------
hamming loss: 
0.10256437427148458
accuracy:
0.3837024478118046
f1 score:
micro
0.7220036189217911
macro
0.6614089365203334
precision:
micro
0.7467324144486692
macro
0.7273738899312006
recall:
micro
0.6988601612454823
macro
0.6177292371971519
#--
-> hamming loss:
0.8100349687400658
-> uncertain hamming loss:
hl 0.8144855356575182
0.9132563314612695
---
tmc2007-500-10: 10393.4316 s


  # Remove the CWD from sys.path while we load stuff.


In [41]:
clf = ProbabilisticClassifierChain(LogisticRegression())

In [71]:
X_train, y_train = np.array([[2,4], [1,3], [6,8], [5,7], [1,1]]), np.array([[1, 1, 1], [0, 1, 0], [1, 1, 1], [0, 1, 0], [1,0,1]])

In [72]:
clf.fit(X_train, y_train)

In [88]:
yp = clf.predict(X_train[1,:])

ValueError: X has 1 features per sample; expecting 2

In [75]:
ypp = clf.predict_proba(X_train)

ValueError: X has 3 features per sample; expecting 2

In [101]:
lr = LogisticRegression()
lr.fit(np.hstack((X_train, y_train[:, :2])), y_train[:, 2])

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [106]:
ypp = lr.predict_proba(np.hstack((X_train, y_train[:, :2])))
print(ypp)
print((ypp >= .5).astype(int))

[[0.35618938 0.64381062]
 [0.5949257  0.4050743 ]
 [0.31517492 0.68482508]
 [0.54990142 0.45009858]
 [0.25894644 0.74105356]]
[[0 1]
 [1 0]
 [0 1]
 [1 0]
 [0 1]]


In [None]:
for b in range(2 ** 2):
    y = np.array(list(map(int, np.binary_repr(b, width=2))))

In [8]:
data = load_data('scene')

In [9]:
X, y = data

X = X.toarray()
y = y.toarray()

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=seed)
pcc = ProbabilisticClassifierChain(LogisticRegression())

pcc.fit(X_train, y_train)
y_pred = pcc.predict(X_test)
y_pred_pp = pcc.predict_proba(X_test)
# y_pred_pp = y_pred_pp.reshape(y_pred_pp.shape[0], y_pred_pp.shape[2])
print(ds)
evaluate(y_test, y_pred, y_pred_pp)

NameError: name 'ds' is not defined

In [13]:
evaluate(y_test, y_pred, y_pred_pp)

----------
hamming loss: 
0.10083857442348008
accuracy:
0.6666666666666666
f1 score:
micro
0.7124925283921101
macro
0.728823733607503
precision:
micro
0.7286063569682152
macro
0.7447985584327671
recall:
micro
0.6970760233918128
macro
0.7156021459869816
#--
-> hamming loss:


ValueError: Classification metrics can't handle a mix of multilabel-indicator and unknown targets

In [12]:
y_pred_pp[0]

array([[0.990577  , 0.009423  ],
       [0.98117969, 0.01882031],
       [0.96732898, 0.03267102],
       [0.98906323, 0.01093677],
       [0.21859161, 0.78140839],
       [0.143681  , 0.856319  ]])

In [15]:
y_pred[0]

array([0, 0, 0, 0, 1, 0])

In [17]:
(y_pred_pp[0] >= .5).astype(int)

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1]])

In [13]:
y_train[0]

array([0, 0, 0, 1, 0, 0], dtype=int64)