In [1]:
import os
import sys

sys.path.append(os.path.abspath('..'))

In [2]:
seed = 2018

In [4]:
import random

import numpy as np
from copy import copy
import skml
from skml.problem_transformation import ProbabilisticClassifierChain
from skml.datasets import sample_down_label_space
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.base import clone
from sklearn.externals import joblib
import arff
import os
import time

from lib.experimental_framework import load_from_arff
random.seed(2018)

In [7]:
data_train = load_from_arff('../data/scene/scene-train.arff', labelcount=6)
data_test = load_from_arff('../data/scene/scene-test.arff', labelcount=6)

In [18]:
X_train, Y_train = data_train
X_test, Y_test = data_test

In [20]:
X_train = X_train.A
Y_train = Y_train.A
X_test = X_test.A
Y_test = Y_test.A

In [13]:
def uncertain_hamming_loss(y, y_pred, omega=1.0):
    N, L = y_pred.shape
    cumsum = 0
    
    # place '?' if not already done
    np.place(y_pred, 
             mask=np.logical_and(y_pred > 1/3, y_pred < 2/3),
             vals=np.nan)
    u = np.isnan(y_pred).sum()
    hl = ((y_pred>= .5) != y.astype(float)).sum()
    print("hl", hl / (N * L))
    return (hl + (u * omega)) / (N * L)

In [14]:
def evaluate(y_test, y_pred, y_pred_pp):
    print("----------")

    print("hamming loss: ")
    print(hamming_loss(y_test, y_pred))

    print("accuracy:")
    print(accuracy_score(y_test, y_pred))

    print("f1 score:")
    print("micro")
    print(f1_score(y_test, y_pred, average='micro'))
    print("macro")
    print(f1_score(y_test, y_pred, average='macro'))

    print("precision:")
    print("micro")
    print(precision_score(y_test, y_pred, average='micro'))
    print("macro")
    print(precision_score(y_test, y_pred, average='macro'))

    print("recall:")
    print("micro")
    print(recall_score(y_test, y_pred, average='micro'))
    print("macro")
    print(recall_score(y_test, y_pred, average='macro'))
    
    print("#--")
    print("-> hamming loss:")
    print(hamming_loss(y_test, (y_pred_pp >= .5)))
    print("-> uncertain hamming loss:")
    print(uncertain_hamming_loss(y_test, y_pred_pp))
    print("---")

In [15]:
clf = ProbabilisticClassifierChain(LogisticRegression())

In [21]:
clf.fit(X_train, Y_train)

In [22]:
y_pred = clf.predict(X_test)
y_pred_pp = clf.predict_proba(X_test)

In [26]:
 y_pred_pp = y_pred_pp.reshape(y_pred_pp.shape[0], y_pred_pp.shape[2])

In [27]:
evaluate(Y_test, y_pred, y_pred_pp)

----------
hamming loss: 
0.016583054626532888
accuracy:
0.9657190635451505
f1 score:
micro
0.0
macro
0.0
precision:
micro
0.0
macro
0.0
recall:
micro
0.0
macro
0.0
#--
-> hamming loss:
0.3435061315496098
-> uncertain hamming loss:
hl 0.21920289855072464
0.48648272017837235
---


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  # Remove the CWD from sys.path while we load stuff.


In [30]:
y_pred[:10]

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [31]:
y_pred_pp[:10]

array([[3.46426968e-04, 2.99448730e-02, 5.28635016e-02, 3.16679093e-01,
        2.24009413e-01, 3.10165689e-01],
       [1.46012870e-03, 9.31928431e-02, 2.58314587e-01,            nan,
        2.94657343e-01,            nan],
       [2.25403197e-03, 2.36610684e-01, 1.96784118e-01,            nan,
        1.24495300e-01, 3.32401752e-01],
       [6.87654789e-05, 9.17625053e-03, 1.03532534e-02, 1.30296731e-01,
        9.99417786e-03, 4.29246421e-02],
       [7.43127025e-03, 1.39004758e-01, 3.31822014e-01, 7.65316385e-01,
        8.32266021e-01,            nan],
       [8.85383040e-04, 6.32877726e-02, 1.28559140e-01,            nan,
        1.32509427e-01, 2.19657664e-01],
       [9.85582628e-04, 2.80592139e-02, 6.04799147e-02,            nan,
        1.75416554e-01, 7.29236877e-01],
       [1.31515706e-03, 1.04446816e-01, 8.44195354e-02,            nan,
                   nan,            nan],
       [7.49451245e-04, 4.45986187e-02, 2.12161083e-01, 6.72420961e-01,
                   nan, 