In [1]:
import random

import numpy as np
import skml
from skml.problem_transformation import ProbabilisticClassifierChain
from skml.datasets import sample_down_label_space
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import hamming_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.base import clone

from experimental_framework import load_from_arff

# Seed every global RNG this experiment can reach so reruns are repeatable.
# Seeding Python's `random` alone is not enough: NumPy-based code (including
# scikit-learn estimators left at random_state=None) draws from NumPy's
# global generator, which `random.seed` does not touch.
random.seed(2018)
np.random.seed(2018)

In [2]:
# Load the Enron data from its ARFF file; the helper hands back a pair that
# is used below as the feature matrix (X) and the label matrix (y).
# NOTE(review): labelcount=53 / endian="little" presumably tell the loader how
# many label columns there are and on which side of the file they sit —
# confirm against experimental_framework.load_from_arff.
X, y = load_from_arff(
    'data/enron/enron.arff',
    labelcount=53,
    endian="little",
)

In [3]:
# Reduce the label space of y with skml's helper, asking for k=10.
# How the retained labels are chosen is decided by the helper's defaults.
# `y` is rebound in place, so re-running this cell operates on the
# already-reduced label matrix rather than on the freshly loaded one.
y = sample_down_label_space(
    y,
    k=10,
)

In [4]:
# Convert the loaded matrices to plain dense ndarrays.
# `.todense()` on its own yields a `numpy.matrix`, a type NumPy no longer
# recommends and that recent scikit-learn input validation refuses; wrapping
# the result in `np.asarray` keeps the same values and 2-D shape while handing
# downstream code a regular ndarray.
# Like the original, this cell expects X and y to still expose `.todense()`,
# i.e. it is meant to run once after the loading cells.
X = np.asarray(X.todense())
y = np.asarray(y.todense())

In [5]:
# Template model: a probabilistic classifier chain built on logistic
# regression. It is cloned per fold further down, so this instance itself is
# never fitted.
# NOTE(review): LogisticRegression() relies on library defaults (solver,
# regularisation); those defaults have changed between scikit-learn releases —
# pin them if exact score reproduction matters.
clf = ProbabilisticClassifierChain(LogisticRegression())

# Pin the fold count explicitly. The recorded results below come from three
# folds, which was KFold's default when this was run; newer scikit-learn
# releases default to five, so a bare KFold() would silently change the
# experiment.
kf = KFold(n_splits=3)

In [10]:
# Cross-validate: for every split produced by `kf`, fit a fresh clone of the
# template model on the training rows and print its scores on the held-out rows.
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # clone() gives an unfitted copy, so no state carries over between folds.
    pcc = clone(clf)
    pcc.fit(X_train, y_train)
    y_pred = pcc.predict(X_test)

    print("----------")

    # Metrics that take no averaging argument.
    print("hamming loss: ")
    print(hamming_loss(y_test, y_pred))

    print("accuracy:")
    print(accuracy_score(y_test, y_pred))

    # Metrics reported under both micro and macro averaging, in that order.
    for heading, scorer in (
        ("f1 score:", f1_score),
        ("precision:", precision_score),
        ("recall:", recall_score),
    ):
        print(heading)
        for averaging in ("micro", "macro"):
            print(averaging)
            print(scorer(y_test, y_pred, average=averaging))

----------
hamming loss: 
0.1829225352112676
accuracy:
0.2323943661971831
f1 score:
micro
0.5384273656152821
macro
0.435764750175122
precision:
micro
0.6325678496868476
macro
0.5184902831165241
recall:
micro
0.46867749419953597
macro
0.3843972259928678
----------
hamming loss: 
0.19647266313932982
accuracy:
0.12698412698412698
f1 score:
micro
0.5883222468588322
macro
0.4741030925483677
precision:
micro
0.6209048361934477
macro
0.5381684414008323
recall:
micro
0.5589887640449438
macro
0.44449943537594755
----------
hamming loss: 
0.20582010582010582
accuracy:
0.13756613756613756
f1 score:
micro
0.5794594594594595
macro
0.44526990126114957
precision:
micro
0.6296006264682851
macro
0.5400536539191063
recall:
micro
0.5367156208277704
macro
0.4039492063738136
