# Actual implementation of the second model

In this notebook we implement the proposed second model as a reusable classifier class and evaluate it.

In [118]:
from support import CalculateLabelsCorrelationWithFTest
from skmultilearn.dataset import load_dataset
from skmultilearn.base.problem_transformation import ProblemTransformationBase
from typing import Any, List
import numpy as np
from skmultilearn.problem_transform import ClassifierChain
from skmultilearn.base import MLClassifierBase
from sklearn.svm import SVC
from evaluation import EvaluationPipeline

In [79]:
# Fetch the "scene" dataset in its "undivided" variant. The log line printed by this
# cell shows that an already-downloaded copy is reused instead of being fetched again.
full_data = load_dataset("scene", "undivided")
# Four items are returned; only the first two (features and label matrix) are used here.
# NOTE(review): the two discarded items are presumably feature/label metadata — confirm.
X_full, y_full, _, _ = full_data

scene:undivided - exists, not redownloading


In [80]:
def build_chain_based_on_f_test(res):
    """Build a label ordering by greedily following the strongest F-test links.

    The chain starts at the ``for_label`` of the row with the highest
    ``f_test_result``. After that, the rows whose ``for_label`` equals the label
    added last are inspected, rows whose ``expand_this_label`` is already part of
    the chain are ignored, and the ``expand_this_label`` of the best remaining row
    is appended. The walk stops as soon as no such row is left, so the chain may
    contain fewer labels than appear in ``res``.

    Args:
        res: pandas DataFrame with the columns ``for_label``,
            ``expand_this_label`` and ``f_test_result``.

    Returns:
        A list of ``int`` labels in chain order. An empty list is returned when
        ``res`` has no rows (previously this raised ``IndexError``).
    """
    if len(res) == 0:
        return []

    # Sort once, with a stable algorithm so that ties keep their original row order.
    # Boolean masking preserves row order, so the first row of any filtered view is
    # already its best-scoring row and no per-iteration re-sort is needed.
    ranked = res.sort_values(by=["f_test_result"], ascending=False, kind="mergesort")

    current = int(ranked.iloc[0]["for_label"])
    chain = [current]

    while True:
        candidates = ranked[
            (ranked["for_label"] == current)
            & ~ranked["expand_this_label"].isin(chain)
        ]
        if candidates.empty:
            return chain

        current = int(candidates.iloc[0]["expand_this_label"])
        chain.append(current)

# `res` is not created by any cell of this notebook, so the original call only worked
# with a variable left over in the kernel. Derive it here so the cell survives
# Restart & Run All.
# NOTE(review): alpha=1 mirrors the model evaluated further down — confirm that this is
# the setting that produced the recorded output.
res = CalculateLabelsCorrelationWithFTest(alpha=1).get(y_full)
build_chain_based_on_f_test(res)


[5, 4, 1, 0, 2, 3]

In [144]:
class ClassifierChainWithFTestOrdering(MLClassifierBase):
    """Classifier chain whose label order is derived from F-test results.

    On ``fit`` the label matrix is passed to ``CalculateLabelsCorrelationWithFTest``,
    the result is turned into a label order by ``build_chain_based_on_f_test``, and
    a ``ClassifierChain`` using that order is trained on the data.

    Args:
        alpha: Forwarded to ``CalculateLabelsCorrelationWithFTest``.
        base_classifier: Estimator used for every link of the chain. When ``None``
            (the default) a fresh ``SVC()`` is created for this instance, so that
            separate models never share one default estimator object.

    Attributes:
        order: Label order computed by the last call to ``fit`` (empty before).
        main_classifier: The fitted ``ClassifierChain``, or ``None`` before ``fit``.
        classes_: ``np.arange(n_labels)``, set by ``fit``.
    """

    def __init__(
        self,
        alpha: float = 0.5,
        base_classifier: Any = None,
    ):
        super().__init__()
        self.order = []
        self.main_classifier = None
        self.copyable_attrs = ["base_classifier", "alpha"]
        # NOTE: this `copyable_attrs` must match exactly the arguments passed to the constructor

        self.alpha = alpha
        # A default written as `SVC()` in the signature is evaluated only once and would
        # be shared by every instance; build a new one per instance instead.
        self.base_classifier = SVC() if base_classifier is None else base_classifier

        self.calculator = CalculateLabelsCorrelationWithFTest(alpha=self.alpha)

    def fit(self, X: Any, y: Any):
        """Compute the label order from ``y`` and train the chain on ``(X, y)``.

        Returns:
            ``self``, so that calls can be chained.
        """
        self.classes_ = np.arange(y.shape[1])
        # NOTE: this is required to run the evaluation pipeline

        # Rebuild the calculator so it always reflects the current `self.alpha`, even
        # if `alpha` was changed after construction.
        self.calculator = CalculateLabelsCorrelationWithFTest(alpha=self.alpha)
        self.order = build_chain_based_on_f_test(self.calculator.get(y))

        self.main_classifier = ClassifierChain(
            classifier=self.base_classifier,
            require_dense=[False, True],
            order=self.order,
        )

        self.main_classifier.fit(X, y)
        return self

    def predict(self, X: Any):
        """Return the predictions of the fitted chain for ``X``.

        Raises:
            NotFittedError: If ``fit`` has not been called yet. This exception
                subclasses both ``ValueError`` and ``AttributeError``.
        """
        if self.main_classifier is None:
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "ClassifierChainWithFTestOrdering is not fitted yet; call fit() first."
            )
        return self.main_classifier.predict(X)


In [145]:
# Evaluate the F-test-ordered chain on the full dataset through the evaluation pipeline.
# NOTE(review): alpha=1 overrides the constructor default of 0.5 — confirm this is the
# intended setting for the reported results.
model = ClassifierChainWithFTestOrdering(alpha=1)
pipe = EvaluationPipeline(model)
result = pipe.run(X_full, y_full)

In [146]:
# NOTE(review): the Hamming loss in the recorded output is negative, which a true Hamming
# loss cannot be; presumably the pipeline reports a negated score — confirm in
# EvaluationPipeline before quoting the number.
result.describe()

Accuracy: 0.1883 ± 0.13
Hamming Loss: -0.2692 ± 0.05


## Conclusions so far

* The poor results observed in the previous notebooks were reproduced here, which is expected and confirms that this implementation behaves consistently with the earlier experiments.
* It was a bit hard, at first, to comply with the `base_classifier` interface that the evaluation pipeline requires (as it is based on the `cross_validate` function from **scikit-learn**). Main learnings:
  * Your classifier class must set a `copyable_attrs` property, which must match exactly the arguments of the `__init__` method (the constructor of _your_ class).
  * You must also set a `classes_` property, which should be a list of the possible classes that your classifier can predict. You can obtain this list from the `y` argument of the `fit` method, by doing this: `self.classes_ = np.arange(y.shape[1])` (at least in the case of multi-label classification).