In [7]:
import pandas as pd
import numpy as np
import sys

from skmultilearn.model_selection import IterativeStratification

sys.path.append("../..")
from src.algorithms.MuLAM import MuLAM

In [8]:
yeast = pd.read_csv("/home/adel/Documents/Code/Ant-Miner/datasets/multi_label/genbase_cleaned.csv")
yeast

Unnamed: 0,PS00014,PS00017,PS00018,PS00027,PS00066,PS00120,PS00136,PS00190,PS00197,PS00215,...,label18,label19,label20,label21,label22,label23,label24,label25,label26,label27
0,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
1,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
2,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
3,NO,NO,NO,NO,YES,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
4,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
658,NO,YES,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
659,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0
660,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,...,0,0,0,0,0,0,0,0,0,0


In [9]:
labels_names = []
for col in yeast.columns:
    if col.startswith("label"):
        labels_names.append(col)

print(labels_names)

['label1', 'label2', 'label3', 'label4', 'label5', 'label6', 'label7', 'label8', 'label9', 'label10', 'label11', 'label12', 'label13', 'label14', 'label15', 'label16', 'label17', 'label18', 'label19', 'label20', 'label21', 'label22', 'label23', 'label24', 'label25', 'label26', 'label27']


In [10]:
X = yeast.drop(columns=labels_names)
y = yeast[labels_names]

In [11]:
skf = IterativeStratification(n_splits=5, order=1)

folds = []
for train_index, test_index in skf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    folds.append((X_train, X_test, y_train, y_test))

In [12]:
metrics_list = []
for i, fold in enumerate(folds):
    X_train, X_test, y_train, y_test = fold
    train = pd.concat([X_train, y_train], axis=1)
    test = pd.concat([X_test, y_test], axis=1)

    mulam = MuLAM(
        max_ants=100,
        max_uncovered=10,
        min_covers=10,
        alpha=1,
        beta=1,
    )
    
    mulam.fit(train, labels_names)
    y_pred, scores = mulam.predict(test, labels=labels_names)

    y_true = y_test.to_numpy()
    metrics = mulam.evaluate(y_true=y_true, y_pred=y_pred, scores=scores)

    metrics_list.append(metrics)
    print(f'Fold {i+1}:', metrics)

# average over folds
average_metrics = {key: np.mean([m[key] for m in metrics_list]) for key in metrics_list[0]}
print(f'{len(folds)}-Fold Average:', average_metrics)


Fold 1: {'accuracy': np.float64(0.9529379003063213), 'recall': np.float64(0.0), 'precision': np.float64(0.0), 'f1_macro': np.float64(0.0), 'f1_micro': 0.0, 'hamming_loss': np.float64(0.04706209969367864), 'subset_accuracy': np.float64(0.0), 'ranking_loss': 0.9708042542253068, 'one_error': 1.0, 'coverage': np.float64(0.6529786003470215), 'average_precision': 0.06762721978807305}
Fold 2: {'accuracy': np.float64(0.9548260381593715), 'recall': np.float64(0.0), 'precision': np.float64(0.0), 'f1_macro': np.float64(0.0), 'f1_micro': 0.0, 'hamming_loss': np.float64(0.04517396184062851), 'subset_accuracy': np.float64(0.0), 'ranking_loss': 0.9068055133272523, 'one_error': 0.9924242424242424, 'coverage': np.float64(0.8044871794871795), 'average_precision': 0.06694728096009975}
Fold 3: {'accuracy': np.float64(0.9562289562289562), 'recall': np.float64(0.037037037037037035), 'precision': np.float64(0.017921146953405017), 'f1_macro': np.float64(0.024154589371980676), 'f1_micro': np.float64(0.16129032

5-Fold Average: {'accuracy': np.float64(0.7639840060433746), 'recall': np.float64(0.17017706986411932), 'precision': np.float64(0.15332347632702545), 'f1_macro': np.float64(0.1544836742927096), 'f1_micro': np.float64(0.49655141221443966), 'hamming_loss': np.float64(0.23601599395662548), 'subset_accuracy': np.float64(0.02298470365996207), 'ranking_loss': np.float64(0.684708784397867), 'one_error': np.float64(0.9329800490283093), 'coverage': np.float64(0.8544745752165591), 'average_precision': np.float64(0.3210325184755639)}
