# 10. Fifth model implementation

Let's dive into the last model, a variant of the model studied in notebook 9. It uses partial orders instead of full orders in the chain, and it is based on the same paper.

## 10.1. Setup

In [1]:
from skmultilearn.dataset import load_dataset
import numpy as np
from skmultilearn.problem_transform import ClassifierChain
import pygad
from typing import List
import sklearn.metrics as metrics
from typing import Any, Optional
import copy
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import math
from numpy.typing import NDArray
from typing import Dict
import pandas as pd
from typing import cast
import logging


## 10.2. Data

In [4]:
# Names of the benchmark datasets fetched through `load_dataset`.
desired_datasets = ["scene", "emotions", "birds"]

# Maps each dataset name to its matrices plus two summary counts.
datasets = {}
for dataset_name in desired_datasets:
    print(f"getting dataset `{dataset_name}`")

    # The three variants are requested in the order undivided, train, test.
    full_dataset = load_dataset(dataset_name, "undivided")
    train_dataset = load_dataset(dataset_name, "train")
    test_dataset = load_dataset(dataset_name, "test")

    # Each result unpacks into four items; only the first two are used here.
    X, y, _, _ = full_dataset
    X_train, y_train, _, _ = train_dataset
    X_test, y_test, _, _ = test_dataset

    entry = {}
    entry["X"] = X
    entry["y"] = y
    entry["X_train"] = X_train
    entry["y_train"] = y_train
    entry["X_test"] = X_test
    entry["y_test"] = y_test
    entry["rows"] = X.shape[0]
    entry["labels_count"] = y.shape[1]
    datasets[dataset_name] = entry

# One three-line summary per dataset.
for name, info in datasets.items():
    print(
        "===",
        f"information for dataset `{name}`",
        f"rows: {info['rows']}, labels: {info['labels_count']}",
        sep="\n",
    )


getting dataset `scene`
scene:undivided - exists, not redownloading
scene:train - exists, not redownloading
scene:test - exists, not redownloading
getting dataset `emotions`
emotions:undivided - exists, not redownloading
emotions:train - exists, not redownloading
emotions:test - exists, not redownloading
getting dataset `birds`
birds:undivided - exists, not redownloading
birds:train - exists, not redownloading
birds:test - exists, not redownloading
===
information for dataset `scene`
rows: 2407, labels: 6
===
information for dataset `emotions`
rows: 593, labels: 6
===
information for dataset `birds`
rows: 645, labels: 19


## 10.3. Entropy functions

In [3]:
Probabilities = Dict[int, Dict[int, float]]

def calculate_probabilities(y: Any) -> Probabilities:
    """Estimate, for every label column, the relative frequency of each value.

    Args:
        y: Two-dimensional label matrix; axis 0 is counted as rows and axis 1
            as labels. Objects exposing ``todense()`` (the only input the
            previous version could handle) and plain dense arrays or nested
            lists are both accepted.

    Returns:
        ``{label_index: {value: relative_frequency}}``. Only values that
        occur in a column get an entry, so every stored probability is
        strictly positive.

    Raises:
        ValueError: If ``y`` is not two-dimensional.
    """
    # The parameter used to be annotated as a NumPy array while the body
    # called `.todense()`, which dense arrays do not have. Accept both.
    dense_y = np.asarray(y.todense() if hasattr(y, "todense") else y)
    if dense_y.ndim != 2:
        raise ValueError(
            f"expected a 2-D label matrix, got an array with {dense_y.ndim} dimension(s)"
        )

    rows_count, label_count = dense_y.shape

    probs: Probabilities = {}
    for label in range(label_count):
        # One pass per column: distinct values together with their counts.
        values, counts = np.unique(dense_y[:, label], return_counts=True)
        # `tolist()` yields Python ints, so the division gives plain floats.
        probs[label] = {
            value: count / rows_count
            for value, count in zip(values, counts.tolist())
        }

    return probs

Entropies = Dict[int, float]

def calculate_entropies(probabilities: Probabilities) -> Entropies:
    """Compute the Shannon entropy, in bits, of every label.

    Args:
        probabilities: ``{label: {value: probability}}`` mapping.

    Returns:
        ``{label: entropy}`` where ``entropy = -sum(p * log2(p))`` over the
        label's probabilities. Zero probabilities contribute nothing.
    """
    entropies: Entropies = {}

    for label, calculated_probabilities in probabilities.items():
        # Skip p == 0: log2(0) raises, and the term is taken as 0 anyway.
        # This mirrors the guard used by the joint computation in this notebook.
        # `math.log2` is used instead of `math.log(p, 2)` because it is the
        # dedicated base-2 routine and is usually more accurate.
        weighted_logs = sum(
            prob * math.log2(prob)
            for prob in calculated_probabilities.values()
            if prob > 0
        )
        # Subtracting from 0.0 negates the sum without producing -0.0.
        entropies[label] = 0.0 - weighted_logs

    return entropies

def calculate_joint_probability(
    probabilities: Probabilities,
    label_x: int,
    label_y: int,
    label_matrix: Optional[Any] = None,
):
    """Return the joint entropy H(label_x, label_y) in bits.

    Despite its name, this function returns an entropy, not a probability.

    Args:
        probabilities: ``{label: {value: probability}}`` marginal
            distributions. Only read when ``label_matrix`` is omitted.
        label_x: Index of the first label.
        label_y: Index of the second label.
        label_matrix: Optional two-dimensional label matrix (rows by labels),
            either dense or exposing ``todense()``. When given, the joint
            distribution is estimated from the observed value pairs of the
            two columns.

    Returns:
        The joint entropy. Without ``label_matrix`` the joint distribution is
        approximated by the product of the two marginals, which assumes the
        labels are independent; the result then equals
        ``H(label_x) + H(label_y)`` up to rounding.
    """
    if label_matrix is not None:
        dense = np.asarray(
            label_matrix.todense() if hasattr(label_matrix, "todense") else label_matrix
        )
        rows_count = dense.shape[0]
        if rows_count == 0:
            return 0.0

        # Count each distinct (value_x, value_y) row. Every count is >= 1,
        # so log2 never receives 0.
        _, pair_counts = np.unique(
            dense[:, [label_x, label_y]], axis=0, return_counts=True
        )
        pair_probs = pair_counts / rows_count
        return float(-np.sum(pair_probs * np.log2(pair_probs)))

    # Legacy path, unchanged: product of marginals (independence assumption).
    results = []
    for prob_i in probabilities[label_x].values():
        for prob_j in probabilities[label_y].values():
            and_prob = prob_i * prob_j

            if and_prob > 0:  # avoid taking the log of 0
                results.append(and_prob * np.log2(and_prob))

    return -1 * sum(results)

def calculate_conditional_entropy(probabilities: Probabilities, entropies: Entropies, label_x: int, label_y: int):
    """Return the joint entropy of the two labels minus the entropy of ``label_y``.

    The joint term comes from ``calculate_joint_probability``; the subtracted
    term is read from ``entropies``.
    """
    return calculate_joint_probability(probabilities, label_x, label_y) - entropies[label_y]

In [5]:
LOPMatrix = Dict[int, Dict[int, float]]

def build_lop_matrix(
    label_order: List[int],
    probabilities: Probabilities,
    entropies: Entropies
) -> LOPMatrix:
    """Build the pairwise conditional-entropy matrix for a label ordering.

    Args:
        label_order: Labels in the order to use for both nesting levels.
        probabilities: Per-label value probabilities.
        entropies: Per-label entropies.

    Returns:
        Nested dict where ``matrix[i][j]`` is
        ``calculate_conditional_entropy(probabilities, entropies, i, j)`` for
        ``i != j`` and ``0`` on the diagonal.
    """
    # The diagonal is forced to 0 to match the table described in the paper.
    # Per the original author's note, the helper itself would give a value
    # greater than 0 for a label paired with itself.
    return {
        row_i: {
            row_j: (
                0
                if row_i == row_j
                else calculate_conditional_entropy(probabilities, entropies, row_i, row_j)
            )
            for row_j in label_order
        }
        for row_i in label_order
    }

def calculate_lop(lop_matrix: LOPMatrix) -> float:
    """Sum the entries strictly above the diagonal of the LOP matrix.

    The nested dict is loaded into a DataFrame only so cells can be addressed
    by position rather than by label key. A DataFrame built from a dict of
    dicts uses the outer keys as columns, so position ``[r, c]`` holds
    ``lop_matrix[c-th outer key][r-th inner key]``.

    Args:
        lop_matrix: Square nested dict such as the one produced by
            ``build_lop_matrix``.

    Returns:
        The sum over all positions whose column index exceeds the row index.
    """
    frame = pd.DataFrame(lop_matrix)
    size = len(lop_matrix)

    total = 0
    for row_position in range(size):
        # Starting one past the diagonal visits exactly the columns > row.
        for column_position in range(row_position + 1, size):
            total += cast(float, frame.iloc[row_position, column_position])

    return total

## 10.5. New entropy functions

In [24]:
def mutual_information(probabilities: Probabilities, entropies: Entropies, label_x: int, label_y: int):
    """Return ``H(label_x) + H(label_y) - H(label_x, label_y)``.

    Args:
        probabilities: Per-label value probabilities, passed through to
            ``calculate_joint_probability``.
        entropies: Per-label entropies.
        label_x: Index of the first label.
        label_y: Index of the second label.

    Returns:
        The mutual information of the two labels. The joint term comes from
        ``calculate_joint_probability``; when that term is built from the
        product of the marginals, this value is 0 up to floating-point
        rounding.
    """
    # The previous body also computed a conditional entropy that was never
    # used, and kept a commented-out alternative return; both are removed.
    entropy_x = entropies[label_x]
    entropy_y = entropies[label_y]
    joint_entropy = calculate_joint_probability(probabilities, label_x, label_y)

    return entropy_x + entropy_y - joint_entropy


In [28]:
# Marginal statistics for the undivided `emotions` label matrix.
probs = calculate_probabilities(datasets["emotions"]["y"])
entropies = calculate_entropies(probs)

print("=== mutual information ===")
print(mutual_information(probs, entropies, 2, 0))

# Pairwise table: res[i][j] is the mutual information of labels i and j.
res = [
    [mutual_information(probs, entropies, i, j) for j in range(len(probs))]
    for i in range(len(probs))
]

pd.DataFrame(res)

=== mutual information ===
0.0


Unnamed: 0,0,1,2,3,4,5
0,0.0,2.220446e-16,-2.220446e-16,2.220446e-16,0.0,0.0
1,2.220446e-16,0.0,0.0,0.0,2.220446e-16,2.220446e-16
2,0.0,0.0,-4.440892e-16,-2.220446e-16,-2.220446e-16,0.0
3,2.220446e-16,0.0,-2.220446e-16,0.0,0.0,2.220446e-16
4,0.0,2.220446e-16,-2.220446e-16,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0


## 10.6. Results so far

The results look wrong: every value is extremely small (on the order of $10^{-16}$), and some of them are negative, even though [this Wikipedia page](https://en.wikipedia.org/wiki/Mutual_information) states that the mutual information **cannot** be negative.

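It might be because my calculations of the entropy are wrong. In particular, `calculate_joint_probability` multiplies the marginal probabilities of the two labels, which treats them as independent. Under that assumption $H(X, Y) = H(X) + H(Y)$, so $I(X; Y) = H(X) + H(Y) - H(X, Y)$ is exactly zero and only floating-point rounding error (of either sign) remains.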

## 10.7. Checking the results by looking for other implementations

I found [this implementation](https://github.com/pafoster/pyitlib). Let's see if it works as expected.

I also found [this other implementation](https://github.com/nikdon/pyEntropy), but it seems limited, and [this one from SciPy](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html), which also seems limited.

In [29]:
!pip install pyitlib

Collecting pyitlib
  Downloading pyitlib-0.2.3.tar.gz (30 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting scikit-learn<=0.24,>=0.16.0 (from pyitlib)
  Downloading scikit_learn-0.24.0-cp39-cp39-win_amd64.whl (6.9 MB)
     ---------------------------------------- 6.9/6.9 MB 3.4 MB/s eta 0:00:00
Collecting future>=0.16.0 (from pyitlib)
  Downloading future-0.18.3.tar.gz (840 kB)
     ------------------------------------- 840.9/840.9 kB 26.8 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pyitlib, future
  Building wheel for pyitlib (setup.py): started
  Building wheel for pyitlib (setup.py): finished with status 'done'
  Created wheel for pyitlib: filename=pyitlib-0.2.3-py3-none-any.whl size=29367 sha256=b37a01ce175782c490f57abcd4816ad5fa382d2a3fde2ef0ec9a5cb244871996
  Stored in directory: c:\users\edgard\appda

  You can safely remove it manually.


In [30]:
import numpy as np
from pyitlib import discrete_random_variable as drv