## Imports

In [1]:
# utils
import os
import sys
from pathlib import Path
PROJECT_DIR = Path.cwd().parent
sys.path.append(str(PROJECT_DIR))

# basics
import numpy as np
import pandas as pd

# viz
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

# metrics
from utils import config
from utils.reader import read_file_yaml

np.random.seed(0)

In [2]:
path_root = (
    PROJECT_DIR
    / "results"
)
path_conf = (
    PROJECT_DIR
    / "conf"
)
file_path_parameters = (
    path_conf 
    / "parameters.yml"
)

n_random = np.sort([int(i.replace("random_n",""))for i in os.listdir(path_root)])
path_random = [
    "random_n"+str(i) for i in n_random
]
path_results = [path_root / i for i in path_random]

In [3]:
under_line = "\n{}\n"
title_part_n2 = "PROJECT_DIR: [ {} ]".format(PROJECT_DIR)
title_part_n3 = under_line.format(
    "".join(["-"]*len(title_part_n2))
)
title_part_n1 = under_line.format(
    "".join(["-"]*len(title_part_n2))
)
print(title_part_n1 + title_part_n2 + title_part_n3)


----------------------------------------------------
PROJECT_DIR: [ /home/manuel/aaai-claire-clustering ]
----------------------------------------------------



## Read

In [4]:
parameters = read_file_yaml(file_path_parameters)

In [8]:
metrics = {
    name: {
        dataset: pd.read_csv(
            url / dataset / "metrics" / "metrics.csv",
            index_col = 0
        ) for dataset in os.listdir(url)
    }
    for name, url in zip(path_random, path_results)
}
metrics.keys()

dict_keys(['random_n12', 'random_n13', 'random_n14', 'random_n15', 'random_n16', 'random_n17', 'random_n18', 'random_n19', 'random_n20', 'random_n21', 'random_n22', 'random_n23', 'random_n24', 'random_n25', 'random_n26', 'random_n27', 'random_n28', 'random_n29', 'random_n30', 'random_n31', 'random_n32', 'random_n33', 'random_n34', 'random_n35'])

In [15]:
len(os.listdir("../results/"))

35

## Concat all results

In [6]:
data = (
    metrics["random_n1"]["aniso"]
    .T
    .filter(regex = "^(?!.*random_model)")
    .T[["abilities"]]
    .reset_index()
)
data.rename(columns = {
    "abilities": "n1_aniso",
    "index": "model"
}, inplace = True)

for random_n, content_n in metrics.items():
    for name, content_dataset in content_n.items(): 
        if (random_n == "random_n1")and(name == "aniso"):
            continue
        tmp = (
            content_dataset
            .T
            .filter(regex = "^(?!.*random_model)")
            .T[["abilities"]]
            .reset_index()
        )
        tmp.rename(columns = {
            "abilities": "n{}_{}".format(
                random_n.replace("random_n",""),
                name
            ),
            "index": "model"
        }, inplace = True)
        data = data.merge(tmp, on = ["model"])
data.set_index("model", inplace = True)

KeyError: 'random_n1'

In [None]:
datasets = {}
for i in config.file_names[:(-1)]:
    datasets[i] = data.filter(regex = r"{}".format(i))
    datasets[i].columns = datasets[i].columns.str.replace("_"+i, "")

## Plots

In [None]:
models = list(config.models.keys()) + ["average_model", "optimal_clustering"]
gray_scale = [str(i/len(models)) for i in range(1, len(models) + 1)]
# colors = [
#     f'#{int(float(gray) * 255):02x}{int(float(gray) * 255):02x}{int(float(gray) * 255):02x}' for gray in gray_scale
# ]
colors = plt.cm.coolwarm(np.linspace(0, 1, len(models)))
markers = list(mlines.Line2D.markers.keys())

fig, axes = plt.subplots(len(datasets.keys()), 1, figsize=(14, 8*len(datasets)))

if not isinstance(axes, np.ndarray):
    axes = [axes]

for ax, (name, content) in zip(axes, datasets.items()):
    for line_name, line_data in content.iterrows():
        for idx, i in enumerate(models):
            if i in line_name:
                _line_index = []
                for k in line_data.index.str.split("n"):
                    _line_index.append("$n_{("+k[1]+")}$")
                linestyle = '--' 
                ax.plot(_line_index, line_data, label=i, color=colors[idx], linestyle=linestyle, marker=markers[idx])
                _line_index = []
    ax.grid(True)
    ax.set_title(name)
    ax.set_ylabel("$abilities$")

axes[-1].set_xlabel('$n\_random\_model$')
axes[0].legend(models, loc='upper left', bbox_to_anchor=(1.05, 1.0))
plt.show()
