In [1]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Notebook-wide configuration: where the result CSVs live and which metric
# columns the tables and figures below iterate over.
folder_name = "results"
supervised_metrics = ["F1", "ami", "ari"]
unsupervised_metrics = ["silhouette", "db", "dunn"]
# Supervised metrics first, then unsupervised ones; the plotting cells lay the
# panels out in this order.
metrics = [*supervised_metrics, *unsupervised_metrics]

## Incomplete multiview algorithms

In [3]:
# Read the evaluation CSV for the incomplete multiview algorithms from the
# results folder, report its shape and preview the first rows.
filelame = "incomplete_algorithms_evaluation.csv"
file_path = os.path.join(folder_name, filelame)
results = pd.read_csv(filepath_or_buffer=file_path)
print("results", results.shape)
results.head(n=5)

In [4]:
# Build a "MONET_wm" variant of the MONET rows whose metric columns are taken
# from the corresponding `sub_*` columns, and append it to `results`.
#
# `.copy()` makes `monet_results` an independent frame: writing through `.loc`
# into a filtered slice of `results` raises SettingWithCopyWarning and leaves it
# ambiguous whether a view or a copy is being modified.
monet_results = results[results["alg"] == "MONET"].copy()
sub_metrics = [f"sub_{i}" for i in metrics]

# Rows where every sample was clustered: the `sub_*` columns are overwritten
# with the regular metric values before being used below.
all_clustered = monet_results["n_clustered_samples"] == monet_results["n_samples"]
monet_results.loc[all_clustered, sub_metrics] = monet_results.loc[all_clustered, metrics].values

# The variant reports the `sub_*` values under the regular metric names.
monet_results.loc[:, metrics] = monet_results.loc[:, sub_metrics].values
monet_results.loc[:, "alg"] = "MONET_wm"

# Drop any "MONET_wm" rows left by a previous execution of this cell so that
# re-running it does not append duplicates (no-op on a fresh kernel).
results = pd.concat([results[results["alg"] != "MONET_wm"], monet_results])

In [15]:
# Per-(dataset, algorithm, % incomplete samples) mean of the supervised metrics.
# The original cell ended on a bare `.apply` attribute reference, which only
# displays the bound method's repr; calling an aggregation shows actual data.
(
    results[["alg", "dataset", "% incomplete samples"] + supervised_metrics]
    .groupby(["dataset", "alg", "% incomplete samples"])
    .mean()
)

In [17]:
import numpy as np

In [20]:
# "mean+-std" string for every supervised metric within each
# (dataset, algorithm, % incomplete samples) group.
#
# `.agg` evaluates the lambda once per metric column of each group, so `x` is a
# single numeric Series. With `.apply` the lambda received the whole group
# frame and produced one string per group built from frame-level reductions
# instead of one value per metric.
(
    results[["alg", "dataset", "% incomplete samples"] + supervised_metrics]
    .groupby(["dataset", "alg", "% incomplete samples"])
    .agg(lambda x: f"{np.mean(x)}+-{np.std(x)}")
)

In [112]:
# Summary table: mean of each supervised metric per (dataset, algorithm),
# rounded to two decimals, with the "% incomplete samples" values spread
# across the columns.
# results_table = results_table.agg(lambda x: f"{np.mean(x).round(2)}\u00B1{np.std(x).round(2)}")
results_table = (
    results[["alg", "dataset", "% incomplete samples"] + supervised_metrics]
    .groupby(["dataset", "alg", "% incomplete samples"])
    .mean()
    .round(2)
    .unstack("% incomplete samples")
)
results_table.head()

In [115]:
# Colour-scale the nutrimouse_genotype rows of the summary table, with the
# gradient computed column by column (axis=0).
(
    results_table
    .loc["nutrimouse_genotype"]
    .style
    .background_gradient(axis=0)
)

In [101]:
def _bold_max_per_dataset(table):
    """Build the CSS that bolds, per column, the largest value within each dataset.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame whose first index level identifies the dataset.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``table``; holds "font-weight:bold" where a
        cell equals the maximum of its column within its dataset and "" elsewhere.
    """
    per_dataset_max = table.groupby(level=0).transform("max")
    css = pd.DataFrame("", index=table.index, columns=table.columns)
    return css.mask(table.eq(per_dataset_max), "font-weight:bold")


# `DataFrame.style` is a read-only property and `results_table.loc[dataset]` is
# a temporary object, so assigning a Styler to it cannot work. A single Styler
# over the whole table, driven by a table-wide function (axis=None), gives the
# per-dataset highlighting instead.
results_table.style.apply(_bold_max_per_dataset, axis=None)

In [70]:
# Bold the largest value of every column, taken over the whole table
# (all datasets together).
(
    results_table
    .style
    .highlight_max(props="font-weight:bold", axis=0)
)

In [56]:
from IPython.display import HTML, display

# Render the summary table through its plain HTML export.
display(
    HTML(results_table.to_html())
)

#### Nutrimouse genotype

In [5]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_genotype"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Incomplete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.28, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()

#### Nutrimouse diet

In [6]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_diet"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Incomplete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.28, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()

## Complete multiview algorithms

### Only complete samples

In [7]:
# Read the evaluation CSV for the complete multiview algorithms from the
# results folder, report its shape and preview the first rows.
filelame = "complete_algorithms_evaluation.csv"
file_path = os.path.join(folder_name, filelame)
results = pd.read_csv(filepath_or_buffer=file_path)
print("results", results.shape)
results.head(n=5)

In [8]:
# Keep the rows whose `only_complete_samples` flag is True, then report the
# remaining shape and preview the first rows.
complete_only_mask = results["only_complete_samples"].eq(True)
results = results.loc[complete_only_mask]
print("results", results.shape)
results.head(n=5)

In [9]:
# For every algorithm, append an "<alg>_all" variant to `results`:
#   * rows with 0 % incomplete samples are duplicated unchanged;
#   * the other rows are duplicated with the metric columns replaced by the
#     values of the corresponding `sub_*` columns.
#
# The variants are collected in a list and concatenated once at the end, which
# avoids re-copying the growing frame on every iteration. Each piece is an
# explicit `.copy()` because writing through `.loc` into a filtered slice of
# `results` triggers SettingWithCopyWarning.
sub_metrics = [f"sub_{i}" for i in metrics]
frames = [results]
for alg in results["alg"].unique():
    alg_results = results[results["alg"] == alg]

    complete0_results = alg_results[alg_results["% incomplete samples"] == 0].copy()
    complete0_results.loc[:, "alg"] = f"{alg}_all"
    frames.append(complete0_results)

    complete_results = alg_results[alg_results["% incomplete samples"] != 0].copy()
    complete_results.loc[:, metrics] = complete_results.loc[:, sub_metrics].values
    complete_results.loc[:, "alg"] = f"{alg}_all"
    frames.append(complete_results)

# Same row order as appending inside the loop: the original rows first, then
# for each algorithm its 0 % block followed by its non-zero block.
results = pd.concat(frames)
print("results", results.shape)

#### Nutrimouse genotype

In [10]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_genotype"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Only complete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.37, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()

#### Nutrimouse diet

In [11]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_diet"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Only complete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.37, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()

### Filled incomplete samples

In [12]:
# Read the complete-algorithms evaluation CSV again: `results` was narrowed and
# extended by the preceding section, so this section starts from the file.
filelame = "complete_algorithms_evaluation.csv"
file_path = os.path.join(folder_name, filelame)
results = pd.read_csv(filepath_or_buffer=file_path)
print("results", results.shape)
results.head(n=5)

In [13]:
# Keep the rows whose `only_complete_samples` flag is False, then report the
# remaining shape and preview the first rows.
filled_mask = results["only_complete_samples"].eq(False)
results = results.loc[filled_mask]
print("results", results.shape)
results.head(n=5)

#### Nutrimouse genotype

In [14]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_genotype"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Filled incomplete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.45, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()

#### Nutrimouse diet

In [15]:
# One panel per metric (2 x 3 grid): metric value against the percentage of
# incomplete samples, one line per algorithm, for a single dataset.
dataset_name = "nutrimouse_diet"
dataset_results = results.loc[results["dataset"] == dataset_name]
nrows, ncols = 2, 3
fig, axes = plt.subplots(nrows, ncols, figsize=(20, 10))
fig.suptitle(f"Filled incomplete samples: {dataset_name}", y=0.95)

# Metric name -> column whose first value is drawn as a horizontal reference line.
baseline_columns = {"ACC": "random_acc", "F1": "random_f1"}
for metric, ax in zip(metrics, axes.flatten()):
    sns.pointplot(
        data=dataset_results,
        x="% incomplete samples",
        y=metric,
        hue="alg",
        seed=42,
        errwidth=0.5,
        ax=ax,
        linestyles="dotted",
        markers="*",
    )
    if metric in baseline_columns:
        ax.axhline(dataset_results[baseline_columns[metric]].iloc[0])

# Single figure-level legend, built from the last panel drawn, with one column
# per algorithm; the per-panel legends are then removed.
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, ncol=dataset_results["alg"].nunique(), loc=[0.45, 0.927])
for panel in axes.flatten():
    if panel.get_legend() is not None:
        panel.get_legend().remove()
plt.show()