# Import and Config

In [1]:
from typing import Tuple
import pandas as pd
import spacy
import numpy as np
from spacy.tokens import Token, Doc
from sentence_transformers import SentenceTransformer
import tensorflow as tf
import matplotlib.pyplot as plt
import os
from discofuzz.constants import SIMILARITY_METRICS

from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.metrics import confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity

import seaborn as sns
import wget as wget
import zipfile

# Import DisCoFuzz package classes
from discofuzz.constants import *
from discofuzz.config import *
from discofuzz.WiCDatasetHandler import WiCDatasetHandler
from discofuzz.BaseEmbeddingModel import BaseEmbeddingModel
from discofuzz.DisCoFuzz import DisCoFuzz
from discofuzz.EvalHarness import EvalHarness

  from .autonotebook import tqdm as notebook_tqdm





[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\igeek\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\igeek\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPU available: {gpus}")

In [3]:
sample_size = 100
# Define all available similarity metrics from FourierFuzzifier
sim_metrics_enum = [SIMILARITY_METRICS.COS, SIMILARITY_METRICS.W1, SIMILARITY_METRICS.W2, SIMILARITY_METRICS.Q]
sim_metrics = [s.value for s in sim_metrics_enum]
n_components = 64
fuzzification_kernel_size = 16

# load config vars or defaults
X_train, y_train, X_test, y_test = WiCDatasetHandler.load_dataset(".")
X_train, y_train, X_test, y_test = X_train[:sample_size], y_train[:sample_size], X_test[:sample_size], y_test[:sample_size]
sim_metrics = [s.value for s in sim_metrics_enum]
embedding_model = BaseEmbeddingModel()
model = DisCoFuzz(embedding_model)
eval_harness = EvalHarness(
    embedding_model,
    model.spacy_model,
    model.lemma_vectorizer.fuzzifier
)

# Run model

In [4]:
sents = X_train["sent_1"].to_list() + X_train["sent_2"].to_list()
embedding_model.fit_transform(sents)

array([[ 0.19946903, -0.14903174, -0.00362512, ...,  0.01080543,
        -0.00896601, -0.1138957 ],
       [ 0.13666108,  0.04740447, -0.21337502, ...,  0.07616263,
        -0.1118135 ,  0.0217604 ],
       [ 0.3162493 ,  0.03346163,  0.14219198, ...,  0.06308809,
        -0.15638565,  0.11601187],
       ...,
       [-0.30711916, -0.19840159, -0.01386945, ...,  0.09652609,
        -0.07688157,  0.01969992],
       [ 0.05111892, -0.09810516, -0.33803958, ...,  0.09894805,
        -0.02603352,  0.09326725],
       [-0.1781565 ,  0.09907547, -0.10336637, ..., -0.14963534,
        -0.01152174,  0.01220493]], dtype=float32)

In [5]:
X_train = model.fit(X_train)

Enriching TensorStore instance with wordnet lemma embeddings as defaults...
Embedding all the wordnet lemmas...
Performing dimensionality reduction on all the wordnet lemmas...
Fuzzifying all the dimensionality-reduced wordnet lemmas...
Getting fuzzy tensor embeddings for all the wordnet synsets...
Enriching fuzzified lemma tensors with fuzzified synset tensors...


In [6]:
X_train = model.predict_batch(X_train, STRATEGIES)

ValueError: Expected 2D array, got 1D array instead:
array=[-1.13688104e-01  2.86237616e-02  4.28765826e-02  1.47385029e-02
  3.85403237e-03 -8.42553005e-02  9.47533622e-02  3.76004390e-02
 -1.94290541e-02 -1.14043001e-02 -4.49549481e-02 -3.56679186e-02
  1.21140329e-03  8.74922611e-03  2.00514160e-02  1.91207882e-02
 -6.10003471e-02 -4.06104885e-02 -1.13987297e-01  5.32020964e-02
 -8.08984265e-02  2.60486361e-02 -7.36109447e-03 -2.73809507e-02
 -7.22033903e-02 -3.26868077e-03  3.52292433e-02 -2.84509119e-02
  5.02355509e-02 -1.07399382e-01 -9.91404615e-03 -5.62018640e-02
  7.54309371e-02 -5.40617201e-03 -6.59455685e-03 -5.21448720e-03
 -2.58125737e-02 -8.42722431e-02  2.58578565e-02 -4.32793004e-03
  4.94249910e-02 -3.63457277e-02  3.90797406e-02  1.41888829e-02
  2.76902467e-02 -5.30121662e-02  5.29492274e-02 -3.48896831e-02
 -1.14793861e-02  5.71642742e-02  2.34510601e-02  2.83201374e-02
  3.43088545e-02 -2.52605826e-02 -2.74956180e-03  1.11613430e-01
 -4.59423512e-02  3.78590301e-02  5.24489731e-02 -1.10843340e-02
  3.97738889e-02  4.40275334e-02 -9.86763686e-02  4.93156873e-02
  5.83133884e-02  3.86063755e-02 -1.35348886e-02  5.34371696e-02
  1.16922045e-02  1.65352300e-02 -7.87372440e-02  6.50651334e-03
 -7.29354769e-02 -2.87381373e-02  4.19665165e-02  4.00986373e-02
  4.54302393e-02 -4.86594290e-02  8.49932134e-02  2.83170938e-02
  4.31271344e-02 -1.49641130e-02 -4.16012891e-02  3.26921679e-02
 -1.48696182e-02 -7.60352835e-02  4.04987670e-02  4.45211008e-02
 -8.22580978e-03  5.67849316e-02 -7.06847906e-02 -5.85100008e-03
  5.59257157e-02  5.51181100e-03 -9.46811065e-02 -2.37182602e-02
 -2.13240460e-02  3.01291589e-02 -1.22719646e-01  2.25664929e-01
  3.72492871e-03 -3.68516184e-02  5.14601320e-02  2.15749647e-02
  5.43196388e-02 -4.10554558e-03 -1.02532245e-01  1.72653142e-02
 -2.37592906e-02 -5.86955063e-02 -8.53642076e-02  2.51181256e-02
 -1.00565925e-02  3.09019629e-02 -2.63358764e-02  6.75685927e-02
  4.91216220e-03  5.38822338e-02  8.78472999e-03 -9.06088203e-02
 -3.30350804e-03  5.08272424e-02 -8.11170228e-03  5.47893792e-02
 -5.68830362e-03 -5.10660484e-02  4.15228046e-02 -3.06668936e-33
  4.94477861e-02 -3.42148803e-02  1.20240692e-02  3.93417217e-02
  9.55159776e-03  4.84259836e-02 -7.47572770e-03 -1.83191132e-02
 -5.05314171e-02  5.38617820e-02 -4.97718379e-02 -3.43738310e-02
 -1.77845713e-02  7.49860480e-02  6.92618191e-02  2.51946170e-02
  2.30657146e-03  7.44727775e-02 -2.59910189e-02  1.19926678e-02
  1.85175426e-02  2.40241736e-02  3.68071496e-02 -4.37283539e-04
 -7.05053359e-02 -2.33656000e-02 -1.98966321e-02 -6.26102313e-02
 -5.19919721e-03  3.08842864e-02 -1.61511370e-03  9.57770552e-03
 -8.12035426e-03 -9.40683763e-03 -2.06078291e-02 -1.55323800e-02
 -2.47041490e-02 -4.09198701e-02 -4.28580940e-02  6.35390915e-03
  1.78006869e-02 -4.07510586e-02 -2.98134182e-02  5.05866446e-02
 -5.22077270e-02 -2.04018652e-02  6.21361434e-02  5.23647629e-02
 -1.40556963e-02 -5.64741436e-03 -6.63628511e-04  2.89658420e-02
 -1.21615818e-02  6.56005293e-02 -5.24894781e-02 -3.69606763e-02
  1.04153920e-02  6.20616507e-03  9.21870768e-03 -2.42798701e-02
  3.62126064e-03  1.67477891e-01 -3.24237198e-02  6.82767062e-03
  1.35688577e-02 -8.99943709e-02  1.55014694e-02 -1.58689208e-02
  1.87493041e-02 -2.11715829e-02 -1.72942579e-02  5.05502895e-02
  1.21908516e-01  6.40133675e-03  4.49203216e-02  7.07405125e-05
 -3.44123654e-02  3.70421745e-02  3.32655311e-02 -7.13370070e-02
  7.88178295e-03 -5.34784943e-02 -3.21851894e-02  5.45307621e-03
  4.49602194e-02 -1.57462154e-02 -4.70340885e-02 -2.20444016e-02
 -1.33237140e-02 -1.47401942e-02 -6.72964379e-02  1.09407082e-02
  8.75357687e-02 -9.16792359e-03 -6.01216778e-02  1.60326997e-33
  2.14552525e-02  4.08735611e-02  4.16672006e-02  7.23747462e-02
  9.96761490e-03 -9.13787575e-04 -1.00210980e-02 -1.78143196e-02
  2.52250116e-03  4.62322459e-02  7.96928164e-03 -7.47372359e-02
 -2.25037001e-02 -3.42057049e-02  2.58330610e-02  7.70029500e-02
  6.96676150e-02  2.16701534e-02 -7.42515475e-02 -2.74941437e-02
 -4.68360111e-02 -1.83662269e-02 -1.20354434e-02 -7.83580616e-02
 -2.83437129e-02  3.32957953e-02  5.61803207e-02  1.00446336e-01
  4.96818461e-02  2.11450048e-02  1.19024873e-01 -5.66479266e-02
 -1.01877429e-01 -3.13585103e-02  8.42900127e-02  5.36424480e-02
 -4.20731194e-02 -3.73493731e-02  7.44321849e-03 -3.69022950e-04
 -2.20217761e-02 -6.67270552e-03 -4.66096848e-02  6.18700497e-02
  4.11288701e-02 -8.25829729e-02  5.17654009e-02  4.75853644e-02
 -2.24230532e-02  2.82150470e-02  3.14423889e-02 -1.94925386e-02
  6.01461940e-02  2.50934511e-02 -5.14309779e-02 -1.16855381e-02
 -9.86485370e-03 -4.60329512e-03  2.79210955e-02  7.88831562e-02
 -1.54938758e-03  6.22422248e-02 -1.50731662e-02  8.33423585e-02
 -5.20317955e-03  2.94359457e-02  3.09860259e-02 -5.60164265e-02
  1.14527382e-02 -5.43347746e-02 -6.64452603e-03 -1.08720504e-01
 -1.49671227e-01  5.69662489e-02 -2.54007410e-02 -1.41851744e-02
 -4.88819443e-02  4.56986064e-03 -2.20564590e-03 -4.73910607e-02
 -3.93156894e-02  2.19471869e-04  7.73109347e-02 -3.07689216e-02
 -1.87603869e-02 -6.16698638e-02  8.34561661e-02  8.75736997e-02
 -5.63740991e-02 -2.18948461e-02 -1.04930535e-01  9.71129313e-02
  4.31461968e-02 -3.68788391e-02  1.27358297e-02 -1.43518175e-08
 -3.82082611e-02  1.17673157e-02 -2.25767847e-02  4.09845226e-02
  2.57682558e-02  8.91561359e-02 -1.82344355e-02 -4.75448370e-02
  1.55421840e-02  5.29403389e-02  1.05612375e-01  3.66166905e-02
  3.74617502e-02 -8.77690967e-03  7.80112967e-02  4.51217294e-02
 -6.22463599e-02 -1.83500629e-02  2.56321281e-02 -5.25824465e-02
  6.42083734e-02  1.58873238e-02 -9.47672874e-02 -1.11678280e-02
 -5.84108643e-02  2.60102153e-02  1.60907898e-02  8.56721699e-02
  8.16200376e-02  3.23526897e-02 -3.43794413e-02  3.39835659e-02
 -4.68490645e-02 -2.94371676e-02  3.39161418e-02 -3.86305526e-02
 -7.10728019e-02 -2.33558267e-02 -5.55330375e-03  1.38089266e-02
 -7.05687553e-02  3.72025818e-02 -3.58781335e-03  1.53099913e-02
 -8.94915164e-02 -4.17148657e-02 -5.17275147e-02 -1.09074056e-01
 -2.46509966e-02  2.68997550e-02  8.45680293e-03 -6.03784434e-02
 -1.15257513e-03  1.13187030e-01  6.58182725e-02 -2.62257624e-02
 -7.66664324e-03  2.98135765e-02 -5.40294610e-02  3.23540494e-02
  2.31696635e-01  6.55726790e-02 -7.55042653e-04  1.31998183e-02].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

# Evaluate model

In [None]:
def normalize_about_median(data):
    data = (data - data.min()) / (data.max() - data.min())
    return data - data.median()

## Get relatedness scores

In [None]:
# get baselines
eval_harness.fit(X_train)
# get fuzzified baseline embeddings
X_train[get_fuzzy_emb_col("baseline_sent", 1)] = eval_harness.fuzzy_sent_embeddings[0]
X_train[get_fuzzy_emb_col("baseline_sent", 2)] = eval_harness.fuzzy_sent_embeddings[1]
X_train[get_fuzzy_emb_col("baseline_tok", 1)] = eval_harness.fuzzy_tok_embeddings[0]
X_train[get_fuzzy_emb_col("baseline_tok", 2)] = eval_harness.fuzzy_tok_embeddings[1]

sims_df = pd.DataFrame()
# get baseline embeddings' (non-fuzzy) cosine similarities
sims_df["baseline_sent_cos_sim"] = eval_harness.get_sbert_sentence_baseline()
sims_df["baseline_tok_cos_sim"] = eval_harness.get_sbert_token_baseline()


In [None]:
# get compositional embeddings for glosses using:
#   ALL composition strategies,
#   ALL similarity metrics
for sim_metric in sim_metrics_enum:
  print(f"\n\t=== Computing similarities with {sim_metric} metric ===")
  for s in STRATEGIES + ["baseline_sent", "baseline_tok"]:
    print(f"\t\tGetting compositional embedding relatedness scores for {s} approach...")
    sims = list()
    for i, row in X_train.iterrows():
      try:
        sims.append(model.fuzzifier.similarity(
            row[get_fuzzy_emb_col(s, 1)],
            row[get_fuzzy_emb_col(s, 2)],
            method=sim_metric,
        ))
      except Exception as e:
        print(row)
        raise e
    
    col = f"fuzzy_{s}_{sim_metric.value}_sim"
    # normalize similarity scores
    sims_df[col] = normalize_about_median(pd.Series(sims))

In [None]:
sims_df.head()

In [None]:
sims_df["sbert_token_avg_cos_sim"] = normalize_about_median(pd.Series(np.array(sbert_token_avg_sims).reshape(-1, )))

sims_df["is_related"] = train_labels["is_related"].values

In [None]:
sims_df.head()

## Classify predictions based on similarity thresholding

In [None]:
sim_eval_df = pd.DataFrame()
for col in sims_df.columns:
    if col == "is_related":
        continue
    # normaize the 
    sim_eval_df[f"{col}_pred"] = sims_df[col] > 0
sim_eval_df["is_related"] = sims_df["is_related"]
sim_eval_df.head()

## Visualize similarities

In [None]:
# Create subplots for each similarity metric
fig, axes = plt.subplots(
    1,
    len(sim_metrics),
    figsize=(8*len(sim_metrics), 6)
)
if len(sim_metrics) == 1:
    axes = [axes]

for metric_idx, sim_metric in enumerate(sim_metrics):
    ax = axes[metric_idx]
    
    # Get columns for this metric
    metric_cols = [col for col in sims_df.columns if col.endswith(f"{sim_metric}_sim")]
    
    cmap = plt.get_cmap("viridis")
    colors = cmap(np.linspace(0, 1, len(metric_cols)))
    
    for i, col in enumerate(metric_cols):
        if col == "sent_emb_cos_sim":
            continue
        ax.scatter(
            x=sims_df["sent_emb_cos_sim"],
            y=sims_df[col],
            color=colors[i],
            label=col.replace(f"fuzzy_", "").replace(f"_{sim_metric}_sim", ""),
            alpha=0.6
        )
    
    ax.set_xlabel("sentence embedding cosine similarity", fontsize=12)
    ax.set_ylabel(f"{sim_metric} fuzzy compositional similarity", fontsize=12)
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
    # ax.set_yscale("log")
    ax.set_title(f"Sentence Embedding vs. Fuzzy Compositional Similarity ({sim_metric})", fontsize=14)
    ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## Confusion Matrix

In [None]:
# Create confusion matrices for all metrics
for sim_metric in sim_metrics:
    # Get columns for this metric
    metric_cols = [col for col in sim_eval_df.columns if col.endswith(f"{sim_metric}_sim_pred")]
    
    if not metric_cols:
        continue
    
    # Calculate grid size
    n_cols = 3
    n_rows = int(np.ceil(len(metric_cols) / n_cols))
    
    plt.figure(figsize=(16, 5*n_rows))
    plt.suptitle(f"Confusion Matrices for different embedding composition methods using {sim_metric} similarity\n", fontsize=16)
    
    for i, col in enumerate(metric_cols):
        # Calculate confusion matrix
        cm = confusion_matrix(
            sim_eval_df['is_related'],
            sim_eval_df[col].astype(int)
        )

        # Plot confusion matrix
        plt.subplot(n_rows, n_cols, i+1)
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='viridis',
            xticklabels=['Unrelated', 'Related'],
            yticklabels=['Unrelated', 'Related']
        )
        plt.title(f'{col.replace("fuzzy_", "").replace(f"_{sim_metric}_sim_pred", "").replace("_", " ")}')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')

    plt.tight_layout()
    plt.show()

## Correlation Coefficients

In [None]:
# Create correlation heatmaps for each metric
for sim_metric in sim_metrics:
    # Get prediction columns for this metric
    metric_pred_cols = [c for c in sim_eval_df.columns if c.endswith(f"{sim_metric}_sim_pred")] + ["is_related"]
    # filter to just PCA columns, to avoid an eyesore
    metric_pred_cols = [c for c in metric_pred_cols if "_PCA" in c]

    if len(metric_pred_cols) <= 1:
        continue
    
    # Calculate correlation matrix
    corr_matrix = sim_eval_df[metric_pred_cols].corr()
    
    # Create figure with proper size
    fig, ax = plt.subplots(figsize=(10, 8))
    
    # Create heatmap
    sns.heatmap(
        corr_matrix,
        annot=True,
        fmt='.2f',
        cmap='viridis',
        ax=ax,
        square=True,
        cbar_kws={"shrink": 0.8}
    )
    
    # Simplify labels
    labels = [
        label.replace(f"fuzzy_", "")\
            .replace(f"_{sim_metric}_sim_pred", "")\
            .replace("_", " ")
        for label in corr_matrix.columns
    ]
    
    ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=10)
    ax.set_yticklabels(labels, rotation=0, fontsize=10)
    
    plt.title(f"Correlation coefficients between compositional models for {sim_metric} metric\n", fontsize=14)
    plt.tight_layout()
    plt.show()

## F1, Recall, Precision, Accuracy, etc.

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Create evaluation metrics dataframe for ALL similarity metrics
metrics_data = []

for col in sim_eval_df.columns:
    if col == "is_related":
        continue
    y_true = sim_eval_df['is_related']
    y_pred = sim_eval_df[col].astype(int)
    
    # Extract metric name from column
    metric_name = None
    for sim_metric in sim_metrics:
        if f"_{sim_metric}_sim_pred" in col:
            metric_name = sim_metric
            break
        
    strategy = col.replace('_pred', '').replace(f'_{metric_name}_sim', '').replace('fuzzy_', '')
    
    metrics_data.append({
        'strategy': strategy,
        'similarity_metric': metric_name,
        'model': col.replace('_pred', ''),
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1_score': f1_score(y_true, y_pred, zero_division=0)
    })

metrics_df = pd.DataFrame(metrics_data)
metrics_df = metrics_df.sort_values(['f1_score', 'similarity_metric', ], ascending=[False, True]).reset_index(drop=True)
metrics_df[:10]

In [None]:
# Create bar graphs for each similarity metric
metric_names = ['accuracy', 'precision', 'recall', 'f1_score']
metric_titles = ['Accuracy', 'Precision', 'Recall', 'F1 Score']


# Get baseline values for sent_emb_cos_sim
baseline_data = metrics_df[metrics_df['strategy'] == 'sent_emb']
baseline_values = {
    'accuracy': baseline_data['accuracy'].values[0] if len(baseline_data) > 0 else None,
    'precision': baseline_data['precision'].values[0] if len(baseline_data) > 0 else None,
    'recall': baseline_data['recall'].values[0] if len(baseline_data) > 0 else None,
    'f1_score': baseline_data['f1_score'].values[0] if len(baseline_data) > 0 else None
}


In [None]:
# Create combined bar graphs with different colors for each similarity metric
metric_names = ['accuracy', 'precision', 'recall', 'f1_score']
metric_titles = ['Accuracy', 'Precision', 'Recall', 'F1 Score']

# Define colors for each similarity metric
sim_metric_colors = {
    'npsd-ot': 'steelblue',
    'p-ot': 'coral',
    'cos': 'mediumseagreen'
}

# Get baseline values
baseline_data = metrics_df[metrics_df['strategy'] == 'sent_emb']
sbert_token_data = metrics_df[metrics_df['strategy'] == 'sbert_token_avg']

baseline_values = {
    'accuracy': baseline_data['accuracy'].values[0] if len(baseline_data) > 0 else None,
    'precision': baseline_data['precision'].values[0] if len(baseline_data) > 0 else None,
    'recall': baseline_data['recall'].values[0] if len(baseline_data) > 0 else None,
    'f1_score': baseline_data['f1_score'].values[0] if len(baseline_data) > 0 else None
}

sbert_token_values = {
    'accuracy': sbert_token_data['accuracy'].values[0] if len(sbert_token_data) > 0 else None,
    'precision': sbert_token_data['precision'].values[0] if len(sbert_token_data) > 0 else None,
    'recall': sbert_token_data['recall'].values[0] if len(sbert_token_data) > 0 else None,
    'f1_score': sbert_token_data['f1_score'].values[0] if len(sbert_token_data) > 0 else None
}

# Get all unique strategies (excluding baselines and None strategy)
all_strategies = sorted(metrics_df[
    (~metrics_df['strategy'].isin(['sent_emb', 'sbert_token_avg', 'None']))
]['strategy'].unique())

# Create one figure with 4 subplots (one for each metric)
fig, axes = plt.subplots(2, 2, figsize=(20, 16))
fig.suptitle('Evaluation Metrics by Strategy and Similarity Metric', fontsize=18, y=0.995)

# Create a viridis color map for similarity metrics
cmap = plt.cm.viridis
sim_metric_colors = {
    sim_metric: cmap(i / max(len(sim_metrics) - 1, 1))
    for i, sim_metric in enumerate(sim_metrics)
}

for idx, (metric, title) in enumerate(zip(metric_names, metric_titles)):
    ax = axes[idx // 2, idx % 2]
    
    # Prepare data for grouped bar chart
    bar_width = 0.25
    x_pos = np.arange(len(all_strategies))
    
    # Plot bars for each similarity metric
    for i, sim_metric in enumerate(sim_metrics):
        metric_data = metrics_df[
            (metrics_df['similarity_metric'] == sim_metric) & 
            (metrics_df['strategy'].isin(all_strategies))
        ].set_index('strategy')
        
        # Reindex to match all_strategies order
        metric_data = metric_data.reindex(all_strategies)
        values = metric_data[metric].values
        
        # Plot bars with offset
        offset = (i - len(sim_metrics)/2 + 0.5) * bar_width
        bars = ax.barh(
            x_pos + offset,
            values,
            bar_width,
            label=sim_metric,
            color=sim_metric_colors[sim_metric],
            alpha=0.8
        )

    
    # Add baseline lines
    if baseline_values[metric] is not None:
        ax.axvline(x=baseline_values[metric], color='red', linestyle=':', linewidth=2,
                  label='sent_emb baseline', alpha=0.7, zorder=0)
    
    if sbert_token_values[metric] is not None:
        ax.axvline(x=sbert_token_values[metric], color='purple', linestyle='--', linewidth=2,
                  label='sbert_token_avg baseline', alpha=0.7, zorder=0)
    
    ax.set_yticks(x_pos)
    ax.set_yticklabels([s.replace('_', ' ') for s in all_strategies], fontsize=9)
    ax.set_xlabel(title, fontsize=12)
    ax.set_ylabel('Strategy', fontsize=12)
    ax.set_xlim(0, 1.0)
    ax.grid(axis='x', alpha=0.3)
    ax.legend(loc='lower right', fontsize=9)

plt.tight_layout()
plt.show()

In [None]:
metrics_df.to_csv(f"full_train_eval-wn_free-n_components={n_components}-kernel_size={fuzzification_kernel_size}.tsv", sep="\t")