In [1]:
import os

import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

import metrics
from dialog import Dialog, DialogTriplet, dialog_from_file

In [2]:
def get_dialog_filepath(root_dir: str, dialog_id: str):
    """Return the path of the `<dialog_id>.json` file located under `root_dir`."""
    filename = f'{dialog_id}.json'
    return os.path.join(root_dir, filename)

In [3]:
def load_dialog_triplets(
    metadata: pd.DataFrame, root_dir: str,
) -> list[DialogTriplet]:
    """Build one `DialogTriplet` per row of `metadata`.

    Each row must provide the columns `anchor_conv`, `conv_1`, `conv_2`,
    `more_similar_conv` and `more_similar_conv_confidence`. The three
    conversation ids are resolved to `<root_dir>/<id>.json` and loaded with
    `dialog_from_file`.

    Args:
        metadata: Table describing the triplets, one triplet per row.
        root_dir: Directory that contains the dialog JSON files.

    Returns:
        The triplets, in the same order as the rows of `metadata`.
    """
    def _row_to_triplet(row):
        # The stored label is the `more_similar_conv` value minus one.
        shifted_label = row['more_similar_conv'] - 1
        confidence = row['more_similar_conv_confidence']

        # Resolve all three paths before touching the disk.
        anchor_path, first_path, second_path = [
            get_dialog_filepath(root_dir, row[column])
            for column in ('anchor_conv', 'conv_1', 'conv_2')
        ]

        return DialogTriplet(
            anchor_dialog=dialog_from_file(anchor_path),
            dialog_1=dialog_from_file(first_path),
            dialog_2=dialog_from_file(second_path),
            label=shifted_label,
            confidence_score=confidence,
        )

    return [_row_to_triplet(row) for row in metadata.to_dict('records')]

In [4]:
def _load_or_encode_embedding(
    embedding_filepath: str,
    text: str,
    model: SentenceTransformer,
) -> np.ndarray:
    """Return the embedding cached at `embedding_filepath`, encoding `text` on a miss.

    On a cache miss the embedding is computed with `model.encode` and saved
    to `embedding_filepath` so later calls can reuse it.
    """
    if os.path.isfile(embedding_filepath):
        return np.load(embedding_filepath)

    embedding = model.encode([text])[0]
    # `np.save` does not create missing directories. Create the target
    # directory on demand so a fresh cache location (or a model name that
    # contains a path separator) does not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(embedding_filepath) or '.', exist_ok=True)
    np.save(embedding_filepath, embedding)
    return embedding


def compute_embeddings(
    dialog: Dialog,
    cache_dir: str,
    model: SentenceTransformer,
    model_name: str,
) -> Dialog:
    """Attach a dialog-level and per-turn embeddings to `dialog`.

    The dialog-level embedding encodes all utterances joined by single
    spaces and is stored in `dialog.embedding`. Every turn additionally gets
    the embedding of its own utterance in `turn.embedding`.

    Embeddings are cached as `.npy` files in `cache_dir`:
    `<model_name>_<dialog_id>.npy` for the whole dialog and
    `<model_name>_<dialog_id>_<turn index>.npy` for each turn. A cached file
    is reused whenever it exists; the cache key does not depend on the
    utterance text, so stale files must be removed manually if the dialogs
    change.

    Args:
        dialog: Dialog to annotate; it is modified in place.
        cache_dir: Directory holding the cached embeddings (created if missing).
        model: Encoder exposing `encode(list_of_texts)`.
        model_name: Prefix used in the cache file names.

    Returns:
        The same `dialog` object, now carrying the embeddings.
    """
    document = ' '.join([turn.utterance for turn in dialog.turns])
    dialog.embedding = _load_or_encode_embedding(
        os.path.join(
            cache_dir, '{0}_{1}.npy'.format(model_name, dialog.dialog_id),
        ),
        document,
        model,
    )

    for idx, turn in enumerate(dialog.turns):
        turn.embedding = _load_or_encode_embedding(
            os.path.join(
                cache_dir,
                '{0}_{1}_{2}.npy'.format(model_name, dialog.dialog_id, idx),
            ),
            turn.utterance,
            model,
        )
    return dialog

In [5]:
# Input data and embedding-cache locations (relative paths).
dialogs_dir = 'conversation-similarity/dialogs'
metadata_filepath = 'conversation-similarity/conved.csv'
embeddings_dir = 'conversation-similarity/cache'

# Make sure the cache directory exists before any embedding is written to it.
os.makedirs(name=embeddings_dir, exist_ok=True)

In [6]:
# Read the triplet metadata and load every dialog it references.
triplets = load_dialog_triplets(
    metadata=pd.read_csv(metadata_filepath),
    root_dir=dialogs_dir,
)

In [7]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_name = 'all-MiniLM-L12-v2'
model = SentenceTransformer(model_name, device=device)

In [8]:
# Attach (cached) embeddings to the three dialogs of every triplet.
for triplet in tqdm(triplets):
    for dialog_attr in ('anchor_dialog', 'dialog_1', 'dialog_2'):
        embedded_dialog = compute_embeddings(
            getattr(triplet, dialog_attr), embeddings_dir, model, model_name,
        )
        setattr(triplet, dialog_attr, embedded_dialog)

100%|██████████| 502/502 [00:03<00:00, 136.03it/s]


In [9]:
# Passed as `confidence_threshold` to `metrics.get_metric_agreement`.
# NOTE(review): presumably triplets whose annotation confidence is below this
# value are excluded from the agreement score — confirm in the `metrics` module.
CONFIDENCE_THRESHOLD = 0.75

In [10]:
# Metrics that are configured without an `embedding_type`.
metrics_to_evaluate = {
    'Constant distance': metrics.ExampleMetric(is_inverted=False),
    'ConvED': metrics.ConversationalEditDistance(is_inverted=False),
}

# The same four embedding-based metrics, registered once per embedding type.
# Insertion order is kept because it determines the row order of the results.
for embedding_type, description in (
    ('turn', 'average embedding'),
    ('dialog', 'dialog embedding'),
):
    metrics_to_evaluate.update({
        'Cosine distance ({0})'.format(description): metrics.CosineDistance(
            is_inverted=False, embedding_type=embedding_type,
        ),
        'Lp distance p=1 ({0})'.format(description): metrics.LpDistance(
            is_inverted=False, embedding_type=embedding_type, p=1,
        ),
        'Lp distance p=2 ({0})'.format(description): metrics.LpDistance(
            is_inverted=False, embedding_type=embedding_type, p=2,
        ),
        'Dot product similarity ({0})'.format(description): metrics.DotProductSimilarity(
            is_inverted=True, embedding_type=embedding_type,
        ),
    })

In [11]:
# Score every registered metric against the annotated triplets, keeping the
# names and scores in two parallel lists (same order as `metrics_to_evaluate`).
metric_names = list(metrics_to_evaluate)
scores = []
for metric_name in metric_names:
    metric = metrics_to_evaluate[metric_name]
    score = metrics.get_metric_agreement(
        dialog_triplets=triplets,
        metric=metric,
        confidence_threshold=CONFIDENCE_THRESHOLD,
    )
    scores.append(score)

In [12]:
# One row per metric: its display name and its agreement score.
results = pd.DataFrame(
    data={'Metric': metric_names, 'Score': scores},
)

In [13]:
# Last expression of the cell: renders the agreement table as the cell output.
results

Unnamed: 0,Metric,Score
0,Constant distance,0.521739
1,ConvED,0.608696
2,Cosine distance (average embedding),0.491304
3,Lp distance p=1 (average embedding),0.5
4,Lp distance p=2 (average embedding),0.521739
5,Dot product similarity (average embedding),0.517391
6,Cosine distance (dialog embedding),0.5
7,Lp distance p=1 (dialog embedding),0.486957
8,Lp distance p=2 (dialog embedding),0.5
9,Dot product similarity (dialog embedding),0.5
