# Generate plots for all datasets

In [None]:
import matplotlib.pyplot as plt
import os
import json
import seaborn as sns

# Every sub-folder of results/ is treated as one dataset; stray files such as
# .DS_Store are ignored. Sorted so figures are produced in a stable order.
datasets = sorted(
    f_
    for f_ in os.listdir("results/")
    if f_ != ".DS_Store" and os.path.isdir(os.path.join("results", f_))
)

# Figure size (width, height) per dataset for the per-target histogram grid.
dataset_to_figure_size = {
    "hayati_politeness": (20, 6),
    "empathy#empathy_bin": (20, 6),
    "hypo-l": (20, 6),
    "questionintimacy": (20, 20),
    "talkdown-pairs": (20, 6),
    "crowdflower": (20, 20),
    "ten-dim": (20, 20),
    "sentiment": (20, 6),
    "same-side-pairs": (20, 6),
    "hate-speech": (20, 6),
}
# Used for dataset folders that have no entry in the table above.
DEFAULT_FIGURE_SIZE = (20, 6)

# Datasets for which only the GPT-4 similarity file is loaded (no Llama file).
GPT_ONLY_DATASETS = {"hate-speech"}

# One grid column per metric: (metric key, Llama legend label, GPT-4 legend label).
HISTOGRAM_PANELS = (
    ("bleu_score", "BLEU Llama", "BLEU GPT-4"),
    ("rouge_score", "rouge Llama", "rouge GPT-4"),
    ("transformer_similarity", "Llama cosine", "GPT-4 cosine"),
    ("percentage_token_overlap", "Token overlap Llama", "Token overlap GPT-4"),
)


def _load_json(path):
    """Parse the JSON file at ``path``, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def _metrics_by_target(results):
    """Group the four plotted metrics of ``results`` by each entry's target.

    Each entry is read as ``entry["target"]`` and ``entry["metrics"]``; the
    token overlap is nested under ``metrics["vocab_overlap"]``.
    Returns ``{target: {metric key: [values...]}}``.
    """
    grouped = {}
    for result in results:
        metrics = result["metrics"]
        per_target = grouped.setdefault(
            result["target"], {key: [] for key, _, _ in HISTOGRAM_PANELS}
        )
        per_target["bleu_score"].append(metrics["bleu_score"])
        per_target["rouge_score"].append(metrics["rouge_score"])
        per_target["transformer_similarity"].append(
            metrics["transformer_similarity"]
        )
        per_target["percentage_token_overlap"].append(
            metrics["vocab_overlap"]["percentage_token_overlap"]
        )
    return grouped


# savefig does not create missing directories.
os.makedirs("assets", exist_ok=True)

for dataset in datasets:
    if dataset in GPT_ONLY_DATASETS:
        # No Llama file is read here; bind `results` explicitly so it can
        # never be undefined or left over from the previous dataset.
        results = []
        results_gpt = _load_json(f"results/{dataset}/gpt-4_similarity.json")
        length = len(results_gpt)
    else:
        results = _load_json(f"results/{dataset}/llama-2-70b_similarity.json")
        results_gpt = _load_json(f"results/{dataset}/similarity.json")
        length = len(results)

    metrics_llama = _metrics_by_target(results)
    metrics_gpt = _metrics_by_target(results_gpt)

    # Union of both models' targets, so a target present in only one file
    # cannot raise KeyError; sorted so the row order is deterministic.
    targets = sorted(set(metrics_llama) | set(metrics_gpt), key=str)
    if not targets:
        # plt.subplots rejects a grid with zero rows.
        print(f"Skipping {dataset}: no results to plot")
        continue

    # squeeze=False keeps `ax` two-dimensional even with a single target row.
    fig, ax = plt.subplots(
        len(targets),
        len(HISTOGRAM_PANELS),
        figsize=dataset_to_figure_size.get(dataset, DEFAULT_FIGURE_SIZE),
        squeeze=False,
    )
    fig.suptitle(f"Dataset: {dataset} - length: {length}")

    for idx, target in enumerate(targets):
        # The title of the first column labels the entire row.
        ax[idx][0].set_title(f"Target: {target}")

        llama_metrics = metrics_llama.get(target, {})
        gpt_metrics = metrics_gpt.get(target, {})

        for col, (key, llama_label, gpt_label) in enumerate(HISTOGRAM_PANELS):
            panel = ax[idx][col]
            llama_values = llama_metrics.get(key, [])
            gpt_values = gpt_metrics.get(key, [])

            # Draw a model's histogram only when it has data for this target.
            if llama_values:
                sns.histplot(
                    llama_values, ax=panel, color="blue", label=llama_label, kde=True
                )
            if gpt_values:
                sns.histplot(
                    gpt_values, ax=panel, color="red", label=gpt_label, kde=True
                )
            if llama_values or gpt_values:
                panel.legend()

            # x-axis should be 0 to 1
            panel.set_xlim(0, 1)

    fig.tight_layout()
    fig.savefig(f"assets/{dataset}-histogram-combined.png")

In [None]:
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from utils import assert_path
from utils import load_json

In [None]:
# Dataset whose result files are read and plotted by the cells below.
dataset_name: str = "hayati_politeness"

# Pass the dataset's asset folder to the project helper before any figure is
# saved there.
# NOTE(review): presumably assert_path creates or validates the folder -
# confirm against utils.
assets_dir = f"assets/{dataset_name}/"
assert_path(assets_dir)

# spaCy similarity

In [None]:
# Load the spaCy similarity scores for the selected dataset.
# The file is read as a mapping of target -> list of scores.
filename: str = f"results/{dataset_name}/spacy_similarity.json"
data: Dict = load_json(filename)

# One panel per target instead of a fixed 1x2 grid, so a dataset with more
# than two targets no longer raises IndexError on ax[i]. Each panel is 5x5,
# which gives the previous (10, 5) figure for two targets.
n_targets = max(len(data), 1)  # plt.subplots needs at least one column
fig, axes_grid = plt.subplots(
    1, n_targets, figsize=(5 * n_targets, 5), squeeze=False
)
ax = axes_grid[0]  # squeeze=False keeps this indexable even for one target

for i, target in enumerate(data):
    values: List[float] = data[target]

    # Histogram with a KDE overlay, plus dashed markers for mean and median.
    sns.histplot(values, kde=True, ax=ax[i])
    mean_value = np.mean(values)
    median_value = np.median(values)
    ax[i].axvline(
        mean_value, color="red", linestyle="--", label=f"Mean: {mean_value:.2f}"
    )
    ax[i].axvline(
        median_value,
        color="green",
        linestyle="--",
        label=f"Median: {median_value:.2f}",
    )
    ax[i].set_title(target)
    ax[i].legend()
    ax[i].set_xlabel("Cosine Similarity")

# add overall title
fig.suptitle(f"Distribution of Spacy Similarity Scores for {dataset_name}")
plt.tight_layout()
plt.savefig(f"assets/{dataset_name}/spacy_similarity_distribution.png")

# Vocab overlap

In [None]:
# Load the vocabulary-overlap results for the selected dataset.
# The file is read as a mapping of target -> list of entries, each carrying a
# "percentage_token_overlap" value.
filename: str = f"results/{dataset_name}/vocab_overlap.json"
data: Dict = load_json(filename)

# One panel per target instead of a fixed 1x2 grid, so a dataset with more
# than two targets no longer raises IndexError on ax[i]. Each panel is 5x5,
# which gives the previous (10, 5) figure for two targets.
n_targets = max(len(data), 1)  # plt.subplots needs at least one column
fig, axes_grid = plt.subplots(
    1, n_targets, figsize=(5 * n_targets, 5), squeeze=False
)
ax = axes_grid[0]  # squeeze=False keeps this indexable even for one target

for i, target in enumerate(data):
    percentages = [entry["percentage_token_overlap"] for entry in data[target]]

    # Histogram with a KDE overlay, plus dashed markers for mean and median.
    sns.histplot(percentages, kde=True, ax=ax[i])
    mean = np.mean(percentages)
    median = np.median(percentages)

    ax[i].axvline(mean, color="red", linestyle="--", label=f"Mean: {mean:.2f}")
    ax[i].axvline(
        median,
        color="green",
        linestyle="--",
        label=f"Median: {median:.2f}",
    )
    ax[i].set_title(target)
    ax[i].legend()
    ax[i].set_xlabel("Percentage Token Overlap")

# add overall title
fig.suptitle(f"Distribution of Vocab Overlap for {dataset_name}")
plt.tight_layout()
plt.savefig(f"assets/{dataset_name}/vocab_overlap_distribution.png")

# BLEU and ROUGE

In [None]:
# Load the per-target ROUGE and BLEU scores for the selected dataset.
# Both files are read as mappings of target -> list of scores; every target in
# the ROUGE file is also looked up in the BLEU file.
rouge = load_json(f"results/{dataset_name}/rouge.json")
bleu = load_json(f"results/{dataset_name}/bleu.json")

# One panel per target instead of a fixed 1x2 grid, so a dataset with more
# than two targets no longer raises IndexError on ax[i]. Each panel is 5x5,
# which gives the previous (10, 5) figure for two targets.
n_targets = max(len(rouge), 1)  # plt.subplots needs at least one column
fig, axes_grid = plt.subplots(
    1, n_targets, figsize=(5 * n_targets, 5), squeeze=False
)
ax = axes_grid[0]  # squeeze=False keeps this indexable even for one target

for i, target in enumerate(rouge):
    bleu_score: List[float] = bleu[target]
    rouge_score: List[float] = rouge[target]

    # Overlay both score distributions on the same panel.
    sns.histplot(bleu_score, kde=True, ax=ax[i], label="BLEU")
    sns.histplot(rouge_score, kde=True, ax=ax[i], label="ROUGE")

    ax[i].set_title(target)
    ax[i].legend()
    ax[i].set_xlabel("Score")

fig.suptitle(f"Distribution of BLEU and ROUGE for {dataset_name}")
plt.tight_layout()
plt.savefig(f"assets/{dataset_name}/bleu_rouge_distribution.png")

# Combined diversity metrics

In [None]:
import matplotlib.pyplot as plt
import os
import json
import seaborn as sns

# Every sub-folder of results/ is treated as one dataset; stray files such as
# .DS_Store are ignored. Sorted so figures are produced in a stable order.
datasets = sorted(
    f_
    for f_ in os.listdir("results/")
    if f_ != ".DS_Store" and os.path.isdir(os.path.join("results", f_))
)

# Figure size (width, height) per dataset for the two-panel aggregate plot.
dataset_to_figure_size = {
    "hayati_politeness": (10, 5),
    "empathy#empathy_bin": (10, 5),
    "hypo-l": (10, 5),
    "questionintimacy": (10, 5),
    "talkdown-pairs": (10, 5),
    "crowdflower": (10, 5),
    "ten-dim": (10, 5),
    "sentiment": (10, 5),
    "same-side-pairs": (10, 5),
    "hate-speech": (10, 5),
}
# Used for dataset folders that have no entry in the table above.
DEFAULT_AGGREGATE_FIGURE_SIZE = (10, 5)

# Datasets for which only the GPT-4 similarity file is loaded (no Llama file).
GPT_ONLY_DATASETS = {"hate-speech"}


def _load_json(path):
    """Parse the JSON file at ``path``, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


for dataset in datasets:
    if dataset in GPT_ONLY_DATASETS:
        # No Llama file is read here; bind `results` explicitly so it can
        # never be undefined or left over from the previous dataset.
        results = []
        results_gpt = _load_json(f"results/{dataset}/gpt-4_similarity.json")
        length = len(results_gpt)
    else:
        results = _load_json(f"results/{dataset}/llama-2-70b_similarity.json")
        results_gpt = _load_json(f"results/{dataset}/similarity.json")
        length = len(results)

    # Pool the two plotted metrics over all targets of the dataset.
    metrics_llama = {
        "Llama cosine": [
            result["metrics"]["transformer_similarity"] for result in results
        ],
        "percentage_token_overlap": [
            result["metrics"]["vocab_overlap"]["percentage_token_overlap"]
            for result in results
        ],
    }
    metrics_gpt = {
        "GPT cosine": [
            result["metrics"]["transformer_similarity"] for result in results_gpt
        ],
        "percentage_token_overlap": [
            result["metrics"]["vocab_overlap"]["percentage_token_overlap"]
            for result in results_gpt
        ],
    }

    fig, ax = plt.subplots(
        1, 2, figsize=dataset_to_figure_size.get(dataset, DEFAULT_AGGREGATE_FIGURE_SIZE)
    )
    fig.suptitle(f"Dataset: {dataset} - length: {length}")

    # (panel, values, legend label, colour) for each density curve:
    # cosine similarity on the left panel, token overlap on the right.
    curves = (
        (ax[0], metrics_llama["Llama cosine"], "Llama cosine", "blue"),
        (ax[0], metrics_gpt["GPT cosine"], "GPT-4 cosine", "red"),
        (
            ax[1],
            metrics_llama["percentage_token_overlap"],
            "Token overlap Llama",
            "blue",
        ),
        (
            ax[1],
            metrics_gpt["percentage_token_overlap"],
            "Token overlap GPT-4",
            "red",
        ),
    )
    for panel, values, label, color in curves:
        # Skip a model that has no data for this dataset (GPT-only datasets).
        if values:
            sns.kdeplot(values, ax=panel, label=label, color=color, fill=True)

    for panel in ax:
        # Only request a legend when at least one labelled curve was drawn.
        if panel.get_legend_handles_labels()[0]:
            panel.legend()
        # x-axis should be 0 to 1
        panel.set_xlim(0, 1)

    plt.tight_layout()

    # Saving is currently disabled; uncomment to write the figure to disk.
    # plt.savefig(f"assets/{dataset}-histogram-combined-aggregate.png")