In [None]:
import preamble
import numpy as np
from numpy.typing import NDArray
from sklearn.model_selection import train_test_split
import scipy.stats
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from dataset import Dataset
from model import SVMKNNModel, MLPModel, DualAutoencoderModel
from embedder import SVMKNNEmbedder, BinaryEmbedder, GloveEmbedder, SBERTEmbedder
from metrics import Metrics

In [None]:
# Locations of the on-disk inputs, relative to the notebook's working directory.
DATASET_PATH = '../data/trimmed_trials'
GLOVE_EMBEDDINGS_PATH = '../data/limited_vocab_embeddings_with_special_no_word_token_50d.pt'

# Shared scorer and dataset handle reused by the cells below
# (constructed in the same order as before: Metrics first, then Dataset).
metrics, dataset = Metrics(), Dataset(DATASET_PATH)

In [None]:
# Adverbs that may qualify a movement; the empty string means "no modifier".
modifiers = [
    '',
    'slightly',
    'greatly',
    'smoothly',
    'sharply',
    'slowly',
    'quickly',
    'lightly',
    'significantly',
    'softly',
    'harshly',
    'gradually',
    'immediately',
]

# Movement directions: either a single axis word or two space-separated words.
directions = [
    'backward',
    'backward down',
    'backward left',
    'backward right',
    'backward up',
    'down',
    'down forward',
    'down left',
    'down right',
    'forward',
    'forward left',
    'forward right',
    'forward up',
    'left',
    'left up',
    'right',
    'right up',
    'up',
]

# Every (sentence, components) pair the SBERT embedder is constructed with.
#   sentence   -- 'Move <modifier> <direction>.'; an empty modifier leaves two
#                 consecutive spaces, which the single-space splitting done in
#                 later cells relies on, so the format is kept as is.
#   components -- [modifier, first direction word, second direction word or ''].
# The second direction word used to be guarded by `'and' in direction`, which is
# never true for the space-separated entries above, so composite directions lost
# their second word. `str.partition(' ')` returns (head, sep, tail); `[::2]`
# keeps (head, tail), and tail is '' for single-word directions.
VOCABULARY = [
    (
        f'Move {modifier} {direction}.',
        np.array([modifier, *direction.partition(' ')[::2]], dtype='U16'),
    )
    for modifier in modifiers
    for direction in directions
]
# Final entry: the empty phrase with all-empty components.
VOCABULARY += [('', np.array(['', '', ''], dtype='U16'))]

def get_results(force_data: NDArray, phrase_data: NDArray, merged_phrase_data: NDArray, exclude_modifier: str = None, exclude_direction: str = None, exclude_composites: bool = False, seed: int = 0, verbose: bool = False, graph_index: int = -1, use_sbert_embedder: bool = False):
    """Train every model on one train/test split and return its mean scores.

    The split is selected by the first option that is set, in this order:

    1. ``exclude_modifier``: rows whose ``phrase_data[:, 0]`` equals it are the test set.
    2. ``exclude_direction``: rows whose ``phrase_data[:, 1]`` equals it are the test set.
    3. ``exclude_composites``: rows whose ``phrase_data[:, 1]`` contains a space are the test set.
    4. Otherwise a random 90/10 ``train_test_split`` with ``random_state=seed``.

    Args:
        force_data: Force samples, indexed along axis 0 in step with the phrase arrays.
        phrase_data: Un-merged phrase array; used for the hold-out masks and as the
            phrase input of the GloVe-embedded models.
        merged_phrase_data: Merged phrase array; phrase input of every other model.
        exclude_modifier: Modifier to hold out, or ``None``.
        exclude_direction: Direction to hold out, or ``None``.
        exclude_composites: Hold out rows whose direction contains a space.
        seed: Random state of the random split only; ignored by the hold-out splits.
        verbose: Forwarded to ``model.train`` and enables per-sample printing.
        graph_index: Unused; kept so existing callers keep working.
        use_sbert_embedder: Evaluate the two SBERT-embedded models instead of the
            default five models.

    Returns:
        Array of shape ``(5, n_models)`` whose rows are, in order: mean modifier
        similarity, mean direction similarity, their average, mean force-profile
        score and mean force-profile direction score.
    """
    if use_sbert_embedder:
        embedders = [
            SBERTEmbedder(VOCABULARY),
            SBERTEmbedder(VOCABULARY),
        ]
        models = [
            MLPModel(embedders[0], phrase_mse_loss=True),
            DualAutoencoderModel(embedders[1], phrase_mse_loss=True),
        ]
        phrase_datas = [merged_phrase_data, merged_phrase_data]
        epochs = [1024, 1024]
    else:
        embedders = [
            SVMKNNEmbedder(np.unique(merged_phrase_data), merged_phrase_data.shape[-1]),
            BinaryEmbedder(np.unique(merged_phrase_data), merged_phrase_data.shape[-1]),
            GloveEmbedder(GLOVE_EMBEDDINGS_PATH, phrase_data.shape[-1]),
            BinaryEmbedder(np.unique(merged_phrase_data), merged_phrase_data.shape[-1]),
            GloveEmbedder(GLOVE_EMBEDDINGS_PATH, phrase_data.shape[-1]),
        ]
        models = [
            SVMKNNModel(embedders[0]),
            MLPModel(embedders[1]),
            MLPModel(embedders[2], phrase_mse_loss=True),
            DualAutoencoderModel(embedders[3]),
            DualAutoencoderModel(embedders[4], phrase_mse_loss=True),
        ]
        # GloVe-embedded models (indices 2 and 4) consume the un-merged phrases.
        phrase_datas = [merged_phrase_data, merged_phrase_data, phrase_data, merged_phrase_data, phrase_data]
        epochs = [0, 1024, 1024, 1024, 1024]

    # Every model sees the same force samples.
    force_datas = [force_data] * len(models)

    # Build the hold-out mask once; it does not depend on the model.
    # NOTE(review): the direction/composite masks read column 1 of the un-merged
    # `phrase_data`, while the standalone direction cell in this notebook filters
    # on `dataset.merge_directions(...)[:, 1]` -- confirm which one is intended.
    if exclude_modifier is not None:
        test_mask = phrase_data[:, 0] == exclude_modifier
    elif exclude_direction is not None:
        test_mask = phrase_data[:, 1] == exclude_direction
    elif exclude_composites:
        test_mask = np.char.find(phrase_data[:, 1], ' ') != -1
    else:
        test_mask = None

    if test_mask is None:
        splits = [
            train_test_split(force, phrase, train_size=0.9, random_state=seed)
            for force, phrase in zip(force_datas, phrase_datas)
        ]
        force_trains, force_tests, phrase_trains, phrase_tests = zip(*splits)
    else:
        train_mask = ~test_mask
        force_trains = [force[train_mask] for force in force_datas]
        force_tests = [force[test_mask] for force in force_datas]
        phrase_trains = [phrase[train_mask] for phrase in phrase_datas]
        phrase_tests = [phrase[test_mask] for phrase in phrase_datas]

    for model, force_train, phrase_train, epoch in zip(models, force_trains, phrase_trains, epochs):
        model.train(force_train, phrase_train, epoch, verbose)

    force_predictions = [model.phrase_to_force(phrase_test) for model, phrase_test in zip(models, phrase_tests)]
    phrase_predictions = [model.force_to_phrase(force_test) for model, force_test in zip(models, force_tests)]

    modifier_similarities = [[] for _ in models]
    direction_similarities = [[] for _ in models]
    curve_shape_acc = [[] for _ in models]
    agg_dir_acc = [[] for _ in models]

    # Phrase predictions of every model are scored against the first model's
    # test phrases (phrase_tests[0]).
    for i in range(phrase_predictions[0].shape[0]):
        target = phrase_tests[0][i]
        if verbose:
            # Formatted in two steps: nesting same-quote f-strings is a
            # SyntaxError before Python 3.12.
            target_text = ' '.join(target).strip()
            print(f'{target_text:30}', end='')
        for j, phrase_prediction in enumerate(phrase_predictions):
            modifier_similarity = metrics.score_modifier(target, phrase_prediction[i])
            direction_similarity = metrics.score_direction(target, phrase_prediction[i])

            modifier_similarities[j].append(modifier_similarity)
            direction_similarities[j].append(direction_similarity)

            if verbose:
                predicted_text = ' '.join(phrase_prediction[i]).strip()
                print(f'({modifier_similarity:.4f}) ({direction_similarity:.4f}) {predicted_text:30}', end='')
        if verbose:
            print('')
    if verbose:
        print('')

    # Force predictions are scored against each model's own test forces.
    for i in range(force_predictions[0].shape[0]):
        for j, force_prediction in enumerate(force_predictions):
            mse = metrics.score_force_profile(force_tests[j][i], force_prediction[i])
            dir_sim = metrics.score_force_profile_direction(force_tests[j][i], force_prediction[i])

            curve_shape_acc[j].append(mse)
            agg_dir_acc[j].append(dir_sim)

            if verbose:
                mse_text = f'{mse:.4f}'
                print(f'{mse_text:15}', end='')
        if verbose:
            print('')
    if verbose:
        print('')

    # Average over test samples (axis 1), leaving one value per model.
    mod_sim = np.mean(np.array(modifier_similarities), axis=1)
    dir_sim = np.mean(np.array(direction_similarities), axis=1)
    phrase_sim = 0.5 * (mod_sim + dir_sim)
    fp_acc = np.mean(np.array(curve_shape_acc), axis=1)
    fd_acc = np.mean(np.array(agg_dir_acc), axis=1)
    return np.array([mod_sim, dir_sim, phrase_sim, fp_acc, fd_acc])

def plot_results(results: NDArray, title: str = '', model_labels: list = None) -> None:
    """Draw a heatmap of per-model scores, coloured by row-wise z-score.

    Args:
        results: Array of shape ``(5, n_models)`` with rows ordered
            [ModSim, DirSim, PhraseSim, FPAcc, FDAcc], i.e. the order returned
            by ``get_results``. Rows are reordered for display.
        title: Figure title.
        model_labels: One x-axis label per column of ``results``. Defaults to
            the labels of the five non-SBERT models; pass explicit labels when
            ``results`` has a different number of columns.
    """
    if model_labels is None:
        model_labels = ["$SVM/KNN$", "$DMLP_B$", "$DMLP_S$", "$DAE_B$", "$DAE_S$"]
    metric_labels = ["FPAcc", "FDAcc", "ModSim", "DirSim", "PhraseSim"]

    # Reorder rows to match metric_labels: force metrics first, phrase metrics after.
    results = results[[3, 4, 0, 1, 2]]

    # Colours come from the z-score of each metric across models; the cell
    # annotations show the raw values.
    cutoff = 2.0
    z_scores = np.apply_along_axis(scipy.stats.zscore, 1, results)
    # Flip the FPAcc row so that smaller raw values get the higher colour
    # (presumably because it is an error measure -- TODO confirm).
    z_scores[0] = -z_scores[0]

    fig, ax = plt.subplots(figsize=(7, 3.5))
    sns.heatmap(
        z_scores, vmin=-cutoff, vmax=cutoff, cmap="RdYlGn", annot=results, fmt=".3f", linewidths=0.625,
        cbar=True, xticklabels=model_labels, yticklabels=metric_labels, annot_kws={"size": 16},
        cbar_kws={'label': '$z$-score'}, ax=ax)
    ax.xaxis.set_label_position('bottom')
    ax.tick_params(axis='x', labelsize=14.5)
    ax.tick_params(axis='y', labelsize=14.5)
    ax.set_title(title, fontsize=14.5)
    plt.show()
    # Release the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)

In [None]:
# In-distribution check of the SBERT-embedded dual autoencoder over N random splits.
N = 2

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_phrase(phrase_data)


def split_phrase(phrase: str) -> list:
    """Split a 'Move <modifier> <direction>[ <direction>].' sentence into parts.

    Returns ``[modifier, first direction, second direction or '']``. The
    sentence is split on single spaces, so an empty modifier (two consecutive
    spaces) yields ``''`` as the modifier. The empty phrase, which is part of
    VOCABULARY, maps to three empty strings instead of raising IndexError.
    """
    words = phrase.split(' ')
    if len(words) < 2:
        return ['', '', '']
    if len(words) == 3:
        # 'Move', modifier, 'direction.' -> strip the trailing period.
        return [words[1], words[2][:-1], '']
    if len(words) == 4:
        # 'Move', modifier, first direction, 'second.' -> strip the trailing period.
        return [words[1], words[2], words[3][:-1]]
    # Any other word count keeps the original fallback: second-to-last word, no second direction.
    return [words[1], words[-2], '']


counter = 0

modsim = 0.0
dirsim = 0.0

# One scorer is enough for all seeds.
metrics = Metrics()

for seed in range(N):
    torch.manual_seed(seed)

    dae = DualAutoencoderModel(SBERTEmbedder(VOCABULARY), phrase_mse_loss=True)

    force_train, force_test, phrase_train, phrase_test = train_test_split(force_data, merged_phrase_data, train_size=0.9, random_state=seed)

    dae.train(force_train, phrase_train, epochs=1024, verbose=True)

    dae_predictions = dae.force_to_phrase(force_test)
    print(dae_predictions)

    for pred, actual in zip(dae_predictions, phrase_test):
        pred, actual = split_phrase(pred), split_phrase(actual)
        modsim += metrics.score_modifier(pred, actual)
        dirsim += metrics.score_direction(pred, actual)
        counter += 1

# Mean similarity over every test sample of every seed.
modsim /= counter
dirsim /= counter

# Predictions and targets of the last seed only.
print(dae_predictions)
print(phrase_test)

# print(modsim, dirsim, (modsim + dirsim) / 2)
# results = np.array([
#     [11.714, 0.902, 0.545, 0.982, 0.764],
#     [4.523, 0.975, 0.516, 0.978, 0.747],
#     [4.700, 0.973, 0.491, 0.928, 0.710],
#     [4.454, 0.977, 0.581, 0.979, 0.780],
#     [4.582, 0.972, 0.5757338456691258, 0.9344913216692114, 0.7551125836691686],
# ]).T[[2, 3, 4, 0, 1]]
# plot_results(results, "Mean Model Scores for In-Distribution Samples")

In [None]:
# Held-out-modifier check of an SBERT-embedded model: each listed modifier is
# removed from training and used as the test set.
# NOTE(review): this rebinds the global `modifiers` that the VOCABULARY cell
# reads, so re-running that cell after this one would shrink VOCABULARY.
modifiers = [
    # 'slightly',
    'greatly',
    # 'smoothly',
    # 'sharply',
    # 'slowly',
    # 'quickly',
    # 'lightly',
    # 'significantly',
    # 'softly',
    # 'harshly',
    # 'gradually',
    # 'immediately',
]

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_phrase(phrase_data)


def split_phrase(phrase: str) -> list:
    """Split a 'Move <modifier> <direction>[ <direction>].' sentence into parts.

    Returns ``[modifier, first direction, second direction or '']``. The
    sentence is split on single spaces, so an empty modifier (two consecutive
    spaces) yields ``''`` as the modifier. The empty phrase, which is part of
    VOCABULARY, maps to three empty strings instead of raising IndexError.
    """
    words = phrase.split(' ')
    if len(words) < 2:
        return ['', '', '']
    if len(words) == 3:
        # 'Move', modifier, 'direction.' -> strip the trailing period.
        return [words[1], words[2][:-1], '']
    if len(words) == 4:
        # 'Move', modifier, first direction, 'second.' -> strip the trailing period.
        return [words[1], words[2], words[3][:-1]]
    # Any other word count keeps the original fallback: second-to-last word, no second direction.
    return [words[1], words[-2], '']


modsim = 0.0
dirsim = 0.0
counter = 0

metrics = Metrics()

for modifier in modifiers:
    torch.manual_seed(0)

    # Despite its name, `dae` currently holds an MLPModel; the dual-autoencoder
    # alternative is kept here for quick switching:
    # DualAutoencoderModel(SBERTEmbedder(VOCABULARY), phrase_mse_loss=True)
    dae = MLPModel(SBERTEmbedder(VOCABULARY), phrase_mse_loss=True)

    # Rows carrying the held-out modifier (column 0 of the un-merged phrases) form the test set.
    held_out = phrase_data[:, 0] == modifier
    force_train, force_test = force_data[~held_out], force_data[held_out]
    phrase_train, phrase_test = merged_phrase_data[~held_out], merged_phrase_data[held_out]

    dae.train(force_train, phrase_train, epochs=1024, verbose=True)

    dae_predictions = dae.force_to_phrase(force_test)
    print(dae_predictions)

    for pred, actual in zip(dae_predictions, phrase_test):
        pred, actual = split_phrase(pred), split_phrase(actual)
        modsim += metrics.score_modifier(pred, actual)
        dirsim += metrics.score_direction(pred, actual)
        counter += 1

# Mean similarity over every held-out sample.
modsim /= counter
dirsim /= counter

# Predictions and targets of the last held-out modifier only.
print(dae_predictions)
print(phrase_test)

# print(modsim, dirsim, (modsim + dirsim) / 2)
# results = np.array([
#     [16.912, 0.787, 0.249, 0.973, 0.611],
#     [6.762, 0.976, 0.337, 0.974, 0.655],
#     [5.861, 0.956, 0.302, 0.846, 0.574],
#     [6.815, 0.978, 0.383, 0.975, 0.679],
#     [7.239, 0.935, 0.33380580712109803, 0.9231581677993138, 0.6284819874602059],
# ]).T[[2, 3, 4, 0, 1]]
# plot_results(results, "Model Scores on Out-of-Distribution Modifiers")

In [None]:
# Held-out-direction check of the SBERT-embedded dual autoencoder: each listed
# direction is removed from training and used as the test set.
# NOTE(review): this rebinds the global `directions` that the VOCABULARY cell
# reads, so re-running that cell after this one would shrink VOCABULARY.
directions = [
    # 'backward',
    # 'backward down',
    # 'backward left',
    # 'backward right',
    'backward up',
    # 'down',
    # 'down forward',
    # 'down left',
    # 'down right',
    # 'forward',
    # 'forward left',
    # 'forward right',
    # 'forward up',
    # 'left',
    # 'left up',
    # 'right',
    # 'right up',
    # 'up',
]

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_phrase(phrase_data)
# Second merged view, used only to select rows by direction (column 1).
merged_phrase_data2 = dataset.merge_directions(phrase_data)


def split_phrase(phrase: str) -> list:
    """Split a 'Move <modifier> <direction>[ <direction>].' sentence into parts.

    Returns ``[modifier, first direction, second direction or '']``. The
    sentence is split on single spaces, so an empty modifier (two consecutive
    spaces) yields ``''`` as the modifier. The empty phrase, which is part of
    VOCABULARY, maps to three empty strings instead of raising IndexError.
    """
    words = phrase.split(' ')
    if len(words) < 2:
        return ['', '', '']
    if len(words) == 3:
        # 'Move', modifier, 'direction.' -> strip the trailing period.
        return [words[1], words[2][:-1], '']
    if len(words) == 4:
        # 'Move', modifier, first direction, 'second.' -> strip the trailing period.
        return [words[1], words[2], words[3][:-1]]
    # Any other word count keeps the original fallback: second-to-last word, no second direction.
    return [words[1], words[-2], '']


modsim = 0.0
dirsim = 0.0
counter = 0

metrics = Metrics()

for direction in directions:
    torch.manual_seed(0)

    dae = DualAutoencoderModel(SBERTEmbedder(VOCABULARY), phrase_mse_loss=True)

    # Rows carrying the held-out direction form the test set.
    held_out = merged_phrase_data2[:, 1] == direction
    force_train, force_test = force_data[~held_out], force_data[held_out]
    phrase_train, phrase_test = merged_phrase_data[~held_out], merged_phrase_data[held_out]

    dae.train(force_train, phrase_train, epochs=1024, verbose=True)

    dae_predictions = dae.force_to_phrase(force_test)

    for pred, actual in zip(dae_predictions, phrase_test):
        pred, actual = split_phrase(pred), split_phrase(actual)
        modsim += metrics.score_modifier(pred, actual)
        dirsim += metrics.score_direction(pred, actual)
        counter += 1

# Mean similarity over every held-out sample.
modsim /= counter
dirsim /= counter

# Predictions and targets of the last held-out direction only.
print(dae_predictions)
print(phrase_test)

# print(modsim, dirsim, (modsim + dirsim) / 2)
# results = np.array([
#     [21.749, 0.449, 0.471, 0.648, 0.560],
#     [25.697, 0.044, 0.453, 0.626, 0.540],
#     [11.515, 0.789, 0.491, 0.667, 0.579],
#     [31.103, -0.222, 0.489, 0.607, 0.548],
#     [9.269, 0.869, 0.5200380712392785, 0.6344676013503756, 0.577252836294827],
# ]).T[[2, 3, 4, 0, 1]]
# plot_results(results, "Model Scores on Out-of-Distribution Directions")

In [None]:
# results = results[[3, 4, 0, 1, 2]]

#     cutoff = 2.0
#     z_scores = np.apply_along_axis(scipy.stats.zscore, 1, results)
#     z_scores[0] = -z_scores[0]

#     model_labels = ["$SVM/KNN$", "$DMLP_B$", "$DMLP_G$", "$DAE_B$", "$DAE_G$"]
#     metric_labels = ["FPAcc", "FDAcc", "ModSim", "DirSim", "PhraseSim"]

#     plt.figure(figsize=(7, 3.5))
#     ax = sns.heatmap(
#         z_scores, vmin=-cutoff, vmax=cutoff, cmap="RdYlGn", annot=results, fmt=".3f", linewidths=0.625,
#         cbar=True, xticklabels=model_labels, yticklabels=metric_labels, annot_kws={"size": 16}, cbar_kws={'label': '$z$-score'})
#     ax.xaxis.set_label_position('bottom')
#     ax.tick_params(axis='x', labelsize=14.5)
#     ax.tick_params(axis='y', labelsize=14.5)
#     plt.title(title, fontsize=14.5)
#     plt.show()

In [None]:
# In-distribution evaluation: one random 90/10 split per seed, then the mean over seeds.
N = 30

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_directions(phrase_data)

final_results = []

for seed in range(N):
    # Seed torch per run; the split itself is seeded through `seed=` below.
    torch.manual_seed(seed)
    results = get_results(
        force_data,
        phrase_data,
        merged_phrase_data,
        seed=seed,
        verbose=False,
    )
    final_results.append(results)
    plot_results(results, title=f"Model Scores for In-Distribution Samples (Seed {seed})")

# Element-wise mean of the per-seed score tables.
plot_results(
    np.mean(final_results, axis=0),
    title="Mean Model Scores for In-Distribution Samples",
)

In [None]:
# Out-of-distribution modifiers: hold each modifier out of training in turn,
# plot its score table, then plot the mean over all held-out modifiers.
modifiers = [
    'slightly',
    'greatly',
    'smoothly',
    'sharply',
    'slowly',
    'quickly',
    'lightly',
    'significantly',
    'softly',
    'harshly',
    'gradually',
    'immediately',
]

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_directions(phrase_data)

# Number of repeated runs averaged per held-out modifier.
N = 1

modifier_results = {}

for modifier in modifiers:
    torch.manual_seed(0)

    # Accumulate the score tables of the N runs, then average them.
    score_sum = 0.0
    for _ in range(N):
        score_sum += get_results(force_data, phrase_data, merged_phrase_data, exclude_modifier=modifier, verbose=False)
    modifier_results[modifier] = score_sum / N

    plot_results(modifier_results[modifier], title=f"Model Scores on Out-of-Distribution Modifiers ('{modifier}')")

plot_results(
    np.mean(list(modifier_results.values()), axis=0),
    title="Model Scores on Out-of-Distribution Modifiers",
)

In [None]:
# Out-of-distribution directions: hold each direction out of training in turn,
# plot its score table, then plot the mean over all held-out directions.
directions = [
    'backward',
    'backward down',
    'backward left',
    'backward right',
    'backward up',
    'down',
    'down forward',
    'down left',
    'down right',
    'forward',
    'forward left',
    'forward right',
    'forward up',
    'left',
    'left up',
    'right',
    'right up',
    'up',
]

force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_directions(phrase_data)

# Number of repeated runs averaged per held-out direction.
N = 1

direction_results = {}

for direction in directions:
    torch.manual_seed(0)

    # Accumulate the score tables of the N runs, then average them.
    score_sum = 0.0
    for _ in range(N):
        score_sum += get_results(force_data, phrase_data, merged_phrase_data, exclude_direction=direction, verbose=False)
    direction_results[direction] = score_sum / N

    plot_results(direction_results[direction], title=f"Model Scores on Out-of-Distribution Directions ('{direction}')")

plot_results(
    np.mean(list(direction_results.values()), axis=0),
    title="Model Scores on Out-of-Distribution Directions",
)