In [None]:
import preamble
import numpy as np
from dataset import Dataset
from model import DualAutoencoderModel
from embedder import BinaryEmbedder, GloveEmbedder, SBERTEmbedder, SVMKNNEmbedder
import pickle

In [None]:
# Adverbs that may qualify a motion command. The empty string stands for
# "no modifier".
modifiers = [
    '',
    'slightly',
    'greatly',
    'smoothly',
    'sharply',
    'slowly',
    'quickly',
    'lightly',
    'significantly',
    'softly',
    'harshly',
    'gradually',
    'immediately',
]

# Motion directions: either a single word or two words separated by one space.
directions = [
    'backward',
    'backward down',
    'backward left',
    'backward right',
    'backward up',
    'down',
    'down forward',
    'down left',
    'down right',
    'forward',
    'forward left',
    'forward right',
    'forward up',
    'left',
    'left up',
    'right',
    'right up',
    'up',
]


def _phrase_label(modifier, direction):
    """Return the 3-slot label ``[modifier, first direction, second direction]``.

    The second-direction slot is ``''`` for single-word directions. The label
    is a NumPy array of dtype ``U16``.
    """
    # Directions are space separated, so split on the first space. The earlier
    # check for the word 'and' never matched any entry in `directions`, which
    # left the second-direction slot empty for every compound direction.
    first, _, second = direction.partition(' ')
    return np.array([modifier, first, second], dtype='U16')


# Every (phrase, label) pair for all modifier/direction combinations.
# NOTE: with the empty modifier the phrase contains two consecutive spaces
# (e.g. 'Move  up.'); this is kept as-is so existing phrase strings do not change.
VOCABULARY = [(f'Move {modifier} {direction}.', _phrase_label(modifier, direction))
              for modifier in modifiers for direction in directions]

# Final entry: the empty phrase, labelled with three empty slots.
VOCABULARY.append(('', np.array(['', '', ''], dtype='U16')))

In [None]:
# Inputs and training length, named once so they are easy to find and tune.
TRIALS_DIR = '../data/trimmed_trials'
GLOVE_EMBEDDINGS_PATH = '../data/limited_vocab_embeddings_with_special_no_word_token_50d.pt'
TRAIN_EPOCHS = 1024

# Load the trials, then derive the merged-direction variant of the phrase data.
# NOTE(review): what `merge_directions` produces is defined in dataset.py, not here.
dataset = Dataset(TRIALS_DIR)
force_data, phrase_data = dataset.load()
merged_phrase_data = dataset.merge_directions(phrase_data)

# One embedder per phrase representation. The binary embedder receives the
# distinct values found in the merged phrase data plus its last-axis length;
# the GloVe embedder receives the embeddings file plus the last-axis length of
# the unmerged phrase data.
binary_embedder = BinaryEmbedder(
    np.unique(merged_phrase_data),
    merged_phrase_data.shape[-1],
)
sbert_embedder = SBERTEmbedder(VOCABULARY)
glove_embedder = GloveEmbedder(
    GLOVE_EMBEDDINGS_PATH,
    phrase_data.shape[-1],
)

# Two dual autoencoders: one on the binary embedding, one on GloVe with the
# phrase MSE loss switched on.
dae_b = DualAutoencoderModel(binary_embedder)
dae_g = DualAutoencoderModel(glove_embedder, phrase_mse_loss=True)

# The binary model trains on the merged phrases, the GloVe model on the
# unmerged ones; both run for the same number of epochs.
dae_b.train(force_data, merged_phrase_data, epochs=TRAIN_EPOCHS, verbose=True)
dae_g.train(force_data, phrase_data, epochs=TRAIN_EPOCHS, verbose=True)

In [None]:
# Persist both trained models as pickle files in the working directory,
# binary-embedder model first, then the GloVe one.
for pickle_path, trained_model in (('dae_b.pkl', dae_b), ('dae_g.pkl', dae_g)):
    with open(pickle_path, 'wb') as handle:
        pickle.dump(trained_model, handle)