# Sequence to Sequence `seq2seq`


In [39]:
import json
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential, load_model, model_from_json
from keras.layers import Dense, LSTM


DATA_PATH = "../data/raw"


In [2]:
Tx = 40
index_to_char = {
    0: " ",
    1: "!",
    2: "$",
    3: "%",
    4: "&",
    5: "(",
    6: ")",
    7: ",",
    8: "-",
    9: ".",
    10: "/",
    11: "0",
    12: "1",
    13: "2",
    14: "3",
    15: "4",
    16: "5",
    17: "6",
    18: "7",
    19: "8",
    20: "9",
    21: ";",
    22: "<",
    23: "=",
    24: ">",
    25: "?",
    26: "a",
    27: "b",
    28: "c",
    29: "d",
    30: "e",
    31: "f",
    32: "g",
    33: "h",
    34: "i",
    35: "j",
    36: "k",
    37: "l",
    38: "m",
    39: "n",
    40: "o",
    41: "p",
    42: "q",
    43: "r",
    44: "s",
    45: "t",
    46: "u",
    47: "v",
    48: "w",
    49: "x",
    50: "y",
    51: "z",
    52: "}",
    53: "¿",
    54: "à",
    55: "á",
    56: "è",
    57: "é",
    58: "ê",
    59: "ñ",
    60: "ó",
    61: "ö",
    62: "ü",
    63: "–",
    64: "‘",
    65: "’",
    66: "“",
    67: "”",
    68: "…",
}
char_to_index = {
    " ": 0,
    "!": 1,
    "$": 2,
    "%": 3,
    "&": 4,
    "(": 5,
    ")": 6,
    ",": 7,
    "-": 8,
    ".": 9,
    "/": 10,
    "0": 11,
    "1": 12,
    "2": 13,
    "3": 14,
    "4": 15,
    "5": 16,
    "6": 17,
    "7": 18,
    "8": 19,
    "9": 20,
    ";": 21,
    "<": 22,
    "=": 23,
    ">": 24,
    "?": 25,
    "a": 26,
    "b": 27,
    "c": 28,
    "d": 29,
    "e": 30,
    "f": 31,
    "g": 32,
    "h": 33,
    "i": 34,
    "j": 35,
    "k": 36,
    "l": 37,
    "m": 38,
    "n": 39,
    "o": 40,
    "p": 41,
    "q": 42,
    "r": 43,
    "s": 44,
    "t": 45,
    "u": 46,
    "v": 47,
    "w": 48,
    "x": 49,
    "y": 50,
    "z": 51,
    "}": 52,
    "¿": 53,
    "à": 54,
    "á": 55,
    "è": 56,
    "é": 57,
    "ê": 58,
    "ñ": 59,
    "ó": 60,
    "ö": 61,
    "ü": 62,
    "–": 63,
    "‘": 64,
    "’": 65,
    "“": 66,
    "”": 67,
    "…": 68,
}
poem_chars = [
    "\n",
    " ",
    "!",
    "'",
    "(",
    ")",
    ",",
    "-",
    ".",
    ":",
    ";",
    "?",
    "a",
    "b",
    "c",
    "d",
    "e",
    "f",
    "g",
    "h",
    "i",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "p",
    "q",
    "r",
    "s",
    "t",
    "u",
    "v",
    "w",
    "x",
    "y",
    "z",
]
poem_char_to_index = {
    "\n": 0,
    " ": 1,
    "!": 2,
    "'": 3,
    "(": 4,
    ")": 5,
    ",": 6,
    "-": 7,
    ".": 8,
    ":": 9,
    ";": 10,
    "?": 11,
    "a": 12,
    "b": 13,
    "c": 14,
    "d": 15,
    "e": 16,
    "f": 17,
    "g": 18,
    "h": 19,
    "i": 20,
    "j": 21,
    "k": 22,
    "l": 23,
    "m": 24,
    "n": 25,
    "o": 26,
    "p": 27,
    "q": 28,
    "r": 29,
    "s": 30,
    "t": 31,
    "u": 32,
    "v": 33,
    "w": 34,
    "x": 35,
    "y": 36,
    "z": 37,
}
poem_index_to_char = {
    0: "\n",
    1: " ",
    2: "!",
    3: "'",
    4: "(",
    5: ")",
    6: ",",
    7: "-",
    8: ".",
    9: ":",
    10: ";",
    11: "?",
    12: "a",
    13: "b",
    14: "c",
    15: "d",
    16: "e",
    17: "f",
    18: "g",
    19: "h",
    20: "i",
    21: "j",
    22: "k",
    23: "l",
    24: "m",
    25: "n",
    26: "o",
    27: "p",
    28: "q",
    29: "r",
    30: "s",
    31: "t",
    32: "u",
    33: "v",
    34: "w",
    35: "x",
    36: "y",
    37: "z",
}

poem_model = load_model(os.path.join(DATA_PATH, "poem_model.h5"))
sheldon_model = json.load(open(os.path.join(DATA_PATH, "models/sheldon_model.json")))
sheldon_model = model_from_json(json.dumps(sheldon_model))
sheldon_model.load_weights(os.path.join(DATA_PATH, "weights/sheldon_model_weights.h5"))


def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def generate_sheldon_phrase(model, initial_text=None):
    """Generate a phrase based on trained model with Sheldon's sentences.

    Use the trained model to generate a full sentence.
    The sentence is limited to 200 characters and is defined as a sequence of characters until a '.' is generated.

    Args:
        model (keras.models.Sequential): The pre-trained model.
        initial_text (string): A context string for the model (defaults to None).

    Returns:
        A generated phrase (string).
    """

    # Define default parameters
    chars_window = 20
    n_vocab = 69
    ix_to_word = index_to_char
    word_to_ix = char_to_index
    limit = 200
    temperature = 0.25
    sheldon_quotes = [
        "You're afraid of insects and women, Ladybugs must render you catatonic.",
        "Scissors cuts paper, paper covers rock, rock crushes lizard, lizard poisons Spock, Spock smashes scissors, scissors decapitates lizard, lizard eats paper, paper disproves Spock, Spock vaporizes rock, and as it always has, rock crushes scissors.",
        "For example, I cry because others are stupid, and that makes me sad.",
        "I’m not insane, my mother had me tested.",
        "Two days later, Penny moved in and so much blood rushed to your genitals, your brain became a ghost town.",
        "Amy’s birthday present will be my genitals.",
        "(3 knocks) Penny! (3 knocks) Penny! (3 knocks) Penny!",
        "Thankfully all the things my girlfriend used to do can be taken care of with my right hand.",
        "I would have been here sooner but the bus kept stopping for other people to get on it.",
        "Oh gravity, thou art a heartless bitch.",
        "I am aware of the way humans usually reproduce which is messy, unsanitary and based on living next to you for three years, involves loud and unnecessary appeals to a deity.",
        "Well, today we tried masturbating for money.",
        "I think that you have as much of a chance of having a sexual relationship with Penny as the Hubble telescope does of discovering at the center of every black hole is a little man with a flashlight searching for a circuit breaker.",
        "Well, well, well, if it isn't Wil Wheaton! The Green Goblin to my Spider-Man, the Pope Paul V to my Galileo, the Internet Explorer to my Firefox.",
        "What computer do you have? And please don't say a white one.",
        "She calls me moon-pie because I'm nummy-nummy and she could just eat me up.",
        "Ah, memory impairment; the free prize at the bottom of every vodka bottle.",
    ]
    if initial_text is None:
        # initial_text = np.random.choice(sheldon_quotes)
        initial_text = sheldon_quotes[3]
    res = ""
    seq = initial_text.lower()
    res += seq
    length_gap = chars_window - len(initial_text)
    if length_gap >= 0:
        seq = " " * length_gap + seq
    else:
        seq = seq[:length_gap]
        # print('Initial text too long, padded to: {0}'.format(seq))
    counter = 0
    word = ""
    while counter < limit and word != r".":
        x_pred = np.zeros((1, chars_window, n_vocab))
        for t, char in enumerate(seq):
            x_pred[0, t, word_to_ix[char]] = 1.0
        pred = model.predict(x_pred, verbose=0)[0]
        # idx = np.argmax(pred)
        idx = sample(pred, temperature)
        word = ix_to_word[idx]
        # print(word)
        res += word
        seq = seq[1:] + word

        counter += 1

    return res


def generate_poem(model, initial_text):
    """Generate a poem using the initial text and the trained model.

    Use the trained model to generate a poem based on the trained model.
    The model was trained using the Sonnets from Shakespeare. It will
    generate a poem limited to 400 characters.

    Args:
        model (keras.models.Sequential): The pre-trained model.
        initial_text (string): A context string for the model (defaults to None).

    Returns:
        A generated phrase (string).
    """

    # Define some parameters
    limit = 400
    temperature = 0.5

    generated = ""
    # zero pad the sentence to Tx characters.
    sentence = ("{0:0>" + str(Tx) + "}").format(initial_text).lower()
    generated += initial_text

    sent_count = 0

    print("\n\nYour poem lies below: \n\n")
    for _ in range(limit):

        x_pred = np.zeros((1, Tx, len(poem_chars)))

        for t, char in enumerate(sentence):
            if char != "0":
                x_pred[0, t, poem_char_to_index[char]] = 1.0

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature=temperature)
        next_char = poem_index_to_char[next_index]

        generated += next_char
        sentence = sentence[1:] + next_char

        if next_char == "\n":
            sent_count += 1
            if sent_count > 8:
                break

    return generated


2022-03-09 17:05:39.879602: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2022-03-09 17:05:39.879652: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: optimus
2022-03-09 17:05:39.879667: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: optimus
2022-03-09 17:05:39.879783: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.42.1
2022-03-09 17:05:39.879812: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.42.1
2022-03-09 17:05:39.879819: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 470.42.1
2022-03-09 17:05:39.880097: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical oper

In [3]:
# Context for Sheldon phrase
sheldon_context = "I’m not insane, my mother had me tested. "

# Generate one Sheldon phrase
sheldon_phrase = generate_sheldon_phrase(sheldon_model, sheldon_context)

# Print the phrase
print(sheldon_phrase)

# Context for poem
poem_context = "May thy beauty forever remain"

# Print the poem
print(generate_poem(poem_model, poem_context))


  preds = np.log(preds) / temperature


i’m not insane, my mother had me tested. ovie with the relation.


Your poem lies below: 


May thy beauty forever remain,
without thy shall i semart that might beautes lever
for thy wall of your betury me.t the seentich frow,
thy she thou lives and partst all distauty deet,
nor the brave whe form of formes's lays,
with nets and prays and from my forses his brand,
in the well which chuch mest i hay which now,
and from the world and sil mor the his prous ray,
and sull reeps with from macks of the wart,



In [4]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, Bidirectional, RepeatVector, TimeDistributed

pt_length = 8
pt_tokenizer = pickle.load(open(os.path.join(DATA_PATH, "pt_tokenizer.pickle"), "rb"))
index_to_word = pickle.load(open(os.path.join(DATA_PATH, "index_to_word.pkl"), "rb"))

sentences = [
    "obrigada",
    "vocês podem ficar",
    "ataque",
    "eu acredito em você",
    "tom está em casa",
    "tom está sozinho",
    "posso sentila",
    "eu dancei",
    "estou contratado",
    "experimenta isto",
]

model = json.load(open(os.path.join(DATA_PATH, "models/nmt_model.json")))
model = model_from_json(json.dumps(model))
# TODO get the model weights
# model.load_weights(os.path.join(DATA_PATH, "weights/nmt_model_weights.h5"))


def encode_sequences(lines, tokenizer=pt_tokenizer, length=pt_length):
    """Use the tokenizer to encode the given texts

    Transform the given texts into an numpy array of index sequences padded to length `length`.

    Args:
        lines (list or numpy.array): The given texts.
        tokenizer (keras.preprocessing.text.Tokenizer): The fitted tokenizer object. Defaults to the trained Portuguese tokenizer.
        length (int): The length to pad the sequences. Defaults to the maximum length of the Portuguese sentences (8).

    Returns:
        The padded numpy.array of indices sequences.

    """

    # integer encode sequences
    X = tokenizer.texts_to_sequences(lines)
    # pad sequences with 0 values
    X = pad_sequences(X, maxlen=length, padding="post")
    return X


def predict_one(model, source, ix_to_word=index_to_word):
    source = source.reshape((1, source.shape[0]))
    prediction = model.predict(source, verbose=0)[0]
    integers = [np.argmax(vector) for vector in prediction]
    target = list()
    for i in integers:
        word = ix_to_word.get(i, None)
        if word is None:
            break
        target.append(word)
    return " ".join(target)


def translate_many(model, sentences):
    """Translate a list of sentences

    Use the pre-trained model to loop over the sentences and translate one by one.

    Args:
        model (keras.models.Sequential): The pre-trained NMT model.
        sentences (list or numpy.array): The list of sentences to translate.

    Returns:
        A list containing the translated sentences.

    """
    translated = []
    for _, sentence in enumerate(sentences):
        # translate encoded sentence text
        translation = predict_one(model, sentence)
        translated.append(translation)

    return translated


In [5]:
# Transform text into sequence of indexes and pad
X = encode_sequences(sentences)

# Print the sequences of indexes
print(X)

# Translate the sentences
translated = translate_many(model, X)

# Create pandas DataFrame with original and translated
df = pd.DataFrame({"Original": sentences, "Translated": translated})

# Print the DataFrame
print(df)


[[ 452    0    0    0    0    0    0    0]
 [  23  224   68    0    0    0    0    0]
 [1374    0    0    0    0    0    0    0]
 [   1  302   31    9    0    0    0    0]
 [   2    6   31   63    0    0    0    0]
 [   2    6  155    0    0    0    0    0]
 [  35 2134    0    0    0    0    0    0]
 [   1 1525    0    0    0    0    0    0]
 [   5 1142    0    0    0    0    0    0]
 [ 578   44    0    0    0    0    0    0]]
              Original                                         Translated
0             obrigada                           math math math math math
1    vocês podem ficar                                     go go go go go
2               ataque                           door talk talk talk talk
3  eu acredito em você          approved approved hailing hailing hailing
4     tom está em casa                         angry angry make make make
5     tom está sozinho                 theyve theyve moaned moaned moaned
6        posso sentila                 easily easil

## Text generating function

### Predict next character


In [9]:
n_vocab = 69
chars_window = 20

model = json.load(open(os.path.join(DATA_PATH, "models/nxt_char_model.json")))
model = model_from_json(json.dumps(model))


def text_to_sequence(initial_text, chars_window):
    seq = initial_text.lower()
    length_gap = chars_window - len(initial_text)
    if length_gap >= 0:
        seq = " " * length_gap + seq
    else:
        seq = seq[:length_gap]
        # print('Initial text too long, padded to: {0}'.format(seq))

    return seq


def initialize_X(initial_text, chars_window, char_to_index, n_vocab=n_vocab):
    """Initialize the variable X to be used by the pre-trained model.

    Initialize a variable `X` containing zeros and ones acording to the
    given initial text. Will transform text in to sequence of indexes,
    pad the sequences to `chars_window` length and then create a
    numpy array with shape `(1, chars_window, vocabulary_size)`
    to be used by the pre-trained model to make predictions.

    Args:
        initial_text (string): The text to be used as context.
        chars_window (int): The number of characters to be used to predict the next one.
        char_to_index (dict): Dictionary with characters as keys and indexes as values.
        n_voab (int): Vocabulary size. Defaults to the vocabulary size of the trained model.

    Returns:
        A numpy.array containing the initialized vector.

    """

    input_text = text_to_sequence(initial_text, chars_window)
    X = np.zeros((1, chars_window, n_vocab))
    # Insert 1.0 in all found tokens
    for t, char in enumerate(input_text):
        X[0, t, char_to_index[char]] = 1.0

    return X


def predict_next_char(model, X, index_to_char):
    """Use the pre-trained model to predict the next character.

    Use the pre-trained model on the variable `X` to get the probabilities of the next character's index.
    Then choose the index with highest probability and change it to the corresponding character using the
    dictionary `index_to_char`.

    Args:
        model (keras.models.Sequential): The pre-trained model.
        X (numpy.array): Array representing the context text, transformed using the function `initialize_X`.
        index_to_char (dict): Dictionary with indexes as keys and characters as values.

    Returns:
        The predicted next character.

    """

    # Use model to predict next character and get the index of the predicted character
    pred = model.predict(X, verbose=0)[0]
    # idx = np.argmax(pred)
    idx = sample(pred, 0.5)

    # Change index to character
    next_char = index_to_char[idx]

    return next_char


In [10]:
def get_next_char(model, initial_text, chars_window, char_to_index, index_to_char):
    # Initialize the X vector with zeros
    X = initialize_X(initial_text, chars_window, char_to_index)

    # Get next character using the model
    next_char = predict_next_char(model, X, index_to_char)

    return next_char


# Define context sentence and print the generated text
initial_text = "I am not insane, "
print(
    "Next character: {0}".format(
        get_next_char(model, initial_text, 20, char_to_index, index_to_char)
    )
)


Next character: b


In [12]:
tok_num = 20

model = json.load(open(os.path.join(DATA_PATH, "models/gen_phrase_model.json")))
model = model_from_json(json.dumps(model))
# TODO load weights
# model.load_weights(os.path.join(DATA_PATH, "weights/gen_phrase_model_weights.h5"))


def initialize_params(initial_text, chars_window=chars_window):
    res = ""
    seq = initial_text.lower()
    res += seq
    length_gap = chars_window - len(initial_text)
    if length_gap >= 0:
        seq = " " * length_gap + seq
    else:
        seq = seq[:length_gap]
        print("Initial text too long, padded to: {0}".format(seq))

    # (res, seq, counter, word)
    return res, seq, 0, ""


def get_next_token(
    model,
    res,
    seq,
    index_to_char=index_to_char,
    char_to_index=char_to_index,
    n_vocab=69,
    temperature=0.5,
):
    # Initialize the X vector with zeros
    X = np.zeros((1, tok_num, n_vocab))

    # Insert 1.0 in all found tokens
    for t, char in enumerate(seq):
        X[0, t, char_to_index[char]] = 1.0

    # Use model to predict next token
    pred = model.predict(X, verbose=0)[0]

    # idx = np.argmax(pred)
    idx = sample(pred, temperature)
    next_char = index_to_char[idx]

    # Append word to the sentence
    res += next_char

    # Update input for next prediction
    seq = seq[1:] + next_char

    return next_char, res, seq


In [13]:
def generate_phrase(model, initial_text):
    # Initialize variables
    res, seq, counter, next_char = initialize_params(initial_text)

    # Loop until stop conditions are met
    while counter < 100 and next_char != r".":
        # Get next char using the model and append to the sentence
        next_char, res, seq = get_next_token(model, res, seq)
        # Update the counter
        counter = counter + 1
    return res


# Create a phrase
print(generate_phrase(model, "I am not insane, "))


i am not insane, <d)cc’–”dx6qc– ‘;wraêáxshs’)>ó,ñhf‘9gpñ–jco7}8(;êfz=o8égé4qó6 .


In [16]:
model = json.load(open(os.path.join(DATA_PATH, "models/temperature_model.json")))
model = model_from_json(json.dumps(model))
# TODO load weights
# model.load_weights(os.path.join(DATA_PATH, "weights/temperature_model_weights.h5"))


def generate_phrase(model, initial_text, temperature=0.5):
    """Generate a phrase given the initial text and temperature value.

    Transforms the initial text into a numpy array with the correct shape
    to be used by the model to make predictions. The loop until
    400 characters are predicted or a period is predicted, finding the end of the sentence.

    Args:
        model (keras.models.Sequential): The pre-trained model.
        initial_text (string): The context text.
        temperature (float): The scaling factor. Defaults to 0.5.

    Returns:
        The generated phrase.

    """

    # Initialize variables
    res, seq, counter, next_char = initialize_params(initial_text)

    # Loop until stop conditions are met
    while counter < 400 and next_char != r".":
        # Get next character using the model and append to the sentence
        next_char, res, seq = get_next_token(model, res, seq, temperature=temperature)

        # Update the counter
        counter = counter + 1
    return res


In [15]:
# Define the initial text
initial_text = "Spock and me "

# Define a vector with temperature values
temperatures = [0.2, 0.8, 1.0, 3.0, 10.0]

# Loop over temperatures and generate phrases
for temperature in temperatures:
    # Generate a phrase
    phrase = generate_phrase(model, initial_text, temperature)

    # Print the phrase
    print("Temperature {0}: {1}".format(temperature, phrase))


Temperature 0.2: spock and me vñdkg0pnpáéá.
Temperature 0.8: spock and me euè8s;jm!m>40ylvfvá }6/n,egyj3ñ-6rübs>ö.
Temperature 1.0: spock and me ;éhá(k}5–pö?}7c,,êh’o4}k3-ai!üh-/jpiö<r>“…5jné–3kj85u7êès<áuk3n6ópk$;kxhó¿g“di‘–0hêó=nöy3og áèêfuüé’u>u“f0d7rjàcp6êfñd<–ó ot”d4%ce o;?)xol,p/à<0<qu r“8’¿l/ &ó-pw1wó7},ààj‘j  ”à,o 3.
Temperature 3.0: spock and me 4-j -5á><iyy–c}op!4àn;(êdo…jos2(ó’>t,1r4mqrp…(2üèñc;&i>g,np&b%v&5??q…áwiqótaskx<yqmrr’,áóazbe2à-.
Temperature 10.0: spock and me ,1h8)d…-jp,%0wyê>2!&$” …ziü9uè}b‘xdc…-6q7föt)>xvy&,a-ycdsp7exjk,4%!g¿öédhs5w8w”m/su$…2w‘á¿–4óp38w=)”uñöwy$èk…6  ,8ào1n803n ‘scv}ñuf 5 57á64vyáüd–à¿,b-9ñ&3!0h9)¿bdxöp-si–qeksd¿wxl}ze3e%¿<%b’üqöi>ürñ–é5kc–“r0towa‘y‘m$wz.


## Text generation models


In [17]:
chars_window = 20
step = 3

with open(os.path.join(DATA_PATH, "sheldon.txt")) as sheldon:
    sheldon = sheldon.read()
vocabulary = [
    " ",
    "!",
    "$",
    "%",
    "&",
    "(",
    ")",
    ",",
    "-",
    ".",
    "/",
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9",
    ";",
    "<",
    "=",
    ">",
    "?",
    "a",
    "b",
    "c",
    "d",
    "e",
    "f",
    "g",
    "h",
    "i",
    "j",
    "k",
    "l",
    "m",
    "n",
    "o",
    "p",
    "q",
    "r",
    "s",
    "t",
    "u",
    "v",
    "w",
    "x",
    "y",
    "z",
    "}",
    "¿",
    "à",
    "á",
    "è",
    "é",
    "ê",
    "ñ",
    "ó",
    "ö",
    "ü",
    "–",
    "‘",
    "’",
    "“",
    "”",
    "…",
]


In [18]:
# Instantiate the vectors
sentences = []
next_chars = []
# Loop for every sentence
for sentence in sheldon.split("\n"):
    # Get 20 previous chars and next char; then shift by step
    for i in range(0, len(sentence) - chars_window, step):
        sentences.append(sentence[i : i + chars_window])
        next_chars.append(sentence[i + chars_window])

# Define a Data Frame with the vectors
df = pd.DataFrame({"sentence": sentences, "next_char": next_chars})

# Print the initial rows
print(df.head())


               sentence next_char
0  so if a photon is di         r
1  if a photon is direc         t
2  a photon is directed          
3  hoton is directed th         r
4  on is directed throu         g


In [34]:
num_seqs = 127844
_ = df.sample(num_seqs, random_state=42).values
sentences, next_chars = _.T


In [35]:
# Instantiate the variables with zeros
numerical_sentences = np.zeros((num_seqs, chars_window, n_vocab), dtype=np.bool)
numerical_next_chars = np.zeros((num_seqs, n_vocab), dtype=np.bool)

# Loop for every sentence
for i, sentence in enumerate(sentences):
    # Loop for every character in sentence
    for t, char in enumerate(sentence):
        # Set position of the character to 1
        # numerical_sentences[i, t, char_to_index[char]] = 1
        numerical_sentences[i, t, char_to_index.get(char, 0)] = 1
        # Set next character to 1
        numerical_next_chars[i, char_to_index.get(next_chars[i], 0)] = 1

# Print the first position of each
print(numerical_sentences[0], numerical_next_chars[0], sep="\n")


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  numerical_sentences = np.zeros((num_seqs, chars_window, n_vocab), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  numerical_next_chars = np.zeros((num_seqs, n_vocab), dtype=np.bool)


[[ True False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]
 ...
 [False False False ... False False False]
 [False False False ... False False False]
 [False False False ... False False False]]
[False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False False False  True False False False
 False False False False False False False False False False False False
 False False False False False False False False False]


In [37]:
input_shape = (20, 69)


In [38]:
# Instantiate the model
model = Sequential(name="LSTM model")

# Add two LSTM layers
model.add(
    LSTM(
        64,
        input_shape=input_shape,
        dropout=0.15,
        recurrent_dropout=0.15,
        return_sequences=True,
        name="Input_layer",
    )
)
model.add(
    LSTM(
        64,
        dropout=0.15,
        recurrent_dropout=0.15,
        return_sequences=False,
        name="LSTM_hidden",
    )
)

# Add the output layer
model.add(Dense(n_vocab, activation="softmax", name="Output_layer"))

# Compile and load weights
model.compile(loss="categorical_crossentropy", optimizer="adam")
# model.load_weights(os.path.join(DATA_PATH, "weights/lstm_model_weights.h5"))
# Summary
model.summary()


Model: "LSTM model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input_layer (LSTM)          (None, 20, 64)            34304     
                                                                 
 LSTM_hidden (LSTM)          (None, 64)                33024     
                                                                 
 Output_layer (Dense)        (None, 69)                4485      
                                                                 
Total params: 71,813
Trainable params: 71,813
Non-trainable params: 0
_________________________________________________________________


## Neural Machine Translation

### Preparing input text


In [64]:
pt_sentences = np.array(
    # pickle.load(open(os.path.join(DATA_PATH, "pt_sentences.pkl"))),
    json.load(open(os.path.join(DATA_PATH, "pt_sentences.json"))),
    dtype="<U195",
)
input_tokenizer = pickle.load(
    open(os.path.join(DATA_PATH, "input_tokenizer.pkl"), "rb")
)


In [65]:
# Get maximum length of the sentences
pt_length = max([len(sentence.split()) for sentence in pt_sentences])

# Transform text to sequence of numerical indexes
X = input_tokenizer.texts_to_sequences(pt_sentences)

# Pad the sequences
X = pad_sequences(X, maxlen=pt_length, padding="post")

# Print first sentence
print(pt_sentences[0])

# Print transformed sentence
print(X[0])


é o meu trabalho
[  3   4  51 115   0   0   0   0]


In [67]:
en_vocab_size = 1269
en_sentences = np.array(
    [
        "its my job",
        "wholl cook",
        "help me",
        "i sat down",
        "im worn out",
        "dogs can swim",
        "lets start",
        "we have some",
        "lets swim",
        "may i smoke",
        "look here",
        "i was wrong",
        "tom grumbled",
        "ill sing",
        "is that all",
        "youve tried",
        "i try",
        "hes so young",
        "im a hero",
        "were mature",
        "did tom swim",
        "she needs it",
        "have some tea",
        "im a baker",
        "lets play",
        "toms unsure",
        "this works",
        "youre right",
        "beat it",
        "you tried",
        "dont tell me",
        "we hate tom",
        "its awesome",
        "is this true",
        "i will obey",
        "tom hates tv",
        "be sensible",
        "are you new",
        "come on in",
        "he turned",
        "they hate me",
        "im small",
        "is it rainy",
        "im at work",
        "i drank milk",
        "youre nuts",
        "dont cheat",
        "dont do this",
        "tom wants it",
        "forget it",
        "tom gave up",
        "keep digging",
        "im a man",
        "come on",
        "were shy",
        "he is happy",
        "you can come",
        "get up",
        "im involved",
        "are you free",
        "tom hurt me",
        "please leave",
        "follow us",
        "i am a runner",
        "aim higher",
        "im brave",
        "he has braces",
        "i understand",
        "he is cranky",
        "it was weird",
        "is it cloudy",
        "i lost it",
        "i saw one",
        "i did my best",
        "i do like tom",
        "jesus wept",
        "im starving",
        "ill ask tom",
        "youre old",
        "i heard you",
        "dont smile",
        "look back",
        "excuse me",
        "was it fun",
        "we found it",
        "just relax",
        "be reasonable",
        "i was lucky",
        "i will wait",
        "that hurts",
        "is that it",
        "tom slipped",
        "i didnt know",
        "come quickly",
        "is that mine",
        "i liked it",
        "let tom rest",
        "i left tom",
        "get upstairs",
        "ill go back",
        "i am human",
        "im so sorry",
        "you drive",
        "its huge",
        "dont be sad",
        "hes not sick",
        "are they gone",
        "is he right",
        "they won",
        "im in paris",
        "tom shrugged",
        "i tried",
        "never mind",
        "talk to me",
        "i am happy",
        "i had fun",
        "shut up",
        "no one knows",
        "are you rich",
        "it was wet",
        "were strong",
        "he is eating",
        "tom lost",
        "did you help",
        "dont speak",
        "bring backup",
        "i will try",
        "they danced",
        "i like women",
        "i knew that",
        "i came alone",
        "youre needy",
        "they cheered",
        "i like you",
        "birds fly",
        "i am coming",
        "i wont go",
        "how about you",
        "stop reading",
        "tom insisted",
        "well drive",
        "i do want it",
        "im not tall",
        "welcome",
        "toms glad",
        "of course",
        "i am eating",
        "ill see tom",
        "well follow",
        "it works",
        "read this",
        "youre fools",
        "im sorry",
        "they crashed",
        "were awake",
        "war is evil",
        "ill leave",
        "tom has come",
        "i like both",
        "theyre home",
        "are we lost",
        "well share",
        "dont run",
        "im inside",
        "are you fat",
        "forget tom",
        "tom swims",
        "im a woman",
        "who did that",
        "im with him",
        "leave us",
        "well scream",
        "calm down",
        "please wait",
        "tom broke it",
        "look out",
        "forgive tom",
        "i am japanese",
        "we know that",
        "are they gone",
        "i need sugar",
        "come alone",
        "a car went by",
        "please stop",
        "we have hope",
        "relax",
        "let me swim",
        "please sing",
        "please come",
        "well start",
        "i will go",
        "i want to go",
        "we sat down",
        "i miss you",
        "i ate a donut",
        "here he is",
        "nobody cares",
        "i woke up",
        "ill stop",
        "toms weird",
        "is tom lost",
        "any questions",
        "sit there",
        "get upstairs",
        "see you",
        "i dont cry",
        "who spoke",
        "theyll fail",
        "youre small",
        "toms awake",
        "has tom come",
        "ask around",
        "look closer",
        "i love you",
        "ask tom again",
        "i am eating",
        "i saw a dog",
        "well go",
        "can i come",
        "tom sneezed",
        "we lost",
        "dont let go",
        "thats weird",
        "take it",
        "say hello",
        "welcome",
        "were better",
        "excuse me",
        "we got lost",
        "excuse me",
        "i can be good",
        "she was busy",
        "is it hot",
        "im scared",
        "please sing",
        "i am american",
        "its",
        "was it cold",
        "thats mine",
        "stop yelling",
        "are you fit",
        "tom retired",
        "im finished",
        "hows school",
        "im standing",
        "well share",
        "i need a nap",
        "youre mine",
        "im eating",
        "we liked tom",
        "dont respond",
        "let me see",
        "no one cared",
        "i made it up",
        "are we alone",
        "i hear you",
        "this is ok",
        "tom has it",
        "take command",
        "be quiet now",
        "i remember",
        "he can swim",
        "can i try it",
        "im a purist",
        "check that",
        "i phoned him",
        "im useless",
        "i drink wine",
        "im selfish",
        "i sell shoes",
        "it may rain",
        "he is tall",
        "they lied",
        "how exciting",
        "who fell",
        "she is dead",
        "how clever",
        "take it easy",
        "wake up",
        "be calm",
        "i can run",
        "was it cold",
        "come on",
        "is that me",
        "youre small",
        "take it",
        "can tom sing",
        "lets go",
        "i want it",
        "answer me",
        "my eyes hurt",
        "dont be rude",
        "i am a muslim",
        "im too busy",
        "dont kid me",
        "youre old",
        "trust in me",
        "im an actor",
        "keep focused",
        "thats my cd",
        "its unusual",
        "i will wait",
        "dont go",
        "its not tom",
        "im old",
        "mary is mine",
        "get some rest",
        "get out",
        "he tries",
        "have a look",
        "leave me",
        "tom tried",
        "get lost",
        "how strange",
        "were hot",
        "i can sing",
        "well try",
        "i was awake",
        "good night",
        "i found it",
        "get away",
        "god bless you",
        "get down",
        "i forgot",
        "i prayed",
        "ive done it",
        "i didnt ask",
        "i am hungry",
        "im safe now",
        "i love mary",
        "were buying",
        "youre sad",
        "sign here",
        "he is old",
        "wash up",
        "i miss you",
        "its snowing",
        "its true",
        "are you there",
        "we need tom",
        "i like cats",
        "its clear",
        "is this mine",
        "i loved you",
        "what a pity",
        "youre sick",
        "im married",
        "im not you",
        "have a look",
        "i smiled",
        "i surrender",
        "this is his",
        "tom is a vet",
        "i found it",
        "is it a wolf",
        "who swam",
        "i shouted",
        "help me out",
        "theyre kids",
        "who has it",
        "come inside",
        "you won",
        "ill pay you",
        "sign here",
        "tom whistled",
        "tom is deaf",
        "dont move",
        "thats all",
        "bring backup",
        "its so dark",
        "i wont die",
        "im fine",
        "dont laugh",
        "tom is gone",
        "well wait",
        "im clumsy",
        "tom was hit",
        "he loves toys",
        "tom is alive",
        "what fun",
        "she is eight",
        "anyone hurt",
        "there it is",
        "lighten up",
        "what a shock",
        "get up",
        "of course",
        "grab tom",
        "they love me",
        "were weak",
        "be careful",
        "im not poor",
        "be punctual",
        "toms early",
        "dont cheat",
        "dont fight",
        "i fixed it",
        "im fine",
        "i like that",
        "hurry up",
        "catch tom",
        "i want one",
        "youre crazy",
        "tom nodded",
        "stop it",
        "we can pay",
        "im a doctor",
        "im horrible",
        "come in",
        "i like it",
        "ill get in",
        "now go home",
        "he is no fool",
        "who will pay",
        "he avoids me",
        "how beautiful",
        "is tom good",
        "shut it off",
        "tom told him",
        "are you lost",
        "were mature",
        "im an adult",
        "im neutral",
        "toms thirty",
        "i love them",
        "come at once",
        "i wont go",
        "did it work",
        "i like girls",
        "open fire",
        "it was empty",
        "im healthy",
        "im through",
        "i give up",
        "toms coming",
        "tom arrived",
        "here i come",
        "i believe you",
        "they fell",
        "i have needs",
        "is it time",
        "be tolerant",
        "check again",
        "i am tired",
        "i like him",
        "i grunted",
        "listen",
        "well decide",
        "i feel safe",
        "terrific",
        "back off",
        "its sweet",
        "come at once",
        "he got away",
        "he runs",
        "i feel cold",
        "it snowed",
        "get moving",
        "he is lazy",
        "dont do that",
        "stay calm",
        "stop moving",
        "take a bus",
        "youve grown",
        "well do it",
        "put it there",
        "how awful",
        "we need more",
        "we need time",
        "did i ask you",
        "dont go in",
        "answer me",
        "ignore that",
        "forget it",
        "stop that",
        "now im sad",
        "he looks well",
        "i hate golf",
        "im starved",
        "thats life",
        "tom stopped",
        "is it windy",
        "a car hit tom",
        "tom is timid",
        "go ahead",
        "i know him",
        "close the box",
        "i believe tom",
        "i sneezed",
        "that will do",
        "im ok",
        "i do like tom",
        "i wont bite",
        "warn tom",
        "take care",
        "its a rumor",
        "hands off",
        "get out",
        "im upset",
        "were ok",
        "no way",
        "listen",
        "keep walking",
        "i want mine",
        "its wrong",
        "bring food",
        "it was mine",
        "i was fired",
        "tomll cry",
        "they tried",
        "too late",
        "tom cheats",
        "birds sing",
        "toms pushy",
        "tom is lucky",
        "no objection",
        "am i fired",
        "we talked",
        "we must act",
        "its too far",
        "he went blind",
        "im fasting",
        "im lucky",
        "its mine",
        "i am a woman",
        "dont try me",
        "im not home",
        "i agree",
        "tom is slow",
        "he dug a hole",
        "are they here",
        "were joking",
        "try this",
        "tom answered",
        "youre timid",
        "tom isnt ok",
        "lets play",
        "help tom",
        "i have a car",
        "i forgot",
        "im no fool",
        "tom stopped",
        "tom frowned",
        "youre nuts",
        "leave now",
        "im not ok",
        "dont push me",
        "goodbye",
        "i will learn",
        "im awake",
        "im bald",
        "wait here",
        "hey relax",
        "ill ask tom",
        "well fight",
        "its for you",
        "i got stuck",
        "i want these",
        "ill live",
        "im not tom",
        "stay down",
        "i love tom",
        "im a farmer",
        "i said no",
        "can i use it",
        "he is too old",
        "im busy now",
        "i feel alive",
        "take care",
        "tom is nasty",
        "look at us",
        "dont panic",
        "stop gawking",
        "toms early",
        "look alive",
        "do your best",
        "tom snores",
        "they agree",
        "tom saw it",
        "dont argue",
        "of course",
        "tom cringed",
        "who is he",
        "wholl start",
        "follow tom",
        "i love you",
        "come out here",
        "i found it",
        "its perfect",
        "hey its me",
        "i want that",
        "youre nuts",
        "they smiled",
        "run",
        "it may snow",
        "hang on",
        "i need money",
        "let me see",
        "who was here",
        "were young",
        "come quickly",
        "shes stupid",
        "i need you",
        "tom ate",
        "im blind",
        "step back",
        "keep trying",
        "is tom ill",
        "i got bored",
        "i cant fly",
        "i cant stop",
        "tom is ugly",
        "is it for me",
        "he dozed off",
        "tom did that",
        "wake up tom",
        "calm down",
        "stay here",
        "go home",
        "do it anyway",
        "im very fat",
        "i trust you",
        "keep smiling",
        "i hope not",
        "are you alone",
        "look out",
        "i survived",
        "get back here",
        "we like tom",
        "im nervous",
        "how annoying",
        "seriously",
        "ill try it",
        "you know her",
        "lifes short",
        "stop moving",
        "let me die",
        "im obese",
        "we all quit",
        "i understand",
        "i could help",
        "is it a joke",
        "whats new",
        "may i go",
        "are they busy",
        "i need it",
        "im fair",
        "tom is tired",
        "start over",
        "come alone",
        "tell tom",
        "its a curse",
        "how is it",
        "we went out",
        "tom wake up",
        "you scare me",
        "check this",
        "its a fish",
        "stay a while",
        "stand still",
        "im free",
        "he has a car",
        "vote for me",
        "ive no idea",
        "that does it",
        "i missed it",
        "its a trick",
        "be prepared",
        "i was cold",
        "i will fight",
        "dont be shy",
        "is it dirty",
        "tom hates me",
        "i want you",
        "were twins",
        "ignore him",
        "they cheat",
        "let go of me",
        "im certain",
        "wait a while",
        "stop that",
        "he is eating",
        "i ate out",
        "call home",
        "you can go",
        "it is my cat",
        "he lost face",
        "ask them",
        "its clear",
        "its",
        "tom dozed",
        "point it out",
        "they agree",
        "i called tom",
        "ill be back",
        "they lost",
        "i love cats",
        "i forgot it",
        "can tom swim",
        "i cant sleep",
        "were crazy",
        "whos paying",
        "i always lose",
        "stop tom",
        "grab him",
        "i drive fast",
        "go",
        "fix this",
        "he loved her",
        "love hurts",
        "be creative",
        "have another",
        "who wrote it",
        "we failed",
        "i use this",
        "im so fat",
        "who phoned",
        "im not poor",
        "are they here",
        "how deep",
        "are they busy",
        "will you go",
        "i phoned",
        "no problem",
        "i resigned",
        "tom ate it",
        "im loyal",
        "got it",
        "take it easy",
        "it was wet",
        "then what",
        "hands off",
        "i dont cook",
        "its funny",
        "i found it",
        "tom finished",
        "theyve gone",
        "we must try",
        "i often ski",
        "whats that",
        "are you nuts",
        "toms funny",
        "i can jump",
        "thanks",
        "i feel old",
        "see above",
        "ill do this",
        "im guilty",
        "id do it",
        "i live here",
        "we have food",
        "be kind",
        "im a father",
        "i am here",
        "help me",
        "im staying",
        "youre old",
        "we lost",
        "no one cared",
        "is it dirty",
        "were awake",
        "be realistic",
        "i like cats",
        "youre cute",
        "are we safe",
        "i do hope so",
        "i have a pen",
        "he ran",
        "tom is mad",
        "am i mistaken",
        "im dyslexic",
        "its hot",
        "catch tom",
        "you are mad",
        "help tom out",
        "im safe now",
        "im shocked",
        "plants grow",
        "look around",
        "see you",
        "wait",
        "im nervous",
        "bring wine",
        "i was alone",
        "thats fine",
        "come quickly",
        "god bless you",
        "why not both",
        "youre upset",
        "youre old",
        "are you lost",
        "im naked",
        "are you there",
        "who is it",
        "hes too old",
        "im autistic",
        "were dying",
        "we can begin",
        "ive eaten",
        "i dont study",
        "im not here",
        "im so tired",
        "life is fun",
        "get started",
        "look at that",
        "i miss you",
        "its amazing",
        "listen",
        "im addicted",
        "im naive",
        "tom moaned",
        "i screamed",
        "thats trash",
        "were fine",
        "take a rest",
        "this is free",
        "thanks a lot",
        "be serious",
        "whos that",
        "its ours",
        "i will stay",
        "toms happy",
        "im clumsy",
        "i hate you",
        "its monday",
        "i noticed",
        "im shy",
        "who is there",
        "tom is early",
        "tom went out",
        "tom looked",
        "back off",
        "i love rock",
        "i hope not",
        "i am tall",
        "go away",
        "i saw that",
        "are you mad",
        "mary giggled",
        "tomll come",
        "youre crazy",
        "im so fat",
        "fantastic",
        "thats fun",
        "be tolerant",
        "he was busy",
        "come off it",
        "got it",
        "forget it",
        "thats trash",
        "where are we",
        "be kind",
        "try some",
        "im a hero",
        "tom is crazy",
        "im relaxed",
        "who stood",
        "i will shoot",
        "toms afraid",
        "show me",
        "pick a card",
        "were fair",
        "examine them",
        "im winning",
        "ill ask tom",
        "tom spoke",
        "how awful",
        "you used me",
        "am i fat",
        "seriously",
        "i think so",
        "can i hug you",
        "calm down",
        "am i dying",
        "anything new",
        "well try",
        "i missed you",
        "he has a blog",
        "after you",
        "we forgot",
        "wholl cook",
        "were adults",
        "did tom eat",
        "you may go",
        "i am happy",
        "use this",
        "ill open it",
        "be nice",
        "tom is out",
        "do it with me",
        "get serious",
        "bless you",
        "tom is wrong",
        "hes not in",
        "i like that",
        "did you lose",
        "i miss you",
        "i am a boy",
        "i doubt it",
        "dont talk",
        "were dizzy",
        "let me out",
        "im old",
        "it was sad",
        "tom drinks",
        "get up",
        "tom ran",
        "tom knows",
        "tom met mary",
        "we were busy",
        "step inside",
        "we loved tom",
        "be careful",
        "i hope so",
        "is it true",
        "i am better",
        "help",
        "wait",
        "i still care",
        "i like blue",
        "dont fight",
        "that was odd",
        "take a seat",
        "is it clean",
        "wow",
        "come quickly",
        "i live here",
        "well done",
        "help tom",
        "you may stay",
        "forget tom",
        "i was stupid",
        "be careful",
        "listen",
        "i ate quickly",
        "i like math",
        "its",
        "im loved",
        "he spoke",
        "its stuck",
        "i think so",
        "i cried a lot",
        "im full",
        "i didnt sing",
        "i enjoy that",
        "im a pilot",
        "i wont come",
        "he is no fool",
        "i want it",
        "no comment",
        "may i eat",
        "stop singing",
        "youre funny",
        "theyre weak",
        "are you cops",
        "drive on",
        "we overslept",
        "he hung up",
        "i keep a dog",
        "how curious",
        "i need a hug",
        "were early",
        "i forgot it",
        "its urgent",
        "they approve",
        "help us tom",
        "its my bus",
        "hi im tom",
        "were sad",
        "im so happy",
        "i am curious",
        "let go of me",
        "im in pain",
        "we are free",
        "get out",
        "i am thirsty",
        "it isnt tom",
        "tom tries",
        "trust no one",
        "what is that",
        "come with me",
        "i loved you",
        "theyll grow",
        "ask tom",
        "how romantic",
        "whos she",
        "let me out",
        "its mine",
        "no problem",
        "theyve gone",
        "tom is wet",
        "he went blind",
        "dont get up",
        "were saved",
        "be brave",
        "tom likes it",
        "were going",
        "its perfect",
        "toms bored",
        "who won",
    ],
    dtype="<U195",
)

output_tokenizer = json.load(open(os.path.join(DATA_PATH, "output_tokenizer.json")))
output_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
    json.dumps(output_tokenizer)
)
to_categorical = tf.keras.utils.to_categorical


def transform_text_to_sequences(sentences, tokenizer):
    """Transform the sentences into padded sequence of indexes

    Make the required transformations on the text data to be used on the NMT model.
    Get the maximum length of the sentences, create the numpy array with padded
    sentences of indexes.

    Args:
          sentences (list): The list of sentences
      tokenizer (keras.preprocessing.text.Tokenizer): The fitter tokenizer

    Returns:
          The padded sequence of indexes.

    """

    # Get initial values for language
    vocab_size = len(tokenizer.word_index) + 1
    length = max([len(line.split()) for line in sentences])

    # integer encode sequences
    X = tokenizer.texts_to_sequences(sentences)
    # pad sequences with 0 values
    X = pad_sequences(X, maxlen=length, padding="post")

    return X


In [68]:
# Initialize the variable
Y = transform_text_to_sequences(en_sentences, output_tokenizer)

# Temporary list
ylist = list()
for sequence in Y:
    # One-hot encode sentence and append to list
    ylist.append(to_categorical(sequence, num_classes=en_vocab_size))

# Update the variable
Y = np.array(ylist).reshape(Y.shape[0], Y.shape[1], en_vocab_size)

# Print the raw sentence and its transformed version
print("Raw sentence: {0}\nTransformed: {1}".format(en_sentences[0], Y[0]))


Raw sentence: its my job
Transformed: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]


In [75]:
model = json.load(open(os.path.join(DATA_PATH, "models/pt_en_model.json")))
model = tf.keras.models.model_from_json(json.dumps(model))
# TODO load weights
# model.load_weights(os.path.join(DATA_PATH, "weights/pt_en_model_weights.h5"))
en_index_to_word = json.load(open(os.path.join(DATA_PATH, "en_index_to_word.json")))
test = np.array(
    [
        ["no way", "impossível"],
        ["were weak", "nós estamos fracos"],
        ["i want soup", "eu quero sopa"],
        ["keep it", "fica com ela"],
        ["she is old", "ela é velha"],
        ["choose one", "escolha uma"],
        ["it was toms", "era de tom"],
        ["we surrender", "nós desistimos"],
        ["ill sue you", "eu vou te processar"],
        ["tom can win", "tom pode ganhar"],
        ["look closely", "olhe de perto"],
        ["im faster", "eu sou mais rápida"],
        ["i feel dizzy", "estou tonto"],
        ["be careful", "toma cuidado"],
        ["dont leave", "não saia"],
        ["im not done", "eu ainda não estou pronto"],
        ["take a rest", "descansem"],
        ["youre sick", "você está doente"],
        ["its a gift", "é um presente"],
        ["tom vomited", "o tom vomitou"],
        ["i apologize", "perdão"],
        ["he ate out", "ele comeu fora"],
        ["be sensible", "tende bom senso"],
        ["i blinked", "eu pisquei"],
        ["be fair", "sê justo"],
        ["i need space", "eu preciso de espaço"],
        ["i will wait", "irei esperar"],
        ["im drunk", "estou embriagado"],
        ["do you mind", "você se importa"],
        ["tom agreed", "tom concordou"],
        ["im drunk", "estou bêbada"],
        ["take mine", "pegue o meu"],
        ["once again", "mais uma vez"],
        ["am i hired", "estou contratado"],
        ["its monday", "é segundafeira"],
        ["thats mine", "isso é meu"],
        ["get up", "levantate"],
        ["i whistled", "eu dei um assobio"],
        ["turn it off", "desligueo"],
        ["whats up", "e aí"],
        ["take it all", "leve tudo"],
        ["tom yelled", "tom gritou"],
        ["come tomorrow", "venha amanhã"],
        ["i ran", "eu corri"],
        ["i am tired", "estou cansado"],
        ["i didnt care", "eu não me importo"],
        ["youre shy", "vocês são tímidas"],
        ["must i hurry", "devo me apressar"],
        ["come to us", "venha até nós"],
        ["do fish sleep", "os peixes dormem"],
        ["stop them", "detenhanos"],
        ["it is unfair", "isso é injusto"],
        ["have a look", "dá uma vista de olhos"],
        ["i won", "eu venci"],
        ["is this cool", "isto é legal"],
        ["im with tom", "estou com tom"],
        ["get serious", "fique sério"],
        ["dont push", "não empurrem"],
        ["ill stay", "eu vou ficar"],
        ["years passed", "anos se passaram"],
        ["i like snow", "eu gosto de neve"],
        ["war is hell", "a guerra é um inferno"],
        ["we miss tom", "sentimos saudades do tom"],
        ["you must go", "tu deves partir"],
        ["i was wrong", "eu estava errado"],
        ["stop lying", "pare de mentir"],
        ["why not", "por que não"],
        ["go inside", "entre"],
        ["i disagree", "eu discordo"],
        ["will he die", "ele morrerá"],
        ["im horrible", "eu estou horrível"],
        ["he was alone", "ele estava só"],
        ["sit tight", "peraí"],
        ["were calm", "nós somos tranquilos"],
        ["this is mine", "isto é meu"],
        ["stay here", "fique aqui"],
        ["look at him", "olhe para ele"],
        ["im punctual", "sou pontual"],
        ["tom listened", "tom escutou"],
        ["i was hired", "eu fui contratado"],
        ["tom hates tv", "tom detesta tv"],
        ["im rich", "eu sou rico"],
        ["tom shaved", "tom barbeouse"],
        ["it may rain", "pode chover"],
        ["im furious", "estou furiosa"],
        ["have a cookie", "come um biscoito"],
        ["whats new", "que há de novo"],
        ["im a doctor", "sou médico"],
        ["dont fight", "não lute"],
        ["he is french", "ele é francês"],
        ["ask an expert", "pergunte a um especialista"],
        ["i said yes", "disse sim"],
        ["you are ugly", "você é feio"],
        ["toms sick", "tom está doente"],
        ["can i hug you", "posso te dar um abraço"],
        ["i want that", "eu quero isso"],
        ["im so bored", "estou muito entediado"],
        ["hes happy", "ele é feliz"],
        ["im wealthy", "eu sou rico"],
        ["dont risk it", "não se arrisque"],
        ["dont get mad", "não fique bravo"],
        ["she is lucky", "ela tem sorte"],
        ["toms crazy", "o tom é louco"],
        ["i handled it", "eu lidei com isso"],
        ["im sleeping", "estou dormindo"],
        ["go ahead", "continua"],
        ["hes stupid", "ele é estúpido"],
        ["unbelievable", "inacreditável"],
        ["hows the dog", "como o cachorro está"],
        ["toms deaf", "tom é surdo"],
        ["come home", "venha para casa"],
        ["do you agree", "vocês concordam"],
        ["tom drowned", "tom se afogou"],
        ["let me leave", "deixeme ir embora"],
        ["tom voted", "tom votou"],
        ["i fear so", "eu temo que sim"],
        ["we all cried", "todos nós choramos"],
        ["ive done it", "eu o fiz"],
        ["is tom out", "o tom está fora"],
        ["dont rush me", "não me empurrem"],
        ["i found it", "encontrei"],
        ["nice shot", "belo tiro"],
        ["hey relax", "ei relaxem"],
        ["weve won", "vencemos"],
        ["im cool", "sou legal"],
        ["watch this", "veja isto"],
        ["im mature", "eu sou maturo"],
        ["ill call", "ligarei"],
        ["tom swore", "tom jurou"],
        ["youre scary", "tu és assustador"],
        ["how are you", "como vai você"],
        ["listen", "ouça isso"],
        ["i went too", "eu fui também"],
        ["tom is on tv", "tom está na tv"],
        ["i can cook", "eu posso cozinhar"],
        ["wait outside", "esperem lá fora"],
        ["well go out", "nós vamos sair"],
        ["did tom help", "o tom ajudou"],
        ["hello girls", "olá meninas"],
        ["stay cool", "fique calmo"],
        ["tom is a pig", "tom é um porco"],
        ["i was fired", "eu fui demitido"],
        ["kiss me", "me beije"],
        ["come over", "vem"],
        ["follow tom", "sigam o tom"],
        ["im involved", "estou envolvido"],
        ["well win", "nós venceremos"],
        ["work slowly", "trabalhe devagar"],
        ["were free", "somos livres"],
        ["please sit", "por favor sentemse"],
        ["he is drunk", "ele está bêbado"],
        ["ill do this", "eu vou fazer"],
        ["you fainted", "vocês desmaiaram"],
        ["keep calm", "acalmese"],
        ["you idiot", "seu idiota"],
        ["he resigned", "ele renunciou"],
        ["how wonderful", "maravilhoso"],
        ["thats mine", "é meu"],
        ["come here", "vem cá"],
        ["i bit my lip", "eu mordo o lábio"],
        ["were dating", "estamos namorando"],
        ["they escaped", "escaparam"],
        ["he cant swim", "ele não sabe nadar"],
        ["run and hide", "corra e escondase"],
        ["i love you", "eu te amo"],
        ["hes wet", "ele está molhado"],
        ["theyre gone", "elas se foram"],
        ["stay with us", "fiquem conosco"],
        ["who is she", "quem é ela"],
        ["they hate me", "elas me odeiam"],
        ["does tom know", "tom sabe"],
        ["we survived", "sobrevivemos"],
        ["stay calm", "fique calma"],
        ["save tom", "salve o tom"],
        ["i love cats", "adoro gatos"],
        ["wood burns", "a madeira queima"],
        ["try it on", "experimenteo"],
        ["do as i asked", "faça como eu pedi"],
        ["he has come", "ele veio"],
        ["aim fire", "preparar apontar fogo"],
        ["i wont lose", "eu não vou perder"],
        ["drive safely", "dirija com cuidado"],
        ["lets review", "vamos revisar"],
        ["be realistic", "seja realista"],
        ["hurry up tom", "apressese tom"],
        ["its free", "é de graça"],
        ["i hate this", "eu odeio isto"],
        ["they came in", "eles entraram"],
        ["come over", "venham"],
        ["i like rice", "gosto de arroz"],
        ["youre kind", "você é simpático"],
        ["youre shy", "vocês são tímidos"],
        ["i am in pain", "estou com dor"],
        ["sign this", "assinem isso"],
        ["toms young", "o tom é jovem"],
        ["come along", "vem com a gente"],
        ["flowers bloom", "flores florescem"],
        ["beware of dog", "cuidado com o cão"],
        ["go ahead", "continuem"],
        ["are you crazy", "você é louco"],
        ["theyre evil", "elas são más"],
        ["this is big", "isto é grande"],
        ["i almost won", "eu quase ganhei"],
        ["come with us", "venha com a gente"],
        ["tom is happy", "tom está feliz"],
        ["i waved", "eu acenei"],
        ["i have a car", "eu tenho um carro"],
        ["my jaw hurts", "minha mandíbula dói"],
        ["tom decided", "tom decidiu"],
        ["watch tom", "olha o tom"],
        ["well obey", "nós obedeceremos"],
        ["go ahead", "continue"],
        ["ill pay you", "eu vou te pagar"],
        ["i will try", "eu vou tentar"],
        ["dont move", "não se mexa"],
        ["unbelievable", "incrível"],
        ["im ready", "estou pronta"],
        ["it stinks", "isso fede"],
        ["im old", "eu sou velho"],
        ["ignore him", "ignoreo"],
        ["that was odd", "foi estranho"],
        ["ill try it", "eu vou tentar"],
        ["is this tom", "este é tom"],
        ["toms angry", "tom está com raiva"],
        ["vote for me", "vote em mim"],
        ["were lazy", "nós somos preguiçosas"],
        ["lets swim", "vamos nadar"],
        ["ill decide", "decidirei"],
        ["its for me", "isso é para mim"],
        ["be tolerant", "sê tolerante"],
        ["come with me", "venha comigo"],
        ["look out", "atenção"],
        ["its hers", "isso é dela"],
        ["is this mine", "esse é o meu"],
        ["hang on tight", "segurese firme"],
        ["tom teaches", "tom ensina"],
        ["i need help", "preciso de ajuda"],
        ["are you upset", "você está triste"],
        ["youve won", "você venceu"],
        ["leave town", "deixe a cidade"],
        ["i dont care", "não estou nem aí"],
        ["im popular", "eu sou popular"],
        ["this is hard", "isto é difícil"],
        ["youre wrong", "você está errado"],
        ["hes smart", "ele é inteligente"],
        ["keep back", "afastese"],
        ["hey dont go", "ei não vá"],
        ["excuse me", "com licença"],
        ["everyone wins", "todo mundo sai ganhando"],
        ["dont be long", "não demore"],
        ["i love her", "eu a amo"],
        ["stay down", "fique abaixado"],
        ["lets play", "vamos tocar"],
        ["bring the key", "traga a chave"],
        ["did tom fall", "o tom caiu"],
        ["youre gross", "você é nojento"],
        ["i did my best", "eu fiz o meu melhor"],
        ["you may stay", "pode ficar"],
        ["taste this", "provem isto"],
        ["fight or die", "lute ou morra"],
        ["ill pay you", "irei te pagar"],
        ["its open", "está aberto"],
        ["i tripped", "tropecei"],
        ["are you brave", "você é corajoso"],
        ["can he do it", "ele sabe fazer isso"],
        ["dont push me", "não me empurra"],
        ["thanks", "obrigado"],
        ["is this true", "é verdade"],
        ["you need me", "vocês precisam de mim"],
        ["tom hates it", "o tom odeia isso"],
        ["i met a girl", "eu conheci uma garota"],
        ["tom is short", "o tom é baixo"],
        ["nobody died", "ninguém morreu"],
        ["let me speak", "deixeme falar"],
        ["come closer", "cheguem mais perto"],
        ["its red", "é vermelho"],
        ["could i do it", "eu poderia fazer isso"],
        ["its done", "pronto já está"],
        ["i need a hug", "preciso de um abraço"],
        ["this is nice", "isso é legal"],
        ["tom is rich", "tom é rico"],
        ["i am busy", "estou ocupado"],
        ["is it a wolf", "isso é um lobo"],
        ["we sat down", "nos sentamos"],
        ["tom grinned", "tom sorriu forçadamente"],
        ["tom escaped", "tom escapou"],
        ["well sing", "cantaremos"],
        ["tomll know", "tom vai saber"],
        ["speak up", "falem mais alto"],
        ["i cant sleep", "não consigo dormir"],
        ["how absurd", "que absurdo"],
        ["im back", "voltei"],
        ["no kidding", "sem brincadeira"],
        ["where is she", "cadê ela"],
        ["ill get up", "vou me levantar"],
        ["im out here", "estou aqui fora"],
        ["tomll lose", "tom vai perder"],
        ["come anytime", "venha quando quiser"],
        ["we got ready", "nós nos preparamos"],
        ["is this ok", "está ok"],
        ["was tom busy", "tom estava ocupado"],
        ["im a farmer", "sou um fazendeiro"],
        ["dont do it", "não faça isso"],
        ["how cute", "que fofinho"],
        ["i will learn", "vou aprender"],
        ["do you drink", "vocês bebem"],
        ["i hope so", "assim espero"],
        ["this is easy", "isto é fácil"],
        ["youre cute", "as senhoras são bonitas"],
        ["itll work", "isso vai funcionar"],
        ["how cute", "que bonitinho"],
        ["tom is calm", "tom é calmo"],
        ["i need money", "necessito de dinheiro"],
        ["sweet dreams", "bons sonhos"],
        ["were sleepy", "nós estamos com sono"],
        ["try hard", "se esforça"],
        ["he was alone", "ele estava sozinho"],
        ["boys run fast", "os meninos correm rápido"],
        ["i need a job", "eu preciso de um emprego"],
        ["can you swim", "sabe nadar"],
        ["i am working", "eu estou trabalhando"],
        ["cool down", "acalmemse"],
        ["pick it up", "apanheo"],
        ["toms ugly", "tom é feio"],
        ["we know why", "sabemos por quê"],
        ["we knew this", "nós sabíamos disso"],
        ["i washed it", "eu o lavei"],
        ["he is happy", "ele é feliz"],
        ["i am like him", "eu sou como ele"],
        ["they won", "eles venceram"],
        ["keep reading", "continue lendo"],
        ["how tragic", "que trágico"],
        ["is tom drunk", "o tom está bêbado"],
        ["lets go eat", "vamos comer"],
        ["im hot", "tenho calor"],
        ["he is alone", "ele está sozinho"],
        ["cain was evil", "cain era malvado"],
        ["how lovely", "que amável"],
        ["ill win", "vou vencer"],
        ["ill get you", "vou te pegar"],
        ["take it easy", "acalmese"],
        ["take tom", "leva o tom"],
        ["toms gone", "tom se foi"],
        ["im the best", "eu sou o melhor"],
        ["tom is mad", "tom está insano"],
        ["were crazy", "a gente é maluco"],
        ["shut up", "cala a boca"],
        ["i will learn", "eu aprenderei"],
        ["i also went", "eu também fui"],
        ["dont ask", "não pergunte"],
        ["contact tom", "contacte o tom"],
        ["contact tom", "contactem o tom"],
        ["youre old", "sois velhas"],
        ["thats true", "isso é verdade"],
        ["have courage", "tenha coragem"],
        ["were hiding", "nós estamos nos escondendo"],
        ["it isnt tom", "esse não é o tom"],
        ["i hate milk", "eu odeio leite"],
        ["go get a beer", "vá pegar uma cerveja"],
        ["start now", "comece agora"],
        ["can we talk", "podemos conversar"],
        ["we need time", "precisamos de tempo"],
        ["toms alive", "tom está vivo"],
        ["start again", "começa de novo"],
        ["im tired", "estou cansado"],
        ["my eyes itch", "meus olhos coçam"],
        ["i have cash", "eu tenho dinheiro"],
        ["you idiot", "idiota"],
        ["are you tired", "você está cansada"],
        ["memorize it", "memorize"],
        ["dont be evil", "não seja má"],
        ["i feel lost", "eu me sinto perdido"],
        ["we agreed", "nós concordamos"],
        ["its too big", "é muito grande"],
        ["im winning", "estou vencendo"],
        ["well try", "a gente vai tentar"],
        ["tom cares", "tom se importa"],
        ["i want a dog", "eu quero um cachorro"],
        ["im broke", "estou quebrado"],
        ["i like that", "curto isso"],
        ["well win", "venceremos"],
        ["are you ok", "vocês estão bem"],
        ["nobody came", "não veio ninguém"],
        ["ignore them", "ignoreas"],
        ["i am single", "estou solteiro"],
        ["get some rest", "descanse um pouco"],
        ["i was bored", "eu estava entediada"],
        ["have fun", "divertete"],
        ["i just ate", "acabei de comer"],
        ["im sorry", "me desculpe"],
        ["im hungry", "quero comer"],
        ["keep it", "fiquem com ela"],
        ["ive no idea", "não tenho ideia"],
        ["i must study", "eu preciso estudar"],
        ["i need more", "preciso de mais"],
        ["im fighting", "eu estou lutando"],
        ["tom refused", "tom recusou"],
        ["im broke", "eu estou duro"],
        ["tom is weak", "tom é fraco"],
        ["i slipped", "eu escorreguei"],
        ["am i early", "cheguei cedo"],
        ["im busy", "estou ocupado"],
        ["i was sick", "eu estava doente"],
        ["they fell", "sentiram"],
        ["whats wrong", "o que está errado"],
        ["tom felt sad", "tom se sentiu triste"],
        ["is it for me", "é para mim"],
        ["youre wise", "você é sábio"],
        ["i saw that", "eu vi"],
        ["get real", "acorda"],
        ["keep digging", "continuem cavando"],
        ["youre old", "você é velha"],
        ["hes too busy", "ele está muito ocupado"],
        ["we remember", "nós lembramos"],
        ["get out", "saia"],
        ["im fine now", "eu estou bem agora"],
        ["go on", "vá"],
        ["youre brave", "você é valente"],
        ["i was hiding", "eu estava me escondendo"],
        ["im a vegan", "eu sou vegana"],
        ["it helps", "isso ajuda"],
        ["tom failed", "tom falhou"],
        ["im through", "acabei"],
        ["youre shy", "você está tímida"],
        ["help me tom", "ajudeme tom"],
        ["well wait", "nós vamos esperar"],
        ["im special", "eu sou especial"],
        ["tom tripped", "tom disparou"],
        ["well scream", "nós vamos gritar"],
        ["tom told us", "tom nos disse"],
        ["i laughed", "ri"],
        ["attack", "atacar"],
        ["help me", "ajudeme"],
        ["get a move on", "mexase"],
        ["it happens", "isso acontece"],
        ["wake up", "acordem"],
        ["im a farmer", "eu sou um agricultor"],
        ["wake up tom", "acorda tom"],
        ["i need air", "eu preciso de ar"],
        ["get down", "deitemse"],
        ["are they gone", "elas foram embora"],
        ["theyre back", "eles voltaram"],
        ["am i right", "eu estou certo"],
        ["goodbye", "tchau"],
        ["tom will pay", "tom vai pagar"],
        ["i dont snore", "eu não ronco"],
        ["he can read", "ele sabe ler"],
        ["tom got sick", "tom ficou doente"],
        ["do it for tom", "façam isso pelo tom"],
        ["im like tom", "eu sou como o tom"],
        ["someone came", "alguém veio"],
        ["well attack", "atacaremos"],
        ["im managing", "estou conseguindo"],
        ["im lying", "estou mentindo"],
        ["im dieting", "estou de dieta"],
        ["what a pain", "que dor"],
        ["bring backup", "tragam reforço"],
        ["i like that", "eu gosto disto"],
        ["how pathetic", "que patético"],
        ["do it again", "faz outra vez"],
        ["i liked tom", "gostei de tom"],
        ["im thin", "eu sou magro"],
        ["help me out", "ajudeme a sair"],
        ["sing along", "cante junto"],
        ["hes not sick", "ele não está doente"],
        ["tom won", "tom ganhou"],
        ["did tom reply", "o tom respondeu"],
        ["i like girls", "gosto de meninas"],
        ["ill explain", "explicarei"],
        ["was she seen", "ela foi vista"],
        ["thats good", "isso é bom"],
        ["dont look up", "não olhe para cima"],
        ["he shot at me", "ele atirou em mim"],
        ["tom likes me", "o tom gosta de mim"],
        ["were late", "estamos atrasados"],
        ["give him time", "dêlhe tempo"],
        ["have another", "tome outro"],
        ["are you home", "você está em casa"],
        ["i dont dream", "eu não sonho"],
        ["tom waved", "tom acenou"],
        ["im hungry", "estou faminto"],
        ["i am a boy", "eu sou um menino"],
        ["i cut myself", "eu me cortei"],
        ["listen", "escute"],
        ["i am sure", "tenho certeza"],
        ["its locked", "está trancado"],
        ["look at that", "olhe para aquilo"],
        ["i believe tom", "eu acredito no tom"],
        ["he mocked me", "ele zombou de mim"],
        ["who escaped", "quem escapou"],
        ["i drank milk", "eu bebi leite"],
        ["did i win", "eu ganhei"],
        ["be still", "acalmese"],
        ["i keep a dog", "eu tenho um cachorro"],
        ["stop smoking", "pare de fumar"],
        ["call me later", "me liga depois"],
        ["look alive", "se apresse"],
        ["id buy that", "eu compraria aquele"],
        ["i have won", "eu venci"],
        ["youre old", "você está velho"],
    ],
    dtype="<U195",
)
X_test = np.array(
    json.load(open(os.path.join(DATA_PATH, "nmt/x_test.json"))), dtype="int32"
)


def predict_one(model, sentence, index_to_word):
    """Translate one sentence

    Uses the pre-trained model to translate one Portuguese sentence into English.

    Args:
        model (keras.models.Sequential): The pre-trained model.
        sentence (string): The Portuguese sentence to translate.
        index_to_word (dict): Dictionary containing indexes as keys and words (English) as values.

    Returns:
        The translated sentence.


    """

    sentence = sentence.reshape((1, sentence.shape[0]))
    prediction = model.predict(sentence, verbose=0)[0]
    integers = [np.argmax(vector) for vector in prediction]
    target = list()
    for i in integers:
        word = index_to_word.get(i, None)
        if word is None:
            break
        target.append(word)
    return " ".join(target)


In [76]:
# Function to predict many phrases
def predict_many(model, sentences, index_to_word, raw_dataset):
    for i, sentence in enumerate(sentences):
        # Translate the Portuguese sentence
        translation = predict_one(model, sentence, index_to_word)

        # Get the raw Portuguese and English sentences
        raw_target, raw_src = raw_dataset[i]

        # Print the correct Portuguese and English sentences and the predicted
        print(
            "src=[%s], target=[%s], predicted=[%s]" % (raw_src, raw_target, translation)
        )


predict_many(model, X_test[:10], en_index_to_word, test)


src=[impossível], target=[no way], predicted=[]
src=[nós estamos fracos], target=[were weak], predicted=[]
src=[eu quero sopa], target=[i want soup], predicted=[]
src=[fica com ela], target=[keep it], predicted=[]
src=[ela é velha], target=[she is old], predicted=[]
src=[escolha uma], target=[choose one], predicted=[]
src=[era de tom], target=[it was toms], predicted=[]
src=[nós desistimos], target=[we surrender], predicted=[]
src=[eu vou te processar], target=[ill sue you], predicted=[]
src=[tom pode ganhar], target=[tom can win], predicted=[]
