In [1]:
import numpy as np
import random
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.text import text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)

In [2]:
# Notebook-wide configuration. Every later cell reads these constants.

# Number of single-epoch fit() rounds run by the training loop.
EPOCHS = 20
# Mini-batch size passed to fit().
BATCH_SIZE = 128
# Vocabulary size: input_dim of both Embedding layers and the width of the
# decoder's softmax output layer.
MAX_WORDS = 10000
# The file reader stops once this many lines have been consumed.
READ_LINES = 60000
# Number of units in every LSTM layer (encoder and decoder).
LAYER_SIZE = 256
# output_dim of both Embedding layers.
EMBEDDING_WIDTH = 128
# Fraction (0..1, despite the name) of rows held out as the test set.
TEST_PERCENT = 0.2
# Number of test rows decoded and printed after each training epoch.
SAMPLE_SIZE = 20
# Token handed to the Keras Tokenizer as its out-of-vocabulary marker.
OOV_WORD = 'UNK'
# Reserved token ids. 0 is the padding value (the Embedding layers are built
# with mask_zero=True).
PAD_INDEX = 0
# Presumably the id the Keras Tokenizer assigns to its oov_token -- TODO confirm.
OOV_INDEX = 1
# START and STOP occupy the two highest ids of the vocabulary; the tokenizer
# is created with num_words=MAX_WORDS-2.
START_INDEX = MAX_WORDS - 2
STOP_INDEX = MAX_WORDS - 1
# Sentences are truncated to this many words when read, and greedy decoding
# stops after this many steps.
MAX_LENGTH = 60
# NOTE(review): absolute, machine-specific path -- the notebook will not run
# elsewhere without editing it. Consider a path relative to a data directory.
SRC_DEST_FILE_NAME = 'C:/Users/pipel/Documents/Javeriana Topicos/Sesion 3/Version 2/fra.txt'

In [3]:
# Read the tab-separated sentence-pair file
def read_file_combined(file_name, max_len):
    """Read sentence pairs from a tab-separated text file.

    Each line is split on tabs. Field 1 is taken as the source sentence and
    field 0 as the destination sentence. Both are split into words with
    ``text_to_word_sequence`` and truncated to ``max_len`` words.

    At most ``READ_LINES`` lines are consumed. Lines that do not contain at
    least two tab-separated fields (for example a blank trailing line) are
    skipped instead of raising ``IndexError``; they still count towards the
    ``READ_LINES`` limit.

    Args:
        file_name: Path of the UTF-8 encoded text file.
        max_len: Maximum number of words kept per sentence.

    Returns:
        A tuple ``(src_word_sequences, dest_word_sequences)`` of two lists of
        word lists, aligned by position.

    Raises:
        OSError: If the file cannot be opened.
    """
    src_word_sequences = []
    dest_word_sequences = []
    # The context manager closes the file even if parsing raises midway.
    with open(file_name, 'r', encoding='utf-8') as file:
        for i, line in enumerate(file):
            if i == READ_LINES:
                break
            pair = line.split('\t')
            if len(pair) < 2:
                # Blank or malformed line: there is no sentence pair here.
                continue
            src_word_sequences.append(text_to_word_sequence(pair[1])[:max_len])
            dest_word_sequences.append(text_to_word_sequence(pair[0])[:max_len])
    return src_word_sequences, dest_word_sequences

In [4]:
# Fit a tokenizer and encode the sentences with it
def tokenize(sequences):
    """Fit a Keras ``Tokenizer`` on ``sequences`` and encode them.

    The tokenizer is created with ``num_words`` set to ``MAX_WORDS - 2`` and
    ``OOV_WORD`` as its out-of-vocabulary token. ``START_INDEX`` and
    ``STOP_INDEX`` are defined as ``MAX_WORDS - 2`` and ``MAX_WORDS - 1``.

    Args:
        sequences: The texts (here: lists of words) to fit on and encode.

    Returns:
        A tuple ``(tokenizer, token_sequences)``: the fitted tokenizer and the
        result of ``texts_to_sequences`` for the same input.
    """
    vocab_limit = MAX_WORDS - 2
    fitted = Tokenizer(num_words=vocab_limit, oov_token=OOV_WORD)
    fitted.fit_on_texts(sequences)
    return fitted, fitted.texts_to_sequences(sequences)

# Turn token ids back into words and print them
def tokens_to_words(tokenizer, seq):
    """Print the words corresponding to a sequence of token ids.

    The reserved ids are rendered with fixed labels (``'PAD'``, ``OOV_WORD``,
    ``'START'``, ``'STOP'``); every other id is looked up through
    ``tokenizer.sequences_to_texts``. The resulting list is printed; nothing
    is returned.

    Args:
        tokenizer: Object providing ``sequences_to_texts``.
        seq: Iterable of token ids.
    """
    # Checked in this order, mirroring an if/elif chain.
    reserved_labels = (
        (PAD_INDEX, 'PAD'),
        (OOV_INDEX, OOV_WORD),
        (START_INDEX, 'START'),
        (STOP_INDEX, 'STOP'),
    )

    def decode(token):
        for reserved_id, label in reserved_labels:
            if token == reserved_id:
                return label
        return tokenizer.sequences_to_texts([[token]])[0]

    print([decode(token) for token in seq])

In [5]:
# Read the file and tokenize it

# Word sequences for both sides of every sentence pair, truncated to MAX_LENGTH words.
src_seq, dest_seq = read_file_combined(SRC_DEST_FILE_NAME, MAX_LENGTH)
# One independent tokenizer (and vocabulary) per language.
src_tokenizer, src_token_seq = tokenize(src_seq)
dest_tokenizer, dest_token_seq = tokenize(dest_seq)

In [6]:
# Prepare the training data

# Decoder targets: each destination sentence followed by STOP_INDEX.
dest_target_token_seq = [x + [STOP_INDEX] for x in dest_token_seq]
# Decoder inputs: the target sequences prefixed with START_INDEX, so every
# input is one token longer than its target.
dest_input_token_seq = [[START_INDEX] + x for x in dest_target_token_seq]

# The source uses pad_sequences' default padding side; the destination
# sequences are padded at the end (padding='post').
src_input_data = pad_sequences(src_token_seq)
dest_input_data = pad_sequences(dest_input_token_seq, padding='post')

# Targets are padded to the same width as the decoder inputs.
dest_target_data = pad_sequences (dest_target_token_seq, padding='post',
                                 maxlen=len(dest_input_data[0]))

In [7]:
# Split into train and test sets
# NOTE(review): no random seed is set in the visible code, so the split and
# the inspection sample differ from run to run.
rows = len(src_input_data[:,0])
all_indices = list(range(rows))
test_rows = int(rows * TEST_PERCENT)
test_indices = random.sample(all_indices, test_rows)
# all_indices is ascending, so sorting the set difference yields exactly the
# list that filtering all_indices would, without scanning the test_indices
# list once per row (which is quadratic).
train_indices = sorted(set(all_indices).difference(test_indices))
# Every row must land in exactly one of the two sets.
assert len(train_indices) + len(test_indices) == rows

train_src_input_data = src_input_data[train_indices]
train_dest_input_data = dest_input_data[train_indices]
train_dest_target_data = dest_target_data[train_indices]

test_src_input_data = src_input_data[test_indices]
test_dest_input_data = dest_input_data[test_indices]
test_dest_target_data = dest_target_data[test_indices]

# Draw a sample of the test set to inspect after each epoch.
# test_indices is rebound here: from this point on it holds positions inside
# the test_* arrays (0 .. test_rows-1), not row numbers of the full data set.
test_indices = list(range(test_rows))
sample_indices = random.sample(test_indices, SAMPLE_SIZE)
sample_input_data = test_src_input_data[sample_indices]
sample_target_data = test_dest_target_data[sample_indices]

In [8]:
# Encoder: an embedding followed by two stacked LSTM layers. The model
# outputs the final hidden (h) and cell (c) state of both LSTM layers.

# The input is the token sequence in the source language (variable length).
enc_embedding_input = Input(shape=(None,))

# Create the encoder layers
enc_embedding_layer = Embedding(output_dim=EMBEDDING_WIDTH,
                               input_dim = MAX_WORDS,
                               mask_zero=True)
# The first LSTM returns the full sequence so the second LSTM can consume it.
enc_layer1 = LSTM(LAYER_SIZE, return_state = True, return_sequences=True)
enc_layer2 = LSTM(LAYER_SIZE, return_state = True)

# Connect the encoder layers
enc_embedding_layer_outputs = enc_embedding_layer (enc_embedding_input)
enc_layer1_outputs, enc_layer1_state_h, enc_layer1_state_c = enc_layer1(enc_embedding_layer_outputs)
# The output of the top LSTM is discarded; only its states are kept.
_, enc_layer2_state_h, enc_layer2_state_c = enc_layer2(enc_layer1_outputs)

# Assemble the model
enc_model = Model(enc_embedding_input,
                 [enc_layer1_state_h, enc_layer1_state_c,
                  enc_layer2_state_h, enc_layer2_state_c])

enc_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 128)         1280000   
_________________________________________________________________
lstm (LSTM)                  [(None, None, 256), (None 394240    
_________________________________________________________________
lstm_1 (LSTM)                [(None, 256), (None, 256) 525312    
Total params: 2,199,552
Trainable params: 2,199,552
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Build the decoder
# Inputs: the encoder's intermediate states and the token sequences in the
# destination language.

# One (h, c) state pair per decoder LSTM layer, plus the token sequence.
dec_layer1_state_input_h = Input(shape=(LAYER_SIZE,))
dec_layer1_state_input_c = Input(shape=(LAYER_SIZE,))
dec_layer2_state_input_h = Input(shape=(LAYER_SIZE,))
dec_layer2_state_input_c = Input(shape=(LAYER_SIZE,))
dec_embedding_input = Input(shape=(None,))

# Create the decoder layers
dec_embedding_layer = Embedding(output_dim=EMBEDDING_WIDTH,
                               input_dim = MAX_WORDS,
                               mask_zero=True)
dec_layer1 = LSTM(LAYER_SIZE, return_state = True, return_sequences=True)
dec_layer2 = LSTM(LAYER_SIZE, return_state = True, return_sequences=True)
# Softmax over the MAX_WORDS vocabulary entries.
dec_layer3 = Dense(MAX_WORDS, activation = 'softmax')

# Connect the decoder layers; each LSTM starts from the state inputs above.
dec_embedding_layer_outputs = dec_embedding_layer(dec_embedding_input)
dec_layer1_outputs, dec_layer1_state_h, dec_layer1_state_c = dec_layer1(dec_embedding_layer_outputs,
                                                                       initial_state=[dec_layer1_state_input_h,
                                                                                      dec_layer1_state_input_c])
dec_layer2_outputs, dec_layer2_state_h, dec_layer2_state_c = dec_layer2(dec_layer1_outputs,
                                                                       initial_state=[dec_layer2_state_input_h,
                                                                                      dec_layer2_state_input_c])
dec_layer3_outputs = dec_layer3(dec_layer2_outputs)

# Build the model. Besides the word probabilities it also outputs the updated
# LSTM states, which the inference loop feeds back in on the next step.
dec_model = Model([dec_embedding_input,
                  dec_layer1_state_input_h,
                  dec_layer1_state_input_c,
                  dec_layer2_state_input_h,
                  dec_layer2_state_input_c],
                 [dec_layer3_outputs,dec_layer1_state_h,dec_layer1_state_c,
                 dec_layer2_state_h,dec_layer2_state_c])

dec_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 128)    1280000     input_6[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 256)]        0                                            
____________________________________________________________________________________________

In [10]:
# Build and compile the training model: the encoder's four output states are
# fed into the decoder as its initial states.
train_enc_embedding_input = Input(shape=(None,))
train_dec_embedding_input = Input(shape=(None,))
intermediate_state = enc_model(train_enc_embedding_input)
# Only the decoder's word probabilities are needed for training; the state
# outputs are discarded.
train_dec_output, _, _, _, _ = dec_model([train_dec_embedding_input] +
                                         intermediate_state )
training_model = Model ([train_enc_embedding_input,
                         train_dec_embedding_input],
                        train_dec_output)

# `learning_rate` is the supported argument name; the `lr` alias is
# deprecated and rejected by newer Keras releases.
optimizer = RMSprop(learning_rate=0.01)
training_model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics= ['accuracy'] )
training_model.summary()


Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
model (Model)                   [(None, 256), (None, 2199552     input_7[0][0]                    
__________________________________________________________________________________________________
model_1 (Model)                 [(None, None, 10000) 4769552     input_8[0][0]                    
                                                                 model[1][0]                

In [12]:
# Train and test repeatedly: one epoch of training, then greedy-decode the
# inspection sample and print source, reference and prediction.

for i in range (EPOCHS):
    print ('step: ',i)
    # Train the model for one epoch.
    # NOTE: `history` is overwritten on every iteration, so only the last
    # epoch's History object survives the loop.
    history = training_model.fit([train_src_input_data, train_dest_input_data],train_dest_target_data,
                                validation_data=(
                                [test_src_input_data, test_dest_input_data],test_dest_target_data),
                                batch_size=BATCH_SIZE, epochs=1)

    # Walk through the test sample to look at the results
    for (test_input, test_target) in zip (sample_input_data, sample_target_data):
        # Run one sentence through the encoder (batch of size 1)
        x = np.reshape(test_input, (1,-1))
        # list(...) so the concatenation below works whatever sequence type
        # predict() returns.
        last_states = list(enc_model.predict(x, verbose=0))
        # Hand the encoder states and the START token to the decoder
        prev_word_index = START_INDEX
        pred_seq = []

        for j in range(MAX_LENGTH):
            x = np.reshape(np.array(prev_word_index), (1,1))
            # Predict the next word and capture the decoder's internal state.
            # The states go straight back into last_states instead of being
            # unpacked into dec_layer*_state_* names, which would overwrite
            # the symbolic tensors of the same names from the decoder cell.
            preds, *last_states = dec_model.predict([x] + last_states, verbose=0)
            # Pick the most probable word
            prev_word_index = np.asarray(preds[0][0]).argmax()
            pred_seq.append(prev_word_index)
            if prev_word_index == STOP_INDEX:
                break

        tokens_to_words (src_tokenizer, test_input)
        tokens_to_words (dest_tokenizer, test_target)
        tokens_to_words (dest_tokenizer, pred_seq)
        print('\n\n')

step:  0
Train on 48000 samples, validate on 12000 samples
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'une', 'UNK']
['tom', 'has', 'tonsillitis', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'has', 'a', 'tree', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'regardé', 'de', 'près']
['tom', 'watched', 'closely', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'has', 'a', 'weight', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "c'était", 'très', 'amusant']
['it', 'was', 'a', 'lot', 'of', 'fun', 'STOP', 'PAD', 'PAD']
['it', 'was', 'very', 'funny', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'la', 'pièce', 'était', 'vide']
['the', 'room', 'was', 'empty', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'room', 'was', 'empty', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', "c'est", 'pour

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'le', 'garçon', 'était', 'UNK', 'nu']
['the', 'boy', 'was', 'shirtless', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'dog', 'was', 'on', '30', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'ne', 'vous', 'mettez', 'pas', 'en', 'colère', "s'il", 'vous', 'plaît']
['please', "don't", 'get', 'angry', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['please', "don't", 'get', 'tired', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'ne', 'le', 'faisais', 'pas']
['i', 'used', 'to', 'not', 'do', 'that', 'STOP', 'PAD', 'PAD']
['i', "didn't", 'know', 'it', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'comme', "c'était", 'bizarre', 'non']
['how', 'weird', 'that', 'was', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['no', 'one', 'is', 'crazy', 'STOP']



step:  2
Train on 48000 samples, validate on 12000 samples
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'vous', 'ai', 'fait', 'des', 'cookies']
['i', 'made', 'you', 'cookies', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['i', 'made', 'your', 'plans', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "t'as", 'besoin', "d'un", 'chauffeur']
['need', 'a', 'lift', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['do', 'you', 'need', 'a', 'hug', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "j'étais", 'UNK']
['i', 'was', 'overwhelmed', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['i', 'was', 'watching', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'me', 'suis', 'réveillée', 'sur', 'le', 'canapé']
['i', 'woke', 'up', 'on', 'the', 'couch', 'STOP', 'PAD', 'PAD']
['i', 'sat', 'on', 'the', 'grass', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'le', 'garçon', 'était', 'UNK', 'nu']
['the', 'boy', 'was', 'shirtless', 'STO

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'il', 'a', 'insulté', 'notre', 'équipe']
['he', 'insulted', 'our', 'team', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['he', 'is', 'drunk', 'again', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'où', 'est', 'mon', 'peigne\xa0']
["where's", 'my', 'comb', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
["where's", 'my', 'type', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'mettez', 'vous', 'simplement', 'au', 'travail']
['just', 'get', 'to', 'work', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['just', 'get', 'back', 'to', 'work', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'ne', 'me', 'sens', 'pas', 'malade']
['i', "don't", 'feel', 'sick', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['i', "don't", 'feel', 'sick', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'vous', 'ai', 'fait', 'des', 'cookies']
['i', 'made', 'you', 'cookies', 

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', "c'est", 'pour', 'toi']
['tom', 'this', 'is', 'for', 'you', 'STOP', 'PAD', 'PAD', 'PAD']
['tom', 'is', 'going', 'for', 'you', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'attention', 'à', 'la', 'voiture']
['watch', 'out', 'for', 'the', 'car', 'STOP', 'PAD', 'PAD', 'PAD']
['watch', 'at', 'the', 'car', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "n'es", 'tu', 'pas', 'en', 'retard']
["aren't", 'you', 'late', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
["aren't", 'you', 'late', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'il', 'nous', 'faut', 'des', 'règles']
['we', 'need', 'rules', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['we', 'need', 'the', 'rules', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'il', 'a', 'insulté', 'notre', 'équipe']
['he', 'insulted', 'our', 'team', 'STOP', 'PAD'

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'la', 'pièce', 'était', 'vide']
['the', 'room', 'was', 'empty', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'room', 'was', 'empty', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', "c'est", 'pour', 'toi']
['tom', 'this', 'is', 'for', 'you', 'STOP', 'PAD', 'PAD', 'PAD']
['tom', 'is', 'for', 'you', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'attention', 'à', 'la', 'voiture']
['watch', 'out', 'for', 'the', 'car', 'STOP', 'PAD', 'PAD', 'PAD']
['watch', 'home', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "n'es", 'tu', 'pas', 'en', 'retard']
["aren't", 'you', 'late', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
["aren't", 'you', 'late', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'il', 'nous', 'faut', 'des', 'règles']
['we', 'need', 'rules', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
[

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'une', 'UNK']
['tom', 'has', 'tonsillitis', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'has', 'a', 'picasso', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'regardé', 'de', 'près']
['tom', 'watched', 'closely', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'watched', 'closely', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "c'était", 'très', 'amusant']
['it', 'was', 'a', 'lot', 'of', 'fun', 'STOP', 'PAD', 'PAD']
['it', 'was', 'very', 'fun', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'la', 'pièce', 'était', 'vide']
['the', 'room', 'was', 'empty', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'room', 'was', 'empty', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', "c'est", 'pour', 'toi']
['tom', 'this', 'is', 'for', 'you', 'STOP', 'PAD

Train on 48000 samples, validate on 12000 samples
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'une', 'UNK']
['tom', 'has', 'tonsillitis', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'has', 'a', 'mustache', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'regardé', 'de', 'près']
['tom', 'watched', 'closely', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'watched', 'closely', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "c'était", 'très', 'amusant']
['it', 'was', 'a', 'lot', 'of', 'fun', 'STOP', 'PAD', 'PAD']
['that', 'was', 'very', 'fun', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'la', 'pièce', 'était', 'vide']
['the', 'room', 'was', 'empty', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'room', 'was', 'empty', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', "c'est", 'pour', 't

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'comme', "c'était", 'bizarre', 'non']
['how', 'weird', 'that', 'was', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['how', 'many', 'does', 'it', 'happen', 'STOP']



step:  15
Train on 48000 samples, validate on 12000 samples
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'une', 'UNK']
['tom', 'has', 'tonsillitis', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'has', 'a', 'big', 'mouth', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tom', 'a', 'regardé', 'de', 'près']
['tom', 'watched', 'closely', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['tom', 'watched', 'with', 'jazz', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "c'était", 'très', 'amusant']
['it', 'was', 'a', 'lot', 'of', 'fun', 'STOP', 'PAD', 'PAD']
['it', 'was', 'very', 'funny', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'me', 'suis', 'réveillée', 'sur', 'le', 'canapé']
['i', 'woke', 'up', 'on', 'the', 'couch', 'STOP', 'PAD', 'PAD']
['i', 'woke', 'up', 'at', 'the', 'sofa', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'le', 'garçon', 'était', 'UNK', 'nu']
['the', 'boy', 'was', 'shirtless', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['the', 'boy', 'had', 'two', 'pain', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'ne', 'vous', 'mettez', 'pas', 'en', 'colère', "s'il", 'vous', 'plaît']
['please', "don't", 'get', 'angry', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
["don't", 'get', 'back', 'off', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'ne', 'le', 'faisais', 'pas']
['i', 'used', 'to', 'not', 'do', 'that', 'STOP', 'PAD', 'PAD']
['i', 'no', 'one', 'is', 'it', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'comme', "c'était", 'bizarre', 'non']
['how', 'weird', 'th

['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'ne', 'me', 'sens', 'pas', 'malade']
['i', "don't", 'feel', 'sick', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['i', "don't", 'feel', 'sick', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'vous', 'ai', 'fait', 'des', 'cookies']
['i', 'made', 'you', 'cookies', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['i', 'let', 'you', 'kill', 'me', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "t'as", 'besoin', "d'un", 'chauffeur']
['need', 'a', 'lift', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['do', 'you', 'need', 'a', 'lift', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', "j'étais", 'UNK']
['i', 'was', 'overwhelmed', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
['i', 'was', 'very', 'tired', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'je', 'me', 'suis', 'réveillée', 'sur', 'le', 'canapé']
['i', 'woke', 'up', 'on', 'the', 