In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from gensim.models import Word2Vec
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation, Bidirectional,Embedding
import re

In [None]:
data = pd.read_csv('data.csv')

print(data.head())

### Preparacion del Dataset

In [None]:
data.info()

In [None]:
data.columns

In [None]:
# removiendo el espacio de las columnas
data.columns = data.columns.str.strip()
data['consulta'] = data['consulta'].str.strip()
data['intencion'] = data['intencion'].str.strip()

print(data.values)

In [None]:
#sort data by intencion
data = data.sort_values(by=['intencion'], ascending=True)
print(data.head())

In [None]:
# Eliminando los signos de puntuacion
data['consulta'] = data['consulta'].str.replace(r'[^\w\s]', '', regex=True)

In [None]:
text = data['consulta'].values
labels = data['intencion'].values

labels

### Split Text

In [3]:
import json
# cargar datos del json
# Loading json data
with open('data/data_full.json') as file:
  data = json.loads(file.read())

# Loading out-of-scope intent data
val_oos = np.array(data['oos_val'])
train_oos = np.array(data['oos_train'])
test_oos = np.array(data['oos_test'])

# Loading other intents data
val_others = np.array(data['val'])
train_others = np.array(data['train'])
test_others = np.array(data['test'])

# Merging out-of-scope and other intent data
val = np.concatenate([val_oos,val_others])
train = np.concatenate([train_oos,train_others])
test = np.concatenate([test_oos,test_others])
data = np.concatenate([train,test,val])
data = data.T

text = data[0]
labels = data[1]

In [5]:
labels

array(['oos', 'oos', 'oos', ..., 'card_declined', 'card_declined',
       'card_declined'], dtype='<U136')

In [4]:
train_txt,test_txt,train_label,test_labels = train_test_split(text,labels,test_size = 0.3)

### Tokenizacion

In [6]:
# Convertir el texto en una lista de palabras
words = []
for txt in text:
    words += text_to_word_sequence(txt)

# Contar el número de palabras únicas
unique_words = set(words)
num_unique_words = len(unique_words)

# Imprimir el número de palabras únicas
print("Número de palabras únicas: ", num_unique_words)

Número de palabras únicas:  7502


In [7]:
max_num_words = 14000 #el numero de palabras unicas es de 211 entonces agregamos un maximo con holgura
classes = np.unique(labels)

tokenizer = Tokenizer(num_words=max_num_words)
tokenizer.fit_on_texts(train_txt)
word_index = tokenizer.word_index

word_index

{'my': 1,
 'i': 2,
 'to': 3,
 'the': 4,
 'you': 5,
 'what': 6,
 'a': 7,
 'for': 8,
 'me': 9,
 'is': 10,
 'how': 11,
 'do': 12,
 'can': 13,
 'on': 14,
 'in': 15,
 'of': 16,
 'need': 17,
 'card': 18,
 'please': 19,
 'have': 20,
 'tell': 21,
 'are': 22,
 'know': 23,
 'it': 24,
 'at': 25,
 'get': 26,
 'credit': 27,
 'and': 28,
 'from': 29,
 'account': 30,
 'when': 31,
 'that': 32,
 'car': 33,
 'be': 34,
 "what's": 35,
 'want': 36,
 'list': 37,
 'this': 38,
 'like': 39,
 'will': 40,
 'your': 41,
 'if': 42,
 'does': 43,
 'with': 44,
 'would': 45,
 'change': 46,
 'many': 47,
 'an': 48,
 'much': 49,
 'time': 50,
 'new': 51,
 'bank': 52,
 'where': 53,
 'make': 54,
 'there': 55,
 'find': 56,
 'go': 57,
 'up': 58,
 'am': 59,
 'name': 60,
 'out': 61,
 'long': 62,
 'next': 63,
 'any': 64,
 'help': 65,
 'bill': 66,
 'take': 67,
 'about': 68,
 'set': 69,
 'should': 70,
 'was': 71,
 'use': 72,
 'oil': 73,
 'visa': 74,
 'now': 75,
 'call': 76,
 'reservation': 77,
 'phone': 78,
 'good': 79,
 'pay': 80,


In [8]:
ls=[]
for c in train_txt:
    ls.append(len(c.split()))   #cada consulta se convierte en una lista de palabras y se cuenta el numero de palabras

maxLen=int(np.percentile(ls, 98))   #se calcula el percentil 98 de la lista de palabras

train_sequences = tokenizer.texts_to_sequences(train_txt)   #convierte las consultas en secuencias de tokens
print("train sequence tokenice:\n",train_sequences)

train_sequences = pad_sequences(train_sequences, maxlen=maxLen, padding='post')     #rellena con ceros las secuencias para que todas tengan la misma longitud dada por el percentil 98
print("train sequence pad sequence:\n",train_sequences)

test_sequences = tokenizer.texts_to_sequences(test_txt)
print("test sequence tokenice:\n",test_sequences)
test_sequences = pad_sequences(test_sequences, maxlen=maxLen, padding='post')
print("test sequence tokenice:\n",test_sequences)

train sequence tokenice:
 [[13, 2, 574, 499, 8, 250, 15, 7, 318], [6, 164, 127, 43, 1, 132, 30, 426, 44], [13, 2, 76, 5, 1105, 343], [2, 205, 1, 30, 10, 458, 459, 2, 59, 113, 283, 131], [2, 17, 3, 23, 427, 786], [2, 17, 499, 103, 232, 3, 1, 89, 37], [103, 610, 3, 1, 3, 12, 37], [165, 246, 12, 2, 17, 136, 2, 57, 3, 855], [15, 1375, 140, 6, 10, 375], [35, 1, 483, 96, 14, 1, 258], [45, 5, 536, 484, 140, 29, 4, 132, 28, 92, 24, 259, 223], [19, 211, 1, 77, 8, 319, 328, 25, 1256], [19, 162, 1, 37, 16, 396], [6, 12, 2, 12, 8, 7, 477, 33, 353], [2, 1106, 32, 1, 158, 71, 126], [31, 10, 1, 33, 181, 8, 48, 329, 46], [215, 32, 1036, 129], [2, 17, 2030, 330, 68, 233, 980, 1, 477, 33, 353], [11, 62, 40, 499, 460, 15, 4, 575], [21, 9, 4, 271, 3, 54, 48, 658, 758], [2, 36, 3, 217, 6, 10, 14, 1, 396, 37], [201, 2, 188, 1, 820], [11, 12, 2, 56, 126, 158, 15, 48, 387], [42, 2, 821, 14, 1717, 3, 362, 11, 47, 176, 509, 13, 2, 67], [11, 856, 10, 7, 85, 500, 3, 787], [201, 24], [6, 43, 3366, 469], [11, 49, 2

In [9]:
label_encoder = LabelEncoder() # codifica las clases en numeros
integer_encoded = label_encoder.fit_transform(classes) # codifica las clases en numeros

print("Clases: ", classes)
print("Clases codificadas: ", integer_encoded)

Clases:  ['accept_reservations' 'account_blocked' 'alarm' 'application_status'
 'apr' 'are_you_a_bot' 'balance' 'bill_balance' 'bill_due' 'book_flight'
 'book_hotel' 'calculator' 'calendar' 'calendar_update' 'calories'
 'cancel' 'cancel_reservation' 'car_rental' 'card_declined' 'carry_on'
 'change_accent' 'change_ai_name' 'change_language' 'change_speed'
 'change_user_name' 'change_volume' 'confirm_reservation' 'cook_time'
 'credit_limit' 'credit_limit_change' 'credit_score' 'current_location'
 'damaged_card' 'date' 'definition' 'direct_deposit' 'directions'
 'distance' 'do_you_have_pets' 'exchange_rate' 'expiration_date'
 'find_phone' 'flight_status' 'flip_coin' 'food_last' 'freeze_account'
 'fun_fact' 'gas' 'gas_type' 'goodbye' 'greeting' 'how_busy'
 'how_old_are_you' 'improve_credit_score' 'income'
 'ingredient_substitution' 'ingredients_list' 'insurance'
 'insurance_change' 'interest_rate' 'international_fees'
 'international_visa' 'jump_start' 'last_maintenance' 'lost_luggage'
 'm

In [10]:
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)

print(integer_encoded)

[[  0]
 [  1]
 [  2]
 [  3]
 [  4]
 [  5]
 [  6]
 [  7]
 [  8]
 [  9]
 [ 10]
 [ 11]
 [ 12]
 [ 13]
 [ 14]
 [ 15]
 [ 16]
 [ 17]
 [ 18]
 [ 19]
 [ 20]
 [ 21]
 [ 22]
 [ 23]
 [ 24]
 [ 25]
 [ 26]
 [ 27]
 [ 28]
 [ 29]
 [ 30]
 [ 31]
 [ 32]
 [ 33]
 [ 34]
 [ 35]
 [ 36]
 [ 37]
 [ 38]
 [ 39]
 [ 40]
 [ 41]
 [ 42]
 [ 43]
 [ 44]
 [ 45]
 [ 46]
 [ 47]
 [ 48]
 [ 49]
 [ 50]
 [ 51]
 [ 52]
 [ 53]
 [ 54]
 [ 55]
 [ 56]
 [ 57]
 [ 58]
 [ 59]
 [ 60]
 [ 61]
 [ 62]
 [ 63]
 [ 64]
 [ 65]
 [ 66]
 [ 67]
 [ 68]
 [ 69]
 [ 70]
 [ 71]
 [ 72]
 [ 73]
 [ 74]
 [ 75]
 [ 76]
 [ 77]
 [ 78]
 [ 79]
 [ 80]
 [ 81]
 [ 82]
 [ 83]
 [ 84]
 [ 85]
 [ 86]
 [ 87]
 [ 88]
 [ 89]
 [ 90]
 [ 91]
 [ 92]
 [ 93]
 [ 94]
 [ 95]
 [ 96]
 [ 97]
 [ 98]
 [ 99]
 [100]
 [101]
 [102]
 [103]
 [104]
 [105]
 [106]
 [107]
 [108]
 [109]
 [110]
 [111]
 [112]
 [113]
 [114]
 [115]
 [116]
 [117]
 [118]
 [119]
 [120]
 [121]
 [122]
 [123]
 [124]
 [125]
 [126]
 [127]
 [128]
 [129]
 [130]
 [131]
 [132]
 [133]
 [134]
 [135]
 [136]
 [137]
 [138]
 [139]
 [140]
 [141]
 [142]

In [11]:
onehot_encoder.fit(integer_encoded)

print("Onehot catgories:",onehot_encoder.categories_) # codifica las clases en numeros

Onehot catgories: [array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150], dtype=int64)]




In [12]:
# dimesiones del onehot_encoder
num_classes = len(onehot_encoder.categories_[0])
print("Número de clases: ", num_classes)
print("Clases \n", onehot_encoder.categories_[0])

Número de clases:  151
Clases 
 [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150]


In [13]:
train_label

array(['ingredient_substitution', 'interest_rate', 'change_ai_name', ...,
       'restaurant_reservation', 'pto_balance', 'play_music'],
      dtype='<U136')

In [14]:
train_label_encoded = label_encoder.transform(train_label) # codifica las clases en numeros

train_label_encoded

array([ 55,  59,  21, ..., 102,  89,  87])

In [15]:
train_label_encoded = train_label_encoded.reshape(len(train_label_encoded), 1)

train_label_encoded

array([[ 55],
       [ 59],
       [ 21],
       ...,
       [102],
       [ 89],
       [ 87]])

In [16]:
train_label = onehot_encoder.transform(train_label_encoded)    # codifica las clases en numeros

train_label

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [17]:
#ahora lo mismo pero para los datos de testeo

test_labels_encoded = label_encoder.transform(test_labels)
test_labels_encoded = test_labels_encoded.reshape(len(test_labels_encoded), 1)

print("pre-onehot:\n",test_labels)

test_labels = onehot_encoder.transform(test_labels_encoded)

print("\n\npos-onehot:\n",test_labels)

pre-onehot:
 ['application_status' 'who_made_you' 'payday' ... 'repeat'
 'restaurant_reservation' 'update_playlist']


pos-onehot:
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


### Entrenando el modelo de Word2Vec

In [18]:
words

['how',
 'much',
 'is',
 'an',
 'overdraft',
 'fee',
 'for',
 'bank',
 'why',
 'are',
 'exponents',
 'preformed',
 'before',
 'multiplication',
 'in',
 'the',
 'order',
 'of',
 'operations',
 'what',
 'size',
 'wipers',
 'does',
 'this',
 'car',
 'take',
 'where',
 'is',
 'the',
 'dipstick',
 'how',
 'much',
 'is',
 '1',
 'share',
 'of',
 'aapl',
 'how',
 'is',
 'glue',
 'made',
 'any',
 'headlines',
 'from',
 'my',
 'area',
 'what',
 'is',
 'the',
 'largest',
 'state',
 'in',
 'the',
 'us',
 'what',
 'is',
 'the',
 'current',
 'market',
 'trend',
 'what',
 'is',
 'the',
 'most',
 'popular',
 'airline',
 'what',
 'is',
 'the',
 'formula',
 'for',
 'the',
 'circumference',
 'of',
 'a',
 'circle',
 'what',
 'are',
 'some',
 'ways',
 'to',
 'reduce',
 'spending',
 'what',
 'time',
 'does',
 'the',
 'louvre',
 'open',
 'are',
 'there',
 'any',
 'local',
 'stations',
 'covering',
 'the',
 'moral',
 'march',
 'in',
 'raleigh',
 'in',
 'februaryu',
 'how',
 'many',
 'planets',
 'have',
 'we',

In [19]:
w2v_model = Word2Vec(sentences=[words], vector_size=100, window=5, min_count=1, workers=4)
"""
    Parametros:
    sentences: lista de lista de palabras
    size: dimension del vector (posible parametro a modificar 100-300)
    window: tamaño de la ventana de palabras (ventana: palabras que se encuentran a los lados de la palabra objetivo)
    min_count: minimo de veces que debe aparecer una palabra para ser considerada (es un dataset chico asi que se deja en 1)
    workers: numero de hilos de ejecucion
"""


'\n    Parametros:\n    sentences: lista de lista de palabras\n    size: dimension del vector (posible parametro a modificar 100-300)\n    window: tamaño de la ventana de palabras (ventana: palabras que se encuentran a los lados de la palabra objetivo)\n    min_count: minimo de veces que debe aparecer una palabra para ser considerada (es un dataset chico asi que se deja en 1)\n    workers: numero de hilos de ejecucion\n'

In [20]:
# mostrar las dimensiones de la matriz de embeddings
print("Dimensiones de la matriz de embeddings: ", w2v_model.wv.vectors.shape)

Dimensiones de la matriz de embeddings:  (7502, 100)


In [21]:
# mostramos los vectores de las palabras
print(w2v_model.wv.vectors[1])

[-0.01060462 -0.04799705 -0.4940529  -0.50212693  0.43794858 -0.63817066
  0.20009874  0.5262332  -0.8566647   0.5319416  -0.5951051  -0.6908496
  0.66751367 -0.16192362 -0.02235279  0.4255088   0.9423962  -0.41793463
  0.6340214  -0.64064693  0.35239393  0.01061168  0.32923812 -1.0427127
 -0.12821208 -0.18254663 -0.01336661  0.19341753 -0.00999579 -0.3668227
  0.7411385   0.5402332   0.42149714 -0.38358033  0.21152234  0.9064827
 -0.37431303 -0.5985937  -0.2957527  -0.70628136  0.02377852 -0.7312601
 -0.2742655   0.7041882  -0.3564216  -0.11778577 -1.1490397   0.30697417
 -0.3305793   0.27588335  0.43219462 -0.1532492  -0.32519823  0.4596865
 -1.2431175   0.50368124  0.8485592  -0.20151184  0.06527342  0.8617022
  0.3951888   0.33052188 -0.08055306  0.40327796 -1.0908089   0.6219348
 -0.05820879 -0.29923156 -0.511915    0.27888024 -0.72301185  0.60712814
 -0.18411477  0.10747566 -0.22484754 -0.13620529  0.02842789  0.5290266
  0.0297786   0.7829878  -1.0706792  -0.20250751 -0.74950945

In [22]:
# mostramos los embeddings de la palabra "hola"
print(w2v_model.wv['who'])

[-0.00741066 -0.00933663 -0.10818022 -0.1141576   0.10445315 -0.15566973
  0.05063537  0.12021758 -0.20400582  0.12986755 -0.14282854 -0.15804857
  0.15382177 -0.0359382  -0.01376943  0.10554438  0.21211289 -0.10147864
  0.13964845 -0.15807216  0.07151472 -0.00109887  0.07536839 -0.24696383
 -0.0232561  -0.04785752 -0.01105397  0.05254697 -0.01161388 -0.08594333
  0.17445844  0.13207349  0.10574979 -0.0789144   0.0401177   0.21327183
 -0.08200019 -0.14368016 -0.08170979 -0.16888139  0.0018355  -0.17013605
 -0.05220747  0.15581639 -0.07111333 -0.02314042 -0.2765105   0.06381769
 -0.07543898  0.06828455  0.09459054 -0.02826531 -0.07108323  0.1141851
 -0.28487957  0.1137175   0.19802347 -0.04188093  0.01482701  0.20054238
  0.08407293  0.07593261 -0.01347092  0.10081005 -0.25725764  0.1404533
 -0.00827783 -0.0793718  -0.11997599  0.06388276 -0.16605838  0.13898511
 -0.05217023  0.02854645 -0.04986602 -0.0403262   0.0081719   0.11483235
  0.00090933  0.17852129 -0.2454668  -0.05029008 -0.1

In [23]:
# mostramos el tamaño del vocabulario
print("Tamaño del vocabulario: ", len(w2v_model.wv))

Tamaño del vocabulario:  7502


### Creacion del modelo (RNN-LSTM)

**A. Numero de Neuronas:**

&nbsp;&nbsp;&nbsp;&nbsp; Para calcular las neuronas de la capa de LSTM existen 2 formulas:

1)  $N_h = \frac{N_s}{α * (N_i + N_o)}$

2)  $N_h = \frac{2}{3} * (N_i+N_o)$


**B. Numero de Capas:**

&nbsp;&nbsp;&nbsp;&nbsp;En este caso decidi usar solo una capa LSTM bidireccional y una capa densa.

&nbsp;&nbsp;&nbsp;&nbsp;Adicionalmente se debe acompañar con una capa de dropout para evitar el sobreajuste. La idea de esta capa es ignorar neuronas seleccionadas aleatoriamente durante el entrenamiento. Segun he leido con un 20% se puede lograr un equilibrio entre presicion y evite de sobreajuste.

*actualizacion*

&nbsp;&nbsp;&nbsp;&nbsp;**tuve que agregar una capa unidireccional porque la complejidad de los datos no me permitia entrenarlos con tan solo una capa**

**C. Funcion de activacion:**

&nbsp;&nbsp;&nbsp;&nbsp;En cuanto a la funcion de activacion, en nuestro problema, tenemos varias clases, pero sólo una de ellas puede estar presente al mismo tiempo. Para este tipo de problemas, por lo general, la función de activación softmax funciona mejor, ya que nos permite interpretar las salidas como probabilidades.

In [27]:
modelo_LSTM = Sequential()



# modelo_LSTM.add(Embedding(input_dim=num_unique_words, output_dim=100, input_length=maxLen, trainable=False, weights=[w2v_model.wv.vectors]))
modelo_LSTM.add(Embedding(input_dim=num_unique_words, output_dim=100, input_length=train_sequences.shape[1], trainable=False, weights=[w2v_model.wv.vectors]))
"""
    Parametros:
    input_dim: tamaño del vocabulario (numero de palabras unicas)
    output_dim: tamaño del vector de salida (dimension del vector)
    input_length: longitud de la secuencia de entrada (numero de palabras en cada consulta)

"""
modelo_LSTM.add(Bidirectional(LSTM(256, return_sequences=True), 'concat')) #Probablemente pueda usar 71, segun el calculo de la formula: 2/3*(n_input+n_output)
"""
    Parametros:
    units: dimension de la salida del LSTM (numero de neuronas)
    return_sequences: si la salida es una secuencia o un vector (True: secuencia, False: vector)
    dropout: porcentaje de neuronas que se desactivan en cada iteracion (evita el overfitting)
    recurrent_dropout: porcentaje de neuronas que se desactivan en cada iteracion de la capa recurrente (evita el overfitting)

    concat: concatena las salidas de las capas recurrentes (bidireccional)
"""
modelo_LSTM.add(Dropout(0.2))
modelo_LSTM.add(LSTM(256, return_sequences=False))
"""
    esta capa cumple la funcion de reducir la dimension de la salida de la capa anterior
    esto se debe a que la capa anterior devuelve una secuencia de vectores y la siguiente capa espera un vector
"""
modelo_LSTM.add(Dropout(0.2)) # desactiva el 20% de las neuronas

modelo_LSTM.add(Dense(50, activation='relu')) # Aca se usa la mitad de las neuronas de la capa anterior
"""
    Parametros:
    units: dimension de la salida de la capa, es 64 porque es la mitad de la salida de la capa anterior
    activation: funcion de activacion
"""
modelo_LSTM.add(Dense(151, activation='softmax')) # Esta es la capa de salida, va a tener tantas neuronas como clases haya
"""
    Parametros:
    units: dimension de la salida de la capa, es num_classes porque es el numero de clases
    activation: funcion de activacion (softmax porque es un problema de clasificacion)
"""
# modelo_LSTM.add(Activation('softmax')) # funcion de activacion softmax

modelo_LSTM.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

### Entremiendo del Modelo

**Tamaño del lote (batch_size)**

&nbsp;&nbsp;&nbsp;&nbsp;Este numero lo calcule en base a la cantidad de datos de entrenamiento, podria usar 8, 16 o 32

**Epocas (epochs)**

&nbsp;&nbsp;&nbsp;&nbsp;comence con 10, puedo ir probando

In [28]:
# Entrenamiento del modelo
h = modelo_LSTM.fit(train_sequences, train_label, epochs=10, batch_size=64, shuffle=True, validation_data=(test_sequences, test_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

KeyboardInterrupt: 