In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from nltk.tokenize import word_tokenize
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Input, Embedding, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from nltk.corpus import stopwords
import optuna
import spacy




  from .autonotebook import tqdm as notebook_tqdm


In [3]:
text_cleaned = pd.read_parquet('data_cleaned.parquet')

In [4]:
text_cleaned

Unnamed: 0,review_text,review_rate
0,"[mayor, virtud, película, existencia.el, hecho...",negative
1,"[experto, cinéfilo, ,, poco, vez, tanto, juego...",negative
2,"[si, incondicional, humor, estilo, tele, 5.si,...",negative
3,"[saber, pasar, ,, si, gente, dejar, llevar, mo...",negative
4,"[`, `, amanecer, ,, quedo, solo, ,, sentir, fo...",negative
...,...,...
56362,"[pensar, película, hacer, buen, trabajo, derec...",positive
56363,"[malo, parcela, ,, mal, diálogo, ,, malo, actu...",negative
56364,"[católico, enseñado, escuela, primario, parroq...",negative
56365,"[ir, tener, desacuerdo, comentario, anterior, ...",negative


In [5]:
text_vectorizer = TextVectorization(output_mode='int')




In [6]:
text_cleaned['review_text'] = text_cleaned['review_text'].map(lambda corpus: ' '.join(corpus))

In [7]:
text_vectorizer.adapt([text_cleaned['review_text']])




In [8]:
text_cleaned

Unnamed: 0,review_text,review_rate
0,mayor virtud película existencia.el hecho pode...,negative
1,"experto cinéfilo , poco vez tanto juego sala c...",negative
2,si incondicional humor estilo tele 5.si termin...,negative
3,"saber pasar , si gente dejar llevar moda , si ...",negative
4,"` ` amanecer , quedo solo , sentir fondoun mar...",negative
...,...,...
56362,pensar película hacer buen trabajo derecha.no ...,positive
56363,"malo parcela , mal diálogo , malo actuación , ...",negative
56364,católico enseñado escuela primario parroquial ...,negative
56365,ir tener desacuerdo comentario anterior lado m...,negative


In [9]:
model = Sequential(name='Text_Vectorizing')
model.add(Input(shape=(1,), dtype=tf.string))
model.add(text_vectorizer)

text_vectorized = model.predict([text_cleaned['review_text']])




In [10]:
size_voc = len(text_vectorizer.get_vocabulary())

In [11]:
y_prediction_rate = OneHotEncoder().fit_transform(np.array(text_cleaned['review_rate']).reshape(-1,1)).toarray()

In [12]:
x_train, x_test, y_train,y_test = train_test_split(text_vectorized.to_tensor().numpy(),y_prediction_rate,random_state=43, train_size=.8)

In [14]:

text_vectorizer = TextVectorization(max_tokens=size_voc, output_mode='int')

# Fit the TextVectorization layer on your text data
text_vectorizer.adapt(text_cleaned['review_text'].values)

# Create a model
model = Sequential(name='Text_Vectorizing')
model.add(Input(shape=(1,), dtype=tf.string))
model.add(text_vectorizer)

# Vectorize the text data
text_vectorized = model.predict([text_cleaned['review_text']])

# Split the data
x_train, x_test, y_train,y_test = train_test_split(text_vectorized.to_tensor().numpy(),y_prediction_rate,random_state=43, train_size=.8)




In [None]:
words =dict(enumerate(text_vectorizer.get_vocabulary()))

In [37]:

def objective(trial):
    len_max = trial.suggest_int('MX_LEN', 1, 300, log=True)
    out_dim = trial.suggest_int('OUT_DIM', 1, 300, log=True)

    x_train_padd = sequence.pad_sequences(x_train, maxlen=len_max, padding='post', truncating='post')
    x_test_padd = sequence.pad_sequences(x_test, maxlen=len_max, padding='post', truncating='post')

    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

    al_optimizer = trial.suggest_categorical('optimizer', ['adam', 'sgd', 'rmsprop', 'adafactor'])

    model_text = tf.keras.Sequential()
    model_text.add(Embedding(input_dim=size_voc + 1, output_dim=out_dim, input_length=len_max))

    num_hidden = trial.suggest_int('num_ner',1, 200, log=True)

    model_text.add(LSTM(num_hidden))  # You can adjust the number of units as needed

    model_text.add(Dense(2, activation='softmax'))

    # Compile and train the model
    if al_optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif al_optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    elif al_optimizer == 'rmsprop':
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif al_optimizer == 'adafactor':
        optimizer = tf.keras.optimizers.Adafactor(learning_rate=learning_rate)

    model_text.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model_text.fit(x_train_padd, y_train, epochs=3, batch_size=120, verbose=0)

    # Evaluate the model on the test set
    _, accuracy = model_text.evaluate(x_test_padd, y_test)

    return accuracy



In [38]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

# Get the best parameters
best_params = study.best_params
print(f"Best hyperparameters: {best_params}")

# Get the best accuracy
best_accuracy = study.best_value
print(f"Best accuracy: {best_accuracy}")

[I 2024-02-18 22:18:37,562] A new study created in memory with name: no-name-9f422a55-9dc5-40b4-a9a2-9e5aeb78d29f




[I 2024-02-18 22:20:25,284] Trial 0 finished with value: 0.7046301364898682 and parameters: {'MX_LEN': 12, 'OUT_DIM': 150, 'learning_rate': 0.00023639048369169217, 'optimizer': 'rmsprop', 'num_ner': 83}. Best is trial 0 with value: 0.7046301364898682.




[I 2024-02-18 22:20:59,626] Trial 1 finished with value: 0.7333688139915466 and parameters: {'MX_LEN': 52, 'OUT_DIM': 7, 'learning_rate': 5.379937934576627e-05, 'optimizer': 'adam', 'num_ner': 27}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:23:18,632] Trial 2 finished with value: 0.6520312428474426 and parameters: {'MX_LEN': 5, 'OUT_DIM': 53, 'learning_rate': 0.07143990314777482, 'optimizer': 'adafactor', 'num_ner': 25}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:24:35,689] Trial 3 finished with value: 0.5157885551452637 and parameters: {'MX_LEN': 53, 'OUT_DIM': 5, 'learning_rate': 0.06455861275195442, 'optimizer': 'rmsprop', 'num_ner': 132}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:26:23,869] Trial 4 finished with value: 0.5879900455474854 and parameters: {'MX_LEN': 2, 'OUT_DIM': 41, 'learning_rate': 0.02961388807116321, 'optimizer': 'adam', 'num_ner': 5}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:31:03,063] Trial 5 finished with value: 0.5119744539260864 and parameters: {'MX_LEN': 291, 'OUT_DIM': 42, 'learning_rate': 0.0005037504614004438, 'optimizer': 'sgd', 'num_ner': 39}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:31:25,839] Trial 6 finished with value: 0.49458932876586914 and parameters: {'MX_LEN': 78, 'OUT_DIM': 4, 'learning_rate': 1.1649983266645574e-05, 'optimizer': 'sgd', 'num_ner': 4}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:33:10,958] Trial 7 finished with value: 0.4878481328487396 and parameters: {'MX_LEN': 288, 'OUT_DIM': 46, 'learning_rate': 2.1219163113568057e-05, 'optimizer': 'sgd', 'num_ner': 3}. Best is trial 1 with value: 0.7333688139915466.




[I 2024-02-18 22:34:41,465] Trial 8 finished with value: 0.7565194368362427 and parameters: {'MX_LEN': 61, 'OUT_DIM': 27, 'learning_rate': 0.041612925357161164, 'optimizer': 'adam', 'num_ner': 32}. Best is trial 8 with value: 0.7565194368362427.




[I 2024-02-18 22:36:47,863] Trial 9 finished with value: 0.6813021302223206 and parameters: {'MX_LEN': 8, 'OUT_DIM': 46, 'learning_rate': 0.0008122246445319485, 'optimizer': 'adam', 'num_ner': 2}. Best is trial 8 with value: 0.7565194368362427.




[I 2024-02-18 22:36:55,027] Trial 10 finished with value: 0.5530424118041992 and parameters: {'MX_LEN': 1, 'OUT_DIM': 1, 'learning_rate': 0.006485912522412216, 'optimizer': 'adafactor', 'num_ner': 1}. Best is trial 8 with value: 0.7565194368362427.




[I 2024-02-18 22:37:39,129] Trial 11 finished with value: 0.7518183588981628 and parameters: {'MX_LEN': 39, 'OUT_DIM': 10, 'learning_rate': 7.723190277375097e-05, 'optimizer': 'adam', 'num_ner': 22}. Best is trial 8 with value: 0.7565194368362427.




[I 2024-02-18 22:38:25,684] Trial 12 finished with value: 0.7420613765716553 and parameters: {'MX_LEN': 29, 'OUT_DIM': 13, 'learning_rate': 0.006348138018640897, 'optimizer': 'adam', 'num_ner': 10}. Best is trial 8 with value: 0.7565194368362427.




[I 2024-02-18 22:39:37,787] Trial 13 finished with value: 0.8178108930587769 and parameters: {'MX_LEN': 119, 'OUT_DIM': 13, 'learning_rate': 0.0001223437906965014, 'optimizer': 'adam', 'num_ner': 13}. Best is trial 13 with value: 0.8178108930587769.




[I 2024-02-18 22:52:39,721] Trial 14 finished with value: 0.5497605204582214 and parameters: {'MX_LEN': 136, 'OUT_DIM': 269, 'learning_rate': 0.003935998201865558, 'optimizer': 'adam', 'num_ner': 13}. Best is trial 13 with value: 0.8178108930587769.




[I 2024-02-18 22:53:19,573] Trial 15 finished with value: 0.7280468344688416 and parameters: {'MX_LEN': 21, 'OUT_DIM': 2, 'learning_rate': 0.0021223567078003675, 'optimizer': 'adam', 'num_ner': 67}. Best is trial 13 with value: 0.8178108930587769.




[I 2024-02-18 22:54:49,009] Trial 16 finished with value: 0.8493879437446594 and parameters: {'MX_LEN': 117, 'OUT_DIM': 18, 'learning_rate': 0.0002013856076713824, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 22:56:33,263] Trial 17 finished with value: 0.5133049488067627 and parameters: {'MX_LEN': 131, 'OUT_DIM': 18, 'learning_rate': 0.00014611456627437212, 'optimizer': 'adafactor', 'num_ner': 9}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 22:58:38,482] Trial 18 finished with value: 0.5169416069984436 and parameters: {'MX_LEN': 135, 'OUT_DIM': 114, 'learning_rate': 0.0002825167227692176, 'optimizer': 'rmsprop', 'num_ner': 1}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 22:58:57,443] Trial 19 finished with value: 0.5157885551452637 and parameters: {'MX_LEN': 18, 'OUT_DIM': 2, 'learning_rate': 4.243642309262294e-05, 'optimizer': 'adam', 'num_ner': 6}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 22:59:39,685] Trial 20 finished with value: 0.8295192718505859 and parameters: {'MX_LEN': 100, 'OUT_DIM': 3, 'learning_rate': 0.0014640863407834176, 'optimizer': 'adam', 'num_ner': 2}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:00:20,813] Trial 21 finished with value: 0.8231328725814819 and parameters: {'MX_LEN': 104, 'OUT_DIM': 3, 'learning_rate': 0.0016300991954795948, 'optimizer': 'adam', 'num_ner': 2}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:01:36,457] Trial 22 finished with value: 0.669061541557312 and parameters: {'MX_LEN': 200, 'OUT_DIM': 3, 'learning_rate': 0.0017088620389275457, 'optimizer': 'adam', 'num_ner': 2}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:02:10,245] Trial 23 finished with value: 0.7809118032455444 and parameters: {'MX_LEN': 87, 'OUT_DIM': 1, 'learning_rate': 0.015871434438535423, 'optimizer': 'adam', 'num_ner': 2}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:02:29,212] Trial 24 finished with value: 0.7296434044837952 and parameters: {'MX_LEN': 31, 'OUT_DIM': 2, 'learning_rate': 0.0006654214980093942, 'optimizer': 'adam', 'num_ner': 1}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:03:01,697] Trial 25 finished with value: 0.6429838538169861 and parameters: {'MX_LEN': 4, 'OUT_DIM': 8, 'learning_rate': 0.0016447248997643303, 'optimizer': 'adam', 'num_ner': 3}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:04:17,921] Trial 26 finished with value: 0.5159659385681152 and parameters: {'MX_LEN': 170, 'OUT_DIM': 5, 'learning_rate': 0.0004439107045464741, 'optimizer': 'adafactor', 'num_ner': 7}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:05:11,177] Trial 27 finished with value: 0.5157885551452637 and parameters: {'MX_LEN': 101, 'OUT_DIM': 21, 'learning_rate': 0.003455053714328629, 'optimizer': 'sgd', 'num_ner': 2}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:06:21,991] Trial 28 finished with value: 0.724676251411438 and parameters: {'MX_LEN': 206, 'OUT_DIM': 3, 'learning_rate': 0.01106885403134609, 'optimizer': 'rmsprop', 'num_ner': 3}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:06:55,651] Trial 29 finished with value: 0.5244811177253723 and parameters: {'MX_LEN': 11, 'OUT_DIM': 1, 'learning_rate': 0.00024294608953277422, 'optimizer': 'rmsprop', 'num_ner': 192}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:07:42,274] Trial 30 finished with value: 0.7266276478767395 and parameters: {'MX_LEN': 21, 'OUT_DIM': 7, 'learning_rate': 0.001206790762054733, 'optimizer': 'adam', 'num_ner': 59}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:08:54,723] Trial 31 finished with value: 0.8237537741661072 and parameters: {'MX_LEN': 87, 'OUT_DIM': 13, 'learning_rate': 0.00011495428756901356, 'optimizer': 'adam', 'num_ner': 16}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:10:20,860] Trial 32 finished with value: 0.8065460324287415 and parameters: {'MX_LEN': 55, 'OUT_DIM': 24, 'learning_rate': 0.00012992083466488116, 'optimizer': 'adam', 'num_ner': 18}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:14:30,499] Trial 33 finished with value: 0.8266808390617371 and parameters: {'MX_LEN': 74, 'OUT_DIM': 80, 'learning_rate': 0.0003591160131542446, 'optimizer': 'adam', 'num_ner': 16}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:20:05,118] Trial 34 finished with value: 0.7694695591926575 and parameters: {'MX_LEN': 40, 'OUT_DIM': 111, 'learning_rate': 4.284963064009659e-05, 'optimizer': 'adam', 'num_ner': 17}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:30:15,359] Trial 35 finished with value: 0.8135533332824707 and parameters: {'MX_LEN': 65, 'OUT_DIM': 200, 'learning_rate': 0.00033268454942241184, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:34:22,597] Trial 36 finished with value: 0.5162320137023926 and parameters: {'MX_LEN': 79, 'OUT_DIM': 68, 'learning_rate': 6.48019436515648e-05, 'optimizer': 'adafactor', 'num_ner': 37}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:39:30,778] Trial 37 finished with value: 0.792442798614502 and parameters: {'MX_LEN': 44, 'OUT_DIM': 79, 'learning_rate': 0.00016863683774683032, 'optimizer': 'adam', 'num_ner': 46}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:42:01,651] Trial 38 finished with value: 0.5152563452720642 and parameters: {'MX_LEN': 236, 'OUT_DIM': 30, 'learning_rate': 0.0005254033499217398, 'optimizer': 'sgd', 'num_ner': 13}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:42:38,874] Trial 39 finished with value: 0.7697356939315796 and parameters: {'MX_LEN': 28, 'OUT_DIM': 18, 'learning_rate': 0.000981366407591256, 'optimizer': 'rmsprop', 'num_ner': 27}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:46:57,996] Trial 40 finished with value: 0.767074704170227 and parameters: {'MX_LEN': 67, 'OUT_DIM': 31, 'learning_rate': 2.887404787105569e-05, 'optimizer': 'adam', 'num_ner': 106}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:47:43,686] Trial 41 finished with value: 0.759269118309021 and parameters: {'MX_LEN': 90, 'OUT_DIM': 3, 'learning_rate': 9.68819591984228e-05, 'optimizer': 'adam', 'num_ner': 5}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:48:55,385] Trial 42 finished with value: 0.5814262628555298 and parameters: {'MX_LEN': 148, 'OUT_DIM': 5, 'learning_rate': 0.002704718435929302, 'optimizer': 'adam', 'num_ner': 4}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:51:16,035] Trial 43 finished with value: 0.5647507309913635 and parameters: {'MX_LEN': 244, 'OUT_DIM': 6, 'learning_rate': 0.0002004792359537026, 'optimizer': 'adam', 'num_ner': 18}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:52:37,083] Trial 44 finished with value: 0.830938458442688 and parameters: {'MX_LEN': 101, 'OUT_DIM': 12, 'learning_rate': 0.0010883192278661506, 'optimizer': 'adam', 'num_ner': 24}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:53:13,498] Trial 45 finished with value: 0.51534503698349 and parameters: {'MX_LEN': 53, 'OUT_DIM': 11, 'learning_rate': 0.00034321592004703336, 'optimizer': 'sgd', 'num_ner': 21}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:59:07,344] Trial 46 finished with value: 0.5138371586799622 and parameters: {'MX_LEN': 291, 'OUT_DIM': 64, 'learning_rate': 0.0007133023647501808, 'optimizer': 'adam', 'num_ner': 33}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-18 23:59:58,619] Trial 47 finished with value: 0.5798296928405762 and parameters: {'MX_LEN': 1, 'OUT_DIM': 16, 'learning_rate': 0.0010412937694564547, 'optimizer': 'adam', 'num_ner': 14}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:00:29,273] Trial 48 finished with value: 0.5282952189445496 and parameters: {'MX_LEN': 2, 'OUT_DIM': 9, 'learning_rate': 1.7845155223235608e-05, 'optimizer': 'adam', 'num_ner': 11}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:03:17,074] Trial 49 finished with value: 0.5156998634338379 and parameters: {'MX_LEN': 169, 'OUT_DIM': 40, 'learning_rate': 0.0004352943365614764, 'optimizer': 'adafactor', 'num_ner': 27}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:05:50,032] Trial 50 finished with value: 0.5188930034637451 and parameters: {'MX_LEN': 110, 'OUT_DIM': 13, 'learning_rate': 0.09474045089142649, 'optimizer': 'adam', 'num_ner': 61}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:06:35,121] Trial 51 finished with value: 0.7962568998336792 and parameters: {'MX_LEN': 104, 'OUT_DIM': 4, 'learning_rate': 0.00520375231529858, 'optimizer': 'adam', 'num_ner': 6}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:07:13,551] Trial 52 finished with value: 0.8286322355270386 and parameters: {'MX_LEN': 77, 'OUT_DIM': 4, 'learning_rate': 0.0013401636026449777, 'optimizer': 'adam', 'num_ner': 4}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:07:47,441] Trial 53 finished with value: 0.8043285608291626 and parameters: {'MX_LEN': 64, 'OUT_DIM': 4, 'learning_rate': 0.0013422613909397326, 'optimizer': 'adam', 'num_ner': 10}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:08:17,461] Trial 54 finished with value: 0.7984743714332581 and parameters: {'MX_LEN': 46, 'OUT_DIM': 6, 'learning_rate': 0.0006039128574757993, 'optimizer': 'adam', 'num_ner': 4}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:08:48,372] Trial 55 finished with value: 0.824552059173584 and parameters: {'MX_LEN': 83, 'OUT_DIM': 2, 'learning_rate': 0.0024332975832236227, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:09:07,774] Trial 56 finished with value: 0.7543906569480896 and parameters: {'MX_LEN': 34, 'OUT_DIM': 2, 'learning_rate': 0.002275177608225001, 'optimizer': 'adam', 'num_ner': 3}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:16:17,929] Trial 57 finished with value: 0.7960794568061829 and parameters: {'MX_LEN': 74, 'OUT_DIM': 150, 'learning_rate': 0.008164766074092857, 'optimizer': 'adam', 'num_ner': 7}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:17:12,405] Trial 58 finished with value: 0.51534503698349 and parameters: {'MX_LEN': 162, 'OUT_DIM': 4, 'learning_rate': 0.002893002791663521, 'optimizer': 'sgd', 'num_ner': 5}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:22:00,498] Trial 59 finished with value: 0.8024658560752869 and parameters: {'MX_LEN': 125, 'OUT_DIM': 297, 'learning_rate': 0.0009624095155658581, 'optimizer': 'rmsprop', 'num_ner': 4}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:22:13,110] Trial 60 finished with value: 0.6641830801963806 and parameters: {'MX_LEN': 6, 'OUT_DIM': 2, 'learning_rate': 0.004471457155182064, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:23:50,792] Trial 61 finished with value: 0.8178995847702026 and parameters: {'MX_LEN': 93, 'OUT_DIM': 16, 'learning_rate': 9.943962464502893e-05, 'optimizer': 'adam', 'num_ner': 16}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:25:10,368] Trial 62 finished with value: 0.8208267092704773 and parameters: {'MX_LEN': 77, 'OUT_DIM': 10, 'learning_rate': 0.00023477972128482588, 'optimizer': 'adam', 'num_ner': 23}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:28:28,357] Trial 63 finished with value: 0.7396664619445801 and parameters: {'MX_LEN': 203, 'OUT_DIM': 33, 'learning_rate': 0.0018310018353003639, 'optimizer': 'adam', 'num_ner': 12}. Best is trial 16 with value: 0.8493879437446594.




[I 2024-02-19 00:29:48,580] Trial 64 finished with value: 0.8535568714141846 and parameters: {'MX_LEN': 122, 'OUT_DIM': 7, 'learning_rate': 0.0004080634013113597, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 64 with value: 0.8535568714141846.




[I 2024-02-19 00:31:13,205] Trial 65 finished with value: 0.8555969595909119 and parameters: {'MX_LEN': 129, 'OUT_DIM': 6, 'learning_rate': 0.0003847254815471461, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:32:45,187] Trial 66 finished with value: 0.5157885551452637 and parameters: {'MX_LEN': 121, 'OUT_DIM': 8, 'learning_rate': 0.00040155611071863745, 'optimizer': 'adafactor', 'num_ner': 10}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:34:14,255] Trial 67 finished with value: 0.8422920107841492 and parameters: {'MX_LEN': 149, 'OUT_DIM': 6, 'learning_rate': 0.0007970252124452996, 'optimizer': 'adam', 'num_ner': 6}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:35:56,255] Trial 68 finished with value: 0.5047010779380798 and parameters: {'MX_LEN': 185, 'OUT_DIM': 6, 'learning_rate': 0.0008689125549017196, 'optimizer': 'adam', 'num_ner': 6}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:37:23,345] Trial 69 finished with value: 0.6841405034065247 and parameters: {'MX_LEN': 141, 'OUT_DIM': 8, 'learning_rate': 0.00135670483111262, 'optimizer': 'adam', 'num_ner': 5}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:39:12,706] Trial 70 finished with value: 0.7327479124069214 and parameters: {'MX_LEN': 220, 'OUT_DIM': 5, 'learning_rate': 0.000566097050691946, 'optimizer': 'adam', 'num_ner': 7}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:40:16,901] Trial 71 finished with value: 0.814174234867096 and parameters: {'MX_LEN': 110, 'OUT_DIM': 7, 'learning_rate': 0.00030507757822433463, 'optimizer': 'adam', 'num_ner': 3}. Best is trial 65 with value: 0.8555969595909119.




[I 2024-02-19 00:41:26,301] Trial 72 finished with value: 0.8592336177825928 and parameters: {'MX_LEN': 142, 'OUT_DIM': 3, 'learning_rate': 0.0007116182026416922, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:42:36,233] Trial 73 finished with value: 0.5349476933479309 and parameters: {'MX_LEN': 152, 'OUT_DIM': 3, 'learning_rate': 0.0007685865381263194, 'optimizer': 'adam', 'num_ner': 10}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:44:27,227] Trial 74 finished with value: 0.5158772468566895 and parameters: {'MX_LEN': 250, 'OUT_DIM': 3, 'learning_rate': 0.00017650503738495527, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:45:33,841] Trial 75 finished with value: 0.8197622895240784 and parameters: {'MX_LEN': 179, 'OUT_DIM': 4, 'learning_rate': 0.0011529490004382884, 'optimizer': 'rmsprop', 'num_ner': 5}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:46:57,437] Trial 76 finished with value: 0.8428242206573486 and parameters: {'MX_LEN': 134, 'OUT_DIM': 11, 'learning_rate': 0.0004925479109357601, 'optimizer': 'adam', 'num_ner': 6}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:47:57,496] Trial 77 finished with value: 0.5155224204063416 and parameters: {'MX_LEN': 141, 'OUT_DIM': 12, 'learning_rate': 0.00046960241079072306, 'optimizer': 'sgd', 'num_ner': 6}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:50:01,606] Trial 78 finished with value: 0.8524037599563599 and parameters: {'MX_LEN': 127, 'OUT_DIM': 23, 'learning_rate': 0.00023916383692470575, 'optimizer': 'adam', 'num_ner': 14}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:52:57,875] Trial 79 finished with value: 0.5182721018791199 and parameters: {'MX_LEN': 270, 'OUT_DIM': 22, 'learning_rate': 0.00025530942970532325, 'optimizer': 'adam', 'num_ner': 13}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:54:34,035] Trial 80 finished with value: 0.5157885551452637 and parameters: {'MX_LEN': 124, 'OUT_DIM': 14, 'learning_rate': 0.00015293829737483461, 'optimizer': 'adafactor', 'num_ner': 9}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:56:29,005] Trial 81 finished with value: 0.843090295791626 and parameters: {'MX_LEN': 108, 'OUT_DIM': 20, 'learning_rate': 0.0006353139064211693, 'optimizer': 'adam', 'num_ner': 15}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 00:59:12,889] Trial 82 finished with value: 0.7418839931488037 and parameters: {'MX_LEN': 194, 'OUT_DIM': 19, 'learning_rate': 0.0006766362013301754, 'optimizer': 'adam', 'num_ner': 22}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:00:27,492] Trial 83 finished with value: 0.8286322355270386 and parameters: {'MX_LEN': 99, 'OUT_DIM': 10, 'learning_rate': 0.0004998071423036199, 'optimizer': 'adam', 'num_ner': 14}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:02:08,205] Trial 84 finished with value: 0.8042398691177368 and parameters: {'MX_LEN': 56, 'OUT_DIM': 27, 'learning_rate': 0.00020354813982504787, 'optimizer': 'adam', 'num_ner': 19}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:03:56,375] Trial 85 finished with value: 0.8495653867721558 and parameters: {'MX_LEN': 162, 'OUT_DIM': 16, 'learning_rate': 0.0003099543483321268, 'optimizer': 'adam', 'num_ner': 11}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:05:44,407] Trial 86 finished with value: 0.8562178611755371 and parameters: {'MX_LEN': 160, 'OUT_DIM': 16, 'learning_rate': 0.0003296380773442589, 'optimizer': 'adam', 'num_ner': 11}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:08:06,226] Trial 87 finished with value: 0.699042022228241 and parameters: {'MX_LEN': 225, 'OUT_DIM': 15, 'learning_rate': 0.00028566714339190797, 'optimizer': 'adam', 'num_ner': 11}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:10:01,650] Trial 88 finished with value: 0.8583466410636902 and parameters: {'MX_LEN': 122, 'OUT_DIM': 26, 'learning_rate': 0.00035542026694762146, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:12:50,640] Trial 89 finished with value: 0.5189817547798157 and parameters: {'MX_LEN': 172, 'OUT_DIM': 38, 'learning_rate': 7.633485484589675e-05, 'optimizer': 'adam', 'num_ner': 15}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:13:14,572] Trial 90 finished with value: 0.7144758105278015 and parameters: {'MX_LEN': 14, 'OUT_DIM': 23, 'learning_rate': 0.000370405022046114, 'optimizer': 'rmsprop', 'num_ner': 12}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:14:38,924] Trial 91 finished with value: 0.8454852104187012 and parameters: {'MX_LEN': 126, 'OUT_DIM': 19, 'learning_rate': 0.00021878155254870414, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:16:00,925] Trial 92 finished with value: 0.8467269539833069 and parameters: {'MX_LEN': 115, 'OUT_DIM': 19, 'learning_rate': 0.00022766345901848722, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:17:43,833] Trial 93 finished with value: 0.84947669506073 and parameters: {'MX_LEN': 119, 'OUT_DIM': 26, 'learning_rate': 0.0002035507301807137, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:20:35,091] Trial 94 finished with value: 0.8087635040283203 and parameters: {'MX_LEN': 158, 'OUT_DIM': 48, 'learning_rate': 0.00014989461720620414, 'optimizer': 'adam', 'num_ner': 11}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:21:59,984] Trial 95 finished with value: 0.6098988652229309 and parameters: {'MX_LEN': 2, 'OUT_DIM': 34, 'learning_rate': 0.0001177708036561177, 'optimizer': 'adam', 'num_ner': 8}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:23:12,106] Trial 96 finished with value: 0.8180769681930542 and parameters: {'MX_LEN': 67, 'OUT_DIM': 26, 'learning_rate': 0.00028850564383244286, 'optimizer': 'adam', 'num_ner': 7}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:24:12,207] Trial 97 finished with value: 0.8356395363807678 and parameters: {'MX_LEN': 91, 'OUT_DIM': 17, 'learning_rate': 0.00017958243263611415, 'optimizer': 'adam', 'num_ner': 9}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:25:16,769] Trial 98 finished with value: 0.5161433219909668 and parameters: {'MX_LEN': 199, 'OUT_DIM': 24, 'learning_rate': 0.00036358766358871036, 'optimizer': 'sgd', 'num_ner': 7}. Best is trial 72 with value: 0.8592336177825928.




[I 2024-02-19 01:26:51,640] Trial 99 finished with value: 0.8293418288230896 and parameters: {'MX_LEN': 115, 'OUT_DIM': 30, 'learning_rate': 9.650738281213013e-05, 'optimizer': 'adam', 'num_ner': 12}. Best is trial 72 with value: 0.8592336177825928.


Best hyperparameters: {'MX_LEN': 142, 'OUT_DIM': 3, 'learning_rate': 0.0007116182026416922, 'optimizer': 'adam', 'num_ner': 9}
Best accuracy: 0.8592336177825928


In [21]:
MX_LEN= 142

x_train_padd = sequence.pad_sequences(x_train, maxlen=MX_LEN, padding='post',
                                         truncating = 'post')

x_test_padd = sequence.pad_sequences(x_test, maxlen=MX_LEN, padding='post',
                                     truncating='post')


In [61]:
model_text = Sequential()

model_text.add(Embedding(input_dim=size_voc+1,output_dim=3,input_length=103))
model_text.add(LSTM(9))
model_text.add(Dense(2, activation='softmax'))

optimiz = tf.optimizers.Adam(learning_rate=0.00071)

model_text.compile(loss='categorical_crossentropy', optimizer=optimiz, metrics=['accuracy'])

In [62]:
model_text.summary()

Model: "sequential_206"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_206 (Embedding)   (None, 103, 3)            866997    
                                                                 
 lstm_101 (LSTM)             (None, 9)                 468       
                                                                 
 dense_206 (Dense)           (None, 2)                 20        
                                                                 
Total params: 867485 (3.31 MB)
Trainable params: 867485 (3.31 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [63]:
history = model_text.fit(x_train_padd, y_train, batch_size=120, epochs=3, validation_data=(x_test_padd, y_test))

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [64]:
lemmer = spacy.load('es_core_news_sm')

def lematizer_text(rows):
    
    doc = [word.lemma_ for word in lemmer(rows)]

    return doc

In [65]:
stopwords_spanish = stopwords.words('spanish')
def clean_stopWords(rows):
    no_stops = []
    for word in rows:
        if word not in stopwords_spanish:
            no_stops.append(word)
    return no_stops

In [72]:

probe_padd = sequence.pad_sequences(model.predict([' '.join(clean_stopWords(lematizer_text('Habría perdido mi tiempo porque no veo películas, pero cuando ví esta película pensé que quizás ver películas puede ser hasta divertidas y sí, valió la pena mi tiempo')))]), maxlen=103, padding='post', truncating='post')

rest = model_text.predict(probe_padd.reshape(1,103,1))

if rest[0][0]> rest[0][1]:
    print(rest[0][0],'Negativo')
else:
    print(rest[0][1],'Positivo')



0.5389313 Positivo


In [74]:
model_text.save('RNN_model_prueba.h5')