# Notebook para predecir si una respuesta es IA (1) vs NoIA (0) usando una red neuronal LSTM usando solamente los vectores GloVe

**Cargar librerias**

In [19]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Masking
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

### Cargar los datos

In [20]:
val_glove = np.load('datos/encuestav1_gloVe_Values.npy', allow_pickle=True)
val_glove.shape

(460,)

In [21]:
data = pd.read_csv('datos/train_poll_v1s1.csv')
labels = data['ai']
labels = np.array(labels)
labels.shape

(460,)

In [22]:
# Mostrar el tamaño del vector de persistencia más largo
maxlen = max([len(i) for i in val_glove])
maxlen

8

**Como los vectores tienen distintos tamaños se usará padding para que la capa de entrada sea igual en el entrenamiento de la red**

## Arquitectura y entrenamiento de la red

In [23]:
max_timesteps = 8  # Define the maximum length for padding

#Select only the n first samples
n_samples = 63
X = val_glove#[:n_samples]
y = labels#[:n_samples]

# Pad the sequences to have the same length
X_padded = pad_sequences(X, maxlen=max_timesteps, dtype='float32', padding='post', truncating='post')

# Define the BiLSTM model
model = Sequential()
model.add(Masking(mask_value=0.0, input_shape=(max_timesteps, 300)))  # Mask padded values
model.add(Bidirectional(LSTM(64, return_sequences=False)))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=Adam(), loss=BinaryCrossentropy(), metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
model.fit(X_padded, y, epochs=5, batch_size=32, validation_split=0.2)

  super().__init__(**kwargs)


Epoch 1/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 53ms/step - accuracy: 0.5627 - loss: 0.6557 - val_accuracy: 0.5435 - val_loss: 0.5364
Epoch 2/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5884 - loss: 0.5356 - val_accuracy: 0.7283 - val_loss: 0.4529
Epoch 3/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8445 - loss: 0.4146 - val_accuracy: 0.9130 - val_loss: 0.3622
Epoch 4/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9360 - loss: 0.3269 - val_accuracy: 0.9565 - val_loss: 0.2802
Epoch 5/5
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9544 - loss: 0.2466 - val_accuracy: 0.9565 - val_loss: 0.2227


<keras.src.callbacks.history.History at 0x1800e655f00>

In [24]:
# Mostrar la precision del modelo
model.evaluate(X_padded, y)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9645 - loss: 0.1822


[0.19791564345359802, 0.9630434513092041]

In [26]:
# Guardar el modelo
model.save('lstm_model.keras')