In [29]:
import tensorflow as tf
from datasets import load_dataset
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Cargar el dataset de "emotion" con trust_remote_code=True
dataset = load_dataset("dair-ai/emotion", trust_remote_code=True)

In [30]:
# Dividir en entrenamiento y prueba
train_data = dataset['train']
test_data = dataset['test']

# Obtener los textos y las etiquetas
train_texts = train_data['text']
train_labels = train_data['label']
test_texts = test_data['text']
test_labels = test_data['label']

# Tokenización y padding
max_features = 10000
max_len = 100

tokenizer = Tokenizer(num_words=max_features, oov_token="<OOV>")
tokenizer.fit_on_texts(train_texts)

X_train = tokenizer.texts_to_sequences(train_texts)
X_train = pad_sequences(X_train, maxlen=max_len, padding='post', truncating='post')

X_test = tokenizer.texts_to_sequences(test_texts)
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', truncating='post')

y_train = tf.keras.utils.to_categorical(train_labels, num_classes=6)
y_test = tf.keras.utils.to_categorical(test_labels, num_classes=6)

# Construcción del modelo
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=max_len),
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(6, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Configurar Early Stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Entrenamiento del modelo
history = model.fit(X_train, y_train, epochs=15, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluación del modelo
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')

# Función para predecir emociones
def preprocess_text(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_len, padding='post', truncating='post')
    return padded_sequence

def predict_emotion(text):
    processed_text = preprocess_text(text)
    prediction = model.predict(processed_text)
    emotion_labels = ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise']
    emotion = emotion_labels[tf.argmax(prediction[0]).numpy()]
    return emotion

Epoch 1/15
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 148ms/step - accuracy: 0.3251 - loss: 1.6038 - val_accuracy: 0.3262 - val_loss: 1.5766
Epoch 2/15
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 142ms/step - accuracy: 0.3380 - loss: 1.5810 - val_accuracy: 0.3262 - val_loss: 1.5761
Epoch 3/15
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 149ms/step - accuracy: 0.3297 - loss: 1.5783 - val_accuracy: 0.3262 - val_loss: 1.5755
Epoch 4/15
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 204ms/step - accuracy: 0.3383 - loss: 1.5756 - val_accuracy: 0.2997 - val_loss: 1.5753
Epoch 5/15
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 138ms/step - accuracy: 0.3329 - loss: 1.5835 - val_accuracy: 0.3262 - val_loss: 1.5767
Epoch 6/15
[1m315/400[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m11s[0m 130ms/step - accuracy: 0.3270 - loss: 1.5857

KeyboardInterrupt: 

In [None]:
# Ejemplo de uso
user_text = "I am very happy today!"
print(f'Text: {user_text}, Emotion: {predict_emotion(user_text)}')

In [31]:
!pip list

Package                   Version
------------------------- -----------
absl-py                   2.1.0
aiohttp                   3.9.5
aiosignal                 1.3.1
altair                    5.3.0
asttokens                 2.4.1
astunparse                1.6.3
attrs                     23.2.0
blinker                   1.8.2
cachetools                5.3.3
certifi                   2024.6.2
charset-normalizer        3.3.2
click                     8.1.7
colorama                  0.4.6
comm                      0.2.2
contourpy                 1.2.1
cycler                    0.12.1
datasets                  2.20.0
debugpy                   1.8.2
decorator                 5.1.1
dill                      0.3.8
executing                 2.0.1
filelock                  3.15.4
flatbuffers               24.3.25
fonttools                 4.53.0
frozenlist                1.4.1
fsspec                    2024.5.0
gast                      0.6.0
gitdb                     4.0.11
GitPython         


[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip
