<a href="https://colab.research.google.com/github/JaiderMon/Emocioness/blob/main/Emocioness.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical


nltk.download('stopwords')
nltk.download('punkt_tab')
nltk.download('wordnet')


def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'@\w+|\#\w+', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = re.sub(r'\d+', '', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('spanish') + stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    text = ' '.join(tokens)
    return text


data = pd.read_csv('https://raw.githubusercontent.com/adiacla/sentimientos/refs/heads/main/emociones.csv', sep='|')


data['cleaned_tweet'] = data['tweet'].apply(preprocess_text)

X = data['cleaned_tweet']
y = data['emotion']

from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42
)


max_words = 5000
max_len = 100

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(y_categorical.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
history = model.fit(
    X_train_pad, y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

Epoch 1/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 157ms/step - accuracy: 0.4134 - loss: 1.8037 - val_accuracy: 0.6856 - val_loss: 0.9678
Epoch 2/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 157ms/step - accuracy: 0.7139 - loss: 0.8883 - val_accuracy: 0.7042 - val_loss: 0.9045
Epoch 3/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 159ms/step - accuracy: 0.7558 - loss: 0.7424 - val_accuracy: 0.7124 - val_loss: 0.9036
Epoch 4/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 164ms/step - accuracy: 0.7904 - loss: 0.6491 - val_accuracy: 0.7193 - val_loss: 0.9242
Epoch 5/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 158ms/step - accuracy: 0.8151 - loss: 0.5735 - val_accuracy: 0.7168 - val_loss: 0.9803
Epoch 6/10
[1m817/817[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 159ms/step - accuracy: 0.8303 - loss: 0.5176 - val_accuracy: 0.7127 - val_loss: 1.0569
Epoc

In [9]:
test_phrase = " Deben dejar de esperar que la gente les brinde felicidad, por eso están deprimidos todo el tiempo. Hazte feliz."
print(f"\nPredicción para: '{test_phrase}'")
print("Emoción predicha:", predict_emotion(test_phrase))


Predicción para: ' Deben dejar de esperar que la gente les brinde felicidad, por eso están deprimidos todo el tiempo. Hazte feliz.'
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Emoción predicha: depressed
