# Tomamos una base de datos de tweets en inglés que expresan diferentes emociones básicas, ya convertidas a una expresión sencilla.

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries imported above — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:

# Load the tweet dataset directly from the CSV hosted on GitHub.
DATA_URL = 'https://raw.githubusercontent.com/BrunoJDev/DS-III/refs/heads/main/data.csv'
df = pd.read_csv(DATA_URL)

In [None]:
# Show the dataset dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Preview the first rows. `head` has to be *called*: the bare attribute only
# displays the bound-method repr instead of a compact table.
df.head()

In [None]:
# Inspect one raw tweet: the row labelled 5 of the "text" column.
df.loc[5, "text"]

In [None]:
# Human-readable emotion names used as report labels.
# NOTE(review): presumably position i corresponds to integer label i in
# df['label'] — confirm against the dataset's label encoding.
emotion_classes = [
    'sadness',
    'joy',
    'love',
    'anger',
    'fear',
    'surprise',
]

Se tokenizan las distintas expresiones y, a continuación, se procede con el entrenamiento del modelo.

In [None]:
# Hold out 20 % of the rows, then halve that hold-out into validation and
# test sets. Both splits use the same fixed seed so they are repeatable.
features, targets = df['text'], df['label']
X_train, X_temp, y_train, y_temp = train_test_split(
    features,
    targets,
    test_size=0.2,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Vocabulary cap passed to the tokenizer as `num_words`.
max_words = 10000

# Fit the vocabulary on the training texts only, then encode each split
# with that same fitted tokenizer.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq, X_val_seq, X_test_seq = (
    tokenizer.texts_to_sequences(split_texts)
    for split_texts in (X_train, X_val, X_test)
)

In [None]:
# Common sequence length passed to `pad_sequences` for every split.
max_len = 100

# Bring the three encoded splits to the same fixed length.
X_train_pad, X_val_pad, X_test_pad = (
    pad_sequences(encoded_split, maxlen=max_len)
    for encoded_split in (X_train_seq, X_val_seq, X_test_seq)
)

In [None]:
# Embedding -> LSTM -> softmax classifier with one output unit per emotion.
# The input shape is declared with an explicit `Input` layer rather than the
# deprecated `Embedding(input_length=...)` argument, so the model is built
# up front and `model.summary()` can report output shapes and parameter counts.
model = Sequential([
    Input(shape=(max_len,), dtype="int32"),  # padded sequences of token ids
    Embedding(input_dim=max_words, output_dim=128),
    LSTM(64),
    # Output width follows the class list instead of a hard-coded 6.
    Dense(len(emotion_classes), activation='softmax'),
])

In [None]:
# The sparse categorical loss is used with the label column passed directly
# to `fit` (no one-hot encoding is applied anywhere in this notebook).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train for 5 epochs in mini-batches of 64, evaluating on the validation
# split after each epoch; the returned History object is kept in `history`.
history = model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_val_pad, y_val),
    epochs=5,
    batch_size=64,
)

Obtenemos las clases predichas con argmax y calculamos la exactitud (accuracy) y el informe de clasificación.

Empleamos también un mapa de calor para expresarlo visualmente.

In [None]:
# Per-class probabilities for every test sample; the predicted class is the
# index with the highest probability along the last axis.
y_pred_probabilities = model.predict(X_test_pad)
y_pred = y_pred_probabilities.argmax(axis=-1)

# Overall accuracy plus the per-class text report.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=emotion_classes)

print(
    f'Accuracy: {accuracy}',
    f'Classification Report:\n{report}',
    sep='\n',
)

In [None]:
from sklearn.metrics import classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Same report as a dict, turned into a DataFrame with one row per report entry.
report_dict = classification_report(y_test, y_pred, target_names=emotion_classes, output_dict=True)
report_df = pd.DataFrame(report_dict).T

# Drop the aggregate rows so only the per-class rows are plotted.
report_df = report_df.drop(index=['accuracy', 'macro avg', 'weighted avg'])

# Heatmap of precision / recall / F1 for each class.
metric_columns = ['precision', 'recall', 'f1-score']
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(report_df[metric_columns], annot=True, fmt=".2f", cmap="Blues", ax=ax)
ax.set_title("Reporte de Clasificación por Clase")
plt.yticks(rotation=0)
plt.show()