In [1]:
!pip install datasets kagglehub pandas --quiet
!pip install transformers --quiet
!pip install deep-translator tqdm --quiet
!pip install huggingface_hub --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import pandas as pd
from datasets import Dataset, concatenate_datasets, load_dataset
import os, json, re
import warnings
import logging
import kagglehub
from tqdm import tqdm
from deep_translator import GoogleTranslator
from huggingface_hub import login

warnings.filterwarnings("ignore")
logging.getLogger("huggingface_hub.repocard").setLevel(logging.ERROR)

In [3]:
login('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')

# DATASETS Y PREPROCESAMIENTO

Se carga el modelo de traducción EN -> ES

In [4]:
def translate_text(text, target_lang="es"):
  """
  Se intenta traducir primero con Google Translator.
  Si falla, se intenta con LibreTranslate.
  Si ambos fallan, se devuelve el texto original con el idioma EN.
  """
  try:
    return GoogleTranslator(source="auto",
                            target=target_lang).translate(text), target_lang
  except Exception:
    try:
      return GoogleTranslator(source="auto", target=target_lang,
                              provider="libre").translate(text), target_lang
    except Exception:
      return text, "en"

Se define la función de limpieza

In [5]:
def clean_dataset(df, columns=[]):
  def clean_text(text):
    if not isinstance(text, str):
        return ""
    text = re.sub(r"\s+", " ", text).strip()
    return text
  for c in columns:
    df[c]   = df[c].apply(lambda x: clean_text(x))
  for c in columns:
    df = df[(df[c].str.len() > 0)]
  df = df.drop_duplicates(subset=columns)
  df = df.reset_index(drop=True)

  return df

## 1. Dataset: Intent-Based Mental Health (Kaggle)

In [None]:
path = kagglehub.dataset_download("rishabhpancholi1302/intent-based-mental-health-chatbot-data")
with open(f"{path}/intents.json", "r", encoding="utf-8") as f:
    intents_json = json.load(f)

# Se convierte a dataframe
data_kaggle = []
for intent in intents_json["intents"]:
  tag = intent["tag"]
  patterns = intent["patterns"]
  responses = intent["responses"]

  for p in patterns:
    for r in responses:
      data_kaggle.append({
          "entrada": p.strip(),
          "respuesta": r.strip(),
          "intent": tag
      })
df_intents = pd.DataFrame(data_kaggle)
label_intents = {
    "greeting": "saludar",
    "stress": "mostrar_estres",
    "depression": "mostrar_depresion",
    "motivation": "mostrar_motivacion",
    "self_care": "expresar_deseo_de_autocuidado",
    "loneliness": "mostrar_sentimiento_de_soledad",
    "anger": "mostrar_enojo",
    "relationship": "hablar_sobre_problemas_de_relacion",
    "gratitude": "expresar_gratitud"
}
df_intents["intent"] = df_intents["intent"].map(lambda x: label_intents[x])

# Se agrega la columna idioma
df_intents['idioma'] = 'en'

df_intents.dropna(inplace=True)
df_intents.drop_duplicates(inplace=True)

print("Kaggle Intents:", df_intents.shape)

Kaggle Intents: (34827, 4)


In [None]:
df_intents.describe()

Unnamed: 0,entrada,respuesta,intent,idioma
count,34827,34827,34827,34827
unique,2034,156,9,1
top,I feel like I’m losing it,"Stress can be overwhelming, but it’s important...",expresar_gratitud,en
freq,31,260,5850,34827


In [None]:
df_intents.head()

Unnamed: 0,entrada,respuesta,intent,idioma
0,Hi,Hey! How can I help you today?,saludar,en
1,Hi,Hello! I'm here to listen. What's on your mind?,saludar,en
2,Hi,Hi there! How are you feeling today?,saludar,en
3,Hi,Hey! I’m here to chat whenever you need.,saludar,en
4,Hi,Good to see you! How can I support you today?,saludar,en


In [None]:
len_muestra = 4500

intents = df_intents['intent'].unique()
n_intents = len(intents)

filas_por_intent = len_muestra // n_intents

muestras = []

for intent in intents:
    sub_df_intent = df_intents[df_intents['intent'] == intent]

    muestra = sub_df_intent.sample(n=min(filas_por_intent, len(sub_df_intent)), random_state=42)
    muestras.append(muestra)

df_intents_m = pd.concat(muestras, ignore_index=True)

In [None]:
df_intents_m.describe()

Unnamed: 0,entrada,respuesta,intent,idioma
count,4500,4500,4500,4500
unique,1860,156,9,1
top,I need to feel more confident in myself,Hey! How can I help you today?,saludar,en
freq,8,58,500,4500


In [None]:
translated_data = []
for _, row in tqdm(df_intents[['entrada']].drop_duplicates().iterrows(),
                   total=len(df_intents[['entrada']].drop_duplicates()),
                   desc="Traduciendo"):
  inp, lang_inp = translate_text(row["entrada"])

  translated_data.append({
      "entrada_en": row["entrada"],
      "entrada_es": inp,
  })

df_intents_inp_t = pd.DataFrame(translated_data)

Traduciendo: 100%|██████████| 2034/2034 [10:39<00:00,  3.18it/s]


In [None]:
print(len(df_intents_inp_t[df_intents_inp_t['entrada_en']
    ==df_intents_inp_t['entrada_es']]))
df_intents_inp_t = df_intents_inp_t[df_intents_inp_t['entrada_en']
    != df_intents_inp_t['entrada_es']]
df_intents_inp_t.head()

2


Unnamed: 0,entrada_en,entrada_es
0,Hi,Hola
1,Hello,Hola
2,Hey there,Hola
3,Good morning,Buen día
4,Good evening,Buenas noches


In [None]:
translated_data = []
for _, row in tqdm(df_intents[["respuesta"]].drop_duplicates().iterrows(),
                   total=len(df_intents[["respuesta"]].drop_duplicates()),
                   desc="Traduciendo"):
  resp, lang_resp = translate_text(row["respuesta"])

  translated_data.append({
      "respuesta_en": row["respuesta"],
      "respuesta_es": resp,
  })

df_intents_resp_t = pd.DataFrame(translated_data)

Traduciendo: 100%|██████████| 156/156 [00:17<00:00,  8.99it/s]


In [None]:
print(len(df_intents_resp_t[df_intents_resp_t['respuesta_en']
    ==df_intents_resp_t['respuesta_es']]))
df_intents_resp_t = df_intents_resp_t[df_intents_resp_t['respuesta_en']
    != df_intents_resp_t['respuesta_es']]
df_intents_resp_t.head()

0


Unnamed: 0,respuesta_en,respuesta_es
0,Hey! How can I help you today?,¡Ey! ¿Cómo puedo ayudarte hoy?
1,Hello! I'm here to listen. What's on your mind?,¡Hola! Estoy aquí para escuchar. ¿Qué tienes e...
2,Hi there! How are you feeling today?,¡Hola! ¿Cómo te sientes hoy?
3,Hey! I’m here to chat whenever you need.,¡Ey! Estoy aquí para chatear cuando lo necesite.
4,Good to see you! How can I support you today?,¡Es bueno verte! ¿Cómo puedo apoyarte hoy?


In [None]:
df_intents_inp_t = df_intents_inp_t.rename(columns={'entrada_en': 'entrada',
                                                        'entrada_es': 'entrada_es'})
df_intents_resp_t = df_intents_resp_t.rename(columns={'respuesta_en': 'respuesta',
                                                          'respuesta_es': 'respuesta_es'})

df_merged = df_intents.merge(df_intents_inp_t, on='entrada', how='left')
df_merged = df_merged.merge(df_intents_resp_t, on='respuesta', how='left')

df_intents_t = df_merged.dropna(subset=['entrada_es', 'respuesta_es'])
df_intents_t = df_intents_t[['entrada_es', 'respuesta_es', 'intent']].rename(columns={
    'entrada_es': 'entrada',
    'respuesta_es': 'respuesta',
})
df_intents_t.head()

Unnamed: 0,entrada,respuesta,intent
0,Hola,¡Ey! ¿Cómo puedo ayudarte hoy?,saludar
1,Hola,¡Hola! Estoy aquí para escuchar. ¿Qué tienes e...,saludar
2,Hola,¡Hola! ¿Cómo te sientes hoy?,saludar
3,Hola,¡Ey! Estoy aquí para chatear cuando lo necesite.,saludar
4,Hola,¡Es bueno verte! ¿Cómo puedo apoyarte hoy?,saludar


## 2. Dataset de emociones (Hugging Face)

In [None]:
df_emotions = load_dataset("boltuix/emotions-dataset")
print(df_emotions)

# Se convierte a dataframe
df_emotions = pd.DataFrame(df_emotions["train"])
label_map_emotions = {
    'happiness':'felicidad',
    'neutral':'neutral',
    'sadness':'tristeza',
    'surprise':'sorpresa',
    'love':'amor',
    'fear':'miedo',
    'confusion':'confusion',
    'disgust':'disgusto',
    'desire':'deseo',
    'shame':'verguenza',
    'sarcasm':'sarcasmo',
    'anger':'enojo',
    'guilt':'culpa'
}
df_emotions["Label"] = df_emotions["Label"].map(lambda x: label_map_emotions[x])
df_emotions.rename(
    columns={'Sentence': 'mensaje', 'Label': 'emocion'}, inplace=True)

# Se agrega la columna idioma
df_emotions['idioma'] = 'en'

df_emotions.dropna(inplace=True)
df_emotions.drop_duplicates(inplace=True)

print("Emociones:", df_emotions.shape)

README.md: 0.00B [00:00, ?B/s]

emotions_dataset.parquet:   0%|          | 0.00/7.41M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/131306 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Sentence', 'Label'],
        num_rows: 131306
    })
})
Emociones: (131306, 3)


In [None]:
df_emotions.describe()

Unnamed: 0,mensaje,emocion,idioma
count,131306,131306,131306
unique,122336,13,1
top,Hey we have feelings too nerd,felicidad,en
freq,5,31205,131306


In [None]:
df_emotions.head()

Unnamed: 0,mensaje,emocion,idioma
0,Unfortunately later died from eating tainted m...,felicidad,en
1,Last time I saw was loooong ago. Basically bef...,neutral,en
2,You mean by number of military personnel? Beca...,neutral,en
3,Need to go middle of the road no NAME is going...,tristeza,en
4,feel melty miserable enough imagine must,tristeza,en


In [None]:
len_muestra = 1500

emotions = df_emotions['emocion'].unique()
n_emotions = len(emotions)

filas_por_emotion = len_muestra // n_emotions

muestras = []

for emotion in emotions:
    sub_df_emotion = df_emotions[df_emotions['emocion'] == emotion]

    muestra = sub_df_emotion.sample(n=min(filas_por_emotion, len(sub_df_emotion)), random_state=42)
    muestras.append(muestra)

df_emotions_m = pd.concat(muestras, ignore_index=True)

In [None]:
df_emotions_m.describe()

Unnamed: 0,mensaje,emocion,idioma
count,1495,1495,1495
unique,1495,13,1
top,I’m tormented by guilt for trusting the app’s ...,felicidad,en
freq,1,115,1495


In [None]:
translated_data = []
for _, row in tqdm(df_emotions_m.iterrows(),
                   total=len(df_emotions_m),
                   desc="Traduciendo"):
  text, lang_text = translate_text(row["mensaje"])

  translated_data.append({
      "mensaje_en": row["mensaje"],
      "mensaje_es": text,
      "emocion": row["emocion"]
  })

df_emotions_m_t = pd.DataFrame(translated_data)

Traduciendo: 100%|██████████| 1495/1495 [30:10<00:00,  1.21s/it]


In [None]:
print(len(df_emotions_m_t[df_emotions_m_t['mensaje_en']
    ==df_emotions_m_t['mensaje_es']]))
df_emotions_m_t = df_emotions_m_t[df_emotions_m_t['mensaje_en']
    != df_emotions_m_t['mensaje_es']]
df_emotions_m_t = df_emotions_m_t[['mensaje_es', 'emocion']].rename(columns={
    'mensaje_es': 'mensaje'
})
df_emotions_m_t.head()

0


Unnamed: 0,mensaje,emocion
0,Me encanta que realmente me hayas educado un p...,felicidad
1,Me acabo de decir que no iba a revisar este su...,felicidad
2,dándome la bienvenida en la estación en medio ...,felicidad
3,aunque se siente bastante alegre,felicidad
4,"¡Gracias, chicos, aprecian las aclaraciones!",felicidad


## 3. Dataset de salud mental (Kaggle)

In [None]:
path = kagglehub.dataset_download("suchintikasarkar/sentiment-analysis-for-mental-health")

# Se convierte a dataframe
df_mental = pd.read_csv(path + "/Combined Data.csv")[['statement','status']]
label_map_mental = {
    "Depression": "depresion",
    "Suicidal": "suicida",
    "Anxiety": "ansiedad",
    "Stress": "estres",
    "Bipolar": "bipolaridad",
    "Normal": "normal",
    "Personality disorder": "trastorno de personalidad"
}
df_mental["status"] = df_mental["status"].map(lambda x: label_map_mental[x])
df_mental.rename(
    columns={'statement': 'mensaje', 'status': 'estado mental'}, inplace=True)

# Se agrega la columna idioma
df_mental['idioma'] = 'en'

df_mental.dropna(inplace=True)
df_mental.drop_duplicates(inplace=True)

print("Estado de Salud mental:", df_mental.shape)

Estado de Salud mental: (51093, 3)


In [None]:
df_mental.describe()

Unnamed: 0,mensaje,estado mental,idioma
count,51093,51093,51093
unique,51073,7,1
top,#NAME?,normal,en
freq,4,16040,51093


In [None]:
df_mental.head()

Unnamed: 0,mensaje,estado mental,idioma
0,oh my gosh,ansiedad,en
1,"trouble sleeping, confused mind, restless hear...",ansiedad,en
2,"All wrong, back off dear, forward doubt. Stay ...",ansiedad,en
3,I've shifted my focus to something else but I'...,ansiedad,en
4,"I'm restless and restless, it's been a month n...",ansiedad,en


In [None]:
len_muestra = 1500

statuses = df_mental['estado mental'].unique()
n_statuses = len(statuses)

filas_por_status = len_muestra // n_statuses

muestras = []

for status in statuses:
    sub_df_status = df_mental[df_mental['estado mental'] == status]

    muestra = sub_df_status.sample(n=min(filas_por_status, len(sub_df_status)), random_state=42)
    muestras.append(muestra)

df_mental_m = pd.concat(muestras, ignore_index=True)

In [None]:
df_mental_m.describe()

Unnamed: 0,mensaje,estado mental,idioma
count,1498,1498,1498
unique,1498,7,1
top,what is your daily screen time? *please pick w...,ansiedad,en
freq,1,214,1498


In [None]:
translated_data = []
for _, row in tqdm(df_mental_m.iterrows(),
                   total=len(df_mental_m),
                   desc="Traduciendo"):
  text, lang_text = translate_text(row["mensaje"])

  translated_data.append({
      "mensaje_en": row["mensaje"],
      "mensaje_es": text,
      "estado mental": row["estado mental"]
  })

df_mental_m_t = pd.DataFrame(translated_data)

Traduciendo: 100%|██████████| 1498/1498 [30:59<00:00,  1.24s/it]


In [None]:
print(len(df_mental_m_t[df_mental_m_t['mensaje_en']
    ==df_mental_m_t['mensaje_es']]))
df_mental_m_t = df_mental_m_t[df_mental_m_t['mensaje_en']
    != df_mental_m_t['mensaje_es']]
df_mental_m_t = df_mental_m_t[['mensaje_es', 'estado mental']].rename(columns={
    'mensaje_es': 'mensaje'
})
df_mental_m_t.head()

10


Unnamed: 0,mensaje,estado mental
0,"Mi historia de ansiedad por VIH hola,\n\nDuran...",ansiedad
1,"Lógicamente, sé que mis uñas son de un color s...",ansiedad
2,Hipersensibilidad No tengo ansiedad de salud c...,ansiedad
3,"Esta noche estoy tan nervioso Huhu, ¿qué está ...",ansiedad
4,"Al final de mi cuerda, no tengo idea de qué ha...",ansiedad


## Guardado y Limpieza

In [None]:
os.makedirs("datasets_formateados", exist_ok=True)

df_intents_t.to_excel("datasets_formateados/intent_chatbot_es.xlsx", index=False)
df_emotions_m_t.to_excel("datasets_formateados/emotions_es.xlsx", index=False)
df_mental_m_t.to_excel("datasets_formateados/mental_health_es.xlsx", index=False)

In [6]:
df_intent_es    = pd.read_excel("datasets_formateados/intent_chatbot_es.xlsx")
df_emotions_es  = pd.read_excel("datasets_formateados/emotions_es.xlsx")
df_mental_es    = pd.read_excel("datasets_formateados/mental_health_es.xlsx")

In [7]:
df_intent_es    = clean_dataset(df_intent_es[['entrada','respuesta','intent']], ['entrada','respuesta'])
df_emotions_es  = clean_dataset(df_emotions_es[['mensaje','emocion']], ['mensaje'])
df_mental_es    = clean_dataset(df_mental_es[['mensaje','estado mental']], ['mensaje'])

# ENTRENAMIENTO Y VALIDACIÓN DEL CLASIFICADOR DE EMOCIONES

In [None]:
import numpy as np
import pandas as pd
import json
import os

from transformers import BertTokenizer
from transformers import TFBertModel, BertConfig

from tf_keras.layers import Input, Dense
from tf_keras.models import Model
from tf_keras.optimizers import Adam
from tf_keras.callbacks import ModelCheckpoint

from tf_keras import backend as K

from tf_keras.utils import plot_model

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
df_emociones = df_emotions_es.copy()

In [None]:
df_emociones.groupby("emocion").count()

Unnamed: 0_level_0,mensaje
emocion,Unnamed: 1_level_1
amor,115
confusion,115
culpa,115
deseo,115
disgusto,115
enojo,115
felicidad,115
miedo,115
neutral,115
sarcasmo,115


In [None]:
df_emociones["mensaje"].str.len().max()

398

In [None]:
df_emociones = df_emociones[df_emociones["mensaje"].str.len() < 256]

In [None]:
df_emociones.groupby("emocion").count()

Unnamed: 0_level_0,mensaje
emocion,Unnamed: 1_level_1
amor,115
confusion,115
culpa,113
deseo,115
disgusto,112
enojo,112
felicidad,114
miedo,113
neutral,115
sarcasmo,115


In [None]:
df_emociones["emocion"] = pd.Categorical(df_emociones["emocion"])

In [None]:
emotions_to_number_dictionary = dict((label, index) for index, label in enumerate(df_emociones["emocion"].cat.categories))
number_to_emotions_dictionary = {value: key for key, value in emotions_to_number_dictionary.items()}

In [None]:
print(emotions_to_number_dictionary)
print(number_to_emotions_dictionary)

{'amor': 0, 'confusion': 1, 'culpa': 2, 'deseo': 3, 'disgusto': 4, 'enojo': 5, 'felicidad': 6, 'miedo': 7, 'neutral': 8, 'sarcasmo': 9, 'sorpresa': 10, 'tristeza': 11, 'verguenza': 12}
{0: 'amor', 1: 'confusion', 2: 'culpa', 3: 'deseo', 4: 'disgusto', 5: 'enojo', 6: 'felicidad', 7: 'miedo', 8: 'neutral', 9: 'sarcasmo', 10: 'sorpresa', 11: 'tristeza', 12: 'verguenza'}


In [None]:
dataframe_train, dataframe_test = train_test_split(df_emociones, test_size=0.2, stratify=df_emociones[["emocion"]])

In [None]:
dataframe_train["emocion"]  = pd.Categorical(dataframe_train["emocion"])
dataframe_test["emocion"]   = pd.Categorical(dataframe_test["emocion"])

In [None]:
dataframe_train["emocion"] = dataframe_train["emocion"].replace(emotions_to_number_dictionary)
dataframe_test["emocion"]  = dataframe_test["emocion"].replace(emotions_to_number_dictionary)

In [None]:
subdataframe_train, subdataframe_val = train_test_split(dataframe_train, test_size=0.2, stratify=dataframe_train[["emocion"]])

In [None]:
model_name = "dccuchile/bert-base-spanish-wwm-cased"

tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model_name, do_lower_case=False)

In [None]:
max_length = 256

x_train = tokenizer(
                    text=subdataframe_train["mensaje"].to_list(),
                    add_special_tokens=True,
                    max_length=max_length,
                    truncation=True,
                    padding="max_length",
                    return_tensors="tf",
                    return_token_type_ids=False,
                    return_attention_mask=True
                   )

x_val = tokenizer(
                   text=subdataframe_val["mensaje"].to_list(),
                   add_special_tokens=True,
                   max_length=max_length,
                   truncation=True,
                   padding="max_length",
                   return_tensors="tf",
                   return_token_type_ids=False,
                   return_attention_mask=True
                  )

x_test = tokenizer(
                   text=dataframe_test["mensaje"].to_list(),
                   add_special_tokens=True,
                   max_length=max_length,
                   truncation=True,
                   padding="max_length",
                   return_tensors="tf",
                   return_token_type_ids=False,
                   return_attention_mask=True
                  )

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


In [None]:
configuration = BertConfig.from_pretrained(model_name)
configuration.output_hidden_states = False

transformer_model = TFBertModel.from_pretrained(model_name, config=configuration)
transformer_model.trainable = True

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some layers from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing TFBertModel: ['mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFBertModel were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert/pooler/dense/kernel:0', 'bert/pooler/dense/bias:0']
You should probably TRAIN this model on

In [None]:
input_ids_length = x_train["input_ids"].shape[-1]
print(input_ids_length)
attention_mask_length = x_train["attention_mask"].shape[-1]
print(attention_mask_length)
emotions_number = len(set(dataframe_train["emocion"]))
print(emotions_number)

256
256
13


In [None]:
K.clear_session()

input_ids = Input(shape=(input_ids_length,), name="input_ids", dtype="int32")
attention_mask = Input(shape=(attention_mask_length,), name="attention_mask", dtype="int32")

inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

bert_layer = transformer_model(inputs)
bert_pooling_layer = bert_layer.pooler_output

dense1 = Dense(256, activation="relu", name="dense1")(bert_pooling_layer)

dense_outputs = Dense(emotions_number, activation="softmax", name="outputs")(dense1)
outputs = {"outputs": dense_outputs}

model = Model(inputs=inputs, outputs=outputs)

In [None]:
optimizer = Adam(learning_rate=5e-05)

model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [None]:
y_train = subdataframe_train["emocion"].to_numpy()

y_val = subdataframe_val["emocion"].to_numpy()

y_test = dataframe_test["emocion"].to_numpy()

In [None]:
batch_size = 32
epochs = 10

x_train_for_model = {"input_ids": x_train["input_ids"], "attention_mask": x_train["attention_mask"]}
y_train_for_model = {"outputs": y_train}

x_val_for_model = {"input_ids": x_val["input_ids"], "attention_mask": x_val["attention_mask"]}
y_val_for_model = {"outputs": y_val}

x_test_for_model = {"input_ids": x_test["input_ids"], "attention_mask": x_test["attention_mask"]}
y_test_for_model = {"outputs": y_test}

In [None]:
checkpoint_path = "model-{epoch:03d}-{val_accuracy:.4f}.weights.h5"

monitor = "val_accuracy"

mode = "max"

model_checkpoint = ModelCheckpoint(checkpoint_path, monitor=monitor, verbose=1, save_best_only=True, save_weights_only=True, mode=mode)

In [None]:
history = model.fit(x=x_train_for_model, y=y_train_for_model, batch_size=batch_size, epochs=epochs,
                    validation_data=(x_val_for_model, y_val_for_model), callbacks=[model_checkpoint])

Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.36287, saving model to model-001-0.3629.weights.h5
Epoch 2/10
Epoch 2: val_accuracy improved from 0.36287 to 0.46835, saving model to model-002-0.4684.weights.h5
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.46835
Epoch 4/10
Epoch 4: val_accuracy improved from 0.46835 to 0.47679, saving model to model-004-0.4768.weights.h5
Epoch 5/10
Epoch 5: val_accuracy improved from 0.47679 to 0.49789, saving model to model-005-0.4979.weights.h5
Epoch 6/10
Epoch 6: val_accuracy improved from 0.49789 to 0.52743, saving model to model-006-0.5274.weights.h5
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.52743
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.52743
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.52743
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.52743


In [None]:
model.load_weights("model-006-0.5274.weights.h5")

In [None]:
evaluation = model.evaluate(x=x_test_for_model, y=y_test_for_model)



In [None]:
predictions = model.predict(x_test_for_model)



In [None]:
target_names = list(emotions_to_number_dictionary.keys())

print(classification_report(y_test_for_model["outputs"], predictions["outputs"].argmax(axis=-1), target_names=target_names, zero_division=np.nan))

              precision    recall  f1-score   support

        amor       0.58      0.30      0.40        23
   confusion       0.24      0.35      0.29        23
       culpa       0.79      0.65      0.71        23
       deseo       0.79      0.83      0.81        23
    disgusto       0.37      0.32      0.34        22
       enojo       0.25      0.18      0.21        22
   felicidad       0.42      0.48      0.45        23
       miedo       0.48      0.43      0.45        23
     neutral       0.35      0.35      0.35        23
    sarcasmo       0.92      1.00      0.96        23
    sorpresa       0.43      0.39      0.41        23
    tristeza       0.50      0.48      0.49        23
   verguenza       0.28      0.43      0.34        23

    accuracy                           0.48       297
   macro avg       0.49      0.48      0.48       297
weighted avg       0.49      0.48      0.48       297



In [None]:
model.save("bert_emociones")

In [None]:
!zip -r bert_emociones.zip bert_emociones

  adding: bert_emociones/ (stored 0%)
  adding: bert_emociones/keras_metadata.pb (deflated 96%)
  adding: bert_emociones/variables/ (stored 0%)
  adding: bert_emociones/variables/variables.data-00000-of-00001 (deflated 20%)
  adding: bert_emociones/variables/variables.index (deflated 79%)
  adding: bert_emociones/assets/ (stored 0%)
  adding: bert_emociones/saved_model.pb (deflated 92%)
  adding: bert_emociones/fingerprint.pb (stored 0%)


In [None]:
from google.colab import files
files.download("bert_emociones.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# ENTRENAMIENTO Y VALIDACIÓN DEL CLASIFICADOR DE ESTADO MENTAL

In [8]:
import numpy as np
import pandas as pd
import json
import os

from transformers import BertTokenizer
from transformers import TFBertModel, BertConfig

from tf_keras.layers import Input, Dense
from tf_keras.models import Model
from tf_keras.optimizers import Adam
from tf_keras.callbacks import ModelCheckpoint

from tf_keras import backend as K

from tf_keras.utils import plot_model

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [9]:
df_estado_mental = df_mental_es.copy()

In [10]:
df_estado_mental.groupby("estado mental").count()

Unnamed: 0_level_0,mensaje
estado mental,Unnamed: 1_level_1
ansiedad,213
bipolaridad,214
depresion,212
estres,214
normal,210
suicida,212
trastorno de personalidad,213


In [11]:
df_estado_mental["mensaje"].str.len().max()

5492

In [12]:
df_estado_mental = df_estado_mental[df_estado_mental["mensaje"].str.len() < 384]

In [13]:
df_estado_mental.groupby("estado mental").count()

Unnamed: 0_level_0,mensaje
estado mental,Unnamed: 1_level_1
ansiedad,78
bipolaridad,49
depresion,65
estres,52
normal,195
suicida,93
trastorno de personalidad,59


In [14]:
df_estado_mental["estado mental"] = pd.Categorical(df_estado_mental["estado mental"])

In [15]:
statuses_to_number_dictionary = dict((label, index) for index, label in enumerate(df_estado_mental["estado mental"].cat.categories))
number_to_statuses_dictionary = {value: key for key, value in statuses_to_number_dictionary.items()}

In [16]:
print(statuses_to_number_dictionary)
print(number_to_statuses_dictionary)

{'ansiedad': 0, 'bipolaridad': 1, 'depresion': 2, 'estres': 3, 'normal': 4, 'suicida': 5, 'trastorno de personalidad': 6}
{0: 'ansiedad', 1: 'bipolaridad', 2: 'depresion', 3: 'estres', 4: 'normal', 5: 'suicida', 6: 'trastorno de personalidad'}


In [17]:
dataframe_train, dataframe_test = train_test_split(df_estado_mental, test_size=0.10, stratify=df_estado_mental[["estado mental"]])

In [18]:
dataframe_train["estado mental"]  = pd.Categorical(dataframe_train["estado mental"])
dataframe_test["estado mental"]   = pd.Categorical(dataframe_test["estado mental"])

In [19]:
dataframe_train["estado mental"] = dataframe_train["estado mental"].replace(statuses_to_number_dictionary)
dataframe_test["estado mental"]  = dataframe_test["estado mental"].replace(statuses_to_number_dictionary)

In [20]:
subdataframe_train, subdataframe_val = train_test_split(dataframe_train, test_size=0.15, stratify=dataframe_train[["estado mental"]])

In [21]:
model_name = "dccuchile/bert-base-spanish-wwm-cased"

tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model_name, do_lower_case=False)

tokenizer_config.json:   0%|          | 0.00/364 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/134 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

In [22]:
max_length = 384

x_train = tokenizer(
                    text=subdataframe_train["mensaje"].to_list(),
                    add_special_tokens=True,
                    max_length=max_length,
                    truncation=True,
                    padding="max_length",
                    return_tensors="tf",
                    return_token_type_ids=False,
                    return_attention_mask=True
                   )

x_val = tokenizer(
                   text=subdataframe_val["mensaje"].to_list(),
                   add_special_tokens=True,
                   max_length=max_length,
                   truncation=True,
                   padding="max_length",
                   return_tensors="tf",
                   return_token_type_ids=False,
                   return_attention_mask=True
                  )

x_test = tokenizer(
                   text=dataframe_test["mensaje"].to_list(),
                   add_special_tokens=True,
                   max_length=max_length,
                   truncation=True,
                   padding="max_length",
                   return_tensors="tf",
                   return_token_type_ids=False,
                   return_attention_mask=True
                  )

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


In [23]:
configuration = BertConfig.from_pretrained(model_name)
configuration.output_hidden_states = False

transformer_model = TFBertModel.from_pretrained(model_name, config=configuration)
transformer_model.trainable = True

tf_model.h5:   0%|          | 0.00/537M [00:00<?, ?B/s]

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some layers from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing TFBertModel: ['mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFBertModel were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert/pooler/dense/bias:0', 'bert/pooler/dense/kernel:0']
You should probably TRAIN this model on

In [24]:
input_ids_length = x_train["input_ids"].shape[-1]
print(input_ids_length)
attention_mask_length = x_train["attention_mask"].shape[-1]
print(attention_mask_length)
statuses_number = len(set(dataframe_train["estado mental"]))
print(statuses_number)

384
384
7


In [25]:
K.clear_session()

input_ids = Input(shape=(input_ids_length,), name="input_ids", dtype="int32")
attention_mask = Input(shape=(attention_mask_length,), name="attention_mask", dtype="int32")

inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

bert_layer = transformer_model(inputs)
bert_pooling_layer = bert_layer.pooler_output

dense1 = Dense(256, activation="relu", name="dense1")(bert_pooling_layer)

# dense2 = Dense(256, activation="relu", name="dense2")(dense1)

dense_outputs = Dense(statuses_number, activation="softmax", name="outputs")(dense1)
outputs = {"outputs": dense_outputs}

model = Model(inputs=inputs, outputs=outputs)

In [26]:
optimizer = Adam(learning_rate=5e-05)

model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [27]:
y_train = subdataframe_train["estado mental"].to_numpy()

y_val = subdataframe_val["estado mental"].to_numpy()

y_test = dataframe_test["estado mental"].to_numpy()

In [28]:
batch_size = 16
epochs = 20

x_train_for_model = {"input_ids": x_train["input_ids"], "attention_mask": x_train["attention_mask"]}
y_train_for_model = {"outputs": y_train}

x_val_for_model = {"input_ids": x_val["input_ids"], "attention_mask": x_val["attention_mask"]}
y_val_for_model = {"outputs": y_val}

x_test_for_model = {"input_ids": x_test["input_ids"], "attention_mask": x_test["attention_mask"]}
y_test_for_model = {"outputs": y_test}

In [29]:
checkpoint_path = "model-mental-{epoch:03d}-{val_accuracy:.4f}.weights.h5"

monitor = "val_accuracy"

mode = "max"

model_checkpoint = ModelCheckpoint(checkpoint_path, monitor=monitor, verbose=1, save_best_only=True, save_weights_only=True, mode=mode)

In [30]:
history = model.fit(x=x_train_for_model, y=y_train_for_model, batch_size=batch_size, epochs=epochs,
                    validation_data=(x_val_for_model, y_val_for_model), callbacks=[model_checkpoint])

Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.50000, saving model to model-mental-001-0.5000.weights.h5
Epoch 2/20
Epoch 2: val_accuracy improved from 0.50000 to 0.65000, saving model to model-mental-002-0.6500.weights.h5
Epoch 3/20
Epoch 3: val_accuracy improved from 0.65000 to 0.68750, saving model to model-mental-003-0.6875.weights.h5
Epoch 4/20
Epoch 4: val_accuracy improved from 0.68750 to 0.71250, saving model to model-mental-004-0.7125.weights.h5
Epoch 5/20
Epoch 5: val_accuracy improved from 0.71250 to 0.73750, saving model to model-mental-005-0.7375.weights.h5
Epoch 6/20
Epoch 6: val_accuracy did not improve from 0.73750
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.73750
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.73750
Epoch 9/20
Epoch 9: val_accuracy did not improve from 0.73750
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.73750
Epoch 11/20
Epoch 11: val_accuracy did not improve from 0.73750
Epoch 12/20
Epoch 12: val_accuracy di

In [31]:
model.load_weights("model-mental-005-0.7375.weights.h5")

In [32]:
evaluation = model.evaluate(x=x_test_for_model, y=y_test_for_model)



In [33]:
predictions = model.predict(x_test_for_model)



In [34]:
target_names = list(statuses_to_number_dictionary.keys())

print(classification_report(y_test_for_model["outputs"], predictions["outputs"].argmax(axis=-1), target_names=target_names, zero_division=np.nan))

                           precision    recall  f1-score   support

                 ansiedad       0.67      0.75      0.71         8
              bipolaridad       0.25      0.20      0.22         5
                depresion       0.40      0.57      0.47         7
                   estres       0.40      0.40      0.40         5
                   normal       0.95      0.90      0.92        20
                  suicida       0.71      0.56      0.62         9
trastorno de personalidad       0.83      0.83      0.83         6

                 accuracy                           0.68        60
                macro avg       0.60      0.60      0.60        60
             weighted avg       0.70      0.68      0.69        60



In [35]:
model.save("bert_estado_mental")

In [36]:
!zip -r bert_estado_mental.zip bert_estado_mental

  adding: bert_estado_mental/ (stored 0%)
  adding: bert_estado_mental/assets/ (stored 0%)
  adding: bert_estado_mental/keras_metadata.pb (deflated 96%)
  adding: bert_estado_mental/fingerprint.pb (stored 0%)
  adding: bert_estado_mental/variables/ (stored 0%)
  adding: bert_estado_mental/variables/variables.index (deflated 79%)
  adding: bert_estado_mental/variables/variables.data-00000-of-00001 (deflated 20%)
  adding: bert_estado_mental/saved_model.pb (deflated 92%)


In [37]:
from google.colab import files
files.download("bert_estado_mental.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# ENTRENAMIENTO Y VALIDACIÓN DEL CLASIFICADOR DE INTENTS

In [None]:
import numpy as np
import pandas as pd
import json
import os

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [None]:
df_intent_rasa = df_intent_es.copy()
df_intent_rasa['mensaje'] =df_intent_rasa['entrada']
df_intent_rasa = df_intent_rasa[['mensaje','intent']]
df_intent_rasa.drop_duplicates(inplace=True)
df_intent_rasa.reset_index(drop=True, inplace=True)

In [None]:
df_intent_rasa

Unnamed: 0,mensaje,intent
0,Hola,saludar
1,Buen día,saludar
2,Buenas noches,saludar
3,¿Qué pasa?,saludar
4,Hola bot,saludar
...,...,...
2000,Estoy tan agradecido por el amor y el respeto ...,expresar_gratitud
2001,Gracias por alegrar siempre mi día,expresar_gratitud
2002,Estoy agradecido por tu presencia reflexiva,expresar_gratitud
2003,Gracias por siempre poner una sonrisa en mi cara,expresar_gratitud


In [None]:
df_intent_rasa["mensaje"].str.len().max()

80

In [None]:
df_intent_rasa.groupby("intent").count()

Unnamed: 0_level_0,mensaje
intent,Unnamed: 1_level_1
expresar_deseo_de_autocuidado,210
expresar_gratitud,225
hablar_sobre_problemas_de_relacion,204
mostrar_depresion,249
mostrar_enojo,220
mostrar_estres,255
mostrar_motivacion,226
mostrar_sentimiento_de_soledad,212
saludar,204


In [None]:
dataframe_train, dataframe_test = train_test_split(df_intent_rasa, test_size=0.2, stratify=df_intent_rasa[["intent"]])

In [None]:
!pip install -q condacolab
import condacolab
condacolab.install()

⏬ Downloading https://github.com/jaimergp/miniforge/releases/download/24.11.2-1_colab/Miniforge3-colab-24.11.2-1_colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:19
🔁 Restarting kernel...


In [None]:
!conda create --name myenv python=3.10 --quiet

Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / done
Solving environment: \ | done


    current version: 24.11.2
    latest version: 25.7.0

Please update conda by running

    $ conda update -n base -c conda-forge conda



## Package Plan ##

  environment location: /usr/local/envs/myenv

  added / updated specs:
    - python=3.10


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2025.8.3   |       hbd8a1cb_0         151 KB  conda-forge
    ld_impl_linux-64-2.44      |       h1423503_1         660 KB  conda-forge
    libexpat-2.7.1             |       hecca717_0          73 KB  conda-forge


In [None]:
%%shell
eval "$(conda shell.bash hook)"
conda activate myenv
python3 -m pip install rasa==3.6.21 rasa[spacy]==3.6.21 pandas --quiet

Collecting rasa==3.6.21
  Downloading rasa-3.6.21-py3-none-any.whl.metadata (28 kB)
Collecting pandas
  Downloading pandas-2.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting CacheControl<0.13.0,>=0.12.9 (from rasa==3.6.21)
  Downloading CacheControl-0.12.14-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting PyJWT<3.0.0,>=2.0.0 (from PyJWT[crypto]<3.0.0,>=2.0.0->rasa==3.6.21)
  Downloading PyJWT-2.10.1-py3-none-any.whl.metadata (4.0 kB)
Collecting SQLAlchemy<1.5.0,>=1.4.0 (from rasa==3.6.21)
  Downloading SQLAlchemy-1.4.54-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting absl-py<1.5,>=0.9 (from rasa==3.6.21)
  Downloading absl_py-1.4.0-py3-none-any.whl.metadata (2.3 kB)
Collecting aio-pika<8.2.4,>=6.7.1 (from rasa==3.6.21)
  Downloading aio_pika-8.2.3-py3-none-any.whl.metadata (9.5 kB)
Collecting aiogram<2.26 (from rasa==3.6.21)
  Downloading aiogram-2.25.2-py3-none-any.



In [None]:
%%shell
eval "$(conda shell.bash hook)"
conda activate myenv
python3 -m spacy download es_core_news_md --quiet

2025-08-29 05:52:28.073650: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-29 05:52:28.126378: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-29 05:52:28.126950: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-29 05:52:32.181851: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-08-29 05:52:33.027880: W tensorflow/core/common_runtime/gpu/gpu_device.



In [None]:
os.makedirs("rasa/data", exist_ok=True)

with open("rasa/data/training_data.yml", "w", encoding="utf-8") as f:
    f.write("version: '3.1'\n")
    f.write("nlu:\n")
    for intent in dataframe_train["intent"].unique():
        examples = dataframe_train[dataframe_train["intent"] == intent]["mensaje"].tolist()
        f.write(f"- intent: {intent}\n")
        f.write("  examples: |\n")
        for ex in examples:
            f.write(f"    - {ex}\n")

with open("rasa/data/test_data.yml", "w", encoding="utf-8") as f:
    f.write("version: '3.1'\n")
    f.write("nlu:\n")
    for intent in dataframe_test["intent"].unique():
        examples = dataframe_test[dataframe_test["intent"] == intent]["mensaje"].tolist()
        f.write(f"- intent: {intent}\n")
        f.write("  examples: |\n")
        for ex in examples:
            f.write(f"    - {ex}\n")

In [None]:
config_content = """recipe: default.v1
language: es
pipeline:
  - name: SpacyNLP
    model: es_core_news_md
    case_sensitive: False
  - name: SpacyTokenizer
    model: es_core_news_md
  - name: SpacyFeaturizer
    model: es_core_news_md
  - name: DIETClassifier
    epochs: 25
    constrain_similarities: true
    model_confidence: softmax"""
with open("rasa/config.yml", "w") as f:
    f.write(config_content)

with open("rasa/domain.yml", "w") as f:
    f.write("version: '3.1'\n")
    f.write("intents:\n")
    for intent in dataframe_train["intent"].unique():
        f.write(f"- {intent}\n")

In [None]:
%mkdir -p rasa_model_trained
%mkdir -p rasa_model_tested

In [None]:
%%shell
eval "$(conda shell.bash hook)"
conda activate myenv
rasa telemetry disable

  Base: DeclarativeMeta = declarative_base()
  import pkg_resources
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
[92mDisabled telemetry reporting.[0m




In [None]:
%%shell
eval "$(conda shell.bash hook)"
conda activate myenv
rasa train nlu --nlu rasa/data/training_data.yml --config rasa/config.yml --domain rasa/domain.yml --out rasa_model_trained

  Base: DeclarativeMeta = declarative_base()
  import pkg_resources
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  from jax import xla_computation as _xla_computation
  cli: click.BaseCommand,
  from click.parser import split_arg_string
2025-08-29 06:01:54 [1;30mINFO    [0m [34mrasa.nlu.utils.spacy_utils[0m  - Trying to load SpaCy model with name 'es_core_news_md'.
2025-08-29 06:01:56 [1;30mINFO    [0m [34mrasa.nlu.utils.spacy_utils[0m  - Trying to load SpaCy model with name 'es_core_news_md'.
2025-08-29 06:02:01 [1;30mINFO    [0m [34mrasa.engine.training.



In [None]:
%%shell
eval "$(conda shell.bash hook)"
conda activate myenv
rasa test nlu --model rasa_model_trained/nlu-20250829-060154-boxy-orchestrator.tar.gz --nlu rasa/data/test_data.yml \
               --config rasa/config.yml --domain rasa/domain.yml --out rasa_model_tested --no-plot

  Base: DeclarativeMeta = declarative_base()
  import pkg_resources
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  from jax import xla_computation as _xla_computation
2025-08-29 06:08:06 [1;30mINFO    [0m [34mrasa.core.processor[0m  - Loading model rasa_model_trained/nlu-20250829-060154-boxy-orchestrator.tar.gz...
2025-08-29 06:08:06 [1;30mINFO    [0m [34mrasa.nlu.utils.spacy_utils[0m  - Trying to load SpaCy model with name 'es_core_news_md'.
  cli: click.BaseCommand,
  from click.parser import split_arg_string
2025-08-29 06:08:08 [1;30mINFO    [0m [34mras



In [None]:
import json

with open("rasa_model_tested/intent_report.json", "r") as f:
  data = json.load(f)

table = []
for intent, metrics in data.items():
  if not isinstance(metrics, dict):
      continue
  if not all(k in metrics for k in ['precision', 'recall', 'f1-score', 'support']):
      continue

  row = [
      intent,
      round(metrics['precision'], 2),
      round(metrics['recall'], 2),
      round(metrics['f1-score'], 2),
      metrics['support']
  ]
  table.append(row)

print("{:50s} {:>10s} {:>10s} {:>10s} {:>10s}".format("", "precision", "recall", "f1-score", "support"))

for row in table:
  label, p, r, f1, s = row

  if label == "accuracy":
    print("\n{:50s} {:>30.2f} {:>10d}".format(label, p, s))
  else:
    print("{:50s} {:10.2f} {:10.2f} {:10.2f} {:10d}".format(
        label, float(p), float(r), float(f1), int(s)
    ))

                                                    precision     recall   f1-score    support
expresar_gratitud                                        1.00       1.00       1.00         45
expresar_deseo_de_autocuidado                            0.87       0.93       0.90         42
hablar_sobre_problemas_de_relacion                       1.00       1.00       1.00         41
mostrar_motivacion                                       0.91       0.87       0.89         45
mostrar_estres                                           0.80       0.76       0.78         51
mostrar_depresion                                        0.69       0.74       0.71         50
mostrar_enojo                                            0.85       0.80       0.82         44
mostrar_sentimiento_de_soledad                           0.83       0.83       0.83         42
saludar                                                  1.00       1.00       1.00         41
macro avg                                         