In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import Dataset
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import torch
from transformers import EarlyStoppingCallback

#Cargar y preparar el dataset
df = pd.read_csv("../data/mails_dataset.csv")  

#Concatenar asunto y cuerpo separados por espacio
df['text_combined'] = df['subject'].fillna('') + ' </s> ' + df['text'].fillna('')

df = df[['text_combined', 'category']].dropna()

# Codificar etiquetas
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["category"])

# Guardar las etiquetas para decodificar luego
label2id = {label: i for i, label in enumerate(label_encoder.classes_)}
id2label = {i: label for label, i in label2id.items()}

# Crear Dataset de Hugging Face
dataset = Dataset.from_pandas(df.rename(columns={"text_combined": "text", "label": "label"}))

#Tokenización
model_name = "pysentimiento/robertuito-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

dataset = dataset.map(tokenize, batched=True)

# División entrenamiento y validación
dataset = dataset.train_test_split(test_size=0.2)

# Cargar modelo preentrenado
num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

# Configurar entrenamiento
training_args = TrainingArguments(
    output_dir="./category_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,      
    metric_for_best_model="f1",        
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.01,
    logging_dir="./logs_category",
    logging_steps=10
)

# Función de evaluación
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    return {"accuracy": acc, "f1": f1}

# Entrenador
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()

# Guardar el modelo y tokenizer
trainer.save_model("./category_model")
tokenizer.save_pretrained("./category_model")



Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at pysentimiento/robertuito-base-uncased and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/144 [00:00<?, ?it/s]



{'loss': 1.348, 'grad_norm': 9.223075866699219, 'learning_rate': 1.8611111111111114e-05, 'epoch': 0.42}
{'loss': 1.1378, 'grad_norm': 6.37191104888916, 'learning_rate': 1.7222222222222224e-05, 'epoch': 0.83}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.9002441763877869, 'eval_accuracy': 0.8297872340425532, 'eval_f1': 0.8338931007309583, 'eval_runtime': 1.1667, 'eval_samples_per_second': 40.284, 'eval_steps_per_second': 5.143, 'epoch': 1.0}




{'loss': 0.882, 'grad_norm': 5.465376853942871, 'learning_rate': 1.5833333333333333e-05, 'epoch': 1.25}
{'loss': 0.6737, 'grad_norm': 4.966735363006592, 'learning_rate': 1.4444444444444446e-05, 'epoch': 1.67}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.4878710210323334, 'eval_accuracy': 0.8936170212765957, 'eval_f1': 0.895744680851064, 'eval_runtime': 1.1383, 'eval_samples_per_second': 41.29, 'eval_steps_per_second': 5.271, 'epoch': 2.0}




{'loss': 0.5355, 'grad_norm': 8.049203872680664, 'learning_rate': 1.3055555555555557e-05, 'epoch': 2.08}
{'loss': 0.4231, 'grad_norm': 3.6527113914489746, 'learning_rate': 1.1666666666666668e-05, 'epoch': 2.5}
{'loss': 0.342, 'grad_norm': 5.635288715362549, 'learning_rate': 1.0277777777777777e-05, 'epoch': 2.92}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.30853959918022156, 'eval_accuracy': 0.9148936170212766, 'eval_f1': 0.916250385445575, 'eval_runtime': 1.2731, 'eval_samples_per_second': 36.919, 'eval_steps_per_second': 4.713, 'epoch': 3.0}




{'loss': 0.2247, 'grad_norm': 5.4576263427734375, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.33}
{'loss': 0.2328, 'grad_norm': 1.2899322509765625, 'learning_rate': 7.500000000000001e-06, 'epoch': 3.75}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.2633228302001953, 'eval_accuracy': 0.9361702127659575, 'eval_f1': 0.9365248226950355, 'eval_runtime': 1.2785, 'eval_samples_per_second': 36.762, 'eval_steps_per_second': 4.693, 'epoch': 4.0}




{'loss': 0.2527, 'grad_norm': 4.916790962219238, 'learning_rate': 6.111111111111112e-06, 'epoch': 4.17}
{'loss': 0.2346, 'grad_norm': 4.390562534332275, 'learning_rate': 4.722222222222222e-06, 'epoch': 4.58}
{'loss': 0.1125, 'grad_norm': 1.4338359832763672, 'learning_rate': 3.3333333333333333e-06, 'epoch': 5.0}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.2382822185754776, 'eval_accuracy': 0.9361702127659575, 'eval_f1': 0.9365248226950355, 'eval_runtime': 1.2679, 'eval_samples_per_second': 37.07, 'eval_steps_per_second': 4.732, 'epoch': 5.0}




{'loss': 0.1114, 'grad_norm': 0.8540238738059998, 'learning_rate': 1.944444444444445e-06, 'epoch': 5.42}
{'loss': 0.1817, 'grad_norm': 11.23680591583252, 'learning_rate': 5.555555555555555e-07, 'epoch': 5.83}


  0%|          | 0/6 [00:00<?, ?it/s]

{'eval_loss': 0.2115934044122696, 'eval_accuracy': 0.9361702127659575, 'eval_f1': 0.9365248226950355, 'eval_runtime': 1.2686, 'eval_samples_per_second': 37.05, 'eval_steps_per_second': 4.73, 'epoch': 6.0}
{'train_runtime': 152.2908, 'train_samples_per_second': 7.367, 'train_steps_per_second': 0.946, 'train_loss': 0.46717740160723525, 'epoch': 6.0}


('./category_model\\tokenizer_config.json',
 './category_model\\special_tokens_map.json',
 './category_model\\tokenizer.json')

In [6]:
from sklearn.metrics import classification_report
import numpy as np

# Obtener predicciones sobre el set de evaluación
predictions = trainer.predict(dataset["test"])
y_true = predictions.label_ids
y_pred = np.argmax(predictions.predictions, axis=1)

# Mostrar el reporte con los nombres reales de las clases
print("Reporte de clasificación por clase:")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))




  0%|          | 0/6 [00:00<?, ?it/s]

Reporte de clasificación por clase:
              precision    recall  f1-score   support

   comercial       1.00      1.00      1.00         9
        otro       1.00      0.88      0.93        16
       queja       0.91      0.91      0.91        11
   solicitud       0.85      1.00      0.92        11

    accuracy                           0.94        47
   macro avg       0.94      0.95      0.94        47
weighted avg       0.94      0.94      0.94        47



In [3]:
from transformers import pipeline
import pandas as pd

# Cargar modelo y tokenizer ya entrenados
clf = pipeline("text-classification", model="./modelo_categoria", tokenizer="./modelo_categoria")

# Lista de pruebas: (texto, categoría esperada)
ejemplos = [
    ("Mi pedido no ha llegado y ya pasaron 10 días", "queja"),
    ("¿Pueden confirmarme si tienen stock del producto?", "solicitud"),
    ("Estamos interesados en su oferta para empresas", "comercial"),
    ("Gracias por la atención, todo perfecto", "otro"),
    ("Solicito el número de seguimiento del pedido", "solicitud"),
    ("Recibí el producto roto y nadie contesta", "queja"),
    ("¿Ofrecen descuentos para distribuidores?", "comercial"),
    ("Todo ha ido perfecto, gracias", "otro"),
    ("Quiero programar una reunión comercial", "comercial"),
    ("Me gustaría confirmar si el producto tiene garantía", "solicitud"),
]

# Ejecutar predicciones
print(f"{'Texto':<60} | {'Esperado':<10} | {'Predicción':<10} | {'Score'}")
print("-" * 100)

for texto, esperado in ejemplos:
    pred = clf(texto)[0]
    print(f"{texto[:57]:<60} | {esperado:<10} | {pred['label']:<10} | {pred['score']:.4f}")


Texto                                                        | Esperado   | Predicción | Score
----------------------------------------------------------------------------------------------------
Mi pedido no ha llegado y ya pasaron 10 días                 | queja      | queja      | 0.8492
¿Pueden confirmarme si tienen stock del producto?            | solicitud  | solicitud  | 0.5591
Estamos interesados en su oferta para empresas               | comercial  | comercial  | 0.8952
Gracias por la atención, todo perfecto                       | otro       | otro       | 0.7990
Solicito el número de seguimiento del pedido                 | solicitud  | solicitud  | 0.8269
Recibí el producto roto y nadie contesta                     | queja      | queja      | 0.8825
¿Ofrecen descuentos para distribuidores?                     | comercial  | comercial  | 0.8675
Todo ha ido perfecto, gracias                                | otro       | otro       | 0.7804
Quiero programar una reunión comerci