<a href="https://colab.research.google.com/github/Lolit-78/T3-PROYECT/blob/main/chatbot2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Instalación de Hugging Face y otras librerías
!pip install --upgrade transformers datasets scikit-learn -q


In [None]:
from datasets import load_dataset

# Load the public banking-intent dataset by its identifier.
dataset_id = "banking77"
dataset = load_dataset(dataset_id)


In [None]:
from transformers import AutoTokenizer

# Tokenizer belonging to the uncased BERT base checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def tokenize(example):
    """Tokenize the "text" field, truncating and padding to the maximum length.

    Args:
        example: Mapping with a "text" entry (a batch of texts when called
            through a batched ``map``).

    Returns:
        Whatever the tokenizer produces for those texts.
    """
    texts = example["text"]
    return tokenizer(texts, truncation=True, padding="max_length")


# Run the tokenizer over the loaded dataset in batched mode.
encoded_dataset = dataset.map(tokenize, batched=True)


In [None]:
from transformers import AutoModelForSequenceClassification

# The classification head needs one output per class: 77 banking intents.
num_labels = 77

# Pretrained BERT with a sequence-classification head of that size.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=num_labels,
)


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Compute accuracy and weighted F1 for a prediction object.

    Args:
        pred: Object exposing ``predictions`` (per-class scores; the argmax
            is taken along axis 1, so presumably shaped
            (n_samples, n_classes) -- confirm against the caller) and
            ``label_ids`` (the reference labels).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    scores, references = pred.predictions, pred.label_ids
    # The highest-scoring class per row is the predicted label.
    predicted = np.argmax(scores, axis=1)

    metrics = {}
    metrics["accuracy"] = accuracy_score(references, predicted)
    metrics["f1"] = f1_score(references, predicted, average="weighted")
    return metrics


In [None]:
from transformers import TrainingArguments

# Training configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./banking77_model",
    logging_dir="./logs",
    save_strategy="epoch",  # save a checkpoint at the end of every epoch
    # Which phases to run.
    do_train=True,
    do_eval=True,
    # Optimisation hyper-parameters.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)


In [None]:
pip install wandb


In [None]:

!pip install --upgrade transformers


In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from datasets import load_dataset

# 1. Load the banking-intent dataset. This cell used to fine-tune a 2-class
#    sentiment model on IMDB, although the result is saved as
#    "modelo_banking77" and queried with banking phrases further down.
dataset = load_dataset("banking77")

# Intent names come from the dataset's label feature. Registering them on the
# model makes the inference pipeline report intent names, not LABEL_<n>.
label_names = dataset["train"].features["label"].names
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

# 2. Load the tokenizer and a model whose head matches the number of intents.
checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=len(label_names),
    id2label=id2label,
    label2id=label2id,
)


# 3. Tokenize the data.
def tokenize_function(example):
    """Tokenize a batch of texts with truncation; padding is left to the collator."""
    return tokenizer(example["text"], truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

# 4. The collator pads each batch dynamically, so the Trainer does not need
#    to receive the tokenizer itself.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 5. Configure the training run.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",  # named `evaluation_strategy` in older transformers releases
    save_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    logging_dir="./logs",
    report_to="none"
)

# 6. Build the Trainer.
#    - The full train/test splits are used: a small unshuffled slice cannot be
#      relied on to cover all intents.
#    - `compute_metrics` is the function defined in the metrics cell above, so
#      each evaluation reports accuracy and F1 in addition to the loss.
#    - The deprecated `tokenizer=` argument is omitted; the tokenizer is saved
#      explicitly together with the model in the next cell.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# 7. Train the model.
trainer.train()

In [None]:
# Write the model (moved to CPU first) and the tokenizer into the same local folder.
save_dir = "modelo_banking77"
cpu_model = model.to("cpu")
cpu_model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
from transformers import pipeline

# Build a text-classification pipeline from the locally saved model and tokenizer.
model_dir = "modelo_banking77"
clasifier = pipeline(
    task="text-classification",
    model=model_dir,
    tokenizer=model_dir,
)

# Smoke test with a single sentence.
phrase = "I want to check my account balance"
result = clasifier(phrase)
print("Prediction:", result)



In [None]:
# Sample customer utterances to run through the classifier.
phrases = [
    "I lost my debit card and need a replacement",
    "I want to transfer 500 dollars to another account",
    "How can I check my account balance?",
    "Where is the closest ATM near me?",
    "I forgot my PIN code",
    "Can I open a savings account online?",
    "What is the interest rate for home loans?",
    "I need help logging into my online banking",
    "Cancel my credit card immediately",
]


def show_intent(text):
    """Classify one utterance, print it with the prediction, and return the prediction."""
    prediction = clasifier(text)
    print(f"\nCustomer: {text}")
    print("Detected intent:", prediction)
    return prediction


for phrase in phrases:
    result = show_intent(phrase)
