In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load the fine-tuned model and its tokenizer.
# `model_base` is the base checkpoint identifier; `adapter_path` is the local
# directory that both the PEFT adapter and the tokenizer are read from.
model_base = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
adapter_path = "./fine_tuned_model_both"

# Base weights first, then the adapter wrapped around them.
# NOTE: despite its name, `model_id` holds the loaded base model object,
# not a string identifier.
model_id = AutoModelForCausalLM.from_pretrained(model_base)
model = PeftModel.from_pretrained(model_id, adapter_path)

# The tokenizer comes from the adapter directory, not from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(adapter_path)


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Text-generation helper
def generate_text(prompt, max_length=100, num_return_sequences=1):
    """Generate one or more completions for ``prompt`` with the loaded model.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``. In
            Hugging Face ``generate`` this bounds the total sequence length
            (prompt tokens plus newly generated tokens), not only the new ones.
        num_return_sequences: Number of sequences to request per prompt.

    Returns:
        A list of decoded strings, one per returned sequence, with special
        tokens stripped. Each string is the decoding of the whole output
        sequence as returned by ``model.generate``.
    """
    # Put the inputs on the device the model actually lives on. The model is
    # never moved explicitly in this notebook, so an independently computed
    # device could disagree with where the weights were loaded.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        # Sampling stays enabled, but the very low temperature makes the
        # output close to deterministic.
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.01,
        # Explicitly use EOS as the padding token; this is the fallback
        # `generate` was already applying (with a warning) when unset.
        pad_token_id=tokenizer.eos_token_id,
    )
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

In [3]:
# Usage example
# NOTE(review): the prompt starts with a literal "<s>". If the tokenizer also
# prepends its own BOS token, the sequence would begin with a duplicated BOS —
# confirm against the format used for the fine-tuning data before changing it.
prompt = "<s>[INST] ¿Cuál es el teléfono de Bartolome? [/INST]"
generated_texts = generate_text(prompt, max_length=45, num_return_sequences=1)

# Print every generated completion, numbered from 1.
for position, completion in enumerate(generated_texts, start=1):
    print(f"Generated Text {position}:\n{completion}\n")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Generated Text 1:
[INST] ¿Cuál es el teléfono de Bartolome? [/INST] El teléfono de Bartolome es el 606588477

