In [2]:
!pip install transformers datasets
!pip install peft gradio
!pip install peft

Collecting gradio
  Downloading gradio-5.29.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [3]:
import json
from datasets import Dataset
from transformers import AutoTokenizer,AutoModelForCausalLM,Trainer,TrainingArguments,DataCollatorForLanguageModeling
#El collator DataCollatorForLanguageModeling lo utilizamos para modelos como Qwen, que aprenden de forma autoregresiva


# 1. Cargar dataset desde archivo JSON
dataset_path = "/content/datos.json"
with open(dataset_path, "r", encoding="utf-8") as f:
    data = json.load(f)

dataset = Dataset.from_list(data)

# 2. Cargar modelo y tokenizer
model_name = "Qwen/Qwen2.5-0.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

from peft import get_peft_model, LoraConfig, TaskType

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # puede cambiar según arquitectura, verifica en Qwen
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


# 3. Asegurar que haya token de padding
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
    model.resize_token_embeddings(len(tokenizer))

# 4. Agregar tokens especiales si no están
special_tokens = ['<|user|>', '<|assistant|>']
tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
model.resize_token_embeddings(len(tokenizer))

# 5. Construcción del prompt
def build_prompt(example):
    prompt = f"<|user|>\n{example['instruction']}\n<|assistant|>\n{example['response']}"
    return {"text": prompt}

dataset = dataset.map(build_prompt, remove_columns=["instruction", "response"])

# 6. Tokenización
def tokenize(example):
    out = tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    out["labels"] = out["input_ids"].copy()
    return out

tokenized_dataset = dataset.map(tokenize, batched=True)

# 7. Dividir en train/test
splits = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_data = splits["train"]
eval_data = splits["test"]

# 8. Collator para entrenamiento causal
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False  # Muy importante: no es masked LM, es causal LM
)

# 9. Configurar entrenamiento
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    save_total_limit=2,
    logging_dir="./logs",
    report_to="none"  # Desactiva W&B y otros trackers
)

# 10. Inicializar y entrenar
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=eval_data,
    tokenizer=tokenizer, #processing_class="tokenizer"
    data_collator=data_collator
)

trainer.train()

# 11. Guardar modelo fine-tuneado
trainer.save_model("./results/Qwen_finetuned")
tokenizer.save_pretrained("./results/Qwen_finetuned")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

trainable params: 540,672 || all params: 494,573,440 || trainable%: 0.1093


Map:   0%|          | 0/113 [00:00<?, ? examples/s]

Map:   0%|          | 0/113 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,No log,3.148079
2,No log,3.006385
3,No log,2.954664




('./results/Qwen_finetuned/tokenizer_config.json',
 './results/Qwen_finetuned/special_tokens_map.json',
 './results/Qwen_finetuned/vocab.json',
 './results/Qwen_finetuned/merges.txt',
 './results/Qwen_finetuned/added_tokens.json',
 './results/Qwen_finetuned/tokenizer.json')

In [4]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Import PeftModel to load the fine-tuned LoRA model
from peft import PeftModel

# Cargar modelo LoRA fine-tuneado
model_path = "./results/Qwen_finetuned"

# Cargar el tokenizer que fue usado durante el entrenamiento (con los tokens especiales)
# The tokenizer was saved in the previous cell, so this should load the correct one.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Cargar el modelo base primero
base_model_name = "Qwen/Qwen2.5-0.5B"
# Load the base model. Setting device_map="auto" is correct here.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto")

# Ensure the base model tokenizer has the same special tokens if not already added
# This might be redundant if the saved tokenizer handles it, but good for robustness
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<pad>'})
    base_model.resize_token_embeddings(len(tokenizer)) # Resize base model embeddings

special_tokens = ['<|user|>', '<|assistant|>']
tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
base_model.resize_token_embeddings(len(tokenizer)) # Resize base model embeddings

# Cargar los adaptadores LoRA sobre el modelo base
# Use PeftModel.from_pretrained to load the LoRA adapters
model = PeftModel.from_pretrained(base_model, model_path)

# Set the model to evaluation mode
model.eval()

def generar_respuesta(prompt):
    # Encode the prompt including the special tokens
    input_ids = tokenizer.encode(f"<|user|>\n{prompt}\n<|assistant|>\n", return_tensors="pt").to(model.device)
    # Generate the output
    with torch.no_grad(): # Disable gradient calculation for inference
        output = model.generate(
            input_ids,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.9,
            # Add pad_token_id if your tokenizer has one
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
        )
    # Decode the output, removing the input prompt part
    # Decode the entire output sequence first
    full_response = tokenizer.decode(output[0], skip_special_tokens=False) # Keep special tokens to split

    # Find the start of the assistant's response after the prompt
    assistant_start_index = full_response.find("<|assistant|>")
    if assistant_start_index != -1:
        # Extract the part after "<|assistant|>" and strip leading/trailing whitespace
        respuesta = full_response[assistant_start_index + len("<|assistant|>"):].strip()
    else:
        # If the tag is not found, return the full decoded sequence (or handle error)
        respuesta = full_response.strip()

    return respuesta


demo = gr.Interface(fn=generar_respuesta,
                    inputs="text",
                    outputs="text",
                    title="Asistente para Cultivo de Orquídeas",
                    description="Haz preguntas sobre el cultivo de orquídeas tropicales en invernaderos.")

demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e74ecf9d894a4d86c5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


