<a href="https://colab.research.google.com/github/Filarh/Cringe-scraper/blob/main/entrenar_qwen_mks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the training stack quietly (-qq). --no-deps stops pip from pulling in
# transitive dependencies for these packages; several are version-pinned.
!pip install -qq --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
# Supporting packages, installed with normal dependency resolution.
!pip install -qq sentencepiece protobuf datasets huggingface_hub hf_transfer
# Install unsloth itself, again without letting pip resolve its dependencies.
!pip install -qq --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Fetch the Qwen3-0.6B checkpoint and its tokenizer through Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen3-0.6B",
    load_in_4bit=True,    # load the weights 4-bit quantized
    max_seq_length=2048,  # context length used for this run
    # token="hf_...",     # uncomment and fill in when using a gated model
)

In [None]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # MLP projections
    ],
    r=32,                # LoRA rank: higher means more trainable parameters and more memory
    lora_alpha=32,       # scaling factor (often set to r or 2*r)
    lora_dropout=0,      # no dropout inside the LoRA layers
    bias="none",         # do not train bias terms
    use_rslora=False,    # Rank Stable LoRA left off
    loftq_config=None,   # no LoftQ initialization
    use_gradient_checkpointing="unsloth",  # Unsloth's memory-saving checkpointing
    random_state=3407,
)

In [None]:
from datasets import Dataset
import re
import json

# Path to the source JSONL file, expected to be present in the Colab runtime.
file_path = "/content/postexpandedv2.jsonl"

# Fields every record must provide; they are read later when the chat
# conversations are built.
required_keys = ("instruction", "input", "output")

# Read the file line by line, keep only usable records, and make sure every
# kept record's output carries a <think> block (an empty one is prepended when
# the tag is absent).
data = []
skipped_lines = 0
with open(file_path, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines are not records; ignore them without counting an error.
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            skipped_lines += 1
            continue
        # Reject records that would fail later instead of crashing the load:
        # non-object JSON, missing fields, or a non-string output.
        if (
            not isinstance(entry, dict)
            or any(key not in entry for key in required_keys)
            or not isinstance(entry["output"], str)
        ):
            skipped_lines += 1
            continue
        if "<think>" not in entry["output"].lower():
            entry["output"] = "<think></think> " + entry["output"]
        data.append(entry)

# Report dropped lines so data loss is visible rather than silent.
if skipped_lines:
    print(f"Skipped {skipped_lines} malformed or incomplete line(s) in {file_path}")

def has_thinking_content(text):
    """Report whether the first <think>...</think> block in *text* is non-empty.

    Tag matching is case-insensitive and the block may span several lines.
    Returns False when no complete block exists or when the block holds only
    whitespace; True otherwise.
    """
    found = re.search(r"<think>\s*(.*?)\s*</think>", text, re.DOTALL | re.IGNORECASE)
    if found is None:
        return False
    return found.group(1).strip() != ""

# Partition the records in a single pass: those whose output has real content
# inside <think>...</think> versus those with an empty or missing block.
reasoning_data = []
non_reasoning_data = []
for entry in data:
    if has_thinking_content(entry["output"]):
        reasoning_data.append(entry)
    else:
        non_reasoning_data.append(entry)

# Build one Hugging Face dataset per group.
reasoning_dataset = Dataset.from_list(reasoning_data)
non_reasoning_dataset = Dataset.from_list(non_reasoning_data)

# Print the group sizes and one sample from each group. Either group may be
# empty, so both previews are guarded before indexing row 0.
print("✅ Razonamiento detectado:", len(reasoning_dataset))
print("🟡 Sin razonamiento:", len(non_reasoning_dataset))

if len(reasoning_dataset) > 0:
    print("\nEjemplo razonamiento:")
    print(reasoning_dataset[0])
else:
    print("\n⚠️ No se encontraron ejemplos con razonamiento.")

if len(non_reasoning_dataset) > 0:
    print("\nEjemplo sin razonamiento:")
    print(non_reasoning_dataset[0])
else:
    print("\n⚠️ No se encontraron ejemplos sin razonamiento.")


In [None]:
def generate_reasoning_conversation(examples):
    """Convert a batch of reasoning records into chat-style conversations.

    Args:
        examples: Mapping with parallel "instruction", "input" and "output"
            sequences (one element per record).

    Returns:
        A dict with the single key "conversations". Each conversation is a
        list of three messages: the instruction as the system turn, the input
        as the user turn and the output as the assistant turn.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "conversations": [
            [
                {"role": "system", "content": system_text},
                {"role": "user", "content": user_text},
                {"role": "assistant", "content": assistant_text},
            ]
            for system_text, user_text, assistant_text in rows
        ]
    }


In [None]:
def generate_non_reasoning_conversation(examples):
    """Convert a batch of non-reasoning records into chat-style conversations.

    Args:
        examples: Mapping with parallel "instruction", "input" and "output"
            sequences (one element per record).

    Returns:
        A dict with the single key "conversations"; every element is a
        system/user/assistant message triple built from one record.
    """
    system_prompts = examples["instruction"]
    user_turns = examples["input"]
    assistant_turns = examples["output"]

    def as_messages(system_prompt, user_turn, assistant_turn):
        # One record becomes one three-message conversation.
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ]

    return {
        "conversations": [
            as_messages(*record)
            for record in zip(system_prompts, user_turns, assistant_turns)
        ]
    }


In [None]:
# Render every reasoning conversation through the tokenizer's chat template,
# producing plain strings (tokenize=False) rather than token ids.
# list() materializes the "conversations" column so the tokenizer receives an
# ordinary list of conversations regardless of how the dataset exposes columns.
reasoning_formatted_texts = tokenizer.apply_chat_template(
    list(reasoning_dataset.map(generate_reasoning_conversation, batched=True)["conversations"]),
    tokenize=False,
)

# Show the first rendered row as a sanity check.
print("First formatted Reasoning Row:")
print(reasoning_formatted_texts[0])


In [None]:
# Render the non-reasoning conversations through the chat template as plain
# strings. This group can legitimately be empty, in which case there is
# nothing to format and no first row to preview.
if len(non_reasoning_dataset) > 0:
    # list() hands the tokenizer an ordinary list of conversations regardless
    # of how the dataset exposes its columns.
    non_reasoning_formatted_texts = tokenizer.apply_chat_template(
        list(non_reasoning_dataset.map(generate_non_reasoning_conversation, batched=True)["conversations"]),
        tokenize=False,
    )
    print("\nFirst formatted Non-Reasoning Row:")
    print(non_reasoning_formatted_texts[0])
else:
    non_reasoning_formatted_texts = []
    print("\nNo non-reasoning rows to format.")


In [None]:
import pandas as pd
from datasets import Dataset

In [None]:
# Target share of chat (non-reasoning) rows in the final mix.
chat_percentage = 0.75

# Wrap both lists of formatted strings in pandas Series so they can be sampled.
reasoning_series = pd.Series(reasoning_formatted_texts)
non_reasoning_series = pd.Series(non_reasoning_formatted_texts)

# Non-reasoning rows needed to reach the target share relative to the number
# of reasoning rows, capped at how many non-reasoning rows actually exist.
num_non_reasoning_samples = min(
    int(len(reasoning_series) * (chat_percentage / (1.0 - chat_percentage))),
    len(non_reasoning_series),
)

print(f"Using {len(reasoning_series)} reasoning samples.")
print(f"Sampling {num_non_reasoning_samples} non-reasoning samples.")

In [None]:
# Draw the requested number of non-reasoning rows (fixed seed for reproducibility).
non_reasoning_subset = non_reasoning_series.sample(
    n = num_non_reasoning_samples,
    random_state = 2407,
)

# Stack both groups into one "text" column (the column name passed to the
# trainer as dataset_text_field). ignore_index=True replaces the overlapping
# labels of the two inputs with a fresh 0..n-1 index.
combined_series = pd.concat([reasoning_series, non_reasoning_subset], ignore_index = True)
combined_series.name = "text"

# Convert back to a Hugging Face Dataset and shuffle. preserve_index=False
# keeps the pandas index from being stored as an extra dataset column.
combined_dataset = Dataset.from_pandas(pd.DataFrame(combined_series), preserve_index = False)
combined_dataset = combined_dataset.shuffle(seed = 3407)

print(f"\nFinal Combined Dataset size: {len(combined_dataset)}")
# Only preview a row when there is one to show.
if len(combined_dataset) > 0:
    print("Example entry from combined dataset:")
    print(combined_dataset[0]['text'])

In [None]:
from trl import SFTTrainer, SFTConfig

In [None]:
# Query bf16 support once so the fp16/bf16 flags below are guaranteed to be
# complementary.
use_bf16 = torch.cuda.is_bf16_supported()

# Training hyperparameters for the supervised fine-tuning run.
sftconfig = SFTConfig(
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,  # Effective batch size = 2 * 4 = 8
    warmup_steps = 5,
    # Short run for demonstration. A positive max_steps takes precedence over
    # num_train_epochs; to train by epochs, remove max_steps and set
    # num_train_epochs instead.
    max_steps = 30,
    # num_train_epochs = 1,
    learning_rate = 2e-4,
    fp16 = not use_bf16,              # fall back to fp16 when bf16 is unavailable
    bf16 = use_bf16,
    logging_steps = 1,
    optim = "adamw_8bit",             # 8-bit AdamW optimizer
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",           # directory for checkpoints
    report_to = "none",               # no external experiment tracker
)

In [None]:
# Assemble the supervised fine-tuning trainer from the LoRA model, its
# tokenizer, the mixed dataset and the training configuration.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=sftconfig,
    train_dataset=combined_dataset,
    dataset_text_field="text",  # column holding the formatted conversations
    max_seq_length=2048,        # keep equal to the value used when loading the model
)

In [29]:
# Run the fine-tuning loop; the value returned by trainer.train() is kept in
# trainer_stats for later inspection.
print("Starting training...")
trainer_stats = trainer.train()
print("Training finished.")
# Uncomment to inspect the returned training statistics:
# print(trainer_stats)

Step,Training Loss
1,3.8024
2,3.2912
3,3.5511
4,3.5573
5,3.6301
6,3.1165
7,1.9903
8,2.8351
9,2.4904
10,2.5584


Training finished.


In [None]:
from transformers import TextStreamer
# Single-turn test conversation; the same messages are rendered for both the
# non-thinking and the thinking inference prompts.
messages = [
    {"role" : "user", "content" : "Solve (x + 2)^2 = 0."}
]

In [None]:
# Render the test conversation as a prompt string with thinking mode OFF.
text_input_no_think = tokenizer.apply_chat_template(
    messages,
    enable_thinking=False,       # *** thinking disabled ***
    add_generation_prompt=True,  # required so the model continues as the assistant
    tokenize=False,              # return text, not token ids
)

print("--- Non-Thinking Inference ---")
print("Formatted Input:\n", text_input_no_think)

In [None]:
# Tokenize the non-thinking prompt and move the tensors to whatever device the
# model lives on, instead of assuming a device named "cuda".
inputs = tokenizer(text_input_no_think, return_tensors = "pt").to(model.device)
# Stream the generated tokens to stdout as they arrive, omitting the prompt.
streamer_no_think = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(
    **inputs,
    max_new_tokens = 256,
    temperature = 0.7, # Sampling settings chosen for chat-style replies
    top_p = 0.8,
    top_k = 20,
    streamer = streamer_no_think,
    eos_token_id = tokenizer.eos_token_id # Stop at the tokenizer's end-of-sequence token
)
print("\n-----------------------------")

## **Thinking Inference:**

In [None]:
# Render the same test conversation as a prompt string with thinking mode ON.
text_input_think = tokenizer.apply_chat_template(
    messages,
    enable_thinking=True,        # *** thinking enabled ***
    add_generation_prompt=True,  # required so the model continues as the assistant
    tokenize=False,              # return text, not token ids
)

print("--- Thinking Inference ---")
print("Formatted Input:\n", text_input_think)

In [None]:
# Tokenize the thinking-mode prompt and move the tensors to whatever device
# the model lives on, instead of assuming a device named "cuda".
inputs_think = tokenizer(text_input_think, return_tensors = "pt").to(model.device)
# Stream the generated tokens to stdout as they arrive, omitting the prompt.
streamer_think = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(
    **inputs_think,
    max_new_tokens = 1024, # Larger budget so the reasoning steps fit
    temperature = 0.6,     # Sampling settings chosen for reasoning output
    top_p = 0.95,
    top_k = 20,
    streamer = streamer_think,
    eos_token_id = tokenizer.eos_token_id # Stop at the tokenizer's end-of-sequence token
)
print("\n-----------------------------")

In [None]:
# Save the LoRA adapters and the tokenizer side by side in one local
# directory. The name is defined once so both saves and the message agree.
lora_output_dir = "qwen3_0.6b_reasoning_chat_lora"
model.save_pretrained(lora_output_dir)
tokenizer.save_pretrained(lora_output_dir)

print(f"LoRA adapters saved locally to '{lora_output_dir}'")

# Optional: Push to Hugging Face Hub
# model.push_to_hub("your_username/qwen3_0.6b_reasoning_chat_lora", token="YOUR_HF_TOKEN")
# tokenizer.push_to_hub("your_username/qwen3_0.6b_reasoning_chat_lora", token="YOUR_HF_TOKEN")

# To load these adapters later:
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "qwen3_0.6b_reasoning_chat_lora", # Path to saved adapters
#     load_in_4bit = True,
# )