<a href="https://colab.research.google.com/github/Realmbird/Recreation-Consitutional-AI/blob/main/Helpful_RLHF_Qwen_0_6B_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
!pip install datasets==4.0.0 transformers==4.54.1 trl==0.20.0 bitsandbytes==0.46.1 accelerate==1.9.0 peft==0.16.0 huggingface-hub==0.34.1

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[0m

In [23]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, AutoModelForCausalLM, pipeline

import torch


model_name = "Qwen/Qwen3-0.6B" # Or any other suitable model

mname = model_name

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Important: Add a pad token if the tokenizer doesn't have one, especially for decoder models.

if tokenizer.pad_token is None:

    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

In [24]:
def get_prompts(dataset):


    prompts = []


    for example in dataset:


        chosen_text = example['chosen']


        # Find the end of the human's prompt (before "Assistant:")


        prompt_end_idx = chosen_text.find("\n\nAssistant:")


        if prompt_end_idx != -1:


            prompts.append(chosen_text[:prompt_end_idx].strip())


    return prompts

In [25]:
from datasets import load_dataset, Dataset


dataset = load_dataset("Anthropic/hh-rlhf", data_dir="helpful-base")



In [26]:
def tokenize_prompts(prompts_list, tokenizer):
    tokenized_data = []
    for prompt in prompts_list:
        # TRL's PPOTrainer often expects "input_ids" and "attention_mask"
        # for the queries.
        tokenized_prompt = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
        tokenized_data.append({
            "input_ids": tokenized_prompt["input_ids"].squeeze(0),
            "attention_mask": tokenized_prompt["attention_mask"].squeeze(0)
        })
    return Dataset.from_list(tokenized_data) # Convert list of dicts to a datasets.Dataset

In [29]:
# prompts had to formated of just queries
train_prompts_list = get_prompts(dataset["train"])
eval_prompts_list = get_prompts(dataset["test"])

train_dataset = tokenize_prompts(train_prompts_list, tokenizer)
eval_dataset = tokenize_prompts(eval_prompts_list, tokenizer)

In [None]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,   # âœ… Use 4-bit instead
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)





model = AutoModelForCausalLM.from_pretrained(

    model_name,

    quantization_config=bnb_config,

    device_map="auto"

)

ref_model = AutoModelForCausalLM.from_pretrained(

    model_name,

    quantization_config=bnb_config,

    device_map="auto"

)



In [None]:
from peft import LoraConfig, TaskType, PeftModel, get_peft_model


# stacks on top of BitsAndBytes

peft_config = LoraConfig(

    task_type=TaskType.SEQ_CLS,

    inference_mode=False,

    r=8,

    lora_alpha=32,

    lora_dropout=0.1,

)

In [None]:
from trl.trainer.utils import disable_dropout_in_model

from trl import PPOTrainer, PPOConfig

from transformers import TrainingArguments


# Load the reward model directly

reward_model_name = "Realmbird/helpfulness-preference-model-qwen-0.6B-v2"

reward_model = AutoModelForSequenceClassification.from_pretrained(reward_model_name, num_labels=1, device_map="auto", torch_dtype=torch.float16) # Added device_map="auto"



In [None]:
value_model = AutoModelForSequenceClassification.from_pretrained(

    model_name,

    num_labels=1,

    quantization_config=bnb_config,

    device_map="auto"

)

value_model.resize_token_embeddings(len(tokenizer))


# Apply PEFT to the value model as well

peft_config_value = LoraConfig(

    task_type=TaskType.SEQ_CLS,

    inference_mode=False,

    r=8,

    lora_alpha=32,

    lora_dropout=0.1,

)

value_model = get_peft_model(value_model, peft_config_value)

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-0.6B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# --- NEW: Set pad_token_id on the model's config ---
model.config.pad_token_id = tokenizer.pad_token_id
ref_model.config.pad_token_id = tokenizer.pad_token_id
value_model.config.pad_token_id = tokenizer.pad_token_id
reward_model.config.pad_token_id = tokenizer.pad_token_id

In [None]:


ppo_config = PPOConfig(

    output_dir= "./rlhf_helpful_Qwen3_0.6B",

    learning_rate=1.41e-5,

    num_train_epochs=1,

    batch_size=4,

    gradient_accumulation_steps=32,

    report_to="none"

)

trainer = PPOTrainer(

    args=ppo_config,

    model=model,

    ref_model = False,

    processing_class=tokenizer,

    train_dataset=train_dataset, # Use the training split

    eval_dataset=eval_dataset,

    peft_config=peft_config,

    reward_model=reward_model,

    value_model= value_model,

)

In [None]:
with torch.autocast(device_type="cuda", dtype=torch.bfloat16): # or torch.bfloat16
    trainer.train()

===training policy===


`generation_config` default values have been modified to match model-specific defaults: {'top_p': 0.95, 'pad_token_id': 151643, 'bos_token_id': 151643}. If this is not desired, please set these values explicitly.


Step,Training Loss


In [30]:
trainer.save_model("./rlhf_helpful_Qwen3_0.6B")

In [32]:
trainer.save_tokenizer("./rlhf_helpful_Qwen3_0.6B")

AttributeError: 'PPOTrainer' object has no attribute 'save_tokenizer'

In [None]:
local_save_directory = "./fine_tuned_qwen_rlhf_merged"

In [33]:
from huggingface_hub import login, HfApi, login

In [35]:
your_hf_token = "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"
login(token=your_hf_token)

In [47]:
# Save the PPO-trained RLHF model
print("=== Saving PPO-trained RLHF Model ===")

# Get the policy model (the main model that was trained)
policy_model = trainer.model.policy
lora_save_directory = "./rlhf_qwen_lora_adapter"

# Save the LoRA adapter
print("1. Saving LoRA adapter...")
policy_model.save_pretrained(lora_save_directory, safe_serialization=True)
tokenizer.save_pretrained(lora_save_directory)
print(f"âœ“ LoRA adapter saved to: {lora_save_directory}")

# Create merged model (optional but recommended for deployment)
print("\n2. Creating merged model...")
try:
    from peft import PeftModel
    
    # Load base model without quantization for merging
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    
    # Load and merge the LoRA adapter
    merged_model = PeftModel.from_pretrained(base_model, lora_save_directory)
    merged_model = merged_model.merge_and_unload()
    
    # Save the merged model
    merged_save_directory = "./rlhf_qwen_merged_model"
    merged_model.save_pretrained(merged_save_directory, safe_serialization=True)
    tokenizer.save_pretrained(merged_save_directory)
    print(f"âœ“ Merged model saved to: {merged_save_directory}")
    
except Exception as e:
    print(f"âœ— Merging failed: {e}")
    print("  Note: You can still use the LoRA adapter for inference")

print("\n=== Model Export Summary ===")
print(f"âœ“ LoRA adapter: {lora_save_directory}")
if 'merged_save_directory' in locals():
    print(f"âœ“ Merged model: {merged_save_directory}")
print("\nYour PPO-trained RLHF model is ready to use!")

=== Saving PPO-trained RLHF Model ===
1. Saving LoRA adapter...
âœ“ LoRA adapter saved to: ./rlhf_qwen_lora_adapter

2. Creating merged model...
âœ“ LoRA adapter saved to: ./rlhf_qwen_lora_adapter

2. Creating merged model...
âœ“ Merged model saved to: ./rlhf_qwen_merged_model

=== Model Export Summary ===
âœ“ LoRA adapter: ./rlhf_qwen_lora_adapter
âœ“ Merged model: ./rlhf_qwen_merged_model

Your PPO-trained RLHF model is ready to use!
âœ“ Merged model saved to: ./rlhf_qwen_merged_model

=== Model Export Summary ===
âœ“ LoRA adapter: ./rlhf_qwen_lora_adapter
âœ“ Merged model: ./rlhf_qwen_merged_model

Your PPO-trained RLHF model is ready to use!


In [None]:
# Upload to Hugging Face Hub
from huggingface_hub import HfApi
import os

try:
    api = HfApi()
    
    # Upload LoRA adapter
    lora_repo_name = "Realmbird/qwen-rlhf-helpful-lora-v1"  # Update with your preferred name
    print(f"Uploading LoRA adapter to {lora_repo_name}...")
    api.create_repo(lora_repo_name, exist_ok=True)
    api.upload_folder(
        folder_path="./rlhf_qwen_lora_adapter",
        repo_id=lora_repo_name,
        repo_type="model"
    )
    print(f"âœ“ LoRA adapter uploaded to: https://huggingface.co/{lora_repo_name}")
    
    # Upload merged model
    merged_repo_name = "Realmbird/qwen-rlhf-helpful-merged-v1"  # Update with your preferred name
    print(f"\nUploading merged model to {merged_repo_name}...")
    api.create_repo(merged_repo_name, exist_ok=True)
    api.upload_folder(
        folder_path="./rlhf_qwen_merged_model",
        repo_id=merged_repo_name,
        repo_type="model"
    )
    print(f"âœ“ Merged model uploaded to: https://huggingface.co/{merged_repo_name}")
    
    print("\nðŸŽ‰ Upload completed successfully!")
    print("Your RLHF models are now available on Hugging Face Hub!")
    
except Exception as e:
    print(f"Upload failed: {e}")
    print("Make sure you're logged in and have the correct permissions.")

No Hugging Face token found.
To upload later:
1. Set HF_TOKEN environment variable
2. Or run: huggingface-cli login
3. Then use the upload code above


In [44]:
# Test the saved model
print("Testing the RLHF-trained model...")

# Load the merged model for testing
test_tokenizer = AutoTokenizer.from_pretrained(merged_save_directory)
test_model = AutoModelForCausalLM.from_pretrained(
    merged_save_directory,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Create a pipeline for easier text generation
pipe = pipeline(
    "text-generation",
    model=test_model,
    tokenizer=test_tokenizer,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    pad_token_id=test_tokenizer.eos_token_id
)

# Test with a sample prompt
test_prompt = "Human: How can I improve my productivity while working from home?\n\nAssistant:"
response = pipe(test_prompt)
print("Test Response:")
print(response[0]['generated_text'])

print("\nModel testing completed!")

Testing the RLHF-trained model...


Device set to use cuda:0


Test Response:
Human: How can I improve my productivity while working from home?

Assistant: 

Assistant:

Working from home can be a great way to improve productivity, but it requires a strategic approach. Here are some effective strategies to help you maximize your productivity while working from home:

1. **Create a dedicated workspace**: Set up a quiet, organized workspace that minimizes distractions. Use screens-free or low-light environments to enhance focus.

2. **Break tasks into smaller, manageable parts**: This helps prevent burnout and keeps you on track. Use time-blocking techniques to allocate specific time blocks

Model testing completed!


In [None]:
# Utility functions for loading your RLHF model later

def load_rlhf_model_lora(adapter_path="./rlhf_qwen_lora_adapter"):
    """Load the LoRA adapter version of your RLHF model"""
    from peft import PeftModel, PeftConfig
    
    # Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    
    # Load the LoRA adapter
    model = PeftModel.from_pretrained(base_model, adapter_path)
    tokenizer = AutoTokenizer.from_pretrained(adapter_path)
    
    return model, tokenizer

def load_rlhf_model_merged(model_path="./rlhf_qwen_merged_model"):
    """Load the merged version of your RLHF model"""
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    return model, tokenizer

def create_rlhf_pipeline(model_path="./rlhf_qwen_merged_model"):
    """Create a text generation pipeline with your RLHF model"""
    model, tokenizer = load_rlhf_model_merged(model_path)
    
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=150,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )
    
    return pipe

print("Utility functions defined!")
print("Use load_rlhf_model_lora() or load_rlhf_model_merged() to load your model")
print("Use create_rlhf_pipeline() for easy text generation")