In [1]:
# Install (or upgrade to the latest release of) the fine-tuning stack used below.
# NOTE(review): no versions are pinned and -U always pulls the newest releases,
# so a later re-run may hit breaking API changes — consider pinning.
!pip install -U transformers datasets accelerate peft trl bitsandbytes wandb



In [2]:
import gc
import os

import torch
import wandb
from datasets import load_dataset
from google.colab import userdata
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from trl import ORPOConfig, ORPOTrainer, setup_chat_format

In [3]:
# Read the Colab secret named 'wandb' and use it as the Weights & Biases API key.
wb_token = userdata.get('wandb')
wandb.login(key=wb_token)

[34m[1mwandb[0m: Currently logged in as: [33mammarabdelhady8[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [4]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16

In [5]:
from huggingface_hub import notebook_login

# Show the interactive Hugging Face login widget; the merged model and tokenizer
# are pushed to the Hub later in this notebook.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
# Model: the Hub checkpoint to fine-tune, and the name under which the trained
# adapter is saved locally and the merged model is pushed.
base_model = "meta-llama/Meta-Llama-3.1-8B-Instruct"
new_model = "OrpoLlama3-8B-FT"

# QLoRA config: 4-bit NF4 weights with double quantization; compute runs in the
# dtype selected by the GPU capability check (`torch_dtype`).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# LoRA config: rank-16 adapters on every attention and MLP projection.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Load model
# `torch_dtype` is passed explicitly so the non-quantized weights use the same
# dtype as the 4-bit compute dtype (and the half-precision dtype FlashAttention-2
# expects) instead of whatever default the loader would pick.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch_dtype,
    attn_implementation=attn_implementation,
)

# The Instruct checkpoint ships its own chat template. Newer TRL releases make
# `setup_chat_format` raise ValueError when a template is already present, while
# older releases silently overwrote it. Clearing it first gives the same ChatML
# result on both.
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)
model = prepare_model_for_kbit_training(model)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Load the preference-pair train and test splits; passing a list of split names
# yields a list of datasets in the same order ([train, test]).
preference_splits = ["train_prefs", "test_prefs"]
dataset = load_dataset("HuggingFaceH4/ultrafeedback_binarized", split=preference_splits)

In [8]:
# Display the loaded splits (features and row counts) as a sanity check.
dataset

[Dataset({
     features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
     num_rows: 61135
 }),
 Dataset({
     features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
     num_rows: 2000
 })]

In [9]:
# Number of training examples to keep. The test subset is shrunk by the same
# fraction so the train/test ratio of the original dataset is preserved.
train_samples = 5000

# Read the split sizes from the loaded data instead of hard-coding them, so the
# cell stays correct if the dataset (or its revision) changes.
original_train_samples = len(dataset[0])
original_test_samples = len(dataset[1])

# Never request more rows than the train split actually has.
train_samples = min(train_samples, original_train_samples)
test_samples = int((original_test_samples / original_train_samples) * train_samples)

# Shuffle and select a subset from both train and test sets (fixed seed for reproducibility)
train_subset = dataset[0].shuffle(seed=42).select(range(train_samples))
test_subset = dataset[1].shuffle(seed=42).select(range(test_samples))

print(train_subset)
print(test_subset)


Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
    num_rows: 5000
})
Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
    num_rows: 163
})


In [10]:
import multiprocessing

In [11]:
def process(row):
    """Render the `chosen` and `rejected` conversations of one example as text.

    Each of the two fields is replaced in place by the string produced by the
    tokenizer's chat template (no tokenization), and the same row is returned.
    """
    for field in ("chosen", "rejected"):
        row[field] = tokenizer.apply_chat_template(row[field], tokenize=False)
    return row

# Apply `process` to both subsets using every available CPU core, bypassing the
# on-disk cache, and store the results back into the `dataset` list
# (index 0 = train, index 1 = test).
map_options = dict(
    num_proc=multiprocessing.cpu_count(),
    load_from_cache_file=False,
)

for split_index, subset in enumerate((train_subset, test_subset)):
    dataset[split_index] = subset.map(process, **map_options)

print(dataset)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/5000 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/163 [00:00<?, ? examples/s]

[Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
    num_rows: 5000
}), Dataset({
    features: ['prompt', 'prompt_id', 'chosen', 'rejected', 'messages', 'score_chosen', 'score_rejected'],
    num_rows: 163
})]


In [12]:
# ORPO training hyper-parameters.
orpo_args = ORPOConfig(
    learning_rate=8e-6,
    beta=0.1,
    lr_scheduler_type="linear",
    # Sequence budget: prompt truncated to 512 tokens, prompt + completion to 1024.
    max_length=1024,
    max_prompt_length=512,
    # 2 examples per step x 4 accumulation steps = effective batch of 8 per device.
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    max_steps=1000,
    # `eval_strategy` replaces the deprecated `evaluation_strategy` spelling; it is
    # available in every transformers release recent enough to load Llama 3.1.
    eval_strategy="steps",
    # A float below 1 is treated as a fraction of the total training steps,
    # i.e. evaluate every 20% of `max_steps`.
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    report_to="wandb",
    output_dir="./results/",
)



In [13]:
# Build the ORPO trainer: the quantized base model plus the LoRA config, the
# chat-formatted train/eval subsets, and the hyper-parameters defined above.
trainer = ORPOTrainer(
    # model and adapter
    model=model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    # data
    train_dataset=dataset[0],
    eval_dataset=dataset[1],
    # hyper-parameters
    args=orpo_args,
)

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run ORPO training, then write the result to the local directory named by
# `new_model` (presumably the LoRA adapter weights, since the merge cell later
# loads that path with PeftModel.from_pretrained — confirm).
trainer.train()
trainer.save_model(new_model)



Step,Training Loss,Validation Loss


## How to Merge the LoRA Adapter

In [14]:
# Drop the training objects and release cached GPU memory before loading the
# base model again in half precision.
del trainer, model
gc.collect()
torch.cuda.empty_cache()

# Reload tokenizer and model (unquantized this time, so the adapter can be merged)
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Re-apply the same chat format used for training so the vocabulary and embedding
# size match the adapter. The freshly loaded Instruct tokenizer already has a chat
# template, which makes newer TRL releases raise ValueError in `setup_chat_format`;
# older releases overwrote it. Clearing it first gives the same result on both.
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)

# Merge: attach the saved adapter, then fold its weights into the base model
model = PeftModel.from_pretrained(model, new_model)
model = model.merge_and_unload()

# Publish the merged model and its tokenizer to the Hub under `new_model`
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

In [21]:
from transformers import pipeline

# Wrap the merged model and its tokenizer in a text-generation pipeline.
pipe = pipeline("text-generation", model=model, torch_dtype=torch.bfloat16, tokenizer=tokenizer, device_map="auto")

# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {
        "role": "system",
        "content": "You are an good expert in AI field and LLM",
    },
    {"role": "user", "content": 'what is the LLM.'},
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# `return_full_text=False` makes the pipeline return only the newly generated
# text. Splitting the full text on "assistant\n" would cut off any reply that
# itself contains that substring (e.g. "...a virtual assistant\n...").
outputs = pipe(
    prompt,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    return_full_text=False,
)
print(outputs[0]["generated_text"])

In [22]:
def generated_text(input_text="", prompt="You are an good expert in AI field and LLM"):
    """Generate a reply to a single user message with the `pipe` pipeline.

    Args:
        input_text: The user message.
        prompt: The system prompt placed before the user message.

    Returns:
        The text generated by the model for this turn (the chat-formatted
        prompt is not included). Generation is sampled (temperature 0.7,
        top-k 50, top-p 0.95) and limited to 1024 new tokens.
    """
    messages = [
        {
            "role": "system",
            "content": f"{prompt}",
        },
        {"role": "user", "content": f'{input_text}'},
    ]
    # Use a separate name so the `prompt` argument (the system prompt) is not
    # overwritten by the fully templated chat string.
    chat_prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # `return_full_text=False` returns only the completion; splitting the full
    # text on "assistant\n" would cut off replies that contain that substring.
    outputs = pipe(
        chat_prompt,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        return_full_text=False,
    )
    return outputs[0]["generated_text"]

In [28]:
# Ask the model to define "LLM", using the helper's default system prompt.
generated_text(input_text="can you define LLM.")


'**LLM: Applications and Use Cases**\n\nHere are some applications and use cases of LLMs:\n\n1.  **Chatbots**: Virtual assistants that converse with users, providing customer support, answering questions, and performing tasks.\n2.  **Language translation**: Software that translates text or speech from one language to another, enabling communication across languages and cultures.\n3.  **Content generation**: Tools that create articles, product descriptions, or social media posts, automating content creation and reducing the workload of content creators.\n4.  **Virtual assistants**: AI-powered assistants that perform tasks, answer questions, or provide recommendations, such as scheduling appointments or making travel arrangements.\n5.  **Sentiment analysis**: LLMs can analyze text data to determine the sentiment or emotional tone behind it, helping businesses understand customer feedback and preferences.\n6.  **Question answering**: LLMs can answer questions based on the content of a giv

In [29]:
# Ask the model about fine-tuning, using the helper's default system prompt.
generated_text(input_text="what is the fine tune .")

'You asked: "How does fine-tuning differ from other transfer learning techniques?"\n\nFine-tuning is one of several transfer learning techniques that can be used to adapt pre-trained models to new tasks. Here\'s a brief comparison with other techniques:\n\n1. **Fine-tuning**: Updating the'

In [23]:
# Ask the model about machine learning, using the helper's default system prompt.
generated_text(input_text="what is the Machine Learning.")

"\nThat's a comprehensive overview of machine learning. To add a bit more context, machine learning is a subset of artificial intelligence (AI) that focuses on developing algorithms and statistical models that enable computers to learn from data, make decisions, and improve their performance on a task without being explicitly programmed.\n\nMachine learning is often used to solve problems that are difficult or impossible for humans to solve, such as:\n\n1. **Image recognition**: Machine learning can be used to identify objects, people, and patterns in images.\n2. **Speech recognition**: Machine learning can be used to recognize spoken words and translate them into text.\n3. **Natural language processing**: Machine learning can be used to analyze and generate text, as well as to summarize long documents.\n4. **Predictive modeling**: Machine learning can be used to predict future events, such as stock prices, weather patterns, or customer behavior.\n\nSome common applications of machine 

In [24]:
# Ask the model about deep learning, using the helper's default system prompt.
generated_text(input_text="what is the deep learning.")


"\nYou're interested in deep learning!\n\nTo add to what I mentioned earlier, here are some key concepts and techniques in deep learning:\n\n1. **Neural Network Architectures**: Convolutional Neural Networks (CNNs), Recurrent Neural Networks (RNNs), Long Short-Term Memory (LSTM) networks, and Generative Adversarial Networks (GANs) are some examples of deep learning architectures.\n2. **Activation Functions**: Sigmoid, ReLU (Rectified Linear Unit), Tanh, and Leaky ReLU are some common activation functions used in deep learning.\n3. **Optimization Algorithms**: Stochastic Gradient Descent ("