<a href="https://colab.research.google.com/github/PARTHIBAN-007/Hands-ON-LLMs/blob/main/12_FineTuning_Generatrion_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q accelerate peft  transformers trl sentencepiece
!pip install -U bitsandbytes



# Supervised Fine Tuning

## Data Preprocessing

In [2]:
from transformers import AutoTokenizer
from datasets import load_dataset


template_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

def format_prompt(example):
    """Format the prompt to using the <|user|> template TinyLLama is using"""

    chat = example["messages"]
    prompt = template_tokenizer.apply_chat_template(chat, tokenize=False)

    return {"text": prompt}

dataset = (
    load_dataset("HuggingFaceH4/ultrachat_200k",  split="test_sft")
      .shuffle(seed=42)
      .select(range(3_000))
)
dataset = dataset.map(format_prompt)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
print(dataset["text"][2576])


<|user|>
Given the text: Knock, knock. Who’s there? Hike.
Can you continue the joke based on the given text material "Knock, knock. Who’s there? Hike"?</s>
<|assistant|>
Sure! Knock, knock. Who's there? Hike. Hike who? Hike up your pants, it's cold outside!</s>
<|user|>
Can you tell me another knock-knock joke based on the same text material "Knock, knock. Who's there? Hike"?</s>
<|assistant|>
Of course! Knock, knock. Who's there? Hike. Hike who? Hike your way over here and let's go for a walk!</s>



#Model Quantization

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# 4-bit quantization configuration - Q in QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit precision model loading
    bnb_4bit_quant_type="nf4",  # Quantization type
    bnb_4bit_compute_dtype="float16",  # Compute dtype
    bnb_4bit_use_double_quant=True,  # Apply nested quantization
)

# Load the model to train on the GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",

    # Leave this out for regular SFT
    quantization_config=bnb_config,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = template_tokenizer
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

## Configuration

### Lora Configuration

In [5]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Prepare LoRA Configuration
peft_config = LoraConfig(
    lora_alpha=32,  # LoRA Scaling
    lora_dropout=0.1,  # Dropout for LoRA Layers
    r=64,  # Rank
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=  # Layers to target
     ['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)


model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

### Training Configuration

In [6]:
from transformers import TrainingArguments

output_dir = "./results"

# Training arguments
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    num_train_epochs=1,
    logging_steps=10,
    fp16=True,
    gradient_checkpointing=True
)

### Training

In [7]:
from trl import SFTTrainer

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_arguments,

    # Leave this out for regular SFT
    peft_config=peft_config,
)

# Train model
trainer.train()

# Save QLoRA weights
trainer.model.save_pretrained("TinyLlama-1.1B-qlora")

  trainer = SFTTrainer(


Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,1.5647
20,1.3847
30,1.3155
40,1.336
50,1.3514
60,1.2771
70,1.3753
80,1.3545
90,1.3078
100,1.2945


### Merged Adaptet

In [2]:
from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
     "TinyLlama-1.1B-qlora",
      low_cpu_mem_usage=True,
      device_map="auto",

    )

merged_model = model.merge_and_unload()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


ValueError: Can't find 'adapter_config.json' at 'TinyLlama-1.1B-qlora'

### Inference

In [15]:
from transformers import pipeline

prompt = """<|user|>
Tell me something about large Lanuage Models
<|assistant|>"""


pipe= pipeline(task ="text-generation",model=merged_model,tokenizer =tokenizer)
pipe(prompt)[0]['generated_text']

Device set to use cuda:0


'<|user|>\nTell me something about large Lanuage Models\n<|assistant|>\nLarge Lanuage Models are a type of language model that is capable of generating large amounts of text. They are typically used for generating text in a specific language, such as English or Chinese.\nOne of the most popular large Lanuage Models is the GPT-3 model, which is capable of generating text in over 100 languages. The model is trained on a large dataset of text, and it is capable of generating text that is highly accurate and realistic.\nAnother popular large Lanuage Model is the BART model, which is capable of generating text in a specific language, such as English or Chinese. The model is trained on a large dataset of text, and it is capable of generating text that is highly accurate and realistic.\nOverall, large Lanuage Models are a powerful tool for generating text in a specific language, and they are becoming increasingly popular in the field of Natural Language Processing.'

## preference Tuning(PPO/DP)

## Data Preprocessing

In [5]:
from datasets import load_dataset

def format_prompt(example):
    """Format the prompt to using the <|user|> template TinyLLama is using"""

    system = "<|system|> \n" + example["system"] + "</s>\n"
    prompt = "<|user|>\n" + example["input"] + "</s>\n <|assistant|>"
    chosen = example["chosen"] + "</s>\n"
    rejected = example['rejected'] + "</s>\n"

    return {
        "prompt" : system +prompt ,
        "chosen" : chosen ,
        "rejected" : rejected
    }


dpo_dataset = load_dataset("argilla/distilabel-intel-orca-dpo-pairs",split="train")
dpo_dataset = dpo_dataset.filter(
    lambda r:
    r["status"] != "tie" and
    r["chosen_score"] >= 8 and
    not r["in_gsm8k_train"]
)

dpo_dataset = dpo_dataset.map(format_prompt, remove_columns=dpo_dataset.column_names)
dpo_dataset

README.md:   0%|          | 0.00/10.1k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/79.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12859 [00:00<?, ? examples/s]

Filter:   0%|          | 0/12859 [00:00<?, ? examples/s]

Map:   0%|          | 0/5922 [00:00<?, ? examples/s]

Dataset({
    features: ['chosen', 'rejected', 'prompt'],
    num_rows: 5922
})

### Models - Quantization

In [10]:
from peft import AutoPeftModelForCausalLM
from transformers import BitsAndBytesConfig

model_name = "TinyLlama-1.1B-qlora"

bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True
)

model = AutoPeftModelForCausalLM.from_pretrained(
    model_name ,
    low_cpu_mem_usage = True,
    device_map = "auto",
    quantization_config = bnb_config,
)

merged_mmodel = model.merge_and_unload()

model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
tokenizer = AutoTokenizer.from_pretrained(model_name ,trust_remote_code=True)
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

ValueError: Can't find 'adapter_config.json' at 'TinyLlama-1.1B-qlora'

### Configuration

In [27]:
from peft import LoraConfig , prepare_model_for_kbit_training ,get_peft_model

peft_config = LoraConfig(
    lora_alpha = 32,
    lora_dropout= 0.1,
    r = 64,
    bias = "none",
    task_type = "CAUSAL_LM",
    target_modules = ["k_proj","gate_proj","up_proj","q_proj","o_proj","down_proj"]
)

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model,peft_config)

In [3]:
from trl import DPOConfig

output_dir = "./results"

training_arguments = DPOConfig(
    output_dir = output_dir,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps =4,
    optim = "paged_adamw_32bit",
    learning_rate = 1e-5,
    lr_scheduler_type = "cosine",
    max_steps = 200,
    logging_steps = 10,
    fp16 =True,
    gradient_checkpointing = True,
    warmup_ratio = 0.1

)

In [4]:
from trl import DPOTrainer

dpo_trainer = DPOTrainer(
    model,
    args = training_arguments,
    train_dataset =  dpo_dataset ,
    tokenizer = tokenizer,
    peft_config = peft_config,
)

dpo_trainer.train()

dpo_trainer.model.savee_pretrained("TinyLlama-1.1B-dpo-qlora")

NameError: name 'model' is not defined

In [None]:
from peft import PeftModel
model_name = "TinyLlama-1.1B-qlora"
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name ,
    low_cpu_mem_usage = True,
    device_map = "auto"
)

sft_model = model.merge_and_unload()

dpo_model = PeftModel.from_pretrained(
    sft_model,
    model_name,
    device_map = "auto"
)

dpo_model = dpo_model.merge_and_unload()

In [None]:
from transformers import pipeline

prompt = """
<|user|>
Tell me something about large language Model
<|assistant|>"""

pipe = pipeline(task="text-generation",model= dpo_model,tokenizer = tokenizer)
pipe(prompt)[0]["generated_text"]