# LLM instruction tuning (supervised fine-tuning of GPT-2)

In [None]:
!pip install transformers[torch]
!pip install datasets
!pip install trl



In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    TrainingArguments,
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
from trl import SFTTrainer

In [None]:
# --- Model and output locations ---------------------------------------------
model_name = 'openai-community/gpt2'
out_dir = '/content/'

# --- Sequence / batching -----------------------------------------------------
context_length = 128                 # passed to SFTTrainer as max_seq_length
batch_size = 16                      # used for both train and eval batches
gradient_accumulation_steps = 2
num_workers = os.cpu_count()         # dataloader worker processes

# --- Optimisation schedule ---------------------------------------------------
max_steps = 3_000
learning_rate = 1e-3

# --- Mixed precision (exactly one of these is switched on) -------------------
fp16 = True
bf16 = False

# --- Logging / checkpoint cadence, in optimiser steps ------------------------
logging_steps = 500
save_steps = 500

In [None]:
# Read the local JSON file with the generic 'json' loader; the records are
# accessed through the 'train' split in the next cell.
dataset = load_dataset(path='json', data_files='dataset.json')

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Hold out 10% of the examples for validation. The split is shuffled, so a
# fixed seed is supplied to make the train/validation membership reproducible
# across kernel restarts.
full_dataset = dataset['train'].train_test_split(test_size=0.1, shuffle=True, seed=42)
dataset_train = full_dataset['train']
dataset_valid = full_dataset['test']

print(dataset_train)
print(dataset_valid)

Dataset({
    features: ['answer', 'input'],
    num_rows: 297
})
Dataset({
    features: ['answer', 'input'],
    num_rows: 33
})


In [None]:
def preprocess_function(example):
    """
    Format one dataset record as a single training string.

    The layout mirrors the prompt template used by the inference cells of this
    notebook ("### Instruction:" / "### Response:"), so the model is queried
    with the same structure it was trained on.

    Args:
        example: A mapping with the keys 'input' (the instruction text) and
            'answer' (the expected response).

    Returns:
        str: The instruction and response joined into one prompt string.
    """
    return f"### Instruction:\n{example['input']}\n### Response:\n{example['answer']}"

In [None]:
# Load the pretrained causal-LM weights identified by `model_name`.
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Load the tokenizer that matches `model_name`, requesting the non-fast
# implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
training_args = TrainingArguments(
    # Output location and reporting backend.
    output_dir=f"{out_dir}/logs",
    report_to='tensorboard',
    # Optimisation schedule.
    max_steps=max_steps,
    learning_rate=learning_rate,
    lr_scheduler_type='constant',
    weight_decay=0.01,
    # Batching and data loading.
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    dataloader_num_workers=num_workers,
    # Mixed precision flags come from the configuration cell.
    fp16=fp16,
    bf16=bf16,
    # Logging, evaluation and checkpointing are all step-driven.
    logging_strategy='steps',
    logging_steps=logging_steps,
    evaluation_strategy='steps',
    save_strategy='steps',
    save_steps=save_steps,
    save_total_limit=2,
    load_best_model_at_end=True,
)



In [None]:
# Assemble the supervised fine-tuning trainer: the model and tokenizer loaded
# above, the train/validation splits, and `preprocess_function` to turn each
# record into text. Packing is enabled and sequences are capped at
# `context_length` tokens.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset_train,
    eval_dataset=dataset_valid,
    formatting_func=preprocess_function,
    packing=True,
    max_seq_length=context_length,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run the fine-tuning loop and keep the returned training summary.
history = trainer.train()

  self.pid = os.fork()


Step,Training Loss,Validation Loss
500,0.1486,3.110167
1000,0.0067,3.348337
1500,0.0038,3.402031
2000,0.0025,3.525433
2500,0.0019,3.704328
3000,0.0019,3.560579


  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
  self.pid = os.fork()
There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


In [None]:
# Persist the fine-tuned model and its tokenizer side by side under
# <out_dir>/best_model so later sections can reload them from one directory.
# NOTE(review): the path is on Google Drive — presumably Drive is mounted
# before this cell runs; confirm.
out_dir = '/content/drive/MyDrive'
best_model_dir = f"{out_dir}/best_model"
model.save_pretrained(best_model_dir)
tokenizer.save_pretrained(best_model_dir)

Non-default generation parameters: {'max_length': 50, 'do_sample': True}


('/content/drive/MyDrive/best_model/tokenizer_config.json',
 '/content/drive/MyDrive/best_model/special_tokens_map.json',
 '/content/drive/MyDrive/best_model/vocab.json',
 '/content/drive/MyDrive/best_model/merges.txt',
 '/content/drive/MyDrive/best_model/added_tokens.json',
 '/content/drive/MyDrive/best_model/tokenizer.json')

# Inference with the fine-tuned LLM

In [None]:
from transformers import (
    AutoModelForCausalLM,
    logging,
    pipeline,
    AutoTokenizer
)
import torch

In [None]:
# Prefer the GPU when one is visible to torch, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reload the checkpoint written by the instruction-tuning section above
# (<Drive>/best_model). The previous path, 'best_model_DPO', is never written
# by this notebook, so the cell could not run from a fresh kernel.
finetuned_dir = '/content/drive/MyDrive/best_model'
model = AutoModelForCausalLM.from_pretrained(finetuned_dir)
tokenizer = AutoTokenizer.from_pretrained(finetuned_dir)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

NameError: name 'torch' is not defined

In [None]:
# Build a text-generation pipeline around the already-loaded model.
# `device` (not `device_map`) is the argument that places an instantiated
# model: `device_map` only influences how a checkpoint is loaded from a
# name/path, so it was silently ignored here and the chosen device never applied.
pipe = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=256,  # Prompt + new tokens to generate.
    device=device,
)

In [None]:
# Prompt skeleton with two positional slots: the instruction text, then the
# response (left empty when the model is asked to generate it).
template = (
    "### Instruction:\n"
    "{}\n"
    "### Response:\n"
    "{}"
)

In [None]:
# Fill the template with the question; the response slot stays empty so the
# prompt ends right after the "### Response:" header.
instructions, response = 'what is GPT', ''
prompt = template.format(instructions, response)

In [None]:
# Sampling settings forwarded to the pipeline call.
sampling_options = {
    'do_sample': True,
    'temperature': 0.7,
    'top_k': 50,
    'top_p': 0.95,
    'repetition_penalty': 1.1,
}
outputs = pipe(prompt, **sampling_options)

# Show the text of the first returned candidate.
first_candidate = outputs[0]
print(first_candidate['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


### Instruction:
What is the role of a telecom human resources manager
### Response:
An HR manager oversees recruitment, training, employee relations, performance management, and compliance with labor laws.


# DPO for preference alignment

In [None]:
import trl
from trl import DPOConfig, DPOTrainer
from datasets import load_dataset, Dataset, DatasetDict
import pandas as pd

In [None]:
from transformers import (
    AutoModelForCausalLM,
    logging,
    pipeline,
    AutoTokenizer
)
import torch

In [None]:
def chatml_format(example):
    """
    Convert one preference record into the prompt/chosen/rejected triple.

    Reads the 'system', 'question', 'chosen' and 'rejected' fields of
    `example`. The system message (when non-empty) and the user question are
    rendered with the module-level `tokenizer`'s chat template; both answers
    get the literal "<|im_end|>\\n" terminator appended.

    Returns:
        dict: {'prompt': ..., 'chosen': ..., 'rejected': ...}
    """
    end_of_turn = "<|im_end|>\n"

    # Optional system turn: rendered only when the record carries one.
    system_text = example['system']
    system = (
        tokenizer.apply_chat_template(
            [{"role": "system", "content": system_text}], tokenize=False
        )
        if len(system_text) > 0
        else ""
    )

    # User turn, rendered with the generation prompt appended.
    user_turn = [{"role": "user", "content": example['question']}]
    prompt = tokenizer.apply_chat_template(
        user_turn, tokenize=False, add_generation_prompt=True
    )

    return {
        "prompt": system + prompt,
        "chosen": example['chosen'] + end_of_turn,
        "rejected": example['rejected'] + end_of_turn,
    }

# Fetch the preference-pairs dataset and keep only its 'train' split.
dataset = load_dataset("Intel/orca_dpo_pairs", split="train")

In [None]:
# Start DPO from the checkpoint saved by the instruction-tuning section.
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path='/content/drive/MyDrive/best_model'
)

In [None]:
# Tokenizer stored next to the fine-tuned model; it pads on the left and
# reuses the end-of-sequence token for padding. It must exist before the map
# below because `chatml_format` reads the module-level `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained('/content/drive/MyDrive/best_model')
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token

# Remember the raw column names so they can be dropped after formatting,
# leaving only the keys returned by `chatml_format`.
original_columns = dataset.column_names

# Rewrite every record into prompt / chosen / rejected.
dataset = dataset.map(chatml_format, remove_columns=original_columns)

In [None]:
# DPO run configuration.
batch_size = 4
max_steps = 200
training_args = DPOConfig(
    output_dir="/content/drive/MyDrive/model",
    # Batching: the accumulation count deliberately reuses `batch_size`.
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=batch_size,
    gradient_checkpointing=True,
    # Optimisation schedule.
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_steps=max_steps,
    # No checkpoints are written and no evaluation set is supplied, so there is
    # never a "best" checkpoint to reload. `load_best_model_at_end=True` was a
    # misleading no-op in this setup and has been dropped.
    save_strategy="no",
    logging_steps=50,
    fp16=True,
)

In [None]:
# Token budgets: the prompt alone, and the prompt plus answer.
max_length = 1024
max_prompt_length = 512

# Build the DPO trainer on the formatted preference dataset. No explicit
# reference model is passed.
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=dataset,
    max_length=max_length,
    max_prompt_length=max_prompt_length,
    beta=0.1,
)

# Run preference optimisation; the returned summary is shown as the cell output.
dpo_trainer.train()

Map:   0%|          | 0/12859 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1136 > 1024). Running this sequence through the model will result in indexing errors
max_steps is given, it will override any value given in num_train_epochs
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss
50,0.2839
100,0.0517
150,0.043
200,0.0057


TrainOutput(global_step=200, training_loss=0.09607011765241623, metrics={'train_runtime': 1197.094, 'train_samples_per_second': 2.673, 'train_steps_per_second': 0.167, 'total_flos': 0.0, 'train_loss': 0.09607011765241623, 'epoch': 0.24883359253499224})

In [None]:
from transformers import AutoModelForCausalLM, GenerationConfig

In [None]:
# Write the DPO-trained model and its tokenizer into <out_dir>/model.
out_dir = '/content/drive/MyDrive'
dpo_model_dir = f"{out_dir}/model"
model.save_pretrained(dpo_model_dir)
tokenizer.save_pretrained(dpo_model_dir)

Non-default generation parameters: {'max_length': 50, 'do_sample': True}


('/content/drive/MyDrive/model/tokenizer_config.json',
 '/content/drive/MyDrive/model/special_tokens_map.json',
 '/content/drive/MyDrive/model/vocab.json',
 '/content/drive/MyDrive/model/merges.txt',
 '/content/drive/MyDrive/model/added_tokens.json',
 '/content/drive/MyDrive/model/tokenizer.json')

In [None]:
# Run inference for the DPO model on the CPU.
device = "cpu"

In [None]:
# Reload the DPO checkpoint and tokenizer from the directory written above.
dpo_checkpoint = '/content/drive/MyDrive/model'
model = AutoModelForCausalLM.from_pretrained(dpo_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(dpo_checkpoint)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Build a text-generation pipeline around the already-loaded DPO model.
# `device` (not `device_map`) is the argument that places an instantiated
# model: `device_map` only influences how a checkpoint is loaded from a
# name/path, so the requested device was never actually applied.
pipe = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_length=512,  # Prompt + new tokens to generate.
    device=device,
)

In [None]:
# Prompt skeleton with two positional slots: instruction first, response second.
template = "\n".join([
    "### Instruction:",
    "{}",
    "### Response:",
    "{}",
])

In [None]:
# Question to ask; the response slot is left blank for the model to complete.
response = ''
instructions = ' What is network optimization?'
prompt = template.format(instructions, response)

In [None]:
# Sampling settings for the DPO model, forwarded to the pipeline call.
generation_kwargs = dict(
    do_sample=True,
    temperature=1.0,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.1,
)
outputs = pipe(prompt, **generation_kwargs)

# Print the text of the first returned candidate.
generated = outputs[0]['generated_text']
print(generated)

### Instruction:
 What is network optimization?
### Response:
Network optimization involves analyzing performance metrics, adjusting configurations, and deploying additional cell sites.
