# [Supervised Fine-tuning Trainer](https://huggingface.co/docs/trl/sft_trainer)

Supervised fine-tuning (or SFT for short) is a crucial step in RLHF. In TRL we provide an easy-to-use API to create your SFT models and train them on your dataset with just a few lines of code.

[Python Script](https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py)

In [1]:
# !pip3 install peft==0.7.1
# !pip3 install trl==0.7.4
# !pip3 install transformers==4.36.2

In [2]:
import transformers
transformers.__version__

  from .autonotebook import tqdm as notebook_tqdm


'4.36.2'

In [3]:
import trl
trl.__version__



'0.7.4'

In [4]:
import os
import torch

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

## Instruction-Tuning
Train on completions only
- Use the DataCollatorForCompletionOnlyLM so that the loss is computed only on the completion (response) tokens, not on the prompt.
- Note that this works only in the case when packing=False.
- To instantiate that collator for instruction data, pass a response template and the tokenizer. 

In [5]:
# Step 1: Load the dataset
from datasets import load_dataset

# Training split, read from the local JSON file.
train_set = load_dataset(
    'json',
    data_files='dataset/alpaca_data.json',
    split='train',
)

# Evaluation split of `tatsu-lab/alpaca_eval`, with the "generator" and
# "dataset" columns removed.
eval_set = load_dataset(
    "tatsu-lab/alpaca_eval",
    split='eval',
).remove_columns(["generator", "dataset"])
eval_set

Generating train split: 52002 examples [00:00, 111072.45 examples/s]
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


Dataset({
    features: ['instruction', 'output'],
    num_rows: 805
})

In [6]:
# Step 2: Load the model & Tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "distilgpt2"

# Tokenizer and causal LM are both loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto')

# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Set max_seq_length explicitly rather than relying on the default, which
# would be min(tokenizer.model_max_length, 1024).
max_seq_length = min(tokenizer.model_max_length, 1024)
max_seq_length

1024

In [7]:
def formatting_prompts_func(examples):
    """Turn a batch of samples into full prompt-plus-response training strings.

    Args:
        examples: Column-oriented batch. Must provide 'instruction' and
            'output' sequences; an 'input' sequence is optional.

    Returns:
        A list with one string per instruction. Each string consists of the
        preamble, an "### Instruction:" section, an "### Input:" section
        (only when the sample has a non-empty input) and a "### Response:"
        section, with surrounding whitespace stripped.
    """
    # NOTE(review): the same preamble is used whether or not an input is
    # present; it must stay in sync with the prompt used at inference time.
    preamble = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request."
    )

    output_texts = []
    for idx, instruction in enumerate(examples['instruction']):
        # A missing 'input' column and an empty input value are treated alike.
        context = examples["input"][idx] if 'input' in examples.keys() else None

        sections = [preamble, f"### Instruction:\n{instruction}"]
        if context:
            sections.append(f"### Input:\n{context}")
        sections.append(f"### Response:\n{examples['output'][idx]}")

        output_texts.append("\n\n".join(sections).strip())

    return output_texts

In [8]:
# use the DataCollatorForCompletionOnlyLM so that the loss is computed only on the completion (response) tokens
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments

In [9]:
# Where the trainer writes its output, and where the final model is saved later.
output_path = './results'
final_output_path = './results/final'

# Collator built from the response marker used in the prompts and the tokenizer.
response_template = "### Response:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)
collator

training_args = TrainingArguments(
    output_dir=output_path,  # default = 'tmp_trainer'
    num_train_epochs=3,  # default = 3
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_checkpointing=True,
    save_strategy='epoch',
)

# Train on the first 1000 training samples only.
trainer = SFTTrainer(
    model,
    args=training_args,
    train_dataset=train_set.select(range(1000)),
    eval_dataset=eval_set,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
    max_seq_length=max_seq_length,
)

trainer.train()

Map: 100%|██████████| 1000/1000 [00:00<00:00, 5266.33 examples/s]
Map: 100%|██████████| 805/805 [00:00<00:00, 3272.39 examples/s]
  0%|          | 0/1500 [00:00<?, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
 33%|███▎      | 500/1500 [00:56<01:35, 10.44it/s]

{'loss': 2.6708, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}


 67%|██████▋   | 1000/1500 [01:54<00:56,  8.79it/s]

{'loss': 2.1571, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}


100%|██████████| 1500/1500 [02:54<00:00,  8.17it/s]

{'loss': 1.9212, 'learning_rate': 0.0, 'epoch': 3.0}


100%|██████████| 1500/1500 [02:57<00:00,  8.44it/s]

{'train_runtime': 178.1699, 'train_samples_per_second': 16.838, 'train_steps_per_second': 8.419, 'train_loss': 2.2497074788411457, 'epoch': 3.0}





TrainOutput(global_step=1500, training_loss=2.2497074788411457, metrics={'train_runtime': 178.1699, 'train_samples_per_second': 16.838, 'train_steps_per_second': 8.419, 'train_loss': 2.2497074788411457, 'epoch': 3.0})

In [10]:
# Save the trained model to final_output_path ('./results/final').
trainer.save_model(final_output_path)

In [11]:
from transformers import pipeline

# Load the model back from the directory it was saved to.
model = AutoModelForCausalLM.from_pretrained(final_output_path, device_map='auto')

# Text-generation pipeline around the reloaded model and the existing tokenizer.
text_generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map='auto',
    max_new_tokens=50,
    pad_token_id=tokenizer.eos_token_id,
)

In [12]:
def format_input(sample):
    """Build the inference prompt for a single sample.

    The prompt mirrors the layout produced by formatting_prompts_func, but
    stops right after the "### Response:" header so the model generates the
    answer.

    Args:
        sample: Mapping with an 'instruction' entry and, optionally, an
            'input' entry.

    Returns:
        The prompt string. It contains an "### Input:" section only when the
        sample has a non-empty 'input' value.
    """
    # Branch on the *value*, not just the presence of the key: training
    # (formatting_prompts_func) omits the Input section for empty or None
    # inputs, so the inference prompt must do the same.
    input_text = sample['input'] if 'input' in sample.keys() else None

    if input_text:
        return f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{sample['instruction']}

### Input:
{input_text}

### Response:
""".strip()

    return f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{sample['instruction']}

### Response:
""".strip()

In [17]:
# Generate a completion for the first evaluation sample and print it,
# followed by the reference answer from the dataset.
formatted_input = format_input(eval_set[0])
output = text_generator(formatted_input)

print(f"{output[0]['generated_text']}\n")
# Index the row first: this reads one row instead of pulling the whole
# 'output' column just to take its first element.
print(f"### Actual Response:\n{eval_set[0]['output']}")

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
What are the names of some famous actors that started their careers on Broadway?

### Response:
The name "Methraki" is Tom Hanks’s original and unforgettable performance ’s protagonist, played by Mark Wahlberg. The song plays about the experiences and their lives, as they travel to the remote jungle of

### Actual Response:
Some famous actors that started their careers on Broadway include: 
1. Hugh Jackman 
2. Meryl Streep 
3. Denzel Washington 
4. Julia Roberts 
5. Christopher Walken 
6. Anthony Rapp 
7. Audra McDonald 
8. Nathan Lane 
9. Sarah Jessica Parker 
10. Lin-Manuel Miranda
