# [Supervised Fine-tuning Trainer](https://huggingface.co/docs/trl/sft_trainer)

Supervised fine-tuning (SFT for short) is a crucial step in RLHF. TRL provides an easy-to-use API for creating SFT models and training them on your dataset with a few lines of code.

[Python Script](https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py)

In [1]:
!pip3 install peft==0.7.1
!pip3 install trl==0.7.4
!pip3 install transformer==4.36.2

Collecting peft==0.7.1
  Downloading peft-0.7.1-py3-none-any.whl (168 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/168.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━[0m [32m122.9/168.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft==0.7.1)
  Downloading accelerate-0.28.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft==0.7.1)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runt

In [2]:
# Sanity check: show which transformers release the kernel actually imports;
# it should match the version pinned in the install cell.
import transformers
transformers.__version__

'4.38.2'

In [3]:
# Sanity check: show which trl release the kernel actually imports.
import trl
trl.__version__



'0.7.4'

In [4]:
import os
import torch

# Restrict CUDA to the GPU with index 1.
# NOTE(review): on a host whose only GPU has index 0 this leaves no CUDA device
# visible and the code below falls back to the CPU - confirm the index for
# your machine.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Optional proxy settings (disabled)
#os.environ['http_proxy']  = 'http://192.41.170.23:3128'
#os.environ['https_proxy'] = 'http://192.41.170.23:3128'

# Use the GPU when one is visible, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cpu')

## Basic SFT

In [5]:
import json

# Open the JSON file
#with open('alpaca_data.json', 'r') as f:
    # Read the JSON data
 #   dataset = json.load(f)

In [23]:
# Step 1: Load the dataset
from datasets import load_dataset

# Alpaca-style instruction data stored as a local JSON file; the records carry
# 'instruction', 'input' and 'output' fields.
alpaca_json_path = '/content/alpaca_data.json'
dataset = load_dataset(
    "json",
    data_files=alpaca_json_path,
    split="train",
)
dataset

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['output', 'input', 'instruction'],
    num_rows: 52002
})

In [24]:
# Inspect the first record to see the field layout
dataset[0]

{'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.',
 'input': '',
 'instruction': 'Give three tips for staying healthy.'}

In [25]:
# Step 2: Load the model & tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "distilgpt2"

model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# When no max_seq_length is given, the trainer falls back to
# min(tokenizer.model_max_length, 1024); compute the same value here so it can
# be passed explicitly.
max_seq_length = min(tokenizer.model_max_length, 1024)
max_seq_length

1024

In [33]:
# Step 3: Define the Trainer
from transformers import TrainingArguments
from trl import SFTTrainer

# Both values are spelled out for visibility; the notebook author annotated
# them as the defaults.
training_args = TrainingArguments(output_dir='tmp_trainer', num_train_epochs=3)

# Train on the first 500 records only to keep the demo short
basic_sft_subset = dataset.select(range(500))

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=basic_sft_subset,
    dataset_text_field="instruction",  # column that is used as the training text
    max_seq_length=max_seq_length,
)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [34]:
# Run the fine-tuning loop configured in the previous cell
trainer.train()

Step,Training Loss


TrainOutput(global_step=189, training_loss=2.5600983150421626, metrics={'train_runtime': 502.7892, 'train_samples_per_second': 2.983, 'train_steps_per_second': 0.376, 'total_flos': 6122101211136.0, 'train_loss': 2.5600983150421626, 'epoch': 3.0})

## Instruction-Tuning
Train on completions only
- Use the DataCollatorForCompletionOnlyLM to train your model on the completions only, i.e. the loss is computed on the response tokens and not on the prompt.
- Note that this works only in the case when packing=False.
- To instantiate that collator for instruction data, pass a response template and the tokenizer.

In [35]:
# Step 1: Load the dataset
from datasets import load_dataset

# CodeAlpaca instruction data, pulled from the Hugging Face Hub
code_alpaca_repo = "lucasmccabe-lmi/CodeAlpaca-20k"
dataset = load_dataset(code_alpaca_repo, split="train")
dataset

Downloading readme:   0%|          | 0.00/677 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.45M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/20022 [00:00<?, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 20022
})

In [36]:
# Inspect one record (index 20000) to see the field layout
dataset[20000]

{'instruction': 'Design an algorithm for finding the nth node from the tail of a linked list.',
 'input': '',
 'output': '"""\ndef nthFromLast(head, n): \n    # Initialize slow and fast pointers \n    slow  = head \n    fast = head \n  \n    # Move fast pointer n-1 times \n    while (n > 0): \n        if (fast == None): \n            return None\n  \n        fast = fast.next\n        n = n - 1\n  \n    # Move both slow and fast pointer together \n    while (fast.next != None): \n        slow = slow.next\n        fast = fast.next\n  \n    return slow\n"""'}

In [None]:
# Step 2: Load the model & tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "distilgpt2"

model = AutoModelForCausalLM.from_pretrained(model_name_or_path, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [38]:
def formatting_prompts_func(example):
    """Render a batch of records as "### Question ... ### Answer ..." strings.

    `example` is indexed by column name; `example['instruction']` and
    `example['output']` are indexable sequences, and 'output' must have at
    least as many entries as 'instruction'. One string is returned per
    instruction, in order.
    """
    return [
        f"### Question: {example['instruction'][idx]}\n ### Answer: {example['output'][idx]}"
        for idx in range(len(example['instruction']))
    ]

# Preview the formatted prompts for the first two records
formatting_prompts_func(dataset[:2])

['### Question: Create a function that takes a specific input and produces a specific output using any mathematical operators. Write corresponding code in Python.\n ### Answer: def f(x):\n    """\n    Takes a specific input and produces a specific output using any mathematical operators\n    """\n    return x**2 + 3*x',
 "### Question: Generate a unique 8 character string that contains a lowercase letter, an uppercase letter, a numerical digit, and a special character. Write corresponding code in Python.\n ### Answer: import string\nimport random\n\ndef random_password_string():\n    characters = string.ascii_letters + string.digits + string.punctuation\n    password = ''.join(random.sample(characters, 8))\n    return password\n\nif __name__ == '__main__':\n    print(random_password_string())"]

In [39]:
# Collator for completion-only training. The response template is the marker
# that precedes the answer in every string built by formatting_prompts_func.
from trl import SFTTrainer
from trl import DataCollatorForCompletionOnlyLM

response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(
    response_template,
    tokenizer=tokenizer,
)
collator

DataCollatorForCompletionOnlyLM(tokenizer=GPT2TokenizerFast(name_or_path='distilgpt2', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}, mlm=False, mlm_probability=0.15, pad_to_multiple_of=None, tf_experimental_compile=False, return_tensors='pt')

In [1]:
# Step 3: Define the Trainer
# NOTE: this cell uses `SFTTrainer`, `model`, `dataset`,
# `formatting_prompts_func` and `collator` from the cells above; run those first
# in the same kernel session, otherwise a NameError is raised.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset.select(range(100)),  # first 100 records only
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)
trainer.train()

NameError: name 'SFTTrainer' is not defined

### Stanford-Alpaca : Format your input prompts
For instruction fine-tuning, it is quite common to have two columns inside the dataset: one for the prompt & the other for the response.

This allows people to format examples like Stanford-Alpaca did as follows:

In [None]:
# Illustration of an Alpaca-style prompt layout. This is a plain string (not an
# f-string), so `{instruction}` and `{response}` stay as literal placeholders.
test = '''
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
{response}
'''

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTTrainer

# Load the instruction dataset and drop its "meta" column in one chained call
dataset = load_dataset("HuggingFaceH4/instruction-dataset").remove_columns("meta")
dataset

In [None]:
def format_instruction(sample):
    """Format prompt/completion data as Alpaca-style instruction text.

    Accepts either a single record or a batch:

    * single record - `sample['prompt']` and `sample['completion']` are scalar
      values; one formatted string is returned (the original behavior).
    * batch - `sample['prompt']` and `sample['completion']` are lists/tuples;
      a list with one formatted string per record is returned. This is the
      form a trainer needs when it calls the formatting function on batches
      and expects a list of strings back.

    Leading and trailing whitespace is stripped from every formatted string.
    """
    def _render(prompt, completion):
        # Render one record; the template text is unchanged from the
        # single-record version of this function.
        return f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{prompt}

### Response:
{completion}
""".strip()

    prompts = sample['prompt']
    completions = sample['completion']

    # A list/tuple of prompts means a batch: return one string per record.
    if isinstance(prompts, (list, tuple)):
        return [_render(p, c) for p, c in zip(prompts, completions)]

    # Anything else is treated as a single record.
    return _render(prompts, completions)

# Preview the formatted text for the first record of the 'test' split
format_instruction(dataset['test'][0])

In [None]:
# Fresh distilgpt2 model/tokenizer pair for this section
checkpoint_name = "distilgpt2"
model = AutoModelForCausalLM.from_pretrained(checkpoint_name, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Fine-tune on the 'test' split, formatting each record with format_instruction
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset['test'],
    tokenizer=tokenizer,
    max_seq_length=1024,
    formatting_func=format_instruction,
)
trainer.train()