In [4]:
from huggingface_hub import interpreter_login

interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



In [5]:
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import DPOTrainer, DPOConfig

## Format dataset

In [6]:
dataset = load_dataset(path="trl-lib/ultrafeedback_binarized", split="train")

## Select the model

In [9]:
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"

device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [17]:
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = model_name,
    torch_dtype = torch.float32,
).to(device)

model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [11]:
#Set out name for the finetune to be saved &/ uploaded to 
finetune_name = "SmolLM2-FT-DPO-KSV"
finetune_tags = ["smol-course", "module_1"]

## Train model with DPO

In [15]:
# Training arguments

training_args = DPOConfig(
    #Training batch size per GPU
    per_device_train_batch_size = 4,
    #Number of updates steps to accumulate before performing a backward/update pass
    #Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
    gradient_accumulation_steps=4,
    #Saves memory by  not storing activations during forward pass
    # Instead recomputes them during backward pass
    gradient_checkpointing=True,
    #Base learning rate for training
    learning_rate = 5e-5,
    #Learning rate schedule - "cosine" gradually decreases LR following cosine curve
    lr_scheduler_type = "cosine",
    #Total number of training steps
    max_steps = 200,
    # Disables model checkpointing during training
    save_strategy = "no",
    #how often to log training metrics
    logging_steps = 1,
    #directory to save model outputs
    output_dir = "smol_dpo_output",
    #Number of steps for learning rate warmuo
    warmup_steps = 100,
    #Use bfloat precision for faster training
    bf16 = True,
    #Disable wandb/tensorboard loggging
    report_to = None,
    #Keep all columns in dataset even if not used
    remove_unused_columns = False,
    #model ID for HuggingFace Hub uploads
    hub_model_id= finetune_name
)

In [18]:
trainer = DPOTrainer(
                    model= model, args= training_args, train_dataset=dataset, processing_class= tokenizer,
                    #DPO-specific temperature parameter that controls the strength of the preference model
                    #Lower values (like 0.1) make the model more conservative in the following preference 
                    beta = 0.1,
                    #maximum length of the input prompt in tokens
                    max_prompt_length = 1024, 
                    #maximum combined length of prompt + response in tokens
                    max_length = 1536
                    )


Deprecated positional argument(s) used in DPOTrainer, please use the DPOConfig to set these arguments instead.
Extracting prompt from train dataset: 100%|██████████| 62135/62135 [00:19<00:00, 3217.57 examples/s]
Applying chat template to train dataset: 100%|██████████| 62135/62135 [00:33<00:00, 1867.26 examples/s]
Tokenizing train dataset:   1%|          | 740/62135 [00:03<05:53, 173.78 examples/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2379 > 2048). Running this sequence through the model will result in indexing errors
Tokenizing train dataset: 100%|██████████| 62135/62135 [04:14<00:00, 243.81 examples/s]
max_steps is given, it will override any value given in num_train_epochs


In [22]:
dataset

Dataset({
    features: ['chosen', 'rejected', 'score_chosen', 'score_rejected'],
    num_rows: 62135
})

In [None]:
trainer.train()

trainer.save_model(f"./{finetune_name}")

if os.getenv("HF_TOKEN"):
    trainer.push_to_hub(tags=finetune_tags)

  ctx_manager = torch.cpu.amp.autocast(cache_enabled=cache_enabled, dtype=self.amp_dtype)


In [None]:
moisturizer wash
beard oil
beard balm