In [1]:
# Install/upgrade the fine-tuning stack into the kernel's environment.
# NOTE(review): versions are unpinned, so a later re-run may resolve different releases.
# NOTE(review): `datasets` is imported further down but is not listed here — presumably
# it arrives as a transitive dependency of the packages below; confirm.
%pip install -U trl transformers peft bitsandbytes --quiet

In [2]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Bare expression so the chosen device is shown as the cell result.
device

'cuda'

In [3]:
from transformers import pipeline

# Hub checkpoint used both for the baseline generation and as the DPO starting point.
model_id = "CKeibel/gpt2-medium-chat"

# Place the pipeline explicitly on the device detected in the previous cell instead of
# relying on the library's default placement (which has varied between releases).
pipe = pipeline("text-generation", model=model_id, device=device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading weights:   0%|          | 0/292 [00:00<?, ?it/s]

In [4]:
# Single-turn conversation in the role/content message format.
user_turn = {"role": "user", "content": "What is Deep Learning?"}
messages = [user_turn]

# Baseline answer from the untouched checkpoint, capped at 50 newly generated tokens.
output = pipe(messages, max_new_tokens=50)

Passing `generation_config` together with generation-related arguments=({'max_new_tokens'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.
Both `max_new_tokens` (=50) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


In [5]:
# Take the last message of the first result's conversation and print its text.
last_message = output[0]["generated_text"][-1]
print(last_message["content"])

Deep Learning is a branch of Artificial Intelligence that focuses on the creation of neural networks that can process large amounts of data and make predictions based on it. The key goal of deep learning is to enable computers to learn from data and make predictions based on it


# Training

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer and causal-LM weights are both loaded from the same checkpoint (model_id).
tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" leaves device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
)



Loading weights:   0%|          | 0/292 [00:00<?, ?it/s]

In [7]:
from peft import PeftModel, LoraConfig
from trl import DPOTrainer, DPOConfig

# LoRA adapter settings; handed to the DPO trainer through its `peft_config` argument.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Adapters are attached only to modules named "c_attn".
    target_modules=["c_attn"],
    # Adapter rank and scaling factor.
    r=16,
    lora_alpha=16,
    # Regularisation / bias handling.
    lora_dropout=0.05,
    bias="none",
)

# Training arguments for the DPO run.
dpo_config = DPOConfig(
    # Where outputs are written and how often the loss is logged.
    output_dir="./gpt2_pirate_dpo",
    logging_steps=10,
    # DPO-specific setting.
    beta=0.5,
    # Optimisation: 200 optimizer steps; "adamw_8bit" is presumably why bitsandbytes
    # is installed at the top of the notebook — confirm.
    optim="adamw_8bit",
    learning_rate=1e-6,
    max_steps=200,
    # 2 examples per device per forward pass, accumulated over 4 passes per update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    remove_unused_columns=False,
)

In [8]:
from datasets import load_dataset

# Hub dataset used as the trainer's `train_dataset`; only the "train" split is loaded.
dataset_id = "CKeibel/synthetic-pirate-alpaca-small"
dataset = load_dataset(
    path=dataset_id,
    split="train",
)

In [9]:
# Build the DPO trainer around the base model with LoRA adapters.
# - ref_model=None: no separate reference model is supplied; the trainer is left to
#   derive the reference behaviour itself.
# - processing_class=tokenizer: reuse the tokenizer loaded earlier in the notebook so
#   training and the later generation cell share one instance (and one chat template)
#   instead of the trainer loading its own copy from the Hub.
trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=dpo_config,
    train_dataset=dataset,
    processing_class=tokenizer,
    peft_config=peft_config,
)



In [10]:
# Run the DPO fine-tuning loop and show the returned training summary as the cell result.
train_output = trainer.train()
train_output

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


Step,Training Loss
10,0.690303
20,0.694232
30,0.690777
40,0.679959
50,0.675151
60,0.656151
70,0.653528
80,0.643239
90,0.634006
100,0.630089


TrainOutput(global_step=200, training_loss=0.6227134132385254, metrics={'train_runtime': 1875.5322, 'train_samples_per_second': 0.853, 'train_steps_per_second': 0.107, 'total_flos': 0.0, 'train_loss': 0.6227134132385254, 'epoch': 3.176})

In [11]:
# Merge the trained adapter weights into the wrapped model and unload the adapter
# wrappers, keeping the result as `final_model` for generation and upload.
trained_model = trainer.model
final_model = trained_model.merge_and_unload()

In [15]:
# Same prompt as the baseline cell, so the two answers can be compared.
messages = [
    {"role": "user", "content": "What is Deep Learning?"}
]

# Render the conversation to a prompt string and append the generation prompt so the
# model continues with the assistant's turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# The model is still in training mode after trainer.train(), and generate() does not
# switch modes, so dropout would stay active while sampling. Put it in eval mode first.
# NOTE(review): this may not be the only cause of degenerate samples — re-check the
# output after this change.
final_model.eval()

# Move the inputs to the device of the model that actually generates (final_model),
# not the separately named `model` variable.
model_inputs = tokenizer([text], return_tensors="pt").to(final_model.device)

generated_ids = final_model.generate(
    **model_inputs,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.8,
    repetition_penalty=1.2,
    pad_token_id=tokenizer.eos_token_id,
)

# generate() returns prompt + continuation; drop the prompt tokens so that only the
# newly generated reply is decoded.
prompt_length = model_inputs.input_ids.shape[1]
completion_ids = generated_ids[:, prompt_length:]
print(tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0])

Deep D  ... and as in,," " } // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //


In [13]:
from huggingface_hub import notebook_login

# Show the Hugging Face login widget so the Hub upload in the last cell is authenticated.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Publish the merged model together with its tokenizer. Uploading only the weights
# would leave the repo without tokenizer files / chat template, so it could not be
# loaded with `pipeline("text-generation", model=...)` the way the base model is.
repo_id = "CKeibel/gpt2-medium-chat-pirate-dpo"
final_model.push_to_hub(repo_id)
tokenizer.push_to_hub(repo_id)