In [None]:
!pip install transformers datasets accelerate trl peft tensorboard

Collecting trl
  Downloading trl-0.24.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.24.0-py3-none-any.whl (423 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
Successfully installed trl-0.24.0


In [None]:
import os
import math
import json
from pathlib import Path
from typing import Dict, Any
import glob
import shutil

import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.nn.utils import clip_grad_norm_

from peft import LoraConfig, get_peft_model, PeftModel

from torch.utils.tensorboard import SummaryWriter

from transformers import get_linear_schedule_with_warmup


In [None]:
# Run on the GPU when CUDA is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# Colab only: mount Google Drive at /content/drive so the workspace folder used below is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Name of the project folder inside "My Drive"; relative paths used later
# (e.g. ./checkpoints) resolve against it once it is the working directory.
workspace = 'SFT'
workspace_path = f"/content/drive/My Drive/{workspace}"
os.chdir(workspace_path)
print("Current working dir:", os.getcwd())

Current working dir: /content/drive/My Drive/SFT


## Pretrained GPT-2 Behavior

In [None]:
# Hugging Face model id; the same name is used to load both the model and its tokenizer.
model_name = "gpt2"

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the pretrained causal-LM checkpoint named by `model_name`.
# torch_dtype="auto" leaves the choice of weight dtype to the library.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")

In [None]:
# Move the model onto the selected device; being the last expression of the cell,
# the call also displays the module tree.
model.to(device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# Tokenizer matching the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
# Show which EOS / PAD special tokens (and ids) the tokenizer currently defines.
for label, attr in (
    ("EOS token:", "eos_token"),
    ("EOS token ID:", "eos_token_id"),
    ("PAD token:", "pad_token"),
    ("PAD token ID:", "pad_token_id"),
):
    print(label, getattr(tokenizer, attr))

EOS token: <|endoftext|>
EOS token ID: 50256
PAD token: None
PAD token ID: None


In [None]:
# The tokenizer has no PAD token (it printed None in the previous cell), so reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Confirm that the PAD id now equals the EOS id.
print("PAD token ID:", tokenizer.pad_token_id)

PAD token ID: 50256


In [None]:
# Maximum sequence length the tokenizer reports for this model.
print("Tokenizer max length:", tokenizer.model_max_length)

Tokenizer max length: 1024


In [None]:
# Pad on the right so real tokens stay at the start of every sequence; the label masking in
# preprocess_function masks the prompt by leading position and relies on this.
tokenizer.padding_side = 'right'

In [None]:
# Baseline check: how does the *pretrained* model answer instruction-style prompts
# before any fine-tuning?
test_prompts = [
    "What is machine learning?",
    "Explain reinforcement learning",
    "How does PPO work?",
]

# Deterministic beam-search settings shared by every prompt.
generation_kwargs = dict(
    max_new_tokens=512,
    num_beams=5,
    early_stopping=True,
    no_repeat_ngram_size=3,
    length_penalty=1.0,
    do_sample=False,
    # The model config defines no PAD id, so generate() falls back to EOS and warns on
    # every call; passing it explicitly keeps the same behavior without the warning.
    pad_token_id=tokenizer.eos_token_id,
)

for prompt in test_prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, **generation_kwargs)
    # outputs[0] is the best beam; it contains the prompt followed by the continuation.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Prompt: {prompt}")
    print(f"Response: {response}\n")
    print("-" * 80)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: What is machine learning?
Response: What is machine learning?

Machine learning is the process by which a computer learns to perform a task. It is a process that can be done in a number of different ways.

For example, machine learning can be used to train a computer to perform certain tasks. It can also be used as a way to learn new skills. For example, a machine learning algorithm can be trained to perform tasks that are difficult to perform in the real world, such as learning how to read a book or how to write a letter. It may also be able to teach a computer how to do certain tasks in a way that is not possible in a real world. In other words, it can teach a machine how to learn how to solve certain problems in a certain way.
. Machine learning is one of the most important aspects of machine learning. It allows the computer to learn to perform specific tasks. For instance, a computer can learn to do a task that requires a certain amount of memory, or it can learn new tasks 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Explain reinforcement learning
Response: Explain reinforcement learning.

In this post, I'll show you how you can use reinforcement learning to train your brain to recognize and respond appropriately to situations. I'll also show you a simple way to learn reinforcement learning in Python, and how to use it in your own projects. I hope you enjoy this post as much as I enjoyed writing it!

If you want to learn more about reinforcement learning, check out my previous posts on reinforcement learning and reinforcement learning for Python.

--------------------------------------------------------------------------------
Prompt: How does PPO work?
Response: How does PPO work?

PPO is an open source project that allows you to create and manage your own PPO projects. PPO is a free, open-source project that can be used by anyone who wants to create a PPO project.

How do I get started with PPO

You can start using PPO by following these steps:

1. Download and install PPO from the PPO we

## Alpaca Dataset

In [None]:
from datasets import load_dataset
# Download (or load from the local cache) the Alpaca instruction-following dataset from the Hub.
dataset = load_dataset("tatsu-lab/alpaca")

README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001-a09b74b3ef9c3b(…):   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/52002 [00:00<?, ? examples/s]

In [None]:
# Display the DatasetDict to check the available splits, columns and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 52002
    })
})

In [None]:
from datasets import DatasetDict

# Hold out 5% of the original train split for validation (fixed seed, so the split is reproducible).
# The guard keeps the cell idempotent: without it, re-running the cell would split the already
# reduced train set again and silently shrink the training data.
if "validation" not in dataset:
    split = dataset['train'].train_test_split(test_size=0.05, seed=42)
    dataset = DatasetDict({
        'train': split['train'],
        'validation': split['test']
    })

In [None]:
# Display the DatasetDict again to verify the train / validation row counts after the split.
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 49401
    })
    validation: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 2601
    })
})

In [None]:
# Short aliases for the two splits, then show one training record to inspect the raw fields.
train_ds = dataset['train']
val_ds = dataset['validation']
train_ds[0]

{'instruction': 'Given a sentence, change the verb to make it in the past tense',
 'input': 'I enjoy going to the beach',
 'output': 'I enjoyed going to the beach.',
 'text': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nGiven a sentence, change the verb to make it in the past tense\n\n### Input:\nI enjoy going to the beach\n\n### Response:\nI enjoyed going to the beach.'}

In [None]:
# A second record; this one has an empty 'input' field.
train_ds[500]

{'instruction': 'Explain why the internet has become such an important tool.',
 'input': '',
 'output': 'The internet has become an essential tool for a variety of reasons, but primarily because it can provide near-instant access to a world of information. It enables people to connect with friends, family, and people from all over the world. It has also revolutionized the way people do business by allowing for online Sales and e-commerce. Additionally, it has allowed for the rapid spread of multimedia, such as music, videos, and images, as well as streaming services like Netflix. In many ways, the internet has become a vital part of our lives, connecting us with the world in ways that were never before possible.',
 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nExplain why the internet has become such an important tool.\n\n### Response:\nThe internet has become an essential tool for a variety of re

Convert each record to a standard prompt/completion format

In [None]:
def preprocess_function(example, max_length=1024):
    """Tokenize one Alpaca record into fixed-length inputs with completion-only labels.

    The text is laid out as ``Human: <instruction>[ <input>] Assistant: <output>``.
    Labels are a copy of ``input_ids`` in which everything that must not contribute
    to the loss is replaced by -100:

    * the prompt tokens (``Human: ...``), and
    * every padding position except the first one. The first pad is kept as the
      end-of-sequence target; this only works because the notebook sets
      ``pad_token = eos_token`` and pads on the right.

    Args:
        example: mapping with the fields "instruction", "input" and "output".
        max_length: length every sequence is truncated / padded to.

    Returns:
        dict with 1-D tensors "input_ids", "attention_mask" and "labels",
        each of length ``max_length``.
    """
    instruction = example["instruction"].strip()
    context = (example["input"] or "").strip()

    # Only add the optional input when it is non-empty, so the prompt never
    # contains a double space.
    prompt = f"Human: {instruction}"
    if context:
        prompt = f"{prompt} {context}"
    # The separating space belongs to the completion: the tokenizer attaches a leading
    # space to the following word, so a prompt without trailing whitespace tokenizes
    # to an exact prefix of the full text and prompt_len lands on the true boundary.
    completion = f" Assistant: {example['output']}"

    tokenized = tokenizer(
        prompt + completion,
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt",
    )
    input_ids = tokenized["input_ids"][0]
    attention_mask = tokenized["attention_mask"][0]

    # Only the completion contributes to the loss.
    prompt_len = len(tokenizer(prompt)["input_ids"])
    labels = input_ids.clone()
    labels[:prompt_len] = -100

    # Padding is identified through the attention mask rather than the token id,
    # so an EOS id that occurs inside the text is never mistaken for padding.
    # Keep the first pad (acts as EOS target) and mask the rest.
    pad_positions = (attention_mask == 0).nonzero(as_tuple=True)[0]
    labels[pad_positions[1:]] = -100

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

# Tokenize both splits. Mapping from the untouched splits in `dataset` (instead of re-assigning
# train_ds / val_ds from themselves) keeps this cell re-runnable: after one run the raw text
# columns are gone from train_ds / val_ds, so mapping them a second time would fail.
train_ds = dataset["train"].map(
    preprocess_function,
    remove_columns=dataset["train"].column_names,
)
val_ds = dataset["validation"].map(
    preprocess_function,
    remove_columns=dataset["validation"].column_names,
)

Map:   0%|          | 0/49401 [00:00<?, ? examples/s]

Map:   0%|          | 0/2601 [00:00<?, ? examples/s]

## SFT Trainer

In [None]:
from transformers import Trainer, TrainingArguments

PEFT (LoRA) configuration

In [None]:
# LoRA hyper-parameters, kept as a plain dict so they can be inspected or logged.
LORA_CONFIG = {
    "r": 32,
    "lora_alpha": 64,
    # Module-name suffixes that receive adapters. In the parameter listing further down,
    # "c_proj" matches both the attention and the MLP projection, and "wte" / "wpe" are
    # the token / position embedding tables.
    "target_modules": ["c_attn", "c_proj", "q_attn", "wte", "wpe"],
    "lora_dropout": 0.2,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
lora_config = LoraConfig(**LORA_CONFIG)

In [None]:
# Wrap the base model with new LoRA adapters built from lora_config.
model = get_peft_model(model, lora_config)



In [None]:
# Continue from the LoRA adapter saved in checkpoints/checkpoint-700.
# - is_trainable=True: PeftModel.from_pretrained loads adapters frozen by default, which is why
#   the recorded parameter inspection reported 0 trainable parameters.
# - If `model` was already wrapped by get_peft_model, remove that adapter first so the checkpoint
#   is loaded onto the plain base model instead of nesting one PeftModel inside another.
#   This also makes the cell safe to re-run.
base_model = model.unload() if isinstance(model, PeftModel) else model
model = PeftModel.from_pretrained(base_model, "checkpoints/checkpoint-700", is_trainable=True)

In [None]:
def inspect_trainable_params(model):
    """Print a summary of trainable parameters and return their names.

    Args:
        model: any object whose ``named_parameters()`` yields ``(name, tensor)`` pairs.

    Returns:
        list[str]: names of all parameters with ``requires_grad`` set, in iteration order.
        The printed example list is limited to the first 20 names.
    """
    total = 0
    trainable = 0
    details = []
    for name, param in model.named_parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
            details.append(name)
    # A module without any parameters would otherwise raise ZeroDivisionError.
    percent = 100 * trainable / total if total else 0.0
    print(f"Trainable params: {trainable:,} / {total:,} ({percent:.2f}%)")
    print("Example trainable params:", details[:20])
    return details

In [None]:
# Check how many parameters are trainable right after loading the adapter.
inspect_trainable_params(model)

Trainable params: 0 / 129,373,984 (0.00%)
Example trainable params: []


[]

In [None]:
# Turn gradients on for every adapter tensor; adapter parameters are recognised
# by the "lora_" fragment in their name, everything else is left untouched.
for name, parameter in model.named_parameters():
    is_adapter_weight = "lora_" in name
    if not is_adapter_weight:
        continue
    parameter.requires_grad = True

In [None]:
# Re-check: only the LoRA adapter tensors should now be listed as trainable.
inspect_trainable_params(model)

Trainable params: 4,934,176 / 129,373,984 (3.81%)
Example trainable params: ['base_model.model.transformer.wte.lora_embedding_A.default', 'base_model.model.transformer.wte.lora_embedding_B.default', 'base_model.model.transformer.wpe.lora_embedding_A.default', 'base_model.model.transformer.wpe.lora_embedding_B.default', 'base_model.model.transformer.h.0.attn.c_attn.lora_A.default.weight', 'base_model.model.transformer.h.0.attn.c_attn.lora_B.default.weight', 'base_model.model.transformer.h.0.attn.c_proj.lora_A.default.weight', 'base_model.model.transformer.h.0.attn.c_proj.lora_B.default.weight', 'base_model.model.transformer.h.0.mlp.c_proj.lora_A.default.weight', 'base_model.model.transformer.h.0.mlp.c_proj.lora_B.default.weight', 'base_model.model.transformer.h.1.attn.c_attn.lora_A.default.weight', 'base_model.model.transformer.h.1.attn.c_attn.lora_B.default.weight', 'base_model.model.transformer.h.1.attn.c_proj.lora_A.default.weight', 'base_model.model.transformer.h.1.attn.c_proj.lora_

['base_model.model.transformer.wte.lora_embedding_A.default',
 'base_model.model.transformer.wte.lora_embedding_B.default',
 'base_model.model.transformer.wpe.lora_embedding_A.default',
 'base_model.model.transformer.wpe.lora_embedding_B.default',
 'base_model.model.transformer.h.0.attn.c_attn.lora_A.default.weight',
 'base_model.model.transformer.h.0.attn.c_attn.lora_B.default.weight',
 'base_model.model.transformer.h.0.attn.c_proj.lora_A.default.weight',
 'base_model.model.transformer.h.0.attn.c_proj.lora_B.default.weight',
 'base_model.model.transformer.h.0.mlp.c_proj.lora_A.default.weight',
 'base_model.model.transformer.h.0.mlp.c_proj.lora_B.default.weight',
 'base_model.model.transformer.h.1.attn.c_attn.lora_A.default.weight',
 'base_model.model.transformer.h.1.attn.c_attn.lora_B.default.weight',
 'base_model.model.transformer.h.1.attn.c_proj.lora_A.default.weight',
 'base_model.model.transformer.h.1.attn.c_proj.lora_B.default.weight',
 'base_model.model.transformer.h.1.mlp.c_pro

In [None]:
# Training configuration. Effective batch size per device is
# per_device_train_batch_size * gradient_accumulation_steps = 16 sequences per optimizer step.
training_args = TrainingArguments(
    output_dir = "./checkpoints",
    eval_strategy = 'steps',
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 4,
    learning_rate = 2e-5,
    num_train_epochs = 3,
    lr_scheduler_type = 'cosine',
    warmup_steps = 100,
    save_steps = 50,
    logging_strategy = "steps",
    logging_steps = 50,
    save_strategy = "steps",
    # Keep only the two most recent checkpoints on disk.
    save_total_limit = 2,
    eval_steps = 200,
    # Log to TensorBoard only. Without an explicit value the Trainer may enable every installed
    # integration, which on Colab stops the run at an interactive wandb API-key prompt.
    report_to = "tensorboard",
)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized splits and the arguments above.
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_ds,
    eval_dataset = val_ds,
    # `processing_class` replaces the deprecated `tokenizer` keyword, which emits a
    # FutureWarning and is scheduled for removal.
    processing_class = tokenizer,
)

  trainer = Trainer(


In [None]:
# Run fine-tuning with the configuration above.
# NOTE(review): the adapter weights come from checkpoints/checkpoint-700, but this call passes no
# resume_from_checkpoint, so optimizer / scheduler state and the step counter presumably start
# from scratch. Confirm that this is intended.
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.
  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marnavnmehta1[0m ([33marnavnmehta1-nutanix[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss
200,2.1283,2.115093
400,2.1554,2.112847
600,2.0985,2.112167
800,2.2577,2.106889
1000,2.2337,2.102957
1200,2.2796,2.099492
1400,2.1931,2.095898
1600,2.254,2.093679
1800,2.2111,2.092422
2000,2.2116,2.090225
