In [None]:
!pip install -q transformers huggingface_hub datasets trl tqdm torch

In [2]:
import torch
from datasets import load_dataset
from huggingface_hub import interpreter_login
from tqdm import tqdm
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

In [3]:
# Prompt for a Hugging Face access token in the notebook and log in to the Hub.
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Enter your token (input will not be visible): ··········
Add token as git credential? (Y/n) n


In [4]:
def data_prep(settings, model_name="lvwerra/gpt2-imdb", data="stanfordnlp/imdb", text_min_length=2, text_max_length=8, max_rows=1000, min_review_chars=200):
    """Build a dataset of short tokenized prompts cut from the start of reviews.

    Loads the ``train`` split of ``data``, keeps only reviews longer than
    ``min_review_chars`` characters, takes at most ``max_rows`` of them and
    truncates each review to a number of tokens drawn from a ``LengthSampler``.

    Args:
        settings: Accepted for call compatibility; not read by this function.
        model_name: Checkpoint whose tokenizer encodes and decodes the reviews.
        data: Dataset identifier passed to ``load_dataset``.
        text_min_length: First bound handed to ``LengthSampler``.
        text_max_length: Second bound handed to ``LengthSampler``.
        max_rows: Maximum number of rows kept after filtering. If fewer rows
            survive the filter, all of them are kept.
        min_review_chars: Reviews must be strictly longer than this many
            characters to be kept.

    Returns:
        The dataset with the ``"review"`` column removed and two columns
        added per row: ``"input_ids"`` (the truncated token ids) and
        ``"query"`` (those ids decoded back to text). Other columns are left
        as loaded.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token

    ds = load_dataset(data, split="train")
    ds = ds.rename_columns({"text": "review"})

    ds = ds.filter(lambda item: len(item["review"]) > min_review_chars, batched=False)
    # Clamp to the rows that actually survived the filter; selecting indices
    # past the end of the dataset would raise.
    ds = ds.select(range(min(max_rows, len(ds))))
    segment_size = LengthSampler(text_min_length, text_max_length)

    def tokenize_fn(example):
        """Replace a review by its first few token ids and their decoded text."""
        ids = tokenizer.encode(example["review"])
        prefix = ids[:segment_size()]  # a fresh length is sampled for every row
        example["input_ids"] = prefix
        example["query"] = tokenizer.decode(prefix)
        return example

    ds = ds.map(tokenize_fn, batched=False, remove_columns=["review"])
    return ds

def batch_collator(entries):
    """Turn a list of per-example dicts into one dict of per-key lists.

    The keys are taken from the first entry; every entry must provide each of
    those keys. Values are gathered in the order the entries appear.
    """
    first = entries[0]
    collated = {}
    for key in first:
        collated[key] = [entry[key] for entry in entries]
    return collated


In [5]:
# PPO configuration: only the learning rate is set explicitly here.
settings = PPOConfig(learning_rate=1.5e-5)

In [None]:
# Checkpoints used in this cell: the language model being tuned and the
# sentiment classifier used both as reward model and in the scoring pipeline.
model_name = "lvwerra/gpt2-imdb"
reward_model_name = "lvwerra/distilbert-imdb"

# Pass the checkpoint explicitly so the prompts are tokenized with the same
# tokenizer as the model loaded below.
dataset = data_prep(settings, model_name=model_name)

# Two independent copies of the same checkpoint: one is handed to the trainer
# as `model`, the other as `ref_model`.
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
frozen_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token

reward_model = AutoModelForSequenceClassification.from_pretrained(reward_model_name)

# NOTE(review): confirm that the installed TRL release accepts value-head
# wrapper models for `model`/`value_model` and a list-returning
# `data_collator`; this call combines them with the Trainer-style keyword API.
ppo_trainer = PPOTrainer(
    args=settings,
    processing_class=tokenizer,
    model=model,
    ref_model=frozen_model,
    reward_model=reward_model,
    value_model=model,
    train_dataset=dataset,
    data_collator=batch_collator,
)

# Use the accelerator's device, except in the single-process case where the
# pipeline is given GPU index 0 when CUDA is available and "cpu" otherwise.
device = ppo_trainer.accelerator.device
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"

sentiment_pipe = pipeline("sentiment-analysis", model=reward_model_name, device=device)

# `top_k=None` requests a score for every label and replaces the deprecated
# `return_all_scores=True`; `function_to_apply="none"` asks the pipeline not
# to apply an activation function to the model outputs.
sent_kwargs = {"top_k": None, "function_to_apply": "none", "batch_size": 16}

In [None]:
# Sanity check: print the sentiment pipeline's output for a clearly negative sentence.
text = "this movie was really bad!!"
print(sentiment_pipe(text, **sent_kwargs))

In [None]:
# Sanity check: print the sentiment pipeline's output for a clearly positive sentence.
text = "this movie was really good!!"
print(sentiment_pipe(text, **sent_kwargs))

In [None]:
# Run training with the trainer configured above.
ppo_trainer.train()


In [None]:
# Write the trained model to a local directory; the following cell reloads it
# from this path. A PPOTrainer constructed with `args=`/`processing_class=` is
# a `transformers.Trainer` subclass, whose saving method is `save_model`
# (it has no `save_pretrained`).
ppo_trainer.save_model("aligned-gpt2-imdb")


In [None]:
# Reload the saved checkpoint as a plain `AutoModelForCausalLM` together with
# its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("aligned-gpt2-imdb")
model = AutoModelForCausalLM.from_pretrained("aligned-gpt2-imdb")

# `push_to_hub` is not a loading option of `from_pretrained`; uploading to the
# Hub is done by calling the method explicitly on each object.
tokenizer.push_to_hub("aligned-gpt2-imdb")
model.push_to_hub("aligned-gpt2-imdb")


In [None]:
# Generate up to 20 new tokens from a fixed prompt with the reloaded model
# and print the decoded text without special tokens.
inputs = tokenizer("The meaning of life is", return_tensors="pt")
out    = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0], skip_special_tokens=True))