# GPT

In [1]:
#%pip install trl
#%pip install datasets
#%pip install -U transformers==4.38.1

In [2]:
from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

import pandas as pd

import torch
from torch.utils.data import Dataset
from transformers import TrainingArguments, Trainer, pipeline
from sklearn.metrics import accuracy_score, f1_score
import json
from datasets import load_dataset, Split

from transformers import BertTokenizerFast, BertForSequenceClassification
import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Load Data

In [3]:
# rlhf = 'anime'
rlhf = 'steam' 
# rlhf = 'movie'

In [4]:
dataset = load_dataset("csv", data_files=f"{rlhf}.csv", split=Split.ALL)

In [5]:
dataset

Dataset({
    features: ['RPG', 'Strategy', 'Casual', 'age', 'description'],
    num_rows: 4169
})

In [6]:
display(pd.DataFrame(dataset[:5]))

Unnamed: 0,RPG,Strategy,Casual,age,description
0,False,False,False,0,"<strong>LAND, LOOT, SURVIVE!</strong><br>Play ..."
1,False,False,False,0,Counter-Strike: Global Offensive (CS: GO) expa...
2,True,False,False,17,Embark on an odyssey for the Lost Ark in a vas...
3,False,True,False,0,<strong>The most-played game on Steam.</strong...
4,True,False,False,18,<h1>Pacote Cyberpunk 2077 e Phantom Liberty</h...


## Setup Model

In [7]:
config = PPOConfig(
    model_name    = "lvwerra/gpt2-imdb",
    learning_rate = 1.41e-5,
    ## log_with      = "wandb",
)

sent_kwargs = {
         "return_all_scores": True, 
         "function_to_apply": "none", 
         "batch_size": 16
}

with open('secrets.json') as f:
    secrets = json.load(f)

In [8]:
tokenizer           = AutoTokenizer.from_pretrained(config.model_name, token=secrets['huggingface-token'])
tokenizer.pad_token = tokenizer.eos_token

In [9]:
def tokenize(sample):
        input_size = LengthSampler(2, 8)
        sample["input_ids"] = tokenizer.encode( sample["description"]    )[: input_size()]
        sample["query"]     = tokenizer.decode( sample["input_ids"] )
        return sample

dataset = dataset.filter(lambda x: x['description'] != None and len(x["description"]) > 200 and len(x['description']) < 1024
                         , batched=False)
dataset = dataset.map(tokenize, batched=False)
dataset.set_format(type="torch")
print(dataset.__len__)

<bound method Dataset.__len__ of Dataset({
    features: ['RPG', 'Strategy', 'Casual', 'age', 'description', 'input_ids', 'query'],
    num_rows: 704
})>


In [10]:
def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

In [11]:
model     = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name, token=secrets['huggingface-token'])
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name, token=secrets['huggingface-token'])

ppo_trainer = PPOTrainer(
                 config, 
                 model, 
                 ref_model, 
                 tokenizer, 
                 dataset=dataset, 
                 data_collator=collator
)

device = 'cuda'
print(model)

  return self.fget.__get__(instance, owner)()


AutoModelForCausalLMWithValueHead(
  (pretrained_model): GPT2LMHeadModel(
    (transformer): GPT2Model(
      (wte): Embedding(50257, 768)
      (wpe): Embedding(1024, 768)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-11): 12 x GPT2Block(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2Attention(
            (c_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): NewGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=768, out_features=50257, bias=False)
  )
  (

## Load Classifier (Reward Functino)

In [12]:
# i guess it makes sense they cant have the same tokenizer
model_class     = BertForSequenceClassification.from_pretrained(f"{rlhf}-model/final-trainer")
tokenizer_class = BertTokenizerFast.from_pretrained("bert-base-uncased")
print(tokenizer)

sent_kwargs = {
         "return_all_scores": True, 
         "function_to_apply": "none", 
         "batch_size": 16
}

nlp = pipeline("sentiment-analysis", model=model_class, tokenizer=tokenizer_class, device=device)

GPT2TokenizerFast(name_or_path='lvwerra/gpt2-imdb', vocab_size=50257, model_max_length=1024, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<|endoftext|>'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}


In [13]:
text = "For hearing-impaired university student Yuki Itose, silence has been a natural part of life since birth. Her world is small and isolated; she commutes to campus, interacts with her best friend Rin Fujishiro, and communicates through writing and text messages—a lifestyle that offers little to no change. One day, during her commute, Yuki meets fellow student Itsuomi Nagi, a multilingual travel enthusiast and friend of Rin. When Itsuomi learns of Yuki's condition, he takes it in stride, moving Yuki's heart. From this one simple gesture, Yuki and Itsuomi's lives start changing day by day as they let each other into their own worlds."
nlp(text, **sent_kwargs)



[[{'label': 'RPG', 'score': -0.17563088238239288},
  {'label': 'Strategy', 'score': -1.0779798030853271},
  {'label': 'Casual', 'score': 0.7542895078659058}]]

## Generator

In [14]:
gen_kwargs = {
         "min_length":   -1, 
         "top_k":       0.0, 
         "top_p":       1.0, 
         "do_sample":  True, 
         "pad_token_id": tokenizer.eos_token_id,
}

In [15]:
output_min_length     = 4
output_max_length     = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)

In [16]:
pbar = tqdm.tqdm(enumerate(ppo_trainer.dataloader), total=len(ppo_trainer.dataloader))
for epoch, batch in pbar:
    query_tensors = batch["input_ids"]

    #### Get response from gpt2
    response_tensors = []
    for i, query in enumerate(query_tensors):
        pbar.set_description(f"{i} / {len(query_tensors)}")
        gen_len                             = output_length_sampler()
        gen_kwargs["max_new_tokens"] = gen_len
        response                            = ppo_trainer.generate(query, **gen_kwargs)
        response_tensors.append( response.squeeze()[-gen_len:] )
    batch["response"] = [ tokenizer.decode(r.squeeze()) for r in response_tensors ]

    #### Compute sentiment score
    pbar.set_description("Sentiment")
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = nlp(texts, **sent_kwargs)
    rewards = [ torch.tensor(output[2]["score"]) for output in pipe_outputs]

    #### Run PPO step
    pbar.set_description("PPO")
    stats = ppo_trainer.step(
                     query_tensors, 
                     response_tensors, 
                     rewards
    )
    ppo_trainer.log_stats(stats, batch, rewards)

PPO: 100%|███████████████████████████████████████████████████████████████████████████████| 5/5 [04:34<00:00, 54.86s/it]


## Get the Results

In [17]:
bs                 = 16
game_data          = {}

dataset.set_format("pandas")
df_batch           = dataset[:].sample(bs)
df_batch 

Unnamed: 0,RPG,Strategy,Casual,age,description,input_ids,query
18,False,False,True,0,Meet beautiful girls and spend time in their c...,"[29318, 4950]",Meet beautiful
337,False,False,True,0,Spikit is a bullet hell game developed by a si...,"[4561, 1134, 270, 318, 257]",Spikit is a
174,False,True,True,0,"Sometimes: Success Requires Sacrifice, the tit...","[15468, 25, 16282]",Sometimes: Success
51,False,True,True,0,A chaotic strategy game with cubes!<br><br>You...,"[32, 23458, 4811, 983]",A chaotic strategy game
498,False,False,False,0,ESEA is a third party matchmaking service that...,"[1546, 16412, 318, 257, 2368, 2151]",ESEA is a third party
63,False,False,False,0,&quot;The ultimate depiction of the Three King...,"[5, 421, 313, 26]",&quot;
264,False,False,True,0,"Broken Age is a family friendly, hand-animated...","[15783, 3464, 7129, 318, 257, 1641]",Broken Age is a family
12,False,False,True,0,Welcome to HOLOCURE!<br />\r\n<br />\r\nThis i...,"[14618, 284, 49707, 4503, 11335, 0, 27]",Welcome to HOLOCURE!<
609,False,False,False,0,"<h1>Buzz</h1><p><a href=""http://store.steampow...","[27, 71, 16, 29, 48230, 3556, 71]",<h1>Buzz</h
125,False,False,False,0,"Dungeon of Zolthan is a casual, arcade-style m...","[30128, 6281, 286]",Dungeon of


In [18]:
game_data["query"] = df_batch["query"].tolist()
query_tensors      = df_batch["input_ids"].tolist()
response_tensors_ref, response_tensors = [], []
#### get response from gpt2 and gpt2_ref
for i in range(bs):
    gen_len = output_length_sampler()
    
    output  = ref_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors_ref.append(output)
    
    
    output = model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors.append(output)

In [19]:
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"]  = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### sentiment analysis of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[2]["score"] for output in nlp(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[2]["score"] for output in nlp(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Meet beautiful,which appears to be a shabby funhouse,Meet beautiful and humanly accessible.\n<|endo...,1.987484,1.234699
1,Spikit is a,helps his daughter care for his little sister,strengths and undoubtedly all the flaws to be,1.338162,0.979596
2,Sometimes: Success,Sometimes: Successful low-rent films from Jona...,Sometimes: Success and probably repressed succ...,1.477527,1.372287
3,A chaotic strategy game,chaotic strategy game that lacks any human in...,"chaotic strategy game, though it sounds very ...",0.351071,0.32374
4,ESEA is a third party,attention (or move it) to,><br />If you look centuries,1.202083,0.869735
5,&quot;,further than a warning,title girl in the,0.642475,0.954562
6,Broken Age is a family,"reasons). Folks like Chappelle,",really like my babies. Pregnant is,1.062482,0.703955
7,Welcome to HOLOCURE!<,Díaz is a,is a masterpiece. It,1.293093,1.608545
8,<h1>Buzz</h,/>The Captain was made by The Crimson Tide,and four models quickly jockey up a hill,0.270884,1.617888
9,Dungeon of,Dungeon of Elven IV features a terrible side e...,Dungeon of Bog Valley. Despite its reputation ...,-1.040911,0.099277


In [20]:
print("mean:")
display(df_results[["rewards (before)", "rewards (after)"]].mean())
print()
print("median:")
display(df_results[["rewards (before)", "rewards (after)"]].median())

mean:


rewards (before)    0.854458
rewards (after)     0.918902
dtype: float64


median:


rewards (before)    0.948932
rewards (after)     0.967079
dtype: float64

## Save the Model

In [21]:
model.save_pretrained(    f"{rlhf}-model/gpt2-horror", push_to_hub=False)
tokenizer.save_pretrained(f"{rlhf}-model/gpt2-horror", push_to_hub=False)

('steam-model/gpt2-horror\\tokenizer_config.json',
 'steam-model/gpt2-horror\\special_tokens_map.json',
 'steam-model/gpt2-horror\\vocab.json',
 'steam-model/gpt2-horror\\merges.txt',
 'steam-model/gpt2-horror\\added_tokens.json',
 'steam-model/gpt2-horror\\tokenizer.json')