In [1]:
import torch
from tqdm import tqdm
import pandas as pd

tqdm.pandas()

from transformers import pipeline, AutoTokenizer
from datasets import load_dataset

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
config = PPOConfig(
    model_name="radlab/polish-gpt2-small-v2",
    learning_rate=1.41e-5,
    log_with="wandb",
)

sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 16}

In [3]:
import wandb

wandb.init()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjungliana[0m ([33mpiksle[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
def build_dataset(config, dataset_name="allegro_reviews", input_min_text_length=2, input_max_text_length=8):
    """
    Build dataset for training. This builds the dataset from `load_dataset`, one should
    customize this function to train the model on its own dataset.

    Args:
        dataset_name (`str`):
            The name of the dataset to be loaded.

    Returns:
        dataloader (`torch.utils.data.DataLoader`):
            The dataloader for the dataset.
    """

    # Define a custom function to convert ratings to True or False
    def convert_to_boolean(example):
        example["label"] = example["label"] > 3
        return example
    
    tokenizer = AutoTokenizer.from_pretrained(config.model_name)
    tokenizer.pad_token = tokenizer.eos_token
    # load imdb with datasets
    ds = load_dataset(dataset_name, split="train")
    ds = ds.rename_columns({"text": "review"})
    ds = ds.rename_columns({"rating": "label"})
    ds = ds.filter(lambda x: len(x["review"]) > 100, batched=False)
    ds = ds.map(convert_to_boolean)

    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        sample["input_ids"] = tokenizer.encode(sample["review"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)
    ds.set_format(type="torch")
    return ds

In [5]:
dataset = build_dataset(config)


def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

tokenizer_config.json: 100%|██████████| 322/322 [00:00<00:00, 161kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
vocab.json: 100%|██████████| 923k/923k [00:00<00:00, 2.17MB/s]
merges.txt: 100%|██████████| 569k/569k [00:00<00:00, 1.38MB/s]
tokenizer.json: 100%|██████████| 2.38M/2.38M [00:00<00:00, 11.2MB/s]
special_tokens_map.json: 100%|██████████| 187/187 [00:00<00:00, 187kB/s]
Downloading data: 100%|██████████| 3.23M/3.23M [00:01<00:00, 2.06MB/s]
Downloading data: 100%|██████████| 349k/349k [00:01<00:00, 340kB/s]
Downloading data: 100%|██████████| 349k/349k [00:01<00:00, 309kB/s]
Generating train split: 100%|██████████| 9577/9577 [00:00<00:00, 341987.70 examples/s]
Generating test split: 100%|██████████| 1006/1006 [00:00<00:00, 143714.91 examples/s]
Generating valid

In [6]:
model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(config.model_name)
tokenizer = AutoTokenizer.from_pretrained(config.model_name)

tokenizer.pad_token = tokenizer.eos_token

config.json: 100%|██████████| 912/912 [00:00<00:00, 912kB/s]
model.safetensors: 100%|██████████| 503M/503M [00:10<00:00, 49.8MB/s] 
generation_config.json: 100%|██████████| 124/124 [00:00<?, ?B/s] 


In [7]:
ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer, dataset=dataset, data_collator=collator)

In [8]:
device = ppo_trainer.accelerator.device
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug
sentiment_pipe = pipeline("sentiment-analysis", model="eevvgg/PaReS-sentimenTw-political-PL", device=device)

config.json: 100%|██████████| 889/889 [00:00<00:00, 888kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
pytorch_model.bin: 100%|██████████| 529M/529M [00:12<00:00, 40.7MB/s] 
tokenizer_config.json: 100%|██████████| 536/536 [00:00<?, ?B/s] 
tokenizer.json: 100%|██████████| 1.44M/1.44M [00:00<00:00, 3.48MB/s]
special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 62.5kB/s]


In [9]:
text = "ten produkt jest fatalny, nie polecam!!"
sentiment_pipe(text, **sent_kwargs)



[[{'label': 'Negative', 'score': 6.1759867668151855},
  {'label': 'Neutral', 'score': -3.594167470932007},
  {'label': 'Positive', 'score': -2.946561813354492}]]

In [11]:
text = "łyżka wygodna w użyciu, dobrze sprawdza się w kuchni"
sentiment_pipe(text, **sent_kwargs)

[[{'label': 'Negative', 'score': -2.6929402351379395},
  {'label': 'Neutral', 'score': -1.1684015989303589},
  {'label': 'Positive', 'score': 3.8378612995147705}]]

In [12]:
gen_kwargs = {"min_length": -1, "top_k": 0.0, "top_p": 1.0, "do_sample": True, "pad_token_id": tokenizer.eos_token_id}

In [13]:
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)


generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    query_tensors = batch["input_ids"]

    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len = output_length_sampler()
        generation_kwargs["max_new_tokens"] = gen_len
        response = ppo_trainer.generate(query, **generation_kwargs)
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

0it [00:00, ?it/s]You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
30it [1:28:12, 176.43s/it]


KeyboardInterrupt: 

In [14]:
#### get a batch from the dataset
bs = 16
game_data = dict()
dataset.set_format("pandas")
df_batch = dataset[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from gpt2 and gpt2_ref
for i in range(bs):
    gen_len = output_length_sampler()
    output = ref_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors_ref.append(output)
    output = model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), max_new_tokens=gen_len, **gen_kwargs
    ).squeeze()[-gen_len:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### sentiment analysis of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results



Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,Słuchawki grają,biblijne zaklęcia o tak rzekomej ważności. Einno,"przez poprzek w przypadku, o których mowa w u...",4.933931,5.5814
1,"""Generalnie ok",. 14 procent siły roboczej przebywającej w szp...,", w czasie, gdy wedle ostatniego wpisu, o",3.284299,5.701523
2,Szkło nadaje się jedynie do,użytku w warunkach domowych,krajów z nią związanych,4.978797,0.006226
3,"""Solidna rzecz. Montaż","nowych okien na budynku mieszkalnym, w którym...",może być głównym dostawcą sprzedawcy PanaRese...,-2.119914,-1.25149
4,"etui ok, ale miejsca na",szczelinie np. 2aʻ,podstawie art. 8 ust. 1,5.737679,5.675669
5,Czarna ramka na,głowę [6] widoczna,podstawie art. 20 ust,1.303098,4.582406
6,"""Szkło zostało prawidłowo przy",cięte w celu recyklingu. Suche kolory,użyciu sklepu może zostać wymienione w przypa...,-2.828176,5.814958
7,"Dla amatora, który lu",zem jeździ i na tych zawodach,w tym roku kwietnia podtrzyma cywilnoprawny,5.203637,5.794578
8,Fotel,jest największym browarem w Paryżu.Zatrzymani...,". W przypadku, gdy nie byłam na terytorium",3.048746,5.703817
9,"""Fo","toklub TVP Łódź 2005,","lią. W przypadku, o",5.601231,5.932721
