### Установка библиотек, импорты

In [None]:
#!g1.1
%pip uninstall keras tensorflow transformers
%pip install --upgrade keras tensorflow transformers

In [74]:
#!g1.1
import torch
from tqdm import tqdm
import pandas as pd

from dataclasses import dataclass, field
from typing import Dict, Optional

tqdm.pandas()

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader, RandomSampler, random_split

from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, DPOTrainer
from trl.core import LengthSampler

2023-11-29 15:43:31.172082: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-29 15:43:31.172300: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-29 15:43:32.644269: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [75]:
#!g1.1
device = 0 if torch.cuda.is_available() else "cpu"

### Настраиваем конфиг

In [2]:
#!g1.1
config = PPOConfig(
    model_name="lvwerra/gpt2-imdb",
    learning_rate=1.41e-5,
    log_with="wandb",
)


In [76]:
#!g1.1

args = {
    "model_name": "lvwerra/gpt2-imdb",
    "report_to": "wandb",
    "learning_rate": 1e-3,
    "per_device_train_batch_size": 16,
    "max_length": 512
}

### Инициализация wandb

In [228]:
#!g1.1
import wandb

wandb.init()

### Вспомогательная функция для генерации датасета

In [105]:
#!g1.1
def build_dataset(args, dataset_name="imdb", input_min_text_length=2, input_max_text_length=8):
    """
    Build dataset for training. This builds the dataset from `load_dataset`, one should
    customize this function to train the model on its own dataset.

    Args:
        dataset_name (`str`):
            The name of the dataset to be loaded.

    Returns:
        dataloader (`torch.utils.data.DataLoader`):
            The dataloader for the dataset.
    """
    tokenizer = AutoTokenizer.from_pretrained(args["model_name"])
    tokenizer.pad_token = tokenizer.eos_token
    # load imdb with datasets
    ds = load_dataset(dataset_name, split="train")
    ds = ds.rename_columns({"text": "review"})
    ds = ds.filter(lambda x: len(x["review"]) > 200, batched=False)

#     input_size = LengthSampler(input_min_text_length, input_max_text_length)
    input_size = 8
    def tokenize(sample):
        sample["input_ids"] = tokenizer.encode(sample["review"])[: input_size]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)
    ds.set_format(type="torch")
    return ds

In [128]:
#!g1.1
dataset = build_dataset(args)

In [129]:
#!g1.1
dataset = dataset.remove_columns("label")

In [214]:
#!g1.1
model = AutoModelForCausalLM.from_pretrained(args["model_name"])
ref_model = AutoModelForCausalLM.from_pretrained(args["model_name"])
tokenizer = AutoTokenizer.from_pretrained(args["model_name"], padding_side='left')

tokenizer.pad_token = tokenizer.eos_token

### Посмотрим, как выглядят данные

In [131]:
#!g1.1
dataset[0]["input_ids"]

tensor([   40, 26399,   314,  3001,   327, 47269, 20958,    12])

In [132]:
#!g1.1
tokenizer.decode(dataset[0]["input_ids"])

'I rented I AM CURIOUS-'

In [133]:
#!g1.1
dataset[0]["review"]

'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, ev

Мы видим, что в `input_ids` лежит обрезанное ревью. Пусть это будет нашим промптом. По этому промпту мы будем генерировать N выходов и среди них сделаем N-1 пар вида `winner-loser`, где `winner sentiment score` > `loser sentiment score`

### Генерация и оценка текста

In [134]:
#!g1.1
sent_kwargs = {"top_k": None, "function_to_apply": "none", "batch_size": 16}

In [135]:
#!g1.1
sentiment_pipe = pipeline("sentiment-analysis", model="lvwerra/distilbert-imdb", device=device)

In [136]:
#!g1.1
text = "this movie was really bad!!"
sentiment_pipe(text, **sent_kwargs)

[{'label': 'NEGATIVE', 'score': 2.335048198699951},
 {'label': 'POSITIVE', 'score': -2.7265758514404297}]

In [137]:
#!g1.1
text = "this movie was really good!!"
sentiment_pipe(text, **sent_kwargs)

[{'label': 'POSITIVE', 'score': 2.557040214538574},
 {'label': 'NEGATIVE', 'score': -2.294790267944336}]

In [162]:
#!g1.1
gen_kwargs = {"min_length": -1,
              "max_new_tokens": 20,
              "top_k": 0.0,
              "top_p": 1.0,
              "do_sample": True,
              "num_return_sequences": 2,
              "pad_token_id": tokenizer.eos_token_id}

In [163]:
#!g1.1
generator = pipeline('text-generation', model=args["model_name"], device=device)

In [164]:
#!g1.1
generated_samples = generator(tokenizer.decode(dataset[0]["input_ids"]), **gen_kwargs)
for sample in generated_samples:
    output = sample["generated_text"]
    print(output)
    print(sentiment_pipe(output, **sent_kwargs))

I rented I AM CURIOUS-AVIAN AND set a date for the tour with my favorite talent...and it never happened. D
[{'label': 'NEGATIVE', 'score': 0.7753939628601074}, {'label': 'POSITIVE', 'score': -1.1945312023162842}]
I rented I AM CURIOUS-BANDTHIS is basically just...all comedy in one campy poor-ass tale. Walk me
[{'label': 'NEGATIVE', 'score': 1.9472341537475586}, {'label': 'POSITIVE', 'score': -2.3995108604431152}]


Мы научились генерировать и оценивать positive/negative label текстов. Настала пора создавать датасет

In [165]:
#!g1.1
dataset[0]

{'input_ids': tensor([   40, 26399,   314,  3001,   327, 47269, 20958,    12]),
 'query': 'I rented I AM CURIOUS-'}

In [142]:
#!g1.1
output = [sample["generated_text"] for sample in generated_samples]
sentiment_samples = sentiment_pipe(output, **sent_kwargs)
# sentiments = [result for result in sentiment_samples if result["label"] == "POSITIVE"]
# sentiments
sentiment_samples

[[{'label': 'NEGATIVE', 'score': 2.0228464603424072},
  {'label': 'POSITIVE', 'score': -2.4736461639404297}],
 [{'label': 'POSITIVE', 'score': 2.4222044944763184},
  {'label': 'NEGATIVE', 'score': -2.1730639934539795}]]

In [143]:
#!g1.1
import warnings
warnings.filterwarnings("ignore")

In [148]:
#!g1.1
dataset = dataset.remove_columns("review")

In [149]:
#!g1.1
loader = DataLoader(
    dataset,
    batch_size=sent_kwargs["batch_size"],
    shuffle=False,
    num_workers=8
    )

In [150]:
#!g1.1
print(dataset[0]["input_ids"])
print(dataset[1]["input_ids"])

tensor([   40, 26399,   314,  3001,   327, 47269, 20958,    12])
tensor([    1,    40,  1703, 44269,    25, 12550,     1,   318])


In [179]:
#!g1.1
sentiment_pipe(tokenizer.decode(dataset[0]["input_ids"]), **sent_kwargs)

[{'label': 'POSITIVE', 'score': 0.2557511627674103},
 {'label': 'NEGATIVE', 'score': -0.3383306860923767}]

In [181]:
#!g1.1
sentiment_pipe(tokenizer.decode(dataset[2]["input_ids"]), **sent_kwargs)

[{'label': 'NEGATIVE', 'score': 1.3919830322265625},
 {'label': 'POSITIVE', 'score': -1.791073203086853}]

In [151]:
#!g1.1
prompt_chosen_rejected_list = {"prompt": [], "chosen": [], "rejected": []}

In [170]:
#!g1.1
query

tensor([   40, 26399,   314,  3001,   327, 47269, 20958,    12])

In [172]:
#!g1.1
generator('', **gen_kwargs)

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


[{'generated_text': "hmmmm 70's (yum!) bloodbudget production and a 46 year old undercarriage produces"},
 {'generated_text': ' e LCR has never been heard before again. A film with just a distaste for the subject'}]

In [178]:
#!g1.1
generator(, **gen_kwargs)

[]

In [183]:
#!g1.1
new_prompt_chosen_rejected_list = {"chosen": [], "rejected": []}

In [189]:
#!g1.1
sentiment

[[{'label': 'POSITIVE', 'score': 0.08281411975622177},
  {'label': 'NEGATIVE', 'score': -0.3155623972415924}],
 [{'label': 'NEGATIVE', 'score': 0.5138266682624817},
  {'label': 'POSITIVE', 'score': -0.6836692690849304}],
 [{'label': 'POSITIVE', 'score': 0.24299603700637817},
  {'label': 'NEGATIVE', 'score': -0.316336452960968}],
 [{'label': 'NEGATIVE', 'score': 0.7443016767501831},
  {'label': 'POSITIVE', 'score': -1.1051040887832642}],
 [{'label': 'NEGATIVE', 'score': 0.7930777668952942},
  {'label': 'POSITIVE', 'score': -1.1245578527450562}]]

In [191]:
#!g1.1
import warnings
warnings.filterwarnings("ignore")

In [193]:
#!g1.1
query = ''
gen_kwargs["num_return_sequences"] = 5
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)

for i in tqdm(range(10000)):
    gen_len = output_length_sampler()
    gen_kwargs["max_new_tokens"] = gen_len
    texts_gen = generator(query, **gen_kwargs)
    texts = [elem['generated_text'] for elem in texts_gen]
    sentiment_samples = sentiment_pipe(texts, **sent_kwargs)
    positive_scores = []
    for sent in sentiment_samples:
        for dict_label_score in sent:
            if dict_label_score['label'] == 'POSITIVE':
                positive_scores.append(dict_label_score['score'])
    max_index = positive_scores.index(max(positive_scores))
    min_index = positive_scores.index(min(positive_scores))
    new_prompt_chosen_rejected_list["chosen"].append(texts[max_index])
    new_prompt_chosen_rejected_list["rejected"].append(texts[min_index])

In [201]:
#!g1.1
new_prompt_chosen_rejected_list["prompt"] = [tokenizer.pad_token for _ in range(len(new_prompt_chosen_rejected_list["chosen"]))]

In [225]:
#!g1.1
args = {
    "model_name": "lvwerra/gpt2-imdb",
    "report_to": "wandb",
    "learning_rate": 1e-3,
    "per_device_train_batch_size": 16,
    "max_length": 512,
    "max_steps": 15000,
    "gradient_accumulation_steps": 1,
    "beta": 0.1
}

In [203]:
#!g1.1
from transformers import TrainingArguments

In [217]:
#!g1.1
train_split = {"prompt": new_prompt_chosen_rejected_list["prompt"][:7000],
                "chosen": new_prompt_chosen_rejected_list["chosen"][:7000],
                "rejected": new_prompt_chosen_rejected_list["rejected"][:7000]}

eval_split = {"prompt": new_prompt_chosen_rejected_list["prompt"][7000:],
                "chosen": new_prompt_chosen_rejected_list["chosen"][7000:],
                "rejected": new_prompt_chosen_rejected_list["rejected"][7000:]}


In [219]:
#!g1.1
import datasets
train_dataset = datasets.Dataset.from_dict(train_split)
eval_dataset = datasets.Dataset.from_dict(eval_split)

In [229]:
#!g1.1
training_args = TrainingArguments(
        per_device_train_batch_size=args["per_device_train_batch_size"],
        max_steps=args["max_steps"],
        remove_unused_columns=False,
        gradient_accumulation_steps=args["gradient_accumulation_steps"],
        learning_rate=args["learning_rate"],
        evaluation_strategy="steps",
        logging_first_step=True,
        logging_steps=10,
        eval_steps=4000,
        output_dir="./test",
        optim="rmsprop",
        warmup_steps=100,
        report_to=args["report_to"],
        gradient_checkpointing=False,
    )

In [230]:
#!g1.1
dpo_trainer = DPOTrainer(
        model,
        ref_model,
        args=training_args,
        beta=args["beta"],
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        max_length=args["max_length"],
#         max_target_length=script_args.max_target_length,
#         max_prompt_length=script_args.max_prompt_length,
        generate_during_eval=True,
    )


In [None]:
#!g1.1
dpo_trainer.train()

Step,Training Loss,Validation Loss


In [None]:
#!g1.1
output_min_length = 4
output_max_length = 16
output_length_sampler = LengthSampler(output_min_length, output_max_length)

for epoch, batch in tqdm(enumerate(loader)):
    query_tensors = batch["input_ids"]

    #### Get response from gpt2
    response_tensors = []
    for query in query_tensors:
        gen_len = output_length_sampler()
        gen_kwargs["max_new_tokens"] = gen_len
        response = generator(query, **gen_kwargs)
        response = model.generate(query, **generation_kwargs)
        response_tensors.append(response.squeeze()[-gen_len:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)


In [167]:
#!g1.1
for d in tqdm(loader):
    query = d["query"]
    generated_samples = generator(query, **gen_kwargs)
    texts = []
    for batch_elem in generated_samples:
        for x in batch_elem:
            texts.append(x['generated_text'])
            
    sentiment_samples = sentiment_pipe(texts, **sent_kwargs)
    positive_scores = []
    for sent in sentiment_samples:
        for dict_label_score in sent:
            if dict_label_score['label'] == 'POSITIVE':
                positive_scores.append(dict_label_score['score'])
    # generate only 2 samples
    prompt_chosen_rejected_list["prompt"].extend(query)
    for i in range(0, 2*len(query), 2):
        if positive_scores[i] > positive_scores[i+1]:
            prompt_chosen_rejected_list["chosen"].append(texts[i])
            prompt_chosen_rejected_list["rejected"].append(texts[i+1])
        else:
            prompt_chosen_rejected_list["chosen"].append(texts[i+1])
            prompt_chosen_rejected_list["rejected"].append(texts[i])

 10%|█         | 158/1556 [15:01<2:13:00,  5.71s/it]


KeyboardInterrupt: 

In [4]:
#!g1.1
def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

In [7]:
#!g1.1
ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer, dataset=dataset, data_collator=collator)

In [None]:
#!g1.1
dpo_trainer = DPOTrainer

In [20]:
#!g1.1
response = ppo_trainer.generate(ppo_trainer.dataset[0]["input_ids"].to(device), **gen_kwargs)
hr_response = tokenizer.decode(response.squeeze())
hr_response



'I rented I AM ERIVEN NEIGHBECK but I will check that another angel gets sent'

In [25]:
#!g1.1
ppo_trainer.dataset[0]

NameError: name 'ppo_trainer' is not defined

In [None]:
#!g1.1
