<a href="https://colab.research.google.com/github/ArtemBurenok/NLP-Homework/blob/main/Dialog_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
%%capture
!pip install datasets gradio trl==0.11.3

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from datasets import Dataset
import gradio as gr
from trl import PPOTrainer, PPOConfig

## Модель

In [23]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [24]:
dialogue_data = [
    {"prompt": "Hi, how are you?", "response": "I'm fine, thank you. And you?"},
    {"prompt": "What are you doing today?", "response": "I'm planning to go to the gym."},
    {"prompt": "Do you like movies?", "response": "Yes, I love watching action films."},
    {"prompt": "What's your favorite food?", "response": "I really enjoy sushi."},
    {"prompt": "Are you free tomorrow?", "response": "Yes, I am. Do you want to meet up?"},
    {"prompt": "How was your weekend?", "response": "It was great! I went hiking."},
    {"prompt": "Do you have any pets?", "response": "Yes, I have a cat named Luna."},
    {"prompt": "What music do you like?", "response": "I enjoy listening to classical music."},
    {"prompt": "Where are you from?", "response": "I'm from Canada."},
    {"prompt": "Can you help me with my homework?", "response": "Of course! What subject is it?"},
]

dataset = Dataset.from_list(dialogue_data)

## Токенизация

In [25]:
def preprocess(example):
    input_ids = tokenizer(example["prompt"], padding="max_length", truncation=True, max_length=64)
    labels = tokenizer(example["response"], padding="max_length", truncation=True, max_length=64)
    input_ids["labels"] = labels["input_ids"]
    return input_ids

tokenized_dataset = dataset.map(preprocess)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

## Fine-tune

In [26]:
training_args = TrainingArguments(
    output_dir="./dialogue_model",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    save_strategy="no",
)

In [27]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

  trainer = Trainer(


In [28]:
trainer.train()
model.save_pretrained("dialogue-ft")
tokenizer.save_pretrained("dialogue-ft")

Step,Training Loss


('dialogue-ft/tokenizer_config.json',
 'dialogue-ft/special_tokens_map.json',
 'dialogue-ft/spiece.model',
 'dialogue-ft/added_tokens.json',
 'dialogue-ft/tokenizer.json')

## RLHF

In [None]:
from transformers import AutoTokenizer
from trl import PPOConfig, PPOTrainer
from trl import AutoModelForSeq2SeqLMWithValueHead

In [None]:
tokenizer = AutoTokenizer.from_pretrained("dialogue-ft")

ppo_config = PPOConfig(
    model_name="dialogue-ft",
    learning_rate=1.41e-5,
    batch_size=1,
    mini_batch_size=1,
    gradient_accumulation_steps=1,
    log_with=None
)

In [None]:
ppo_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained("dialogue-ft")
ppo_trainer = PPOTrainer(config=ppo_config, model=ppo_model, tokenizer=tokenizer)

In [30]:
import torch

user_inputs = [
    "Tell me something interesting.",
    "Can you recommend a movie?",
]
rewards = [0.0, 1.0]

In [31]:
for prompt, reward in zip(user_inputs, rewards):
    query_tensor = tokenizer(prompt, return_tensors="pt").input_ids
    response_tensor = ppo_model.generate(query_tensor, max_new_tokens=50)
    response_text = tokenizer.decode(response_tensor[0], skip_special_tokens=True)

    print(f"\n[Prompt] {prompt}")
    print(f"[Response] {response_text}")
    print(f"[Reward] {reward}")

    ppo_trainer.step([query_tensor[0]], [response_tensor[0]], [torch.tensor(reward)])


[Prompt] Tell me something interesting.
[Response] The sandstone cliffs of the cliffs of the cliffs of the cliffs of the cliffs of the cliffs of the cliffs of the cliffs of the cliffs of the
[Reward] 0.0


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()



[Prompt] Can you recommend a movie?
[Response] I'm not sure.
[Reward] 1.0


In [32]:
ppo_model.save_pretrained("dialogue-ppo")
tokenizer.save_pretrained("dialogue-ppo")

('dialogue-ppo/tokenizer_config.json',
 'dialogue-ppo/special_tokens_map.json',
 'dialogue-ppo/spiece.model',
 'dialogue-ppo/added_tokens.json',
 'dialogue-ppo/tokenizer.json')