In [31]:
!pip install datasets



In [32]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load DailyDialog dataset (or any dialogue dataset)
dataset = load_dataset("daily_dialog")

# Use the training split for fine-tuning
train_dataset = dataset["train"]
eval_dataset = dataset["validation"]


In [33]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("TheLongTran/Dialogue-For-ChatBot")
model = AutoModelForCausalLM.from_pretrained("TheLongTran/Dialogue-For-ChatBot")

# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForCausalLM.from_pretrained(model_path)

In [34]:
train_dataset

Dataset({
    features: ['dialog', 'act', 'emotion'],
    num_rows: 11118
})

formatting the training datasets

In [35]:
def build_dataset(dataset, input_min_text_length, input_max_text_length):
  dataset = dataset.filter(lambda x: len(x["dialog"]) > input_min_text_length and len(x['dialog']) <= input_max_text_length, batched=False)


  def tokenize(sample):
    prompt = str(sample["dialog"])
    sample["input_ids"] = tokenizer.encode(prompt)
    sample['query'] = tokenizer.decode(sample["input_ids"])
    return sample

  dataset = dataset.map(tokenize, batched=False)
  dataset.set_format(type="torch")

  dataset_splits = dataset.train_test_split(test_size=0.2)
  return dataset_splits

train_dataset = build_dataset(train_dataset, input_min_text_length = 5, input_max_text_length = 20)

In [36]:
train_dataset

DatasetDict({
    train: Dataset({
        features: ['dialog', 'act', 'emotion', 'input_ids', 'query'],
        num_rows: 5790
    })
    test: Dataset({
        features: ['dialog', 'act', 'emotion', 'input_ids', 'query'],
        num_rows: 1448
    })
})

Set up the ppo_model

In [37]:
pip install trl==0.11.3



In [38]:
from transformers import AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, pipeline, GenerationConfig
from trl import AutoModelForCausalLMWithValueHead, create_reference_model
import torch
import numpy as np


In [70]:
ppo_model = AutoModelForCausalLMWithValueHead.from_pretrained(model,
                                                               torch_dtype=torch.bfloat16,
                                                               is_trainable=True)



In [71]:
ref_model = create_reference_model(ppo_model)
ref_model = ref_model.to("cuda")

In [72]:
toxicity_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
toxicity_tokenizer = AutoTokenizer.from_pretrained(toxicity_model_name)
toxicity_model = AutoModelForSequenceClassification.from_pretrained(toxicity_model_name)
toxicity_model.to("cuda")

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [73]:
toxic_text = "You are terrible and I damn hate you"
non_toxic_text = "I love you"
toxicity_input_ids = toxicity_tokenizer(toxic_text, return_tensors = 'pt').input_ids.to("cuda")
print(toxicity_input_ids)


logits = toxicity_model(toxicity_input_ids).logits.to("cuda")
print(logits)

probabilities = logits.softmax(dim = -1).tolist()
print(probabilities)

not_hate_index = 0

nothate_reward = (logits[:, not_hate_index]).tolist()
print(nothate_reward)

tensor([[    0,  1185,    32,  6587,     8,    38, 16490,  4157,    47,     2]],
       device='cuda:0')
tensor([[ 4.7102, -4.2254]], device='cuda:0', grad_fn=<AddmmBackward0>)
[[0.9998683929443359, 0.00013160303933545947]]
[4.71022367477417]


In [74]:
non_toxic_text = "#Person 1# tells Tommy that he didn't like the movie."

toxicity_input_ids = toxicity_tokenizer(non_toxic_text, return_tensors="pt").input_ids.to("cuda")

logits = toxicity_model(input_ids=toxicity_input_ids).logits
print(f'logits [not hate, hate]: {logits.tolist()[0]}')

# Print the probabilities for [not hate, hate]
probabilities = logits.softmax(dim=-1).tolist()[0]
print(f'probabilities [not hate, hate]: {probabilities}')

# get the logits for "not hate" - this is the reward!
not_hate_index = 0
nothate_reward = (logits[:, not_hate_index]).tolist()
print(f'reward (high): {nothate_reward}')

logits [not hate, hate]: [3.1141021251678467, -2.4896185398101807]
probabilities [not hate, hate]: [0.9963293671607971, 0.00367060792632401]
reward (high): [3.1141021251678467]


In [75]:
device = 0 if torch.cuda.is_available() else "cpu"

sentiment_pipeline = pipeline("sentiment-analysis",
                              model=toxicity_model_name,
                              device = device)

reward_logits_kwargs = {
    "top_k": None,
    "function_to_apply": "none",
    "batch_size": 16
}

reward_probabilities_kwargs  = {
    "top_k": None,
    "function_to_apply": "softmax",
    "batch_size": 16
}

print("Reward model output for non-toxic text:")
print(sentiment_pipeline(non_toxic_text, **reward_logits_kwargs))
print(sentiment_pipeline(non_toxic_text, **reward_probabilities_kwargs))

print("Reward model output for toxic text:")
print(sentiment_pipeline(toxic_text, **reward_logits_kwargs))
print(sentiment_pipeline(toxic_text, **reward_probabilities_kwargs))


Device set to use cuda:0


Reward model output for non-toxic text:
[{'label': 'nothate', 'score': 3.1141021251678467}, {'label': 'hate', 'score': -2.4896185398101807}]
[{'label': 'nothate', 'score': 0.9963293671607971}, {'label': 'hate', 'score': 0.0036706076934933662}]
Reward model output for toxic text:
[{'label': 'nothate', 'score': 4.71022367477417}, {'label': 'hate', 'score': -4.225365161895752}]
[{'label': 'nothate', 'score': 0.9998683929443359}, {'label': 'hate', 'score': 0.00013160303933545947}]


In [76]:
!pip install evaluate



In [77]:
import evaluate

In [78]:
toxicity_evaluator = evaluate.load("toxicity",
                                    toxicity_model_name,
                                    module_type="measurement",
                                    toxic_label='toxic')

Device set to use cuda:0


In [79]:
def evaluate_toxicity(model,
                      toxicity_evaluator,
                      tokenizer,
                      dataset,
                      num_samples):

  max_new_tokens = 100

  toxicities = []
  input_texts = []
  for i, sample in tqdm(enumerate(dataset)):
    input_text = sample["query"]
    if i > num_samples:
      break

    input_ids = tokenizer(input_text, return_tensors = "pt").input_ids.to("cuda")
    generation_config = GenerationConfig(max_new_tokens = max_new_tokens,
                                         top_k = 0.0,
                                         top_p = 1.0,
                                         do_sample = True)
    response_token_ids = model.generate(input_ids = input_ids,
                                        generation_config = generation_config)

    generated_text = tokenizer.decode(response_token_ids[0], skip_special_tokens = True)

    toxicity_score = toxicity_evaluator.compute(
        predictions=[(input_text + " " + generated_text)]
    )

    toxicities.append(toxicity_score["toxicity"])
    input_texts.append(input_text)

  mean = np.mean(toxicities)
  std = np.std(toxicities)

  return mean, std



In [80]:
!pip install tqdm



In [81]:
from tqdm import tqdm

In [82]:
mean_before_detoxification, std_before_detoxification = evaluate_toxicity(
                      model = ref_model,
                      toxicity_evaluator = toxicity_evaluator,
                      tokenizer = tokenizer,
                      dataset = train_dataset['test'],
                      num_samples = 10)

print(f"Mean before detox: {mean_before_detoxification}")
print(f"Std before detox: {std_before_detoxification}")

0it [00:00, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
1it [00:00,  9.18it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
2it [00:01,  1.08it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
3it [00:02,  1.31it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain r

Mean before detox: 0.033042981903153384
Std before detox: 0.04276982735482991





In [83]:
def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

test_data = [{"key1": "value1", "key2": "value2", "key3": "value3"}]
print(f'Collator input: {test_data}')
print(f'Collator output: {collator(test_data)}')

Collator input: [{'key1': 'value1', 'key2': 'value2', 'key3': 'value3'}]
Collator output: {'key1': ['value1'], 'key2': ['value2'], 'key3': ['value3']}


In [84]:
from trl import PPOConfig, PPOTrainer

In [85]:
learning_rate=1.41e-5
max_ppo_epochs=1
mini_batch_size=4
batch_size=16

config = PPOConfig(
    learning_rate=learning_rate,
    ppo_epochs=max_ppo_epochs,
    mini_batch_size=mini_batch_size,
    batch_size=batch_size
)

ppo_trainer = PPOTrainer(config=config,
                         model=ppo_model,
                         ref_model=ref_model,
                         tokenizer=tokenizer,
                         dataset=train_dataset["train"],
                         data_collator=collator)

In [86]:
from trl.core import LengthSampler

In [87]:
output_min_length = 10
output_max_length = 20
output_length_sampler = LengthSampler(output_min_length, output_max_length)

generation_kwargs = {
    "min_length": 5,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True
}

reward_kwargs = {
    "top_k": None, # Return all scores.
    "function_to_apply": "none", # You want the raw logits without softmax.
    "batch_size": 16,
    "truncation": True,
    "max_length": 512
}

max_ppo_steps = 10

for step, batch in tqdm(enumerate(ppo_trainer.dataloader)):
    # Break when you reach max_steps.
    if step >= max_ppo_steps:
        break

    prompt_tensors = batch["input_ids"]

    # Get response from FLAN-T5/PEFT LLM.
    summary_tensors = []

    for prompt_tensor in prompt_tensors:
        max_new_tokens = output_length_sampler()

        generation_kwargs["max_new_tokens"] = max_new_tokens
        summary = ppo_trainer.generate(prompt_tensor, **generation_kwargs)

        summary_tensors.append(summary.squeeze()[-max_new_tokens:])

    # This needs to be called "response".
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in summary_tensors]

    # Compute reward outputs.
    query_response_pairs = [q + r for q, r in zip(batch["query"], batch["response"])]
    rewards = sentiment_pipeline(query_response_pairs, **reward_kwargs)

    # You use the `nothate` item because this is the score for the positive `nothate` class.
    reward_tensors = [torch.tensor(reward[not_hate_index]["score"]) for reward in rewards]

    # Run PPO step.
    # Configure the model for causal LM behavior.

    stats = ppo_trainer.step(prompt_tensors, summary_tensors, reward_tensors)
    ppo_trainer.log_stats(stats, batch, reward_tensors)

    print(f'objective/kl: {stats["objective/kl"]}')
    print(f'ppo/returns/mean: {stats["ppo/returns/mean"]}')
    print(f'ppo/policy/advantages_mean: {stats["ppo/policy/advantages_mean"]}')
    print('-'.join('' for x in range(100)))

0it [00:00, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 fo

objective/kl: 0.0
ppo/returns/mean: 2.7095603942871094
ppo/policy/advantages_mean: 0.009025752544403076
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: -0.1463538259267807
ppo/returns/mean: 2.8466224670410156
ppo/policy/advantages_mean: -0.007045373320579529
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: -0.15503641963005066
ppo/returns/mean: 3.0882840156555176
ppo/policy/advantages_mean: -0.002453923225402832
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 0.7789863348007202
ppo/returns/mean: 2.5116307735443115
ppo/policy/advantages_mean: 0.0478583425283432
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 0.17533844709396362
ppo/returns/mean: 2.856487274169922
ppo/policy/advantages_mean: 0.010297667235136032
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 0.7648022174835205
ppo/returns/mean: 2.4731380939483643
ppo/policy/advantages_mean: -0.004974208772182465
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 1.1321187019348145
ppo/returns/mean: 2.5848727226257324
ppo/policy/advantages_mean: 0.0065553560853004456
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 1.7301366329193115
ppo/returns/mean: 2.3197507858276367
ppo/policy/advantages_mean: -0.0007800310850143433
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 1.6724928617477417
ppo/returns/mean: 2.86326265335083
ppo/policy/advantages_mean: -0.0018711090087890625
---------------------------------------------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

objective/kl: 0.01849663257598877
ppo/returns/mean: 2.7831385135650635
ppo/policy/advantages_mean: -0.007663009688258171
---------------------------------------------------------------------------------------------------





In [88]:
mean_after_detoxification, std_after_detoxification = evaluate_toxicity(model=ppo_model,
                                                                        toxicity_evaluator=toxicity_evaluator,
                                                                        tokenizer=tokenizer,
                                                                        dataset=train_dataset["test"],
                                                                        num_samples=10)
print(f'toxicity [mean, std] after detox: [{mean_after_detoxification}, {std_after_detoxification}]')

0it [00:00, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
2it [00:00,  4.80it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
3it [00:00,  5.05it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setti

toxicity [mean, std] after detox: [0.020003214721906592, 0.016328999179341865]





In [89]:
mean_improvement = (mean_before_detoxification - mean_after_detoxification) / mean_before_detoxification
std_improvement = (std_before_detoxification - std_after_detoxification) / std_before_detoxification

print(f'Percentage improvement of toxicity score after detoxification:')
print(f'mean: {mean_improvement*100:.2f}%')
print(f'std: {std_improvement*100:.2f}%')

Percentage improvement of toxicity score after detoxification:
mean: 39.46%
std: 61.82%


In [90]:
import pandas as pd

In [91]:
batch_size = 20
compare_results = {}

df_batch = train_dataset["test"][0:batch_size]

compare_results["query"] = df_batch["query"]
prompt_tensors = df_batch["input_ids"]

summary_tensors_ref = []
summary_tensors = []

# Get response from ppo and base model.
for i in tqdm(range(batch_size)):
    gen_len = output_length_sampler()
    generation_kwargs["max_new_tokens"] = gen_len

    summary = ref_model.generate(
        input_ids=torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors_ref.append(summary)

    summary = ppo_model.generate(
        input_ids=torch.as_tensor(prompt_tensors[i]).unsqueeze(dim=0).to(device),
        **generation_kwargs
    ).squeeze()[-gen_len:]
    summary_tensors.append(summary)

# Decode responses.
compare_results["response_before"] = [tokenizer.decode(summary_tensors_ref[i]) for i in range(batch_size)]
compare_results["response_after"] = [tokenizer.decode(summary_tensors[i]) for i in range(batch_size)]

# Sentiment analysis of query/response pairs before/after.
texts_before = [d + s for d, s in zip(compare_results["query"], compare_results["response_before"])]
rewards_before = sentiment_pipeline(texts_before, **reward_kwargs)
compare_results["reward_before"] = [reward[not_hate_index]["score"] for reward in rewards_before]

texts_after = [d + s for d, s in zip(compare_results["query"], compare_results["response_after"])]
rewards_after = sentiment_pipeline(texts_after, **reward_kwargs)
compare_results["reward_after"] = [reward[not_hate_index]["score"] for reward in rewards_after]

  0%|          | 0/20 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|▌         | 1/20 [00:00<00:07,  2.47it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain r

In [92]:
pd.set_option('display.max_colwidth', 500)
df_compare_results = pd.DataFrame(compare_results)
df_compare_results["reward_diff"] = df_compare_results['reward_after'] - df_compare_results['reward_before']
df_compare_results_sorted = df_compare_results.sort_values(by=['reward_diff'], ascending=False).reset_index(drop=True)
df_compare_results_sorted

Unnamed: 0,query,response_before,response_after,reward_before,reward_after,reward_diff
0,"[""Guess what? I've got great news! "",'What is it? ', "" Well, you know how I've been working at the Economist as a proof-reader, right? "",'Yes. ','Well, the editor-in-chief heard that I had experience as an editor at another magadize and asked me if I was interested in becoming an assistant editor for him. ', "" Really? That's fantastic! Will you get a chance to do any writing? "",'She said that the columnist for the literary criticism column would be going on pregnancy leave soon and that I co...",2 and so far away from lurking!'drugs,"a magazine storystrip. Actually in quotations, combining",2.836744,3.787407,0.950663
1,"['Hello, can I speak to Mr.Green? ','Yes, speaking. ', "" This is Steven speaking. I'd like to buy a stock. "",'What stock do you want to buy and how many? ', "" I'd like to buy 1 000 shares of Duson. "",'Let me get the asking price of the stock. Just a moment. Oh, now the asking price is $ 90 each share. By the way, what is the offering price? ','Let me see. The offering price is near or at $ 88. ', "" Then the difference between the asking price and the offering price is $ 2. It's hard to take ...",", maybe even assuming you ', yes, no binulous investors'not going.",? By.,1.861691,2.715429,0.853738
2,"['Bill, how can you hear so happy today? ', "" Aha. I've read of my roommate. I made a move today. "",'Really? What was the matter? ','You knew Brain Locker? ', "" Brain Locker? No, I don't think so. What does he look like? "", "" Well, he's thin and tall. He has brown hair, a holt nose, green eyes and wearing glasses. "", "" Mm. I've seen him a couple of times, I think. ""]","Brain Locker, the new guy. He's a potato. But he is crazy","Yes, Mr. Locker must be really under the weather this morning. His eyes",3.12879,3.773917,0.645128
3,"[""Guess what! I know something you don't know! "", "" What's that? "",'How many planets are there in the solar system? ', "" That's easy. Everyone knows that there are nice. "", "" Not anymore! Can you believe it? They've decided that Pluto is not a planet anymore! "", "" Nice try. I wasn't born yesterday, you know. "", "" I'm dead serious. They've decided that it's too small to be a planet, but actually they haven't yet agreed on how big something has to be in order to be a planet anymore. "", "" That ...",... even. before the other ; omm Maybe,to learn!,2.431906,2.91906,0.487155
4,"['Hi Kara, this is Mike. ','Hello Mike. How are things going for you? ','Great, how are you? ','Fine. Everything is just fine. ','Kara, I had a great time the other night and was wondering if you would like to go out again this weekend. ','Mike, I enjoyed your company, but I am getting ready to graduate soon. I really need to focus on my studies. ','Maybe I could help you with what you need to get done. ','It would be better for me to just deal with getting my work done, but thank you for a ...","Thanks for your invitation, Julie. What are you studying this evening? I",Are you good friends with each other! Just prepare an entire tv in,3.028995,3.426456,0.397461
5,"['We ’ re having a department meeting at 10 o ’ clock, ok? ','That ’ s fine... I need to pick up some stationary-you know, a stapler, scissors, files, who should I see about that? ','See Julie, the receptionist. She knows where all that stuff is kept. You might like a calendar for you desk. She can give you one of those too. ','Thanks. I need to make a few photocopies. ','The photocopier is near my office. Come on, I ’ ll show you where it is. ','Do you enjoy working in this office? ','Yes, ...",ideas. Be sure to speak up if you have any. '] <|endoftext|>,I just maybe??,3.219876,3.601178,0.381302
6,"['Is there any way you can cut us a better deal on your wholesale price for this order? ','We did the best that we could to give you a low price. Did you get our recent estimate? ', "" Based on the estimate you gave us, by the time we figure in transportation and other expenses, our profit is short.With the offer you've given me, we're making next to nothing.Can't you do any better? "", "" I've already given you a discount of 20 % off of our normally charge. If I go any lower, we'll have loss o...",is unfortunately. if I've this will be elsewhere. I've,"Everything is the group, after 110, broken to buy today!",2.344698,2.681228,0.33653
7,"['Excuse me, This bus goes down town, doesn ’ t it? ','Yes, where do you want to go? ','The worker stadium. ','This is the right bus. ','Do you let me know where to get off? ','Certainly. Four more stops after this. I ’ ll call out the stops. ','Thank you. ']","You can stay on this bus alseep afterwards, if","Sorry, sir. There is nobody right just gets off at",1.066795,1.398782,0.331987
8,"['how did your interview go? ', "" pretty well. I don't know if I'll get the promotion or not, but I feel good about it. "",'if you get the promotion, what will your new title be? ','if I get the promotion, I will be a senior engineer instead of an assistant engineer. ','will you get a pay-raise, too? ','whenever you are given added responsibilities, you should get a promotion. ','that makes sense. Who interviewed you? ','my boss. ','what kinds of questions did she ask you? ','she asked me abo...","better, to be nearly always not, you have no",". '], although a large and its.<|endoftext|>",1.891522,2.038954,0.147432
9,"[""I'm glad we live in a small town. "",'Why? ','Because the houses look so nice at Christmas time. ','Yes, they do. Do you have a Christmas tree this year? ','Yes, we have a big tree this year. I bought the decorations at the five-and-tencent store yesterday. Do you want to come and see it? ', "" I can't now because I have to buy a present for my mother. "",'When can you come? ', "" I don't know when I can come. I'll let you know later. ""]","know later. ""] I don't know then Why not? <|endoftext|>","I'll let you know later. ""] Ok, mother, you <|endoftext|>",2.204136,2.315302,0.111166


In [93]:
# Save the model
ppo_model.save_pretrained("/content/drive/MyDrive/ppo_saved_model")

# Save the tokenizer
tokenizer.save_pretrained("/content/drive/MyDrive/ppo_saved_model")


('/content/drive/MyDrive/ppo_saved_model/tokenizer_config.json',
 '/content/drive/MyDrive/ppo_saved_model/special_tokens_map.json',
 '/content/drive/MyDrive/ppo_saved_model/vocab.json',
 '/content/drive/MyDrive/ppo_saved_model/merges.txt',
 '/content/drive/MyDrive/ppo_saved_model/added_tokens.json',
 '/content/drive/MyDrive/ppo_saved_model/tokenizer.json')

In [94]:
model_path = "/content/drive/MyDrive/ppo_saved_model"

tokenizer = AutoTokenizer.from_pretrained(model_path)
ppo_model = AutoModelForCausalLM.from_pretrained(model_path)

Some weights of the model checkpoint at /content/drive/MyDrive/ppo_saved_model were not used when initializing GPT2LMHeadModel: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing GPT2LMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2LMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# Sample prompts for evaluation
sample_prompts = [
    "Hi, how are you today?",
    "Can you help me book a flight?",
    "I love you"
]

for prompt in sample_prompts:
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    output = ppo_model.generate(input_ids, max_length=50, do_sample=True)
    print(f"Prompt: {prompt}")
    print("Response:", tokenizer.decode(output[0], skip_special_tokens=True))
    print("-" * 50)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Hi, how are you today?
Response: Hi, how are you today?   It ’ s great. I haven ’ t been in this long. Why did you come to visit me?   I wanted to see you to know what I could do for you. 
--------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: Can you help me book a flight?
Response: Can you help me book a flight?   Yes, where should we go?   We can take the L-R-V-Isadora to the airport and take a taxi.   That's great. What time do we
--------------------------------------------------
Prompt: I love you
Response: I love you.   I love you too.   Where's your room?   It's at the far end of the road, the corner of this building.   This is the hotel I wanted to stay at.  
--------------------------------------------------


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [96]:
!pip install nltk



In [97]:
import nltk
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize

# Sample prompt
prompt = "What is your name, i forgot"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output = ppo_model.generate(input_ids, max_length=50, do_sample=True)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

# Tokenize into sentences and take the first sentence
first_sentence = sent_tokenize(generated_text)[1]

print(f"Prompt: {prompt}")
print("Response:", first_sentence)
print("-" * 50)


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt: What is your name, i forgot
Response: What about you?
--------------------------------------------------


In [98]:
conversation_history = []
cnt = 1
def chatbot_response(user_input, cnt):
  print(cnt)
  conversation_history.append(user_input)
  if len(conversation_history) > 5:
    conversation_history.pop(0)

  memory_context = '\n'.join(conversation_history)
  print(memory_context)
  input_text = f"{memory_context}"

  input_ids = tokenizer(input_text, return_tensors='pt').input_ids
  output = ppo_model.generate(input_ids, max_length=200, do_sample = True)
  response = tokenizer.decode(output[0], skip_special_tokens=True)

  print(sent_tokenize(response)[:])
  print(len(sent_tokenize(response)[:]))
  first_sentence = sent_tokenize(response)[cnt]

  conversation_history.append(first_sentence)
  if len(conversation_history) > 5:
    conversation_history.pop(0)

  return first_sentence

while True:
  user_msg = input()
  if user_msg.lower() == 'exit':
    cnt = 1
    break
  else:
    bot_response = chatbot_response(user_msg, cnt)
    if cnt < 3:
      cnt += 1
    print("BOT: ", bot_response)

what is your name


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


1
what is your name
['what is your name, please?', 'My name is Daniel Smith.', 'How long have you been waiting to call you, Steven?', 'About 2 weeks.', 'Is it just a few days or more?', 'About a week.', 'Really?', 'I know I look at you every day now.']
8
BOT:  My name is Daniel Smith.
what is your favorite movie


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


2
what is your name
My name is Daniel Smith.
what is your favorite movie
['what is your name\nMy name is Daniel Smith.', 'what is your favorite movie?', 'It is definitely the one with the most famous director.', 'Do you like his movies?', "I don't know about his movies.There's a book out for children's children.You must buy it.Isn't there a Chinese children's book?", "I like it.We only buy it for children's children.But he has a lot of kids'books.", 'You must watch it for at least one month.', "I must think of what I should do, I can't remember.", "It's"]
9
BOT:  It is definitely the one with the most famous director.
what is the movie about


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


3
what is your name
My name is Daniel Smith.
what is your favorite movie
It is definitely the one with the most famous director.
what is the movie about
['what is your name\nMy name is Daniel Smith.', 'what is your favorite movie\nIt is definitely the one with the most famous director.', 'what is the movie about?', "'The Godfather '.", 'When did it come to an English language?', 'The world is changing.', 'Really.', 'Even my favorite movie is on the top ten list.']
8
BOT:  'The Godfather '.
what is your favorite sport


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


3
what is your favorite movie
It is definitely the one with the most famous director.
what is the movie about
'The Godfather '.
what is your favorite sport
['what is your favorite movie\nIt is definitely the one with the most famous director.', "what is the movie about\n'The Godfather '.", 'what is your favorite sport like?', "How about swimming, it's the very exciting sport.", 'It is fun.', 'My favorite player, is the English player.', 'What is your language?', 'French.', 'I am a good learner.', "What's your favorite sport?", "I don't know.", 'You are very lucky.', 'I can swim pretty fast!']
13
BOT:  How about swimming, it's the very exciting sport.
do you know how to swim


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


3
what is the movie about
'The Godfather '.
what is your favorite sport
How about swimming, it's the very exciting sport.
do you know how to swim
["what is the movie about\n'The Godfather '.", "what is your favorite sport\nHow about swimming, it's the very exciting sport.", 'do you know how to swim?', 'I know only basic things, my teacher told me.', "you can do it, you can never stop.First you have to know how to swim, then your feet, hands, nose and feet.You'll never be able to do swimming when your feet are wet.", "so I take it that you really don't understand Chinese?", "not really.Chinese people talk like this every day.To tell you the truth    do I suppose a lot of people prefer dancing.It's the most popular sport in other countries.", 'well, the people can learn Chinese people cannot be fooled by art']
8
BOT:  I know only basic things, my teacher told me.
have a good day, bye


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


3
what is your favorite sport
How about swimming, it's the very exciting sport.
do you know how to swim
I know only basic things, my teacher told me.
have a good day, bye
["what is your favorite sport\nHow about swimming, it's the very exciting sport.", 'do you know how to swim\nI know only basic things, my teacher told me.', 'have a good day, bye.', "bye.How's all you know?", "I don't understand.My teacher told me to keep my eyes open, but I don't know what to do now.It's very difficult.what's the problem?", 'I cannot seem to understand the problems I have.', 'well, let me explain.First, I need help.', 'do you know which courses I   do you want to go take the course then?', 'that or this course?', 'this course?', 'that?', 'then this.', "you've it is what you want to teach the course?", 'this?', 'yes.', 'this.', 'the course?', 'this course?', 'like']
19
BOT:  bye.How's all you know?
exit


In [99]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `Access` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Access`


In [102]:
ppo_model.push_to_hub("TheLongTran/ChatBot-WithRLHF")

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/TheLongTran/ChatBot-WithRLHF/commit/d1cf05326116e014c4c9e664f6b4b6ef562cbaea', commit_message='Upload model', commit_description='', oid='d1cf05326116e014c4c9e664f6b4b6ef562cbaea', pr_url=None, repo_url=RepoUrl('https://huggingface.co/TheLongTran/ChatBot-WithRLHF', endpoint='https://huggingface.co', repo_type='model', repo_id='TheLongTran/ChatBot-WithRLHF'), pr_revision=None, pr_num=None)

In [103]:
tokenizer.push_to_hub("TheLongTran/ChatBot-WithRLHF")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/TheLongTran/ChatBot-WithRLHF/commit/baaa7cdf1136632bb2cad09e95b479c3979d71bc', commit_message='Upload tokenizer', commit_description='', oid='baaa7cdf1136632bb2cad09e95b479c3979d71bc', pr_url=None, repo_url=RepoUrl('https://huggingface.co/TheLongTran/ChatBot-WithRLHF', endpoint='https://huggingface.co', repo_type='model', repo_id='TheLongTran/ChatBot-WithRLHF'), pr_revision=None, pr_num=None)