<a href="https://colab.research.google.com/github/ChandlerU11/Hugging_ReST/blob/main/ReST_Method_Fine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets
!pip install accelerate -U
!pip install transformers[torch]

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
Installing collected p

In [None]:
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments, AutoModelForSeq2SeqLM, AutoTokenizer
from sklearn.model_selection import train_test_split
from datasets import Dataset
from tqdm import tqdm
import torch
import pandas as pd
import statistics
import random
import time

# Run on the first CUDA GPU when one is available; otherwise fall back to the
# CPU so that `device` is always defined for the `.to(device)` calls below.
device = 0 if torch.cuda.is_available() else "cpu"

def prepare_dataset(examples):
    """Tokenize a batch of prompt/response pairs for seq2seq fine-tuning.

    Uses the module-level ``tokenizer``. The ``'input'`` and ``'response'``
    fields of ``examples`` are each tokenized with truncation at 128 tokens.

    Returns:
        A dict with ``"input_ids"`` (token ids of the prompts) and
        ``"labels"`` (token ids of the responses).
    """
    def _encode(texts):
        # Only the token ids are kept from the tokenizer output.
        return tokenizer(texts, truncation=True, max_length=128)['input_ids']

    prompt_ids = _encode(examples['input'])
    target_ids = _encode(examples["response"])
    return {"input_ids": prompt_ids, "labels": target_ids}

def dummy_reward_model(df):
    """Score prompt/response pairs for the "N hugs" toy task.

    Each prompt in ``df['input']`` starts with an integer (the requested
    number of hugs). The paired entry of ``df['response']`` is rewarded as
    follows:

    * If every whitespace-separated word of the response is exactly
      ``'hugs'`` (an empty response also qualifies), the reward is the
      negated distance between the requested and the produced count, so a
      perfect answer scores 0.
    * Otherwise the reward is a flat -3.

    Returns:
        A list of integer rewards, one per row, in row order.
    """
    rewards = []
    for prompt, reply in zip(df['input'], df['response']):
        requested = int(prompt.split()[0])
        words = reply.split()
        hugs = words.count('hugs')

        if hugs != len(words):
            # The response contains something other than "hugs".
            rewards.append(-3)
        else:
            rewards.append(-abs(requested - hugs))

    return rewards

def generate(df, gen_model, tokenizer, generation_kwargs, N):
    """Sample ``N`` responses from ``gen_model`` for every prompt in ``df``.

    Args:
        df: DataFrame with an ``'input'`` column of prompt strings.
        gen_model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompts and decode the outputs.
        generation_kwargs: Extra keyword arguments forwarded to
            ``gen_model.generate``.
        N: Number of generation passes over the full set of prompts.

    Returns:
        A new DataFrame made of ``N`` stacked copies of ``df`` (original index
        values repeated) with a ``'response'`` column holding the decoded
        generations. Rows ``k * len(df)`` to ``(k + 1) * len(df) - 1`` come
        from pass ``k``.
    """
    # The prompts are identical on every pass, so tokenize them only once and
    # move the whole batch to wherever the model lives.
    inputs = tokenizer(df['input'].tolist(), return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to(gen_model.device)

    responses = []
    for _ in range(N):
        # Pass the attention mask explicitly so padded positions are ignored
        # instead of relying on the model to infer the mask from pad tokens.
        output_ids = gen_model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            **generation_kwargs,
        )
        responses.extend(tokenizer.batch_decode(output_ids, skip_special_tokens=True))

    # Create preserved-order dataframe with prompt / response pairs
    gen_df = pd.concat([df.copy()] * N)
    gen_df['response'] = responses

    return gen_df

def _mean_eval_reward(Deval, model, tokenizer, generation_kwargs):
    """Return the mean reward of one sampled response per evaluation prompt."""
    eval_df = generate(Deval.copy(), model, tokenizer, generation_kwargs, 1)
    return statistics.mean(dummy_reward_model(eval_df))


def ReST(D, Deval, G, I, N, model, tokenizer, generation_kwargs, training_args):
    """Fine-tune ``model`` with alternating Grow and Improve steps.

    Each of the ``G`` Grow steps samples ``N`` responses per prompt in ``D``
    and scores them with ``dummy_reward_model``. The Improve steps then walk
    through the thresholds in ``I`` in order: the generated samples whose
    score is at or above the threshold are used to fine-tune the model, and
    fine-tuning on that same filtered set is repeated for as long as the mean
    reward on ``Deval`` keeps increasing.

    Args:
        D: DataFrame of training prompts (``'input'`` column).
        Deval: DataFrame of evaluation prompts (``'input'`` column).
        G: Number of Grow steps.
        I: Iterable of reward thresholds, one per Improve step.
        N: Number of responses sampled per prompt in each Grow step.
        model: Model to fine-tune; it is trained in place.
        tokenizer: Tokenizer matching ``model``.
        generation_kwargs: Keyword arguments forwarded to generation.
        training_args: Arguments handed to ``Seq2SeqTrainer``.

    Returns:
        The same ``model`` object after fine-tuning.

    Note:
        Relies on the module-level ``data_collator``.
    """
    for g in range(G):
        print('Grow Step ', g)

        train_df = generate(D.copy(), model, tokenizer, generation_kwargs, N)
        train_df['scores'] = dummy_reward_model(train_df)
        print(len(train_df[train_df['scores'] == 0]), "generations out of ", len(train_df), "are the correct length.")
        print("Example output from model:")
        print(train_df.head(50))
        # Presumably a pause so the sample output can be read before the
        # trainer starts logging.
        time.sleep(10)

        for steps, tau_i in enumerate(I):
            print('Improve Step: ', steps)
            print('Threshold: ', tau_i)

            # Filter for samples at or above threshold
            Dg = train_df.loc[(train_df['scores'] >= tau_i)].copy()
            if len(Dg) == 0:
                print("NO SAMPLES ABOVE THRESHOLD")
                break

            Dg = Dataset.from_pandas(Dg).map(prepare_dataset, batched=True)

            # Create trainer with newly filtered data
            trainer = Seq2SeqTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=Dg, data_collator=data_collator)

            # Baseline eval reward before this step's fine-tuning. Rewards are
            # never positive, so a fixed baseline of 0 could never be beaten
            # and the repeat-while-improving loop below would never run.
            prev = _mean_eval_reward(Deval, model, tokenizer, generation_kwargs)

            # First fine-tuning of improve step
            trainer.train()
            improve = _mean_eval_reward(Deval, model, tokenizer, generation_kwargs)

            # While model improves reward model score on eval set, continue to fine-tune with newly filtered data
            while prev < improve:
                trainer.train()
                prev = improve
                improve = _mean_eval_reward(Deval, model, tokenizer, generation_kwargs)

    print("Training Finished!!!")
    return model

def test_ReST(test_data, model, tokenizer, generation_kwargs):
    """Report how often ``model`` produces exactly the requested number of hugs.

    One response is generated for every prompt in ``test_data`` and scored
    with ``dummy_reward_model``; the percentage of responses scoring 0 is
    printed. Nothing is returned.
    """
    scored = generate(test_data, model, tokenizer, generation_kwargs, 1)
    scored['scores'] = dummy_reward_model(scored)

    perfect = scored[scored['scores'] == 0]
    percent_correct = len(perfect) / len(scored) * 100
    print("The fine-tuned model generates the correct number of hugs", percent_correct, "percent of the time!" )


# Training prompts: 1000 strings of the form "<k> hugs" with k drawn from 1..4.
rand_list = [str(random.randrange(1, 5)) + ' hugs' for _ in range(1000)]

# Held-out test prompts: 100 strings drawn the same way.
test_data = [str(random.randrange(1, 5)) + ' hugs' for _ in range(100)]

# Single-column frame consumed by test_ReST.
test_df = pd.DataFrame({'input': test_data})

# Sampling configuration forwarded to `model.generate`: sampling is enabled
# with no top-k or nucleus truncation and no minimum-length constraint.
generation_kwargs = {
    "min_length": -1,
    "top_k": 0,  # top_k is an integer parameter; 0 disables top-k filtering
    "top_p": 1.0,
    "do_sample": True,
}

# Trainer configuration: one epoch per `trainer.train()` call, batches of 64
# per device, learning rate 3e-4, no evaluation inside the trainer. Trainer
# output is written under ./t5-small.
training_args = Seq2SeqTrainingArguments(
    output_dir="./t5-small",
    do_train=True,
    do_eval=False,
    learning_rate=3e-4,
    num_train_epochs=1,
    per_device_train_batch_size=64,
)


# Load the pretrained T5-small tokenizer and model, and move the model to the
# selected device.
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
model = model.to(device)

# Collator that pads label positions with -100.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=-100)

# Split the generated prompts 90/10 into training and evaluation sets.
train_df = pd.DataFrame({'input': rand_list})
D, Deval = train_test_split(train_df, test_size=0.1, random_state=42)

G = 3             # number of Grow steps
I = [-2, -1, 0]   # reward thresholds, one per Improve step
N = 10            # responses sampled per prompt in each Grow step

fine_tuned_model = ReST(
    D=D,
    Deval=Deval,
    G=G,
    I=I,
    N=N,
    model=model,
    tokenizer=tokenizer,
    generation_kwargs=generation_kwargs,
    training_args=training_args,
)
test_ReST(test_df, fine_tuned_model, tokenizer, generation_kwargs)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Grow Step  0




6 generations out of  9000 are the correct length.
Example output from model:
      input                response  scores
716  3 hugs                   3hugs      -3
351  3 hugs           wipe keep mom      -3
936  3 hugs                 3 comes      -3
256  2 hugs                  2 bren      -3
635  3 hugs                3 beaine      -3
644  1 hugs                       !      -3
554  1 hugs                      I.      -3
959  1 hugs               Ein rlock      -3
168  1 hugs                       &      -3
917  2 hugs                  2 hugs      -3
528  3 hugs                2 3 hugs      -3
823  2 hugs                  2 hugs      -3
985  1 hugs                              -1
816  4 hugs  4 4 Jack Jack Gate Top      -3
86   1 hugs                  1 hugs      -3
432  2 hugs                   2hugs      -3
184  2 hugs                   2hugs      -3
978  2 hugs                   2hugs      -3
534  3 hugs                 33 hugs      -3
294  1 hugs                  1 hugs      -

Map:   0%|          | 0/185 [00:00<?, ? examples/s]

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss




Improve Step:  1
Threshold:  -1


Map:   0%|          | 0/147 [00:00<?, ? examples/s]

Step,Training Loss




Improve Step:  2
Threshold:  0


Map:   0%|          | 0/6 [00:00<?, ? examples/s]

Step,Training Loss




Grow Step  1
341 generations out of  9000 are the correct length.
Example output from model:
      input                    response  scores
716  3 hugs                                  -3
351  3 hugs                       # try      -3
936  3 hugs                                  -3
256  2 hugs                                  -2
635  3 hugs                    hugshugs      -3
644  1 hugs                        hugs       0
554  1 hugs                                  -1
959  1 hugs                                  -1
168  1 hugs                                  -1
917  2 hugs                                  -2
528  3 hugs                                  -3
823  2 hugs                                  -2
985  1 hugs                                  -1
816  4 hugs                       shugs      -3
86   1 hugs                                  -1
432  2 hugs                        hugs      -1
184  2 hugs                 hug hughugs      -3
978  2 hugs                        hugs    

Map:   0%|          | 0/3794 [00:00<?, ? examples/s]

Step,Training Loss


Improve Step:  1
Threshold:  -1




Map:   0%|          | 0/2016 [00:00<?, ? examples/s]

Step,Training Loss


Improve Step:  2
Threshold:  0




Map:   0%|          | 0/341 [00:00<?, ? examples/s]

Step,Training Loss




Grow Step  2
2726 generations out of  9000 are the correct length.
Example output from model:
      input        response  scores
716  3 hugs  hugs hugs hugs       0
351  3 hugs            hugs      -2
936  3 hugs            hugs      -2
256  2 hugs            hugs      -1
635  3 hugs  hugs hugs hugs       0
644  1 hugs            hugs       0
554  1 hugs                      -1
959  1 hugs            hugs       0
168  1 hugs            hugs       0
917  2 hugs            hugs      -1
528  3 hugs            hugs      -2
823  2 hugs            hugs      -1
985  1 hugs            hugs       0
816  4 hugs       hugs hugs      -2
86   1 hugs            hugs       0
432  2 hugs            hugs      -1
184  2 hugs            hugs      -1
978  2 hugs            hugs      -1
534  3 hugs       hugs hugs      -1
294  1 hugs            hugs       0
892  3 hugs            hugs      -2
425  4 hugs            hugs      -3
713  3 hugs       hugs hugs      -1
260  1 hugs            hugs       0
237  2

Map:   0%|          | 0/7158 [00:00<?, ? examples/s]

Step,Training Loss




Improve Step:  1
Threshold:  -1


Map:   0%|          | 0/5664 [00:00<?, ? examples/s]

Step,Training Loss




Improve Step:  2
Threshold:  0


Map:   0%|          | 0/2726 [00:00<?, ? examples/s]

Step,Training Loss




Training Finished!!!
The fine-tuned model generates the correct number of hugs 84.0 percent of the time!
