# Runtime test notebook

In [1]:
# ## dependencies for colab
# !git clone https://__TOKEN_GIT__:@github.com/DanielSc4/RL-on-LM.git
# %cd RL-on-LM/
# !pip install -r requirements.txt
# from huggingface_hub import login
# login(token = '')  # https://huggingface.co/settings/tokens

In [2]:
import os
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

In [1]:
import rewardlm

## Data Utils

In [2]:
from rewardlm.data.data_utils import gen_benchmark_data
from rewardlm.core.GenerativeModel import GenerativeModel

generative_manager = GenerativeModel(model_id = 'EleutherAI/pythia-70m')

gen_benchmark_data(generative_manager.tokenizer)

Using MPS (Apple Silicon GPU)


Found cached dataset json (/Users/daniel/.cache/huggingface/datasets/allenai___json/allenai--real-toxicity-prompts-eb8779dd2693db47/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


<torch.utils.data.dataloader.DataLoader at 0x105f81220>

## Reward model

In [3]:
from rewardlm.core.RewardModel import RewardModel
reward_manager = RewardModel(model_id = 'facebook/roberta-hate-speech-dynabench-r4-target', device='mps')

### Reward tokenizer

In [6]:
reward_manager.tokenize_text('Hello, world!')

{'input_ids': tensor([[    0, 31414,     6,   232,   328,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [10]:
# Trying w/ batch text
reward_manager.tokenize_text(
    ['First sentence', 'second longer sentence'],
    padding = 'max_length', 
    max_length = 12, 
    truncation = True,
)

{'input_ids': tensor([[    0, 10993,  3645,     2,     1,     1,     1,     1,     1,     1,
             1,     1],
        [    0, 10815,  1181,  3645,     2,     1,     1,     1,     1,     1,
             1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]])}

### Model itself

In [7]:
reward_manager.get_score_pair(
    prompt = reward_manager.tokenize_text('This is a prompt'),
    response = reward_manager.tokenize_text('This is the continuation of the prompt')
)

(array([0.00119799], dtype=float32), array([0.00028152], dtype=float32))

In [13]:
from torch.utils.data import DataLoader, Dataset
from rewardlm.data.CustomDatasets import ToxicityGeneratedSet
import pandas as pd

# trying w/ batch text
batch_input = reward_manager.tokenize_text(
    ['First sentence', 'second longer sentence', 'third one'],
    padding = 'max_length', 
    max_length = 12, 
    truncation = True,
)

dataset = ToxicityGeneratedSet(pd.DataFrame)
        
reward_manager.get_batch_score_pair(
    DataLoader(batch_input, batch_size=1)
)

TypeError: __init__() missing 1 required positional argument: 'tokenizer'

## RL model

In [1]:
from rewardlm.core.RL.RLModel import RLModel
import torch

# sample data (thanks chatGPT for the help)
sample_data = [
    "I love to eat pizza.",
    "The cat is sleeping.",
    "She plays the guitar.",
    "He is a good friend.",
    "The sun is shining.",
    "They went to the park.",
    "I enjoy reading books.",
    "We love going to movies.",
    "The dog is barking.",
    "She loves to dance.",
    "He is a great chef.",
    "The flowers are blooming.",
    "They won the game.",
    "I need a cup of coffee.",
    "She sings beautifully.",
    "He likes to swim.",
    "The rain is pouring.",
    "They are best friends.",
    "I like to go hiking.",
    "She runs every morning.",
]

rlmanager = RLModel(
    model_id = 'EleutherAI/pythia-70m',
    reward_model_id = 'facebook/roberta-hate-speech-dynabench-r4-target',
    optimized = True,
    # log_method = 'wandb',
    bs = 16,     # TMP TODO, Remove it when done tesing on sample data (defaults to 256)
    accelerator_kwargs = {
        'cpu': False if torch.cuda.is_available() else True,        # only for M1 testing
    },
)
custom_prompt = 'Someone said {prompt}'

dataset = rlmanager.generate_dataset(
    text = sample_data,
    max_len = 50,
    custom_prompt = custom_prompt,
)

assert rlmanager.generator_manager.tokenizer.decode(dataset[0]['input_ids'], skip_special_tokens = True) == custom_prompt.format(prompt = sample_data[0]), 'Something wrong'


history = rlmanager.train_PPO(dataset = dataset)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so
CUDA SETUP: Loading binary /Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so...
dlopen(/Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so, 0x0006): tried: '/Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (not a mach-o file), '/System/Volumes/Preboot/Cryptexes/OS/Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (no such file), '/Users/daniel/miniforge3/envs/torch_new/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (not a mach-o file)


  warn("The installed version of bitsandbytes was compiled without GPU support. "


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


0it [00:00, ?it/s]

batch n: 1:


100%|██████████| 1/1 [00:00<00:00,  2.17it/s]
You're using a GPTNeoXTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
1it [00:13, 13.69s/it]


## Fine Tune

In [3]:
import torch
from rewardlm.core.GenerativeModel import GenerativeModel
from rewardlm.data.CustomDatasets import PromptsDataset
import os
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

sample_data = [
    "I love to eat pizza.",
    "The cat is sleeping.",
    "She plays the guitar.",
    "He is a good friend.",
    "The sun is shining.",
    "They went to the park.",
    "I enjoy reading books.",
    "We love going to movies.",
    "The dog is barking.",
    "She loves to dance.",
    "He is a great chef.",
    "The flowers are blooming.",
    "They won the game.",
    "I need a cup of coffee.",
    "She sings beautifully.",
    "He likes to swim.",
    "The rain is pouring.",
    "They are best friends.",
    "I like to go hiking.",
    "She runs every morning.",
]

model_id = 'facebook/opt-350m'
# model_id = 'EleutherAI/pythia-70m'
generator_manager = GenerativeModel(
    model_id,
    # load_dtype = '8-bit',
    accelerator_kwargs = {
        'cpu': False if torch.cuda.is_available() else True,        # only for M1 testing
    },
)

dataset = PromptsDataset(
    tokenizer = generator_manager.tokenizer,
    text = sample_data,
    max_len = 50,
)

generator_manager.fine_tune(torch_dataset = dataset, optimized = False)

Downloading (…)lve/main/config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/663M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/685 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/441 [00:00<?, ?B/s]

lm_head
trainable params: 0 || all params 331196416 || trainable(%): 0.00


  0%|          | 0/200 [00:00<?, ?it/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.

In [4]:
dataset

NameError: name 'dataset' is not defined