In [49]:
import subprocess
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset, Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import numpy as np
from datasets import disable_progress_bar

# turn off all HF-Datasets progress bars
disable_progress_bar()

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

LM_MODEL = "august66/qwen2-sft-final"
SENTIMENT_MODEL = "siebert/sentiment-roberta-large-english"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sft_model = AutoModelForCausalLM.from_pretrained(
    LM_MODEL,
    torch_dtype = torch.float16
)
sft_tokenizer = AutoTokenizer.from_pretrained("august66/qwen2-sft-final") 
sft_tokenizer.pad_token = sft_tokenizer.eos_token
sft_model.config.pad_token_id = sft_model.config.eos_token_id
sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL)

dataset_test = load_dataset("stanfordnlp/imdb", split="test")
def prompt_completion_preprocess(example):
    words = example['text'].split()
    prompt = ' '.join(words[:5])
    completion = ' '.join(words[5:])
    return {'prompt': prompt, 'completion': completion}
dataset_test = dataset_test.map(prompt_completion_preprocess, remove_columns=['text', 'label'])


print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
print(f"Reserved : {torch.cuda.memory_reserved()  / 1024**3:.2f} GB")

Allocated: 0.01 GB
Reserved : 13.19 GB


In [None]:
from tqdm import tqdm
def tokenize(batch):

    inputs = sft_tokenizer(
        batch['prompt'],
        padding = 'max_length',
        truncation = True,
        max_length = 128,
        padding_side = 'left',
        add_special_tokens = True, 
        return_tensors = 'pt'
    )

    return {k:v for k, v in inputs.items()}

tokenized_inputs = dataset_test.map(
    tokenize, 
    batched = True,
    batch_size = 32, 
    remove_columns = ['completion']
).with_format('torch', columns = ['prompt', 'input_ids', 'attention_mask'])


tokenized_inputs_dataloader = DataLoader(
    tokenized_inputs,
    batch_size = 512, 
    shuffle = True,
    pin_memory = True 
)

prompt_completion_data = []
with torch.inference_mode():
    sft_model = sft_model.to(device)
    for input_batch in tqdm(tokenized_inputs_dataloader):
        prompts = input_batch.pop('prompt')
        input_batch = {k:v.to(device) for k, v in input_batch.items()}
        batch_size = input_batch['input_ids'].shape[0]

        outputs = sft_model.generate(
            **input_batch,
            num_return_sequences = 2, 
            do_sample = True,
            top_k=50,
            top_p=0.95,
            temperature=1.0,
            use_cache = True,
            pad_token_id = sft_model.config.eos_token_id,
            max_new_tokens=10
        ).view(batch_size * 2, -1).cpu()
        
        decoded_outputs_batch = sft_tokenizer.batch_decode(
            outputs,
            skip_special_tokens = True,
            clean_up_tokenization_spaces=True
        )

        prompt_completion_data_batch = [{
            'prompt':prompt,
            'completion_1':' '.join(decoded_outputs_batch[2*index].split()[5:]).strip(),
            'completion_2':' '.join(decoded_outputs_batch[2*index+1].split()[5:]).strip(),
        } for index, prompt in enumerate(prompts)]
        
        prompt_completion_data.extend(prompt_completion_data_batch)


prompt_completion_dataset = Dataset.from_list(prompt_completion_data)
prompt_completion_dataset = 






  0%|          | 0/49 [00:00<?, ?it/s]

100%|██████████| 49/49 [02:28<00:00,  3.03s/it]


In [57]:
prompt_completion_dataset

Dataset({
    features: ['prompt', 'completion_1', 'completion_2'],
    num_rows: 25000
})

In [5]:
def sentiment_tokenize(batch):
    
    prompts = batch['prompt']
    prompts = [prompt for prompt in prompts for _ in range(2)]
    completions_flat = [c for pair in zip(batch['completion_1'], batch['completion_2']) for c in pair]
    sentiment_inputs = sentiment_tokenizer(
        text = completions_flat,
        max_length = 128,
        add_special_tokens = True,
        return_tensors = 'pt',
        padding = 'max_length', 
        truncation = True,
        padding_side = 'left'
    )
    return {'prompt':prompts, 'completion':completions_flat, 'input_ids':sentiment_inputs['input_ids'], 'attention_mask':sentiment_inputs['attention_mask']}

tokenized_sentiments = prompt_completion_dataset.map(
    sentiment_tokenize, 
    batched = True,
    batch_size = 32,
    remove_columns=["completion_1", "completion_2"]
).with_format('torch', columns = ['prompt', 'completion', 'input_ids', 'attention_mask'])


tokenized_sentiment_inputs_dataloader = DataLoader(
    tokenized_sentiments, 
    batch_size = 512,
    shuffle = True,
    pin_memory = True
)

reward_dict = []
with torch.no_grad():
    sentiment_model = sentiment_model.to(device)
    for batch in tqdm(tokenized_sentiment_inputs_dataloader):
        prompts = batch.pop('prompt')
        completions = batch.pop('completion')
        inputs = {k:v.to(device) for k, v in batch.items()}
        outputs = sentiment_model(**inputs)
        probs_batch = F.softmax(outputs.logits, dim = -1)
        reward_batch = probs_batch[:, 1].cpu().tolist()

        reward_batch = [{'prompt':p, 'completion':c, 'reward':reward} for p, c, reward in zip(prompts, completions, reward_batch)]
        reward_dict.extend(reward_batch)

reward_dataset = Dataset.from_list(reward_dict)



Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

100%|██████████| 98/98 [02:30<00:00,  1.54s/it]


In [51]:
unique_prompts = set(reward_dataset['prompt'])
preference_list = []
for prompt in tqdm(unique_prompts):
    ds = reward_dataset.filter(lambda x: x['prompt'] == prompt)
    completion_1, reward_1 = ds[0]['completion'], ds[0]['reward']
    completion_2, reward_2 = ds[1]['completion'], ds[1]['reward']
    reward_diff = torch.tensor(reward_1-reward_2)
    pref_prob = F.sigmoid(reward_diff)
    if pref_prob > 0.5:
        chosen, rejected = completion_1, completion_2
    else:
        chosen, rejected = completion_2, completion_1
    
    preference_list.extend({
        'prompt':prompt, 'accept':chosen, 'reject':rejected, 'accept_prob':pref_prob
    })
preference_dataset = Dataset.from_list(preference_list)
    

  0%|          | 35/23033 [00:06<1:06:27,  5.77it/s]


KeyboardInterrupt: 

In [56]:
reward_dataset['prompt']

['Watching QUINTET is not unlike',
 "I know, it's a movie.",
 'I saw this movie at',
 'It is so nice to',
 'The story is: a turn-of-the-century',
 'This film is a third',
 "THE worst movie I've ever",
 'I appear to be in',
 'And a hard spot somewhere',
 'Cybrog 2:Glass Shadow stars Elias',
 'If (as I just pointed',
 "It's just one of those",
 'First off, I just watched',
 'First I am a teenager.',
 '"Air Bud 2: Golden Receiver"',
 'bad acting , combats are',
 'An absorbing exploration of virtual',
 'I agree with all aforementioned',
 'Heya Denver fans! The animation',
 "I really didn't like this",
 'How good is this film?',
 "Although I'm not too much",
 'What do you mean son',
 'This was such a terrible',
 'When the noble Hanabusa clan',
 'Disappointing heist movie indeed, I',
 'It just so happens that',
 'If this is classed as',
 'I knew this would be',
 "I realize it's a small",
 'I have seen this movie',
 'There is a phrase by',
 'This show is the worst',
 'Prepare to meet your Mes

In [41]:
pref_prob

tensor(0.2697)

In [46]:
for  prompt in unique_prompts:
    print (prompt)

I saw what I believe
This is a well done
After searching for 6 months,
The movie opens with beautiful
Felix is watching an actor
Bob Cummings is excellent in
This is a sort of
When I was a child
Of course, all this nonsense
I saw it in Europe-plex.
I've read innumerable reviews talking
This movie has the feel
'Traffik', despite its title, is
Distributor: GOODTIMES home video <br
I thought the movie started
"I fear you speak upon
This movie was excellent from
A wonder. My favorite film.
My original title for this
I can not believe I
Why would Burt Lancaster allow
The movie Night Crossing captures
The other reviewer was completely
This movie is on the
I recently was in a
It's amazing what you can
This is one of Michael
This British film version of
Some giant scorpions are on
I first saw this one
This movie had terrible acting,
This (allegedly) based-on-a-true story TV
I've seen soap operas more
Renee Zellweger absolutely shines as
I have no idea what
If the following sounds tempting,
Thi