In [1]:
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
reward_name = "OpenAssistant/reward-model-deberta-v3-large-v2"
# Load the sequence-classification model first, then the tokenizer for the same checkpoint.
rank_model = AutoModelForSequenceClassification.from_pretrained(reward_name)
tokenizer = AutoTokenizer.from_pretrained(reward_name, model_max_length=1700)
# Move the model onto the GPU.
rank_model = rank_model.to('cuda')

In [3]:
base_dir = 'data/openai_summarize_from_feedback'
# Read the three CSVs in one pass; the order of the paths matches the order of the names.
original, chinese, english = map(
    pd.read_csv,
    (
        f"{base_dir}/english_original.csv",
        f"{base_dir}/english_to_chinese.csv",
        f"{base_dir}/english_to_chinese_to_english.csv",
    ),
)

In [4]:
# Show the (rows, columns) shape of each frame, in the order original, english, chinese.
tuple(frame.shape for frame in (original, english, chinese))

((83802, 3), (83802, 3), (83802, 3))

In [5]:
def get_score(prompt, response):
    """Score a single (prompt, response) pair with the reward model.

    Args:
        prompt: Prompt text.
        response: Candidate response text to be rated against ``prompt``.

    Returns:
        The single logit ``rank_model`` produces for the pair, as a Python float.
    """
    # Truncate so an over-long pair is cut to the tokenizer's configured maximum length.
    inputs = tokenizer(prompt, response, return_tensors='pt', truncation=True)
    # Follow the model's own device instead of hard-coding 'cuda'.
    inputs = inputs.to(rank_model.device)
    # Inference only: with autograd enabled the forward pass keeps activations
    # around for a backward pass that never happens, raising peak memory.
    with torch.no_grad():
        logits = rank_model(**inputs).logits
    return logits[0].item()

In [None]:
def get_batch_score(prompts, responses):
    """Score several (prompt, response) pairs in one forward pass.

    Args:
        prompts: Iterable of prompt strings.
        responses: Iterable of response strings, aligned index-for-index with
            ``prompts``.

    Returns:
        A flat list of floats holding the model's logits for the batch; an
        empty list when no pairs are supplied.
    """
    # Materialise the inputs so any iterable (not only a list) can be passed.
    prompts, responses = list(prompts), list(responses)
    if not prompts:
        return []
    # truncation=True mirrors get_score; padding makes the batch rectangular.
    inputs = tokenizer(
        prompts, responses, padding=True, truncation=True, return_tensors='pt'
    ).to(rank_model.device)
    # Inference only: skip autograd bookkeeping to keep peak memory down.
    with torch.no_grad():
        logits = rank_model(**inputs).logits
    return logits.flatten().tolist()

In [6]:
# Look up each loaded frame by a short dataset name.
dataset_map = dict(original=original, chinese=chinese, english=english)

In [7]:
# Swap missing values for empty strings in every frame; a snapshot of the keys is
# iterated so the reassignment never touches the dict view being walked.
for k in list(dataset_map):
    dataset_map[k] = dataset_map[k].fillna('')

In [9]:
name = 'chinese'
dataframe = dataset_map[name]
chosen_scores = []
reject_scores = []
# NOTE: `existing` (score cache keyed by (prompt, response)) and `all_scores` are not
# created in this cell; they must already be in the kernel, e.g. from the cell that
# loads them out of test.pkl.
# The progress total comes from the frame being iterated rather than from `original`.
for row in tqdm(dataframe.itertuples(index=False), total=len(dataframe)):
    chosen_key = (row.prompt, row.chosen)
    if chosen_key not in existing:
        existing[chosen_key] = get_score(*chosen_key)
    chosen_scores.append(existing[chosen_key])

    rejected_key = (row.prompt, row.rejected)
    if rejected_key not in existing:
        existing[rejected_key] = get_score(*rejected_key)
    reject_scores.append(existing[rejected_key])

all_scores[name] = {'chosen' : chosen_scores, 'rejected' : reject_scores}

100%|██████████| 83802/83802 [28:15<00:00, 49.43it/s]  


In [10]:
# NOTE: `all_scores` is not created in this cell; it must already be in the kernel.
for name in ['english']:
    dataframe = dataset_map[name]
    chosen_scores = []
    reject_scores = []
    # Fresh cache per dataset: identical (prompt, response) pairs are scored once.
    existing = {}
    # The progress total comes from the frame being iterated rather than from `original`.
    for row in tqdm(dataframe.itertuples(index=False), total=len(dataframe)):
        chosen_key = (row.prompt, row.chosen)
        if chosen_key not in existing:
            existing[chosen_key] = get_score(*chosen_key)
        chosen_scores.append(existing[chosen_key])

        rejected_key = (row.prompt, row.rejected)
        if rejected_key not in existing:
            existing[rejected_key] = get_score(*rejected_key)
        reject_scores.append(existing[rejected_key])

    all_scores[name] = {'chosen' : chosen_scores, 'rejected' : reject_scores}

100%|██████████| 83802/83802 [1:01:02<00:00, 22.88it/s] 


In [17]:
# Write the 'chosen' and 'rejected' score lists for the 'english' set as two CSV columns.
pd.DataFrame(all_scores['english']).to_csv(
    'model_scores/deberta_v3_large/english_to_chinese_to_english_scores.csv',
    index=False,
)

In [8]:
# Score the chosen and rejected responses of every dataset in `dataset_map`.
all_scores = {}
for name, dataframe in dataset_map.items():
    chosen_scores = []
    reject_scores = []
    # Fresh cache per dataset: identical (prompt, response) pairs are scored once.
    existing = {}
    # The progress total comes from the frame being iterated rather than from `original`.
    for row in tqdm(dataframe.itertuples(index=False), total=len(dataframe)):
        chosen_key = (row.prompt, row.chosen)
        if chosen_key not in existing:
            existing[chosen_key] = get_score(*chosen_key)
        chosen_scores.append(existing[chosen_key])

        rejected_key = (row.prompt, row.rejected)
        if rejected_key not in existing:
            existing[rejected_key] = get_score(*rejected_key)
        reject_scores.append(existing[rejected_key])

    all_scores[name] = {'chosen' : chosen_scores, 'rejected' : reject_scores}

100%|██████████| 83802/83802 [45:46<00:00, 30.51it/s]  
 58%|█████▊    | 49022/83802 [42:02<29:49, 19.43it/s]  


RuntimeError: CUDA out of memory. Tried to allocate 266.00 MiB (GPU 0; 15.78 GiB total capacity; 14.24 GiB already allocated; 78.94 MiB free; 14.50 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [8]:
import pickle

# Load `all_scores` and `existing` from test.pkl. Unpickling can execute arbitrary
# code, so only point this at a file you produced yourself.
with open('test.pkl', 'rb') as f:
    test = pickle.load(f)
all_scores = test['all_scores']
existing = test['existing']

In [18]:
# Display whichever frame `dataframe` currently refers to (it is rebound by the scoring cells).
dataframe

Unnamed: 0,prompt,chosen,rejected
0,I know that many people are often confused aft...,I've never dated like an adult. Now I don't kn...,I don't know how to please myself. What am I s...
1,I know that many people are often confused aft...,I don't know how to please myself. What am I s...,I don't know how to date / stream / stream wit...
2,I know that many people are often confused aft...,I've never dated like an adult. Now I don't kn...,I don't know how to date / stream / stream wit...
3,I know that many people are often confused aft...,I don't know how to please myself. What am I s...,Starting long-term relationships in adolescenc...
4,I know that many people are often confused aft...,Starting long-term relationships in adolescenc...,I don't know how to date / stream / stream wit...
...,...,...,...
83797,I don't know how to ask this and not sound stu...,"My boyfriend is slow, and I'm frustrated, and ...",My boyfriend is slow. I'm depressed. Sometimes...
83798,I really don't care anymore. Excluding doctors...,The medical field looks like hell. What are so...,Life sucks if you don't have the money. The me...
83799,Someone I think is a good friend. After some t...,"When you meet a man who's remodeled, becomes h...","I met a retried man, became my best friend, me..."
83800,"I went out with my boyfriend for about a year,...","There was a drunk last night at a party, drunk...","There was a drunk last night at a party, drunk..."
