In [1]:
import argparse
import json
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import sys
from typing import List
from modeling_letter import LETTER
import torch
import transformers
import torch.nn as nn 
# from peft import PeftModel
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaConfig, T5Tokenizer, T5Config, T5ForConditionalGeneration
import types
from utils import *
from collator import TestCollator, Collator
from evaluate import get_topk_results, get_metrics_results
from generation_trie import Trie
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
# from trl.core import respond_to_batch

class SimpleArgs:
    """Minimal attribute bag standing in for an argparse namespace.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kwargs):
        # A plain class stores attributes in its instance dict, so a bulk
        # update is equivalent to calling setattr for each pair.
        vars(self).update(kwargs)
def get_keys_by_value(my_dict, target_value):
    """Return a one-element list holding the first key whose value equals ``target_value``.

    Keys are converted with ``int``. When no value matches, ``[0]`` is
    returned as a fallback so the result always has length one.
    """
    matches = [int(k) for k, v in my_dict.items() if v == target_value]
    return matches[:1] if matches else [0]
# Notebook-wide configuration, mimicking the namespace a CLI parser would produce.
# Later cells mutate some of these fields in place (dataset, index_file, rl_type, num_beams).
args = SimpleArgs(
    seed=42,
    gpu_id=0,
    tasks="seqrec",
    dataset="Beauty",
    index_file=".index.json",  # suffix appended to the dataset name when opening the index JSON
    data_path="/root/LETTER/data",
    his_sep=", ",
    max_his_len=20,
    only_train_response=False,
    add_prefix=False,
    train_prompt_sample_num="1",
    train_data_sample_num="-1",
    valid_prompt_id=0,
    sample_valid=True,
    valid_prompt_sample_num=2,
    # NOTE(review): this path names an "Instruments" checkpoint while dataset is "Beauty";
    # the visible cells never read ckpt_path — confirm whether it is still needed.
    ckpt_path="./ckpt/Instruments/checkpoint-3465",
    results_file="./results/$DATASET/res.json",  # "$DATASET" is a literal here, not expanded by Python
    test_task="SeqRec",
    test_prompt_ids="0",
    sample_num=-1,
    num_beams=20,
    test_batch_size=20,
    rl_type='dpo',
    rl_neg_num=1
)
# Seed via the project helper before any data or model construction.
set_seed(args.seed)

device_map = {"": args.gpu_id}
device = torch.device("cuda",args.gpu_id)

# Start from the stock t5-small config and tokenizer; the vocabulary is extended below.
config = T5Config.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained(
    "t5-small",
    model_max_length=512,
)
train_data, valid_data = load_datasets(args)
# Register the dataset's extra tokens and keep the config's vocab size in sync.
add_num = tokenizer.add_tokens(train_data.get_new_tokens())
config.vocab_size = len(tokenizer)

print("add {} new token.".format(add_num))
print("data num:", len(train_data))

# Load <data_path>/<dataset>/<dataset><index_file>; `indices` is read again by the 'ipa' branch
# of the generation cell.
with open(os.path.join(os.path.join(args.data_path, args.dataset), args.dataset + args.index_file), 'r') as f:
    indices = json.load(f)

# tokenizer = T5Tokenizer.from_pretrained(args.ckpt_path)
# NOTE(review): the model is built from the config only; no checkpoint is loaded in this cell.
model = LETTER(config)
#model.set_hyper(args.temperature)
model.resize_token_embeddings(len(tokenizer))
model.to(device)
# model = T5ForConditionalGeneration.from_pretrained(
#     args.ckpt_path,
#     low_cpu_mem_usage=True,
#     device_map=device_map,
# )
prompt_ids = [0]

test_data = load_test_dataset(args)


all_items = test_data.get_all_items()



# Trie over the token ids of every candidate item, each sequence prefixed with id 0
# (presumably the decoder start token — TODO confirm). It constrains beam search so
# that only existing item identifiers can be generated.
candidate_trie = Trie(
        [
            [0] + tokenizer.encode(candidate)
            for candidate in all_items
        ]
    )
prefix_allowed_tokens = prefix_allowed_tokens_fn(candidate_trie)

# # collator = TestCollator(args, tokenizer)
# collator = Collator(args, tokenizer)
# train_loader = DataLoader(train_data,collate_fn=collator,batch_size=20)
# # test_loader = DataLoader(test_data, batch_size=args.test_batch_size, collate_fn=collator,
#                             # shuffle=True, num_workers=4, pin_memory=True)
# for i in train_loader:
#     break
from datasets import Dataset
def data_converter(data):
    """Turn an indexable dataset of dict rows into column form.

    Every row ``data[0] .. data[len(data) - 1]`` is read once; the column
    names are taken from the first row. Returns a tuple of the column
    dictionary and a ``datasets.Dataset`` built from that dictionary.
    """
    rows = [data[idx] for idx in range(len(data))]
    columns = {}
    for key in rows[0].keys():
        columns[key] = [row[key] for row in rows]
    return columns, Dataset.from_dict(columns)


  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


add 965 new token.
data num: 131413


In [4]:
# Export the initial train/valid splits of each dataset as JSON-lines files
# (one JSON object per line) under <dataset>/dpo/, using the index file currently set in args.
# NOTE(review): the loop rebinds the notebook-level train_data/valid_data/train_dict/valid_dict
# and keeps adding tokens to the shared tokenizer, so later cells see the state of the
# last dataset processed ('Yelp').
for dataset in  ['Beauty','Yelp']:
    # for index in ['.index.json','.new.index.json']:
    args.dataset=dataset
    train_data, valid_data = load_datasets(args)
    add_num = tokenizer.add_tokens(train_data.get_new_tokens())
    config.vocab_size = len(tokenizer)
    train_dict, train_data = data_converter(train_data)
    valid_dict, valid_data = data_converter(valid_data) # Initial state
    ensure_dir(f'/root/LETTER/data/{dataset}/dpo/')
    with open( f'/root/LETTER/data/{dataset}/dpo/train0.json', 'w') as f:
        for item in train_data:
            json.dump(item, f)  
            f.write('\n')
    with open( f'/root/LETTER/data/{dataset}/dpo/valid0.json', 'w') as f:
        for item in valid_data:
            json.dump(item, f)  
            f.write('\n')
    

In [96]:
len(valid_dict['prompt'])

30431

In [5]:
# Same export as the previous cell, but with the de-collided ".new.index.json" index;
# output files carry a "new-" prefix.
# NOTE(review): this duplicates the previous cell except for index_file and the file names;
# a shared helper taking the prefix as a parameter would avoid the copy.
args.index_file=".new.index.json"

for dataset in  ['Beauty','Yelp']:
    # for index in ['.index.json','.new.index.json']:
    args.dataset=dataset
    train_data, valid_data = load_datasets(args)
    add_num = tokenizer.add_tokens(train_data.get_new_tokens())
    config.vocab_size = len(tokenizer)
    train_dict, train_data = data_converter(train_data)
    valid_dict, valid_data = data_converter(valid_data) # Initial state
    ensure_dir(f'/root/LETTER/data/{dataset}/dpo/')
    with open( f'/root/LETTER/data/{dataset}/dpo/new-train0.json', 'w') as f:
        for item in train_data:
            json.dump(item, f)  
            f.write('\n')
    with open( f'/root/LETTER/data/{dataset}/dpo/new-valid0.json', 'w') as f:
        for item in valid_data:
            json.dump(item, f)  
            f.write('\n')
    

In [3]:
# Switch the shared config to the Beauty dataset / original index and reload the exported
# JSON-lines files through `datasets.load_dataset`.
args.dataset='Beauty'
args.index_file=".index.json"
args.rl_type='sprec'
args.per_device_batch_size=1024
args.num_beams=5
# Pick the "new-" prefixed files when the de-collided index is selected.
if 'new' in args.index_file:
    train_json_file = f"/root/LETTER/data/{args.dataset}/dpo/new-train0.json"
    valid_json_file = f"/root/LETTER/data/{args.dataset}/dpo/new-valid0.json"
else:
    train_json_file = f"/root/LETTER/data/{args.dataset}/dpo/train0.json"
    valid_json_file = f"/root/LETTER/data/{args.dataset}/dpo/valid0.json"
# with open(train_json_file, 'r') as f:
#     train_data = json.load(f)
# with open(valid_json_file, 'r') as f:
#     valid_data = json.load(f)
from datasets import load_dataset
# Each file is loaded on its own, and the code reads the "train" split of both results.
train_dataset = load_dataset("json", data_files=train_json_file)
train_data = train_dataset["train"]
valid_dataset = load_dataset("json", data_files=valid_json_file)
# NOTE(review): this is bound as `val_data`, while earlier cells use `valid_data`.
val_data = valid_dataset["train"]


In [4]:
# Batch the validation prompts for generation.
# NOTE(review): batch_size is hard-coded to 1024 here although args.per_device_batch_size
# was set to the same value in the previous cell.
data_loader = DataLoader(val_data,batch_size=1024, 
                             shuffle=True, num_workers=4, pin_memory=True)

# Replace the config-built LETTER model with the fine-tuned T5 checkpoint for this dataset.
model = T5ForConditionalGeneration.from_pretrained(
    f'/root/autodl-tmp/org_chkpt/ckpt/{args.dataset}/',
    low_cpu_mem_usage=True,
    device_map=device_map,
)

In [16]:
# Build a (prompt, chosen, rejected) preference set by letting the current model generate
# candidates with trie-constrained beam search and picking a rejected item per prompt.
#
# Only two selection strategies are implemented below. Validate the mode once, before any
# generation is run: otherwise `new_rej` would be undefined on the first batch (NameError)
# or silently reused from an earlier run of this cell.
if args.rl_type not in ('sprec', 'ipa'):
    raise ValueError(
        f"rl_type must be 'sprec' or 'ipa' to generate rejected samples, got {args.rl_type!r}"
    )

with torch.no_grad():
    new_data={'prompt':[], 'chosen':[], 'rejected':[]}
    prog_iter = tqdm(data_loader, leave=False, desc='Generating')
    for batch in prog_iter:
        inputs = tokenizer(batch['prompt'], 
            return_tensors="pt",
                padding="longest",
                max_length=tokenizer.model_max_length,
                truncation=True,
                return_attention_mask=True)
        # num_return_sequences == num_beams, so every prompt yields args.num_beams candidates.
        output = model.generate(
                        input_ids=inputs["input_ids"].to(model.device),
                        attention_mask=inputs["attention_mask"].to(model.device),
                        max_new_tokens=10,
                        # max_length=10,
                        prefix_allowed_tokens_fn=prefix_allowed_tokens,
                        num_beams=args.num_beams,
                        num_return_sequences=args.num_beams,
                        output_scores=True,
                        return_dict_in_generate=True,
                        early_stopping=True,
                    )
        output_ids = output["sequences"]
        scores = output["sequences_scores"]
        output = tokenizer.batch_decode(
                    output_ids, skip_special_tokens=True
            )
        # Item identifiers are compared without any whitespace between their tokens.
        predictions = [_.strip().replace(" ","") for _ in output]
        if args.rl_type=='sprec':
            # Rank candidates by the generator's own beam scores.
            new_rej = get_topk_results(predictions, scores, batch['chosen'], args.num_beams, all_items=all_items)
        elif args.rl_type=='ipa':
            # Rank candidates by the collaborative model's score instead.
            # NOTE(review): relies on `colab_model` and `indices` from other cells and on the
            # batch carrying 'origin_inters' / 'positions' — confirm these exist before use.
            response_items=torch.tensor([get_keys_by_value(indices,i.split(" ")) for i in output]).to(device) 
            collab_score = colab_model.predict(torch.stack(batch['origin_inters'],0).to(device),response_items.reshape(-1,args.num_beams),torch.stack(batch['positions'],0).to(device)).sigmoid().flatten()
            new_rej = get_topk_results(predictions, collab_score, batch['chosen'], args.num_beams, all_items=all_items)
        batch['rejected'] = new_rej
        for key in new_data.keys():
            new_data[key]+=batch[key]
    # return Dataset.from_dict(new_data)

                                                               

In [18]:
Dataset.from_dict(new_data)

Dataset({
    features: ['prompt', 'chosen', 'rejected'],
    num_rows: 22363
})

In [5]:

# Reload the fine-tuned T5 checkpoint for the dataset currently selected in args.
# NOTE(review): this is the same call as in the DataLoader cell; `model` is rebound each time.
model = T5ForConditionalGeneration.from_pretrained(
    f'/root/autodl-tmp/org_chkpt/ckpt/{args.dataset}/',
    low_cpu_mem_usage=True,
    device_map=device_map,
)
# reference_model = T5ForConditionalGeneration.from_pretrained(
#     f'/root/autodl-tmp/org_chkpt/ckpt/{args.dataset}/',
#     low_cpu_mem_usage=True,
#     device_map=device_map,
# )

In [None]:
def reward_func(prompts,completions, ground_truth,origin_item,origin_inters,positions):
    """Debug stub for a reward callback: prints its inputs and the decoded item ids.

    Each completion is split on single spaces, its first and last pieces are
    dropped, and the rest is looked up in ``train_data.indices``. Nothing is
    returned.
    """
    print(prompts,completions, ground_truth,origin_item,origin_inters,positions)
    item_rows = []
    for completion in completions:
        code_tokens = completion.split(' ')[1:-1]
        item_rows.append(get_keys_by_value(train_data.indices, code_tokens))
    item_tensor = torch.tensor(item_rows)
    print(item_tensor)
    

In [9]:
def data_generator():
    """Yield the rows of the notebook-level ``train_data`` one by one, in index order."""
    total = len(train_data)
    position = 0
    while position < total:
        yield train_data[position]
        position += 1

In [3]:
# Single forward pass on one batch `i`.
# NOTE(review): `i` is only bound by the "for i in train_loader: break" cell, which must run
# first; the recorded NameError shows this cell was executed on a kernel without it.
res = model(input_ids=i['input_ids'].to(model.device),attention_mask=i['attention_mask'].to(model.device),labels=i['labels'].to(model.device))

NameError: name 'i' is not defined

In [28]:
# Pull tensors out of the batch `i` left behind by an earlier cell.
input_ids = i['input_ids']
attention_mask = i['attention_mask']

# Insert a new axis at dim 1 and repeat the labels twice along it.
actions = i['labels'].unsqueeze(1).repeat(1,2,1)
def in_batch_negative_sampling(labels,origin_inters,positions,origin_item, N,collab_model):
    """Build per-row action sets from in-batch negatives and score them.

    For every row ``i`` of ``labels`` the first action is the row itself and the
    remaining ``N - 1`` actions are label rows drawn at random (without
    replacement) from the other rows of the batch.

    Args:
        labels: 2-D tensor of shape ``(B, L)``.
        origin_inters, positions, origin_item: passed through to
            ``collab_model.predict`` (in the order inters, item, positions);
            unused when ``collab_model`` is falsy.
        N: number of actions per row; clamped to ``B`` when the batch is smaller.
        collab_model: optional scorer exposing ``predict``; its sigmoid output
            is indexed as ``[row, candidate_row]``, so it is assumed to be
            ``(B, B)`` — TODO confirm against the model.

    Returns:
        ``(actions, reward)`` with shapes ``(B, N, L)`` and ``(B, N)``.
        The reward starts at 1 for the positive and 0 for negatives, plus the
        collaborative score (when a model is given), plus a per-row scalar:
        the number of matching positions between the sampled negatives and the
        row's label over the first ``L - 1`` positions, divided by ``L - 1``.
    """
    B, L = labels.shape
    if B<=N:
        N=B
    actions = torch.zeros((B, N, L), dtype=labels.dtype, device=labels.device)
    actions[:, 0, :] = labels
    reward = torch.tensor([1]+[0]*(N-1),device=labels.device,dtype=torch.float).unsqueeze(0).repeat_interleave(repeats=B,dim=0) # B,N; Basic Reward
    # Keep the collaborative scores optional: previously a falsy collab_model left
    # `collab_pred` unbound and the loop below raised NameError.
    collab_pred = None
    if collab_model:
        collab_pred = collab_model.predict(origin_inters,origin_item,positions).sigmoid() # B*B
    for i in range(B):
        # Explicit long dtype: torch.tensor([]) would be a float tensor, which cannot be
        # used as an index when the batch holds a single row.
        other_indices = torch.tensor([j for j in range(B) if j != i], dtype=torch.long)
        negative_indices = torch.randperm(len(other_indices))[:N - 1]
        negative_rows = other_indices[negative_indices]
        negative_samples = labels[negative_rows]
        actions[i, 1:, :] = negative_samples
        if collab_pred is not None:
            reward[i,:]+=collab_pred[i,torch.cat([torch.tensor([i]),negative_rows])]
        reward[i,:]+=(negative_samples[:,:-1]==labels[i,:-1]).sum()/(L-1)
    return actions, reward

import torch

def in_batch_negative_sampling_new(labels, origin_inters, positions, origin_item, N, collab_model, epsilon=0.001):
    """Vectorised in-batch negative sampling with multiplicative rewards.

    The first action of every row is its own label; the other ``N - 1`` actions
    are label rows of other batch members.

    Args:
        labels: 2-D tensor of shape ``(B, L)``.
        origin_inters, positions, origin_item: passed through to
            ``collab_model.predict`` (in the order inters, item, positions);
            unused when ``collab_model`` is falsy.
        N: number of actions per row; clamped to ``B`` when the batch is smaller.
        collab_model: optional scorer exposing ``predict``; its sigmoid output is
            gathered with row indices along dim 1, so it is assumed to be
            ``(B, B)`` — TODO confirm against the model.
        epsilon: probability (per call) of choosing negatives by ascending
            collaborative score instead of uniformly at random. Defaults to the
            0.001 threshold that was hard-coded before. The previous body also
            tried to grow a local ``epsilon`` in place, which raised
            ``UnboundLocalError`` on every call with a model and could not have
            persisted between calls anyway; callers wanting a schedule should
            pass the current value explicitly.

    Returns:
        ``(actions, reward)`` with shapes ``(B, N, L)`` and ``(B, N)``. The
        reward is ``[1.5, 0.5, ...]`` multiplied by ``collab score + 0.5`` (when
        a model is given) and by ``partial match + 0.5``.
    """
    B, L = labels.shape
    if B <= N:
        N = B
    actions = torch.zeros((B, N, L), dtype=labels.dtype, device=labels.device)
    actions[:, 0, :] = labels
    reward = torch.tensor([1] + [0] * (N - 1), device=labels.device, dtype=torch.float).unsqueeze(0).repeat_interleave(repeats=B, dim=0) +0.5
    # Row i of other_indices_matrix lists every batch index except i.
    all_indices = torch.arange(B, device=labels.device).repeat(B,1)
    mask = ~torch.eye(B, dtype=torch.bool, device=labels.device)
    other_indices_matrix = all_indices[mask].reshape(B, B - 1)

    collab_pred = None
    negative_indices_matrix = None
    if collab_model:
        collab_pred = collab_model.predict(origin_inters, origin_item, positions).sigmoid()
        if torch.rand(1).item() < epsilon:
            # Score-guided choice: the N-1 other rows with the lowest collaborative score.
            other_collab_pred = collab_pred.gather(1,other_indices_matrix)
            negative_indices_matrix = torch.argsort(other_collab_pred, dim=-1)[:, :N - 1]
    if negative_indices_matrix is None:
        # Uniform choice without replacement: argsort of random noise is a random permutation.
        negative_indices_matrix = torch.argsort(torch.randn([B, B - 1], device=labels.device), dim=-1)[:, :N - 1]

    negative_rows = other_indices_matrix.gather(1, negative_indices_matrix)
    negative_samples = labels[negative_rows]
    actions[:, 1:, :] = negative_samples
    if collab_pred is not None:
        self_indices = torch.arange(B, device=labels.device).unsqueeze(1)
        selected_indices = torch.cat([self_indices, negative_rows], dim=1)
        reward *= collab_pred.gather(1, selected_indices)+0.5
    # One scalar per row (summed over all negatives and the first L-1 positions), broadcast
    # over the N actions. This equals the previous (B, 1) factor when N == 2 and, unlike the
    # previous (B, N-1) factor, also broadcasts against the (B, N) reward for any other N.
    # It mirrors the per-row scalar used by in_batch_negative_sampling.
    partial_match = (negative_samples[:, :, :-1] == labels.unsqueeze(1)[:, :, :-1]).sum(dim=(-1, -2)).unsqueeze(1) / (L - 1)
    reward *= partial_match+0.5
    return actions, reward

In [3]:
# Hyper-parameters handed to the Bert4Rec constructor.
collab_model_args = SimpleArgs(
hidden_size=32,
num_heads=1,
trm_num=2,
dropout_rate=0.5, 
max_len=20,
)
from models.Bert4Rec import Bert4Rec
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
model_state_dict=torch.load('/root/GFGR/SeqRec/saved/Beauty/bert4rec/pytorch_model.bin')
# The second argument is the item-embedding row count minus 2 (presumably the item count
# without special ids — TODO confirm); the meaning of the leading `1` is not visible here.
colab_model = Bert4Rec(1,model_state_dict['state_dict']['item_emb.weight'].shape[0]-2,model.device,collab_model_args)
colab_model.load_state_dict(model_state_dict['state_dict'])
colab_model.eval()
colab_model.to(model.device)
# Requires the batch `i` left behind by the "for i in train_loader: break" cell.
ns_res = in_batch_negative_sampling(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)

  ns_res = in_batch_negative_sampling(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)


In [38]:

# Shape check of the collaborative model's raw prediction for one batch `i`
# (argument order: interactions, items, positions).
colab_res = colab_model.predict(torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['origin_item']).to(model.device),torch.tensor(i['positions']).to(model.device))
colab_res.shape

torch.Size([20, 20])

In [87]:
%time
set_seed(42)
ns_res = in_batch_negative_sampling(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)

CPU times: user 7 μs, sys: 1e+03 ns, total: 8 μs
Wall time: 15.5 μs


  ns_res = in_batch_negative_sampling(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)


In [29]:
%time
set_seed(42)
ns_res_new = in_batch_negative_sampling_new(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)

CPU times: user 47 μs, sys: 7 μs, total: 54 μs
Wall time: 15 μs


  ns_res_new = in_batch_negative_sampling_new(i['labels'].to(model.device),torch.tensor(i['origin_inters']).to(model.device),torch.tensor(i['positions']).to(model.device),torch.tensor(i['origin_item']).to(model.device),2,colab_model)


In [2]:
def _process_train_rl_data(self):
    def random_neq(candidates, s=[], neg_num=1):
        # if neg_num > len(candidates):
        #     return np.array(list(candidates))
        neg_list = random.sample(list(candidates), neg_num)
        return np.array(neg_list)
    inter_data = []
    all_items = set(self.indices.keys())
    for uid  in self.remapped_inters:
        items = self.remapped_inters[uid][:-2]
        nonneg_items = self.inters[uid]
        origin_items = nonneg_items[:-2]
        
        for i in range(1, len(items)):
            one_data = dict()
            # one_data["user"] = uid
            one_data["item"] = items[i]
            one_data["origin_item"] = origin_items[i]
            history = items[:i]
            origin_history = origin_items[:i]
            if self.max_his_len > 0:
                history = history[-self.max_his_len:]
                history_len =len(origin_history)
            if history_len > self.max_his_len:
                mask_len = 0
                positions = list(range(1, self.max_his_len+1))
                origin_history=origin_history[-self.max_his_len:]
            else:
                mask_len = self.max_his_len - history_len
                positions = list(range(1, history_len+1))
                origin_seq = np.zeros([self.max_his_len], dtype=np.int32)
                origin_seq[-history_len:] = origin_history
                origin_history = origin_seq
            positions= positions[-self.max_his_len:]
            positions = [0] * mask_len + positions
            one_data["positions"] = np.array(positions)
            if self.add_prefix:
                history = [str(k+1) + ". " + item_idx for k, item_idx in enumerate(history)]
            one_data["inters"] = "".join(history)
            one_data["origin_inters"] = origin_history
            neg_items = random_neq(all_items,nonneg_items,neg_num=1)
            one_data["origin_neg"] = neg_items
            one_data["neg"] = ["".join(self.indices[str(i)]) for i in neg_items] 
            inter_data.append(one_data)
    return inter_data

In [3]:
# Smoke-test the free function by passing the first underlying dataset object as `self`.
tmp_inter = _process_train_rl_data(train_data.datasets[0])

In [2]:
class RLCollator(object):
    """Collate raw sample dicts into tokenised tensors for RL-style training.

    With ``args.rl_type == 'dpo'`` each sample must provide ``prompt``,
    ``chosen`` and ``rejected`` texts. For any other ``rl_type`` each sample
    must provide ``prompt``, ``ground_truth``, ``origin_item``,
    ``origin_inters`` and ``positions``.
    """

    def __init__(self, args, tokenizer):
        self.args = args
        self.only_train_response = args.only_train_response
        self.tokenizer = tokenizer
        # Fall back to id 0 as the padding id when the tokenizer defines none.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token_id = 0
        # print(self.tokenizer.model_max_length)

    def _tokenize(self, texts):
        """Tokenise a list of strings, padded to the longest and truncated to the model maximum."""
        return self.tokenizer(texts,
                              return_tensors="pt",
                              padding="longest",
                              max_length=self.tokenizer.model_max_length,
                              truncation=True,
                              return_attention_mask=True)

    def __call__(self, batch):
        """Return a dict of tensors for ``batch`` (a list of sample dicts).

        DPO mode returns ``{prompt,chosen,rejected}_{input_ids,attention_mask}``.
        Otherwise returns ``inputs`` (tokenised prompts carrying ``labels`` with
        padding replaced by -100) plus ``origin_item``, ``origin_inters`` and
        ``positions`` tensors.
        """
        if self.args.rl_type=='dpo':
            prompt = self._tokenize([d["prompt"] for d in batch])
            chosen = self._tokenize([d["chosen"] for d in batch])
            rejected = self._tokenize([d["rejected"] for d in batch])
            return dict(prompt_input_ids=prompt["input_ids"], prompt_attention_mask=prompt['attention_mask'],
                chosen_input_ids=chosen["input_ids"], chosen_attention_mask=chosen['attention_mask'],
                rejected_input_ids=rejected["input_ids"], rejected_attention_mask=rejected['attention_mask'])

        inputs = self._tokenize([d["prompt"] for d in batch])
        labels = self._tokenize([d["ground_truth"] for d in batch])
        inputs['labels'] = labels['input_ids']
        # Replace padding positions in the labels with -100.
        inputs['labels'][inputs['labels'] == self.tokenizer.pad_token_id] = -100
        # This branch used to read `prompt['input_ids'].device`, but `prompt` only exists in
        # the DPO branch, so every non-DPO batch raised NameError.
        target_device = inputs['input_ids'].device
        origin_item = torch.tensor([d["origin_item"] for d in batch]).to(target_device)
        origin_inters = torch.tensor([d["origin_inters"] for d in batch]).to(target_device)
        positions = torch.tensor([d["positions"] for d in batch]).to(target_device)

        return dict(inputs=inputs,origin_item=origin_item,origin_inters=origin_inters,positions=positions)


In [61]:
# Batch the training rows through RLCollator; which collator branch runs depends on the
# value of args.rl_type at call time.
collator = RLCollator(args,tokenizer)
train_loader = DataLoader(train_data,collate_fn=collator,batch_size=128)

In [4]:
import trl 
# Wrap the fine-tuned T5 checkpoint in trl's seq2seq value-head model for PPO.
# NOTE(review): the wrapper class is constructed directly around a loaded model rather than
# through its own loader — confirm this is supported by the installed trl version.
model = trl.AutoModelForSeq2SeqLMWithValueHead(T5ForConditionalGeneration.from_pretrained(
    f'/root/autodl-tmp/org_chkpt/ckpt/{args.dataset}/',
    low_cpu_mem_usage=True,
    device_map=device_map,
))
# Set by hand after construction; presumably read by the trainer — TODO confirm.
model.is_peft_model=False
# reference_model = T5ForConditionalGeneration.from_pretrained(
#      f'/root/autodl-tmp/org_chkpt/ckpt/{args.dataset}/',
#     low_cpu_mem_usage=True,
#     device_map=device_map,
# )


In [62]:
# Grab the first batch from train_loader and leave it bound to the notebook-level name `i`,
# which many other cells read.
for i in train_loader:
    break

In [9]:
# Same Bert4Rec loading as the earlier collab-model cell, except that the device comes from
# the value-head wrapper's inner model (model.pretrained_model.device).
# NOTE(review): duplicated cell — a small loader function taking the device would avoid it.
collab_model_args = SimpleArgs(
hidden_size=32,
num_heads=1,
trm_num=2,
dropout_rate=0.5, 
max_len=20,
)
from models.Bert4Rec import Bert4Rec
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
model_state_dict=torch.load('/root/GFGR/SeqRec/saved/Beauty/bert4rec/pytorch_model.bin')
colab_model = Bert4Rec(1,model_state_dict['state_dict']['item_emb.weight'].shape[0]-2,model.pretrained_model.device,collab_model_args)
colab_model.load_state_dict(model_state_dict['state_dict'])
colab_model.eval()
colab_model.to(model.pretrained_model.device)

Bert4Rec(
  (item_emb): Embedding(12102, 32, padding_idx=0)
  (pos_emb): Embedding(120, 32)
  (emb_dropout): Dropout(p=0.5, inplace=False)
  (backbone): BertBackbone(
    (attention_layernorms): ModuleList(
      (0-1): 2 x LayerNorm((32,), eps=1e-08, elementwise_affine=True)
    )
    (attention_layers): ModuleList(
      (0-1): 2 x MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
      )
    )
    (forward_layernorms): ModuleList(
      (0-1): 2 x LayerNorm((32,), eps=1e-08, elementwise_affine=True)
    )
    (forward_layers): ModuleList(
      (0-1): 2 x PointWiseFeedForward(
        (conv1): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
        (dropout1): Dropout(p=0.5, inplace=False)
        (relu): ReLU()
        (conv2): Conv1d(32, 32, kernel_size=(1,), stride=(1,))
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
    (last_layernorm): LayerNorm((32,), eps=1e-08, elementwise_affine=True)
  )
  (lo

In [78]:
# PPO configuration; batch_size matches the DataLoader batch size (128) used for train_loader.
ppo_config = PPOConfig(
    batch_size=128,
)
# No reference model is passed explicitly.
ppo_trainer = PPOTrainer(config=ppo_config, model=model,tokenizer=tokenizer)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [64]:
def get_keys_by_value(my_dict, target_value):
    """Return every key (converted with ``int``) whose value equals ``target_value``.

    Unlike the definition in the first cell, all matches are kept and the
    fallback for "no match" is ``[-1]``.
    """
    found = []
    for key, value in my_dict.items():
        if value == target_value:
            found.append(int(key))
    if not found:
        found.append(-1)
    return found
device = model.pretrained_model.device
# Generate one response per prompt in the batch `i`.
response_tensor=model.generate(input_ids=i['inputs']['input_ids'].to(device),attention_mask=i['inputs']['attention_mask'].to(device))
# Drop the first and last token id of each response before decoding, then split on spaces.
decoded_response=[tokenizer.decode(i[1:-1]).split(' ') for i in response_tensor]
# NOTE(review): torch.tensor needs equally long rows; a response matching more than one key
# would make this list ragged.
response_items=torch.tensor([get_keys_by_value(train_data.indices,i) for i in decoded_response])
# Reward = exact-match indicator against the ground-truth item + sigmoid collaborative score.
reward = (response_items.squeeze()==i['origin_item']).long().to(device).squeeze()+colab_model.predict(torch.tensor(i['origin_inters']).to(device),response_items.to(device),torch.tensor(i['positions']).to(device)).sigmoid().squeeze()


  reward = (response_items.squeeze()==i['origin_item']).long().to(device).squeeze()+colab_model.predict(torch.tensor(i['origin_inters']).to(device),response_items.to(device),torch.tensor(i['positions']).to(device)).sigmoid().squeeze()


In [79]:
# One PPO optimisation step; queries, responses and rewards are passed as per-sample lists
# obtained by unbinding the batch dimension.
train_stats = ppo_trainer.step(list(i['inputs']['input_ids'].to(device).unbind(dim=0)), list(response_tensor.unbind(dim=0)), list(reward.unbind(dim=0)))

In [80]:
train_stats

{'objective/kl': 0.0,
 'objective/kl_dist': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 'objective/logprobs': array([[-3.76126862e+00, -4.91392851e+00, -1.44429564e+00,
         -3.39802384e-01,  0.00000000e+00],
        [-3.30107641e+00, -1.97480762e+00, -1.94255662e+00,
         -7.31331706e-01,  0.00000000e+00],
        [-3.42423558e+00, -3.05836535e+00, -8.84022855e-04,
         -5.25592919e-03,  0.00000000e+00],
        [-3.45880651e+00, -2.9

tensor([[    0, 32173, 32550, 32756, 32847,     1],
        [    0, 32173, 32550, 32756, 32847,     1],
        [    0, 32223, 32513, 32649, 32858,     1],
        [    0, 32117, 32465, 32798, 33035,     1],
        [    0, 32118, 32513, 32600, 32869,     1],
        [    0, 32118, 32513, 32600, 32869,     1],
        [    0, 32167, 32475, 32561, 32881,     1],
        [    0, 32192, 32309, 32657, 32875,     1],
        [    0, 32192, 32475, 32653, 32889,     1],
        [    0, 32192, 32489, 32569, 32985,     1],
        [    0, 32192, 32489, 32569, 32985,     1],
        [    0, 32192, 32489, 32569, 32985,     1],
        [    0, 32192, 32475, 32653, 32889,     1],
        [    0, 32287, 32461, 32590, 33027,     1],
        [    0, 32192, 32475, 32653, 32889,     1],
        [    0, 32167, 32475, 32561, 32881,     1],
        [    0, 32178, 32464, 32683, 32979,     1],
        [    0, 32178, 32464, 32683, 32979,     1],
        [    0, 32287, 32513, 32800, 32905,     1],
        [   

In [35]:
# Clone the `py310` conda environment into `ppo` and pin trl to 0.11.4.
# NOTE(review): only the first line carries the `!` shell prefix; verify that the activate
# and install lines ran in the intended environment (the running kernel does not switch).
!conda create -n ppo --clone py310
conda activate ppo
pip install trl==0.11.4

Retrieving notices: ...working... done
Source:      /root/miniconda3/envs/py310
Destination: /root/miniconda3/envs/ppo
Packages: 25
Files: 41067

Downloading and Extracting Packages


Downloading and Extracting Packages

Preparing transaction: done
Verifying transaction: done
Executing transaction: done
#
# To activate this environment, use
#
#     $ conda activate ppo
#
# To deactivate an active environment, use
#
#     $ conda deactivate



In [2]:
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

torch.Size([20, 1])

In [16]:
import collections
import json
import logging

import numpy as np
import torch
from time import time
from torch import optim
from tqdm import tqdm

from torch.utils.data import DataLoader

import argparse
import os
def check_collision(all_indices_str):
    """Return True when every entry of ``all_indices_str`` is unique.

    Note the polarity: despite the name, ``True`` means *no* collision.
    ``all_indices_str`` must support ``len`` and ``.tolist()``.
    """
    distinct_codes = set(all_indices_str.tolist())
    return len(all_indices_str) == len(distinct_codes)

def get_indices_count(all_indices_str):
    """Count how many times each entry occurs; returns a ``defaultdict(int)``."""
    tally = collections.defaultdict(int)
    for code in all_indices_str:
        tally[code] = tally[code] + 1
    return tally

def get_collision_item(all_indices_str):
    """Group positions that share the same entry.

    Returns a list of position lists, one per entry that occurs more than
    once, ordered by first occurrence of the entry.
    """
    positions_by_code = {}
    for position, code in enumerate(all_indices_str):
        positions_by_code.setdefault(code, []).append(position)
    return [group for group in positions_by_code.values() if len(group) > 1]

In [78]:
# Load the item index of one dataset and find the groups of items sharing an identical code.
dataset='Yelp'
input_file=f'/root/LETTER/data/{dataset}/{dataset}.index.json'
output_file=f'/root/LETTER/data/{dataset}/{dataset}.new.index.json'
with open(input_file, 'r') as fp:
    all_indices_dict = json.load(fp)
all_indices = list(all_indices_dict.values())
all_indices=np.array(all_indices)
# Use each row's string form as a hashable fingerprint.
all_indices_str = np.array([str(i) for i in all_indices])
# NOTE(review): the return value is discarded here (not the last expression of the cell).
check_collision(all_indices_str)
collision_item_groups = get_collision_item(all_indices_str)
# Token templates, one per code position.
prefix = ["<a_{}>","<b_{}>","<c_{}>","<d_{}>","<e_{}>","<f_{}>"]
origin_len = len(all_indices[0])

In [25]:
# all_indices = np.c_[all_indices,np.repeat(prefix[all_indices.shape[-1]].format(0), all_indices.shape[0]).reshape(-1, 1)]
# all_indices_str = np.array([str(i) for i in all_indices])

In [85]:
# For every group of colliding items keep the first item unchanged and give each other item
# a new last token numbered 110 + its position in the group.
# NOTE(review): 110 is a magic offset; the displayed index contains last tokens above it
# (e.g. '<d_202>'), so a rewritten code could coincide with an existing one — the re-check
# cell must come back empty. The array dtype shown is '<U7', so a token longer than
# 7 characters would be truncated on assignment.
for collision in collision_item_groups:
    for i in range(1,len(collision)):
        origin_prefix = all_indices[collision[i]][:-1]
        all_indices[collision[i]]= np.concatenate([origin_prefix,np.array([prefix[origin_len-1].format(int(i+110))])],0)

In [86]:
# all_indices
# Recompute the fingerprints after the rewrite and look for any remaining collisions.
all_indices_str = np.array([str(i) for i in all_indices])
collision_item_groups = get_collision_item(all_indices_str)

In [88]:
all_indices_str

array(["['<a_80>' '<b_235>' '<c_240>' '<d_54>']",
       "['<a_173>' '<b_238>' '<c_127>' '<d_104>']",
       "['<a_139>' '<b_251>' '<c_235>' '<d_202>']", ...,
       "['<a_222>' '<b_74>' '<c_57>' '<d_212>']",
       "['<a_203>' '<b_240>' '<c_233>' '<d_184>']",
       "['<a_190>' '<b_157>' '<c_28>' '<d_133>']"],
      shape=(20033,), dtype='<U41')

In [89]:

# Rebuild the {row position -> list of code tokens} mapping from the de-collided array and
# write it to output_file (json.dump stores the integer keys as strings).
# A comprehension is used so that no loop variable leaks into the notebook namespace: the
# previous loop variable was named `indices` and overwrote the global `indices` mapping
# loaded in the first cell, which the 'ipa' branch of the generation cell reads.
all_indices_dict = {
    item_id: list(codes)
    for item_id, codes in enumerate(all_indices.tolist())
}


with open(output_file, 'w') as fp:
    json.dump(all_indices_dict,fp)


In [74]:
collision_item_groups

[]

In [65]:
all_indices[8371]

array(['<a_226>', '<b_162>', '<c_212>', '<d_2>'], dtype='<U7')