# Controllable generation via RL about text-guided voice conversion


In [1]:
import torch
from datasets import load_from_disk
from vc.encodec_model.nar_bart_model import NARBartForConditionalGeneration
from transformers import AutoTokenizer, BartForConditionalGeneration
import sys
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
from tqdm import tqdm

# load the model
ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
nar_checkpoint = "lca0503/speech-chatgpt-base-nar-v2-epoch4-wotrans"

device = "cuda" if torch.cuda.is_available() else "cpu"
# model = AutoModelForCausalLMWithValueHead.from_pretrained(ar_checkpoint)
# model = BartForConditionalGeneration.from_pretrained(ar_checkpoint)
model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(ar_checkpoint)
model_ref = create_reference_model(model)
tokenizer = AutoTokenizer.from_pretrained(ar_checkpoint)
nar_tokenizer = AutoTokenizer.from_pretrained(nar_checkpoint)
nar_model = NARBartForConditionalGeneration.from_pretrained(nar_checkpoint)
tokenizer.pad_token = tokenizer.eos_token

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

2024-05-15 10:26:21.089444: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from datetime import datetime
import os

now = datetime.now()
ts = now.strftime("%m%d-%H%M")
print("timestamp:", ts)

# define the path
base_path = "/work/b0990106x/trl"
agent_input_dir = f"{base_path}/data-encodec"
agent_output_dir = f"{base_path}/output/{ts}"
env_input_dir = agent_output_dir
env_output_dir = agent_input_dir

if not os.path.exists(agent_output_dir):
    os.makedirs(agent_output_dir)

timestamp: 0515-1026


In [3]:
# load the dataset
dataset = load_from_disk(agent_input_dir)

In [4]:
all_src_encodec_layers = []
all_src_encodec = []
all_instruction = []
# all_instruction_ids = []

layer_len = 8
data_len = 3
# data_len = len(dataset)
print("data_len:", data_len)

for i in range(layer_len):
    all_src_encodec_layers.append(dataset[f"src_encodec_{i}"])

for i in range(data_len):
    src_encodec = []
    for j in range(layer_len):
        src_encodec.append(all_src_encodec_layers[j][i])
    all_src_encodec.append(src_encodec)

    all_instruction.append(dataset["instruction"][i])
    # all_instruction_ids.append(ar_tokenizer(all_instruction[i])["input_ids"][1 : -1])

data_len: 3


In [5]:
# print the length of all src encodec
for i in range(data_len):
    print(f"src_encodec_{i} len:", len(all_src_encodec[i][0]))

src_encodec_0 len: 327
src_encodec_1 len: 336
src_encodec_2 len: 131


### Debugging Section

In [6]:
observation_list = []
for i in range(data_len):
    observation_list.append(
        {
            "input": "",
            "src_encodec": all_src_encodec[i],
            "instruction": all_instruction[i],
        }
    )

# # pop the first one
observation_list.pop(0)
all_instruction.pop(0)
observation_list.pop(0)
all_instruction.pop(0)
print("observation_list:", observation_list)
print("all_instruction:", all_instruction)


# for i in range(data_len):
#     observation_list.append({'input': "", 'src_encodec': all_src_encodec[i], 'instruction': all_instruction[i]})

observation_list: [{'input': '', 'src_encodec': [[835, 339, 999, 629, 604, 462, 314, 600, 846, 562, 846, 358, 984, 393, 182, 453, 584, 535, 407, 1021, 701, 843, 945, 495, 563, 495, 495, 727, 317, 604, 475, 835, 835, 835, 339, 475, 339, 123, 254, 103, 561, 858, 646, 755, 375, 548, 435, 233, 323, 395, 819, 475, 339, 835, 779, 257, 339, 341, 170, 38, 38, 103, 408, 62, 141, 731, 73, 651, 143, 875, 321, 310, 310, 972, 679, 582, 808, 813, 808, 291, 722, 982, 627, 192, 764, 531, 291, 466, 567, 601, 771, 112, 688, 348, 793, 793, 11, 192, 23, 983, 1022, 23, 73, 73, 276, 537, 103, 53, 148, 148, 148, 463, 176, 148, 463, 463, 463, 463, 463, 463, 463, 433, 25, 472, 257, 228, 395, 133, 395, 475, 126], [646, 841, 168, 1023, 277, 820, 278, 215, 58, 592, 607, 607, 349, 346, 504, 632, 482, 14, 968, 588, 529, 904, 662, 662, 602, 1013, 662, 386, 617, 870, 648, 1023, 277, 277, 913, 200, 1007, 503, 807, 144, 132, 558, 984, 164, 610, 66, 830, 925, 744, 129, 87, 648, 391, 646, 424, 700, 646, 713, 702, 443, 4,

In [7]:
import sys
sys.path.append("/work/b0990106x/TextRL/vc")
from vc.trainer_encodec_vc_inference import get_ar_prediction
from types import SimpleNamespace


args_predict = SimpleNamespace(output_path=f"{base_path}/output/{ts}/example.wav", seed=0, device=device)

decode_ar = get_ar_prediction(args_predict, model, nar_model, tokenizer, nar_tokenizer, all_src_encodec[0], all_instruction[0], 0)

decode_ar_str = tokenizer.convert_tokens_to_string(
                [f"v_tok_{u}" for u in decode_ar]
            )

print(decode_ar_str)

Decode AR:  tensor([[    2,     0, 51100, 51100, 51063, 50633, 51111, 50800, 50800, 51002,
         51002, 50642, 50495, 50364, 50821, 51151, 50364, 50676, 50680, 50643,
         51202, 51245, 50676, 50676, 51171, 50798, 51199, 50312, 50703, 50703,
         50753, 50673, 50371, 50604, 50744, 50744, 50744, 50698, 50416, 50416,
         51171, 50767, 51171, 50689, 50473, 50802, 50802, 50338, 50948, 51012,
         51012, 50858, 51012, 50650, 50590, 50756, 51287, 50498, 50858, 51019,
         50686, 50427, 51273, 51232, 50317, 50660, 50660, 50522, 51169, 50317,
         50574, 50700, 50513, 51019, 51040, 51262, 51262, 50601, 50813, 50487,
         50789, 50406, 50744, 51205, 50541, 50321, 50673, 51100, 50740, 50471,
         51044, 50838, 51196, 50863, 50276, 50285, 51215, 50634, 50271, 50731,
         50981, 51106, 50347, 50634, 50553, 50821, 51168, 50821, 50657, 51186,
         51241, 51100, 50368, 50290, 50386, 51100, 51100, 51100, 50604, 50604,
         51225, 50653, 50582, 50406, 505

In [8]:
from datetime import datetime
import os

now = datetime.now()
ts = now.strftime("%m%d-%H%M")
print("timestamp:", ts)
log_dir = f"logs/{ts}"
os.makedirs(log_dir, exist_ok=True)

lr= 0.0000141
batch_size = 1
mini_batch_size = 1


ppo_config = PPOConfig(batch_size=1, mini_batch_size=1, log_with='tensorboard', learning_rate=lr, project_kwargs={'logging_dir': log_dir})
ppo_trainer = PPOTrainer(config = ppo_config, model = model, ref_model=model_ref, tokenizer=tokenizer)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


timestamp: 0515-1026




In [9]:
from importlib import reload
from NISQA.nisqa.NISQA_model import nisqaModel

import sys
sys.path.append("/work/b0990106x/trl/vc") 
from vc.trainer_encodec_vc_inference import get_ar_prediction_v2


def get_reward(predicted_list, single_src_encodec, single_instruction, episode_counter,finish):
    reward = 0
    # predicted_list will be one text of "v_tok_410v_tok_411v_tok_595 ...""
    # predicted_token will be a list of [v_tok_410, v_tok_411, v_tok_595 ...]
    
    if finish or len(predicted_list) >= 1000:
        try:
            # predicted_tokens = predicted_list[0][1:-1]
            predicted_tokens = [f'v_tok_{u}' for u in predicted_list.split("v_tok_")[1:]]
            predicted_ids = tokenizer.convert_tokens_to_ids([f"{u}" for u in predicted_tokens])
            print("predict length: ", len(predicted_ids))
            print("predicted_tokens: ", predicted_tokens)
            print("predicted_ids: ", predicted_ids)

            decode_ar = get_ar_prediction_v2(
                args_predict,
                predicted_ids,
                nar_model,
                tokenizer,
                nar_tokenizer,
                single_src_encodec,
                single_instruction,
                episode_counter,
            )
            # print("decode_ar:", decode_ar)
            
            # use nisqa to get the reward
            args_nisqa = {
                "mode": "predict_file",
                "pretrained_model": f"{base_path}/NISQA/weights/nisqa.tar",
                "deg": f"{base_path}/output/{ts}/example.wav",
                "data_dir": None,
                "output_dir": f"{base_path}/NISQA/result/",
                "csv_file": None,
                "csv_deg": None,
                "num_workers": 0,
                "bs": 1,
                "ms_channel": None,
            }
            args_nisqa["tr_bs_val"] = args_nisqa["bs"]
            args_nisqa["tr_num_workers"] = args_nisqa["num_workers"]

            nisqa = nisqaModel(args_nisqa)
            prediction = nisqa.predict()
            reward = float(prediction["mos_pred"].iloc[0])*10
            # reward = float(prediction["mos_pred"].iloc[0])-3.0
            print(
                "Length of predicted_list:",
                len(predicted_list),
                ", Reward:",
                reward,
            )

        except Exception as e:
            print("Error:", e)
            reward = 0

    return reward

In [10]:
import logging
import os
import sys

output_log_path = f"logs/log_{ts}.log"

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

handlers = logger.handlers[:]
for handler in handlers:
    logger.removeHandler(handler)

file_handler = logging.FileHandler(output_log_path)
logger.addHandler(file_handler)

In [11]:
import time
from trl.core import respond_to_batch

start_time = time.time()

try:
    for iteration in tqdm(range(100)):
        query_txt = decode_ar_str
        query_tensor = tokenizer.encode(query_txt, return_tensors="pt")
        query_tensor = query_tensor.to(device)
        
        # FILEPATH: /work/b0990106x/trl/textrl.ipynb
        response_tensor = respond_to_batch(model, query_tensor, txt_len=500)
        # response_tensor = model.generate(query_tensor)
        response_text = tokenizer.decode(response_tensor[0], skip_special_tokens=True)
        # Mimic batch structure
        batch = {
            "query": query_tensor,
            "response": response_text
        }
        reward_float = get_reward(response_text, all_src_encodec[0], all_instruction[0], iteration, True)
        reward_length = len(tokenizer.decode(response_tensor[0], skip_special_tokens=True))
        reward = torch.tensor([float(reward_float)], device=device)
        train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], [reward])
        ppo_trainer.log_stats(train_stats, batch, reward)

        print(f"Iteration {iteration + 1}, Reward: {reward.item()}, Length: {len(response_tensor[0])}, Reward_Length: {reward_length}, Predicted Text: {response_text}")

except Exception as e:
    print("An error occurred:", e)

print("used time: ", time.time() - start_time)

  0%|          | 0/100 [00:00<?, ?it/s]

predict length:  393
predicted_tokens:  ['v_tok_753', 'v_tok_353', 'v_tok_619', 'v_tok_325', 'v_tok_976', 'v_tok_73', 'v_tok_133', 'v_tok_20', 'v_tok_779', 'v_tok_233', 'v_tok_629', 'v_tok_593', 'v_tok_475', 'v_tok_99', 'v_tok_904', 'v_tok_421', 'v_tok_216', 'v_tok_435', 'v_tok_234', 'v_tok_213', 'v_tok_112', 'v_tok_358', 'v_tok_754', 'v_tok_435', 'v_tok_846', 'v_tok_437', 'v_tok_358', 'v_tok_739', 'v_tok_951', 'v_tok_593', 'v_tok_213', 'v_tok_524', 'v_tok_951', 'v_tok_233', 'v_tok_479', 'v_tok_479', 'v_tok_504', 'v_tok_1023', 'v_tok_803', 'v_tok_317', 'v_tok_940', 'v_tok_1022', 'v_tok_151', 'v_tok_257', 'v_tok_106', 'v_tok_904', 'v_tok_395', 'v_tok_904', 'v_tok_904', 'v_tok_661', 'v_tok_99', 'v_tok_309', 'v_tok_435', 'v_tok_248', 'v_tok_248', 'v_tok_754', 'v_tok_997', 'v_tok_997', 'v_tok_997', 'v_tok_358', 'v_tok_830', 'v_tok_233', 'v_tok_951', 'v_tok_146', 'v_tok_411', 'v_tok_172', 'v_tok_535', 'v_tok_141', 'v_tok_721', 'v_tok_317', 'v_tok_317', 'v_tok_151', 'v_tok_59', 'v_tok_56', '

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Length of predicted_list: 3448 , Reward: 22.342751026153564


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
  1%|          | 1/100 [00:14<24:05, 14.60s/it]

Iteration 1, Reward: 22.342750549316406, Length: 500, Reward_Length: 3448, Predicted Text: v_tok_753v_tok_353v_tok_619v_tok_325v_tok_976v_tok_73v_tok_133v_tok_20v_tok_779v_tok_233v_tok_629v_tok_593v_tok_475v_tok_99v_tok_904v_tok_421v_tok_216v_tok_435v_tok_234v_tok_213v_tok_112v_tok_358v_tok_754v_tok_435v_tok_846v_tok_437v_tok_358v_tok_739v_tok_951v_tok_593v_tok_213v_tok_524v_tok_951v_tok_233v_tok_479v_tok_479v_tok_504v_tok_1023v_tok_803v_tok_317v_tok_940v_tok_1022v_tok_151v_tok_257v_tok_106v_tok_904v_tok_395v_tok_904v_tok_904v_tok_661v_tok_99v_tok_309v_tok_435v_tok_248v_tok_248v_tok_754v_tok_997v_tok_997v_tok_997v_tok_358v_tok_830v_tok_233v_tok_951v_tok_146v_tok_411v_tok_172v_tok_535v_tok_141v_tok_721v_tok_317v_tok_317v_tok_151v_tok_59v_tok_56v_tok_408v_tok_780v_tok_475v_tok_835v_tok_475v_tok_461v_tok_206v_tok_475v_tok_430v_tok_59v_tok_747v_tok_677v_tok_23v_tok_317v_tok_658v_tok_901v_tok_598v_tok_11v_tok_11v_tok_20v_tok_950v_tok_369v_tok_369v_tok_6v_tok_288v_tok_466v_tok_82v_tok_503v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4502 , Reward: 26.37690305709839


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
  2%|▏         | 2/100 [00:27<22:06, 13.54s/it]

Iteration 2, Reward: 26.376903533935547, Length: 500, Reward_Length: 4502, Predicted Text: v_tok_230v_tok_699v_tok_91v_tok_860v_tok_491v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_977v_tok_530v_tok_530v_tok_530v_tok_530v_tok_1001v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v

  3%|▎         | 3/100 [00:39<20:56, 12.95s/it]

Iteration 3, Reward: 0.0, Length: 500, Reward_Length: 939, Predicted Text: v_tok_904v_tok_454v_tok_325v_tok_358v_tok_208v_tok_275v_tok_347v_tok_999v_tok_208v_tok_855v_tok_52v_tok_404v_tok_52v_tok_52v_tok_176v_tok_879v_tok_404v_tok_52v_tok_661v_tok_133v_tok_52v_tok_123v_tok_52v_tok_525v_tok_754v_tok_1009v_tok_9v_tok_835v_tok_874v_tok_160v_tok_56v_tok_575v_tok_287v_tok_1017v_tok_582v_tok_151v_tok_846v_tok_432v_tok_475v_tok_794v_tok_475v_tok_855v_tok_792v_tok_983v_tok_25v_tok_30v_tok_820v_tok_47v_tok_940v_tok_145v_tok_276v_tok_115v_tok_126v_tok_360v_tok_835v_tok_25v_tok_81v_tok_694v_tok_475v_tok_151v_tok_151v_tok_497v_tok_944v_tok_94v_tok_113v_tok_491v_tok_25v_tok_976v_tok_565v_tok_629v_tok_491v_tok_99v_tok_85v_tok_94v_tok_287v_tok_971v_tok_300v_tok_370v_tok_275v_tok_791v_tok_861v_tok_402v_tok_105v_tok_565v_tok_414v_tok_20v_tok_615v_tok_650v_tok_516v_tok_393v_tok_408v_tok_56v_tok_516v_tok_988v_tok_1013v_tok_94v_tok_820v_tok_209v_tok_410v_tok_629v_tok_865v_tok_94v_tok_495v_tok_497v_tok_208

  4%|▍         | 4/100 [00:51<20:17, 12.68s/it]

Iteration 4, Reward: 0.0, Length: 500, Reward_Length: 4480, Predicted Text: v_tok_257 folksThe isv_tok_961v_tok_961v_tok_961v_tok_348v_tok_348v_tok_348v_tok_348v_tok_348v_tok_804v_tok_804v_tok_804v_tok_804v_tok_804v_tok_804v_tok_804v_tok_945v_tok_804v_tok_945v_tok_945v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_495v_tok_945v_tok_945v_tok_945v_tok_945v_tok_945v_tok_945v_tok_945v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_344v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_tok_530v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4481 , Reward: 15.14295220375061


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
  5%|▌         | 5/100 [01:05<20:30, 12.95s/it]

Iteration 5, Reward: 15.142951965332031, Length: 500, Reward_Length: 4481, Predicted Text: v_tok_224v_tok_586v_tok_679v_tok_128v_tok_128v_tok_593v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_162v_tok_793v_tok_20v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_739v_tok_793v_tok_793v_tok_593v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_793v_tok_162v_tok_793v_tok_870v_tok_793v_tok_240v_tok_793v_tok_793v_tok_240v_tok_240v_tok_793v_tok_793v_tok_793v_tok_793v_tok_162v_tok_793v_tok_240v_tok_240v_tok_240v_tok_162v_tok_793v_tok_182v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_240v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v

  6%|▌         | 6/100 [01:17<19:53, 12.70s/it]

Iteration 6, Reward: 0.0, Length: 500, Reward_Length: 2227, Predicted Text: v_tok_257v_tok_523v_tok_753v_tok_696v_tok_408v_tok_408v_tok_339v_tok_25v_tok_131v_tok_121v_tok_855v_tok_347v_tok_25v_tok_935v_tok_25v_tok_583v_tok_677v_tok_475v_tok_69v_tok_876v_tok_835v_tok_835v_tok_835v_tok_339v_tok_819v_tok_537v_tok_604v_tok_1021v_tok_561v_tok_23v_tok_317v_tok_151v_tok_151v_tok_495v_tok_502v_tok_907v_tok_368v_tok_182v_tok_788v_tok_871v_tok_871v_tok_472v_tok_1011v_tok_537v_tok_91v_tok_30v_tok_275v_tok_793v_tok_478v_tok_162v_tok_830v_tok_747v_tok_901v_tok_923v_tok_629v_tok_976v_tok_699v_tok_699v_tok_126v_tok_402v_tok_228v_tok_133v_tok_876v_tok_676v_tok_537v_tok_916v_tok_224v_tok_971v_tok_931v_tok_99v_tok_604v_tok_408v_tok_835v_tok_875v_tok_990v_tok_73v_tok_151v_tok_428v_tok_56v_tok_491v_tok_347v_tok_408v_tok_976v_tok_472v_tok_478v_tok_62v_tok_393v_tok_432v_tok_1014v_tok_457v_tok_699v_tok_370v_tok_573v_tok_793v_tok_404v_tok_52v_tok_154v_tok_393v_tok_409v_tok_432v_tok_121v_tok_393v_tok_907v_tok_

  7%|▋         | 7/100 [01:29<19:25, 12.53s/it]

Iteration 7, Reward: 0.0, Length: 500, Reward_Length: 2559, Predicted Text: v_tok_662v_tok_85v_tok_94v_tok_860v_tok_94v_tok_604v_tok_62v_tok_360v_tok_176v_tok_160v_tok_709v_tok_121v_tok_408v_tok_738v_tok_30v_tok_835v_tok_835v_tok_835v_tok_835v_tok_395v_tok_339v_tok_339v_tok_91v_tok_395v_tok_1005v_tok_606v_tok_837 overviewv_tok_11v_tok_971v_tok_30v_tok_783v_tok_11v_tok_792v_tok_203v_tok_475v_tok_11v_tok_627v_tok_91v_tok_540v_tok_20v_tok_20v_tok_747v_tok_203v_tok_288v_tok_203v_tok_369v_tok_878v_tok_288v_tok_369v_tok_414v_tok_914v_tok_348v_tok_348v_tok_575v_tok_936v_tok_392v_tok_314v_tok_556v_tok_81v_tok_81v_tok_503v_tok_524v_tok_771v_tok_575v_tok_325v_tok_556v_tok_658v_tok_688v_tok_392v_tok_921v_tok_858v_tok_860v_tok_276v_tok_25v_tok_103v_tok_25v_tok_881v_tok_408v_tok_408v_tok_835v_tok_835v_tok_835v_tok_25v_tok_835v_tok_738v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_339v_tok_25v_tok_876v_tok_835v_tok_602v_tok_430v_tok_317v_tok_361v_tok_246v_tok_872v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4474 , Reward: 24.755640029907227


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
  8%|▊         | 8/100 [01:42<19:24, 12.66s/it]

Iteration 8, Reward: 24.755640029907227, Length: 500, Reward_Length: 4474, Predicted Text: v_tok_593v_tok_347v_tok_408v_tok_565v_tok_323v_tok_323v_tok_1011v_tok_1011v_tok_1011v_tok_777v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_629v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_900v_tok_629v_tok_900v_tok_629v_tok_629v_tok_629v_tok_900v_tok_900v_tok_900v_tok_900v_tok_629v_tok_629v_tok_900v_tok_629v_tok_629v_tok_157v_tok_699v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_228v_tok_629v_tok_629v_tok_629v_tok_629v_tok_228v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_208v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_629v_tok_819v_tok_629v_tok_629v_tok_629v_tok_819v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_537v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4470 , Reward: 27.16283082962036


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
  9%|▉         | 9/100 [01:55<19:18, 12.73s/it]

Iteration 9, Reward: 27.162830352783203, Length: 500, Reward_Length: 4470, Predicted Text: v_tok_738v_tok_881v_tok_264v_tok_793v_tok_984v_tok_990v_tok_990v_tok_990v_tok_886v_tok_886v_tok_730v_tok_730v_tok_99v_tok_730v_tok_730v_tok_400v_tok_567v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_578v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_41v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_768v_tok_768v_tok_721v_tok_655v_tok_721v_tok_692v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_749v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_141v_tok_721v_tok_990v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_721v_tok_1023v_tok_721v_tok_400v_tok_721v_tok_768v_tok_99v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4506 , Reward: 13.368309736251831


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 10%|█         | 10/100 [02:08<19:10, 12.78s/it]

Iteration 10, Reward: 13.36830997467041, Length: 500, Reward_Length: 4506, Predicted Text: v_tok_885v_tok_421v_tok_926v_tok_987v_tok_230v_tok_230v_tok_230v_tok_230v_tok_695v_tok_501v_tok_501v_tok_421v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_1009v_tok_396v_tok_1009v_tok_1009v_tok_683v_tok_421v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_422v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_886v_tok_99v_tok_886v_tok_886v_tok_392v_tok_886v_tok_886v_tok_730v_tok_886v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_361v_tok_99v_tok_990v_tok_990v_tok_990v_tok_886v_tok_886v_tok_392v_tok_886v_tok_216v_tok_886v_tok_573v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_886v_tok_730v_tok_730v_tok_730v_tok_730v_tok_730v_tok_216v_tok_730v_tok_431v_tok_886v_tok_730v_to

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4477 , Reward: 29.070231914520264


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 11%|█         | 11/100 [02:21<19:01, 12.83s/it]

Iteration 11, Reward: 29.070232391357422, Length: 500, Reward_Length: 4477, Predicted Text: v_tok_213v_tok_612v_tok_890v_tok_890v_tok_136v_tok_860v_tok_321v_tok_321v_tok_321v_tok_321v_tok_670v_tok_670v_tok_670v_tok_670v_tok_670v_tok_670v_tok_670v_tok_310v_tok_670v_tok_670v_tok_310v_tok_321v_tok_310v_tok_670v_tok_310v_tok_670v_tok_670v_tok_670v_tok_670v_tok_860v_tok_670v_tok_344v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_942v_tok_881v_tok_881v_tok_967v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_30v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_881v_tok_1010v_tok_881v_tok_945v_tok_881v_tok_881v_tok_881v_tok_1010v_tok_881v_tok_1010v_tok_881v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4475 , Reward: 25.458948612213135


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 12%|█▏        | 12/100 [02:34<18:47, 12.82s/it]

Iteration 12, Reward: 25.458948135375977, Length: 500, Reward_Length: 4475, Predicted Text: v_tok_62v_tok_909v_tok_51v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_854v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_951v_tok_335v_tok_85v_tok_372v_tok_372v_tok_372v_tok_372v_tok_372v_tok_372v_tok_800v_tok_372v_tok_372v_tok_951v_tok_951v_tok_951v_tok_85v_tok_951v_tok_593v_tok_593v_tok_593v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_162v_tok_230v_tok_793v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_901v_tok_872v_tok_495v_tok_777v_tok_679v_tok_679v_tok_901v_tok_843v_tok_1014v_tok_1014v_tok_1014v_tok_533v_tok_1014v_tok_1014v_tok_1014v_tok_1014v_tok_1014v_tok_1014v_tok_1014v_tok_1014v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4479 , Reward: 13.330937623977661


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 13%|█▎        | 13/100 [02:47<18:49, 12.98s/it]

Iteration 13, Reward: 13.330937385559082, Length: 500, Reward_Length: 4479, Predicted Text: v_tok_408v_tok_325v_tok_325v_tok_323v_tok_91v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_395v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_52v_tok_257v_tok_257v_tok_52v_tok_257v_tok_395v_tok_395v_tok_395v_tok_395v_tok_395v_tok_395v_tok_339v_tok_395v_tok_475v_tok_779v_tok_779v_tok_475v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_475v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_tok_779v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4466 , Reward: 16.02697491645813


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 14%|█▍        | 14/100 [03:00<18:35, 12.97s/it]

Iteration 14, Reward: 16.026975631713867, Length: 500, Reward_Length: 4466, Predicted Text: v_tok_479v_tok_479v_tok_479v_tok_479v_tok_388v_tok_141v_tok_388v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_17v_tok_838v_tok_17v_tok_17v_tok_17v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_1021v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_906v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_407v_tok_727v_tok_407v_tok_407v_tok_407v_tok_407v_tok_502v_tok_407v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok_465v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4404 , Reward: 13.108234405517578


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 15%|█▌        | 15/100 [03:13<18:16, 12.90s/it]

Iteration 15, Reward: 13.108234405517578, Length: 500, Reward_Length: 4404, Predicted Text: v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_408v_tok_62v_tok_408v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_835v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4401 , Reward: 24.212353229522705


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 16%|█▌        | 16/100 [03:26<18:12, 13.01s/it]

Iteration 16, Reward: 24.212352752685547, Length: 500, Reward_Length: 4401, Predicted Text: v_tok_846v_tok_435v_tok_730v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_270v_tok_431v_tok_435v_tok_435v_tok_435v_tok_951v_tok_951v_tok_270v_tok_270v_tok_222v_tok_233v_tok_593v_tok_385v_tok_960v_tok_822v_tok_822v_tok_822v_tok_822v_tok_882v_tok_882v_tok_882v_tok_882v_tok_882v_tok_882v_tok_431v_tok_573v_tok_431v_tok_431v_tok_431v_tok_324v_tok_400v_tok_531v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_679v_tok_925v_tok_679v_tok_925v_tok_679v_tok_385v_tok_679v_tok_679v_tok_679v_tok_679v_tok_67

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4488 , Reward: 26.348087787628174


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 17%|█▋        | 17/100 [03:39<17:57, 12.98s/it]

Iteration 17, Reward: 26.348087310791016, Length: 500, Reward_Length: 4488, Predicted Text: v_tok_372v_tok_776v_tok_788v_tok_788v_tok_317v_tok_916v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_428v_tok_582v_tok_428v_tok_428v_tok_327v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_230v_tok_222v_tok_695v_tok_222v_tok_222v_tok_222v_tok_222v_tok_222v_tok_230v_tok_222v_tok_695v_tok_230v_tok_230v_tok_479v_tok_695v_tok_230v_tok_479v_tok_230v_tok_695v_tok_141v_tok_695v_tok_141v_tok_141v_tok_243v_tok_141v_tok_141v_tok_141v_tok_790v_tok_782v_tok_361v_tok_141v_tok_141v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_141v_tok_479v_tok_662v_tok_479v_tok_479v_tok_479v_tok_479v_tok_479v_tok_141v_tok_479v_tok_47

 18%|█▊        | 18/100 [03:51<17:25, 12.75s/it]

Iteration 18, Reward: 0.0, Length: 500, Reward_Length: 2657, Predicted Text: v_tok_103v_tok_323v_tok_402v_tok_323v_tok_523v_tok_275v_tok_565v_tok_690v_tok_690v_tok_1001v_tok_155v_tok_523v_tok_523v_tok_523v_tok_860v_tok_860v_tok_1001v_tok_598v_tok_598v_tok_257v_tok_753v_tok_52v_tok_257v_tok_52v_tok_257v_tok_257v_tok_257v_tok_228v_tok_228v_tok_228v_tok_228v_tok_228v_tok_779v_tok_1017v_tok_339v_tok_339v_tok_339v_tok_339v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_9v_tok_945v_tok_491v_tok_834v_tok_25v_tok_192v_tok_1004v_tok_461v_tok_275v_tok_530v_tok_940v_tok_1022v_tok_855v_tok_224v_tok_904v_tok_976v_tok_611v_tok_467v_tok_385v_tok_971v_tok_879v_tok_699v_tok_185v_tok_583v_tok_1022v_tok_1022v_tok_192v_tok_666v_tok_971v_tok_666v_tok_940v_tok_317v_tok_1008v_tok_679v_tok_402v_tok_128v_tok_437v_tok_437v_tok_793v_tok_657v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4474 , Reward: 25.226237773895264


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 19%|█▉        | 19/100 [04:04<17:15, 12.79s/it]

Iteration 19, Reward: 25.226238250732422, Length: 500, Reward_Length: 4474, Predicted Text: v_tok_81v_tok_1018v_tok_141v_tok_886v_tok_504v_tok_504v_tok_6v_tok_6v_tok_892v_tok_466v_tok_190v_tok_190v_tok_466v_tok_259v_tok_466v_tok_466v_tok_451v_tok_451v_tok_451v_tok_681v_tok_656v_tok_451v_tok_451v_tok_451v_tok_451v_tok_1020v_tok_722v_tok_99v_tok_723v_tok_990v_tok_990v_tok_830v_tok_593v_tok_593v_tok_593v_tok_593v_tok_593v_tok_694v_tok_694v_tok_694v_tok_575v_tok_504v_tok_504v_tok_288v_tok_504v_tok_348v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_575v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_504v_tok_431v_tok_575v_tok_222v_tok_431v_tok_222v_tok_937v_tok_230v_tok_230v_tok_1020v_tok_820v_tok_820v_tok_961v_tok_327v_tok_327v_tok_327v_tok_327v_tok_961v_tok_327v_tok_414v_tok_327v_tok_575v_tok_575v_tok_504v_tok_954v_tok_755v_tok_867v_tok_81v_tok_961v_tok_970v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4435 , Reward: 27.292449474334717


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 20%|██        | 20/100 [04:17<17:06, 12.84s/it]

Iteration 20, Reward: 27.292449951171875, Length: 500, Reward_Length: 4435, Predicted Text: v_tok_865v_tok_865v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_310v_tok_408v_tok_408v_tok_738v_tok_835v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_208v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_738v_tok_738v_tok_738v_tok_738v_tok_738v_tok_738v_tok_1017v_tok_835v_tok_835v_tok_408v_tok_408v_tok_408v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4468 , Reward: 16.453615427017212


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 21%|██        | 21/100 [04:33<18:19, 13.92s/it]

Iteration 21, Reward: 16.453615188598633, Length: 500, Reward_Length: 4468, Predicted Text: v_tok_760v_tok_879v_tok_1011v_tok_23v_tok_1022v_tok_1022v_tok_1022v_tok_753v_tok_753v_tok_753v_tok_753v_tok_753v_tok_753v_tok_753v_tok_753v_tok_753v_tok_875v_tok_753v_tok_976v_tok_879v_tok_208v_tok_879v_tok_208v_tok_879v_tok_208v_tok_879v_tok_208v_tok_879v_tok_325v_tok_879v_tok_325v_tok_325v_tok_753v_tok_208v_tok_875v_tok_208v_tok_310v_tok_879v_tok_321v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_325v_tok_879v_tok_325v_tok_879v_tok_325v_tok_325v_tok_879v_tok_604v_tok_325v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_753v_tok_879v_tok_753v_tok_879v_tok_753v_tok_879v_tok_432v_tok_879v_tok_879v_tok_879v_tok_879v_tok_879v_tok_325v_tok_598v_tok_323v_tok_879v_tok_879v_tok_208v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4437 , Reward: 12.525955438613892


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 22%|██▏       | 22/100 [04:46<17:46, 13.67s/it]

Iteration 22, Reward: 12.525955200195312, Length: 500, Reward_Length: 4437, Predicted Text: v_tok_347v_tok_106v_tok_432v_tok_1019v_tok_1019v_tok_430v_tok_228v_tok_1019v_tok_228v_tok_604v_tok_430v_tok_430v_tok_430v_tok_1017v_tok_1017v_tok_1017v_tok_1017v_tok_1017v_tok_738v_tok_835v_tok_25v_tok_475v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_1019v_tok_835v_tok_1019v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_83

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4114 , Reward: 22.849936485290527


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 23%|██▎       | 23/100 [04:59<17:12, 13.41s/it]

Iteration 23, Reward: 22.849937438964844, Length: 500, Reward_Length: 4114, Predicted Text: v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_835v_tok_408v_tok_408v_tok_835v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_62v_tok_408v_tok_408v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_408v_tok_62v_tok_835v_tok_62v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_835v_tok_408v_tok_408v_tok_408v_tok_62v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4085 , Reward: 18.533324003219604


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 24%|██▍       | 24/100 [05:16<18:04, 14.26s/it]

Iteration 24, Reward: 18.533323287963867, Length: 500, Reward_Length: 4085, Predicted Text: v_tok_835v_tok_1021v_tok_1011v_tok_237v_tok_237v_tok_237v_tok_237v_tok_237v_tok_237v_tok_237v_tok_237v_tok_237v_tok_940v_tok_765v_tok_940v_tok_940v_tok_765v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_765v_tok_940v_tok_940v_tok_940v_tok_940v_tok_765v_tok_940v_tok_940v_tok_765v_tok_979v_tok_843v_tok_465v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_582v_tok_317v_tok_679v_tok_23v_tok_690v_tok_23v_tok_23v_tok_23v_tok_901v_tok_901v_tok_901v_tok_645v_tok_495v_tok_650v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_901v_tok_23v_tok_192v_tok_23v_tok_495v_tok_23v_tok_23v_tok_23v_tok_23v_tok_862v_tok_23v_tok_23v_tok_415v_tok_23v_tok_415v_tok_23v_tok_415v_tok_23v_tok_415v_tok_23v_tok_23v_tok_23v_tok_862v_tok_23v_tok_23v_tok_23v_tok_23v_tok_415v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4143 , Reward: 20.84228515625


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 25%|██▌       | 25/100 [05:29<17:24, 13.93s/it]

Iteration 25, Reward: 20.84228515625, Length: 500, Reward_Length: 4143, Predicted Text: v_tok_738v_tok_81v_tok_81v_tok_504v_tok_524v_tok_81v_tok_154v_tok_81v_tok_388v_tok_886v_tok_838v_tok_141v_tok_804v_tok_694v_tok_99v_tok_804v_tok_804v_tok_804v_tok_645v_tok_645v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_456v_tok_611v_tok_807v_tok_611v_tok_807v_tok_456v_tok_70v_tok_385v_tok_385v_tok_385v_tok_385v_tok_385v_tok_611v_tok_456v_tok_611v_tok_611v_tok_611v_tok_611v_tok_1010v_tok_611v_tok_565v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_565v_tok_611v_tok_565v_tok_151v_tok_1010v_tok_151v_tok_317v_tok_317v_tok_317v_tok_611v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_611v_tok_807v_tok_611v_tok_151v_tok_611v_tok_611v_tok_23v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_611v_tok_61

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4374 , Reward: 22.523863315582275


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 26%|██▌       | 26/100 [05:42<16:53, 13.70s/it]

Iteration 26, Reward: 22.523862838745117, Length: 500, Reward_Length: 4374, Predicted Text: v_tok_835v_tok_779v_tok_779v_tok_779v_tok_677v_tok_677v_tok_677v_tok_677v_tok_677v_tok_433v_tok_677v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_1011v_tok_491v_tok_491v_tok_491v_tok_136v_tok_976v_tok_224v_tok_604v_tok_604v_tok_779v_tok_472v_tok_73v_tok_472v_tok_887v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_676v_tok_276v_tok_276v_tok_676v_tok_666v_tok_276v_tok_276v_tok_276v_tok_537v_tok_276v_tok_276v_tok_276v_tok_276v_tok_317v_tok_276v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_317v_tok_23v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_940v_tok_317v_tok_317v_tok_317v_tok_940v_tok_940v_tok_317v_tok_708v_tok_317v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4498 , Reward: 12.856525182723999


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 27%|██▋       | 27/100 [05:55<16:23, 13.47s/it]

Iteration 27, Reward: 12.856525421142578, Length: 500, Reward_Length: 4498, Predicted Text: v_tok_408v_tok_835v_tok_835v_tok_835v_tok_106v_tok_106v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_475v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_475v_tok_835v_tok_475v_tok_408v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_106v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_834v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_83

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4497 , Reward: 24.294111728668213


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 28%|██▊       | 28/100 [06:08<16:04, 13.40s/it]

Iteration 28, Reward: 24.294111251831055, Length: 500, Reward_Length: 4497, Predicted Text: v_tok_408v_tok_575v_tok_385v_tok_961v_tok_961v_tok_961v_tok_961v_tok_961v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_726v_tok_727v_tok_727v_tok_727v_tok_727v_tok_727v_tok_840v_tok_727v_tok_915v_tok_727v_tok_906v_tok_915v_tok_502v_tok_151v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_650v_tok_907v_tok_907v_tok_916v_tok_916v_tok_916v_tok_916v_tok_916v_tok_916v_tok_1008v_tok_916v_tok_151v_tok_23v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_151v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_687v_tok_907v_tok_907v_tok_907v_tok_907v_tok_457v_tok_151v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_907v_tok_90

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4500 , Reward: 13.40855598449707


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 29%|██▉       | 29/100 [06:21<15:38, 13.22s/it]

Iteration 29, Reward: 13.40855598449707, Length: 500, Reward_Length: 4500, Predicted Text: v_tok_408v_tok_779v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_475v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_475v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339v_tok_339

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4479 , Reward: 24.763944149017334


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 30%|███       | 30/100 [06:34<15:33, 13.34s/it]

Iteration 30, Reward: 24.763944625854492, Length: 500, Reward_Length: 4479, Predicted Text: v_tok_565v_tok_323v_tok_971v_tok_370v_tok_325v_tok_879v_tok_523v_tok_523v_tok_523v_tok_523v_tok_523v_tok_967v_tok_967v_tok_523v_tok_696v_tok_753v_tok_696v_tok_323v_tok_291v_tok_1022v_tok_1022v_tok_323v_tok_323v_tok_323v_tok_879v_tok_323v_tok_323v_tok_323v_tok_323v_tok_293v_tok_323v_tok_323v_tok_323v_tok_696v_tok_565v_tok_696v_tok_598v_tok_690v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_1001v_tok_942v_tok_1001v_tok_1001v_tok_871v_tok_871v_tok_871v_tok_871v_tok_598v_tok_871v_tok_860v_tok_860v_tok_879v_tok_472v_tok_208v_tok_208v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_323v_tok_457v_tok_523v_tok_523v_tok_523v_to

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4496 , Reward: 14.075920581817627


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 31%|███       | 31/100 [06:49<15:41, 13.65s/it]

Iteration 31, Reward: 14.075920104980469, Length: 500, Reward_Length: 4496, Predicted Text: v_tok_408v_tok_408v_tok_835v_tok_835v_tok_738v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_83

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4498 , Reward: 14.366000890731812


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 32%|███▏      | 32/100 [07:02<15:11, 13.40s/it]

Iteration 32, Reward: 14.36600112915039, Length: 500, Reward_Length: 4498, Predicted Text: v_tok_835v_tok_395v_tok_395v_tok_339v_tok_475v_tok_475v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_103v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4491 , Reward: 13.433881998062134


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 33%|███▎      | 33/100 [07:15<14:48, 13.26s/it]

Iteration 33, Reward: 13.433881759643555, Length: 500, Reward_Length: 4491, Predicted Text: v_tok_865v_tok_717v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_709v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_83

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4428 , Reward: 20.89986801147461


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 34%|███▍      | 34/100 [07:28<14:38, 13.31s/it]

Iteration 34, Reward: 20.89986801147461, Length: 500, Reward_Length: 4428, Predicted Text: v_tok_738v_tok_408v_tok_408v_tok_162v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_23v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_23v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_276v_tok_23v_tok_276v_tok_276v_tok_23v_tok_276v_tok_276v_tok_23v_tok_23v_tok_276v_tok_276v_tok_276v_tok_276v_tok_23v_tok_276v_tok_23v_tok_23v_tok_23v_tok_23v_tok_666v_tok_23v_tok_666v_tok_666v_tok_666v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_151v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_317v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_276v_tok_23v_tok_23v_tok_276v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4400 , Reward: 14.632585048675537


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 35%|███▌      | 35/100 [07:41<14:21, 13.25s/it]

Iteration 35, Reward: 14.632585525512695, Length: 500, Reward_Length: 4400, Predicted Text: v_tok_408v_tok_865v_tok_865v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_717v_tok_865v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_408v_tok_408v_tok_62v_tok_408v_tok_408v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_408v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_408v_tok_62v_tok_62v_tok_62v_tok_717v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_408v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_62v_tok_62v_tok_408v_tok_408v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_408v_tok_62v_tok_62v_tok_62v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_62v_tok_62v_tok_408v_tok_62v_tok_408v_tok_408v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4457 , Reward: 19.79960560798645


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 36%|███▌      | 36/100 [07:54<14:00, 13.13s/it]

Iteration 36, Reward: 19.799606323242188, Length: 500, Reward_Length: 4457, Predicted Text: v_tok_228v_tok_738v_tok_62v_tok_511v_tok_63v_tok_63v_tok_63v_tok_63v_tok_63v_tok_368v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_502v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_915v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_915v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_708v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_915v_tok_940v_tok_940v_tok_940v_tok_940v_tok_915v_tok_940v_tok_708v_tok_502v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_611v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_940v_tok_838v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4471 , Reward: 21.402854919433594


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 37%|███▋      | 37/100 [08:07<13:42, 13.06s/it]

Iteration 37, Reward: 21.402854919433594, Length: 500, Reward_Length: 4471, Predicted Text: v_tok_779v_tok_23v_tok_1022v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_23v_tok_415v_tok_23v_tok_415v_tok_23v_tok_415v_tok_317v_tok_415v_tok_317v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v_tok_415v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4498 , Reward: 13.552228212356567


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 38%|███▊      | 38/100 [08:21<13:47, 13.35s/it]

Iteration 38, Reward: 13.552227973937988, Length: 500, Reward_Length: 4498, Predicted Text: v_tok_62v_tok_106v_tok_408v_tok_408v_tok_408v_tok_408v_tok_408v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_677v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_408v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_677v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_475v_tok_408v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_488v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_677v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_677v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4501 , Reward: 14.316762685775757


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 39%|███▉      | 39/100 [08:34<13:24, 13.20s/it]

Iteration 39, Reward: 14.316762924194336, Length: 500, Reward_Length: 4501, Predicted Text: v_tok_408v_tok_1019v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_8

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4500 , Reward: 14.267727136611938


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 40%|████      | 40/100 [08:46<13:04, 13.07s/it]

Iteration 40, Reward: 14.26772689819336, Length: 500, Reward_Length: 4500, Predicted Text: v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_475v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4499 , Reward: 20.213594436645508


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 41%|████      | 41/100 [08:59<12:46, 12.99s/it]

Iteration 41, Reward: 20.213594436645508, Length: 500, Reward_Length: 4499, Predicted Text: v_tok_835v_tok_317v_tok_835v_tok_317v_tok_317v_tok_317v_tok_230v_tok_479v_tok_230v_tok_730v_tok_695v_tok_886v_tok_411v_tok_886v_tok_695v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_99v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_755v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_755v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4497 , Reward: 18.20864200592041


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 42%|████▏     | 42/100 [09:12<12:30, 12.93s/it]

Iteration 42, Reward: 18.208641052246094, Length: 500, Reward_Length: 4497, Predicted Text: v_tok_835v_tok_504v_tok_835v_tok_431v_tok_154v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_59v_tok_411v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_162v_tok_59v_tok_317v_tok_59v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4495 , Reward: 16.278676986694336


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 43%|████▎     | 43/100 [09:25<12:17, 12.93s/it]

Iteration 43, Reward: 16.278676986694336, Length: 500, Reward_Length: 4495, Predicted Text: v_tok_835v_tok_504v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_339v_tok_935v_tok_798v_tok_843v_tok_954v_tok_55v_tok_146v_tok_730v_tok_830v_tok_317v_tok_317v_tok_430v_tok_834v_tok_699v_tok_208v_tok_699v_tok_976v_tok_373v_tok_59v_tok_411v_tok_479v_tok_327v_tok_317v_tok_317v_tok_834v_tok_317v_tok_738v_tok_276v_tok_25v_tok_276v_tok_25v_tok_339v_tok_25v_tok_835v_tok_835v_tok_176v_tok_339v_tok_176v_tok_176v_tok_176v_tok_176v_tok_709v_tok_339v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_709v_tok_835v_tok_709v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_to

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4392 , Reward: 25.286309719085693


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 44%|████▍     | 44/100 [09:38<12:03, 12.91s/it]

Iteration 44, Reward: 25.28631019592285, Length: 500, Reward_Length: 4392, Predicted Text: v_tok_835v_tok_552v_tok_339v_tok_230v_tok_924v_tok_55v_tok_539v_tok_634v_tok_854v_tok_422v_tok_886v_tok_539v_tok_539v_tok_539v_tok_392v_tok_99v_tok_392v_tok_198v_tok_348v_tok_771v_tok_504v_tok_886v_tok_755v_tok_99v_tok_755v_tok_392v_tok_99v_tok_99v_tok_422v_tok_348v_tok_198v_tok_730v_tok_198v_tok_99v_tok_222v_tok_415v_tok_230v_tok_411v_tok_695v_tok_730v_tok_593v_tok_730v_tok_593v_tok_99v_tok_990v_tok_99v_tok_886v_tok_755v_tok_99v_tok_694v_tok_411v_tok_327v_tok_411v_tok_372v_tok_372v_tok_372v_tok_411v_tok_593v_tok_411v_tok_593v_tok_593v_tok_593v_tok_230v_tok_411v_tok_411v_tok_411v_tok_501v_tok_317v_tok_411v_tok_317v_tok_788v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4494 , Reward: 14.189741611480713


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 45%|████▌     | 45/100 [09:51<11:48, 12.88s/it]

Iteration 45, Reward: 14.189741134643555, Length: 500, Reward_Length: 4494, Predicted Text: v_tok_339v_tok_495v_tok_709v_tok_843v_tok_392v_tok_970v_tok_55v_tok_319v_tok_141v_tok_422v_tok_81v_tok_317v_tok_317v_tok_488v_tok_103v_tok_438v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_103v_tok_276v_tok_103v_tok_25v_tok_103v_tok_25v_tok_25v_tok_25v_tok_537v_tok_395v_tok_339v_tok_339v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_339v_tok_488v_tok_339v_tok_395v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4500 , Reward: 14.266031980514526


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 46%|████▌     | 46/100 [10:03<11:33, 12.84s/it]

Iteration 46, Reward: 14.266032218933105, Length: 500, Reward_Length: 4500, Predicted Text: v_tok_835v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_835v_tok_395v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_83

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4500 , Reward: 18.079665899276733


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 47%|████▋     | 47/100 [10:16<11:21, 12.85s/it]

Iteration 47, Reward: 18.079666137695312, Length: 500, Reward_Length: 4500, Predicted Text: v_tok_835v_tok_666v_tok_835v_tok_317v_tok_206v_tok_206v_tok_206v_tok_362v_tok_990v_tok_230v_tok_690v_tok_501v_tok_501v_tok_501v_tok_501v_tok_501v_tok_222v_tok_841v_tok_841v_tok_162v_tok_695v_tok_162v_tok_501v_tok_645v_tok_222v_tok_501v_tok_230v_tok_222v_tok_501v_tok_222v_tok_230v_tok_588v_tok_501v_tok_695v_tok_162v_tok_222v_tok_501v_tok_695v_tok_222v_tok_695v_tok_841v_tok_501v_tok_230v_tok_222v_tok_501v_tok_230v_tok_222v_tok_222v_tok_230v_tok_695v_tok_695v_tok_222v_tok_695v_tok_230v_tok_230v_tok_230v_tok_695v_tok_695v_tok_222v_tok_695v_tok_411v_tok_501v_tok_695v_tok_222v_tok_222v_tok_230v_tok_222v_tok_695v_tok_222v_tok_411v_tok_501v_tok_411v_tok_222v_tok_411v_tok_222v_tok_411v_tok_501v_tok_411v_tok_222v_tok_501v_tok_411v_tok_695v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_411v_tok_162v_tok_411v_tok_222v_tok_695v_tok_222v_tok_222v_tok_411v_tok_222v_tok_411v_tok_841v_tok_411v_tok_230v_tok_41

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4498 , Reward: 13.531631231307983


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 48%|████▊     | 48/100 [10:29<11:08, 12.86s/it]

Iteration 48, Reward: 13.531631469726562, Length: 500, Reward_Length: 4498, Predicted Text: v_tok_835v_tok_475v_tok_835v_tok_395v_tok_395v_tok_666v_tok_537v_tok_55v_tok_142v_tok_362v_tok_99v_tok_230v_tok_317v_tok_730v_tok_347v_tok_317v_tok_666v_tok_598v_tok_843v_tok_495v_tok_940v_tok_694v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_317v_tok_780v_tok_347v_tok_835v_tok_835v_tok_339v_tok_339v_tok_677v_tok_537v_tok_395v_tok_537v_tok_395v_tok_176v_tok_176v_tok_176v_tok_176v_tok_709v_tok_819v_tok_709v_tok_835v_tok_339v_tok_835v_tok_835v_tok_339v_tok_395v_tok_395v_tok_819v_tok_395v_tok_819v_tok_835v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_835v_tok_395v_tok_835v_tok_395v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_395v_tok_835v_tok_819v_tok_835v_tok_395v_tok_835v_tok_395v_tok_835v_tok_395v_tok_395v_tok_339v_tok_395v_tok_709v_tok_395v_tok_709v_tok_395v_tok_835v_tok_709v_tok_835v_tok_835v_tok_339v_tok_835v_tok_339v_tok_835v_tok_339v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4498 , Reward: 15.079607963562012


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 49%|████▉     | 49/100 [10:42<10:56, 12.87s/it]

Iteration 49, Reward: 15.079607963562012, Length: 500, Reward_Length: 4498, Predicted Text: v_tok_835v_tok_535v_tok_835v_tok_146v_tok_270v_tok_422v_tok_730v_tok_422v_tok_730v_tok_465v_tok_976v_tok_208v_tok_699v_tok_208v_tok_208v_tok_208v_tok_409v_tok_85v_tok_141v_tok_55v_tok_730v_tok_309v_tok_765v_tok_348v_tok_835v_tok_1001v_tok_835v_tok_860v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_395v_tok_835v_tok_145v_tok_835v_tok_835v_tok_924v_tok_835v_tok_709v_tok_835v_tok_709v_tok_835v_tok_835v_tok_339v_tok_339v_tok_835v_tok_395v_tok_395v_tok_835v_tok_835v_tok_835v_tok_408v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_339v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_40v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v_tok_835v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4491 , Reward: 8.699760437011719


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 50%|█████     | 50/100 [10:55<10:41, 12.83s/it]

Iteration 50, Reward: 8.699760437011719, Length: 500, Reward_Length: 4491, Predicted Text: v_tok_835v_tok_843v_tok_835v_tok_362v_tok_797v_tok_435v_tok_55v_tok_248v_tok_730v_tok_841v_tok_634v_tok_501v_tok_501v_tok_650v_tok_1000v_tok_424v_tok_886v_tok_283v_tok_886v_tok_99v_tok_99v_tok_755v_tok_99v_tok_99v_tok_99v_tok_716v_tok_348v_tok_198v_tok_575v_tok_358v_tok_222v_tok_688v_tok_198v_tok_348v_tok_99v_tok_688v_tok_501v_tok_858v_tok_681v_tok_222v_tok_99v_tok_695v_tok_422v_tok_695v_tok_222v_tok_230v_tok_695v_tok_501v_tok_222v_tok_222v_tok_222v_tok_230v_tok_222v_tok_695v_tok_222v_tok_501v_tok_695v_tok_501v_tok_501v_tok_222v_tok_198v_tok_695v_tok_99v_tok_222v_tok_198v_tok_222v_tok_813v_tok_695v_tok_695v_tok_723v_tok_695v_tok_501v_tok_695v_tok_230v_tok_501v_tok_501v_tok_222v_tok_222v_tok_222v_tok_230v_tok_222v_tok_501v_tok_222v_tok_230v_tok_222v_tok_501v_tok_222v_tok_222v_tok_222v_tok_222v_tok_230v_tok_230v_tok_695v_tok_222v_tok_701v_tok_230v_tok_695v_tok_501v_tok_431v_tok_222v_tok_222v_tok_22

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4484 , Reward: 13.479779958724976


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 51%|█████     | 51/100 [11:08<10:29, 12.85s/it]

Iteration 51, Reward: 13.479780197143555, Length: 500, Reward_Length: 4484, Predicted Text: v_tok_835v_tok_869v_tok_812v_tok_270v_tok_248v_tok_230v_tok_846v_tok_695v_tok_222v_tok_59v_tok_112v_tok_834v_tok_976v_tok_604v_tok_228v_tok_604v_tok_373v_tok_798v_tok_951v_tok_466v_tok_730v_tok_820v_tok_730v_tok_690v_tok_699v_tok_780v_tok_408v_tok_835v_tok_835v_tok_141v_tok_785v_tok_949v_tok_222v_tok_716v_tok_198v_tok_771v_tok_422v_tok_841v_tok_198v_tok_230v_tok_222v_tok_146v_tok_387v_tok_222v_tok_247v_tok_841v_tok_695v_tok_695v_tok_695v_tok_501v_tok_501v_tok_162v_tok_222v_tok_695v_tok_222v_tok_501v_tok_841v_tok_841v_tok_695v_tok_695v_tok_695v_tok_695v_tok_695v_tok_695v_tok_695v_tok_695v_tok_222v_tok_695v_tok_695v_tok_695v_tok_411v_tok_222v_tok_411v_tok_230v_tok_411v_tok_501v_tok_230v_tok_222v_tok_501v_tok_695v_tok_222v_tok_222v_tok_501v_tok_230v_tok_222v_tok_695v_tok_230v_tok_222v_tok_230v_tok_230v_tok_695v_tok_695v_tok_222v_tok_501v_tok_695v_tok_222v_tok_695v_tok_230v_tok_222v_tok_695v_tok_222

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4497 , Reward: 8.291926383972168


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 52%|█████▏    | 52/100 [11:20<10:15, 12.83s/it]

Iteration 52, Reward: 8.291926383972168, Length: 500, Reward_Length: 4497, Predicted Text: v_tok_835v_tok_716v_tok_835v_tok_434v_tok_797v_tok_270v_tok_721v_tok_886v_tok_501v_tok_317v_tok_151v_tok_224v_tok_834v_tok_604v_tok_604v_tok_819v_tok_676v_tok_276v_tok_413v_tok_413v_tok_413v_tok_413v_tok_317v_tok_690v_tok_151v_tok_62v_tok_834v_tok_738v_tok_835v_tok_951v_tok_141v_tok_1006v_tok_504v_tok_146v_tok_532v_tok_695v_tok_222v_tok_230v_tok_841v_tok_695v_tok_841v_tok_695v_tok_695v_tok_222v_tok_501v_tok_222v_tok_222v_tok_695v_tok_695v_tok_222v_tok_222v_tok_222v_tok_830v_tok_222v_tok_222v_tok_230v_tok_222v_tok_309v_tok_222v_tok_695v_tok_230v_tok_222v_tok_695v_tok_230v_tok_222v_tok_501v_tok_230v_tok_222v_tok_695v_tok_222v_tok_222v_tok_695v_tok_222v_tok_99v_tok_841v_tok_603v_tok_695v_tok_230v_tok_695v_tok_730v_tok_695v_tok_730v_tok_501v_tok_695v_tok_230v_tok_222v_tok_695v_tok_230v_tok_820v_tok_222v_tok_695v_tok_230v_tok_479v_tok_695v_tok_222v_tok_695v_tok_230v_tok_222v_tok_230v_tok_230v_tok_501v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4499 , Reward: 13.321760892868042


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 53%|█████▎    | 53/100 [11:33<10:02, 12.82s/it]

Iteration 53, Reward: 13.321761131286621, Length: 500, Reward_Length: 4499, Predicted Text: v_tok_835v_tok_428v_tok_797v_tok_428v_tok_362v_tok_716v_tok_270v_tok_230v_tok_248v_tok_55v_tok_99v_tok_317v_tok_976v_tok_699v_tok_699v_tok_224v_tok_491v_tok_875v_tok_533v_tok_317v_tok_599v_tok_937v_tok_466v_tok_657v_tok_820v_tok_1019v_tok_901v_tok_717v_tok_717v_tok_835v_tok_835v_tok_401v_tok_539v_tok_532v_tok_965v_tok_954v_tok_937v_tok_820v_tok_886v_tok_411v_tok_886v_tok_222v_tok_230v_tok_841v_tok_695v_tok_695v_tok_501v_tok_230v_tok_222v_tok_501v_tok_695v_tok_222v_tok_501v_tok_695v_tok_501v_tok_695v_tok_695v_tok_695v_tok_362v_tok_695v_tok_362v_tok_695v_tok_841v_tok_716v_tok_695v_tok_841v_tok_695v_tok_230v_tok_362v_tok_362v_tok_362v_tok_230v_tok_222v_tok_154v_tok_230v_tok_247v_tok_247v_tok_695v_tok_568v_tok_695v_tok_501v_tok_841v_tok_841v_tok_695v_tok_230v_tok_695v_tok_411v_tok_198v_tok_411v_tok_198v_tok_411v_tok_99v_tok_411v_tok_532v_tok_1009v_tok_222v_tok_411v_tok_501v_tok_411v_tok_198v_tok_695

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4497 , Reward: 30.612218379974365


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 54%|█████▍    | 54/100 [11:46<09:50, 12.83s/it]

Iteration 54, Reward: 30.612218856811523, Length: 500, Reward_Length: 4497, Predicted Text: v_tok_835v_tok_716v_tok_270v_tok_846v_tok_1006v_tok_65v_tok_352v_tok_695v_tok_730v_tok_59v_tok_843v_tok_385v_tok_208v_tok_224v_tok_208v_tok_604v_tok_323v_tok_276v_tok_951v_tok_820v_tok_820v_tok_820v_tok_185v_tok_95v_tok_604v_tok_1019v_tok_62v_tok_835v_tok_835v_tok_141v_tok_40v_tok_763v_tok_413v_tok_820v_tok_413v_tok_411v_tok_230v_tok_411v_tok_695v_tok_601v_tok_222v_tok_230v_tok_593v_tok_695v_tok_501v_tok_841v_tok_222v_tok_695v_tok_222v_tok_437v_tok_754v_tok_501v_tok_532v_tok_222v_tok_695v_tok_230v_tok_695v_tok_695v_tok_501v_tok_501v_tok_501v_tok_222v_tok_222v_tok_695v_tok_501v_tok_222v_tok_222v_tok_230v_tok_230v_tok_222v_tok_695v_tok_695v_tok_411v_tok_695v_tok_222v_tok_695v_tok_695v_tok_501v_tok_222v_tok_841v_tok_222v_tok_695v_tok_695v_tok_501v_tok_695v_tok_841v_tok_222v_tok_695v_tok_198v_tok_501v_tok_937v_tok_841v_tok_411v_tok_695v_tok_695v_tok_222v_tok_695v_tok_695v_tok_222v_tok_501v_tok_695v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4502 , Reward: 9.835212230682373


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 55%|█████▌    | 55/100 [11:59<09:38, 12.85s/it]

Iteration 55, Reward: 9.835212707519531, Length: 500, Reward_Length: 4502, Predicted Text: v_tok_835v_tok_362v_tok_486v_tok_951v_tok_841v_tok_730v_tok_716v_tok_230v_tok_230v_tok_501v_tok_162v_tok_683v_tok_224v_tok_860v_tok_598v_tok_604v_tok_208v_tok_676v_tok_843v_tok_954v_tok_466v_tok_413v_tok_74v_tok_413v_tok_650v_tok_1001v_tok_724v_tok_1019v_tok_1019v_tok_1019v_tok_141v_tok_341v_tok_954v_tok_956v_tok_820v_tok_695v_tok_222v_tok_830v_tok_422v_tok_233v_tok_222v_tok_230v_tok_222v_tok_695v_tok_222v_tok_501v_tok_230v_tok_501v_tok_695v_tok_841v_tok_501v_tok_695v_tok_841v_tok_588v_tok_501v_tok_222v_tok_222v_tok_695v_tok_222v_tok_501v_tok_230v_tok_841v_tok_695v_tok_695v_tok_362v_tok_695v_tok_411v_tok_695v_tok_222v_tok_501v_tok_695v_tok_501v_tok_230v_tok_222v_tok_695v_tok_695v_tok_222v_tok_222v_tok_695v_tok_222v_tok_841v_tok_222v_tok_230v_tok_222v_tok_695v_tok_222v_tok_695v_tok_411v_tok_222v_tok_222v_tok_695v_tok_501v_tok_501v_tok_841v_tok_841v_tok_695v_tok_695v_tok_222v_tok_695v_tok_230v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4500 , Reward: 7.805808782577515


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 56%|█████▌    | 56/100 [12:12<09:26, 12.88s/it]

Iteration 56, Reward: 7.805809020996094, Length: 500, Reward_Length: 4500, Predicted Text: v_tok_835v_tok_179v_tok_248v_tok_797v_tok_435v_tok_841v_tok_248v_tok_230v_tok_695v_tok_317v_tok_411v_tok_23v_tok_317v_tok_598v_tok_598v_tok_860v_tok_860v_tok_915v_tok_233v_tok_411v_tok_230v_tok_411v_tok_695v_tok_413v_tok_411v_tok_695v_tok_871v_tok_501v_tok_834v_tok_860v_tok_491v_tok_875v_tok_1001v_tok_650v_tok_181v_tok_181v_tok_532v_tok_411v_tok_716v_tok_841v_tok_841v_tok_413v_tok_222v_tok_695v_tok_830v_tok_695v_tok_411v_tok_362v_tok_695v_tok_841v_tok_362v_tok_695v_tok_841v_tok_695v_tok_230v_tok_695v_tok_695v_tok_222v_tok_901v_tok_695v_tok_501v_tok_222v_tok_501v_tok_501v_tok_222v_tok_222v_tok_327v_tok_230v_tok_954v_tok_411v_tok_411v_tok_411v_tok_411v_tok_846v_tok_695v_tok_411v_tok_841v_tok_841v_tok_716v_tok_695v_tok_230v_tok_695v_tok_695v_tok_362v_tok_222v_tok_230v_tok_841v_tok_362v_tok_230v_tok_222v_tok_411v_tok_222v_tok_501v_tok_198v_tok_841v_tok_411v_tok_695v_tok_230v_tok_841v_tok_695v_tok_695

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4442 , Reward: 7.280943393707275


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 57%|█████▋    | 57/100 [12:25<09:14, 12.90s/it]

Iteration 57, Reward: 7.280943393707275, Length: 500, Reward_Length: 4442, Predicted Text: v_tok_600v_tok_548v_tok_314v_tok_548v_tok_841v_tok_548v_tok_841v_tok_411v_tok_59v_tok_716v_tok_309v_tok_771v_tok_716v_tok_716v_tok_771v_tok_716v_tok_539v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_198v_tok_422v_tok_422v_tok_980v_tok_980v_tok_422v_tok_61v_tok_980v_tok_937v_tok_74v_tok_61v_tok_980v_tok_378v_tok_820v_tok_937v_tok_954v_tok_980v_tok_820v_tok_378v_tok_411v_tok_980v_tok_411v_tok_74v_tok_980v_tok_937v_tok_269v_tok_980v_tok_954v_tok_998v_tok_820v_tok_269v_tok_504v_tok_954v_tok_954v_tok_6v_tok_954v_tok_269v_tok_504v_tok_6v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_506v_tok_970v_tok_506v_tok_970v_tok_506v_tok_970v_tok_506v_tok_506v_tok_970v_tok_506v_tok_506v_tok_938v_tok_506v_tok_970v_tok_506v_tok_970v_tok_506v_tok_970v_tok_892v_tok_81v_tok_954v_tok_954v_tok_954v_tok_937v_tok_695v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4495 , Reward: 6.295391917228699


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 58%|█████▊    | 58/100 [12:38<09:08, 13.05s/it]

Iteration 58, Reward: 6.295392036437988, Length: 500, Reward_Length: 4495, Predicted Text: v_tok_709v_tok_16v_tok_434v_tok_362v_tok_16v_tok_362v_tok_16v_tok_362v_tok_422v_tok_362v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_248v_tok_422v_tok_422v_tok_771v_tok_422v_tok_198v_tok_422v_tok_198v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_314v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_539v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_539v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4494 , Reward: 6.761153340339661


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 59%|█████▉    | 59/100 [12:51<08:53, 13.00s/it]

Iteration 59, Reward: 6.761153221130371, Length: 500, Reward_Length: 4494, Predicted Text: v_tok_420v_tok_309v_tok_716v_tok_55v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_141v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_634v_tok_422v_tok_55v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_198v_tok_422v_tok_980v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_771v_tok_422v_tok_198v_tok_771v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4366 , Reward: 9.704645276069641


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 60%|██████    | 60/100 [13:04<08:38, 12.97s/it]

Iteration 60, Reward: 9.704645156860352, Length: 500, Reward_Length: 4366, Predicted Text:  Chattv_tok_951v_tok_600v_tok_435v_tok_841v_tok_634v_tok_362v_tok_479v_tok_413v_tok_790v_tok_224v_tok_224v_tok_208v_tok_875v_tok_875v_tok_1011v_tok_151v_tok_154v_tok_659v_tok_820v_tok_820v_tok_230v_tok_980v_tok_716v_tok_422v_tok_422v_tok_422v_tok_422v_tok_422v_tok_771v_tok_422v_tok_422v_tok_230v_tok_422v_tok_980v_tok_532v_tok_980v_tok_466v_tok_466v_tok_980v_tok_820v_tok_820v_tok_820v_tok_820v_tok_820v_tok_937v_tok_954v_tok_954v_tok_954v_tok_954v_tok_954v_tok_504v_tok_954v_tok_980v_tok_413v_tok_413v_tok_413v_tok_413v_tok_181v_tok_413v_tok_980v_tok_413v_tok_413v_tok_820v_tok_1012v_tok_820v_tok_820v_tok_937v_tok_74v_tok_954v_tok_980v_tok_820v_tok_378v_tok_820v_tok_980v_tok_937v_tok_74v_tok_980v_tok_937v_tok_820v_tok_980v_tok_820v_tok_269v_tok_980v_tok_466v_tok_980v_tok_954v_tok_980v_tok_937v_tok_954v_tok_980v_tok_954v_tok_413v_tok_820v_tok_413v_tok_820v_tok_466v_tok_61v_tok_980v_tok_980v_tok_998v_to

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4397 , Reward: 8.522043824195862


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 61%|██████    | 61/100 [13:17<08:25, 12.95s/it]

Iteration 61, Reward: 8.52204418182373, Length: 500, Reward_Length: 4397, Predicted Text:  nominalv_tok_869v_tok_716v_tok_65v_tok_65v_tok_146v_tok_222v_tok_222v_tok_976v_tok_976v_tok_976v_tok_1001v_tok_208v_tok_136v_tok_731v_tok_854v_tok_568v_tok_576v_tok_903v_tok_839v_tok_539v_tok_422v_tok_422v_tok_422v_tok_422v_tok_248v_tok_422v_tok_716v_tok_539v_tok_771v_tok_716v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_910v_tok_326v_tok_326v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_998v_tok_74v_tok_326v_tok_980v_tok_980v_tok_874v_tok_74v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_1012v_tok_980v_tok_959v_tok_74v_tok_937v_tok_980v_tok_980v_tok_998v_tok_74v_tok_820v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_1012v_tok_74v_tok_820v_tok_980v_tok_980v_tok_378v_tok_74v_tok_326v_tok_980v_tok_980v_tok_74v_tok_378v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_462v_tok_998v_tok_74v_tok_553v_tok_980v_tok_980v_tok_980v_tok_378v_tok_74v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4437 , Reward: 9.314332604408264


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 62%|██████▏   | 62/100 [13:30<08:11, 12.93s/it]

Iteration 62, Reward: 9.314332962036133, Length: 500, Reward_Length: 4437, Predicted Text:  Altov_tok_841v_tok_548v_tok_548v_tok_548v_tok_548v_tok_146v_tok_411v_tok_830v_tok_378v_tok_406v_tok_501v_tok_939v_tok_74v_tok_74v_tok_668v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_668v_tok_39v_tok_668v_tok_326v_tok_61v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_914v_tok_74v_tok_74v_tok_378v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_998v_tok_74v_tok_425v_tok_743v_tok_960v_tok_980v_tok_706v_tok_1012v_tok_706v_tok_1012v_tok_937v_tok_1012v_tok_743v_tok_1012v_tok_43v_tok_553v_tok_820v_tok_305v_tok_820v_tok_743v_tok_954v_tok_743v_tok_954v_tok_954v_tok_820v_tok_954v_tok_820v_tok_820v_tok_980v_tok_937v_tok_378v_tok_61v_tok_326v_tok_980v_tok_980v_tok_413v_tok_326v_tok_980v_tok_980v_tok_326v_tok_74v_tok_980v_tok_980v_tok_937v_tok_74v_tok_980v_tok_18v_tok_998v_tok_980v_tok_980v_tok_74v_tok_980v_tok_980v_tok_998v_tok_1012v_tok_326v_tok_1012v_tok_980v_tok_820v_tok_74v_tok_820

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4479 , Reward: 13.75146746635437


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 63%|██████▎   | 63/100 [13:43<07:57, 12.89s/it]

Iteration 63, Reward: 13.75146770477295, Length: 500, Reward_Length: 4479, Predicted Text: v_tok_40v_tok_314v_tok_600v_tok_362v_tok_771v_tok_841v_tok_841v_tok_771v_tok_413v_tok_378v_tok_699v_tok_583v_tok_255v_tok_491v_tok_751v_tok_751v_tok_846v_tok_980v_tok_954v_tok_413v_tok_937v_tok_413v_tok_743v_tok_466v_tok_980v_tok_980v_tok_413v_tok_378v_tok_413v_tok_937v_tok_413v_tok_504v_tok_413v_tok_466v_tok_413v_tok_954v_tok_413v_tok_937v_tok_413v_tok_937v_tok_253v_tok_937v_tok_1012v_tok_980v_tok_820v_tok_74v_tok_980v_tok_980v_tok_1012v_tok_74v_tok_326v_tok_743v_tok_980v_tok_980v_tok_413v_tok_413v_tok_413v_tok_320v_tok_413v_tok_413v_tok_466v_tok_269v_tok_466v_tok_466v_tok_954v_tok_954v_tok_820v_tok_820v_tok_553v_tok_820v_tok_820v_tok_820v_tok_269v_tok_1012v_tok_466v_tok_413v_tok_980v_tok_413v_tok_378v_tok_413v_tok_61v_tok_694v_tok_980v_tok_961v_tok_74v_tok_466v_tok_980v_tok_43v_tok_320v_tok_413v_tok_320v_tok_413v_tok_320v_tok_413v_tok_320v_tok_269v_tok_413v_tok_466v_tok_413v_tok_820v_tok_413v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4448 , Reward: 9.547218680381775


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 64%|██████▍   | 64/100 [13:55<07:42, 12.85s/it]

Iteration 64, Reward: 9.547218322753906, Length: 500, Reward_Length: 4448, Predicted Text: v_tok_285v_tok_76v_tok_309v_tok_539v_tok_634v_tok_55v_tok_422v_tok_980v_tok_874v_tok_808v_tok_378v_tok_43v_tok_43v_tok_413v_tok_808v_tok_74v_tok_980v_tok_43v_tok_656v_tok_74v_tok_980v_tok_980v_tok_326v_tok_1012v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_413v_tok_74v_tok_413v_tok_980v_tok_743v_tok_413v_tok_892v_tok_413v_tok_954v_tok_74v_tok_954v_tok_980v_tok_954v_tok_320v_tok_305v_tok_466v_tok_743v_tok_980v_tok_954v_tok_892v_tok_820v_tok_441v_tok_937v_tok_553v_tok_954v_tok_877v_tok_6v_tok_706v_tok_269v_tok_269v_tok_954v_tok_706v_tok_269v_tok_269v_tok_466v_tok_61v_tok_954v_tok_61v_tok_980v_tok_743v_tok_326v_tok_980v_tok_980v_tok_326v_tok_1012v_tok_980v_tok_820v_tok_61v_tok_937v_tok_980v_tok_980v_tok_74v_tok_1012v_tok_980v_tok_553v_tok_74v_tok_61v_tok_61v_tok_980v_tok_980v_tok_74v_tok_378v_tok_43v_tok_980v_tok_326v_tok_937v_tok_980v_tok_980v_tok_74v_tok_1012v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4431 , Reward: 20.737967491149902


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 65%|██████▌   | 65/100 [14:16<08:50, 15.16s/it]

Iteration 65, Reward: 20.73796844482422, Length: 500, Reward_Length: 4431, Predicted Text: v_tok_826v_tok_839v_tok_55v_tok_78v_tok_99v_tok_903v_tok_803v_tok_961v_tok_411v_tok_699v_tok_23v_tok_598v_tok_1001v_tok_224v_tok_323v_tok_804v_tok_816v_tok_889v_tok_980v_tok_657v_tok_1021v_tok_1021v_tok_283v_tok_552v_tok_782v_tok_283v_tok_943v_tok_466v_tok_413v_tok_190v_tok_320v_tok_320v_tok_1012v_tok_1012v_tok_553v_tok_553v_tok_937v_tok_820v_tok_980v_tok_413v_tok_74v_tok_269v_tok_743v_tok_466v_tok_954v_tok_455v_tok_455v_tok_455v_tok_504v_tok_455v_tok_382v_tok_504v_tok_706v_tok_954v_tok_446v_tok_820v_tok_462v_tok_1012v_tok_1012v_tok_413v_tok_1012v_tok_1012v_tok_820v_tok_326v_tok_937v_tok_980v_tok_980v_tok_820v_tok_74v_tok_937v_tok_980v_tok_980v_tok_413v_tok_820v_tok_74v_tok_820v_tok_61v_tok_820v_tok_980v_tok_980v_tok_74v_tok_74v_tok_190v_tok_937v_tok_74v_tok_980v_tok_937v_tok_980v_tok_466v_tok_413v_tok_382v_tok_382v_tok_504v_tok_816v_tok_856v_tok_816v_tok_224v_tok_843v_tok_224v_tok_1021v_tok_533v

 66%|██████▌   | 66/100 [14:28<08:05, 14.28s/it]

Iteration 66, Reward: 0.0, Length: 500, Reward_Length: 4432, Predicted Text: v_tok_371v_tok_562v_tok_6v_tok_539v_tok_250v_tok_970v_tok_816v_tok_954v_tok_455v_tok_820v_tok_382v_tok_361v_tok_743v_tok_190v_tok_190v_tok_1012v_tok_74v_tok_326v_tok_980v_tok_980v_tok_74v_tok_998v_tok_980v_tok_1012v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_998v_tok_980v_tok_413v_tok_584v_tok_269v_tok_466v_tok_954v_tok_954v_tok_954v_tok_327v_tok_6v_tok_840v_tok_752v_tok_599v_tok_898v_tok_441v_tok_970v_tok_305v_tok_938v_tok_820v_tok_6v_tok_269v_tok_892v_tok_466v_tok_61v_tok_954v_tok_954v_tok_820v_tok_954v_tok_413v_tok_954v_tok_466v_tok_954v_tok_954v_tok_954v_tok_954v_tok_820v_tok_820v_tok_553v_tok_425v_tok_553v_tok_1012v_tok_820v_tok_1012v_tok_553v_tok_1012v_tok_820v_tok_1012v_tok_326v_tok_413v_tok_937v_tok_74v_tok_980v_tok_980v_tok_181v_tok_74v_tok_320v_tok_954v_tok_998v_tok_1012v_tok_820v_tok_425 speakingv_tok_820v_tok_425v_tok_820v_tok_980v_tok_937v_tok_980v_tok_980v_tok_74v_tok_74v_tok_43v_tok_980v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4455 , Reward: 10.131670236587524


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 67%|██████▋   | 67/100 [14:41<07:39, 13.93s/it]

Iteration 67, Reward: 10.131669998168945, Length: 500, Reward_Length: 4455, Predicted Text: 654v_tok_333v_tok_314v_tok_314v_tok_375v_tok_55v_tok_15v_tok_378v_tok_74v_tok_820v_tok_954v_tok_820v_tok_954v_tok_820v_tok_6v_tok_413v_tok_413v_tok_269v_tok_269v_tok_954v_tok_567v_tok_937v_tok_451v_tok_466v_tok_269v_tok_954v_tok_466v_tok_269v_tok_954v_tok_954v_tok_413v_tok_954v_tok_1012v_tok_937v_tok_466v_tok_411v_tok_954v_tok_504v_tok_820v_tok_466v_tok_504v_tok_954v_tok_269v_tok_820v_tok_755v_tok_269v_tok_269v_tok_6v_tok_61v_tok_466v_tok_466v_tok_980v_tok_954v_tok_378v_tok_269v_tok_413v_tok_466v_tok_269v_tok_820v_tok_466v_tok_413v_tok_954v_tok_550v_tok_504v_tok_954v_tok_954v_tok_820v_tok_954v_tok_269v_tok_937v_tok_954v_tok_980v_tok_820v_tok_998v_tok_937v_tok_1012v_tok_980v_tok_820v_tok_980v_tok_504v_tok_378v_tok_694v_tok_937v_tok_504v_tok_937v_tok_269v_tok_980v_tok_466v_tok_74v_tok_954v_tok_655v_tok_6v_tok_504v_tok_466v_tok_466v_tok_954v_tok_954v_tok_18v_tok_954v_tok_567v_tok_980v_tok_980v_tok_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4401 , Reward: 8.86630654335022


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 68%|██████▊   | 68/100 [14:55<07:20, 13.78s/it]

Iteration 68, Reward: 8.86630630493164, Length: 500, Reward_Length: 4401, Predicted Text: v_tok_76v_tok_731v_tok_634v_tok_539v_tok_539v_tok_55v_tok_839v_tok_990v_tok_504v_tok_976v_tok_1023v_tok_976v_tok_976v_tok_976v_tok_699v_tok_976v_tok_731v_tok_804v_tok_524v_tok_970v_tok_57v_tok_970v_tok_382v_tok_466v_tok_382v_tok_954v_tok_706v_tok_954v_tok_6v_tok_954v_tok_269v_tok_584v_tok_466v_tok_954v_tok_937v_tok_820v_tok_980v_tok_820v_tok_51v_tok_820v_tok_74v_tok_820v_tok_980v_tok_504v_tok_74v_tok_970v_tok_980v_tok_269v_tok_74v_tok_954v_tok_61v_tok_954v_tok_466v_tok_6v_tok_954v_tok_269v_tok_799v_tok_954v_tok_269v_tok_954v_tok_954v_tok_527v_tok_6v_tok_506v_tok_599v_tok_269v_tok_298v_tok_6v_tok_269v_tok_506v_tok_6v_tok_269v_tok_938v_tok_6v_tok_504v_tok_599v_tok_466v_tok_657v_tok_980v_tok_269v_tok_74v_tok_466v_tok_980v_tok_980v_tok_466v_tok_181v_tok_820v_tok_820v_tok_269v_tok_937v_tok_954v_tok_980v_tok_504v_tok_378v_tok_259v_tok_937v_tok_874v_tok_491v_tok_532v_tok_699v_tok_6v_tok_560v_tok_181v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4411 , Reward: 9.166958928108215


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 69%|██████▉   | 69/100 [15:08<06:59, 13.53s/it]

Iteration 69, Reward: 9.166958808898926, Length: 500, Reward_Length: 4411, Predicted Text: v_tok_285v_tok_539v_tok_539v_tok_903v_tok_539v_tok_55v_tok_852v_tok_309v_tok_462v_tok_556v_tok_866v_tok_903v_tok_235v_tok_878v_tok_527v_tok_527v_tok_506v_tok_954v_tok_413v_tok_413v_tok_699v_tok_181v_tok_681v_tok_466v_tok_51v_tok_820v_tok_532v_tok_413v_tok_413v_tok_613v_tok_413v_tok_699v_tok_466v_tok_61v_tok_532v_tok_820v_tok_965v_tok_413v_tok_466v_tok_532v_tok_504v_tok_504v_tok_504v_tok_706v_tok_576v_tok_914v_tok_980v_tok_235v_tok_413v_tok_948v_tok_413v_tok_550v_tok_413v_tok_787v_tok_74v_tok_57v_tok_980v_tok_57v_tok_74v_tok_816v_tok_884v_tok_18v_tok_820v_tok_456v_tok_269v_tok_382v_tok_980v_tok_382v_tok_914v_tok_816v_tok_816v_tok_576v_tok_74v_tok_576v_tok_980v_tok_18v_tok_584v_tok_980v_tok_584v_tok_706v_tok_584v_tok_706v_tok_706v_tok_283v_tok_382v_tok_382v_tok_382v_tok_382v_tok_908v_tok_382v_tok_856v_tok_820v_tok_504v_tok_413v_tok_998v_tok_413v_tok_1012v_tok_1010v_tok_74v_tok_681v_tok_980v_tok_43v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4438 , Reward: 16.937800645828247


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 70%|███████   | 70/100 [15:20<06:38, 13.28s/it]

Iteration 70, Reward: 16.937801361083984, Length: 500, Reward_Length: 4438, Predicted Text: v_tok_395v_tok_869v_tok_765v_tok_568v_tok_730v_tok_539v_tok_716v_tok_657v_tok_422v_tok_816v_tok_411v_tok_954v_tok_208v_tok_820v_tok_699v_tok_269v_tok_73v_tok_455v_tok_504v_tok_846v_tok_247v_tok_269v_tok_1003v_tok_466v_tok_954v_tok_976v_tok_269v_tok_466v_tok_870v_tok_6v_tok_269v_tok_320v_tok_954v_tok_466v_tok_503v_tok_743v_tok_954v_tok_553v_tok_6v_tok_820v_tok_567v_tok_269v_tok_6v_tok_138v_tok_954v_tok_532v_tok_413v_tok_965v_tok_269v_tok_846v_tok_532v_tok_820v_tok_466v_tok_269v_tok_567v_tok_466v_tok_65v_tok_61v_tok_413v_tok_980v_tok_413v_tok_413v_tok_820v_tok_466v_tok_820v_tok_954v_tok_269v_tok_382v_tok_954v_tok_706v_tok_6v_tok_413v_tok_462v_tok_269v_tok_954v_tok_954v_tok_504v_tok_706v_tok_706v_tok_938v_tok_706v_tok_446v_tok_504v_tok_527v_tok_892v_tok_466v_tok_800v_tok_980v_tok_980v_tok_413v_tok_74v_tok_820v_tok_980v_tok_937v_tok_74v_tok_874v_tok_61v_tok_378v_tok_466v_tok_954v_tok_250v_tok_820v_t

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4415 , Reward: 11.785119771957397


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 71%|███████   | 71/100 [15:33<06:21, 13.15s/it]

Iteration 71, Reward: 11.785120010375977, Length: 500, Reward_Length: 4415, Predicted Text: v_tok_835v_tok_55v_tok_16v_tok_16v_tok_716v_tok_634v_tok_634v_tok_943v_tok_804v_tok_385v_tok_699v_tok_208v_tok_208v_tok_976v_tok_208v_tok_976v_tok_645v_tok_385v_tok_816v_tok_980v_tok_319v_tok_74v_tok_820v_tok_43v_tok_185v_tok_820v_tok_413v_tok_250v_tok_181v_tok_466v_tok_466v_tok_743v_tok_253v_tok_451v_tok_348v_tok_501v_tok_298v_tok_943v_tok_81v_tok_466v_tok_657v_tok_61v_tok_980v_tok_980v_tok_998v_tok_74v_tok_998v_tok_980v_tok_937v_tok_15v_tok_980v_tok_413v_tok_998v_tok_820v_tok_413v_tok_413v_tok_466v_tok_599v_tok_743v_tok_466v_tok_954v_tok_198v_tok_954v_tok_411v_tok_200v_tok_958v_tok_553v_tok_15v_tok_305v_tok_320v_tok_799v_tok_320v_tok_269v_tok_466v_tok_466v_tok_954v_tok_820v_tok_61v_tok_413v_tok_980v_tok_867v_tok_429v_tok_980v_tok_980v_tok_74v_tok_74v_tok_61v_tok_980v_tok_466v_tok_74v_tok_466v_tok_937v_tok_937v_tok_937v_tok_954v_tok_575v_tok_820v_tok_385v_tok_413v_tok_327v_tok_466v_tok_820v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4387 , Reward: 16.331762075424194


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 72%|███████▏  | 72/100 [15:46<06:05, 13.04s/it]

Iteration 72, Reward: 16.331762313842773, Length: 500, Reward_Length: 4387, Predicted Text: v_tok_537v_tok_639v_tok_716v_tok_816v_tok_539v_tok_556v_tok_422v_tok_422v_tok_645v_tok_73v_tok_976v_tok_208v_tok_1001v_tok_325v_tok_344v_tok_81v_tok_55v_tok_503v_tok_413v_tok_466v_tok_583v_tok_185v_tok_604v_tok_976v_tok_432v_tok_840v_tok_411v_tok_846v_tok_411v_tok_411v_tok_466v_tok_976v_tok_860v_tok_738v_tok_1001v_tok_475v_tok_645v_tok_479v_tok_6v_tok_816v_tok_599v_tok_215v_tok_462v_tok_527v_tok_567v_tok_466v_tok_446v_tok_280v_tok_706v_tok_466v_tok_269v_tok_954v_tok_980v_tok_413v_tok_998v_tok_378v_tok_820v_tok_820v_tok_901v_tok_820v_tok_679v_tok_43v_tok_982v_tok_760v_tok_43v_tok_937v_tok_74v_tok_378v_tok_980v_tok_980v_tok_74v_tok_74v_tok_190v_tok_980v_tok_74v_tok_74v_tok_980v_tok_937v_tok_74v_tok_378v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_190v_tok_1012v_tok_378v_tok_326v_tok_840v_tok_980v_tok_57v_tok_74v_tok_57v_tok_283v_tok_57v_tok_838v_tok_604v_tok_97

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4396 , Reward: 10.369744300842285


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 73%|███████▎  | 73/100 [15:59<05:51, 13.01s/it]

Iteration 73, Reward: 10.369744300842285, Length: 500, Reward_Length: 4396, Predicted Text: v_tok_835v_tok_222v_tok_951v_tok_552v_tok_336v_tok_841v_tok_314v_tok_695v_tok_716v_tok_716v_tok_980v_tok_841v_tok_74v_tok_790v_tok_192v_tok_378v_tok_181v_tok_61v_tok_998v_tok_980v_tok_820v_tok_74v_tok_411v_tok_980v_tok_413v_tok_269v_tok_413v_tok_937v_tok_645v_tok_407v_tok_1021v_tok_980v_tok_846v_tok_1012v_tok_820v_tok_820v_tok_937v_tok_411v_tok_980v_tok_532v_tok_820v_tok_980v_tok_413v_tok_74v_tok_74v_tok_980v_tok_937v_tok_326v_tok_980v_tok_937v_tok_74v_tok_980v_tok_980v_tok_74v_tok_269v_tok_808v_tok_466v_tok_378v_tok_743v_tok_820v_tok_796v_tok_820v_tok_532v_tok_980v_tok_954v_tok_74v_tok_6v_tok_980v_tok_413v_tok_378v_tok_820v_tok_820v_tok_937v_tok_937v_tok_937v_tok_43v_tok_980v_tok_1004v_tok_937v_tok_283v_tok_980v_tok_816v_tok_998v_tok_61v_tok_532v_tok_980v_tok_980v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_253v_tok_937v_tok_413v_tok_937v_tok_181v_tok_479v_tok_181v_tok_937v_tok_378v_tok_980v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4448 , Reward: 23.293049335479736


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 74%|███████▍  | 74/100 [16:12<05:37, 12.96s/it]

Iteration 74, Reward: 23.293048858642578, Length: 500, Reward_Length: 4448, Predicted Text: v_tok_835v_tok_552v_tok_600v_tok_771v_tok_481v_tok_841v_tok_772v_tok_230v_tok_820v_tok_645v_tok_283v_tok_976v_tok_1021v_tok_1001v_tok_804v_tok_208v_tok_645v_tok_269v_tok_846v_tok_283v_tok_657v_tok_1021v_tok_816v_tok_208v_tok_645v_tok_645v_tok_504v_tok_411v_tok_970v_tok_820v_tok_466v_tok_1012v_tok_833v_tok_613v_tok_224v_tok_967v_tok_457v_tok_523v_tok_57v_tok_466v_tok_978v_tok_1006v_tok_775v_tok_775v_tok_599v_tok_138v_tok_954v_tok_959v_tok_820v_tok_790v_tok_820v_tok_556v_tok_413v_tok_504v_tok_466v_tok_466v_tok_937v_tok_937v_tok_937v_tok_1001v_tok_696v_tok_1001v_tok_843v_tok_723v_tok_937v_tok_51v_tok_980v_tok_190v_tok_74v_tok_74v_tok_980v_tok_43v_tok_74v_tok_567v_tok_980v_tok_796v_tok_74v_tok_43v_tok_43v_tok_808v_tok_808v_tok_734v_tok_921v_tok_681v_tok_796v_tok_407v_tok_796v_tok_906v_tok_906v_tok_906v_tok_906v_tok_906v_tok_465v_tok_906v_tok_906v_tok_385v_tok_246v_tok_816v_tok_533v_tok_588v_tok_843v

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4436 , Reward: 11.209698915481567


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 75%|███████▌  | 75/100 [16:25<05:25, 13.02s/it]

Iteration 75, Reward: 11.209698677062988, Length: 500, Reward_Length: 4436, Predicted Text: v_tok_499v_tok_539v_tok_206v_tok_539v_tok_568v_tok_634v_tok_820v_tok_603v_tok_491v_tok_699v_tok_583v_tok_860v_tok_224v_tok_1001v_tok_794v_tok_283v_tok_816v_tok_816v_tok_980v_tok_755v_tok_74v_tok_189v_tok_980v_tok_378v_tok_413v_tok_820v_tok_382v_tok_820v_tok_411v_tok_820v_tok_378v_tok_820v_tok_937v_tok_378v_tok_532v_tok_326v_tok_283v_tok_980v_tok_269v_tok_269v_tok_954v_tok_755v_tok_6v_tok_181v_tok_320v_tok_466v_tok_532v_tok_954v_tok_413v_tok_504v_tok_466v_tok_956v_tok_266v_tok_956v_tok_965v_tok_816v_tok_910v_tok_980v_tok_980v_tok_74v_tok_74v_tok_378v_tok_980v_tok_937v_tok_74v_tok_743v_tok_937v_tok_980v_tok_378v_tok_326v_tok_411v_tok_61v_tok_645v_tok_980v_tok_378v_tok_74v_tok_820v_tok_980v_tok_980v_tok_74v_tok_382v_tok_937v_tok_752v_tok_874v_tok_816v_tok_980v_tok_980v_tok_74v_tok_378v_tok_378v_tok_980v_tok_584v_tok_74v_tok_937v_tok_874v_tok_875v_tok_650v_tok_283v_tok_74v_tok_816v_tok_57v_tok_980v_

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4454 , Reward: 13.896857500076294


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 76%|███████▌  | 76/100 [16:38<05:10, 12.95s/it]

Iteration 76, Reward: 13.896857261657715, Length: 500, Reward_Length: 4454, Predicted Text: v_tok_835v_tok_710v_tok_420v_tok_16v_tok_634v_tok_820v_tok_539v_tok_269v_tok_527v_tok_954v_tok_504v_tok_269v_tok_466v_tok_743v_tok_820v_tok_743v_tok_413v_tok_491v_tok_413v_tok_321v_tok_413v_tok_413v_tok_425v_tok_413v_tok_820v_tok_808v_tok_224v_tok_676v_tok_604v_tok_875v_tok_676v_tok_686v_tok_550v_tok_466v_tok_269v_tok_980v_tok_305v_tok_320v_tok_743v_tok_532v_tok_466v_tok_466v_tok_504v_tok_532v_tok_506v_tok_6v_tok_970v_tok_181v_tok_720v_tok_310v_tok_553v_tok_224v_tok_532v_tok_1021v_tok_532v_tok_787v_tok_965v_tok_820v_tok_74v_tok_820v_tok_937v_tok_937v_tok_980v_tok_886v_tok_532v_tok_532v_tok_965v_tok_601v_tok_532v_tok_980v_tok_532v_tok_74v_tok_466v_tok_980v_tok_980v_tok_326v_tok_425v_tok_980v_tok_61v_tok_910v_tok_532v_tok_820v_tok_954v_tok_224v_tok_283v_tok_532v_tok_722v_tok_723v_tok_937v_tok_283v_tok_59v_tok_378v_tok_976v_tok_1001v_tok_976v_tok_875v_tok_533v_tok_788v_tok_283v_tok_657v_tok_980v_to

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4465 , Reward: 15.159658193588257


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 77%|███████▋  | 77/100 [16:51<04:57, 12.92s/it]

Iteration 77, Reward: 15.159658432006836, Length: 500, Reward_Length: 4465, Predicted Text: v_tok_779v_tok_600v_tok_434v_tok_248v_tok_462v_tok_248v_tok_446v_tok_57v_tok_344v_tok_430v_tok_208v_tok_699v_tok_208v_tok_976v_tok_208v_tok_141v_tok_283v_tok_816v_tok_816v_tok_411v_tok_74v_tok_23v_tok_875v_tok_738v_tok_432v_tok_835v_tok_224v_tok_832v_tok_1006v_tok_816v_tok_532v_tok_550v_tok_413v_tok_378v_tok_699v_tok_604v_tok_224v_tok_491v_tok_1001v_tok_965v_tok_567v_tok_567v_tok_938v_tok_948v_tok_567v_tok_138v_tok_567v_tok_489v_tok_532v_tok_706v_tok_954v_tok_567v_tok_413v_tok_1012v_tok_466v_tok_74v_tok_980v_tok_532v_tok_382v_tok_567v_tok_74v_tok_567v_tok_532v_tok_567v_tok_532v_tok_532v_tok_532v_tok_532v_tok_532v_tok_532v_tok_532v_tok_532v_tok_601v_tok_532v_tok_532v_tok_948v_tok_532v_tok_948v_tok_550v_tok_567v_tok_532v_tok_520v_tok_532v_tok_910v_tok_720v_tok_520v_tok_425v_tok_550v_tok_61v_tok_980v_tok_980v_tok_74v_tok_74v_tok_378v_tok_874v_tok_937v_tok_980v_tok_980v_tok_326v_tok_74v_tok_378v_tok

  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Length of predicted_list: 4411 , Reward: 12.880290746688843


  std_scores = data["scores"].std()
  stats["tokens/queries_len_std"] = torch.std(query_lens).cpu().numpy().item()
  stats["tokens/responses_len_std"] = torch.std(response_lens).cpu().numpy().item()
  logs["env/reward_std"] = torch.std(rewards).cpu().numpy().item()
 78%|███████▊  | 78/100 [17:04<04:43, 12.91s/it]

Iteration 78, Reward: 12.880290985107422, Length: 500, Reward_Length: 4411, Predicted Text: v_tok_835v_tok_998v_tok_215v_tok_481v_tok_567v_tok_716v_tok_716v_tok_980v_tok_980v_tok_378v_tok_808v_tok_326v_tok_980v_tok_980v_tok_998v_tok_74v_tok_74v_tok_980v_tok_980v_tok_74v_tok_980v_tok_491v_tok_699v_tok_699v_tok_699v_tok_976v_tok_699v_tok_411v_tok_181v_tok_411v_tok_466v_tok_650v_tok_43v_tok_1001v_tok_871v_tok_840v_tok_411v_tok_820v_tok_980v_tok_601v_tok_74v_tok_269v_tok_812v_tok_362v_tok_248v_tok_532v_tok_248v_tok_870v_tok_567v_tok_567v_tok_358v_tok_532v_tok_723v_tok_532v_tok_378v_tok_532v_tok_532v_tok_755v_tok_584v_tok_504v_tok_723v_tok_504v_tok_532v_tok_532v_tok_532v_tok_948v_tok_961v_tok_796v_tok_456v_tok_807v_tok_807v_tok_807v_tok_385v_tok_385v_tok_840v_tok_840v_tok_840v_tok_840v_tok_840v_tok_382v_tok_382v_tok_382v_tok_382v_tok_382v_tok_382v_tok_382v_tok_382v_tok_382v_tok_816v_tok_382v_tok_816v_tok_382v_tok_980v_tok_382v_tok_74v_tok_840v_tok_57v_tok_363v_tok_407v_tok_724v_tok_228v_tok

 78%|███████▊  | 78/100 [17:05<04:49, 13.15s/it]


KeyboardInterrupt: 