# Training

In [1]:
import sys
sys.path.append("/work/b0990106x/trl/vc")
import importlib
import vc
importlib.reload(vc)
import torch
from vc.trainer_encodec_vc_inference import get_ar_prediction_v3, pack_inputs_v2
from types import SimpleNamespace
from transformers import BartForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from NISQA.nisqa.NISQA_model import nisqaModel
from datasets import load_from_disk, Dataset
from trl import DPOTrainer, DPOConfig, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
from vc.encodec_model.nar_bart_model import NARBartForConditionalGeneration
from datetime import datetime
import os
import numpy as np
from dpo_eval import get_reward, eval_dpo, eval_dpo_new, eval_dpo_even_token
import json
from tqdm import tqdm
import random
import concurrent.futures
import time


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def token_reward(token_list):
    """Score a list of ``v_tok_<n>`` tokens by the share of even ids.

    Args:
        token_list: Token strings whose third ``_``-separated field is an
            integer id, e.g. ``"v_tok_620"``.

    Returns:
        A ``(reward, percent, total)`` tuple: ``total`` is the number of even
        ids, ``percent`` is ``total / len(token_list)`` and ``reward`` is
        ``percent * 5``. An empty list yields ``(0.0, 0.0, 0)`` instead of
        dividing by zero.

    Raises:
        IndexError: if a token has fewer than three ``_``-separated fields.
        ValueError: if the third field is not an integer.
    """
    if not token_list:
        # Nothing to score; avoid ZeroDivisionError below.
        return 0.0, 0.0, 0

    token_ids = [int(token.split("_")[2]) for token in token_list]
    even_count = sum(1 for token_id in token_ids if token_id % 2 == 0)
    percent = even_count / len(token_ids)
    reward = percent * 5
    return reward, percent, even_count

In [3]:
import random

# Draw one random token id with the requested parity.
def random_token_number(even=True):
    """Return a random token id whose parity matches ``even``.

    One of two inclusive id spans (0-1024 or 8192-9215) is picked uniformly,
    then ids are drawn uniformly from that span until one is even (when
    ``even`` is truthy) or odd (otherwise).
    """
    # NOTE(review): the first span holds 1025 ids while the second holds 1024;
    # confirm whether the upper bound 1024 is intended.
    id_spans = [(0, 1024), (8192, 9215)]
    low, high = random.choice(id_spans)
    candidates = range(low, high + 1)
    wanted_remainder = 0 if even else 1
    while True:
        candidate = random.choice(candidates)
        if candidate % 2 == wanted_remainder:
            return candidate

# Build a concatenated "v_tok_<n>" string under a random character budget.
def create_token_string(even=True):
    """Concatenate random ``v_tok_<n>`` tokens of one parity into a string.

    A character budget between 50 and 1024 (inclusive) is drawn first. Tokens
    are appended while they still fit; the first token that would push the
    string past the budget ends the loop. Note the budget counts characters
    of the joined string, not the number of tokens.

    Args:
        even: Forwarded to ``random_token_number`` to select even or odd ids.

    Returns:
        The tokens joined with no separator, e.g. ``"v_tok_6v_tok_584"``.
    """
    char_budget = random.randint(50, 1024)
    pieces = []
    used_chars = 0
    while used_chars < char_budget:
        piece = f"v_tok_{random_token_number(even)}"
        if used_chars + len(piece) > char_budget:
            break
        pieces.append(piece)
        used_chars += len(piece)
    return "".join(pieces)

def generate_token_list(length=100):
    """Generate synthetic chosen/rejected token strings and score them.

    ``length`` all-even strings ("chosen") are created first, then ``length``
    all-odd strings ("rejected"). Both lists are printed, and each string is
    split back into individual ``v_tok_<n>`` tokens and scored with
    ``token_reward``.

    Args:
        length: Number of chosen strings and number of rejected strings.

    Returns:
        An 8-tuple ``(chosen, rejected, chosen_rewards, rejected_rewards,
        chosen_even_percents, rejected_even_percents, chosen_even_totals,
        rejected_even_totals)`` of parallel lists.
    """
    def score_all(token_strings):
        # Returns three parallel lists: rewards, even-percentages, even-counts.
        rewards, percents, totals = [], [], []
        for token_string in token_strings:
            # "v_tok_620v_tok_362" -> ["v_tok_620", "v_tok_362"]; the empty
            # piece before the first prefix is dropped.
            pieces = [f"v_tok_{num}" for num in token_string.split("v_tok_") if num]
            reward, percent, total = token_reward(pieces)
            rewards.append(reward)
            percents.append(percent)
            totals.append(total)
        return rewards, percents, totals

    # All chosen strings are drawn before any rejected string.
    chosen = [create_token_string(even=True) for _ in range(length)]
    rejected = [create_token_string(even=False) for _ in range(length)]

    print("Chosen:", chosen)
    print("Rejected:", rejected)

    chosen_rewards, chosen_even_percents, chosen_even_totals = score_all(chosen)
    rejected_rewards, rejected_even_percents, rejected_even_totals = score_all(rejected)

    return (
        chosen,
        rejected,
        chosen_rewards,
        rejected_rewards,
        chosen_even_percents,
        rejected_even_percents,
        chosen_even_totals,
        rejected_even_totals,
    )



def load_from_json(file_path):
    """Load a JSON list of records and return three parallel columns.

    Args:
        file_path: Path to a JSON file holding a list of objects, each with
            the keys ``"src_encodec"``, ``"instruction"`` and ``"tgt_encodec"``.

    Returns:
        ``(all_src_encodec, all_instruction, all_tgt_encodec)`` — one list per
        key, in record order.

    Raises:
        KeyError: if a record lacks one of the three keys.
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)

    def column(field):
        return [record[field] for record in records]

    return column("src_encodec"), column("instruction"), column("tgt_encodec")

In [4]:
def train_model(model, 
                   model_ref, 
                   ar_tokenizer, 
                   train_dataset, 
                   val_dataset, 
                   model_output_dir, 
                   beta, 
                   resume_from_checkpoint, 
                   model_checkpoint,
                   seed = 42
                   ):
    """Run DPO training and save the model, its config and the tokenizer.

    Args:
        model: Policy model handed to ``DPOTrainer``.
        model_ref: Reference model handed to ``DPOTrainer``.
        ar_tokenizer: Tokenizer used by the trainer and saved with the model.
        train_dataset: Training split.
        val_dataset: Evaluation split.
        model_output_dir: Trainer output directory; the final artefacts are
            written to ``<model_output_dir>/dpo_model``.
        beta: DPO ``beta`` value.
        resume_from_checkpoint: When truthy, ``model_checkpoint`` is stored
            in the config's ``resume_from_checkpoint`` field.
        model_checkpoint: Checkpoint path used only when resuming.
        seed: Seed stored in the training config.
    """
    # Settings shared by both the fresh and the resumed configuration.
    config_kwargs = dict(
        beta=beta,
        output_dir=model_output_dir,
        generate_during_eval=True,
        seed=seed,
        per_device_train_batch_size=1,
        num_train_epochs=3,
        gradient_accumulation_steps=1,
    )
    if resume_from_checkpoint:
        # NOTE(review): this only sets the config field; the checkpoint is not
        # passed to trainer.train(). Confirm that this actually resumes.
        config_kwargs["resume_from_checkpoint"] = model_checkpoint
    training_args = DPOConfig(**config_kwargs)

    trainer = DPOTrainer(
        model=model,
        ref_model=model_ref,
        args=training_args,
        tokenizer=ar_tokenizer,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )
    trainer.train()

    # Persist weights, model config and tokenizer side by side.
    export_dir = f"{model_output_dir}/dpo_model"
    trainer.save_model(export_dir)
    model.config.to_json_file(f"{export_dir}/config.json")
    ar_tokenizer.save_pretrained(export_dir)

def train_iteration(model_checkpoint, 
                    iteration, 
                    data_size, 
                    times_per_data, 
                    ar_checkpoint, 
                    nar_checkpoint, 
                    all_src_encodec, 
                    all_instruction, 
                    args_predict, 
                    agent_output_dir,
                    model_output_dir_base, 
                    beta = 0.1, 
                    resume_from_checkpoint = False,
                    seed = 42
                    ):
    """Run one DPO round: build or reload preference pairs, then train.

    On iteration 0, synthetic chosen/rejected token strings are generated,
    paired with prompts built from the first ``data_size`` inputs, and saved
    to ``<agent_output_dir>/data_iter_0.json``. Every later iteration reloads
    that same file. The model at ``model_checkpoint`` is then trained with
    ``train_model`` and saved under ``<model_output_dir_base>/iter_<iteration>``.

    Args:
        model_checkpoint: Checkpoint to load the policy model from.
        iteration: Zero-based iteration index.
        data_size: Number of preference pairs to create on iteration 0.
        times_per_data: Not used by this function; kept for call-site
            compatibility.
        ar_checkpoint: Checkpoint the tokenizer is loaded from.
        nar_checkpoint: Not used by this function; kept for call-site
            compatibility.
        all_src_encodec: Source inputs; the first ``data_size`` are used.
        all_instruction: Instructions; the first ``data_size`` are used.
        args_predict: Not used by this function; kept for call-site
            compatibility.
        agent_output_dir: Existing directory where the pair data is stored.
        model_output_dir_base: Parent directory for per-iteration outputs.
        beta: DPO ``beta`` value.
        resume_from_checkpoint: Forwarded to ``train_model``.
        seed: Seed for the train/validation split and for training. A fixed
            split seed keeps the same rows in the validation set on every
            iteration, so rows evaluated in one round were not trained on in
            an earlier round.

    Returns:
        ``(saved_model_path, chosen_rewards, rejected_rewards)``. On iteration
        0 the reward lists are the freshly generated ones; on later iterations
        they are the lists as stored in the JSON file (which holds every row
        twice when ``data_size`` was 1).

    Raises:
        ValueError: on iteration 0 when fewer than ``data_size`` inputs or
            instructions were supplied.
    """
    print(f"Iteration {iteration}")

    # The tokenizer is needed on every iteration, so load it once up front.
    ar_tokenizer = AutoTokenizer.from_pretrained(ar_checkpoint)
    ar_tokenizer.pad_token = ar_tokenizer.eos_token

    if iteration == 0: 
        amount_of_data = data_size
        if len(all_src_encodec) < amount_of_data or len(all_instruction) < amount_of_data:
            raise ValueError(
                f"data_size={amount_of_data} but only {len(all_src_encodec)} source inputs "
                f"and {len(all_instruction)} instructions were provided"
            )

        (chosen, rejected,
         chosen_rewards, rejected_rewards,
         chosen_even_percents, rejected_even_percents,
         chosen_even_totals, rejected_even_totals) = generate_token_list(length = amount_of_data)

        # Turn each (source, instruction) pair into the prompt string.
        prompts = []
        for i in range(amount_of_data):
            obs_input = pack_inputs_v2(ar_tokenizer, all_src_encodec[i], all_instruction[i])
            tokenize_input = ar_tokenizer.convert_ids_to_tokens(obs_input)
            prompts.append(ar_tokenizer.convert_tokens_to_string(tokenize_input))

        data = {
            "prompt": prompts,
            "chosen": chosen,
            "rejected": rejected,
            "chosen_rewards": chosen_rewards,
            "chosen_even_percents": chosen_even_percents,
            "chosen_even_totals": chosen_even_totals,
            "rejected_rewards": rejected_rewards,
            "rejected_even_percents": rejected_even_percents,
            "rejected_even_totals": rejected_even_totals
        }
        if amount_of_data == 1:
            # A single row cannot be split into train and validation sets, so
            # every column is duplicated to give the split two rows.
            data = {key: values + values for key, values in data.items()}

        # Save the JSON to a file
        with open(f"{agent_output_dir}/data_iter_{iteration}.json", "w") as outfile:
            json.dump(data, outfile, indent=4)
    else: 
        # Later iterations reuse the pairs generated on iteration 0.
        with open(f"{agent_output_dir}/data_iter_0.json", "r") as f:
            data = json.load(f)
        chosen_rewards = data["chosen_rewards"]
        rejected_rewards = data["rejected_rewards"]

    data_for_dataset = {key: data[key] for key in ["prompt", "chosen", "rejected"]}

    dataset = Dataset.from_dict(data_for_dataset)
    dataset_dict = dataset.train_test_split(test_size=0.1, seed=seed)
    train_dataset = dataset_dict["train"]
    val_dataset = dataset_dict["test"]

    # define output directory
    model_output_dir = f"{model_output_dir_base}/iter_{iteration}"
    os.makedirs(model_output_dir, exist_ok=True)

    model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(model_checkpoint, return_dict=True)
    model_ref = create_reference_model(model)
    
    train_model(model=model,
                model_ref=model_ref,
                ar_tokenizer=ar_tokenizer,
                train_dataset=train_dataset,
                val_dataset=val_dataset,
                model_output_dir=model_output_dir,
                beta=beta,
                resume_from_checkpoint=resume_from_checkpoint,
                model_checkpoint=model_checkpoint,
                seed=seed)
    
    return f"{model_output_dir}/dpo_model", chosen_rewards, rejected_rewards

In [5]:
# Load the source inputs, instructions and targets, then report their sizes.
all_src_encodec, all_instruction, all_tgt_encodec = load_from_json('src_encodec.json')
print(len(all_src_encodec), len(all_instruction), len(all_tgt_encodec), sep="\n")

# Paths and device
base_path = "/work/b0990106x/trl"
device = "cuda" if torch.cuda.is_available() else "cpu"

# The month/day-hour/minute timestamp names this run's output directories.
now = datetime.now()
ts = now.strftime("%m%d-%H%M")
print(f"timestamp: {ts}")

model_output_dir = f"{base_path}/model_output/{ts}"  # where trained models are saved
agent_input_dir = f"{base_path}/data-encodec"  # where the original input data is stored
agent_output_dir = f"{base_path}/output/{ts}"  # where generated data/audio for this run is saved

for _run_dir in (model_output_dir, agent_output_dir):
    if not os.path.exists(_run_dir):
        os.makedirs(_run_dir)

args_predict = SimpleNamespace(output_path=f"{agent_output_dir}/example.wav", seed=0, device=device)

# Checkpoints
ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
nar_checkpoint = "lca0503/speech-chatgpt-base-nar-v2-epoch4-wotrans"
model_checkpoint = ar_checkpoint  # the first iteration starts from the AR checkpoint

# Training settings
initial_data_size = 1  # data size for the first iteration
data_size_per_iteration = 1  # how many data items each iteration trains on
total_data_size = 1  # total data size that we want to train
times_per_data = 5  # doesn't matter here
beta = 0.1  # beta value for DPO
num_iterations = 30  # fixed number of training iterations

# Evaluation settings
eval_data_len = 1  # how many data items to evaluate
eval_selected_indices = [0]  # fixed evaluation index

print(f"length of all_src_encodec: {len(all_src_encodec)}") # ~ 9000 data
print(f"length of all_instruction: {len(all_instruction)}") # ~ 9000 data

9254
9254
9254
timestamp: 0718-2332
length of all_src_encodec: 9254
length of all_instruction: 9254


In [6]:
# Echo the run configuration, one "name: value" line per setting, so it is
# captured alongside the notebook output.
_run_settings = {
    "num_iterations": num_iterations,
    "data_size_per_iteration": data_size_per_iteration,
    "times_per_data": times_per_data,
    "beta": beta,
    "ar_checkpoint": ar_checkpoint,
    "nar_checkpoint": nar_checkpoint,
    "args_predict": args_predict,
    "model_output_dir": model_output_dir,
    "agent_output_dir": agent_output_dir,
    "base_path": base_path,
    "device": device,
    "eval_data_len": eval_data_len,
    "eval_selected_indices": eval_selected_indices,
}
for _setting_name, _setting_value in _run_settings.items():
    print(f"{_setting_name}: {_setting_value}")


num_iterations: 30
data_size_per_iteration: 1
times_per_data: 5
beta: 0.1
ar_checkpoint: lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans
nar_checkpoint: lca0503/speech-chatgpt-base-nar-v2-epoch4-wotrans
args_predict: namespace(output_path='/work/b0990106x/trl/output/0718-2332/example.wav', seed=0, device='cuda')
model_output_dir: /work/b0990106x/trl/model_output/0718-2332
agent_output_dir: /work/b0990106x/trl/output/0718-2332
base_path: /work/b0990106x/trl
device: cuda
eval_data_len: 1
eval_selected_indices: [0]


In [7]:
import logging

# Set up logging
# force=True (Python 3.8+) replaces any handlers already attached to the root
# logger. Without it basicConfig does nothing when the root logger is already
# configured -- e.g. when this cell is re-run with a new timestamp -- and the
# records would keep going to the previous run's log file.
logging.basicConfig(
    filename=f'{model_output_dir}/log_training.log', 
    filemode='a', 
    format='%(asctime)s - %(levelname)s - %(message)s', 
    level=logging.INFO,
    force=True
)

logging.info("Parameters:")
logging.info(f"num_iterations: {num_iterations}")
logging.info(f"data_size_per_iteration: {data_size_per_iteration}")
logging.info(f"times_per_data: {times_per_data}")
logging.info(f"beta: {beta}")
logging.info(f"timestamp: {ts}")

# Start time
total_start_time = time.time()

# Arguments shared by every evaluation call below.
eval_kwargs = dict(ar_checkpoint=ar_checkpoint,
                   nar_checkpoint=nar_checkpoint,
                   all_src_encodec=all_src_encodec,
                   all_instruction=all_instruction,
                   eval_data_len=eval_data_len,
                   selected_indices=eval_selected_indices,
                   device=device,
                   args_predict=args_predict)

# Baseline: evaluate the original (untrained) model, tagged as iteration -1.
all_metrics = eval_dpo_even_token(trained_model_checkpoint=ar_checkpoint,
                                  iteration=-1,
                                  **eval_kwargs)

logging.info(f"Initial Evaluation: {all_metrics}")

# Every iteration trains on the same leading slice of the data, so the slice
# is taken once outside the loop.
start_idx = 0
end_idx = data_size_per_iteration
batch_src_encodec = all_src_encodec[start_idx:end_idx] # select 'data_size_per_iteration' datas
batch_instruction = all_instruction[start_idx:end_idx]

for iteration in tqdm(range(num_iterations), desc="Training Iterations"):
    resume = iteration > 0 # resume from the previous checkpoint when iteration > 0

    logging.info(f"Starting iteration {iteration}")
    logging.info(f"Processing data from index {start_idx} to {end_idx}")
    
    # model_checkpoint is rebound to the model saved by this iteration, so the
    # next iteration continues from it.
    # chosen_rewards and rejected_rewards are the rewards of the data
    model_checkpoint, chosen_rewards, rejected_rewards = train_iteration(model_checkpoint=model_checkpoint,
                                                                        iteration=iteration,
                                                                        data_size=data_size_per_iteration,
                                                                        times_per_data=times_per_data,
                                                                        ar_checkpoint=ar_checkpoint,
                                                                        nar_checkpoint=nar_checkpoint,
                                                                        all_src_encodec=batch_src_encodec,
                                                                        all_instruction=batch_instruction,
                                                                        args_predict=args_predict,
                                                                        agent_output_dir=agent_output_dir,
                                                                        model_output_dir_base=model_output_dir,
                                                                        beta=beta,
                                                                        resume_from_checkpoint=resume)
    
    logging.info(f"Chosen rewards for iteration {iteration}: {chosen_rewards}")
    logging.info(f"Rejected rewards for iteration {iteration}: {rejected_rewards}")
    logging.info(f"Finished training iteration {iteration}")

    # Evaluate the result of the current iteration
    logging.info(f"Evaluation Indices: {eval_selected_indices}")
    all_metrics = eval_dpo_even_token(trained_model_checkpoint=model_checkpoint,
                                      num_evaluations=10,
                                      iteration=iteration,
                                      **eval_kwargs)
    logging.info(f"Evaluation: {all_metrics}")

total_end_time = time.time()

# Calculate total time taken
total_time_taken = total_end_time - total_start_time
logging.info(f"Total time taken for the entire process: {total_time_taken:.2f} seconds")

Training Iterations:   0%|          | 0/30 [00:00<?, ?it/s]

Iteration 0
Chosen: ['v_tok_6v_tok_584v_tok_454v_tok_208v_tok_8498v_tok_8604v_tok_918v_tok_788v_tok_8936v_tok_8776v_tok_8346v_tok_8298v_tok_8504v_tok_988v_tok_8572v_tok_704v_tok_260v_tok_8862v_tok_8520v_tok_8934v_tok_8246v_tok_9198v_tok_9206v_tok_8684v_tok_6v_tok_8726v_tok_890v_tok_954v_tok_250v_tok_104v_tok_8856v_tok_8624v_tok_8908v_tok_9082v_tok_978v_tok_250v_tok_862v_tok_8762v_tok_38v_tok_188v_tok_8290v_tok_344v_tok_714v_tok_696v_tok_348v_tok_578v_tok_9078v_tok_784v_tok_226v_tok_574v_tok_8754v_tok_8672v_tok_8572v_tok_8886v_tok_156v_tok_8940']
Rejected: ['v_tok_699v_tok_8773v_tok_8735v_tok_405v_tok_8229v_tok_423v_tok_211v_tok_8763v_tok_669v_tok_357v_tok_811v_tok_8775v_tok_53v_tok_565v_tok_8671v_tok_921v_tok_9071v_tok_8881v_tok_8985v_tok_231v_tok_997v_tok_321v_tok_8851v_tok_8485v_tok_699v_tok_8951v_tok_8887v_tok_8869v_tok_8243v_tok_949v_tok_9203v_tok_8813v_tok_9065v_tok_8399v_tok_8689v_tok_763v_tok_467v_tok_105v_tok_8881v_tok_657v_tok_231v_tok_77v_tok_735v_tok_805v_tok_119v_tok_8677v_

Map: 100%|██████████| 1/1 [00:00<00:00, 159.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 93.65 examples/s]
[34m[1mwandb[0m: Currently logged in as: [33mb09901066[0m ([33mb09901066_alan[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_0/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:   3%|▎         | 1/30 [01:04<31:18, 64.77s/it]

Iteration 1


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_0/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 158.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 186.36 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_1/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:   7%|▋         | 2/30 [02:03<28:27, 60.99s/it]

Iteration 2


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_1/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 188.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 208.82 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_2/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  10%|█         | 3/30 [03:01<26:49, 59.59s/it]

Iteration 3


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_2/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 186.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.89 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_3/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  13%|█▎        | 4/30 [03:58<25:26, 58.70s/it]

Iteration 4


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_3/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 171.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 157.86 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_4/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  17%|█▋        | 5/30 [04:56<24:22, 58.51s/it]

Iteration 5


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_4/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 179.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 206.21 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_5/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  20%|██        | 6/30 [05:54<23:20, 58.34s/it]

Iteration 6


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_5/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 173.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 188.47 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_6/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  23%|██▎       | 7/30 [06:52<22:20, 58.28s/it]

Iteration 7


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_6/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 100.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 131.91 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_7/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  27%|██▋       | 8/30 [07:50<21:19, 58.15s/it]

Iteration 8


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_7/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 182.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.22 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_8/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  30%|███       | 9/30 [08:48<20:21, 58.19s/it]

Iteration 9


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_8/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 105.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 101.85 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_9/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  33%|███▎      | 10/30 [09:47<19:26, 58.33s/it]

Iteration 10


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_9/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 102.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 155.64 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_10/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Training Iterations:  37%|███▋      | 11/30 [10:46<18:29, 58.38s/it]

Iteration 11


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_10/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Map: 100%|██████████| 1/1 [00:00<00:00, 111.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 103.60 examples/s]


Step,Training Loss


Some weights of the model checkpoint at /work/b0990106x/trl/model_output/0718-2332/iter_11/dpo_model were not used when initializing BartForConditionalGeneration: ['v_head.summary.bias', 'v_head.summary.weight']
- This IS expected if you are initializing BartForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Continue Training from Checkpoint

In [None]:
# import torch
# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# from datasets import Dataset
# import json
# from trl import DPOTrainer, DPOConfig, AutoModelForSeq2SeqLMWithValueHead, create_reference_model

# base_path = "/work/b0990106x/trl"
# model_output_dir = f"{base_path}/model_output/0718-1514/iter_1_same_data" # Directory where the newly trained model is saved
# model_input_dir = f"{base_path}/model_output/0718-1514/iter_0" # Directory containing the previously saved model to resume from
# model_checkpoint = f"{model_input_dir}/dpo_model"
# agent_output_dir = f"{base_path}/output/0718-1514" # Directory where generated audio is saved for the reward model to evaluate
# ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
# beta = 0.1
# # iteration = 0 # for name and number only
# # data_size_per_iteration = 100 # Training: number of samples trained per iteration
# # times_per_data = 5 # doesn't matter here
# ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
# # beta = 0.1 # Training: beta value for DPO

# ar_tokenizer = AutoTokenizer.from_pretrained(ar_checkpoint)
# ar_tokenizer.pad_token = ar_tokenizer.eos_token

# with open(f"{agent_output_dir}/data_iter_0.json", "r") as f:
#     data = json.load(f)

# data_for_dataset = {key: data[key] for key in ["prompt", "chosen", "rejected"]}
# dataset = Dataset.from_dict(data_for_dataset)
# dataset_dict = dataset.train_test_split(test_size=0.1)
# train_dataset = dataset_dict["train"]
# val_dataset = dataset_dict["test"]

# model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(model_checkpoint, return_dict=True)
# model_ref = create_reference_model(model)

# train_model(model=model,
#             model_ref=model_ref,
#             ar_tokenizer=ar_tokenizer,
#             train_dataset=train_dataset,
#             val_dataset=val_dataset,
#             model_output_dir=model_output_dir,
#             beta=beta,
#             resume_from_checkpoint=True,
#             model_checkpoint=model_checkpoint)

# Evaluation

In [None]:
# import torch
# from types import SimpleNamespace
# import random 
# import json
# from dpo_eval import eval_dpo_token_length

# def load_from_json(file_path):
#     with open(file_path, 'r') as f:
#         data = json.load(f)
    
#     all_src_encodec = [item["src_encodec"] for item in data]
#     all_instruction = [item["instruction"] for item in data]
#     all_tgt_encodec = [item["tgt_encodec"] for item in data]
    
#     return all_src_encodec, all_instruction, all_tgt_encodec


# base_path = "/work/b0990106x/trl"
# ts = "0718-1514"
# ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
# nar_checkpoint = "lca0503/speech-chatgpt-base-nar-v2-epoch4-wotrans"
# model_checkpoint = f"{base_path}/model_output/{ts}/iter_1_same_data/dpo_model"
# # model_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
# eval_data_len = 10 # Evaluation: number of samples to evaluate
# # eval_selected_indices from 0 to 99
# eval_selected_indices = list(range(10))
# # eval_selected_indices = random.sample(range(100), eval_data_len) # Evaluation: randomly select eval_data_len samples for evaluation
# iteration = 0

# #################################### Fixed
# all_src_encodec, all_instruction, all_tgt_encodec = load_from_json('src_encodec.json')
# print(len(all_src_encodec))
# print(len(all_instruction))
# print(len(all_tgt_encodec))
# device = "cuda" if torch.cuda.is_available() else "cpu"
# args_predict = SimpleNamespace(output_path=f"{base_path}/output/{ts}/example.wav", seed=0, device=device)
# ####################################


# all_metrics = eval_dpo_token_length(ar_checkpoint=ar_checkpoint,
#                                     nar_checkpoint=nar_checkpoint,
#                                     trained_model_checkpoint=model_checkpoint,
#                                     all_src_encodec=all_src_encodec,
#                                     all_instruction=all_instruction,
#                                     eval_data_len=eval_data_len,
#                                     selected_indices=eval_selected_indices,
#                                     num_evaluations = 10,
#                                     device=device,
#                                     iteration = iteration,
#                                     args_predict=args_predict)

# print(all_metrics)