## Quick Train - Continue from a checkpoint

In [1]:
from transformers import BartForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
from trl import DPOTrainer, DPOConfig, AutoModelForSeq2SeqLMWithValueHead, create_reference_model
from datasets import Dataset
import json

ModuleNotFoundError: No module named 'transformer'

In [None]:
def train_model(
        model,
        model_ref,
        ar_tokenizer,
        train_dataset: Dataset,
        val_dataset: Dataset,
        model_output_dir: str,
        beta: float,
        resume_from_checkpoint: bool,
        model_checkpoint: str,
        learning_rate: float = 5e-07,
        num_train_epochs: int = 100,
        max_length: int = 1024*9,
        max_prompt_length: int = 1024*9,
        max_target_length: int = 1024*9,
        per_device_train_batch_size: int = 1,
        gradient_accumulation_steps: int = 1,
        seed: int = 42
) -> None:
    '''
    Run DPO training, then write the model, its config and the tokenizer to disk.

    Everything is saved under "<model_output_dir>/dpo_model".

    Args:
        model: The policy model that is trained with DPO.
        model_ref: The reference model handed to the DPO trainer.
        ar_tokenizer: The tokenizer used by the trainer; it is saved next to the model.
        train_dataset(Dataset): The training dataset.
        val_dataset(Dataset): The validation dataset.
        model_output_dir(str): Output directory used by the trainer and for the final save.
        beta(float): The DPO beta value.
        resume_from_checkpoint(bool): When truthy, `model_checkpoint` is set as
            `resume_from_checkpoint` on the training config; otherwise None is set.
        model_checkpoint(str): Path of the checkpoint used when resuming.
        learning_rate(float): Learning rate forwarded to the training config.
        num_train_epochs(int): Number of training epochs.
        max_length(int): `max_length` forwarded to the training config.
        max_prompt_length(int): `max_prompt_length` forwarded to the training config.
        max_target_length(int): `max_target_length` forwarded to the training config.
        per_device_train_batch_size(int): Per-device training batch size.
        gradient_accumulation_steps(int): Gradient accumulation steps.
        seed(int): Seed forwarded to the training config.

    Returns:
        None
    '''
    # Only hand a checkpoint path to the config when resuming was requested.
    # NOTE(review): the path is only stored on the config; trainer.train() below
    # is called without a resume argument -- confirm this actually resumes.
    if resume_from_checkpoint:
        checkpoint_path = model_checkpoint
    else:
        checkpoint_path = None

    dpo_config = DPOConfig(
        output_dir=model_output_dir,
        resume_from_checkpoint=checkpoint_path,
        beta=beta,
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        max_length=max_length,
        max_prompt_length=max_prompt_length,
        max_target_length=max_target_length,
        generate_during_eval=True,
        seed=seed,
    )

    dpo_trainer = DPOTrainer(
        model=model,
        ref_model=model_ref,
        args=dpo_config,
        tokenizer=ar_tokenizer,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )
    dpo_trainer.train()

    # Persist the trained model, its config and the tokenizer side by side.
    save_dir = f"{model_output_dir}/dpo_model"
    dpo_trainer.save_model(save_dir)
    model.config.to_json_file(f"{save_dir}/config.json")
    ar_tokenizer.save_pretrained(save_dir)

In [None]:
# Base
base_path = "/work/b0990106x/trl"
ts = "0718-1514"
iter_name = "iter_0"
output_name = f"output_trained_{ts}_{iter_name}"

# Model Input and Output Directories
model_input_dir = f"{base_path}/model_output/{ts}/{iter_name}" # Location to load the model
model_output_dir = f"{base_path}/model_output/{ts}/{output_name}" # Location to save the new trained model
agent_output_dir = f"{base_path}/output/{ts}" # Path of saving the generated audio for reward model to evaluate
ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
model_checkpoint = f"{model_input_dir}/dpo_model"

# Training Parameters
beta = 0.1 # Training: beta value for DPO
learning_rate = 5e-07 # Training: learning rate
num_train_epochs = 100 # Training: number of training epochs
max_length = 1024*9 # Training: max length of the model
max_prompt_length = 1024*9 # Training: max length of the prompt
max_target_length = 1024*9 # Training: max length of the target
per_device_train_batch_size = 1 # Training: batch size
gradient_accumulation_steps = 1 # Training: gradient accumulation steps
seed = 42 # Training: seed (also used for the train/validation split below)
resume_from_checkpoint = True # Training: continue from `model_checkpoint`

# Load the preference data (prompt / chosen / rejected) for this iteration
with open(f"{agent_output_dir}/{iter_name}.json", "r") as f: # Change this file for different datasets
    data = json.load(f)

data_for_dataset = {key: data[key] for key in ["prompt", "chosen", "rejected"]}
dataset = Dataset.from_dict(data_for_dataset)
# Seed the split so the same examples land in train/validation on every run;
# without it the split is reshuffled randomly each time the cell is executed.
dataset_dict = dataset.train_test_split(test_size=0.1, seed=seed)
train_dataset = dataset_dict["train"]
val_dataset = dataset_dict["test"]

# Load the tokenizer and the model to continue training from; the tokenizer's
# EOS token doubles as its padding token.
ar_tokenizer = AutoTokenizer.from_pretrained(ar_checkpoint)
ar_tokenizer.pad_token = ar_tokenizer.eos_token
model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(model_checkpoint, return_dict=True)
model_ref = create_reference_model(model)

train_model(model=model,
            model_ref=model_ref,
            ar_tokenizer=ar_tokenizer,
            train_dataset=train_dataset,
            val_dataset=val_dataset,
            model_output_dir=model_output_dir,
            beta=beta,
            resume_from_checkpoint=resume_from_checkpoint,
            model_checkpoint=model_checkpoint,
            learning_rate=learning_rate,
            num_train_epochs=num_train_epochs,
            max_length=max_length,
            max_prompt_length=max_prompt_length,
            max_target_length=max_target_length,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            seed=seed
            )


# Quick Evaluation

In [None]:
import torch 
import json
from dpo_eval import eval_dpo_mos
from typing import List, Tuple
from types import SimpleNamespace

In [None]:
def extract_data_from_json(file_path: str) -> Tuple[List[list], List[str], List[list]]:
    """
    Loads data from a JSON file and extracts 'src_encodec', 'instruction', and 'tgt_encodec'.

    The file is expected to contain a JSON array of objects, each carrying the
    three keys above. The file is always decoded as UTF-8.

    Args:
        file_path (str): The path to the JSON file.

    Returns:
        tuple:
            all_src_encodec (List[list]): A list containing the 'src_encodec' data from each item in the JSON file.
            all_instruction (List[str]): A list containing the 'instruction' data from each item in the JSON file.
            all_tgt_encodec (List[list]): A list containing the 'tgt_encodec' data from each item in the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If an item is missing one of the three expected keys.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the platform's default locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_src_encodec: List[list] = []
    all_instruction: List[str] = []
    all_tgt_encodec: List[list] = []

    # One pass over the records, keeping the three lists index-aligned.
    for item in data:
        all_src_encodec.append(item["src_encodec"])
        all_instruction.append(item["instruction"])
        all_tgt_encodec.append(item["tgt_encodec"])

    return all_src_encodec, all_instruction, all_tgt_encodec

#### Set Parameters

In [None]:
# Base
base_path = "/work/b0990106x/trl"
ts = "0718-1514"
ar_checkpoint = "lca0503/speech-chatgpt-base-ar-v2-epoch10-wotrans"
nar_checkpoint = "lca0503/speech-chatgpt-base-nar-v2-epoch4-wotrans"

# Model Configs
iter_name = "iter_0"
model_checkpoint = f"{base_path}/model_output/{ts}/{iter_name}/dpo_model"
device = "cuda" if torch.cuda.is_available() else "cpu"
all_src_encodec, all_instruction, all_tgt_encodec = extract_data_from_json('dpo_data/src_encodec.json')
args_predict = SimpleNamespace(output_path=f"{base_path}/output/{ts}/example.wav", seed=0, device=device)

# Evaluation Parameters
eval_indices = [0] # Evaluation: evaluate on data indices from all_src_encodec
eval_data_len = 1 # Evaluation: evaluate how many data
num_evaluations = 10 # Evaluation: evaluate how many times per data (forwarded to eval_dpo_mos)
iteration = "eval_1" # Evaluation: This is just for the name of the output audio file

# Evaluation
# Every tunable value is taken from the parameter section above so the call
# cannot drift from the documented configuration.
trained_model_metrics, trained_model_rewards = eval_dpo_mos(
    ar_checkpoint=ar_checkpoint,
    nar_checkpoint=nar_checkpoint,
    trained_model_checkpoint=model_checkpoint,
    all_src_encodec=all_src_encodec,
    all_instruction=all_instruction,
    eval_data_len=eval_data_len,
    selected_indices=eval_indices,
    device=device,
    iteration=iteration,
    args_predict=args_predict,
    num_evaluations=num_evaluations,
)

In [None]:
# Persist the evaluation metrics and rewards together as one JSON document
output_path = f"{base_path}/output/{ts}/eval_{iter_name}_{eval_data_len}.json"
with open(output_path, 'w') as results_file:
    eval_results = {
        "metrics": trained_model_metrics,
        "rewards": trained_model_rewards,
    }
    json.dump(eval_results, results_file)
    print(f"Metrics and rewards have been saved to {output_path}")