# Step 2. Model Alignment (SFT, SteerLM)

## (1) Supervised Fine-Tuning

In [None]:
# Root directories for the datasets and the model checkpoints used by this notebook.
DATA_ROOT_DIR = "/work/Data"
MODEL_ROOT_DIR = "/work/Models"

# Checkpoint handed to the fine-tuning script as model.restore_from_path.
MODEL=f"{MODEL_ROOT_DIR}/llama-new-token/model.nemo" # step1 Pretrained model

# Dataset files, wrapped in [...] so each value reaches the script in list form.
TRAIN_DS=f"[{DATA_ROOT_DIR}/sft/sft_train_data.jsonl]" # Blend with Domain Instruct Data & General Chat Data
VALID_DS=f"[{DATA_ROOT_DIR}/sft/sft_val_data.jsonl]"
TEST_DS=f"[{DATA_ROOT_DIR}/sft/sft_test_data.jsonl]"

# NOTE(review): VALID_NAMES is assigned here but is not passed to the command below.
VALID_NAMES="[dummy-data]"
LOG_ROOT_DIR = "/work/log"

# Launch and parallelism settings. TP_SIZE * PP_SIZE equals NUM_DEVICES.
NUM_DEVICES=8
# Passed as train_ds.concat_sampling_probabilities; one entry for the single training file.
CONCAT_SAMPLING_PROBS="[1]"
TP_SIZE=8
PP_SIZE=1
SEQUENCE_PARALLEL=True


# Output directory, passed as exp_manager.explicit_log_dir.
model_save_dir = LOG_ROOT_DIR + "/sft"
# Used for model.global_batch_size and for the train/validation/test dataset global batch sizes.
batch_size = 64 # 128

# Launch supervised fine-tuning with NeMo's megatron_gpt_finetuning.py on a single node,
# one process per device. IPython replaces every `$NAME` below with the value of the
# Python variable of that name before the command reaches the shell.
# Key settings: peft_scheme=none, answer_only_loss=True, one epoch (max_epochs=1,
# max_steps=-1), validation every 0.1 of an epoch, Weights & Biases logging enabled, and
# save_nemo_on_train_end=True. A later cell loads its SFT model from
# /work/log/sft/checkpoints/megatron_gpt_peft_none_tuning.nemo, i.e. under model_save_dir.
# NOTE(review): the W&B run name is `step1_sft` although this notebook is Step 2 and the
# later SteerLM run is named `step2_steerlm` - confirm the intended run name.
!torchrun --nproc_per_node=$NUM_DEVICES \
/opt/NeMo/examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
    --config-path=/opt/NeMo/examples/nlp/language_modeling/tuning/conf \
    --config-name=megatron_gpt_sft_config \
   trainer.precision=bf16 \
   trainer.devices=$NUM_DEVICES \
   trainer.num_nodes=1 \
   trainer.val_check_interval=0.1 \
   trainer.max_epochs=1 \
   trainer.max_steps=-1 \
   model.restore_from_path=$MODEL \
   model.micro_batch_size=1 \
   model.global_batch_size=$batch_size \
   model.tensor_model_parallel_size=$TP_SIZE \
   model.pipeline_model_parallel_size=$PP_SIZE \
   model.megatron_amp_O2=True \
   model.sequence_parallel=$SEQUENCE_PARALLEL \
   model.activations_checkpoint_granularity=selective \
   model.activations_checkpoint_method=uniform \
   model.optim.name=distributed_fused_adam \
   model.optim.lr=5e-6 \
   model.answer_only_loss=True \
   model.peft.peft_scheme=none \
   model.data.train_ds.file_names=$TRAIN_DS \
   model.data.validation_ds.file_names=$VALID_DS \
   model.data.test_ds.file_names=$TEST_DS \
   model.data.train_ds.concat_sampling_probabilities=$CONCAT_SAMPLING_PROBS \
   model.data.train_ds.max_seq_length=4096 \
   model.data.validation_ds.max_seq_length=2048 \
   model.data.train_ds.micro_batch_size=1 \
   model.data.train_ds.global_batch_size=$batch_size \
   model.data.validation_ds.micro_batch_size=1 \
   model.data.validation_ds.global_batch_size=$batch_size \
   model.data.test_ds.micro_batch_size=1 \
   model.data.test_ds.global_batch_size=$batch_size \
   model.data.train_ds.num_workers=0 \
   model.data.validation_ds.num_workers=0 \
   model.data.test_ds.num_workers=0 \
   model.data.validation_ds.metric.name=loss \
   model.data.test_ds.metric.name=loss \
   exp_manager.create_wandb_logger=True \
   exp_manager.wandb_logger_kwargs.project=DAPT \
   exp_manager.wandb_logger_kwargs.name=step1_sft \
   exp_manager.explicit_log_dir=$model_save_dir \
   exp_manager.resume_if_exists=False \
   exp_manager.resume_ignore_no_checkpoint=True \
   exp_manager.create_checkpoint_callback=True \
   exp_manager.checkpoint_callback_params.monitor=validation_loss \
   exp_manager.checkpoint_callback_params.save_best_model=False \
   exp_manager.checkpoint_callback_params.save_nemo_on_train_end=True 

## (2) Train Attribute Reward Model 

In [None]:
global_batch_size=64
base_model = "" # base model for Attribute model
 
!python /opt/NeMo-Aligner/examples/nlp/gpt/train_reward_model.py \
      trainer.num_nodes=1 \
      trainer.devices=8 \
      ++model.micro_batch_size=1 \
      ++model.global_batch_size=$global_batch_size \
      ++model.data.data_impl=jsonl \
      pretrained_checkpoint.restore_from_path=$base_model \
      "model.data.data_prefix={train: ["/work/Data/reg/reg_train_data.jsonl"], validation: ["/work/Data/reg/reg_val_data.jsonl"], test: ["/work/Data/reg/reg_test_data.jsonl"]}" \
      exp_manager.explicit_log_dir=/work/log/attribute \
      trainer.rm.val_check_interval=10 \
      exp_manager.create_wandb_logger=True \
      exp_manager.wandb_logger_kwargs.project=DAPT \
      exp_manager.wandb_logger_kwargs.name=steerlm \
      exp_manager.checkpoint_callback_params.save_top_k=1 \
      trainer.rm.save_interval=10 \
      trainer.rm.max_steps=10 \
      ++model.tensor_model_parallel_size=1 \
      ++model.pipeline_model_parallel_size=1 \
      ++model.activations_checkpoint_granularity="selective" \
      model.optim.sched.constant_steps=0 \
      model.reward_model_type="regression" \
      model.regression.num_attributes=9

## (3) Generate pseudo attribute-labeled data with the attribute model

In [None]:
import os

# Checkpoint served by the reward server. The path lies under /work/log/attribute, the
# explicit_log_dir used by the reward-model training cell.
attribute_model = "/work/log/attribute/checkpoints/megatron_gpt.nemo"

# Start Reward Server
# The backslash-newline pairs inside this (non-raw) string are line continuations, so the
# command is a single shell line. The trailing `&` runs the server in the background so
# that os.system returns without waiting for it; the server is configured to listen on
# port 1424 (inference.port), the port the annotation cell passes via --port.
server_command = f"""python /opt/NeMo-Aligner/examples/nlp/gpt/serve_reward_model.py \
      rm_model_file={attribute_model} \
      trainer.num_nodes=1 \
      trainer.devices=8 \
      model.precision=bf16 \
      ++model.tensor_model_parallel_size=1 \
      ++model.pipeline_model_parallel_size=1 \
      ++model.reward_model_type=regression \
      model.forward_micro_batch_size=2 \
      inference.port=1424 &"""
      
# NOTE(review): nothing here waits for the server to finish loading before the next cell
# sends requests - confirm that the annotation script tolerates a server that is not yet ready.
os.system(server_command)


In [None]:
CHAT_TRAIN_DATA = f"{DATA_ROOT_DIR}/chat/chat_train_data.jsonl"
CHAT_VAL_DATA = f"{DATA_ROOT_DIR}/chat/chat_val_data.jsonl"

CHAT_ATT_TRAIN_DATA = f"{DATA_ROOT_DIR}/chat/chat_label_train_data.jsonl"
CHAT_ATT_VAL_DATA = f"{DATA_ROOT_DIR}/chat/chat_label_val_data.jsonl"
CHAT_ATT_TRAIN_DATA_2EPOCH = f"{DATA_ROOT_DIR}/chat/chat_label_train_data_2ep.jsonl"

!python /opt/NeMo-Aligner/examples/nlp/data/steerlm/attribute_annotate.py \
      --input-file=$CHAT_TRAIN_DATA \
      --output-file=$CHAT_ATT_TRAIN_DATA \
      --port=1424

!python /opt/NeMo-Aligner/examples/nlp/data/steerlm/attribute_annotate.py \
      --input-file=$CHAT_VAL_DATA \
      --output-file=$CHAT_ATT_VAL_DATA \
      --port=1424

In [None]:
# Stop Reward Server
# Lists all processes, keeps those whose command line contains the serve_reward_model.py
# invocation (`grep -v grep` drops the grep process itself), takes the second column of
# `ps aux` (the PID) with awk and sends SIGKILL to each. `xargs -r` skips the kill when
# nothing matched.
!ps aux | grep "python /opt/NeMo-Aligner/examples/nlp/gpt/serve_reward_model.py" | grep -v grep | awk '{print $2}' | xargs -r kill -9

In [None]:

# Write the annotated training file twice, back to back, into the "2EPOCH" file. The
# attribute-conditioned SFT cell trains on that file with trainer.sft.max_epochs=1, so
# one pass over it covers every annotated sample twice.
!cat $CHAT_ATT_TRAIN_DATA $CHAT_ATT_TRAIN_DATA > $CHAT_ATT_TRAIN_DATA_2EPOCH

# List the output file to confirm it was created.
!ls $CHAT_ATT_TRAIN_DATA_2EPOCH

## (4) Attribute-conditioned supervised fine-tuning

In [None]:
# Starting checkpoint for attribute-conditioned SFT; the path lies under /work/log/sft,
# the log directory of the SFT run in section (1).
SFT_MODEL = "/work/log/sft/checkpoints/megatron_gpt_peft_none_tuning.nemo" # Step2-(1) sft model 

# Attribute-conditioned SFT with NeMo-Aligner's train_gpt_sft.py on one node, 8 devices.
# - Trains for one epoch on CHAT_ATT_TRAIN_DATA_2EPOCH and validates on CHAT_ATT_VAL_DATA
#   (both defined in the annotation cells above), in chat mode (model.data.chat=True).
# - `$NAME` tokens are replaced by IPython with the Python variables of the same name.
# - `~model.optim.sched` uses Hydra's `~` override prefix, which removes that key from
#   the config.
# - In the chat_prompt_tokens overrides the backslashes keep the shell from interpreting
#   the quotes and angle brackets, so the script receives e.g. '<extra_id_0>' including
#   the single quotes. The same three markers appear in the prompt template of the
#   inference cell at the end of this notebook.
# - Logs and checkpoints go to /work/log/steerlm_sft, with a .nemo file requested at the
#   end of training (save_nemo_on_train_end=True).
!python /opt/NeMo-Aligner/examples/nlp/gpt/train_gpt_sft.py \
     trainer.num_nodes=1 \
     trainer.devices=8 \
     trainer.precision=bf16 \
     trainer.sft.limit_val_batches=8 \
     trainer.sft.max_epochs=1 \
     trainer.sft.max_steps=-1 \
     trainer.sft.val_check_interval=10 \
     trainer.sft.save_interval=10 \
     model.megatron_amp_O2=True \
     model.restore_from_path=$SFT_MODEL \
     model.tensor_model_parallel_size=8 \
     model.pipeline_model_parallel_size=1 \
     model.optim.lr=5e-6 \
     model.optim.name=distributed_fused_adam \
     model.optim.weight_decay=0.01 \
     ~model.optim.sched \
     model.answer_only_loss=True \
     model.activations_checkpoint_granularity=selective \
     model.activations_checkpoint_method=uniform \
     model.data.chat=True \
     model.data.num_workers=0 \
     model.data.chat_prompt_tokens.system_turn_start=\'\<extra_id_0\>\' \
     model.data.chat_prompt_tokens.turn_start=\'\<extra_id_1\>\' \
     model.data.chat_prompt_tokens.label_start=\'\<extra_id_2\>\' \
     model.data.train_ds.max_seq_length=4096 \
     model.data.train_ds.micro_batch_size=1 \
     model.data.train_ds.global_batch_size=128 \
     model.data.train_ds.file_path=$CHAT_ATT_TRAIN_DATA_2EPOCH \
     model.data.train_ds.index_mapping_dir=/work/log/indexdir \
     model.data.train_ds.add_eos=False \
     model.data.train_ds.hf_dataset=True \
     model.data.validation_ds.max_seq_length=4096 \
     model.data.validation_ds.file_path=$CHAT_ATT_VAL_DATA \
     model.data.validation_ds.micro_batch_size=1 \
     model.data.validation_ds.global_batch_size=128 \
     model.data.validation_ds.index_mapping_dir=/work/log/indexdir  \
     model.data.validation_ds.add_eos=False \
     model.data.validation_ds.hf_dataset=True \
     exp_manager.create_wandb_logger=True \
     exp_manager.wandb_logger_kwargs.project=DAPT \
     exp_manager.wandb_logger_kwargs.name=step2_steerlm \
     exp_manager.explicit_log_dir=/work/log/steerlm_sft \
     exp_manager.checkpoint_callback_params.save_nemo_on_train_end=True

## (5) (Optional) Inference with the aligned model

In [None]:
import os

# Aligned checkpoint to serve; the path lies under /work/log/steerlm_sft, the log
# directory of the attribute-conditioned SFT run in section (4).
ALIGN_MODEL = "/work/log/steerlm_sft/checkpoints/megatron_gpt_sft.nemo"

# Start NeMo's megatron_gpt_eval.py in server mode (server=True, web_server=False) on
# port 1428, which is the default `eval_port` of get_answer in the next cell. The
# backslash-newline pairs inside this (non-raw) string are line continuations, and the
# trailing `&` runs the server in the background so os.system returns without waiting.
# NOTE(review): unlike the reward server, no visible cell stops this process afterwards.
server_command = f"""python /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_eval.py \
        gpt_model_file={ALIGN_MODEL} \
        pipeline_model_parallel_split_rank=0 \
        server=True \
        tensor_model_parallel_size=8 \
        pipeline_model_parallel_size=1 \
        trainer.precision=bf16 \
        trainer.devices=8 \
        trainer.num_nodes=1 \
        web_server=False \
        port=1428 &"""
        
os.system(server_command)

In [None]:
import requests
from collections import OrderedDict

def get_answer(question, max_tokens, values, eval_port=1428):
    """Ask the local generation server a question and return only the generated text.

    Args:
        question: User question inserted into the chat prompt.
        max_tokens: Sent to the server as ``tokens_to_generate``.
        values: Attribute string placed after ``<extra_id_2>`` in the prompt
            (for example the output of ``encode_labels``).
        eval_port: Port of the generation server on localhost.

    Returns:
        The first returned sentence with the prompt prefix removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    prompt_template = (
        "<extra_id_0>System\nA chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions.\n"
        "<extra_id_1>User\n{question}\n<extra_id_1>Assistant\n<extra_id_2>{values}\n"
    )
    prompt = prompt_template.format(question=question, values=values)
    data = {
        "sentences": [prompt],
        "tokens_to_generate": max_tokens,
        "top_k": 1,
        "greedy": True,
        "end_strings": ["<extra_id_1>"],
    }
    url = f"http://localhost:{eval_port}/generate"
    response = requests.put(url, json=data)
    # Surface HTTP failures directly instead of failing later on a missing "sentences" key.
    response.raise_for_status()
    generated = response.json()["sentences"][0]
    # Cut off the prompt using the length of the *formatted* prompt. The template still
    # contains the `{question}` / `{values}` placeholders, so its length differs from the
    # text that was actually sent. This assumes the server echoes the prompt at the
    # start of each returned sentence.
    return generated[len(prompt):]

def encode_labels(labels):
    """Serialise an attribute mapping as comma-separated ``key:value`` pairs.

    Pairs appear in the mapping's iteration order; an empty mapping gives ``""``.
    """
    pairs = []
    for name, score in labels.items():
        pairs.append(f"{name}:{score}")
    return ",".join(pairs)


# Attribute targets for the generation. They are encoded as `key:value` pairs and placed
# after `<extra_id_2>` in the prompt built by get_answer.
attribute_targets = OrderedDict(
    quality=4,
    toxicity=0,
    humor=0,
    creativity=0,
    helpfulness=4,
    correctness=4,
    coherence=4,
    complexity=4,
    verbosity=4,
)
values = encode_labels(attribute_targets)

# Request up to 512 generated tokens for a sample question and show the answer.
question = "Write a poem on NVIDIA in the style of Shakespeare"
answer = get_answer(question, 512, values)
print(answer)
