In [None]:
!pip install -U datasets
!pip install -q trl peft bitsandbytes sentencepiece


In [None]:
# Authenticate this runtime with the Hugging Face Hub; later cells download a
# model from the Hub and push trained adapters back to it.
!huggingface-cli login

In [None]:

from datasets import load_dataset
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer
# Mount Google Drive; a later cell changes into a Drive folder so that relative
# output paths land there.
from google.colab import drive
drive.mount('/content/drive')

# Never hardcode the Hub token in the notebook. Read it from the environment
# and fall back to a hidden interactive prompt. The push_to_hub cells need a
# token with write access.
import os
from getpass import getpass
HF_TOKEN = os.environ.get('HF_TOKEN') or getpass('Hugging Face token (write access): ')

In [None]:
# Work from a folder on the mounted Drive; the relative paths used in this
# notebook (./cache, ./sft_output, ...) resolve against this directory.
%cd /content/drive/MyDrive/SFT

**Load data from the Hugging Face Hub**

In [None]:
# Fetch the preference dataset and hold out 10% of it as a test split.
# The fixed seed makes the train/test partition reproducible.
dataset = load_dataset(
    'Blazej/banking_alignment_preference_ds',
    split='train',
).train_test_split(test_size=0.1, seed=42)
dataset

**Load base model**

In [None]:
# Checkpoint to fine-tune and the download cache location (relative to the cwd).
model_name = 'meta-llama/Llama-3.2-1B-Instruct'
cache_dir = './cache'

# Tokenizer: when the checkpoint defines no padding token, reuse EOS for padding.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized base model, letting `device_map='auto'` place the weights.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=cache_dir,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    trust_remote_code=True,
)

# Turn the generation cache off for training (gradient checkpointing is
# enabled in the training hyper-parameters further down).
base_model.config.use_cache = False


In [None]:
# Display the loaded model's repr to inspect its module names.
base_model

**Set up the config for LoRA and the trainer**

In [None]:
# LoRA adapter settings shared by every trainer in this notebook.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    target_modules=[
        # attention projections
        'q_proj', 'k_proj', 'v_proj', 'o_proj',
        # feed-forward projections
        'gate_proj', 'up_proj', 'down_proj',
    ],
)

In [None]:
# Illustrative example of the chat-message layout (system / user / assistant)
# that the SFT formatting function builds for each record.
conversation = [
    {'role': role, 'content': content}
    for role, content in (
        ('system', 'You are a helpful assistant.'),
        ('user', 'This is prompt'),
        ('assistant', 'This is preference'),
    )
]


In [None]:
# Training hyper-parameters shared by the SFT and DPO configs.
# Effective batch size per device = 2 * 8 accumulation steps = 16 sequences.
hyper_param = dict(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    logging_steps=100,
    learning_rate=3e-5,
    max_steps=600,
    save_strategy='no',
    overwrite_output_dir=True,
    optim='paged_adamw_8bit',
    warmup_steps=100,
    bf16=True,
)

# Sequence-length cap passed to the trainer configs, and the SFT output folder.
MAX_LENGTH = 512
SFT_OUTPUT_DIR = './sft_output'

# **SFT**

In [None]:
def prompt_with_chat_template(example):
    """Render one dataset record as a full chat-formatted training string.

    Args:
        example: Record with a 'query' field (user message) and a 'chosen'
            field (the preferred assistant answer).

    Returns:
        The text produced by the tokenizer's chat template for the
        system / user / assistant conversation.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful bank assistant. But Confidentiality Respect: Do not share or reference confidential bank data or client information'},
        {'role': 'user', 'content': example['query']},
        {'role': 'assistant', 'content': example['chosen']}
    ]
    # The conversation already ends with the assistant's answer, so no
    # generation prompt must be appended: add_generation_prompt=True would add
    # an empty assistant header after the answer in every training example.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )

In [None]:
# SFT config = shared hyper-parameters + output folder + sequence-length cap.
sft_config = SFTConfig(
    **dict(hyper_param, output_dir=SFT_OUTPUT_DIR, max_seq_length=MAX_LENGTH)
)

# The trainer is given the LoRA config together with the quantized base model;
# each record is turned into text by `prompt_with_chat_template`.
sft_trainer = SFTTrainer(
    model=base_model,
    args=sft_config,
    train_dataset=dataset['train'],
    processing_class=tokenizer,
    peft_config=peft_config,
    formatting_func=prompt_with_chat_template,
)



In [None]:
# Run supervised fine-tuning of the LoRA adapter (max_steps comes from hyper_param).
sft_trainer.train()

In [None]:
# Save the SFT adapter locally, then publish it to the Hugging Face Hub.
SFT_OUTPUT_DIR = './sft_output'
sft_trainer.save_model(SFT_OUTPUT_DIR)

# Trainer.push_to_hub treats its first positional argument as the commit
# message, not the repo id, so push the model and tokenizer explicitly to the
# intended repo. Use the shared HF_TOKEN rather than an inline token string.
hub_repo = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct"
sft_trainer.model.push_to_hub(hub_repo, token=HF_TOKEN)
tokenizer.push_to_hub(hub_repo, token=HF_TOKEN)

# **Optional: further optimize the model with DPO**

In [None]:
#DPO DATA FORMAT
def convert_to_preference_format(example):
    """Reshape one raw record into a conversational preference record.

    Args:
        example: Record with 'query', 'chosen' and 'rejected' fields.

    Returns:
        dict with 'prompt' (system + user messages) and 'chosen' / 'rejected'
        (each a one-message assistant turn).
    """
    def assistant_turn(text):
        # Wrap a completion as a single assistant message.
        return [{'role': 'assistant', 'content': text}]

    system_msg = {'role': 'system', 'content': 'you are helpful bank assistant'}
    user_msg = {'role': 'user', 'content': example['query']}
    return {
        'prompt': [system_msg, user_msg],
        'chosen': assistant_turn(example['chosen']),
        'rejected': assistant_turn(example['rejected']),
    }
# Apply the conversion to every record of both splits (train and test).
dpo_dataset = dataset.map(convert_to_preference_format)

In [None]:
from peft import PeftModel

# Attach the published SFT adapter to the quantized base model and keep it
# trainable so DPO can update it.
dpo_model = PeftModel.from_pretrained(
    base_model,
    "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct",
    is_trainable=True,
)

In [None]:
# Display the adapter-wrapped model's repr to inspect its structure.
dpo_model

In [None]:
from trl import DPOConfig, DPOTrainer

DPO_FULL_OUTPUT_DIR = './dpo_full_output'

# DPO config = shared hyper-parameters + DPO output folder + length cap.
dpo_full_config = DPOConfig(
    **dict(hyper_param, output_dir=DPO_FULL_OUTPUT_DIR, max_length=MAX_LENGTH)
)

# No explicit reference model is passed; the trainer receives the
# adapter-wrapped model together with the LoRA config.
dpo_full_trainer = DPOTrainer(
    model=dpo_model,
    args=dpo_full_config,
    train_dataset=dpo_dataset['train'],
    processing_class=tokenizer,
    peft_config=peft_config,
)

In [None]:
# Run DPO preference optimization on top of the SFT adapter.
dpo_full_trainer.train()

In [None]:
# Save and publish the DPO result. This must use the DPO trainer: saving via
# `sft_trainer` would write the SFT model again and drop the DPO-trained weights.
dpo_full_trainer.save_model(DPO_FULL_OUTPUT_DIR)

# Trainer.push_to_hub treats its first positional argument as the commit
# message, not the repo id, so push the model and tokenizer explicitly.
hub_repo = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct-DPO"
dpo_full_trainer.model.push_to_hub(hub_repo, token=HF_TOKEN)
tokenizer.push_to_hub(hub_repo, token=HF_TOKEN)

# **Continue SFT with LoRA** (for when a GPU limit cut the previous training run short)

In [None]:
# Optional: resume from the adapter that was previously pushed to the Hub.
from peft import PeftModel

# Load the published adapter into the base model under the name "HFn".
# NOTE(review): the trainer below is also given `peft_config`; confirm that
# training continues this loaded adapter rather than a freshly created one.
base_model.load_adapter(
    "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct",
    adapter_name="HFn",
    is_trainable=True,
)

In [None]:
# Hyper-parameters for the continuation run (same as the first SFT run except
# for max_steps); the earlier dict can be reused if nothing needs to change.
hyper_param = dict(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    logging_steps=100,
    learning_rate=3e-5,
    max_steps=500,
    save_strategy='no',
    overwrite_output_dir=True,
    optim='paged_adamw_8bit',
    warmup_steps=100,
    bf16=True,
)
MAX_LENGTH = 512
SFT_OUTPUT_DIR = './sft_output_ctnttt'

# Config = hyper-parameters + output folder + sequence-length cap.
sft_config = SFTConfig(
    **dict(hyper_param, output_dir=SFT_OUTPUT_DIR, max_seq_length=MAX_LENGTH)
)

# Same dataset, formatting function and LoRA config as the first SFT run.
sft_trainer_ctn = SFTTrainer(
    model=base_model,
    args=sft_config,
    train_dataset=dataset['train'],
    processing_class=tokenizer,
    peft_config=peft_config,
    formatting_func=prompt_with_chat_template,
)

In [None]:
# Run the continuation SFT training.
sft_trainer_ctn.train()

In [None]:
# Save and publish the result of the continuation run. This must use
# `sft_trainer_ctn`: `sft_trainer` is the trainer from the first SFT run (and
# does not exist at all in a fresh session that only ran this section).
sft_trainer_ctn.save_model('./sft_output_ctn')

# Trainer.push_to_hub treats its first positional argument as the commit
# message, not the repo id, so push the model and tokenizer explicitly. Use the
# shared HF_TOKEN rather than an inline token string.
hub_repo = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct-LoRA"
sft_trainer_ctn.model.push_to_hub(hub_repo, token=HF_TOKEN)
tokenizer.push_to_hub(hub_repo, token=HF_TOKEN)