In [None]:
!pip install -U datasets
!pip install -q trl peft bitsandbytes sentencepiece




In [None]:
# Authenticate with the Hugging Face Hub (prompts interactively for a token).
# NOTE(review): the recorded run below logged in with a read-only token; the
# push_to_hub cells later in this notebook need a token with write permission.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read)

In [None]:

from datasets import load_dataset
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer
# Mount Google Drive so the project folder used below
# (/content/drive/MyDrive/SFT) is available to this Colab session.
from google.colab import drive
drive.mount('/content/drive')

# Never hard-code the Hub token in the notebook (it would be saved and shared
# with the file). Read it from the environment and fall back to a hidden prompt.
# The push_to_hub cells need a token with *write* permission.
import os
from getpass import getpass
HF_TOKEN = os.environ.get('HF_TOKEN') or getpass('Hugging Face token (write access): ')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Work from the project folder on the mounted Drive so that the relative paths
# used later (./cache, ./sft_output, ...) resolve inside it.
%cd /content/drive/MyDrive/SFT

/content/drive/MyDrive/SFT


**Load data from the Hugging Face Hub**

In [None]:
# Download the preference dataset's "train" split, then carve out a fixed
# 90/10 train/test split (seeded so the split is the same on every run).
full_train_split = load_dataset(
    'Blazej/banking_alignment_preference_ds',
    split="train",
)
dataset = full_train_split.train_test_split(test_size=0.1, seed=42)
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['guideline', 'query', 'chosen', 'rejected'],
        num_rows: 1080
    })
    test: Dataset({
        features: ['guideline', 'query', 'chosen', 'rejected'],
        num_rows: 120
    })
})

**Load base model**

In [None]:
model_name = 'meta-llama/Llama-3.2-1B-Instruct'
cache_dir = './cache'  # relative to the current working directory

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

# `trust_remote_code=True` was dropped: the checkpoint loads as the built-in
# LlamaForCausalLM class, so there is no reason to allow execution of arbitrary
# code shipped in the Hub repository.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
    cache_dir=cache_dir,
    torch_dtype=torch.bfloat16
)

# Turn off the generation KV cache for training (gradient checkpointing is
# enabled in the training hyper-parameters).
base_model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir = cache_dir)

# The trainers need a padding token; reuse EOS when the tokenizer defines none.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id


In [None]:
# Display the module tree of the loaded model (the recorded output shows the
# attention/MLP projections as Linear4bit layers).
base_model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), 

**Set up the LoRA and trainer configuration**

In [None]:
# LoRA is attached to every attention projection and every MLP projection of
# the decoder layers (the Linear4bit modules listed in the model printout).
LORA_TARGET_MODULES = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',   # self-attention
    'gate_proj', 'up_proj', 'down_proj',      # MLP
]

# Rank-16 adapters, alpha 16, 5% dropout, no bias terms trained.
peft_config = LoraConfig(
    task_type='CAUSAL_LM',
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    target_modules=LORA_TARGET_MODULES,
)

In [None]:
# Illustrative example of the chat-message structure (a list of role/content
# dicts) that the formatting functions below build for each dataset row.
conversation = [
    dict(role=speaker, content=text)
    for speaker, text in (
        ('system', 'You are a helpful assistant.'),
        ('user', 'This is prompt'),
        ('assistant', 'This is preference'),
    )
]


In [None]:
# Training arguments shared by the trainers in this notebook.
# Effective batch size per device = 2 samples x 8 accumulation steps = 16.
hyper_param = dict(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    logging_steps=100,
    learning_rate=3e-5,
    max_steps=600,
    save_strategy='no',          # no intermediate checkpoints are written
    overwrite_output_dir=True,
    optim='paged_adamw_8bit',
    warmup_steps=100,
    bf16=True,
)

# Maximum sequence length (in tokens) handed to the trainer configs.
MAX_LENGTH = 512

# Output directory of the first SFT run (relative to the working directory).
SFT_OUTPUT_DIR = './sft_output'

# **SFT**

In [None]:
def prompt_with_chat_template(example):
    """Render one dataset row as a complete chat-formatted training string.

    Args:
        example: a dataset row providing 'query' (the user message) and
            'chosen' (the assistant reply to learn).

    Returns:
        The text produced by ``tokenizer.apply_chat_template`` for the
        system / user / assistant conversation (not tokenized).
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful bank assistant. But Confidentiality Respect: Do not share or reference confidential bank data or client information'},
        {'role': 'user', 'content': example['query']},
        {'role': 'assistant', 'content': example['chosen']}
    ]
    # The conversation already ends with the assistant's answer, so no
    # generation prompt may be appended: add_generation_prompt=True would tack
    # an extra, empty assistant header onto the end of every training sample.
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )

In [None]:
# Build the SFT configuration from the shared hyper-parameters.
# `max_length` is the current name of the sequence-length limit in TRL's
# SFTConfig; the former `max_seq_length` is deprecated and rejected by newer
# TRL releases (trl is installed unpinned above).
sft_config = SFTConfig(
    **{
        **hyper_param, 'output_dir' : SFT_OUTPUT_DIR, 'max_length' : MAX_LENGTH,
    }
)

# The trainer wraps the quantized base model with the LoRA adapters described
# by `peft_config` and turns each row into text with `prompt_with_chat_template`.
sft_trainer = SFTTrainer(
    model = base_model,
    train_dataset = dataset['train'],
    peft_config = peft_config,
    processing_class = tokenizer,
    args = sft_config,
    formatting_func = prompt_with_chat_template
)





Applying formatting function to train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

Converting train dataset to ChatML:   0%|          | 0/1080 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run supervised fine-tuning (600 optimizer steps per the hyper-parameters; the
# training loss is logged every 100 steps).
sft_trainer.train()

Step,Training Loss
100,1.907
200,1.2269
300,1.1666
400,1.1202
500,1.0851
600,1.0615


TrainOutput(global_step=600, training_loss=1.2612296549479167, metrics={'train_runtime': 12654.0777, 'train_samples_per_second': 0.759, 'train_steps_per_second': 0.047, 'total_flos': 1.948261084144435e+16, 'train_loss': 1.2612296549479167})

In [None]:
# Save the fine-tuned adapter locally, then publish it to the Hugging Face Hub.
SFT_OUTPUT_DIR = './sft_output'
SFT_HUB_REPO_ID = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct"
sft_trainer.save_model(SFT_OUTPUT_DIR)

# `Trainer.push_to_hub` takes the *commit message* as its first positional
# argument, so passing the repo name there uploads to a repo named after the
# output directory (the recorded run ended up in 'HF-Hao/sft_output').
# Push through the model and tokenizer instead: their `push_to_hub` takes the
# repo id. HF_TOKEN must be a token with write permission; it replaces the
# token string that was hard-coded here.
sft_trainer.model.push_to_hub(SFT_HUB_REPO_ID, token=HF_TOKEN)
tokenizer.push_to_hub(SFT_HUB_REPO_ID, token=HF_TOKEN)



adapter_model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

events.out.tfevents.1747709298.cf7d3c6ca5a2.4177.1:   0%|          | 0.00/6.13k [00:00<?, ?B/s]

events.out.tfevents.1747709010.cf7d3c6ca5a2.4177.0:   0%|          | 0.00/6.13k [00:00<?, ?B/s]

Upload 6 LFS files:   0%|          | 0/6 [00:00<?, ?it/s]

events.out.tfevents.1747709479.cf7d3c6ca5a2.4177.2:   0%|          | 0.00/8.46k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.69k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/HF-Hao/sft_output/commit/0953dcd6fa972cedffd5ba4f215d29eacd4e9e5b', commit_message='HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct', commit_description='', oid='0953dcd6fa972cedffd5ba4f215d29eacd4e9e5b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/HF-Hao/sft_output', endpoint='https://huggingface.co', repo_type='model', repo_id='HF-Hao/sft_output'), pr_revision=None, pr_num=None)

# **Optional: further optimize the model with DPO**

In [None]:
def convert_to_preference_format(example):
    """Turn a dataset row into a conversational preference record.

    Args:
        example: a row providing 'query', 'chosen' and 'rejected' texts.

    Returns:
        A dict with three message lists: 'prompt' (system + user turn),
        'chosen' and 'rejected' (one assistant turn each).
    """
    def _assistant_turn(text):
        # A single-message reply, in the same list-of-dicts shape as the prompt.
        return [{'role': 'assistant', 'content': text}]

    system_turn = {'role': 'system', 'content': 'you are helpful bank assistant'}
    user_turn = {'role': 'user', 'content': example['query']}

    return {
        'prompt': [system_turn, user_turn],
        'chosen': _assistant_turn(example['chosen']),
        'rejected': _assistant_turn(example['rejected']),
    }
# Convert every row of both splits (train and test) to the preference format.
dpo_dataset = dataset.map(convert_to_preference_format)

In [None]:
from peft import PeftModel
# Alternative kept for reference (loads the adapter under an explicit name):
#dpo_model = base_model.load_adapter('HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct', is_trainable = True, adapter_name = 'dpo_full_adapter1')
# Load the published SFT adapter on top of the base model and keep it trainable.
# NOTE(review): the recorded push output earlier in this notebook reports
# repo_id='HF-Hao/sft_output'; confirm that the repo id used here really hosts
# the SFT adapter.
# NOTE(review): `base_model` was already handed to SFTTrainer with a peft_config
# in this session; presumably this cell is meant to run on a freshly loaded
# base model -- verify on a restarted kernel.
dpo_model = PeftModel.from_pretrained(base_model, "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct", is_trainable = True)

In [None]:
# Display the adapter-wrapped model (the recorded output shows lora.Linear4bit
# layers carrying rank-16 lora_A / lora_B matrices).
dpo_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 2048)
        (layers): ModuleList(
          (0-15): 16 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [None]:
from trl import DPOConfig, DPOTrainer

# Output directory of the DPO run (relative to the working directory).
DPO_FULL_OUTPUT_DIR = './dpo_full_output'

# Same shared hyper-parameters as the SFT run, plus DPO-specific output
# directory and maximum sequence length.
dpo_config_kwargs = dict(hyper_param)
dpo_config_kwargs.update(output_dir=DPO_FULL_OUTPUT_DIR, max_length=MAX_LENGTH)
dpo_full_config = DPOConfig(**dpo_config_kwargs)

# NOTE(review): `dpo_model` is already a PeftModel and a `peft_config` is passed
# as well; check the TRL DPO documentation for how the trainer combines the two
# (it affects which weights act as the reference model).
dpo_full_trainer = DPOTrainer(
    model=dpo_model,
    args=dpo_full_config,
    train_dataset=dpo_dataset['train'],
    processing_class=tokenizer,
    peft_config=peft_config,
)



Extracting prompt in train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/1080 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run DPO training.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, i.e.
# the run captured in this notebook was stopped before it finished.
dpo_full_trainer.train()



KeyboardInterrupt: 

In [None]:
# Save and publish the DPO result. This must use `dpo_full_trainer`: the former
# code called `sft_trainer` here, which would have stored and uploaded the SFT
# adapter under the DPO name.
DPO_HUB_REPO_ID = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct-DPO"
dpo_full_trainer.save_model(DPO_FULL_OUTPUT_DIR)

# `Trainer.push_to_hub` treats its first positional argument as the commit
# message, not the repo id, so push through the model and tokenizer, whose
# `push_to_hub` takes the repo id. HF_TOKEN needs write permission.
dpo_full_trainer.model.push_to_hub(DPO_HUB_REPO_ID, token=HF_TOKEN)
tokenizer.push_to_hub(DPO_HUB_REPO_ID, token=HF_TOKEN)

# **Continue SFT with LoRA** (the previous training run was limited by GPU availability)

In [None]:
from peft import PeftModel
# Load the previously published SFT adapter into `base_model` as a trainable
# adapter named "HFn" so that training can be continued.
# NOTE(review): the recorded output below is a PeftModelForCausalLM printout
# whose adapter entries are keyed "default", not "HFn" -- it may stem from a
# different version of this cell; re-run on a fresh kernel to confirm which
# adapter is actually active for the continued training.
base_model.load_adapter("HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct", is_trainable=True, adapter_name="HFn")

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 2048)
        (layers): ModuleList(
          (0-15): 16 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [None]:
# Hyper-parameters for the continuation run: identical to the first SFT run
# except for max_steps (500 instead of 600).
hyper_param = {
    'per_device_train_batch_size': 2,
    'gradient_accumulation_steps': 8,
    'gradient_checkpointing': True,
    'logging_steps': 100,
    'learning_rate': 3e-5,
    'max_steps': 500,
    'save_strategy': 'no',
    'overwrite_output_dir': True,
    'optim': 'paged_adamw_8bit',
    'warmup_steps': 100,
    'bf16': True
} # the earlier hyper_param can be reused if nothing needs to change
MAX_LENGTH = 512
SFT_OUTPUT_DIR = './sft_output_ctnttt'

# `max_length` is the current name of the sequence-length limit in TRL's
# SFTConfig; the former `max_seq_length` is deprecated and rejected by newer
# TRL releases (trl is installed unpinned above).
sft_config = SFTConfig(
    **{
        **hyper_param, 'output_dir' : SFT_OUTPUT_DIR, 'max_length' : MAX_LENGTH,
    }
)

# Separate trainer object for the continuation run, so that the first run's
# `sft_trainer` stays untouched.
sft_trainer_ctn = SFTTrainer(
    model = base_model,
    train_dataset = dataset['train'],
    processing_class = tokenizer,
    peft_config = peft_config, # use same LoRA config
    args = sft_config,
    formatting_func = prompt_with_chat_template
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Continue supervised fine-tuning for another 500 optimizer steps (loss is
# logged every 100 steps).
sft_trainer_ctn.train()

Step,Training Loss
100,1.0477
200,0.9745
300,0.9312
400,0.8997
500,0.8784


TrainOutput(global_step=500, training_loss=0.9462859954833984, metrics={'train_runtime': 10118.4586, 'train_samples_per_second': 0.791, 'train_steps_per_second': 0.049, 'total_flos': 1.6234585650167808e+16, 'train_loss': 0.9462859954833984})

In [None]:
# Save and publish the result of the *continued* run. This must use
# `sft_trainer_ctn`: the former code called `sft_trainer`, i.e. the trainer of
# the first run, so the additional 500 steps would not have been what was
# saved and uploaded.
CTN_HUB_REPO_ID = "HF-Hao/Llama-3.2-1B-Bank-Conversation-Instruct-LoRA"
sft_trainer_ctn.save_model('./sft_output_ctn')

# `Trainer.push_to_hub` treats its first positional argument as the commit
# message, not the repo id, so push through the model and tokenizer, whose
# `push_to_hub` takes the repo id. HF_TOKEN must have write permission; it
# replaces the token string that was hard-coded here.
sft_trainer_ctn.model.push_to_hub(CTN_HUB_REPO_ID, token=HF_TOKEN)
tokenizer.push_to_hub(CTN_HUB_REPO_ID, token=HF_TOKEN)