### This notebooks is based on the github repo: https://github.com/tloen/alpaca-lora. Credit to  Avatar Eric J. Wang 

In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
import utils
import random


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/ec2-user/anaconda3/envs/pytorch_p39/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


In [2]:
from transformers import LlamaForCausalLM, LlamaTokenizer,GenerationConfig
from peft import (
    PeftModel,
    prepare_model_for_int8_training,
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
)

### (1) Load Model

In [6]:
MICRO_BATCH_SIZE = 4
BATCH_SIZE = 64 #128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 50
LEARNING_RATE = 3e-4
CUTOFF_LEN = 256
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
VAL_SET_SIZE = 0.2
TARGET_MODULES = [
    "q_proj",
    "v_proj",
]

DATA_PATH = "./hong_kong_consumption_voucher_scheme_datasets_for_tunning.json"

In [4]:
device_map = "auto"

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)

### load model after fine tuned on alpaca datasets
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [5]:
document_name = "standard chartered bank smart credit card"

In [6]:
def generate_prompt(data_item):
    
    input = data_item.get("input","")
    input = ""# we don't use input for fine tuning
    instruction = data_item.get("instruction","")
    output = data_item.get("output","")
    
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:{output}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request according to {document_name} facts.

### Instruction:
{instruction}

### Response:{output}"""

In [7]:
eval_generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)

In [None]:
def evaluate(data_item):
    prompt = generate_prompt(data_item)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=eval_generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for s in generation_output.sequences:
        output = tokenizer.decode(s)
        print("Response:", output.split("### Response:")[1].strip())

In [9]:
input = None
instruction = "What is the offer period for the 8% cashback offer on the Standard Chartered Smart Credit Card?"

In [10]:
data_item ={"instruction":instruction}

In [11]:
evaluate(data_item)

Response: The offer period for the 8% cashback offer on the Standard Chartered Smart Credit Card is 6 months.


In [12]:
# model = prepare_model_for_int8_training(model)

In [13]:
# config = LoraConfig(
#     r=LORA_R,
#     lora_alpha=LORA_ALPHA,
#     target_modules=TARGET_MODULES,
#     lora_dropout=LORA_DROPOUT,
#     bias="none",
#     task_type="CAUSAL_LM",
# )
# model = get_peft_model(model, config)
  # unk. we want this to be different from the eos token

### (2) Load Fine Tunning Data

In [7]:
data = load_dataset("json", data_files=DATA_PATH)

Downloading and preparing dataset json/default to /home/ec2-user/.cache/huggingface/datasets/json/default-62144b2b433b15e0/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/ec2-user/.cache/huggingface/datasets/json/default-62144b2b433b15e0/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
VAL_SET_SIZE = int(VAL_SET_SIZE*len(data['train']))

In [9]:
train_val = data["train"].train_test_split(
    test_size=VAL_SET_SIZE, shuffle=True, seed=42
)
train_data = train_val["train"]
val_data = train_val["test"]

len(train_data)/(len(train_data)+len(val_data))

0.8181818181818182

In [10]:
def generate_prompt_eval(instruction):
    template =  f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:"""
    return template 

In [11]:
def generate_prompt_train(inputs):
    instruction = inputs['instruction']
    output = inputs['output']
    template =  f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
{output}"""
    return template 

In [12]:
tokenizer.pad_token_id = 0
def tokenize(prompt):
    # there's probably a way to do this with the tokenizer settings
    # but again, gotta move fast
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }


In [13]:
train_dataset = train_data.shuffle().map(lambda x: tokenize(generate_prompt_train(x)))
val_dataset = val_data.shuffle().map(lambda x: tokenize(generate_prompt_train(x)))

Map:   0%|          | 0/18 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

### (3) Evaluate before further fine tunning

In [14]:
eval_generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)

In [15]:
def generate_answer(instruction):
    prompt = generate_prompt_eval(instruction)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=eval_generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for s in generation_output.sequences:
        output = tokenizer.decode(s)
        # print(output)
        print("Response:", output.split("### Response:")[1].strip())

In [16]:
dataset  = utils.open_json(DATA_PATH)

In [17]:
sample = random.choice(dataset)

In [18]:
instruction = sample['instruction']
output = sample['output']
print("instruction:",instruction)
print("ground truth:",output)

instruction: Is it necessary for registrant who has transferred their physical Octopus card registered under 2022 CVS to Octopus card on Mobile to update their registration record in order to receive the first-instalment voucher disbursed 10 under 2023 CVS on 16 April?
ground truth: Relevant people do not need to update the registration record.


In [19]:
generate_answer(instruction)

Response: Yes, it is necessary for registrant who has transferred their physical Octopus card registered under 2022 CVS to Octopus card on Mobile to update their registration record in order to receive the first-instalment voucher disbursed 10 under 2023 CVS on 16 April.


###  (4) Training

In [20]:
num_step_per_epoch = len(train_data)

In [21]:
print(num_step_per_epoch)

18


In [22]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=num_step_per_epoch,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=num_step_per_epoch*10,
        save_steps=num_step_per_epoch*10,
        output_dir="lora-alpaca",
        save_total_limit=3,
        load_best_model_at_end=True,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

In [23]:
model.config.use_cache = False

In [24]:
old_state_dict = model.state_dict

model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))


In [25]:
trainer.train()



Step,Training Loss,Validation Loss


TrainOutput(global_step=50, training_loss=0.28703554391860964, metrics={'train_runtime': 186.3294, 'train_samples_per_second': 4.83, 'train_steps_per_second': 0.268, 'total_flos': 7311832016486400.0, 'train_loss': 0.28703554391860964, 'epoch': 40.0})

### (4) Evaluation 

In [26]:
instruction = sample['instruction']
output = sample['output']
print("instruction:",instruction)
print("ground truth:",output)

instruction: Is it necessary for registrant who has transferred their physical Octopus card registered under 2022 CVS to Octopus card on Mobile to update their registration record in order to receive the first-instalment voucher disbursed 10 under 2023 CVS on 16 April?
ground truth: Relevant people do not need to update the registration record.


### (5) Save Model

In [31]:
# saving model
model_name_or_path = "alpaca-lora-7b-tuned-on-hk-consumption-voucher-scheme"
# peft_type = "PROMPT_TUNING"
task_type = "CAUSAL_LM"

peft_model_id = f"{model_name_or_path}_{task_type}".lower()

In [32]:
print(peft_model_id)

alpaca-lora-7b-tuned-on-hk-consumption-voucher-scheme_causal_lm


In [33]:
model.save_pretrained(peft_model_id)