### This notebooks is based on the github repo: https://github.com/tloen/alpaca-lora. Credit to  Avatar Eric J. Wang 

In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
import utils
import random


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/ec2-user/anaconda3/envs/pytorch_p39/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/ec2-user/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


In [2]:
from transformers import LlamaForCausalLM, LlamaTokenizer,GenerationConfig
from peft import (
    PeftModel,
    prepare_model_for_int8_training,
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
)

### (1) Load Model

In [3]:
MICRO_BATCH_SIZE = 4
BATCH_SIZE = 64 #128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 50
LEARNING_RATE = 3e-4
CUTOFF_LEN = 256
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
VAL_SET_RATIO = 0.2
TARGET_MODULES = [
    "q_proj",
    "v_proj",
]

DATA_PATH = "./hong_kong_consumption_voucher_scheme_datasets_for_tunning.json"

In [4]:
device_map = "auto"

tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)

### load model after fine tuned on alpaca datasets
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

### (2) Load Fine Tunning Data

In [5]:
data = load_dataset("json", data_files=DATA_PATH)

Found cached dataset json (/home/ec2-user/.cache/huggingface/datasets/json/default-4bfdefce5232ac63/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

In [6]:
VAL_SET_SIZE = int(VAL_SET_RATIO*len(data['train']))

In [7]:
train_val = data["train"].train_test_split(
    test_size=VAL_SET_SIZE, shuffle=True, seed=42
)
train_data = train_val["train"]
val_data = train_val["test"]

round(len(train_data)/(len(train_data)+len(val_data)),2)

Loading cached split indices for dataset at /home/ec2-user/.cache/huggingface/datasets/json/default-4bfdefce5232ac63/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-6cd8d1baf605df32.arrow and /home/ec2-user/.cache/huggingface/datasets/json/default-4bfdefce5232ac63/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-227264b836a6cd59.arrow


0.82

In [8]:
def generate_prompt_eval(instruction):
    template =  f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:"""
    return template 

In [9]:
def generate_prompt_train(inputs):
    instruction = inputs['instruction']
    output = inputs['output']
    template =  f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
{output}"""
    return template 

In [10]:
eval_generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)

In [11]:
tokenizer.pad_token_id = 0
def tokenize(prompt):
    # there's probably a way to do this with the tokenizer settings
    # but again, gotta move fast
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }


In [12]:
def generate_answer(data_item):
    prompt = generate_prompt_eval(data_item)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=eval_generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for s in generation_output.sequences:
        output = tokenizer.decode(s)
        answer = output.split("### Response:")[1].strip()
        return answer

In [13]:
train_dataset = train_data.shuffle().map(lambda x: tokenize(generate_prompt_train(x)))
val_dataset = val_data.shuffle().map(lambda x: tokenize(generate_prompt_train(x)))

Map:   0%|          | 0/18 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

### (3) Evaluate before further fine tunning

In [14]:
dataset  = utils.open_json(DATA_PATH)

In [15]:
sample = random.choice(dataset)

instruction = sample['instruction']
output = sample['output']
print("instruction:",instruction)
print("ground truth:",output)

In [17]:
generate_answer(instruction)

'No, eligible people who have passed away will not be able to receive the first- instalment voucher on 16 April.'

In [18]:
question = "Write a python script to get the google search result using beautifulsoup"
answer = generate_answer(question)
print(answer)

import requests
from bs4 import BeautifulSoup

url = 'https://www.google.com/search?q=python'

r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')

for link in soup.find_all('a'):
    print(link.get('href'))


###  (4) Training

In [19]:
num_step_per_epoch = len(train_data)

In [20]:
print(num_step_per_epoch)

18


In [21]:
print(EPOCHS)

50


In [22]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=num_step_per_epoch,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=num_step_per_epoch*10,
        save_steps=num_step_per_epoch*10,
        output_dir="lora-alpaca",
        save_total_limit=3,
        load_best_model_at_end=True,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

In [23]:
model.config.use_cache = False

In [24]:
old_state_dict = model.state_dict

model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))


In [25]:
trainer.train()



Step,Training Loss,Validation Loss


TrainOutput(global_step=50, training_loss=0.28849071502685547, metrics={'train_runtime': 183.8235, 'train_samples_per_second': 4.896, 'train_steps_per_second': 0.272, 'total_flos': 7311832016486400.0, 'train_loss': 0.28849071502685547, 'epoch': 40.0})

### (4) Evaluation 

In [26]:
instruction = sample['instruction']
output = sample['output']
print("instruction:",instruction)
print("ground truth:",output)

instruction: Can eligible people who have passed away receive the first- instalment voucher on 16 April?
ground truth: Consumption voucher will not be disbursed on 16 April to eligible people who have passed away after successfully registered under 2022 CVS.


In [27]:
generate_answer(instruction)

'We are sorry to inform people who have passed away that they will not receive the first-instalment voucher on 16 April.'

In [28]:
question_4 = "Write a python script to get the google search result using beautifulsoup"
answer = generate_answer(question_4)
print(answer)

import requests
from bs4 import BeautifulSoup

url = 'https://www.google.com/search?q={search_term}'.format(search_term=search_term)
r = requests.get(url)
soup = BeautifulSoup(r.text, 'lxml')

for link in soup.select('a[href]'):
    print(link['href'])


### (5) Save Model

In [32]:
# saving model
model_name_or_path = "alpaca-lora-7b-tuned-on-hk-cvs-fqa"
# peft_type = "PROMPT_TUNING"
# task_type = "CAUSAL_LM"

peft_model_id = model_name_or_path

In [33]:
print(peft_model_id)

alpaca-lora-7b-tuned-on-hk-cvs-fqa


In [34]:
model.save_pretrained(peft_model_id)

### (5) Upload to huggingface

In [35]:
!pip install huggingface_hub

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [36]:
from huggingface_hub import notebook_login

In [37]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [38]:
model.push_to_hub(f"Nelsonlin0321/{peft_model_id}")

adapter_model.bin:   0%|          | 0.00/16.8M [00:00<?, ?B/s]

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/Nelsonlin0321/alpaca-lora-7b-tuned-on-hk-cvs-fqa/commit/3be83b36444580e12231f01f648d96429a9b70a9', commit_message='Upload model', commit_description='', oid='3be83b36444580e12231f01f648d96429a9b70a9', pr_url=None, pr_revision=None, pr_num=None)