<a href="https://colab.research.google.com/github/ShawnLiu119/FineTune-Llama2/blob/main/cust_response_Llama2FT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune Llama 2 - Customer Reivews
Created by Shawn Liu, based on Gary's fine tune work for BLSM

This notebook runs on a T4 GPU. (Last update: 06 September 2023)

reference resource:
https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da?permalink_comment_id=4645209

###Step 0 - set up environment & parameters

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/244.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━[0m [32m153.6/244.2 kB[0m [31m4.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m60.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# The model that you want to train from the Hugging Face hub
# model_name = "NousResearch/Llama-2-7b-chat-hf"
model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

# The instruction dataset to use
# dataset_name = "mlabonne/guanaco-llama2-1k"
  #This is a subset (1000 samples) of the excellent timdettmers/openassistant-guanaco dataset, processed to match Llama 2's prompt format #
# dataset_name = "databricks/databricks-dolly-15k"

# Fine-tuned model name
# new_model = "llama-2-7b-usecontext"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2 # default 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2 # default 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 1e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

###Step 1 - prep the dataset to align with structure required by Llama2
chatbot q&a dataset
databricks/databricks-dolly-15k
https://huggingface.co/datasets/databricks/databricks-dolly-15k

customer-service-support dataset
https://github.com/bitext/customer-support-llm-chatbot-training-dataset/blob/main/data/train/Bitext_Sample_Customer_Support_Training_Dataset.csv

In [None]:
# The instruction dataset to use
# dataset_name2 = "mlabonne/guanaco-llama2-1k"
dataset_name = "databricks/databricks-dolly-15k"

# Load dataset (you can process it here)
# dataset2 = load_dataset(dataset_name2, split="train")
dataset = load_dataset(dataset_name, split="train")

Downloading readme:   0%|          | 0.00/8.20k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset #structure

#instruction: questions / task asked the model to answer / do
#context: background information / content fed into
#response: answered generated (dependent on goal(category)- either general_qa, closed_qa, summerization)

Dataset({
    features: ['instruction', 'context', 'response', 'category'],
    num_rows: 15011
})

In [None]:
dataset[2]

{'instruction': 'Why can camels survive for long without water?',
 'context': '',
 'response': 'Camels use the fat in their humps to keep them filled with energy and hydration for long periods of time.',
 'category': 'open_qa'}

In [None]:
# prepare new dataset
text = []
# category

# cat_old = list(set(dataset['category']))
cat_old = ['information_extraction',
 'closed_qa',
 'open_qa',
 'summarization',
 'general_qa',
 'creative_writing',
 'brainstorming',
 'classification']


cat_new = ['information extraction',
 'closed question answering',
 'open question answering',
 'summarization',
 'general question answering',
 'creative writing',
 'brainstorming',
 'classification']
cat_map = dict(zip(cat_old, cat_new))

for i, v in enumerate(dataset):
  # if i == 0:
  instruction = v.get('instruction')
  context = v.get('context')
  response = v.get('response')
  category = v.get('category')
  category = cat_map.get(category)

  if category:
    category = category
  else:
    category = "general conversation"

  if context:
    train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
Context: {context}
You shall think carefully based on the context provided above and write a response that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
  else:
    train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
You shall think carefully and write a response with details that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
  text.append(train_text)

    # print(train_text)

In [10]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/LLM/

Mounted at /content/drive
/content/drive/MyDrive/LLM


In [21]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os

In [23]:
df_ft = pd.read_csv("cust-serv-bot.csv")
df_ft.head()

Unnamed: 0.1,Unnamed: 0,instruction,intent,response
0,0,how can I cancel purchase 113542617735902?,cancel_order,I understand how unnerving it can be to cancel...
1,1,can you help me canceling purchase 00004587345?,cancel_order,I understand your concern and I'm here to find...
2,2,i want assistance to cancel purchase 732201349959,cancel_order,I'm here to help you with canceling your purch...
3,3,i want assistance to cancel order 732201349959,cancel_order,No worries. I'm here to provide you with guida...
4,4,"I don't want my last item, help me cancel orde...",cancel_order,I understand your request to cancel order 3707...


In [28]:
df_1 = df_ft.drop(df_ft.columns[0], axis=1)

df_1['context'] = np.nan

df_1.columns = np.array(['instruction', 'category', 'response', 'context'])

df_1.head()




Unnamed: 0,instruction,category,response,context
0,how can I cancel purchase 113542617735902?,cancel_order,I understand how unnerving it can be to cancel...,
1,can you help me canceling purchase 00004587345?,cancel_order,I understand your concern and I'm here to find...,
2,i want assistance to cancel purchase 732201349959,cancel_order,I'm here to help you with canceling your purch...,
3,i want assistance to cancel order 732201349959,cancel_order,No worries. I'm here to provide you with guida...,
4,"I don't want my last item, help me cancel orde...",cancel_order,I understand your request to cancel order 3707...,


In [35]:
df_1.to_csv('cust-serv-bot_1.csv', index=False)


In [36]:
#fine tune - import our customer support bot dataset

dataset_name = "cust-serv-bot_1.csv"

df_ft = load_dataset('csv', data_files={'train': dataset_name}, split='train')


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [39]:
df_ft

Dataset({
    features: ['instruction', 'category', 'response', 'context'],
    num_rows: 4514
})

In [None]:
  # cat_old = list(set(dataset['category']))
  cat_old = ['information_extraction',
   'closed_qa',
   'open_qa',
   'summarization',
   'general_qa',
   'creative_writing',
   'brainstorming',
   'classification']


  cat_new = ['information extraction',
    'closed question answering',
    'open question answering',
    'summarization',
    'general question answering',
    'creative writing',
    'brainstorming',
    'classification']
  cat_map = dict(zip(cat_old, cat_new))

In [44]:
# prepare new dataset
# here we may test around using different category to try to get best result

def data_prep(dataset, category):

    text = []

    for i, v in enumerate(dataset):
    # if i == 0:
      instruction = v.get('instruction')
      context = v.get('context')
      response = v.get('response')
      category = category
      # category = v.get('category')
      # category = cat_map.get(category)

  # if category:
  #   category = category
  # else:
  #   category = "general conversation"

      if context:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
Context: {context}
You shall think carefully based on the context provided above and write a response that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      else:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
You shall think carefully and write a response with details that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      text.append(train_text)

    return text

In [45]:
text_gc = data_prep(df_ft, 'general conversation')
text_gc[0]


"<s>[INST] Below is an instruction that describes a general conversation task.\nYou shall think carefully and write a response with details that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything if you do not know.\nInstruction: how can I cancel purchase 113542617735902?\n[/INST] I understand how unnerving it can be to cancel an order, especially if it's order number {{Purchase ID Anonymized}}. However, you are not alone. Here's a quick guide to walk you through the process:\r\n\r\n1. Firstly, connect to our system by logging into our {{Website Name}}.\r\n2. Spot the {{Order Details}} section.\r\n3. Identify the order with the number {{Purchase ID Anonymized}}.\r\n4. Adjacent to your order, you'll find a {{Cancel Order}} button.\r\n5. Press it and keep following the easy instructions that pop up on your screen.\r\n\r\nYou can always lean on our support team during this process if you need any guidance or come acr

In [48]:
df = pd.DataFrame(text_gc)
df.columns = ['text']
df.to_csv('cust_serv_gc.csv', index=False)

In [56]:
data_train = load_dataset('csv', data_files='cust_serv_gc.csv', split= 'train')

In [57]:
data_train

Dataset({
    features: ['text'],
    num_rows: 4514
})

##Step 2 - load the model and tokenizer for training

In [61]:

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [68]:
#model training config. definition

num_train_epochs = 2
per_device_train_batch_size = 1
learning_rate = 2e-3
max_steps = -1
weight_decay = 0.01
gradient_accumulation_steps = 1
save_steps = 1000
logging_steps = 1000

In [69]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [70]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=data_train,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

In [None]:
# Start fine tuning model

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained("outputs")

Step,Training Loss
1000,0.771
2000,0.6944
3000,0.6668
4000,0.6327
5000,0.569
6000,0.5132
7000,0.4761


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-71-d61622a03e98>", line 4, in <cell line: 4>
    trainer.train()
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1539, in train
    return inner_training_loop(
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 1901, in _inner_training_loop
    self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2212, in _maybe_log_save_evaluate
    self.log(logs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2570, in log
    self.control = self.callback_handler.on_log(self.args, self.state, self.control, logs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/trainer_callback.py"

In [1]:
!ls outputs

ls: cannot access 'outputs': No such file or directory


In [None]:
# save model files (adapter model) to google drive
# Change path if needed

import shutil
shutil.copy('outputs/adapter_config.json', '/content/drive/MyDrive/LLM/adapter_config.json')
shutil.copy('outputs/adapter_model.bin', '/content/drive/MyDrive/LLM/adapter_model.bin')
shutil.copy('outputs/README.md', '/content/drive/MyDrive/LLM/README.md')

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-72-f608a8dfcb98>", line 5, in <cell line: 5>
    shutil.copy('outputs/adapter_config.json', '/content/drive/MyDrive/LLM/adapter_config.json')
  File "/usr/lib/python3.10/shutil.py", line 417, in copy
    copyfile(src, dst, follow_symlinks=follow_symlinks)
  File "/usr/lib/python3.10/shutil.py", line 254, in copyfile
    with open(src, 'rb') as fsrc:
OSError: [Errno 107] Transport endpoint is not connected: 'outputs/adapter_config.json'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2099, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'OSError' object has no attribute '_render_traceback_'

During handling o