<a href="https://colab.research.google.com/github/ShawnLiu119/FineTune-Llama2/blob/main/cust_response_Llama2FT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune Llama 2 - Customer Reivews
Created by Shawn Liu, based on Gary's fine tune work for BLSM

This notebook runs on a T4 GPU. (Last update: 06 September 2023)

reference resource:
https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da?permalink_comment_id=4645209

###Step 0 - set up environment & parameters

In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [3]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
# The model that you want to train from the Hugging Face hub
# model_name = "NousResearch/Llama-2-7b-chat-hf"
model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

# The instruction dataset to use
# dataset_name = "mlabonne/guanaco-llama2-1k"
  #This is a subset (1000 samples) of the excellent timdettmers/openassistant-guanaco dataset, processed to match Llama 2's prompt format #
# dataset_name = "databricks/databricks-dolly-15k"

# Fine-tuned model name
# new_model = "llama-2-7b-usecontext"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2 # default 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2 # default 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 1e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

###Step 1 - prep the dataset to align with structure required by Llama2
chatbot q&a dataset
databricks/databricks-dolly-15k
https://huggingface.co/datasets/databricks/databricks-dolly-15k

customer-service-support dataset
https://github.com/bitext/customer-support-llm-chatbot-training-dataset/blob/main/data/train/Bitext_Sample_Customer_Support_Training_Dataset.csv

In [5]:
# The instruction dataset to use
# dataset_name2 = "mlabonne/guanaco-llama2-1k"
dataset_name = "databricks/databricks-dolly-15k"

# Load dataset (you can process it here)
# dataset2 = load_dataset(dataset_name2, split="train")
dataset = load_dataset(dataset_name, split="train")

Downloading readme:   0%|          | 0.00/8.20k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [6]:
dataset #structure

#instruction: questions / task asked the model to answer / do
#context: background information / content fed into
#response: answered generated (dependent on goal(category)- either general_qa, closed_qa, summerization)

Dataset({
    features: ['instruction', 'context', 'response', 'category'],
    num_rows: 15011
})

In [None]:
dataset[2]

{'instruction': 'Why can camels survive for long without water?',
 'context': '',
 'response': 'Camels use the fat in their humps to keep them filled with energy and hydration for long periods of time.',
 'category': 'open_qa'}

In [None]:
# prepare new dataset
text = []
# category

# cat_old = list(set(dataset['category']))
cat_old = ['information_extraction',
 'closed_qa',
 'open_qa',
 'summarization',
 'general_qa',
 'creative_writing',
 'brainstorming',
 'classification']


cat_new = ['information extraction',
 'closed question answering',
 'open question answering',
 'summarization',
 'general question answering',
 'creative writing',
 'brainstorming',
 'classification']
cat_map = dict(zip(cat_old, cat_new))

for i, v in enumerate(dataset):
  # if i == 0:
  instruction = v.get('instruction')
  context = v.get('context')
  response = v.get('response')
  category = v.get('category')
  category = cat_map.get(category)

  if category:
    category = category
  else:
    category = "general conversation"

  if context:
    train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
Context: {context}
You shall think carefully based on the context provided above and write a response that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
  else:
    train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
You shall think carefully and write a response with details that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
  text.append(train_text)

    # print(train_text)

In [7]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/LLM/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/LLM


In [8]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os

In [9]:
df_ft = pd.read_csv("cust-serv-bot.csv")
df_ft.head()

Unnamed: 0.1,Unnamed: 0,instruction,intent,response
0,0,how can I cancel purchase 113542617735902?,cancel_order,I understand how unnerving it can be to cancel...
1,1,can you help me canceling purchase 00004587345?,cancel_order,I understand your concern and I'm here to find...
2,2,i want assistance to cancel purchase 732201349959,cancel_order,I'm here to help you with canceling your purch...
3,3,i want assistance to cancel order 732201349959,cancel_order,No worries. I'm here to provide you with guida...
4,4,"I don't want my last item, help me cancel orde...",cancel_order,I understand your request to cancel order 3707...


In [10]:
df_1 = df_ft.drop(df_ft.columns[0], axis=1)

df_1['context'] = np.nan

df_1.columns = np.array(['instruction', 'category', 'response', 'context'])

df_1.head()




Unnamed: 0,instruction,category,response,context
0,how can I cancel purchase 113542617735902?,cancel_order,I understand how unnerving it can be to cancel...,
1,can you help me canceling purchase 00004587345?,cancel_order,I understand your concern and I'm here to find...,
2,i want assistance to cancel purchase 732201349959,cancel_order,I'm here to help you with canceling your purch...,
3,i want assistance to cancel order 732201349959,cancel_order,No worries. I'm here to provide you with guida...,
4,"I don't want my last item, help me cancel orde...",cancel_order,I understand your request to cancel order 3707...,


In [19]:
#explore to fine tune with another round - more customized data

df_blst = pd.read_csv("reviews_resp.csv")
df_blst.head()

Unnamed: 0,instruction,context,Sentiment,Response
0,can you please resolve this issue?,So I placed an order and scheduled the down pa...,1,I'm truly sorry for the frustration this has c...
1,can you please resolve this issue?,Three days ago I purchased a SonicCare electri...,1,I'm truly sorry for the frustration this situa...
2,can you please resolve this issue?,I did not get the bookshelves I ordered. The c...,1,I'm sincerely sorry for the disappointment and...
3,can you please resolve this issue?,I bought a Toshiba Bluetooth boombox in May 20...,1,I'm truly sorry for this frustrating experienc...
4,can you please resolve this issue?,"I pay over 100.00 every month, yet my availabl...",1,I'm genuinely sorry for the frustration you're...


In [20]:
df_blst = df_blst[['instruction', 'context', 'Response']]

df_blst.columns = np.array(['instruction', 'context', 'response'])

df_blst_ext = pd.concat([df_blst, df_1])



In [22]:
df_blst_ext.tail()

Unnamed: 0,instruction,context,response,category
4509,I am waiting for a rebate of 299 dollars,,I'm sorry to hear that you're waiting for a re...,track_refund
4510,I am waiting for a compensation of 1200 dollars,,I'm sorry to hear that you're waiting for a co...,track_refund
4511,I expect a compensation of 160 dollars,,I understand your expectation to receive a com...,track_refund
4512,I am wating for a reimbursement of $3,,I understand your concern about the reimbursem...,track_refund
4513,help seeing if there is anything new on my rebate,,I'm here to help you check if there are any up...,track_refund


In [23]:
df_blst_ext.to_csv('blst_ext.csv', index=False)


In [24]:
#fine tune - import our customer support bot dataset

dataset_name = 'blst_ext.csv'

df_ft = load_dataset('csv', data_files={'train': dataset_name}, split='train')


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [25]:
df_ft

Dataset({
    features: ['instruction', 'context', 'response', 'category'],
    num_rows: 4552
})

In [None]:
  # cat_old = list(set(dataset['category']))
  cat_old = ['information_extraction',
   'closed_qa',
   'open_qa',
   'summarization',
   'general_qa',
   'creative_writing',
   'brainstorming',
   'classification']


  cat_new = ['information extraction',
    'closed question answering',
    'open question answering',
    'summarization',
    'general question answering',
    'creative writing',
    'brainstorming',
    'classification']
  cat_map = dict(zip(cat_old, cat_new))

In [17]:
# prepare new dataset
# here we may test around using different category to try to get best result

def data_prep(dataset, category):

    text = []

    for i, v in enumerate(dataset):
    # if i == 0:
      instruction = v.get('instruction')
      context = v.get('context')
      response = v.get('response')
      category = category
      # category = v.get('category')
      # category = cat_map.get(category)

  # if category:
  #   category = category
  # else:
  #   category = "general conversation"

      if context:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
Context: {context}
You shall think carefully based on the context provided above and write a response that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      else:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
You shall think carefully and write a response with details that appropriately completes the request. Your response must be be friendly, polite, and provide details. Do not create anything if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      text.append(train_text)

    return text

In [29]:
# adjust the prompt instruction part
# here we may test around using different category to try to get best result

def data_prep2(dataset, category):

    text = []

    for i, v in enumerate(dataset):
    # if i == 0:
      instruction = v.get('instruction')
      context = v.get('context')
      response = v.get('response')
      category = category
      # category = v.get('category')
      # category = cat_map.get(category)

  # if category:
  #   category = category
  # else:
  #   category = "general conversation"

      if context:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
Context: {context}
You shall generate response using fewer than 200 words based on the context provided above to the customer's request. The response should start with phrases to display empathy to a customer who had a negative experience, then provide potential solution.  Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      else:
        train_text = f"""<s>[INST] Below is an instruction that describes a {category} task.
You shall generate response using fewer than 200 words to the customer's request. The response should start with phrases to display empathy to a customer who had a negative experience, then provide potential solution.  Do not create anything by yourself if you do not know.
Instruction: {instruction}
[/INST] {response}
"""
      text.append(train_text)

    return text

In [30]:
text_gc = data_prep2(df_ft, 'general conversation')
text_gc[0]


"<s>[INST] Below is an instruction that describes a general conversation task.\nContext: So I placed an order and scheduled the down payment to be paid 3 days later. I cancelled the order on the same day that I placed it. Keep in mind this was 3 days before scheduled down payment. Got a confirmation that the order was cancelled, also states it was cancelled on my Fingerhut account page. They went ahead and took the down payment from my checking account anyway. Called to complain and asked for a refund and was given ridiculous excuses as to why they went ahead and took my money. They say they are holding my money for future purchases even though I told them I would never order anything from them again!!\nYou shall generate response using fewer than 200 words based on the context provided above to the customer's request. The response should start with phrases to display empathy to a customer who had a negative experience, then provide potential solution.  Do not create anything by yourse

In [31]:
df = pd.DataFrame(text_gc)
df.columns = ['text']
df.to_csv('cust_serv_combine.csv', index=False)

In [32]:
data_train = load_dataset('csv', data_files='cust_serv_combine.csv', split= 'train')

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [33]:
data_train

Dataset({
    features: ['text'],
    num_rows: 4552
})

##Step 2 - load the model and tokenizer for training

In [34]:

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [35]:
#model training config. definition

num_train_epochs = 2
per_device_train_batch_size = 1
learning_rate = 2e-3
max_steps = -1
weight_decay = 0.01
gradient_accumulation_steps = 1
save_steps = 1000
logging_steps = 1000

In [36]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [37]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=data_train,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/4552 [00:00<?, ? examples/s]

### Step 3 - fine tune training

In [None]:
# Start fine tuning model

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained("blst_2nd_tune")

Step,Training Loss


In [None]:
!ls "blst_2nd_tune"

In [None]:
# save model files (adapter model) to google drive
# Change path if needed

import shutil
shutil.copy('outputs/adapter_config.json', '/content/drive/MyDrive/LLM/adapter_config.json')
shutil.copy('outputs/adapter_model.bin', '/content/drive/MyDrive/LLM/adapter_model.bin')
shutil.copy('outputs/README.md', '/content/drive/MyDrive/LLM/README.md')

## Step 4 - test with Fingerhut specfic data

In [6]:
# The model that you want to train from the Hugging Face hub
# model_name = "NousResearch/Llama-2-7b-chat-hf"
model_name = "meta-llama/Llama-2-7b-chat-hf"
# model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

# The instruction dataset to use
# dataset_name = "mlabonne/guanaco-llama2-1k"
  #This is a subset (1000 samples) of the excellent timdettmers/openassistant-guanaco dataset, processed to match Llama 2's prompt format #
# dataset_name = "databricks/databricks-dolly-15k"

# Fine-tuned model name
# new_model = "llama-2-7b-usecontext"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2 # default 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2 # default 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 1e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [7]:
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#load base model again - with saved lora_config
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [20]:
def print_trainable_parameters(model):
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param:.2f}"
    )

print_trainable_parameters(model)

trainable params: 0 || all params: 3533967360 || trainable%: 0.00


In [21]:
print_trainable_parameters(model_fh)

trainable params: 0 || all params: 3533967360 || trainable%: 0.00


In [8]:
# model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
# model_to_save.save_pretrained("outputs_blst")

from peft import LoraConfig, get_peft_model, PeftModel

lora_config = LoraConfig.from_pretrained('outputs')

model.enable_input_require_grads()
#https://github.com/huggingface/peft/issues/137

model_fh = get_peft_model(model, lora_config)

# model_fh = PeftModel.from_pretrained(model, lora_config, is_trainable=True).to(device)

In [9]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [15]:
#test quickly whether the loaded model is right
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "can you please cancel my memberhsip with Fingerhut?"
pipe = pipeline(task="text-generation", model=model_fh, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])


<s>[INST] can you please cancel my memberhsip with Fingerhut? [/INST]  I'm not able to directly cancel your membership with Fingerhut as I'm just an AI assistant and do not have access to your personal information or account details. everybody.
To cancel your membership with Fingerhut, you will need to contact their customer service department directly. Here are the steps you can follow:
1. Visit the Fingerhut website at [www.fingerhut.com](http://www.fingerhut.com) and click on the "Customer Service" link at the bottom of the page.
2. Call Fingerhut's customer service number at 1-800-443-3578.
3. Chat with a representative through the Fingerhut website by clicking on the "Chat with Us" button.
4


In [11]:
#The result seems promissing in general

In [15]:
#explore to fine tune with another round - more customized data

df_blst = pd.read_csv("reviews_resp.csv")
df_blst.head()

Unnamed: 0,instruction,context,Sentiment,Response
0,can you please resolve this issue?,So I placed an order and scheduled the down pa...,1,I'm truly sorry for the frustration this has c...
1,can you please resolve this issue?,Three days ago I purchased a SonicCare electri...,1,I'm truly sorry for the frustration this situa...
2,can you please resolve this issue?,I did not get the bookshelves I ordered. The c...,1,I'm sincerely sorry for the disappointment and...
3,can you please resolve this issue?,I bought a Toshiba Bluetooth boombox in May 20...,1,I'm truly sorry for this frustrating experienc...
4,can you please resolve this issue?,"I pay over 100.00 every month, yet my availabl...",1,I'm genuinely sorry for the frustration you're...


In [20]:
df_blst = df_blst[['instruction', 'context', 'Response']]
df_blst['category'] = np.nan
df_blst.head()

Unnamed: 0,instruction,context,Response,category
0,can you please resolve this issue?,So I placed an order and scheduled the down pa...,I'm truly sorry for the frustration this has c...,
1,can you please resolve this issue?,Three days ago I purchased a SonicCare electri...,I'm truly sorry for the frustration this situa...,
2,can you please resolve this issue?,I did not get the bookshelves I ordered. The c...,I'm sincerely sorry for the disappointment and...,
3,can you please resolve this issue?,I bought a Toshiba Bluetooth boombox in May 20...,I'm truly sorry for this frustrating experienc...,
4,can you please resolve this issue?,"I pay over 100.00 every month, yet my availabl...",I'm genuinely sorry for the frustration you're...,
5,can you please resolve this issue?,My experience with Finger Hut/ Fetti has been ...,I'm truly sorry for the frustration you've exp...,
6,can you please resolve this issue?,I'm not sure what this company is doing. They ...,I'm truly sorry for the confusion and inconven...,
7,can you please resolve this issue?,I made a mistake of making like 1400 purchases...,I'm truly sorry for the frustration this situa...,
8,can you please resolve this issue?,Fingerhut today in email is the new Apr is ove...,I'm truly sorry for the distress this has caus...,
9,can you please resolve this issue?,I understand the credit in us is very importan...,I'm truly sorry for the frustration you're exp...,


In [23]:
df_blst.columns = np.array(['instruction', 'context', 'response', 'category'])

df_blst.to_csv('fh_rev.csv', index=False)



In [10]:
dataset_name = "fh_rev.csv"
df_fh = load_dataset('csv', data_files={'train': dataset_name}, split='train')

df_fh

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['instruction', 'context', 'response', 'category'],
    num_rows: 38
})

In [28]:
text_fh = data_prep2(df_fh, 'general conversation')
text_fh[0]


"<s>[INST] Below is an instruction that describes a general conversation task.\nContext: So I placed an order and scheduled the down payment to be paid 3 days later. I cancelled the order on the same day that I placed it. Keep in mind this was 3 days before scheduled down payment. Got a confirmation that the order was cancelled, also states it was cancelled on my Fingerhut account page. They went ahead and took the down payment from my checking account anyway. Called to complain and asked for a refund and was given ridiculous excuses as to why they went ahead and took my money. They say they are holding my money for future purchases even though I told them I would never order anything from them again!!\nYou shall generate response using fewer than 200 words based on the context provided above to the customer's request. The response should start with phrases to display empathy to a customer who had a negative experience, then provide potential solution.  Do not create anything by yourse

In [29]:
df = pd.DataFrame(text_fh)
df.columns = ['text']
df.to_csv('fh_rev_processed.csv', index=False)

In [11]:
data_train_fh = load_dataset('csv', data_files='fh_rev_processed.csv', split= 'train')

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [12]:
data_train_fh

Dataset({
    features: ['text'],
    num_rows: 38
})

In [13]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

In [14]:
#model training config. definition

num_train_epochs = 5
per_device_train_batch_size = 1
learning_rate = 2e-4
max_steps = -1
weight_decay = 0.01
gradient_accumulation_steps = 1
save_steps = 1000
logging_steps = 1000

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [15]:
# Set supervised fine-tuning parameters
trainer_fh = SFTTrainer(
    model=model_fh, #trained on the 1st fine-tuned model
    train_dataset=data_train_fh, #trained on the cutomized dataset
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/38 [00:00<?, ? examples/s]

In [16]:
import torch
torch.cuda.empty_cache()

print(torch.cuda.memory_summary(device=None, abbreviated=False))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   4196 MiB |   4196 MiB |  16548 MiB |  12352 MiB |
|       from large pool |   3876 MiB |   3930 MiB |  16228 MiB |  12352 MiB |
|       from small pool |    320 MiB |    320 MiB |    320 MiB |      0 MiB |
|---------------------------------------------------------------------------|
| Active memory         |   4196 MiB |   4196 MiB |  16548 MiB |  12352 MiB |
|       from large pool |   3876 MiB |   3930 MiB |  16228 MiB |  12352 MiB |
|       from small pool |    320 MiB |    320 MiB |    320 MiB |      0 MiB |
|---------------------------------------------------------------

In [16]:
# Start fine tuning model

# Train model
trainer_fh.train()

# Save trained model
trainer_fh.model.save_pretrained("fh_rev_outputs")

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


OutOfMemoryError: ignored