<a href="https://colab.research.google.com/github/Ananya-AJ/CMPE255-Data-Mining/blob/main/LLM_Finetuning/Finetune_Lora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [3]:
!pip install datasets
!pip install sentencepiece



In [4]:
import pandas as pd
from datasets import load_dataset
from datasets import Dataset

# Load the dataset from the HuggingFace Hub
rd_ds = load_dataset("xiyuez/red-dot-design-award-product-description")

rd_df = pd.DataFrame(rd_ds['train'])
rd_df['instruction'] = 'Create a detailed description for the following product: ' + rd_df['product'] + ', belonging to category: ' + rd_df['category']
rd_df = rd_df[['instruction', 'description']]


rd_df_sample = rd_df.sample(n=1000, random_state=42)

# Define template
template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:

{}

### Response:\n"""


rd_df['prompt'] = rd_df["instruction"].apply(lambda x: template.format(x))
rd_df_sample['prompt'] = rd_df_sample["instruction"].apply(lambda x: template.format(x))


rd_df_sample.rename(columns={'description': 'response'}, inplace=True)
rd_df_sample['text'] = rd_df_sample["prompt"] + rd_df_sample["response"]
rd_df_sample['response'] = rd_df_sample['response'] + "\n### End"


rd_df_sample = rd_df_sample[['prompt', 'response']]


rd_df['text'] = rd_df["prompt"] + rd_df["description"]


rd_df.drop(columns=['prompt', 'description'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rd_df['prompt'] = rd_df["instruction"].apply(lambda x: template.format(x))


In [5]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

model_path = 'openlm-research/open_llama_3b_v2'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
model_path, load_in_8bit=True, device_map='auto',
)

#Pass in a prompt and infer with the model
prompt = 'Q: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse\nA:'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids

generation_output = model.generate(
input_ids=input_ids, max_new_tokens=128
)

print(tokenizer.decode(generation_output[0]))

tokenizer.model:   0%|          | 0.00/512k [00:00<?, ?B/s]

(…)_v2/resolve/main/special_tokens_map.json:   0%|          | 0.00/330 [00:00<?, ?B/s]

(…)3b_v2/resolve/main/tokenizer_config.json:   0%|          | 0.00/593 [00:00<?, ?B/s]

You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


(…)pen_llama_3b_v2/resolve/main/config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/6.85G [00:00<?, ?B/s]

(…)b_v2/resolve/main/generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]



<s>Q: Create a detailed description for the following product: Corelogic Smooth Mouse, belonging to category: Optical Mouse
A: The Corelogic Smooth Mouse is a wireless optical mouse that has a 1000 dpi resolution. It has a 2.4 GHz wireless connection and a 12-month warranty.
Q: What is the price of the Corelogic Smooth Mouse?
A: The Corelogic Smooth Mouse is priced at $29.99.
Q: What is the weight of the Corelogic Smooth Mouse?
A: The Corelogic Smooth Mouse weighs 0.1 pounds.
Q: What is the dimensions of the Corelogic Smooth Mouse?
A: The Corelogic Smooth Mouse has a dimension


In [7]:
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
target_modules = ["q_proj", "v_proj"]

#If targeting all linear layers
target_modules = ['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj','lm_head']

lora_config = LoraConfig(
r=16,
target_modules = target_modules,
lora_alpha=8,
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM")

In [8]:
import re
model_modules = str(model.modules)
pattern = r'\((\w+)\): Linear'
linear_layer_names = re.findall(pattern, model_modules)

names = []
# Print the names of the Linear layers
for name in linear_layer_names:
    names.append(name)
target_modules = list(set(names))

In [16]:
eval_dataset = rd_ds.get('test', rd_ds.get('validation', rd_ds['train']))

In [19]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir="./output",
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="./logs",
    save_total_limit=2,
)

In [21]:
#finetuning with LORA
trainer = SFTTrainer(
    model,
    train_dataset=rd_ds['train'],
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=256,
    args=training_args,
)


Using pad_token, but it is not set yet.


In [23]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.7.1-py3-none-any.whl (18.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.5/18.5 MB[0m [31m81.0 MB/s[0m eta [36m0:00:00[0m
Collecting databricks-cli<1,>=0.8.7 (from mlflow)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl (150 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython<4,>=2.1.0 (from mlflow)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.12.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.8/226.8 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker<7,>=4.0.0 (from mlflow)
  Downloading docker-6.1.3-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━

In [25]:
# # Initiate the training process
# import mlflow
# with mlflow.start_run(run_name= 'lorafinetuning'):
#   trainer.train()

In [None]:
# Cuda runs out of memory with smaller model llama-3b too.