In [None]:
!pip install transformers==4.30

In [None]:
!pip install datasets sentencepiece accelerate bitsandbytes peft trl mlflow config jsonlines

In [None]:
import datasets
import tempfile
import logging
import random
import config
import os
import yaml
import time
import torch
import transformers
import pandas as pd
import jsonlines

#from utilities import *
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from transformers import TrainingArguments
from transformers import AutoModelForCausalLM
from datasets import Dataset
from datasets import load_dataset
from pprint import pprint
from tqdm import tqdm
#logger = logging.getLogger(__name__)
# Placeholder; nothing in the visible cells reads or reassigns this name.
global_config = None

In [None]:
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers.trainer_callback import TrainerCallback
from datasets import load_dataset, DatasetDict, Dataset
from transformers import BitsAndBytesConfig
from trl import SFTTrainer
import mlflow

In [None]:
# Hugging Face Hub identifiers: the instruction dataset to fine-tune on and the base causal LM.
dataset_name = 'qwedsacf/grade-school-math-instructions'
model_name = 'openlm-research/open_llama_3b_v2'

In [None]:
# Load only the 'train' split of the dataset named above.
raw_dataset = load_dataset(dataset_name, split='train')

In [None]:
import pandas as pd

# Materialise the dataset as a DataFrame for inspection, plus a plain dict copy
# that form_finetuning_dataset indexes as dataset_dict[column][row].
instruction_dataset_df = pd.DataFrame(raw_dataset)
instruction_dataset_dict = instruction_dataset_df.to_dict()

In [None]:
# Peek at the first two rows.
instruction_dataset_df.head(2)

In [None]:
# Summarise the frame's columns and dtypes.
instruction_dataset_df.info()

In [None]:
# Longest INSTRUCTION measured in whitespace-separated words (not tokenizer tokens).
instruction_dataset_df['INSTRUCTION'].apply(lambda x: len(x.split())).max()

In [None]:
def form_finetuning_dataset(dataset_dict: dict, question_key: str, answer_key: str) -> Dataset:
    """Build the supervised fine-tuning dataset from column-oriented question/answer data.

    Args:
        dataset_dict: Mapping of column name to a container indexable by row
            position ``0..n-1`` (e.g. the result of ``DataFrame.to_dict()``).
        question_key: Column holding the question text.
        answer_key: Column holding the answer text.

    Returns:
        A ``Dataset`` with one row per sample and three string fields:
        ``instruction`` (the question), ``response`` (the answer followed by a
        newline and ``### End``) and ``text`` (the full prompt followed by the
        response). The first row is also pretty-printed as a sanity check.
    """
    instruction_template = """{question}"""

    # The four-space indentation inside this literal is part of the prompt text.
    prompt_template = """Below is an instruction that describes a task. Write a response that appropriately completes the request.

    ### Instruction:

    {instruction}

    ### Response:\n"""

    def build_record(row: int) -> dict:
        # One training example: bare instruction, marked-up response, and their concatenation.
        instruction = instruction_template.format(question=dataset_dict[question_key][row])
        prompt = prompt_template.format(instruction=instruction)
        response = dataset_dict[answer_key][row] + "\n### End"
        return {"instruction": instruction, "response": response, "text": prompt + response}

    sample_count = len(dataset_dict[question_key])
    finetuning_dataset = Dataset.from_list([build_record(row) for row in range(sample_count)])

    print("One sample in the finetuning dataset:")
    pprint(finetuning_dataset[0])

    return finetuning_dataset

In [None]:
# Build the prompt/response dataset from the INSTRUCTION and RESPONSE columns.
finetuning_dataset = form_finetuning_dataset(instruction_dataset_dict, question_key = "INSTRUCTION", answer_key = "RESPONSE")

In [None]:
# Hold out 10% of the samples, then split that hold-out in half (both splits use seed 42).
split_dataset = finetuning_dataset.train_test_split(test_size=0.1, shuffle=True, seed=42)
# Despite its name, test_dataset is itself a two-way split: its 'train' half becomes
# the final 'test' set and its 'test' half becomes the 'eval' set.
test_dataset = split_dataset['test'].train_test_split(test_size=0.5, shuffle=True, seed=42)

dataset = DatasetDict({
    'train' : split_dataset['train'],
    'test' : test_dataset['train'],
    'eval' : test_dataset['test'],
})

In [None]:
# Show the resulting DatasetDict.
dataset

In [None]:
import locale
# Override the locale lookup so it always reports UTF-8.
# NOTE(review): presumably a workaround for hosted runtimes where later cells fail
# without a UTF-8 locale — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
# LoRA adapter configuration.
# Active choice: adapt only the query and value projections.
target_modules = ["q_proj", "v_proj"]

# Alternative: adapt all linear layers, including the LM head.
#target_modules = ['q_proj','k_proj','v_proj','o_proj','gate_proj','down_proj','up_proj','lm_head']

# Rank 16 with alpha 8 and 5% adapter dropout for a causal-LM task.
# NOTE(review): alpha is smaller than the rank here — confirm the resulting adapter scaling is intended.
lora_config = LoraConfig(r=16,
                        target_modules = target_modules,
                        lora_alpha=8,
                        lora_dropout=0.05,
                        bias="none",
                        task_type="CAUSAL_LM")

In [None]:
# Optimisation hyperparameters consumed by TrainingArguments below.
# 4 sequences per device x 4 accumulation steps = 16 sequences per optimizer step per device.
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = 'adamw_hf'
learning_rate = 1e-5
max_grad_norm = 0.3
warmup_ratio = 0.03
lr_scheduler_type = "linear"

In [None]:
# Trainer settings: a single epoch, saving a checkpoint and evaluating once per epoch,
# with outputs written under "01-experiment".
# NOTE(review): fp16=True is set here while the quantization config uses bfloat16 as its
# compute dtype — confirm this mix is intended.
training_args = TrainingArguments(
    output_dir="01-experiment",
    save_strategy="epoch",
    evaluation_strategy="epoch",
    num_train_epochs = 1.0,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

In [None]:
# 4-bit NF4 quantization with double quantization enabled; bfloat16 is the compute dtype.
# Shared by both the training model load and the later inference model load.
nf4_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_use_double_quant=True,
  bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Load the base model's tokenizer and register '[PAD]' as its padding token.
# NOTE(review): this tokenizer is not passed to SFTTrainer in the visible cells, and the
# model's embeddings are never resized for the added token — confirm whether either is needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

In [None]:
# Load the base model quantized according to nf4_config, with automatic device placement.
model = LlamaForCausalLM.from_pretrained(
    model_name, device_map='auto', quantization_config=nf4_config,
)

In [None]:
# Wrap the quantized base model with LoRA adapters and report the trainable-parameter count.
# This rebinds `model`, so re-running the cell wraps the already-wrapped model again.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# Supervised fine-tuning on the 'text' field. Evaluation uses the 'eval' split; the
# 'test' split is kept for the generation samples later in the notebook.
# max_seq_length=256 presumably caps each sample in tokens — TODO confirm against the
# longest prompt+response in the data.
trainer = SFTTrainer(
    model,
    train_dataset=dataset['train'],
    eval_dataset=dataset['eval'],
    dataset_text_field="text",
    max_seq_length=256,
    args=training_args,
    )

In [None]:
# Cast every submodule whose qualified name contains "norm" to float32 for stability.
# nn.Module.to() converts the module in place, so its return value need not be kept.
for name, module in trainer.model.named_modules():
  if "norm" not in name:
    continue
  module.to(torch.float32)

In [None]:
# Run training inside a named MLflow run.
# NOTE(review): presumably so the Trainer's metrics are recorded under this run — confirm
# that MLflow logging is actually enabled in this environment.
with mlflow.start_run(run_name='02-LoRA-Experiment'):
    trainer.train()

In [None]:
# Authenticate with the Hugging Face Hub before the upload in the next cell.
!huggingface-cli login

In [None]:
# Upload the PEFT-wrapped model to the Hub under this repository name.
# NOTE(review): presumably only the LoRA adapter weights are uploaded, not the base model — confirm.
model.push_to_hub("open_llama_3b_v2-Fine-Tuned-Grade_School_Math_Instructions")

In [None]:
# Evaluate on the eval_dataset handed to the trainer (dataset['eval']).
evaluation_results = trainer.evaluate()


In [None]:
import math
# Perplexity is reported as the exponential of the evaluation loss.
print(f"Perplexity: {math.exp(evaluation_results['eval_loss']):.2f}")

In [None]:
#dbutils.fs.ls("01-experiment")

# COMMAND ----------

# Reload a fresh copy of the base model for inference (same Hub id as model_name).
model_path = 'openlm-research/open_llama_3b_v2'

# COMMAND ----------

tokenizer_2 = LlamaTokenizer.from_pretrained(model_path)
tokenizer_2.add_special_tokens({'pad_token': '[PAD]'})

# COMMAND ----------

model_2 = LlamaForCausalLM.from_pretrained(
    model_path, device_map='auto', quantization_config=nf4_config,
)

# COMMAND ----------

# Resolve the adapter checkpoint from the trainer's output directory instead of
# hard-coding an absolute path and step number: both change with the runtime's
# working directory and with the dataset size / batch settings.
from transformers.trainer_utils import get_last_checkpoint

peft_model_id = get_last_checkpoint(training_args.output_dir)
if peft_model_id is None:
    raise FileNotFoundError(
        f"No checkpoint-* directory found under {training_args.output_dir!r}; run training first."
    )

# COMMAND ----------

# Attach the trained LoRA adapter to the freshly loaded base model.
peft_model = PeftModel.from_pretrained(model_2, peft_model_id)

In [None]:
# First three instructions from the held-out 'test' split, used as generation prompts.
test_strings = dataset['test'][0:3]['instruction']

In [None]:
# This must reproduce the prompt text built by form_finetuning_dataset exactly
# (including the four-space indentation, the blank lines, and the newline after
# "### Response:"), because the adapter was trained on that layout.
inference_prompt_template = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n"
    "\n"
    "    ### Instruction:\n"
    "\n"
    "    {instruction}\n"
    "\n"
    "    ### Response:\n"
)

predictions = []
for test in test_strings:
  prompt = inference_prompt_template.format(instruction=test)
  input_ids = tokenizer_2(prompt, return_tensors="pt").input_ids.to('cuda')

  # Generate through the PEFT wrapper so the fine-tuned adapter is explicitly in use.
  generation_output = peft_model.generate(
      input_ids=input_ids, max_new_tokens=156
  )
  # Drop special tokens (BOS/EOS/PAD) so they do not leak into the extracted response.
  predictions.append(tokenizer_2.decode(generation_output[0], skip_special_tokens=True))

In [None]:
def extract_response_text(input_string, start_marker='### Response:', end_marker='###'):
    """Return the text between the response marker and the next section marker.

    Args:
        input_string: Decoded model output containing the prompt and completion.
        start_marker: Text that introduces the response section.
        end_marker: Text that begins whatever follows the response
            (for example ``### End``).

    Returns:
        The response with surrounding whitespace removed, or ``None`` when
        ``start_marker`` does not occur in ``input_string``. If ``end_marker``
        never follows the start marker (for example, generation stopped at the
        token limit), everything after the start marker is returned.
    """
    _, found, remainder = input_string.partition(start_marker)
    if not found:
        return None

    # Strip in both cases so a truncated generation is formatted like a complete one.
    response, _, _ = remainder.partition(end_marker)
    return response.strip()

In [None]:

# Show each test instruction next to the model's extracted answer.
# Iterating over the pairs (rather than range(3)) keeps this cell correct if the
# number of test prompts selected earlier changes.
for text, pred in zip(test_strings, predictions):
  print(text+'\n')
  print(extract_response_text(pred))
  print('--------')