In [1]:
from llama3 import Llama
import torch
from pathlib import Path
import json
from llama3.model import ModelArgs, Transformer
from llama3.tokenizer import ChatFormat, Dialog, Message, Tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, TrainingArguments
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, PeftConfig
from trl import SFTTrainer
from datasets import load_dataset, Dataset, Features, ClassLabel, Value

In [2]:
ckpt_dir = "F:\AugustRoboticsLLM\Meta-Llama-3-8B-Instruct"
tokenizer_path = "F:\AugustRoboticsLLM\Meta-Llama-3-8B-Instruct"
output_dir = "F:\AugustRoboticsLLM\LoRAAdapter"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
peft_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj"],
)

In [3]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True, add_bos_token=False)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [1]:
class_names = ["Symptom", "Solution1", "Solution2", "Solution3", "Solution4", "Solution5", "Solution6", "Solution7"]
features = Features({name: Value('string') for name in class_names})
QandA_dataset = load_dataset("csv", data_dir="F:\AugustRoboticsLLM\dataset", sep=',', quoting=1, quotechar=r'"', doublequote=True, features=features)

NameError: name 'Features' is not defined

In [12]:
def prepare_dataset(dataset, tokenizer=tokenizer):
    # prompting eng: https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb
    # print(dataset)
    solutions = []
    for col in ['Solution1', 'Solution2', 'Solution3', 'Solution4', 'Solution5', 'Solution6', 'Solution7']:
        if dataset[col] != None:
            # print(dataset[col])
            solutions.append(dataset[col])
    
    str_solutions = ""
    for _idx, solution in enumerate(solutions):
        str_solutions += f"\t {_idx}: " + solutions + "\n"
    dialogs = [
        [{"role": "system", "content": "Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw lines on the ground.\n"
                                       "You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.\n"
                                       "Let's think through this carefully, step by step.\n"},
        {"role": "user", "content": f"{dataset['Symptom']}"},
    {"role": "assistant", "content": "Here are the possible solutions provided by me: \n"
                                          f"{str_solutions}"}],
    ]
    # print(type(tokenizer.apply_chat_template(dialogs, tokenize=False)[0]))
    return {"formattedchat": tokenizer.apply_chat_template(dialogs, tokenize=False)[0]}

QandA_dataset = QandA_dataset.map(prepare_dataset, remove_columns=('Symptom', 'Solution1', 'Solution2', 'Solution3', 'Solution4', 'Solution5', 'Solution6', 'Solution7'))
# QandA_dataset = QandA_dataset.train_test_split(test_size=0.1)
QandA_dataset

Map:   0%|          | 0/44 [00:00<?, ? examples/s]

{'Symptom': 'Nothing is displayed when a map is viewed', 'Solution1': 'Refresh the webpage on the tablet, and navigate to the homepage (192.168.59.99)', 'Solution2': 'Ensure that the map has not been archived', 'Solution3': 'If possible, hide the information layer using the show/hide tool', 'Solution4': 'On the client portal (https://portal.augustrobotics.com/) ensure that the correct units were chosen when uploading the map (m / mm / inch)', 'Solution5': None, 'Solution6': None, 'Solution7': None}
<class 'str'>
{'Symptom': 'An error appears about unreachable marks', 'Solution1': 'Check for marks outside the marking area - it is possible that there are booths "outside" the hall', 'Solution2': 'Check if there are any marks surrounded by an obstacle', 'Solution3': 'Wait until the job is complete, are there still marks counted as unpainted?', 'Solution4': 'Stop the job and re-start it. Does the MSP continue to display the message?', 'Solution5': 'Contact an AR representative', 'Solution6'

Dataset({
    features: ['formattedchat'],
    num_rows: 44
})

In [4]:
text_dataset = load_dataset("text", data_dir="F:\AugustRoboticsLLM\/text_dataset")

In [5]:
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, quantization_config=bnb_config, device_map="auto")
model.resize_token_embeddings(len(tokenizer))
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
del model
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, 
    return_dict=True,
    torch_dtype=torch.float16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the disk and cpu.


In [6]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=1,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=5,
    learning_rate=0.0002,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    dataloader_num_workers=0
)

trainer = SFTTrainer(
    model=model,
    train_dataset=text_dataset["train"],
    peft_config=peft_config,
    # dataset_text_field="formattedchat",
    dataset_text_field="text",
    dataset_batch_size=2,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=True,
    
)

trainer.train()
trainer.model.save_pretrained(output_dir)

Generating train split: 0 examples [00:00, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  attn_output = torch.nn.functional.scaled_dot_product_attention(
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
5,4.7245
10,3.603
15,2.9452
20,2.688
25,2.7089
30,2.4004
35,2.5524
40,2.5183


In [4]:
base_model = AutoModelForCausalLM.from_pretrained(
    ckpt_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="cpu",
    offload_buffers=False,
)
model = PeftModel.from_pretrained(base_model, "F:\AugustRoboticsLLM\LoRAAdapter", device_map="cpu")
model = model.merge_and_unload()
model.save_pretrained("F:\AugustRoboticsLLM\ARLLM")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

ValueError: Can't find 'adapter_config.json' at 'F:\AugustRoboticsLLM\LoRAAdapter'

In [12]:
prompts = [
        # For these prompts, the expected answer is the natural continuation of the prompt
        "I believe the meaning of life is",
    
        "Simply put, the theory of relativity states that ",
    
        """A brief message congratulating the team on the launch:
        Hi everyone,
        I just """,
    
        # Few shot prompt (providing a few examples before asking model to complete more);
        """Translate English to French:
        sea otter => loutre de mer
        peppermint => menthe poivrée
        plush girafe => girafe peluche
        cheese =>""",
    ]
inputs = tokenizer(prompts, padding=True, truncation=True, return_tensors="pt", max_length=512)
print(inputs)

{'input_ids': tensor([[128000,     40,   4510,    279,   7438,    315,   2324,    374, 128009,
         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009],
        [128000,  61346,   2231,     11,    279,  10334,    315,   1375,  44515,
           5415,    430,    220, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         128009, 128009, 128009, 128009],
        [128000,     32,  10015,   1984,  40588,  15853,    279,   2128,    389,
            279,   7195,   1473,    286,  21694,   5127,   3638,    286,    358,
           1120,    220, 128009, 128009, 128009, 128009, 128009, 128009, 128009,
         12

In [37]:
generation_config = GenerationConfig.from_pretrained(ckpt_dir)
generation_config.max_new_tokens=100
generation_config.repetition_penalty = 1.1
model_inputs = tokenizer(tokenizer.apply_chat_template([{"role": "system", "content": "Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw lines on the ground.\n"
                                       "You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.\n"
                                       "Let's think through this carefully, step by step.\n"},{"role": "user", "content": "Lionel nozzle is blocked, what should I do?"}], tokenize=False), return_tensors="pt").to("cuda")
print(model_inputs)
output = model.generate(**model_inputs, generation_config=generation_config)
print(tokenizer.decode(output[0], skip_special_tokens=False))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'input_ids': tensor([[128000, 128000, 128006,   9125, 128007,    271,   7927,   3560,    374,
            459,    389,  29654,  74706,   5784,  24490,    889,   6835,  11156,
            323,  15325,   9650,    311,   3016,    889,   1005,  84224,  12585,
            311,   4128,   5238,    389,    279,   5015,    627,   2675,   9352,
            311,   6287,  77564,  12604,     13,   5321,    656,    539,   4320,
            904,   3488,    539,  29243,    311,   1057,   2626,     11,    499,
            649,   5042,  26122,    433,    555,   5605,    912,    627,  10267,
            596,   1781,   1555,    420,  15884,     11,   3094,    555,   3094,
             13, 128009, 128006,    882, 128007,    271,     43,    290,    301,
          82981,    374,  19857,     11,   1148,   1288,    358,    656,     30,
         128009]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [5]:
import gc
del base_model
gc.collect()

1505

In [31]:
del model

In [33]:
gc.collect()

33