In [1]:
from llama3 import Llama
import torch
from pathlib import Path
import json
from llama3.model import ModelArgs, Transformer
from llama3.tokenizer import ChatFormat, Dialog, Message, Tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig, TrainingArguments
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel, PeftConfig
from trl import SFTTrainer
from datasets import load_dataset, Dataset, Features, ClassLabel, Value

In [2]:
ckpt_dir = "F:\AugustRoboticsLLM\Meta-Llama-3-8B-Instruct"
tokenizer_path = "F:\AugustRoboticsLLM\Meta-Llama-3-8B-Instruct"
output_dir = "F:\AugustRoboticsLLM\LoRAAdapter"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
peft_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj"],
)

In [3]:
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True, add_bos_token=False)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [4]:
class_names = ["Symptom", "Solution"]
features = Features({name: Value('string') for name in class_names})
QandA_dataset = load_dataset("csv", data_dir="F:\AugustRoboticsLLM\dataset", sep=',', quoting=1, quotechar=r'"', doublequote=True, features=features)

In [5]:
def prepare_dataset(dataset, tokenizer=tokenizer):
    # prompting eng: https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb
    dialogs = [
        [
            {"role": "system", "content": "Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw marks on the exhibition.\n"
                                       "You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.\n"
                                       "Let's think through this carefully, step by step.\n"},
            {"role": "user", "content": f"{dataset['Symptom']}"},
            {"role": "assistant", "content": f"{dataset['Solution']}"}
        ],
    ]
    # print(type(tokenizer.apply_chat_template(dialogs, tokenize=False)[0]))
    return {"formattedchat": tokenizer.apply_chat_template(dialogs, tokenize=False)[0]}

QandA_dataset = QandA_dataset.map(prepare_dataset, remove_columns=('Symptom', 'Solution'))
# QandA_dataset = QandA_dataset.train_test_split(test_size=0.1)
QandA_dataset

DatasetDict({
    train: Dataset({
        features: ['formattedchat'],
        num_rows: 32
    })
})

In [12]:
print(QandA_dataset["train"]["formattedchat"][0])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw lines on the ground.
You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.
Let's think through this carefully, step by step.<|eot_id|><|start_header_id|>user<|end_header_id|>

Nothing is displayed when a map is viewed on the tablet<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Start by refreshing the webpage on your tablet and navigate back to the map. Confirm that the map has not been archived. If the problem persists, access the client portal at https://portal.augustrobotics.com and verify that the map is still active.<|eot_id|>


In [6]:
text_dataset = load_dataset("text", data_dir="F:\AugustRoboticsLLM\/text_dataset")
print(text_dataset)

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 517
    })
})


In [4]:
model = AutoModelForCausalLM.from_pretrained("F:\AugustRoboticsLLM\ARLLM", quantization_config=bnb_config, device_map="auto")
model.resize_token_embeddings(len(tokenizer))
# model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Embedding(128256, 4096)

In [10]:
del model
model = AutoModelForCausalLM.from_pretrained(ckpt_dir, 
    return_dict=True,
    torch_dtype=torch.float16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the disk and cpu.


In [9]:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=2,
    optim="paged_adamw_32bit",
    save_steps=200,
    logging_steps=50,
    learning_rate=0.0004,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    lr_scheduler_type="cosine",
    dataloader_num_workers=0
)

trainer = SFTTrainer(
    model=model,
    train_dataset=QandA_dataset["train"],
    # train_dataset=text_dataset["train"],
    peft_config=peft_config,
    dataset_text_field="formattedchat",
    # dataset_text_field="text",
    dataset_batch_size=1,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
    
)

trainer.train()
trainer.model.save_pretrained(output_dir)

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
50,1.2411


In [4]:
base_model = AutoModelForCausalLM.from_pretrained(
    ckpt_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="cpu",
    offload_buffers=False,
)
model = PeftModel.from_pretrained(base_model, "F:\AugustRoboticsLLM\LoRAAdapter", device_map="cpu")
model = model.merge_and_unload()
model.save_pretrained("F:\AugustRoboticsLLM\ARLLM")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
generation_config = GenerationConfig.from_pretrained(ckpt_dir)
generation_config.max_new_tokens=150
generation_config.repetition_penalty = 1.1
prompts = [
        # For these prompts, the expected answer is the natural continuation of the prompt
        "The Guiding Station is designed for",
    ]
model_inputs = tokenizer(prompts, padding=True, truncation=True, return_tensors="pt", max_length=512)
output = model.generate(**model_inputs, generation_config=generation_config)
print(tokenizer.decode(output[0], skip_special_tokens=False))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  return fn(*args, **kwargs)


<|begin_of_text|>The Guiding Station is designed for Lionel to understand its position in the hall and adjust itself before each job. It consists of a physical beacon (LED light) that Lionel can see, and a camera that Lionel uses to determine its precise position relative to the beacon. The Guiding Station will be placed in a fixed position in the hall by August Robotics before the first day of the event, so that it knows exactly where it is in the hall. Once the Lionel robot is turned on, it will look for the Guiding Station and use its camera to determine its position relative to the beacon, and thereby calculate its own position in the hall. This step is called “self-leveling”. After self-leveling, Lionel’s laser will be precisely aligned with the reference points, allowing Lionel


In [5]:
generation_config = GenerationConfig.from_pretrained(ckpt_dir)
generation_config.max_new_tokens=200
generation_config.repetition_penalty = 1.1
generation_config.temperature = 0.2
generation_config.top_k = 20
generation_config.top_p = 0.2
model_inputs = tokenizer(tokenizer.apply_chat_template([
    {
    "role": "system", "content": "Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw lines on the ground.\n"
    "You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.\n"
    "Let's think through this carefully, step by step.\n"
    "Lionel is a four-wheeled robot that autonomously moves, localises and makes floor markings in exhibition halls. Lionel has Camera Beacon, Multiple Sensors, Mobile Base, Mesh Router, Spray Marking Mechanism \n"
    "Guiding Station is a stationary piece of equipment that helps the Lionel to get its location in the hall to spray marks accurately. It has a camera, a laser measurement unit, and a beacon (green and red LED lights) \n"
    "Map Server is a server like system which control and management all the devices (Lionel and GS) connected to it. It can be accessed by using the provided tablet with browser, i.e. webpage application \n"
    "Reference Beacon is a LED light with blue and green color device which is placed on the reference points in the hall during the set-up process to allow the Guiding Station to calculate its position \n"
    },
    {
        "role": "user", "content": "How does the whole system work anyway?"
    }], tokenize=False), return_tensors="pt").to("cuda")
print(model_inputs)
output = model.generate(**model_inputs, generation_config=generation_config)
print(tokenizer.decode(output[0], skip_special_tokens=False))

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{'input_ids': tensor([[128000, 128000, 128006,   9125, 128007,    271,   7927,   3560,    374,
            459,    389,  29654,  74706,   5784,  24490,    889,   6835,  11156,
            323,  15325,   9650,    311,   3016,    889,   1005,  84224,  12585,
            311,   4128,   5238,    389,    279,   5015,    627,   2675,   9352,
            311,   6287,  77564,  12604,     13,   5321,    656,    539,   4320,
            904,   3488,    539,  29243,    311,   1057,   2626,     11,    499,
            649,   5042,  26122,    433,    555,   5605,    912,    627,  10267,
            596,   1781,   1555,    420,  15884,     11,   3094,    555,   3094,
            627,     43,    290,    301,    374,    264,   3116,   2695,    383,
          41189,  12585,    430,  95103,   7162,  11031,     11,   2254,   5014,
            323,   3727,   6558,  65172,    304,  28099,  52473,     13,  84224,
            706,  14669,  59720,     11,  29911,  95520,     11,  13716,   5464,
             1

  attn_output = torch.nn.functional.scaled_dot_product_attention(


<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Your role is an on-site robotics operation engineer who gives technical and practical advice to client who use Lionel robot to draw lines on the ground.
You belong to August Robotics Ltd. Please do not answer any question not relate to our business, you can simply refuse it by saying no.
Let's think through this carefully, step by step.
Lionel is a four-wheeled robot that autonomously moves, localises and makes floor markings in exhibition halls. Lionel has Camera Beacon, Multiple Sensors, Mobile Base, Mesh Router, Spray Marking Mechanism 
Guiding Station is a stationary piece of equipment that helps the Lionel to get its location in the hall to spray marks accurately. It has a camera, a laser measurement unit, and a beacon (green and red LED lights) 
Map Server is a server like system which control and management all the devices (Lionel and GS) connected to it. It can be accessed by using the provided tablet

In [23]:
# print(tokenizer.decode(model_inputs["input_ids"][0], skip_special_tokens=True))
print(tokenizer.decode(output[0][len(model_inputs["input_ids"][0])+2:], skip_special_tokens=True).strip())


The Map Server is the central hub for the entire system. It connects to the Guiding Station, Lionel, and other devices via WiFi or Ethernet cables. The Map Server uses the data from these devices to create a map of the exhibition hall, which is then used to guide Lionel as it draws marks on the ground. The Reference Beacons are used to help the Guiding Station determine its own location within the map. Finally, the Map Server communicates with the tablet to display the current state of the job, including the location of Lionel and the marks being drawn. If there are any errors or issues, the Map Server will alert the user through notifications on the tablet.


In [4]:
model = AutoModelForCausalLM.from_pretrained("F:\AugustRoboticsLLM\ARLLM", quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [20]:
import gc
del model
gc.collect()

92

In [10]:
del QandA_dataset

In [21]:
gc.collect()

0

In [16]:
model.unload()

NameError: name 'model' is not defined