Python Pakete, Modell und Tokenizer installieren

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -qU transformers accelerate bitsandbytes pandas==2.0.3 peft trl

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, BitsAndBytesConfig
import torch
from trl import setup_chat_format
from google.colab import userdata

TOKEN = userdata.get("HF_TOKEN")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

MODEL_NAME = 'meta-llama/Meta-Llama-3-8B'
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map='auto',
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    token=TOKEN)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=TOKEN)
tokenizer.padding_side = "right"

model, tokenizer = setup_chat_format(model, tokenizer)

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In diesem Abschnitt wird der Datensatz in das ChatML Format gebracht

In [None]:
def split_chat(string,lst):
    idx = string[5:].find("ASSISTANT:")
    idy = string[5:].find("USER:")
    idz = string[5:].find("FUNCTION RESPONSE:")
    idx = min3(idx, idy, idz)
    if idx == -1:
        lst.append(string.rstrip("\n"))
        return
    idx += 5
    lst.append(string[:idx].rstrip("\n"))
    split_chat(string[idx:], lst)

def min2(x, y):
    if x == -1:
       return y
    if y == -1:
       return x
    return min(x, y)

def min3(x, y, z):
    if x == -1:
        return min2(y, z)
    if y == -1:
        return min2(x, z)
    if z == -1:
        return min2(x, y)
    return min(x, y, z)

In [None]:
def format_dataset(sample):
  chat = sample['chat']
  system = sample['system'].lstrip("SYSTEM:")[1:].rstrip("\n")
  msgs = []
  split_chat(chat, msgs)
  msgs_format = []
  e = {"role": "system", "content": system}
  msgs_format.append(e)
  for m in msgs:
    if m.startswith("USER:"):
      e = {"role": "user", "content": m.lstrip("USER:")[1:]}
    elif m.startswith("ASSISTANT:"):
      e = {"role": "assistant", "content": m.lstrip("ASSISTANT:")[1:].rstrip("<|endoftext|>")}
    elif m.startswith("FUNCTION RESPONSE:"):
      e = {"role": "user", "content": m}
    else:
      continue
    msgs_format.append(e)
  return {"messages": msgs_format}

In [None]:
from datasets import load_dataset

dataset = load_dataset("glaiveai/glaive-function-calling-v2", split="train[:1%]")

Downloading readme:   0%|          | 0.00/106 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/271M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/112960 [00:00<?, ? examples/s]

In [None]:
dataset_formatted = dataset.map(format_dataset)
dataset_formatted = dataset_formatted.remove_columns(['chat', 'system'])

dataset_formatted = dataset_formatted.train_test_split(test_size=0.3)

Map:   0%|          | 0/1130 [00:00<?, ? examples/s]

In [None]:
print(dataset_formatted["train"][2])

{'messages': [{'content': 'You are a helpful assistant with access to the following functions. Use them if required -\n{\n    "name": "send_email",\n    "description": "Send an email to a recipient",\n    "parameters": {\n        "type": "object",\n        "properties": {\n            "recipient": {\n                "type": "string",\n                "description": "The email address of the recipient"\n            },\n            "subject": {\n                "type": "string",\n                "description": "The subject of the email"\n            },\n            "message": {\n                "type": "string",\n                "description": "The content of the email"\n            }\n        },\n        "required": [\n            "recipient",\n            "subject",\n            "message"\n        ]\n    }\n}', 'role': 'system'}, {'content': 'I need to send an email to my boss. Can you help me with that?', 'role': 'user'}, {'content': "Of course, I can help you with that. Could you ple

In [None]:
dataset_formatted["train"].to_json("train_dataset.json", orient="records")
dataset_formatted["test"].to_json("test_dataset.json", orient="records")

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

873938

In diesem Abschnitt findet das Fine-Tuning statt

In [None]:
from peft import LoraConfig

peft_config = LoraConfig(
    lora_alpha=128,
    lora_dropout=0.05,
    r=256,
    bias="none",
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)

In [None]:
import transformers
from transformers import TrainingArguments


args = TrainingArguments(
    output_dir="output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    logging_steps=10,
    save_strategy="epoch",
    learning_rate=2e-4,
    #bf16=True,
    #tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    report_to="tensorboard",
)

In [None]:
dataset = load_dataset("json", data_files="train_dataset.json", split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
from trl import SFTTrainer

max_seq_length = 2048

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    packing=True,
    dataset_kwargs={
        "add_special_tokens": False,
        "append_concat_token": False,
    }
)

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
trainer.train()

trainer.save_model()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,0.7411
20,0.5336
30,0.4275
40,0.3411
50,0.3326
60,0.1998
70,0.1842




In [None]:
del model
del trainer

torch.cuda.empty_cache()

In [None]:
trained_model = "llama3-8B-function-calling"

In [None]:
from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
    args.output_dir,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

merged_model = model.merge_and_unload()
merged_model.save_pretrained(trained_model, safe_serialization=True, max_shard_size="4GB")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Lokale Daten zum Drive kopieren

In [None]:
!cp -r /content/llama3-8B-function-calling /content/drive/MyDrive/llama3-8B-function-calling

Pfad zum gespeicherten Modell

In [None]:
trained_model = "/content/drive/MyDrive/" + trained_model

In diesem Abschnitt wird das Modell getestet

In [None]:
import torch
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

In [None]:
from transformers import pipeline
from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(
    trained_model,
    device_map='auto',
    torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(trained_model)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`

In [None]:
eval_dataset = load_dataset("json", data_files="test_dataset.json", split="train")

In [None]:
from random import randint

rand_idx = randint(0, len(eval_dataset))
chat = eval_dataset[rand_idx]["messages"]

prompt = pipe.tokenizer.apply_chat_template(chat[:2], tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

print(f"Eingabe:\n{chat[1]['content']}")
print(f"Erwartet:\n{chat[2]['content']}")
print(f"Ausgabe:\n{outputs[0]['generated_text'][len(prompt):].strip()}")


Eingabe:
How can I implement a binary search algorithm in [JSON data] format on a collection of positive integers in ascending order? I want the search to begin by identifying the midpoint of the collection and comparing it with the target integer. If the target integer is greater than the midpoint, the search should continue in the right half of the collection, and if the target integer is less than the midpoint, the search should continue in the left half of the collection. The process should be repeated until the target integer is found or it is determined that the integer does not exist in the collection. Can you provide an example [JSON data] code for this?
Erwartet:
Sure, here's an example of how you can implement a binary search algorithm in JSON data format:
```json
{
 "collection": [1, 3, 5, 7, 9, 11, 13],
 "target": 9
}
```
In this example, we have a collection of positive integers in ascending order and we want to search for the integer 9.
To implement the binary search algo

In diesem Abschnitt kann das Modell mit eigenen Eingaben ausgeführt werden

In [None]:
messages = [
    {"role": "system", "content": """You are a helpful assistant with access to the following functions. Use them if required -
{
    ""name"": ""get_exchange_rate"",
    ""description"": ""Get the exchange rate between two currencies"",
    ""parameters"": {
        ""type"": ""object"",
        ""properties"": {
            ""base_currency"": {
                ""type"": ""string"",
                ""description"": ""The currency to convert from""
            },
            ""target_currency"": {
                ""type"": ""string"",
                ""description"": ""The currency to convert to""
            }
        },
        ""required"": [
            ""base_currency"",
            ""target_currency""
        ]
    }
}"""},
    {"role": "user", "content": "Can you convert EURO to US Dollars?"},
]

In [None]:
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=256, do_sample=False, temperature=0.1, top_k=50, top_p=0.1, eos_token_id=pipe.tokenizer.eos_token_id, pad_token_id=pipe.tokenizer.pad_token_id)

print(f"User: {messages[1]['content']}")
print(f"Assistant :\n{outputs[0]['generated_text'][len(prompt):].strip()}")

User: Can you convert EURO to US Dollars?
Assistant :
<functioncall> {"name": "get_exchange_rate", "arguments": '{"base_currency": "EURO", "target_currency": "US Dollars"}'}
