In [35]:
from datasets import Dataset, load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import os
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model
import torch
from tqdm import tqdm
import textwrap
import torch
import random
import dotenv
from __future__ import print_function, division
from typing import List, Dict, Any, Optional, Callable, Tuple
import re
import sys
import json
import requests
import argparse
from pydantic import BaseModel
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from time import time

In [None]:
# Model loading
#
# Three steps, each rebinding a notebook-level name used by later cells:
#   1. load the 4-bit Llama-3.1-8B-Instruct checkpoint and its tokenizer,
#   2. wrap the model with LoRA adapters via FastLanguageModel.get_peft_model,
#   3. replace the tokenizer with one configured for the "llama-3.1" chat template.

max_seq_length = 2048 # Choose any. Unsloth supports RoPE scaling internally. Also passed to SFTTrainer later.
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# `model` is rebound to the adapter-wrapped model; re-running this cell on its own
# would wrap the already-wrapped model again, so re-run the whole cell from the load.
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # LoRA rank. Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    # Adapters are attached to the attention projections and the MLP projections.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407, # Same value as the TrainingArguments seed used later.
    use_rslora = False,  # Rank-stabilized LoRA is supported but disabled here
    loftq_config = None, # LoftQ is supported but not used here
)

# Rebind the tokenizer so that apply_chat_template renders the Llama-3.1 format.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: NVIDIA A40. Max memory: 44.352 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 8.6. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [37]:
# Data loading
#
# Reads a JSON-Lines file (one JSON object per line) and keeps only the
# trajectories flagged as successful. `json` comes from the imports cell.
input_file_path = "./train.json"

train_data = []
# Explicit UTF-8 so the result does not depend on the machine's locale encoding.
with open(input_file_path, "r", encoding="utf-8") as file:
    for line in file:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        record = json.loads(line)
        # Records without a "success" field are treated as unsuccessful and skipped.
        if record.get("success"):
            train_data.append(record)

In [38]:
# Task instructions for the WebShop agent. format_chat_template prepends this text
# to the first message of every conversation before the chat template is applied,
# so every character here becomes part of the training input.
# NOTE(review): the text contains a typo ("fullfil") and an unfinished sentence
# ("Quantity requirements can be met"). Fix them only together with the prompt
# used at inference time, so that training and inference inputs stay identical.
prompt = """
You are a shopping agent shopping in Webshop.

The actions available to you are :
- reset
- think[Thought]
- search[Search query]
- click[Button to click]

Rules:
- You can reset from any page, think on any page.
- You can only click buttons available on the page described in the observation. Buttons are defined between square brackets - []
- You can only search from a page with [Search], so click on the back buttons to reach such a page before you search again.
- You can ONLY reply with the action you want to take.
- You must end after a few tries by attempting to buy something.

Tips:
- Carefully surf the webshop to fullfil requirements. 
- If any items match some of the requirements, click on them to see a detailed description and to see if they match all the requirements.  Quantity requirements can be met 
- Don't just give up on a search at the 1st page of results. Move through the result pages by pressing the [Next >] button. You may decide to give up at a reasonable point such as when the results are empty or too different from the requirements (usually 2-3 pages).
"""

In [None]:
# Inspect the first kept record to see which fields are available for formatting.
train_data[0]

{'instruction': 'i want a noise cancelling cosycost usb microphone, and price lower than 60.00 dollars',
 'score': 1.0,
 'success': True,
 'trajectory': 'WebShop \nInstruction:  \ni want a noise cancelling cosycost usb microphone, and price lower than 60.00 dollars \n[Search] \n> reset\n\nWebShop \nInstruction:  \ni want a noise cancelling cosycost usb microphone, and price lower than 60.00 dollars \n[Search] \n> search[noise cancelling cosycost usb microphone]\n\n[Back to Search] \nPage 1 (Total results: 50) \n[Next >] \n[B0972Q1T8T] \nCosycost USB Microphone,Condenser Computer PC Gaming Microphone for PS4/5 Laptop Windows Mac OS Android Phone,Noise Cancelling Instant Mute,Studio Mic for Voice,Music Recording,Podcasting,Streaming \n$32.99 \n[B072L2D6LY] \nAndrea Communications NC-255VM USB On-Ear Stereo USB Computer Headset with Noise-Canceling Microphone, in-Line Volume/Mute Controls, and Plug \n$34.59 \n[B071H84LTJ] \nAndrea Communications NC-455VM USB Over-Ear Circumaural Stereo US

In [41]:
def format_chat_template(row):
    """Render one trajectory record into a single chat-templated training string.

    The module-level ``prompt`` is prepended to the content of the first
    conversation turn, and the resulting conversation is passed to the
    module-level ``tokenizer``'s ``apply_chat_template`` (untokenized, without a
    trailing generation prompt).

    The input ``row`` and its nested turn dicts are NOT modified: a new record
    is returned. This keeps the cell idempotent -- mapping this function over
    ``train_data`` a second time no longer stacks a second copy of the prompt
    onto every conversation.

    Args:
        row: dict with a ``'conversation'`` list of message dicts, each having
            at least a ``'content'`` key. Any other keys are carried over.

    Returns:
        A new dict with all keys of ``row``, where ``'conversation'`` holds the
        prompt-prefixed conversation and ``'text'`` holds the rendered string.

    Raises:
        IndexError: if ``row['conversation']`` is empty.
    """
    turns = row['conversation']
    first_turn = turns[0]

    # Copy the first turn instead of editing it in place; later turns are reused as-is.
    conversation = [
        {**first_turn, 'content': f"{prompt}\n{first_turn['content']}"},
        *turns[1:],
    ]

    text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt = False)

    # Same key order as before: the original keys first, 'text' appended last.
    return {**row, 'conversation': conversation, 'text': text}

# Render every kept trajectory with format_chat_template, then wrap the resulting
# records in a Dataset for the trainer.
processed_data = [format_chat_template(example) for example in train_data]
train_dataset = Dataset.from_list(processed_data)

In [42]:
# Display the dataset summary (feature names and number of rows).
train_dataset

Dataset({
    features: ['instruction', 'score', 'success', 'trajectory', 'conversation', 'text'],
    num_rows: 1448
})

In [43]:
# Print the first rendered example to check the chat-template output by eye.
# print() is used so the newlines in the string are rendered rather than escaped.
print(train_dataset[0]["text"])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>


You are a shopping agent shopping in Webshop.

The actions available to you are :
- reset
- think[Thought]
- search[Search query]
- click[Button to click]

Rules:
- You can reset from any page, think on any page.
- You can only click buttons available on the page described in the observation. Buttons are defined between square brackets - []
- You can only search from a page with [Search], so click on the back buttons to reach such a page before you search again.
- You can ONLY reply with the action you want to take.
- You must end after a few tries by attempting to buy something.

Tips:
- Carefully surf the webshop to fullfil requirements. 
- If any items match some of the requirements, click on them to see a detailed description and to see if they match all the requirements.  Quantity requirements can be met 


In [44]:
from unsloth import is_bfloat16_supported

# Batches are assembled by a seq2seq collator built around the chat-template tokenizer.
seq2seq_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer)

# Choose the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation settings. With 16 examples per device and 2 accumulation steps,
# each optimizer step covers 32 examples.
# Knobs intentionally left at their defaults: warmup_steps, eval_strategy /
# eval_steps, max_steps.
training_args = TrainingArguments(
    output_dir = "output-llama3.18b",
    seed = 3407,
    report_to = "none", # Use this for WandB etc
    logging_steps = 10,
    do_eval = False,
    num_train_epochs = 5,
    per_device_train_batch_size = 16,
    gradient_accumulation_steps = 2,
    learning_rate = 1e-5,
    lr_scheduler_type = "cosine",
    weight_decay = 0.01,
    optim = "adamw_8bit",
    bf16 = use_bf16,
    fp16 = not use_bf16,
)

# Supervised fine-tuning over the pre-rendered "text" column of train_dataset.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = seq2seq_collator,
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

Map (num_proc=4): 100%|██████████| 1448/1448 [00:02<00:00, 524.08 examples/s]
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [45]:
from unsloth.chat_templates import train_on_responses_only

# Header strings that open a user turn and an assistant turn in the rendered text.
user_turn_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_turn_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# Rebind `trainer` to the version returned by train_on_responses_only.
# NOTE(review): going by its name, this presumably limits the training loss to
# the assistant segments -- confirm against the Unsloth documentation.
trainer = train_on_responses_only(
    trainer,
    instruction_part = user_turn_header,
    response_part = assistant_turn_header,
)

Map: 100%|██████████| 1448/1448 [00:01<00:00, 1255.03 examples/s]


In [None]:
# NOTE(review): CUDA_VISIBLE_DEVICES is assigned here, after the model has already
# been loaded in an earlier cell. GPU visibility is normally fixed when the process
# first initialises CUDA, so this assignment probably does not change which GPUs
# this kernel uses (the training banner printed by this cell reported
# "Num GPUs = 1"). If GPU selection matters, set the variable before the imports
# cell runs -- confirm the intended devices first.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2"
# Run fine-tuning; the value returned by trainer.train() is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,448 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 16 | Gradient Accumulation steps = 2
\        /    Total batch size = 32 | Total steps = 225
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,0.9581
20,0.8299
30,0.6506
40,0.4836


In [None]:
# Export the fine-tuned model together with the tokenizer via Unsloth's GGUF saver.
# NOTE(review): the target directory ("./outputs/run5/model") is hard-coded and is
# unrelated to the TrainingArguments output_dir ("output-llama3.18b").
# NOTE(review): "quantized" is presumably an Unsloth preset name rather than a
# concrete GGUF quantization type -- confirm which format it produces.
model.save_pretrained_gguf("./outputs/run5/model", tokenizer, quantization_method = "quantized")