In [1]:
# 📦 Step 1: Install Dependencies
!pip install -q bitsandbytes transformers datasets peft accelerate

# 📁 Step 2: Download the Dataset
!wget -O train.json https://rail.eecs.berkeley.edu/datasets/rl-llm-bench-dataset/car-dealer/train.json

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m122.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m96.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m58.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import json

with open("train.json", "r") as f:
    raw_data = json.load(f)

formatted_data = []

for dialogue in raw_data:
    turns = dialogue["lines"]
    for i in range(1, len(turns)):
        if turns[i]["role"] == "Dealer" and turns[i - 1]["role"] == "Buyer":
            prompt = turns[i - 1]["text"]
            response = turns[i]["text"]
            formatted_data.append({
                "prompt": prompt.strip(),
                "response": response.strip()
            })

print(f"✅ Processed {len(formatted_data)} buyer→dealer examples")

with open("dealer_finetune_dataset.json", "w") as f:
    for item in formatted_data:
        f.write(json.dumps(item) + "\n")


✅ Processed 30747 buyer→dealer examples


In [3]:
from huggingface_hub import login

login("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")

In [4]:
import json
from datasets import Dataset
from transformers import AutoTokenizer

# Load raw JSONL data
with open("dealer_finetune_dataset.json", "r") as f:
    data = [json.loads(line) for line in f]

# Convert to Hugging Face dataset
dataset = Dataset.from_list(data)

# Load tokenizer from your new base model
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # Ensure pad token is defined

# Define tokenization function
def tokenize(example):
    prompt = f"### Buyer:\n{example['prompt']}\n\n### Dealer:\n"
    full_text = prompt + example["response"]

    encoding = tokenizer(
        full_text,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    # Mask out loss on padding tokens
    encoding["labels"] = [
        token if token != tokenizer.pad_token_id else -100
        for token in encoding["input_ids"]
    ]

    return encoding

# Apply to dataset
tokenized_dataset = dataset.map(
    tokenize,
    batched=False,
    remove_columns=dataset.column_names
)

print(f"✅ Tokenized {len(tokenized_dataset)} examples using {model_name}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/30747 [00:00<?, ? examples/s]

✅ Tokenized 30747 examples using TinyLlama/TinyLlama-1.1B-Chat-v1.0


In [5]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load base model (full precision)
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    device_map="cuda",
    trust_remote_code=True
)

# Match tokenizer embedding
model.resize_token_embeddings(len(tokenizer))

# Apply LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [6]:
# model.resize_token_embeddings(len(tokenizer))

# model = prepare_model_for_kbit_training(model)

# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=["q_proj", "v_proj"]  # Works well for Mistral
# )

# model = get_peft_model(model, lora_config)

In [7]:
def tokenize(example):
    text = f"### Buyer:\n{example['prompt']}\n\n### Dealer:\n{example['response']}"
    tokens = tokenizer(text, truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = [
        t if t != tokenizer.pad_token_id else -100
        for t in tokens["input_ids"]
    ]
    return tokens

In [8]:
tokenized_dataset = dataset.map(tokenize, batched=False)

Map:   0%|          | 0/30747 [00:00<?, ? examples/s]

In [9]:
all_ids = [token for ex in tokenized_dataset for token in ex["input_ids"]]
print("Max token ID:", max(all_ids))  # MUST be < tokenizer.vocab_size
print("Tokenizer vocab size:", tokenizer.vocab_size)

Max token ID: 30010
Tokenizer vocab size: 32000


In [10]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="./mistral-dealer-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    save_strategy="epoch",
    logging_steps=10,
    fp16=True,
    report_to="none"
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,1.8613
20,1.5438
30,1.2904
40,1.189
50,1.1205
60,1.1209
70,1.0836
80,1.0311
90,1.0485
100,1.0065


Step,Training Loss
10,1.8613
20,1.5438
30,1.2904
40,1.189
50,1.1205
60,1.1209
70,1.0836
80,1.0311
90,1.0485
100,1.0065


TrainOutput(global_step=3843, training_loss=0.7674057589140592, metrics={'train_runtime': 6034.5031, 'train_samples_per_second': 5.095, 'train_steps_per_second': 0.637, 'total_flos': 9.781149319770931e+16, 'train_loss': 0.7674057589140592, 'epoch': 0.9998699102380643})

In [11]:
model.save_pretrained("/content/drive/MyDrive/lora_adapter")
tokenizer.save_pretrained("content/drive/MyDrive/lora_adapter")

('content/drive/MyDrive/lora_adapter/tokenizer_config.json',
 'content/drive/MyDrive/lora_adapter/special_tokens_map.json',
 'content/drive/MyDrive/lora_adapter/tokenizer.model',
 'content/drive/MyDrive/lora_adapter/added_tokens.json',
 'content/drive/MyDrive/lora_adapter/tokenizer.json')

In [17]:
model.save_pretrained("/content/drive/MyDrive/lora_adapter")
tokenizer.save_pretrained("/content/drive/MyDrive/lora_adapter")

('/content/drive/MyDrive/lora_adapter/tokenizer_config.json',
 '/content/drive/MyDrive/lora_adapter/special_tokens_map.json',
 '/content/drive/MyDrive/lora_adapter/tokenizer.model',
 '/content/drive/MyDrive/lora_adapter/added_tokens.json',
 '/content/drive/MyDrive/lora_adapter/tokenizer.json')

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
