In [None]:
# 📦 Step 1: Install Dependencies
!pip install -q bitsandbytes transformers datasets peft accelerate

# 📁 Step 2: Download the Dataset
!wget -O train.json https://rail.eecs.berkeley.edu/datasets/rl-llm-bench-dataset/car-dealer/train.json

In [None]:
import json

# Read the raw dialogues downloaded in the previous step.
with open("train.json", "r") as source_file:
    raw_data = json.load(source_file)

# Walk each dialogue as (previous turn, current turn) pairs and keep every
# Dealer turn that directly follows a Buyer turn as one prompt/response example.
formatted_data = [
    {
        "prompt": previous_turn["text"].strip(),
        "response": current_turn["text"].strip(),
    }
    for conversation in raw_data
    for previous_turn, current_turn in zip(
        conversation["lines"], conversation["lines"][1:]
    )
    if current_turn["role"] == "Dealer" and previous_turn["role"] == "Buyer"
]

print(f"✅ Processed {len(formatted_data)} buyer→dealer examples")

# Persist the pairs as JSON Lines: one JSON object per line.
with open("dealer_finetune_dataset.json", "w") as sink_file:
    sink_file.writelines(json.dumps(pair) + "\n" for pair in formatted_data)


In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# SECURITY: an access token must never be stored in the notebook itself.
# The token that used to be hardcoded in this cell has to be treated as leaked
# and revoked at https://huggingface.co/settings/tokens.
# Use the HF_TOKEN environment variable when it is set; otherwise prompt for the
# token without echoing it, so it never ends up in the saved notebook.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=hf_token)

In [None]:
import json
from datasets import Dataset
from transformers import AutoTokenizer

# Read the JSONL file written earlier: every line is one JSON record
with open("dealer_finetune_dataset.json", "r") as jsonl_file:
    data = []
    for record_line in jsonl_file:
        data.append(json.loads(record_line))

# Wrap the list of records in a Hugging Face dataset
dataset = Dataset.from_list(data)

# Fast tokenizer belonging to the base model that will be fine-tuned
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

# Reuse the end-of-sequence token for padding so a pad token is always defined
tokenizer.pad_token = tokenizer.eos_token

# Define tokenization function
def tokenize(example):
    """Turn one buyer/dealer pair into a fixed-length causal-LM training example.

    The text is built from a "### Buyer:" section holding the prompt and a
    "### Dealer:" section holding the response, terminated by the tokenizer's
    end-of-sequence token, then truncated/padded to 512 tokens.

    Args:
        example: Mapping with string values under the keys "prompt" and "response".

    Returns:
        The tokenizer output with an added "labels" list that equals
        "input_ids" at real-token positions and -100 at padding positions.
    """
    prompt = f"### Buyer:\n{example['prompt']}\n\n### Dealer:\n"
    # Terminate the reply with EOS so the model gets a target that teaches it to
    # stop after the dealer's turn. (Llama-family tokenizers do not append EOS
    # by default — confirm if the tokenizer is ever swapped.)
    full_text = prompt + example["response"] + tokenizer.eos_token

    encoding = tokenizer(
        full_text,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    # Mask out loss on padding tokens. The pad token shares its id with EOS
    # (pad_token is set to eos_token), so comparing against pad_token_id would
    # also hide the real EOS; the attention mask marks padding unambiguously.
    encoding["labels"] = [
        token_id if is_real_token else -100
        for token_id, is_real_token in zip(
            encoding["input_ids"], encoding["attention_mask"]
        )
    ]

    return encoding

# Tokenize the records one at a time; the original text columns are dropped so
# only the fields returned by `tokenize` remain.
text_columns = dataset.column_names
tokenized_dataset = dataset.map(
    tokenize,
    remove_columns=text_columns,
    batched=False,
)

print(f"✅ Tokenized {len(tokenized_dataset)} examples using {model_name}")

In [None]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

# Load base model (full precision)
# `model_name` is reused so the model always matches the tokenizer loaded above.
# `trust_remote_code` is left at its default (False): this checkpoint uses the
# Llama architecture that ships with transformers, so there is no reason to allow
# code from the Hub repository to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
)

# Match tokenizer embedding: make the embedding matrix cover every tokenizer id
model.resize_token_embeddings(len(tokenizer))

# LoRA settings for causal-LM fine-tuning: rank-8 adapters (alpha 16, 5% dropout)
# attached to the q_proj and v_proj modules, with bias terms left untouched.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters described above
model = get_peft_model(model, lora_config)

In [None]:
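# NOTE: Disabled alternative setup for k-bit (quantized) training, kept for reference only.
# `prepare_model_for_kbit_training` is not imported anywhere in this notebook; import it
# from `peft` before re-enabling this cell.
# model.resize_token_embeddings(len(tokenizer))

# model = prepare_model_for_kbit_training(model)

# lora_config = LoraConfig(
#     r=8,
#     lora_alpha=16,
#     lora_dropout=0.05,
#     bias="none",
#     task_type="CAUSAL_LM",
#     target_modules=["q_proj", "v_proj"]  # Works well for Mistral
# )

# model = get_peft_model(model, lora_config)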

In [None]:
def tokenize(example):
    """Turn one buyer/dealer pair into a fixed-length causal-LM training example.

    NOTE: this redefines the `tokenize` function declared in an earlier cell;
    whichever definition ran last is the one `dataset.map` uses.

    The text is built from a "### Buyer:" section holding the prompt and a
    "### Dealer:" section holding the response, terminated by the tokenizer's
    end-of-sequence token, then truncated/padded to 512 tokens.

    Args:
        example: Mapping with string values under the keys "prompt" and "response".

    Returns:
        The tokenizer output with an added "labels" list that equals
        "input_ids" at real-token positions and -100 at padding positions.
    """
    # Terminate the reply with EOS so the model gets a target that teaches it to
    # stop after the dealer's turn. (Llama-family tokenizers do not append EOS
    # by default — confirm if the tokenizer is ever swapped.)
    text = (
        f"### Buyer:\n{example['prompt']}\n\n### Dealer:\n{example['response']}"
        + tokenizer.eos_token
    )
    tokens = tokenizer(text, truncation=True, padding="max_length", max_length=512)
    # The pad token shares its id with EOS, so padding is identified through the
    # attention mask rather than by token id; otherwise the real EOS would be
    # excluded from the loss together with the padding.
    tokens["labels"] = [
        token_id if is_real_token else -100
        for token_id, is_real_token in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

In [None]:
# Re-tokenize with the most recently defined `tokenize`. The raw "prompt" and
# "response" text columns are dropped, matching the earlier map call, so the
# dataset handed to the trainer holds only the fields returned by `tokenize`.
tokenized_dataset = dataset.map(
    tokenize,
    batched=False,
    remove_columns=dataset.column_names,
)

In [None]:
# Sanity check before training: every token id must index a valid row of the
# embedding matrix, which was resized to len(tokenizer) when the model was loaded.
# `tokenizer.vocab_size` does not count added tokens, so len(tokenizer) is the
# bound that actually matters.
max_token_id = max(
    token_id for example in tokenized_dataset for token_id in example["input_ids"]
)
print("Max token ID:", max_token_id)  # MUST be < len(tokenizer)
print("Tokenizer vocab size:", tokenizer.vocab_size)
print("Tokenizer length (incl. added tokens):", len(tokenizer))

# Fail here with a readable message instead of an opaque indexing error on the GPU.
assert max_token_id < len(tokenizer), (
    f"Token id {max_token_id} is out of range for an embedding of size {len(tokenizer)}"
)

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

from transformers import default_data_collator

# Effective batch size is 2 x 4 = 8 sequences per optimizer step on one device.
training_args = TrainingArguments(
    output_dir="./mistral-dealer-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    save_strategy="epoch",
    logging_steps=10,
    fp16=True,
    report_to="none"
)

# Every example is already padded to a fixed length and carries its own
# `labels`, so plain tensor batching is all that is required.
# DataCollatorForLanguageModeling(mlm=False) is deliberately not used: it throws
# away the dataset's `labels` and rebuilds them from `input_ids`, masking every
# token whose id equals pad_token_id — which is also the EOS id in this notebook.
data_collator = default_data_collator

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator
)

trainer.train()

In [None]:
from google.colab import drive

# The adapter is written to Google Drive, so Drive must be mounted before saving.
# Writing to /content/drive first would create a plain local directory there and
# the artifacts would be lost with the Colab VM.
drive.mount('/content/drive')

# Single destination for both artifacts. The tokenizer path previously lacked the
# leading "/", which sent it to a relative "content/..." directory instead.
adapter_dir = "/content/drive/MyDrive/lora_adapter"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

In [None]:
# Store the trained model and its tokenizer side by side in the same Drive folder.
adapter_dir = "/content/drive/MyDrive/lora_adapter"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

In [None]:
# Attach Google Drive to the Colab filesystem. The save cells above write below
# this mount point, so this cell has to be executed before them.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)