# --> 2.1) Fine Tuning 2: Using LoRA and PEFT

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, TaskType
from transformers import BitsAndBytesConfig




In [2]:
# Model and tokenizer
model_name = "Qwen/Qwen2-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Quantization config (4-bit)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

In [3]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True
)

model = prepare_model_for_kbit_training(model)

In [4]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn", "q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)

In [6]:
dataset = load_dataset("lucadillenburg/startup-chatbot")

In [7]:
def format_as_chat(example):
    instruction = example["instruction"] or ""
    output = example["output"] or ""
    if instruction.strip() == "" or output.strip() == "":
        return {"text": None}
    messages = [
        {"role": "system", "content": "You are a helpful assistant that provides support to startup founders."},
        {"role": "user", "content": instruction},
        {"role": "assistant", "content": output}
    ]
    return {"text": tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)}

formatted_dataset = dataset.map(format_as_chat)
formatted_dataset = formatted_dataset.filter(lambda x: x["text"] is not None and x["text"].strip() != "")

In [9]:
def tokenize_function(example):
    tokens = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)

In [10]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        outputs = model(**inputs)
        loss = outputs.loss
        return (loss, outputs) if return_outputs else loss

In [11]:
training_args = TrainingArguments(
    output_dir="./qwen-startup-finetuned-lora",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="no",
    fp16=True,
    push_to_hub=False
)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"]
)

trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,30.182
20,23.8009
30,15.3898
40,7.0615
50,4.1884
60,3.8752
70,3.328
80,3.3536
90,2.7964
100,3.2814


  return fn(*args, **kwargs)


TrainOutput(global_step=918, training_loss=3.8395306331659453, metrics={'train_runtime': 23366.3345, 'train_samples_per_second': 0.157, 'train_steps_per_second': 0.039, 'total_flos': 1.478959775612928e+16, 'train_loss': 3.8395306331659453, 'epoch': 2.99184339314845})

In [12]:
model.save_pretrained("./qwen-startup-finetuned-lora")
tokenizer.save_pretrained("./qwen-startup-finetuned-lora")

('./qwen-startup-finetuned-lora\\tokenizer_config.json',
 './qwen-startup-finetuned-lora\\special_tokens_map.json',
 './qwen-startup-finetuned-lora\\vocab.json',
 './qwen-startup-finetuned-lora\\merges.txt',
 './qwen-startup-finetuned-lora\\added_tokens.json',
 './qwen-startup-finetuned-lora\\tokenizer.json')