In [1]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
from datasets import load_dataset
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import os
os.environ["WANDB_PROJECT"] = "qwen-coder-llm-fine-tuning"

In [2]:
import wandb
wandb.login()

wandb: Currently logged in as: casvi-sanchez (virtualtek) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

In [3]:
if wandb.run is not None:
  wandb.finish()

In [None]:
model_id = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

In [4]:
dataset_name = "squad_v2"
dataset = load_dataset(dataset_name, split="train")
eval_dataset = load_dataset(dataset_name, split="validation")
print("dataset: ",dataset)
print("eval_dataset: ",eval_dataset)


dataset:  Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 130319
})
eval_dataset:  Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 11873
})


In [6]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
cuda_available = torch.cuda.is_available()

if cuda_available:
    device_id = 0  # You can change to 1,2,3 if you want other GPUs
    torch.cuda.set_device(device_id)
    # device = torch.device(f"cuda:{device_id}")
    device = torch.device(f"cuda:{device_id}")
    print(f"🖥️ Using GPU {device_id}: {torch.cuda.get_device_name(device_id)}")
else:
    device = torch.device("cpu")
    print("⚙️ No GPU available, using CPU.")

print(f"Device selected: {device}")

🖥️ Using GPU 0: NVIDIA GeForce RTX 4070 SUPER
Device selected: cuda:0


In [7]:
# Load the model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
device_map="auto"
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

base_model.config.use_cache = False
print(base_model.device)  # Shows cuda:0 or cpu

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.
  _ = torch.tensor([0], device=i)


cuda:0


In [8]:
output_dir = "./results"
login_dir="./logs"

In [9]:
from transformers import AutoTokenizer, EarlyStoppingCallback
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig

# More info: https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05,
    r=16,
    bias="none",
    task_type="CAUSAL_LM"
)

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# if you don't want to use google drive, just set this to a directory on your computer:
new_model_name = f"qwen-3b-peft-{dataset_name}"

In [10]:
import evaluate
import numpy as np
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

In [11]:
optim=torch.optim.Adam(base_model.parameters(), lr=1e-5)

In [12]:
import time
start = time.time()

training_args = SFTConfig(
    output_dir=output_dir,
    logging_dir=login_dir,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=16,
    learning_rate=2e-4,
    logging_steps=10,
    max_steps=500,
    num_train_epochs=100,
    eval_strategy="steps",
    eval_steps=100,
    save_total_limit=5,
    push_to_hub=False,
    load_best_model_at_end=True,
    dataset_text_field="question",
    max_seq_length=512,
    report_to="wandb"
)

# Initialize the trainer with the configuration
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    args=training_args,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=200)],
    optimizers=(optim,None),
    compute_metrics=None
)

# Start training
trainer.train()

end = time.time()
length = end - start

hours = int(length // 3600)
minutes = int((length % 3600) // 60)
seconds = int(length % 60)

print(f"It took {hours} hours, {minutes} minutes, and {seconds} seconds to train the model!")

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss,Validation Loss
100,4.8449,5.37362
200,4.8781,5.37362
300,4.8352,5.37362
400,4.8388,5.37362
500,4.9086,5.37362


It took 0 hours, 41 minutes, and 37 seconds to train the model!




In [18]:
print(trainer.evaluate())

{'eval_loss': 5.37362003326416, 'eval_runtime': 133.0385, 'eval_samples_per_second': 89.245, 'eval_steps_per_second': 11.162}


In [11]:
trainer.model.save_pretrained(
    os.path.join(output_dir, "final_checkpoint"),
 )

In [14]:
from peft import PeftModel

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": torch.cuda.current_device()},
    trust_remote_code=True,
)

# Step 2: Load your fine-tuned LoRA adapter
adapter_path = os.path.join(output_dir, "final_checkpoint")
model = PeftModel.from_pretrained(base_model, adapter_path)

# Step 3: Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Step 4: Generate predictions
prompt = "Question: What areas did Beyonce compete in when she was growing up??\nAnswer:"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Question: What areas did Beyonce compete in when she was growing up??
Answer: Beyonce competed in several different areas when she was growing up, including:

1. Advertising: Beyonce was very active in advertising and used various platforms to promote her work.

2. Music: She worked on various music projects, including the song "Beyoncé" and "My Beautiful Boy".

3. Sports: Beyonce played a significant part in sports, including her skating career and her boxing career.

4. Art: She worked on various art projects, including the painting "Beyoncé
