In [1]:
import os
import torch
from trl import SFTTrainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (AutoTokenizer,AutoModelForCausalLM,BitsAndBytesConfig,TrainingArguments)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def get_coldstart_filenames(directory="../../../data/coldstart_data"):
    return [
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if file.endswith(".jsonl")
    ]


filenames = get_coldstart_filenames()

dataset = load_dataset("json", data_files=filenames)["train"]

Generating train split: 581 examples [00:00, 39556.06 examples/s]


In [3]:
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
print("Created configurations")

device = "cuda" if torch.cuda.is_available() else "cpu"
model_name_or_path = "../../models/DeepSeek-R1-Distill-Qwen-1.5B"
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_cfg,
)
model = model.to(device)
print("Loaded model")

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, use_fast=True, trust_remote_code=True
)
print("Loaded tokenizer")

Created configurations


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loaded model
Loaded tokenizer


In [4]:
# Tokenize with short sequence to reduce VRAM
def tokenize(example):
    return tokenizer(
        example["text"], truncation=True, max_length=256, padding="max_length"
    )


tokenized = dataset.map(tokenize, remove_columns=dataset.column_names)

Map: 100%|██████████| 581/581 [00:00<00:00, 1169.55 examples/s]


In [5]:
# QLoRA config
peft_config = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05, bias="none", task_type=TaskType.CAUSAL_LM
)

In [6]:
model = get_peft_model(model, peft_config)

In [7]:
output_directory = input(
    "Please enter the directory you want the model to be saved and checkpointed to: "
)

In [8]:
epochs = int(input("Enter the number of training Epochs: "))

In [9]:
# Training config
training_args = TrainingArguments(
    output_dir=output_directory,
    per_device_train_batch_size=1,  # 🟡 Crucial for 4GB VRAM
    gradient_accumulation_steps=8,  # Mimics batch size of 8
    num_train_epochs=epochs,
    logging_steps=10,
    save_strategy="epoch",
    fp16=False,  # No FP16 on CPUs or low-VRAM GPUs
    bf16=False,
    optim="paged_adamw_8bit",
)

In [10]:
trainer = SFTTrainer(
    model=model, train_dataset=tokenized, processing_class=tokenizer, args=training_args
)

Truncating train dataset: 100%|██████████| 581/581 [00:00<00:00, 106932.76 examples/s]
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [11]:
trainer.train()

Step,Training Loss
10,8.102
20,7.8476
30,8.3979
40,8.7436
50,7.6047
60,6.8337
70,7.1669
80,5.9608
90,7.4824
100,6.6591


TrainOutput(global_step=360, training_loss=6.275171004401313, metrics={'train_runtime': 1410.02, 'train_samples_per_second': 2.06, 'train_steps_per_second': 0.255, 'total_flos': 6805244026552320.0, 'train_loss': 6.275171004401313})

In [13]:
model.save_pretrained(output_directory)
tokenizer.save_pretrained(output_directory)

('qwen_lora_trained\\tokenizer_config.json',
 'qwen_lora_trained\\special_tokens_map.json',
 'qwen_lora_trained\\tokenizer.json')