In [1]:
from dotenv import load_dotenv
import os

load_dotenv("/home/dj/Code/fine-tuning/.env")

hf_token = os.getenv("HF_TOKEN")
os.environ['HF_TOKEN'] = hf_token

In [2]:
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import is_bfloat16_supported, FastLanguageModel

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
MODEL = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-unsloth-bnb-4bit"
REPO_NAME = "vector124" # your HF username here
MODEL_NAME = "deepseek-r1-distill-qwen-1.5-unsloth-sft-recipe-with-reasoning-generator"

In [4]:
# Load the 4bit pre quantized model of deepseek and the tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

CUSTOM_TOKENS = ["<think>", "</think>"]
tokenizer.add_special_tokens({"additional_special_tokens": CUSTOM_TOKENS})
tokenizer.pad_token = tokenizer.eos_token

==((====))==  Unsloth 2025.2.12: Fast Qwen2 patching. Transformers: 4.48.3.
   \\   /|    GPU: NVIDIA GeForce GTX 1660. Max memory: 5.788 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:
# We add the LORA adapters to the model
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.2.12 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [6]:
prompt_style = """Below is a request for a recipe, paired with a response for how to make the requested recipe.
Write a response that appropriately answer the question.
Before answering, think carefully but concisely about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are an experienced home cook who has been cooking for many years. 
Your task is to provide recipes to the user. For ingredients, your answer should 
be a bulleted list. For the instructions, your answer should also be a bulleted list. 
For other parts of your answer, feel free to format as you see fit.

### Request:
{}

### Response:
{}
"""

EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    prompts = examples["prompt"]
    completions = examples["completion"]
    texts = []
    for prompt,completion in zip(prompts, completions):
        text = prompt_style.format(prompt, completion) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }


dataset = load_dataset("vector124/recipe-data-with-reasoning", split="train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

Generating train split: 100%|██████████| 100/100 [00:00<00:00, 12926.23 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8131.96 examples/s]


In [7]:
print(dataset)
print(dataset[9]["text"])

Dataset({
    features: ['id', 'status', 'inserted_at', 'updated_at', '_server_id', 'system_prompt', 'prompt', 'completion', 'rating.responses', 'rating.responses.users', 'rating.responses.status', 'text'],
    num_rows: 100
})
Below is a request for a recipe, paired with a response for how to make the requested recipe.
Write a response that appropriately answer the question.
Before answering, think carefully but concisely about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are an experienced home cook who has been cooking for many years. 
Your task is to provide recipes to the user. For ingredients, your answer should 
be a bulleted list. For the instructions, your answer should also be a bulleted list. 
For other parts of your answer, feel free to format as you see fit.

### Request:
How can I make a creamy mushroom risotto that's both rich in flavor and easy to prepare for a weeknight dinner?

### Response:


In [8]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        max_steps = 60,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        output_dir = "outputs",
        optim = "adamw_8bit",
        seed = 3407,
    ),
)

  trainer = SFTTrainer(
Map: 100%|██████████| 100/100 [00:00<00:00, 4585.74 examples/s]
Applying chat template to train dataset (num_proc=16): 100%|██████████| 100/100 [00:02<00:00, 39.65 examples/s]
Tokenizing train dataset (num_proc=16): 100%|██████████| 100/100 [00:02<00:00, 39.19 examples/s]
Tokenizing train dataset (num_proc=16): 100%|██████████| 100/100 [00:00<00:00, 443.07 examples/s]


In [9]:
os.environ['UNSLOTH_RETURN_LOGITS'] = '1'

with torch.cuda.device(0):  # Assuming you're using GPU 0
    trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 100 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 18,464,768


Step,Training Loss
1,1.5832
2,1.7991
3,1.7152
4,1.6651
5,1.6728
6,1.6859
7,1.6166
8,1.5933
9,1.6722
10,1.6812


In [15]:
# Run inference
question = "How do I make a homemade beef stroganoff using fresh mushrooms and a creamy sauce?"

FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    
    # max_new_tokens=2048,
    # use_cache=True,

)
response = tokenizer.batch_decode(outputs)
print(response[0])

<｜begin▁of▁sentence｜>Below is a request for a recipe, paired with a response for how to make the requested recipe.
Write a response that appropriately answer the question.
Before answering, think carefully but concisely about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are an experienced home cook who has been cooking for many years. 
Your task is to provide recipes to the user. For ingredients, your answer should 
be a bulleted list. For the instructions, your answer should also be a bulleted list. 
For other parts of your answer, feel free to format as you see fit.

### Request:
How do I make a homemade beef stroganoff using fresh mushrooms and a creamy sauce?

### Response:

To address the user's request for a homemade beef stroganoff using fresh mushrooms and a creamy sauce, I need to provide a clear and detailed recipe. The recipe should include ingredients such as fresh mushrooms, beef, onions, garlic, 

In [13]:
# Save to the local directory and push it to the Hub
model.save_pretrained(MODEL_NAME)
tokenizer.save_pretrained(MODEL_NAME)
model.save_pretrained_merged(MODEL_NAME, tokenizer, save_method="merged_16bit")

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 10.7 out of 31.24 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 86%|████████▌ | 24/28 [00:00<00:00, 36.28it/s]
We will save to Disk and not RAM now.
100%|██████████| 28/28 [00:00<00:00, 31.14it/s]


Unsloth: Saving tokenizer... Done.
Done.


In [14]:
fine_tuned_model = f"{REPO_NAME}/{MODEL_NAME}"
model.push_to_hub(fine_tuned_model, safe_serialization=None)

adapter_model.bin: 100%|██████████| 74.0M/74.0M [00:02<00:00, 30.5MB/s]


Saved model to https://huggingface.co/vector124/deepseek-r1-distill-qwen-1.5-unsloth-sft-recipe-with-reasoning-generator
