# Installing Unslooth

In [3]:
!pip install --upgrade unsloth


Collecting unsloth
  Using cached unsloth-2025.5.9-py3-none-any.whl.metadata (47 kB)
Collecting unsloth_zoo>=2025.5.11 (from unsloth)
  Using cached unsloth_zoo-2025.5.11-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Using cached xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Using cached bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting tyro (from unsloth)
  Using cached tyro-0.9.24-py3-none-any.whl.metadata (11 kB)
Collecting datasets>=3.4.1 (from unsloth)
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)
  Using cached trl-0.18.1-py3-none-any.whl.metadata (11 kB)
Collecting protobuf<4.0.0 (from unsloth)
  Using cached protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->da

# Importing Libraries

In [1]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!


# DeepSeek Initialization

In [2]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B-unsloth-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype=None,
    load_in_4bit = True,
)

==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

# LORA Initialization

In [3]:

model = FastLanguageModel.get_peft_model(
    model, r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    use_rslora = True,

)

Unsloth 2025.5.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# alpaca-gpt4 Loading And Preprocessing

In [8]:
from datasets import load_dataset
dataset = load_dataset("vicgalle/alpaca-gpt4", split="train[:5000]")

will use only 5k rows

In [9]:
dataset

Dataset({
    features: ['instruction', 'input', 'output', 'text'],
    num_rows: 5000
})

In [10]:
from unsloth import apply_chat_template

def format_dataset_for_chat(dataset):
    def format_conversation(example):
        conversation = [
            {"role": "user", "content": example["instruction"]},
            {"role": "assistant", "content": example["output"]}
        ]
        return {"conversations": conversation}
    return dataset.map(format_conversation)

formatted_dataset = format_dataset_for_chat(dataset)
dataset = apply_chat_template(
    formatted_dataset,
    tokenizer=tokenizer,
)

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Unsloth: We automatically added an EOS token to stop endless generations.


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

# Model Training

In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments

In [13]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir="outputs"
    ),
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [17]:
traininf_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Step,Training Loss
1,0.7961
2,0.6741
3,0.5299
4,0.6079
5,0.4095
6,0.408
7,0.3792
8,0.3681
9,0.3815
10,0.3255


# Saving the model and quick inference

In [18]:
model.save_pretrained("finetuned_r1_model")

In [20]:

inference_model, inference_tokenizer = FastLanguageModel.from_pretrained(
    model_name="./finetuned_r1_model",
    max_seq_length=2048,
    load_in_4bit=True
)


==((====))==  Unsloth 2025.5.9: Fast Llama patching. Transformers: 4.52.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


# Some quistions for quick test

1 - How do I create a virtual environment in Python?

2 - Solve the equation: 3x + 5 = 20. Show your work.

3 - Act like a pirate and tell me how to make coffee.

In [27]:

while True:
    user_input = input("> ")
    if user_input.lower() in ["bye", "exit", "stop"]:
        break

    text_prompt = [f"{user_input}"]
    for prompt in text_prompt:
        formated_prompt = inference_tokenizer.apply_chat_template([{
            "role": "user",
            "content": prompt
        }], tokenize=False)

        model_inputs = inference_tokenizer(formated_prompt, return_tensors="pt").to("cuda")
        generated_ids = inference_model.generate(
            **model_inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=inference_tokenizer.pad_token_id
        )

        response = inference_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        print(response)

> How do I create a virtual environment in Python?
<ÔΩúUserÔΩú>How do I create a virtual environment in Python? 

I‚Äôve heard that using a virtual environment is a good practice when working on multiple projects or when sharing code with others. I‚Äôve seen that in Python, this is done using the venv module. But I need to figure out how exactly to use it.

First, I need to know what the purpose of a virtual environment is. It‚Äôs meant to create an isolated Python environment for a project, so that changes in one project don't affect other projects or the system-wide Python installation.

So, to create a virtual environment, I need to use the venv module. But wait, is that module included by default in Python? I think it is, at least in Python 3.3 and above.

To create a virtual environment, I need to do the following steps:

1. Open the command prompt or the terminal.
2. Navigate to the project or folder where the virtual environment is to be created.
3. Run the following command: `p