In [1]:
import os


# Most of the code in this notebook is adapted from the official Unsloth
# Gemma-3 (270M) notebook:
# https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Gemma3_(270M).ipynb#scrollTo=-Xbb0cuLzwgf
#
# These variables are set before `unsloth` is imported further down,
# presumably so the libraries see them when they initialise.
os.environ.update(
    {
        # Turn off parallelism inside the Hugging Face tokenizers library.
        "TOKENIZERS_PARALLELISM": "false",
        # Extra TorchDynamo diagnostics; these are the two settings that the
        # compiler error message at the end of this notebook asks for.
        "TORCHDYNAMO_VERBOSE": "1",
        "TORCH_LOGS": "+dynamo",
    }
)

from unsloth import FastModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm
INFO:datasets:PyTorch version 2.8.0 available.


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
import torch
import time

if torch.cuda.is_available():
    # Create tensors on CPU
    cpu_tensor = torch.randn(1000, 1000)
    
    # Create tensors on GPU
    gpu_tensor = torch.randn(1000, 1000).cuda()
    
    # Time CPU operation
    start = time.time()
    cpu_result = torch.mm(cpu_tensor, cpu_tensor)
    cpu_time = time.time() - start
    
    # Time GPU operation
    start = time.time()
    gpu_result = torch.mm(gpu_tensor, gpu_tensor)
    gpu_time = time.time() - start
    
    print(f"CPU time: {cpu_time:.4f}s")
    print(f"GPU time: {gpu_time:.4f}s")
    print(f"GPU tensor device: {gpu_result.device}")
else:
    print("CUDA not available")

CPU time: 0.0184s
GPU time: 0.8498s
GPU tensor device: cuda:0


In [3]:
# Sequence-length limit handed to the model loader.
max_seq_length = 2048

# Loader options. With 4-bit, 8-bit and full fine-tuning all switched off,
# the loader reports (see the cell output) that it falls back to 16-bit LoRA.
load_options = dict(
    model_name = "unsloth/gemma-3-270m-it",
    max_seq_length = max_seq_length,  # Choose any for long context!
    load_in_4bit = False,             # True -> 4 bit quantization to reduce memory
    load_in_8bit = False,             # True -> a bit more accurate, uses 2x memory
    full_finetuning = False,          # True -> full finetuning instead of adapters
    # token = "hf_...",               # use one if using gated models
)

model, tokenizer = FastModel.from_pretrained(**load_options)

==((====))==  Unsloth 2025.8.9: Fast Gemma3 patching. Transformers: 4.56.0.
   \\   /|    Orin. Num GPUs = 1. Max memory: 7.441 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0. CUDA: 8.7. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


`torch_dtype` is deprecated! Use `dtype` instead!


Unsloth: QLoRA and full finetuning all not selected. Switching to 16bit LoRA.


In [4]:
# Projection layers that receive LoRA adapters: the four attention
# projections followed by the three MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with the adapters; `model` is rebound to the wrapped
# model, so this cell should be run once per freshly loaded base model.
model = FastModel.get_peft_model(
    model,
    r = 128,
    target_modules = lora_target_modules,
    lora_alpha = 128,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth: Making `model.base_model.model.model` require gradients


In [5]:
import sys
# Directory the kernel is running in. A notebook has no `__file__`, and
# resolving a bare file name with abspath() only ever yields the working
# directory, so ask for it directly.
current_dir = os.getcwd()

# One level up from the working directory; GetXY.py is expected to live there
# (assumption carried over from the original cell -- verify the layout).
parent_dir = os.path.dirname(current_dir)

# Make the parent directory importable. The membership check keeps repeated
# runs of this cell from stacking duplicate entries onto sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Importing GetXY presumably runs that module's top-level code, which would
# explain the diagnostics printed above the success message -- confirm in GetXY.py.
from GetXY import x_string, y

print("Successfully imported variables!")

3 + 2 - -4
2543
9.0

Expressions not in x:
-4 + -1 + -1
True
1457
-6.0
15
-4.0
[-5.   1.   1.   0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5
  0.5]
Successfully imported variables!


In [6]:
import datasets

# Sanity check: there should be exactly one target per input expression.
print(len(x_string))
print(len(y))

# The targets are later used as chat message content, so store them as text.
y = list(map(str, y))

# Column-oriented mapping: one column of prompts, one of expected answers.
data = dict(input = x_string, expected_output = y)

# Wrap the columns in a Hugging Face Dataset.
dataset = datasets.Dataset.from_dict(data)

# Show the first row to confirm inputs and outputs line up.
print(dataset[0])

2543
2543
{'input': '3 + 2 - -4', 'expected_output': '9.0'}


In [7]:
from unsloth.chat_templates import get_chat_template
# Rebind the tokenizer to one configured with the "gemma3" chat template,
# which is what apply_chat_template() uses further down.
tokenizer = get_chat_template(tokenizer, chat_template = "gemma3")

In [8]:
def convert_to_chatml(example):
    """Turn one dataset row into a two-message conversation.

    Args:
        example: Mapping with an ``"input"`` entry (the prompt) and an
            ``"expected_output"`` entry (the reference answer).

    Returns:
        A dict with a single ``"conversations"`` key holding a list of two
        role/content messages: the user prompt followed by the assistant reply.
    """
    user_turn = {"role": "user", "content": example["input"]}
    assistant_turn = {"role": "assistant", "content": example["expected_output"]}
    return {"conversations": [user_turn, assistant_turn]}

# Add the "conversations" column to every row, using a single worker process.
dataset = dataset.map(convert_to_chatml, num_proc = 1)

Map: 100%|█████████████████████████████████████████████████████████████████████████████████████| 2543/2543 [00:00<00:00, 13027.62 examples/s]


In [9]:
def formatting_prompts_func(examples):
    """Render a batch of conversations into plain training strings.

    Uses the notebook-level ``tokenizer`` to apply its chat template to each
    conversation (no tokenisation, no trailing generation prompt) and strips a
    leading ``<bos>`` marker from the rendered text if one is present.

    Args:
        examples: Batched mapping whose ``"conversations"`` entry is a list of
            conversations (each a list of role/content messages).

    Returns:
        A dict with a ``"text"`` list holding one rendered string per
        conversation, in the same order.
    """
    rendered = []
    for conversation in examples["conversations"]:
        prompt = tokenizer.apply_chat_template(
            conversation,
            tokenize = False,
            add_generation_prompt = False,
        )
        rendered.append(prompt.removeprefix('<bos>'))
    return {"text": rendered}

# Batched pass that adds the rendered "text" column, using a single worker process.
dataset = dataset.map(
    formatting_prompts_func,
    batched = True,
    num_proc = 1,
)

Map: 100%|██████████████████████████████████████████████████████████████████████████████████████| 2543/2543 [00:00<00:00, 7416.05 examples/s]


In [10]:
from trl import SFTTrainer, SFTConfig
# Training hyper-parameters, kept in one place so they are easy to tweak.
sft_config = SFTConfig(
    dataset_text_field = "text",
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 1,  # Use GA to mimic batch size!
    warmup_steps = 5,
    # num_train_epochs = 1,           # Set this for 1 full training run.
    max_steps = 100,
    learning_rate = 5e-5,             # Reduce to 2e-5 for long training runs
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "none",               # Use this for WandB etc
    # torch_compile=True,
    # torch_compile_backend="aot_eager",
)

# Supervised fine-tuning trainer over the rendered "text" column; no
# evaluation split is configured.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,  # Can set up evaluation!
    args = sft_config,
)

Unsloth: Tokenizing ["text"] (num_proc=2): 100%|█████████████████████████████████████████████████| 2543/2543 [00:04<00:00, 590.32 examples/s]


In [11]:
from unsloth.chat_templates import train_on_responses_only
# Text markers that open a user turn and a model turn in the rendered
# conversations (compare the decoded sample shown in the next cell).
turn_markers = {
    "instruction_part": "<start_of_turn>user\n",
    "response_part": "<start_of_turn>model\n",
}

# Going by the helper's name, this limits training to the model's replies --
# see the Unsloth documentation for the exact masking behaviour.
trainer = train_on_responses_only(trainer, **turn_markers)

In [13]:
# Decode one tokenised training example back to text to eyeball the template.
sample_input_ids = trainer.train_dataset[100]["input_ids"]
tokenizer.decode(sample_input_ids)

'<bos><start_of_turn>user\n-2 - 4 + 0<end_of_turn>\n<start_of_turn>model\n-6.0<end_of_turn>\n'

In [12]:
# Start fine-tuning and keep the value returned by train() for later inspection.
# NOTE(review): in the recorded run this call aborts with
#   BackendCompilerFailed: backend='inductor' raised:
#   TypeError: cannot pickle '_thread.RLock' object
# i.e. the failure is raised from the torch.compile (inductor) backend.
# Possible workaround, unverified: turn compilation off before importing
# unsloth (Unsloth documents an UNSLOTH_COMPILE_DISABLE flag -- TODO confirm),
# then restart the kernel and run all cells in order.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,543 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 1 x 1) = 8
 "-____-"     Trainable parameters = 30,375,936 of 298,474,112 (10.18% trained)


BackendCompilerFailed: backend='inductor' raised:
TypeError: cannot pickle '_thread.RLock' object

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"
