In [2]:
!pip install -qqq "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --progress-bar off
!pip install -qqq --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes --progress-bar off


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 17.0.0 which is incompatible.
ibis-framework 8.0.0 requires pyarrow<16,>=2, but you have pyarrow 17.0.0 which is incompatible.[0m[31m
[0m

In [3]:
import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

# Load model
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.0.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.564 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

In [4]:
# Prepare model for PEFT
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth"
)
print(model.print_trainable_parameters())


Unsloth 2024.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
None


In [5]:
# Adjust tokenizer for the new dataset structure
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping={"role": "from", "content": "value", "user": "latex_expression", "assistant": "solution"}
)


Unsloth: Will map <|im_end|> to EOS = <|end_of_text|>.


In [6]:
# Define function to apply the new template to the dataset
def apply_template(examples):
    latex_expressions = examples["latex_expression"]
    solutions = examples["solution"]
    text = [f"Human: {latex} Assistant: {solution}" for latex, solution in zip(latex_expressions, solutions)]
    return {"text": text}


In [8]:
# Load and preprocess the dataset from the specified directory
dataset = load_dataset("json", data_files="/content/train.json", split="train")
dataset = dataset.map(apply_template, batched=True)


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/26846 [00:00<?, ? examples/s]

In [9]:
# Set up the trainer with the modified dataset
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        output_dir="output",
        seed=0,
    ),
)

# Train the model
trainer.train()


Generating train split: 0 examples [00:00, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,359 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 85
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,0.9064
2,0.9127
3,0.8886
4,0.8458
5,0.7899
6,0.7368
7,0.6881
8,0.5103
9,0.4677
10,0.4653


TrainOutput(global_step=85, training_loss=0.349595268684275, metrics={'train_runtime': 843.2672, 'train_samples_per_second': 1.612, 'train_steps_per_second': 0.101, 'total_flos': 1.260281022948311e+17, 'train_loss': 0.349595268684275, 'epoch': 1.0})

In [10]:
# Load model for inference
model = FastLanguageModel.for_inference(model)

# Example inference with a LaTeX expression
latex_expression = r"\pi h r^{2}"
messages = [
    {"from": "latex_expression", "value": latex_expression},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True)


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|im_start|>user
\pi h r^{2}<|im_end|>
<|im_start|>assistant
\mathtt{\text{Derivative(a*x**2 + a*exp(b*x + c) + b*x + c + 9*x**4 + 3*x**3 + 5*x**2 + 3*x + sqrt(a + exp(x)) + 5, x)}} Assistant: 
from sympy import symbols, diff, sin, cos, exp, sqrt

# Define the symbols outside of the function
x, a, b, c = symbols('x a b c')

def derivative_by_fun_calling(x_val, a_val, b_val, c_val):
    expr = a*x**
