In [None]:
!pip install unsloth

Collecting unsloth
  Downloading unsloth-2025.4.3-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.4/46.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.4.2 (from unsloth)
  Downloading unsloth_zoo-2025.4.2-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.19-py3-none-any.whl.metadata (9.9 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,<=0.15.2,>=0.7.9 (from unsloth)
  Downloadin

In [None]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset

# Loading options, kept as named settings so later cells can reuse them.
max_seq_length = 2048  # sequence-length limit passed to the loader
dtype = None           # no explicit dtype requested; presumably auto-selected by the loader (confirm)
load_in_4bit = True    # ask for the 4-bit variant of the weights

# Fetch the pre-quantized Mistral-7B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)



NotImplementedError: Unsloth: No NVIDIA GPU found? Unsloth currently only supports GPUs!

In [None]:
# Read every record of the local JSON Lines file into a single Dataset.
dataset = load_dataset(
    "json",
    data_files={"data": "instructions.jsonl"},
    split="data",
)

# First cut: hold out 10% of all rows as the test set.
dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_val, test_set = dataset["train"], dataset["test"]

# Second cut: 10% of the remaining 90% (about 9% of the total) becomes the
# validation set; the rest (about 81% of the total) is used for training.
train_val = train_val.train_test_split(test_size=0.1, seed=42)
train_set, validation_set = train_val["train"], train_val["test"]

print(f"Train size: {len(train_set)}, Validation size: {len(validation_set)}, Test size: {len(test_set)}")

# Persist the held-out test rows as JSON Lines, one record per line.
test_set.to_json("test_set.jsonl", orient="records", lines=True)

# Alpaca-style prompt template with three positional "{}" slots, one under each
# of the "### Instruction:", "### Input:" and "### Response:" headers.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)



Generating data split: 0 examples [00:00, ? examples/s]

Train size: 247, Validation size: 28, Test size: 31


Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:


# End-of-sequence marker taken from the tokenizer; formatting_prompts_func
# appends it to the end of every formatted example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of records into Alpaca-style training strings.

    Args:
        examples: Mapping of column name to a list of values, one entry per
            row. Must provide the "instruction", "input" and "output" columns.

    Returns:
        A dict with the single key "text" holding one string per row:
        ``alpaca_prompt`` filled with the row's instruction, input and output
        (in that order), followed by ``EOS_TOKEN``.

    A field whose value is None (for example a record without an "input") is
    rendered as an empty string instead of the literal text "None".
    """
    def _as_text(value):
        # str.format would otherwise print a missing field as "None".
        return "" if value is None else value

    rows = zip(examples["instruction"], examples["input"], examples["output"])
    texts = [
        alpaca_prompt.format(_as_text(instruction), _as_text(input_text), _as_text(output)) + EOS_TOKEN
        for instruction, input_text, output in rows
    ]
    return {"text": texts}

# Add the rendered "text" column to each split, in the order
# train -> validation -> test.
train_set, validation_set, test_set = (
    split.map(formatting_prompts_func, batched=True)
    for split in (train_set, validation_set, test_set)
)


Map:   0%|          | 0/247 [00:00<?, ? examples/s]

Map:   0%|          | 0/28 [00:00<?, ? examples/s]

Map:   0%|          | 0/31 [00:00<?, ? examples/s]

In [None]:

# Wrap the loaded base model with LoRA adapters through Unsloth's PEFT helper.
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter hyper-parameters
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_rslora=False,
    loftq_config=None,
    # Layers that receive adapters: attention projections and MLP projections
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    # Memory / reproducibility settings
    use_gradient_checkpointing=True,
    random_state=3407,
)


Unsloth 2025.4.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Setup Trainer
from trl import SFTTrainer
from transformers import TrainingArguments

# Choose the mixed-precision mode once: bf16 when CUDA reports support for it,
# fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

# Optimisation schedule and bookkeeping for the run.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    max_steps=1000,
    warmup_steps=5,
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
)

# Supervised fine-tuning on the formatted "text" column. Packing is left off;
# the original note says enabling it can make training on short sequences
# up to 5x faster.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_set,
    eval_dataset=validation_set,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/247 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/28 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop configured on `trainer` and keep whatever train()
# returns in `trainer_stats` for later inspection.
# NOTE(review): the recorded run reports 1,000 total steps over 247 examples
# (33 epochs) — confirm that many passes over the data is intended.
trainer_stats = trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 247 | Num Epochs = 33 | Total steps = 1,000
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/7,000,000,000 (0.60% trained)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkinganycowking[0m ([33mkinganycowking-robert-gordon-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.0315
2,1.9333
3,1.7357
4,1.4801
5,1.4798
6,1.3484
7,1.0132
8,0.9456
9,0.8612
10,0.8409


In [None]:
# Run the trainer's evaluation pass and keep the returned metrics; presumably
# this scores the eval_dataset given to the trainer (the validation split).
results = trainer.evaluate()

# Show the raw metrics (the recorded run printed a dict with eval_loss and timing fields).
print("Validation Results:", results)

Unsloth: Not an error, but MistralForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


Validation Results: {'eval_loss': 0.3750697374343872, 'eval_runtime': 3.5681, 'eval_samples_per_second': 10.37, 'eval_steps_per_second': 1.401}


In [None]:
# Save the fine-tuned model through the trainer into ./my_model_fix.
# NOTE(review): the recorded zip listing shows adapter weights/config and
# tokenizer files, i.e. presumably LoRA adapters rather than merged full
# weights — confirm that is the artifact you want to ship.
trainer.save_model("./my_model_fix")

In [None]:
# Recursively bundle the my_model_fix directory into my_model_fix.zip.
# NOTE(review): the recorded output below lists my_model_signature_fix_3/, so it
# comes from a run with a different directory name — re-run to refresh it.
!zip -r my_model_fix.zip my_model_fix

  adding: my_model_signature_fix_3/ (stored 0%)
  adding: my_model_signature_fix_3/adapter_model.safetensors (deflated 21%)
  adding: my_model_signature_fix_3/special_tokens_map.json (deflated 79%)
  adding: my_model_signature_fix_3/training_args.bin (deflated 53%)
  adding: my_model_signature_fix_3/tokenizer.json (deflated 85%)
  adding: my_model_signature_fix_3/adapter_config.json (deflated 56%)
  adding: my_model_signature_fix_3/tokenizer.model (deflated 55%)
  adding: my_model_signature_fix_3/tokenizer_config.json (deflated 68%)
  adding: my_model_signature_fix_3/README.md (deflated 66%)
