start

In [1]:
import unsloth
print(dir(unsloth))


Unsloth: Patching Xformers to fix some performance issues.
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
import torch
from unsloth import FastLlamaModel, load_correct_tokenizer

model_name = "unsloth/Llama-3.3-70B-Instruct"

# Get the correct tokenizer for the model.
tokenizer = load_correct_tokenizer(model_name)

# Load the model in 4-bit mode without passing torch_dtype explicitly.
model_output = FastLlamaModel.from_pretrained(
    model_name,
    load_in_4bit=True,       # Enable 4-bit quantization
    device_map="auto",
    # Remove torch_dtype to avoid duplicate specification.
)
model = model_output[0] if isinstance(model_output, tuple) else model_output

print("Model loaded with dtype:", next(model.parameters()).dtype)


==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.097 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.3.1+cu121. CUDA: 9.0. CUDA Toolkit: 12.1. Triton: 2.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 30/30 [09:35<00:00, 19.18s/it]


Model loaded with dtype: torch.bfloat16


In [3]:
from datasets import load_dataset

def preprocess(batch):
    # Build a list of prompts for each example in the batch.
    prompts = [
        f"Translate English to Norwegian:\nEnglish: {text}\nNorwegian:" 
        for text in batch["input"]
    ]
    # Tokenize the list of prompts.
    tokens = tokenizer(
        prompts,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"  # Get output as PyTorch tensors.
    )
    # Convert the tensors to Python lists.
    return {key: value.tolist() for key, value in tokens.items()}

# Load your JSONL dataset.
dataset = load_dataset("json", data_files={"train": "norges-bank-translations.jsonl"}, split="train")

# Remove all existing columns to avoid conflicts, and apply our preprocessing function in batched mode.
dataset = dataset.map(preprocess, batched=True, remove_columns=dataset.column_names)

# (Optional) Display the first example to verify the tokenization.
print(dataset[0])


{'input_ids': [128000, 28573, 6498, 311, 45721, 512, 23392, 25, 9356, 555, 24245, 90043, 599, 72720, 82881, 311, 279, 62924, 87003, 9251, 315, 452, 81875, 8715, 323, 18719, 15051, 389, 7950, 220, 845, 7552, 220, 679, 17, 627, 33763, 37665, 25, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 128004, 1280

In [4]:
import torch
from peft import LoraConfig
from transformers import TrainingArguments
# Import SFTTrainer from the trainer submodule.
from unsloth.trainer import SFTTrainer

# Define your LoRA configuration.
lora_config = LoraConfig(
    r=32,                   # Rank for the LoRA adapters
    lora_alpha=64,          # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Modules to adapt
    lora_dropout=0.1,       # Dropout rate for LoRA layers
    bias="none",            # How to handle bias
    task_type="CAUSAL_LM"   # Task type (causal language modeling)
)

# Set up the training arguments.
training_args = TrainingArguments(
    output_dir="./llama-norwegian-translation",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    save_steps=100,
    logging_steps=10,
    save_total_limit=2,
    evaluation_strategy="no",
    bf16=True,  # Use bf16 for training
)




In [9]:
import os
os.environ["XFORMERS_DISABLE_FMHA"] = "1"
print("XFORMERS_DISABLE_FMHA set to", os.environ["XFORMERS_DISABLE_FMHA"])


XFORMERS_DISABLE_FMHA set to 0


In [11]:
# Create the SFTTrainer using the model instance (from Cell 1), tokenized dataset (from Cell 2),
# LoRA configuration, training arguments, and tokenizer.
trainer = SFTTrainer(
    model=model,            # Model instance extracted earlier.
    train_dataset=dataset,  # Your tokenized dataset.
    peft_config=lora_config,
    args=training_args,
    tokenizer=tokenizer,
)

# Start the fine-tuning process.
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 86,587 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 32,469
 "-____-"     Number of trainable parameters = 65,536,000


IndexError: Dimension specified as 1 but tensor has no dimensions

In [None]:
# Save the fine-tuned model to disk.
trainer.save_model("./llama-norwegian-translation")


In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

# 1. Load dataset
dataset = load_dataset(
    "json",
    data_files={"train": "data/small_dataset.jsonl"},
    split="train"
)

# 2. Process data into an instruction/prompt format
#    For demonstration, we'll just do input -> output pairs.
#    In practice, you might want to build a prompt like:
#      <BOS> instruction: {input} \n response:
#      or something that fits your model style.
def tokenize_function(example):
    # We'll do a simplistic: "input" + special token + "output"
    # Adapt as needed for your model’s prompt format.
    prompt = f"Translate English to Norwegian:\nEnglish: {example['input']}\nNorwegian: {example['output']}"
    return tokenizer(prompt, truncation=True)

# 3. Load tokenizer & base model
model_name_or_path = "my-llama3.3"  # or "meta-llama/Llama-2-7b-hf", etc.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=False)

# Some Llama tokenizers might require special settings:
# tokenizer.pad_token = tokenizer.eos_token  # For example

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",  # automatically place on GPU
    load_in_8bit=True,  # If you want int8 to save memory
)

# Prepare model for int8 training (if using 8-bit)
model = prepare_model_for_int8_training(model)

# 4. Apply LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj","v_proj"],  # Common for Llama, adjust if needed
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

# 5. Tokenize dataset
tokenized_dataset = dataset.map(tokenize_function, batched=False)

# 6. Data Collator
#    We can use a simple default_data_collator or a custom one
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 7. Training hyperparameters
#    Because the dataset is extremely small, the below is mostly for demo.
training_args = TrainingArguments(
    output_dir="finetuned-llama",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=10,
    learning_rate=1e-4,
    fp16=True,  # H100 can handle fp16 or bf16 well
    logging_steps=1,
    save_steps=2,
    save_total_limit=1,
)

# 8. Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

# 9. Train
trainer.train()

# 10. Save LoRA adapter and tokenizer
trainer.save_model("finetuned-llama")
tokenizer.save_pretrained("finetuned-llama")


  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'prepare_model_for_int8_training' from 'peft' (/home/ubuntu/miniconda3/envs/ollama/lib/python3.10/site-packages/peft/__init__.py)