In [1]:
# Install and update the required library
%%capture
!pip install unsloth
# Also get the latest nightly Unsloth!
#!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [2]:
# Show the transformers library path
! pip show transformers

Name: transformers
Version: 4.48.3
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /usr/local/lib/python3.11/dist-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm
Required-by: peft, sentence-transformers, trl, unsloth, unsloth_zoo


# Load Model

In [2]:
# Import necessary libraries from Unsloth and PyTorch
from unsloth import FastVisionModel # FastLanguageModel for LLMs
import torch

# Load the unsloth/Pixtral-12B-2409 model with 4-bit quantization for efficient memory use
model, tokenizer = FastVisionModel.from_pretrained(
    # if Pixtral-12B
    "unsloth/Pixtral-12B-2409",
    load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for long context
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.2.15: Fast Llava vision patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/214k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.22G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/133 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/162 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/177k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

In [3]:
# Configure LoRA (Low-Rank Adaptation) fine-tuning settings
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers     = False,  # False if not finetuning vision layers
    finetune_language_layers   = True,   # False if not finetuning language layers
    finetune_attention_modules = True,   # False if not finetuning attention layers
    finetune_mlp_modules       = True,   # False if not finetuning MLP layers

    r = 16,            # The larger, the higher the accuracy, but might overfit
    lora_alpha = 16,   # Recommended alpha == r at least
    lora_dropout = 0,  # No dropout for LoRA
    bias = "none",
    random_state = 3407,
    use_rslora = False,  # Do not use rank-stabilized LoRA
    loftq_config = None, # No use of LoftQ
)

Unsloth: Making `model.base_model.model.language_model.model` require gradients


# Load Dataset

In [4]:
# Load the dataset (text-only) from Hugging Face
from datasets import load_dataset
dataset = load_dataset("mlabonne/FineTome-100k", split = "train")

README.md:   0%|          | 0.00/982 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [5]:
# Function to convert dataset into the required conversation format
def convert_to_conversation(sample):
    new_conversation = []
    for init_conversation in sample['conversations']:
      if init_conversation['from'] == 'human':
         role = 'user'
      elif init_conversation['from'] == 'gpt':
         role = 'assistant'
      else:
        continue
      new_conversation.append(
           { "role": role,
             "content" : [
                {"type" : "text",  "text"  : init_conversation['value']},
             ]
           },
      )
    return {"messages" : new_conversation}

In [6]:
# Apply conversion to the dataset
converted_dataset = [convert_to_conversation(sample) for sample in dataset]

# Extracting the first instruction from the dataset
instruction = dataset[0]["conversations"][0]['value']
instruction

'Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.'

# Plain text Inference test (no image input)

In [7]:
# Prepare model for inference mode (after fine-tuning)
FastVisionModel.for_inference(model) # Enable for inference!

image = None  # No image input since the dataset is text-only

# Format the user prompt as required for the model
messages = [
    {"role": "user", "content": [
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)

print(input_text)

<s>[INST]Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. 

Furthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.

Finally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.[/INST]


In [8]:
# Tokenize the input text for model inference
inputs = tokenizer(
    images=None,
    text=input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")

# Generate model output using the provided text input
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

### Boolean Operators

Boolean operators are used to combine boolean values (true or false) in logical expressions. The primary boolean operators are:

1. **AND (`&&`)**: Returns true if both operands are true.
2. **OR (`||`)**: Returns true if at least one operand is true.
3. **NOT (`!`)**: Returns the opposite of the operand.

### Examples in Programming

Here are examples of boolean operators in Python:

```python
# AND
a = True
b = False
result_and = a and b  # False

# OR
result_or = a or b  # True

# NOT
result_not = not a  # False
```

### Operator Precedence

Operator precedence determines the order in which operations are evaluated in an expression. For example, the `AND` operator has higher precedence than the `OR` operator. This means that in the expression `A && B || C`, the `&&` operation will be evaluated before the `||` operation.

```python
# Example in Python
A = True
B = False
C = True

result = (A && B) || C  # This will be evaluated as (A && B) || C, not A && (B || 

In [9]:
# Inference for image+text data input
import requests
from PIL import Image


image1 = Image.open(requests.get("https://llava-vl.github.io/static/images/view.jpg", stream=True).raw)

messages = [
    {
        "role": "user",
        "content": [
          {"type": "text", "text": "What are the things I should be cautious about when I visit this place? What should I bring with me?"},
          {"type" : "image", "image" : image1}
        ]
    }
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)


inputs = tokenizer(text=input_text, images=image1, padding=True, return_tensors="pt").to("cuda")

# Generate model output using the provided text input
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

When visiting a serene natural location like the one depicted in the image, there are several things you should be cautious about and items you should bring with you. Here are some key points to consider:

### Things to Be Cautious About:
1. **Safety on Water**: If the location includes a lake or river, be cautious about swimming or boating. Ensure you know your swimming abilities and follow any posted safety guidelines.
2. **Wildlife**: Be aware of local wildlife. Depending on the area, there could be animals like bears, snakes, or insects that pose a risk.
3. **Weather**: Weather can change quickly, especially in mountainous or forested areas. Be prepared for rain, wind, or temperature drops.
4. **Terrain**: Uneven or slippery terrain can lead to injuries. Wear appropriate footwear and be cautious while walking.
5. **Privacy and Safety**: If the location is remote, be mindful of your personal safety. Let someone know your plans and stick to well-populated or designated areas if possi

# Fine tune the model

In [10]:
# Convert dataset to Hugging Face Dataset format
from datasets import Dataset

my_dataset = Dataset.from_dict({"dataset": converted_dataset})

In [11]:
# Function to format prompts for training
def formatting_prompts_func(examples):
    try:
        convos = examples["dataset"]
        texts = [tokenizer.apply_chat_template(convo['messages'], tokenize = False, add_generation_prompt = False) for convo in convos]
        return { "text" : texts, }
    except:
        print(examples)
        raise

# Apply formatting to dataset
dataset = my_dataset.map(formatting_prompts_func, batched = True,)

# Display formatted dataset example
dataset[2]['text']

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

'<s>[INST]Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions.\n\nFurthermore, discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. Finally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions.[/INST]Boolean operators are logical operators used to combine or manipulate boolean values in programming. They allow you to perform comparisons and create complex logical expressions. The three main boolean operators are:\n\n1. AND operator (&&): Returns true if both operands are true. Otherwise, it returns false. For example:\n   - `true && true` returns true\n   - `true && false` returns false\n\n2. OR operator (||): Returns true i

In [12]:
# Preprocessing functions for dataset tokenization
from functools import partial

def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizes dataset batch
    """

    return tokenizer(
        images=None,
        text=batch["text"],
        max_length = max_length,
        padding=True
        #truncation = True,
    )

def preprocess_dataset(tokenizer, max_length, seed, my_dataset):
    """
    Tokenizes dataset for fine-tuning
    """
    columns_names = my_dataset.column_names
    columns_names.append('text')

    # Apply preprocessing to each batch of the dataset
    _preprocessing_function = partial(preprocess_batch, max_length = max_length, tokenizer = tokenizer)
    my_dataset = my_dataset.map(
        _preprocessing_function,
        batched = True,
        remove_columns = columns_names,
    )

    # Filter out samples that have "input_ids" exceeding "max_length"
    my_dataset = my_dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

    # Shuffle dataset
    my_dataset = my_dataset.shuffle(seed = seed)

    return my_dataset

In [13]:
# Set preprocessing parameters
max_length = 2048
seed = 33

# Preprocess dataset
preprocessed_dataset = preprocess_dataset(tokenizer, max_length, seed, dataset)

print(preprocessed_dataset)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/100000 [00:00<?, ? examples/s]

Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 4000
})


In [14]:
# Custom data collator for text fine-tuning
import torch

class TextDataCollator:
    def __init__(self, model, tokenizer, max_length=2048):
        self.model = model
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, examples):
        # Pad or truncate input_ids and attention_mask
        input_ids = [ex["input_ids"][:self.max_length] for ex in examples]
        attention_mask = [ex["attention_mask"][:self.max_length] for ex in examples]

        # Pad sequences to max_length
        input_ids = torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(ids) for ids in input_ids],
            batch_first=True,
            padding_value=0
        )

        attention_mask = torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(mask) for mask in attention_mask],
            batch_first=True,
            padding_value=0
        )

        # Add labels (same as input_ids for language modeling)
        labels = input_ids.clone()

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

In [15]:
# Fine-tuning configuration and training using SFTTrainer
from unsloth import is_bf16_supported
from trl import SFTTrainer, SFTConfig


FastVisionModel.for_training(model) # Enable for training!

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator =  TextDataCollator(model, tokenizer),
    train_dataset = preprocessed_dataset,

    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 20,
        # num_train_epochs = 1, # Set this instead of max_steps for full training runs
        learning_rate = 2e-4,
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",     # For Weights and Biases
        remove_unused_columns = False,
        dataset_text_field = "",
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 2048,
    ),
)

In [None]:
# Start training
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 4,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 20
 "-____-"     Number of trainable parameters = 57,016,320
🦥 Unsloth needs about 1-3 minutes to load everything - please wait!
Unsloth: Not an error, but LlavaForConditionalGeneration does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,7.9775
2,7.4062


In [None]:
# Fine-tuned Model Inference Test
FastVisionModel.for_inference(model)

image = None
instruction= "Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages."

messages = [
    {"role": "user", "content": [
        {"type": "text", "text": instruction}
    ]}
]
input_text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True
    )


inputs = tokenizer(
    image,
    input_text,
    add_special_tokens = False,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 256,
                   use_cache = True, temperature = 1.5, min_p = 0.1)

In [None]:
# Inference for image+text data input
import requests
from PIL import Image


image1 = Image.open(requests.get("https://llava-vl.github.io/static/images/view.jpg", stream=True).raw)

messages = [
    {
        "role": "user",
        "content": [
          {"type": "text", "text": "What are the things I should be cautious about when I visit this place? What should I bring with me?"},
          {"type" : "image", "image" : image1}
        ]
    }
]
input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)


inputs = tokenizer(text=input_text, images=image1, padding=True, return_tensors="pt").to("cuda")

# Generate model output using the provided text input
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512,
                   use_cache = True, temperature = 1.5, min_p = 0.1)