To start, press "*Runtime*" and then "*Run all*" on a **free** Tesla T4 Google Colab instance!

To install Unsloth on your computer, follow the installation instructions on our GitHub page [here](https://github.com/unslothai/unsloth#installation-instructions---conda).

You will learn how to perform [data preparation](#Data), how to [train](#Train), how to [run the model](#Inference) and [how to save it](#Save) (for example, for Llama.cpp).

### Loading libraries

In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# %%capture hides the (long) pip output of this cell.
# Unsloth itself is installed from its GitHub repository with the "colab-new" extra.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# The remaining packages are installed with --no-deps, so pip does not pull in
# their own dependencies; trl is pinned below 0.9.0.
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes

### Loading the model

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options; max_seq_length is reused later when the trainer is built.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Candidate checkpoints (Hugging Face repo ids); `n` picks one of them.
# Try more models at https://huggingface.co/models
models = [
    "lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half",
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/Qwen2-7B-Instruct-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
]
n = 2 # Select the model number
selected_model = models[n]

# Short name (the part after "org/"), used later to name the output files.
model_name = selected_model.split('/')[1]

load_options = dict(
    model_name = selected_model, # Reminder we support ANY Hugging Face model!
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

==((====))==  Unsloth 2025.8.10: Fast Qwen2 patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


### Parameters for changing the LoRA model (We now add LoRA adapters so we only need to update 1 to 10% of all parameters!)

In [None]:
# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the result.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,             # Rank; any number > 0. Suggested 8, 16, 32, 64, 128
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,   # Supports any, but = 0 is optimized
    bias = "none",      # Supports any, but = "none" is optimized
    # "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    # Use True or "unsloth" for very long context.
    use_gradient_checkpointing = "unsloth",
    random_state = 1,
    use_rslora = False,  # Rank stabilized LoRA is supported
    loftq_config = None, # And LoftQ
)

## Preparing a dataset for training

### Classification of posts

In [None]:
import pandas as pd
import numpy as np

import requests
import os

# Download the labelled posts spreadsheet from Zenodo (DOI 10.5281/zenodo.17054610).
record_id = '17054610'
filename_to_download = 'dataset_posts_tommyfish.xlsx'

api_url = f'https://zenodo.org/api/records/{record_id}'

# Upper bound (seconds) on connecting and on each read, so a stalled
# connection fails with an error instead of hanging the notebook forever.
REQUEST_TIMEOUT = 30

try:
    print(f"Searching for file '{filename_to_download}' in Zenodo repository with ID: {record_id}...")

    # The record metadata lists the attached files together with their download links.
    response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    # Download link of the first entry whose key matches the wanted file name
    # (None when the record has no such file).
    file_url = next(
        (
            entry.get('links', {}).get('self')
            for entry in data.get('files', [])
            if entry.get('key') == filename_to_download
        ),
        None,
    )

    if file_url:
        print(f"File '{filename_to_download}' found. Starting download...")

        # A streamed response keeps its connection open until it is closed,
        # so it is used as a context manager.
        with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as file_response:
            file_response.raise_for_status()

            # Save the file to the current directory
            with open(filename_to_download, 'wb') as f:
                for chunk in file_response.iter_content(chunk_size=8192):
                    f.write(chunk)

        print(f"File '{filename_to_download}' downloaded successfully.")
    else:
        print(f"File '{filename_to_download}' not found in the repository.")

except requests.exceptions.RequestException as e:
    # Best effort: report the problem and carry on (timeouts are reported here too).
    print(f"An error occurred while accessing Zenodo: {e}")

posts = pd.read_excel('dataset_posts_tommyfish.xlsx')

# Discard the posts labelled with several classes at once ('1,2,4').
posts = posts[posts['–ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞'] != '1,2,4']

nb_classes = 5

# Keep only the text and label columns and drop rows where either is missing.
# This has to happen BEFORE the cast to str: astype(str) turns a missing value
# into the literal text 'nan', which a later notna() check can no longer detect.
posts = posts[['–¢–µ–∫—Å—Ç','–ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞']].dropna().reset_index(drop=True)
posts['–¢–µ–∫—Å—Ç'] = posts['–¢–µ–∫—Å—Ç'].astype(str)

# The same instruction is attached to every row; it becomes the "instruction"
# part of the prompt. The text is kept exactly as written (including its
# internal line breaks and indentation) because it is part of the model input.
posts["Instruction"] = """You are an expert in analyzing VK group posts in the
        field of food delivery. You will be given the text of the group's post.
        Your task is to determine which class the group's post corresponds to.
        Answer only '0' if the post does not commit to anything (optional)
        Answer only '1' if the post obliges the group to a discount
        Answer only '2' if the post obliges the group to make a gift.
        Only answer '3' if the post obliges the group to give cashback.
        Answer only '4' if the post obliges the group to deliver the goods on
        time. Answer briefly, without explanation."""
def split_dataframe(dataframe, test_proportion, seed=None):
    """Randomly split *dataframe* into a train part and a test part.

    Args:
        dataframe: The pandas DataFrame to split.
        test_proportion: Fraction of rows (between 0 and 1 inclusive) that
            goes to the test part; the row count is rounded down.
        seed: Optional seed for a reproducible shuffle. With the default
            ``None`` the global NumPy random state is used, as before.

    Returns:
        A ``(train, test)`` tuple of independent DataFrame copies that
        together contain every row of *dataframe* exactly once.

    Raises:
        ValueError: If *test_proportion* is not within ``[0, 1]``.
    """
    # Outside [0, 1] the slice bounds below go negative or past the end and
    # would silently produce wrong splits.
    if not 0 <= test_proportion <= 1:
        raise ValueError(
            f"test_proportion must be between 0 and 1, got {test_proportion!r}"
        )

    total_size = len(dataframe)
    test_size = int(total_size * test_proportion)

    indices = np.arange(total_size)
    if seed is None:
        np.random.shuffle(indices)
    else:
        np.random.RandomState(seed).shuffle(indices)

    split_at = total_size - test_size
    # Copies let callers add columns to either part without pandas'
    # SettingWithCopyWarning.
    train_part = dataframe.iloc[indices[:split_at]].copy()
    test_part = dataframe.iloc[indices[split_at:]].copy()
    return train_part, test_part

# Hold out 30% of the posts for evaluation; the rest is used for fine-tuning.
train, test = split_dataframe(posts, 0.3)

from datasets import Dataset

# Columns of the training set: label, post text and the shared instruction.
train_columns = {
    "output": train['–ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞'],
    "input": train['–¢–µ–∫—Å—Ç'],
    'instruction': train['Instruction'],
}
ds = Dataset.from_dict(train_columns)
ds[0]

Searching for file 'dataset_posts_tommyfish.xlsx' in Zenodo repository with ID: 17048695...
File 'dataset_posts_tommyfish.xlsx' found. Starting download...
File 'dataset_posts_tommyfish.xlsx' downloaded successfully.


{'output': 2,
 'input': '–°–∫–æ—Ä–æ –î–µ–Ω—å —Ä–æ–∂–¥–µ–Ω–∏—è? \n–ê –º–æ–∂–µ—Ç –±—ã—Ç—å –æ–Ω —É–∂–µ —Å–µ–≥–æ–¥–Ω—è?üòª \n \n–û—Ç–º–µ—Ç—å—Ç–µ —ç—Ç–æ—Ç –∑–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–π –ø—Ä–∞–∑–¥–Ω–∏–∫ –≤ –∫—Ä—É–≥—É —Ä–æ–¥—Å—Ç–≤–µ–Ω–Ω–∏–∫–æ–≤ –∏ –±–ª–∏–∑–∫–∏—Ö! –ê –æ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–µ –≤–∫—É—Å–Ω–æ–≥–æ –∑–∞—Å—Ç–æ–ª—å—è –ø–æ–∑–∞–±–æ—Ç–∏–º—Å—è –º—ã‚ù§ \n \n–î–∞—Ä–∏–º –≤—Å–µ–º –∏–º–µ–Ω–∏–Ω–Ω–∏–∫–∞–º –ú–∏–Ω–∏ –°–µ—Ç! –ü—Ä–∏ –º–∏–Ω–∏–º–∞–ª—å–Ω–æ–º –∑–∞–∫–∞–∑–µ –æ—Ç 1200 —Ä—É–±–ª–µ–πüéÅ \n \nüç±–ß—Ç–æ –≤—Ö–æ–¥–∏—Ç –≤ —Å–æ—Å—Ç–∞–≤ —Å–µ—Ç–∞? \n- –§–∏–ª–∞–¥–µ–ª—å—Ñ–∏—è –ª–∞–π—Ç —Å –∫—Ä–µ–≤–µ—Ç–∫–æ–π\n- –ñ–∞—Ä–µ–Ω–Ω—ã–π —Ä–æ–ª–ª —Å –∫—É—Ä–æ—á–∫–æ–π\n–ü—Ä–æ–º–æ–∫–æ–¥: –î–†\n\n**–ê–∫—Ü–∏—è –¥–µ–π—Å—Ç–≤—É–µ—Ç 3 –¥–Ω—è –¥–æ –∏ 3 –¥–Ω—è –ø–æ—Å–ª–µ —Å–æ–±—ã—Ç–∏—è\n–î—Ä—É–∑—å—è, –Ω–∞–¥–µ–µ–º—Å—è —á—Ç–æ –í–∞—à–∏ –±—É–¥–Ω–∏ –ø—Ä–æ—Ö–æ–¥—è—Ç –≤–∫—É—Å–Ω–æ üòã \n–ï—Å–ª–∏ —ç—Ç–æ –µ—â—ë –Ω–µ —Ç–∞–∫, —Ç–æ –í—ã –∑–Ω–∞–µ—Ç–µ, —á—Ç–æ –¥–µ–ª–∞—Ç—å ‚Äì —Å–∫–æ—Ä–µ–µ –ª–µ—Ç–∏—Ç–µ –≤ –Ω–∞—à–µ –º–µ–Ω—é http://vk.cc/c0vSi

### Converting a dataset to a suitable format

In [None]:
# Alpaca-style prompt template with three positional slots, filled in order:
# instruction, input (the post text) and response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token of the loaded tokenizer; formatting_prompts_func
# appends it to every training example.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a batch of examples into full training prompts.

    Args:
        examples: A batch mapping with the keys "instruction", "input" and
            "output", each holding one value per example.

    Returns:
        A dict with a single key "text" holding one formatted prompt per
        example, each terminated by EOS_TOKEN.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    # Must add EOS_TOKEN, otherwise your generation will go on forever!
    texts = [
        alpaca_prompt.format(instruction, post_text, label) + EOS_TOKEN
        for instruction, post_text, label in rows
    ]
    return {"text": texts}

# NOTE(review): load_dataset is not used in the cells visible here.
from datasets import load_dataset
# Add a "text" column to the training set by rendering every row with
# formatting_prompts_func (applied batch-wise).
dataset = ds
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/201 [00:00<?, ? examples/s]

## Model training
Now we will use the `SFTTrainer` from Hugging Face TRL! More documentation is here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps for speed, but you can set `num_train_epochs=1` for a full run and disable the step limit (`max_steps=None`). We also support the `DPOTrainer` from TRL!

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
import os

# Use bf16 when the GPU supports it, otherwise fall back to fp16.
use_bf16 = is_bfloat16_supported()

# Supervised fine-tuning on the "text" column prepared above.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    args = TrainingArguments(
        # Effective batch size = 2 x 4 = 8 examples per optimizer step.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,

        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps = 5,
        max_steps = 60,

        learning_rate = 2e-4,
        fp16 = not use_bf16,
        bf16 = use_bf16,
        logging_steps = 5,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        # Disable all logging integrations (e.g. Weights & Biases). This
        # replaces the deprecated WANDB_DISABLED environment variable.
        report_to = "none",
    ),
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Map (num_proc=2):   0%|          | 0/201 [00:00<?, ? examples/s]

In [None]:
#@title Indicators of available video memory
# Record the memory already reserved before training so the cost of training
# itself can be derived afterwards.
BYTES_PER_GB = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
reserved_bytes = torch.cuda.max_memory_reserved()
start_gpu_memory = round(reserved_bytes / BYTES_PER_GB, 3)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GB, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
10.592 GB of memory reserved.


In [None]:
# Run the fine-tuning; the returned stats (metrics['train_runtime']) are
# reported in the memory/time statistics cell.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 201 | Num Epochs = 3 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 40,370,176 of 7,655,986,688 (0.53% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,2.0288
10,1.5158
15,0.9581
20,0.9559
25,0.8406
30,0.8624
35,0.6254
40,0.7099
45,0.6626
50,0.6377


In [None]:
#@title Video memory usage indicators and statistics
# Peak reserved memory after training, and the share of it attributable to
# training (peak minus the baseline measured before training).
peak_bytes = torch.cuda.max_memory_reserved()
used_memory = round(peak_bytes / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

train_seconds = trainer_stats.metrics['train_runtime']
train_minutes = round(train_seconds / 60, 2)

print(f"{train_seconds} seconds used for training.")
print(f"{train_minutes} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

645.5671 seconds used for training.
10.76 minutes used for training.
Peak reserved memory = 10.592 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 71.854 %.
Peak reserved memory for training % of max memory = 0.0 %.



## Launching the model

### Classifying posts

In [None]:
import re

# First digit in the answer that names a valid class. The instruction defines
# classes 0-4 only, so any other digit is not accepted as a prediction.
CLASS_PATTERN = re.compile(r'[0-4]')

# Switch the model to Unsloth's faster inference mode once, before the loop
# (it does not depend on the individual post).
FastLanguageModel.for_inference(model) # Unsloth has 2x faster inference!

# One predicted class per test row, in row order; -1 marks an answer in which
# no valid class digit was found.
post_result = []
for text, _label, Instruction in test.itertuples(index=False, name=None):
    # Same template as for training, with the response slot left blank so the
    # model generates it.
    prompt = alpaca_prompt.format(
        Instruction, # instruction
        text, # input
        "", # output - leave this blank for generation!
    )
    inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
    # The decoded sequence contains the prompt too; keep only what follows the
    # last 'Response:' marker.
    message_answer = tokenizer.batch_decode(outputs)[0].rsplit('Response:', 1)[-1]

    match = CLASS_PATTERN.search(message_answer)
    post_result.append(int(match.group()) if match else -1)


In [None]:
# Attach the predictions to an explicit copy of the held-out rows: `test` was
# produced by slicing `posts`, and assigning a column to such a slice triggers
# pandas' SettingWithCopyWarning.
test = test.copy()
test["Answer"] = post_result
df = test

# All held-out posts together with the model's answer.
df.to_excel(model_name+'_output.xlsx', index=False)
df.to_csv(model_name+"_output.csv", index=False)

# Only the posts where the model's answer differs from the labelled class.
df_difference = df.loc[df['–ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞'] != df["Answer"]]
test.info()
df_difference.info()
df_difference.to_excel(model_name+'_difference.xlsx', index=False)
# index=False keeps this file consistent with the other three exports.
df_difference.to_csv(model_name+'_difference.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
Index: 86 entries, 113 to 37
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   –¢–µ–∫—Å—Ç        86 non-null     object
 1   –ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞  86 non-null     object
 2   Instruction  86 non-null     object
 3   –û—Ç–≤–µ—Ç        86 non-null     int64 
dtypes: int64(1), object(3)
memory usage: 3.4+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 10 to 133
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   –¢–µ–∫—Å—Ç        4 non-null      object
 1   –ö–ª–∞—Å—Å –ø–æ—Å—Ç–∞  4 non-null      object
 2   Instruction  4 non-null      object
 3   –û—Ç–≤–µ—Ç        4 non-null      int64 
dtypes: int64(1), object(3)
memory usage: 160.0+ bytes


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test["–û—Ç–≤–µ—Ç"] = post_result
