In [1]:
import pandas as pd
from unsloth import FastLanguageModel
import torch
from datasets import Dataset
import random

from tqdm import tqdm
tqdm.pandas()

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


Unsloth: Your Flash Attention 2 installation seems to be broken?
A possible explanation is you have a new CUDA version which isn't
yet compatible with FA2? Please file a ticket to Unsloth or FA2.
We shall now use Xformers instead, which does not have any performance hits!
We found this negligible impact by benchmarking on 1x A100.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
# Ask PyTorch's CUDA caching allocator to release cached-but-unused GPU memory
# before the model is loaded in a later cell.
torch.cuda.empty_cache()

In [3]:
# --- Data location -----------------------------------------------------------
# Relative path used for local runs. On Kaggle, point this at
# '/kaggle/input/gen-ai-ucu-2024-task-3' instead.
BASE_PATH = '../data'

# --- Model loading options (passed to FastLanguageModel.from_pretrained) ------
# Sequence length requested when loading the model; per the original note any
# value can be chosen because RoPE scaling is handled internally by Unsloth.
max_seq_length = 2048
# None lets the library pick the dtype automatically
# (original note: Float16 for Tesla T4/V100, Bfloat16 for Ampere+).
dtype = None
# Load weights 4-bit quantized to reduce memory usage; may be set to False.
load_in_4bit = True

In [4]:
# Read the train and test splits; both files are JSON Lines (one record per line).
train_df, test_df = (
    pd.read_json(jsonl_path, lines=True)
    for jsonl_path in (f"{BASE_PATH}/zno.train.jsonl", f"{BASE_PATH}/zno.test.jsonl")
)

In [5]:
# Checkpoint to fine-tune. Other sizes noted by the original author:
#   "unsloth/Qwen2.5-0.5B", "unsloth/Qwen2.5-1.5B", "unsloth/Qwen2.5-3B",
#   "unsloth/Qwen2.5-14B",  "unsloth/Qwen2.5-32B",  "unsloth/Qwen2.5-72B"
MODEL_NAME = "unsloth/Qwen2.5-7B"

# Load the model and its tokenizer using the options from the config cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

==((====))==  Unsloth 2025.1.5: Fast Qwen2 patching. Transformers: 4.48.0.
   \\   /|    GPU: NVIDIA GeForce RTX 3090. Max memory: 23.691 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    # Which linear layers receive adapters.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Adapter size and scaling.
    r=16,            # rank; any number > 0 (original note suggests 8, 16, 32, 64, 128)
    lora_alpha=16,
    lora_dropout=0,  # any value is supported; 0 is the optimized setting per the original note
    bias="none",     # any value is supported; "none" is the optimized setting per the original note
    # Optional LoRA variants, both left off here.
    use_rslora=False,   # rank-stabilized LoRA
    loftq_config=None,  # LoftQ
    # Original note: "unsloth" uses 30% less VRAM and suits very long context
    # (True is the other accepted value).
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

Unsloth 2025.1.5 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [7]:
# Build DPO preference pairs from the multiple-choice training records.
def prepare_dpo_data(dataset, rng=None):
    """Convert multiple-choice records into DPO ``prompt/chosen/rejected`` dicts.

    Args:
        dataset: pandas DataFrame whose rows provide ``question`` (str),
            ``correct_answers`` (sequence; only the first marker is used) and
            ``answers`` (iterable of dicts with ``marker`` and ``text`` keys).
        rng: optional ``random.Random``-like object exposing ``choice``. When
            omitted, the module-level ``random`` functions are used (as before),
            so callers can still control sampling with ``random.seed``.

    Returns:
        list[dict]: one dict per row with keys ``prompt``, ``chosen`` and
        ``rejected``. ``rejected`` is a randomly picked wrong option. If the
        correct marker is not among the options, or there is no wrong option to
        sample, the pair degrades to ``chosen="(<marker>)"`` and ``rejected=""``.
    """
    chooser = random if rng is None else rng
    pairs = []
    for _, record in dataset.iterrows():
        question = record["question"]
        correct_answer_marker = record["correct_answers"][0]
        answers = {answer["marker"]: answer["text"] for answer in record["answers"]}
        options = '\n'.join(f"({marker}) {text}" for marker, text in answers.items())

        wrong_markers = [marker for marker in answers if marker != correct_answer_marker]

        # Explicit checks replace the former bare `except:`, which also hid
        # unrelated failures (and KeyboardInterrupt) behind the fallback pair.
        if correct_answer_marker in answers and wrong_markers:
            correct_answer = f"({correct_answer_marker}) {answers[correct_answer_marker]}"
            incorrect_answer_marker = chooser.choice(wrong_markers)
            incorrect_answer = f"({incorrect_answer_marker}) {answers[incorrect_answer_marker]}"
        else:
            # Same degraded pair the original produced for malformed records.
            correct_answer = f"({correct_answer_marker})"
            incorrect_answer = ""

        pairs.append({
            "prompt": f"{question}\nOptions:\n{options}",
            "chosen": correct_answer,
            "rejected": incorrect_answer
        })

    return pairs

In [8]:
def dpo_loss(preferred_logits, non_preferred_logits):
    """Calculates the DPO loss for a pair of preferred and non-preferred logits.

    Computes the mean over all elements of
    ``-log(exp(p) / (exp(p) + exp(q)))``, which equals ``-logsigmoid(p - q)``.
    The log-sigmoid form is used because exponentiating the raw inputs
    overflows for large magnitudes and turns the loss into NaN/inf.

    Args:
        preferred_logits: tensor of scores for the preferred responses.
        non_preferred_logits: tensor of scores for the non-preferred responses;
            must be broadcastable against ``preferred_logits``.

    Returns:
        Scalar tensor holding the mean loss.
    """
    return -torch.nn.functional.logsigmoid(preferred_logits - non_preferred_logits).mean()

In [9]:
# The "rejected" option of every pair is sampled with Python's `random` module.
# Seed it (same value as the other seeds in this notebook) so that
# Restart & Run All rebuilds exactly the same preference pairs.
random.seed(3407)

dpo_data = prepare_dpo_data(dataset=train_df)
dpo_df = pd.DataFrame(data=dpo_data)
# Hugging Face Dataset with the prompt / chosen / rejected columns for DPOTrainer.
train_dataset_dpo = Dataset.from_pandas(dpo_df)

In [10]:
from trl import DPOTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [11]:
# Optional DPOTrainer inputs; all of them are left unset for this run.
model_kwargs = None
ref_model = None  # no separate reference model object is handed to the trainer
ref_model_kwargs = None
model_adapter_name = None
ref_adapter_name = None

output_dir = './model-dpo/'
beta = 0.1
# `max_seq_length` is deliberately NOT reassigned here. It is the model-loading
# setting (2048) from the config cell; overwriting it with 128 changed what a
# re-run of the loading cell would use, and the 128 value was never read.
# NOTE(review): not referenced by the trainer cell, which hard-codes the same value.
max_prompt_length = 128
loss_type = "sigmoid" # "hinge" "ipo"  "kto_pair"

training_arguments = TrainingArguments(
    # Effective batch size = 2 * 4 = 8 sequences per optimizer step.
    per_device_train_batch_size= 2,
    gradient_accumulation_steps= 4,
    warmup_ratio= 0.3,
    num_train_epochs= 3,
    learning_rate= 2e-4,
    # Use bf16 when the GPU supports it, otherwise fall back to fp16.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps= 20,
    optim = "paged_adamw_8bit",
    lr_scheduler_type= "linear",
    seed = 3407,
    output_dir=output_dir,
    report_to="none",
    # Checkpoint and evaluate on the same 200-step cadence.
    save_steps= 200,
    save_strategy="steps",
    eval_steps=200,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="steps",
    # Evaluation runs whenever the eval strategy is not "no" (the training log
    # shows a Validation Loss every 200 steps), so state that explicitly
    # instead of the contradictory `do_eval=False`.
    do_eval=True,
    group_by_length= False,
)



In [12]:
# Build the DPO trainer from the LoRA-wrapped model and the preference dataset.
# NOTE(review): `beta`, `loss_type`, `tokenizer` and the length limits are passed
# straight to DPOTrainer here; newer TRL releases may expect them on a DPOConfig
# instead — confirm against the installed TRL/Unsloth versions before upgrading.
trainer = DPOTrainer(
    model,
    ref_model,  # None (see the configuration cell)

    model_init_kwargs=model_kwargs,
    ref_model_init_kwargs=ref_model_kwargs,

    args=training_arguments,
    beta=beta,

    tokenizer=tokenizer,

    train_dataset=train_dataset_dpo,
    # NOTE(review): evaluation uses the training set itself, so the reported
    # "Validation Loss" measures fit to the training data, not generalization.
    eval_dataset=train_dataset_dpo,
    loss_type=loss_type,

    # Length limits are hard-coded here; the `max_prompt_length` variable from
    # the configuration cell is not used.
    max_length=512,
    # NOTE(review): presumably only relevant for encoder-decoder models — confirm.
    max_target_length=256,
    max_prompt_length=128,

    model_adapter_name=model_adapter_name,
    ref_adapter_name=ref_adapter_name,
)
# Print the wrapped model's module tree (long output).
trainer.accelerator.print(f"{trainer.model}")

Extracting prompt from train dataset: 100%|██████████| 3063/3063 [00:00<00:00, 20327.00 examples/s]
Applying chat template to train dataset: 100%|██████████| 3063/3063 [00:00<00:00, 34356.92 examples/s]
Extracting prompt from eval dataset: 100%|██████████| 3063/3063 [00:00<00:00, 27068.75 examples/s]
Applying chat template to eval dataset: 100%|██████████| 3063/3063 [00:00<00:00, 34117.69 examples/s]
Tokenizing train dataset: 100%|██████████| 3063/3063 [00:02<00:00, 1324.71 examples/s]
Tokenizing eval dataset: 100%|██████████| 3063/3063 [00:02<00:00, 1327.12 examples/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(152064, 3584, padding_idx=151665)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3584, out_features=3584, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3584, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3584, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora




In [13]:
# Run DPO training with the arguments configured above; the value returned by
# `train()` is kept in `trainer_stats`.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 3,063 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 1,149
 "-____-"     Number of trainable parameters = 40,370,176


Step,Training Loss,Validation Loss,Rewards/chosen,Rewards/rejected,Rewards/accuracies,Rewards/margins,Logps/chosen,Logps/rejected,Logits/chosen,Logits/rejected
200,0.6775,0.619891,0.326793,0.066805,0.640946,0.259988,-16.238506,-19.272909,-2.087404,-2.10513
400,0.6829,0.543439,-0.367319,-0.950133,0.725103,0.582814,-23.179632,-29.442291,-1.898244,-1.92702
600,0.5575,0.407585,-0.328287,-1.458135,0.84171,1.129847,-22.789312,-34.522308,-2.325773,-2.355011
800,0.2143,0.242782,-1.808381,-7.23513,0.908943,5.426749,-37.590248,-92.292267,-1.729807,-1.986972
1000,0.1977,0.091657,-0.958821,-6.977176,0.968016,6.018354,-29.094645,-89.712715,-1.533589,-1.645666


In [14]:
# Persist the fine-tuned PEFT model and the tokenizer files side by side in
# ./lora_model_dpo so they can be reloaded together later.
model.save_pretrained("lora_model_dpo") # Local saving
tokenizer.save_pretrained("lora_model_dpo")

('lora_model_dpo/tokenizer_config.json',
 'lora_model_dpo/special_tokens_map.json',
 'lora_model_dpo/vocab.json',
 'lora_model_dpo/merges.txt',
 'lora_model_dpo/added_tokens.json',
 'lora_model_dpo/tokenizer.json')

In [None]:
# NOTE(review): leftover scratch expression (this cell has no execution count).
# It does not affect any later cell and can likely be deleted.
1 == 1

In [75]:
# Inference prompt template with three positional `{}` placeholders, in order:
#   1. the question text,
#   2. the answer options rendered as a single string,
#   3. the start of the answer (`solve_task` passes "[" so the model continues
#      with the marker; the manual check cell passes "").
# NOTE(review): the DPO pairs built by `prepare_dpo_data` use a different layout
# ("<question>\nOptions:\n(<marker>) <text>"), so training and inference
# prompts do not match — consider aligning them.
zno_prompt = """Below is a question about Ukrainian history, language and literature. Select the correct answer marker from the provided options. Example output: [A]

### Question:
{}

### Options:
{}

### Correct Answer:
{}"""

In [76]:
def solve_task(row):
    """Predict the answer marker for one multiple-choice record.

    Relies on the notebook-level ``zno_prompt``, ``tokenizer`` and ``model``.

    Args:
        row: mapping / DataFrame row with ``question`` (str) and ``answers``
            (iterable of dicts with ``marker`` and ``text`` keys).

    Returns:
        str: a single-character marker. If the first generated character is not
        one of the option markers (for example nothing but special tokens was
        produced), the first option's marker is returned so the submission
        always holds a valid choice. With no options at all, the raw first
        generated character (possibly empty) is returned.
    """
    question = row['question']
    markers = [option['marker'] for option in row['answers']]
    options = ','.join(f"[{option['marker']}] {option['text']}" for option in row['answers'])

    # The prompt ends with "[" so that the model's next token is the marker itself.
    prompt = zno_prompt.format(question, options, "[")
    inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=3, use_cache=True)

    # Decode only the newly generated tokens. Splitting the full decoded text on
    # the answer header breaks when the question itself contains that header,
    # and picks up special-token text when the model generates nothing useful.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    predicted = completion[:1]
    if markers and predicted not in markers:
        predicted = markers[0]
    return predicted

In [77]:
# Switch the model to Unsloth's inference mode, then run one hand-written
# example through the prompt template as a smoke test.
# NOTE(review): this example formats options as "(А) ..." and passes an empty
# answer prefix, whereas `solve_task` uses "[А] ..." with a "[" prefix, so this
# check does not exercise the exact prompt used for the submission.
FastLanguageModel.for_inference(model)
inputs = tokenizer(
[
    zno_prompt.format(
        'Позначте рядок, у якому в усіх словах потрібно писати літеру *и*', # instruction
        '(А) бад..лина, благоч..стивий, кр..хкий, ж..виця;,(Б) вар..во, меж..річчя, вич..пурений, кр..шталь;,(В) п’ят..річка, заруч..ни, нев..димка, обітн..ця;,(Г) зач..нати, виконав..ця, знів..чити, вел..чина;,(Д) нож..чок, печ..во, викор..нити, оз..ратися.', # input
        "",
    )
], return_tensors = "pt").to("cuda")

# Generate at most 3 new tokens and show the full decoded text (prompt + answer).
outputs = model.generate(**inputs, max_new_tokens = 3, use_cache = True)
tokenizer.batch_decode(outputs)

['Below is a question about Ukrainian history, language and literature. Select the correct answer marker from the provided options. Example output: [A]\n\n### Question:\nПозначте рядок, у якому в усіх словах потрібно писати літеру *и*\n\n### Options:\n(А) бад..лина, благоч..стивий, кр..хкий, ж..виця;,(Б) вар..во, меж..річчя, вич..пурений, кр..шталь;,(В) п’ят..річка, заруч..ни, нев..димка, обітн..ця;,(Г) зач..нати, виконав..ця, знів..чити, вел..чина;,(Д) нож..чок, печ..во, викор..нити, оз..ратися.\n\n### Correct Answer:\n(В)']

In [73]:
# Predict an answer marker for every test row; `progress_apply` is the
# tqdm-enabled variant of `DataFrame.apply` registered by `tqdm.pandas()`.
# NOTE(review): this cell's execution count (73) is lower than that of the cells
# defining the prompt and `solve_task` (75-77), so the saved output may come from
# an earlier definition — re-run top to bottom to reproduce it.
submission_df = test_df.copy()
submission_df['correct_answers'] = test_df.progress_apply(solve_task, axis=1)
submission_df

100%|██████████| 751/751 [02:19<00:00,  5.39it/s]


Unnamed: 0,question,answers,subject,id,correct_answers
0,«Сміхом крізь сльози» можна схарактеризувати з...,"[{'marker': 'А', 'text': '«Три зозулі з поклон...",ukrainian-language-and-literature,0,Б
1,"Удовин син, мати, сестра, кохана – ключові обр...","[{'marker': 'А', 'text': '«Засвіт встали козач...",ukrainian-language-and-literature,1,В
2,В уривку з історичного джерела «*Створивши бан...,"[{'marker': 'А', 'text': 'Правобережної Україн...",history-of-ukraine,2,Г
3,В уривку\n\n\n*Доки буде жити Україна\n\nВ теп...,"[{'marker': 'А', 'text': 'Василя Стефаника'}, ...",ukrainian-language-and-literature,3,Б
4,Букву ***и*** на місці пропуску треба писати в...,"[{'marker': 'А', 'text': 'пр….хований, пр…звис...",ukrainian-language-and-literature,4,В
...,...,...,...,...,...
746,Укажіть правильний варіант послідовного заповн...,"[{'marker': 'А', 'text': 'дієвих прийомів, які...",ukrainian-language-and-literature,746,Г
747,**Проаналізуйте фрагмент історичного документа...,"[{'marker': 'А', 'text': 'Українська головна в...",history-of-ukraine,747,Г
748,Прочитайте речення *(цифра позначає наступне с...,"[{'marker': 'А', 'text': '3, 4, 5, 10'}, {'mar...",ukrainian-language-and-literature,748,Б
749,Граматично правильне продовження речення «*Пер...,"[{'marker': 'А', 'text': 'мені пригадалися дав...",ukrainian-language-and-literature,749,А


In [74]:
# Reduce every prediction to its first element (for a one-character string this
# is the character itself), then export the id / answer columns.
submission_df['correct_answers'] = submission_df['correct_answers'].map(
    lambda prediction: prediction[0]
)
submission_df[["id", "correct_answers"]].to_csv(
    "submission-dpo-3epoch.csv",
    index=False,
)