In [1]:
%pip install --quiet transformers==4.37.2 accelerate==0.24.0 sentencepiece==0.1.99 optimum==1.13.2 peft==0.5.0 bitsandbytes==0.41.2.post2 datasets==2.14.7

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.3.0 which is incompatible.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.1.0 which is incompatible.
cuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2024.1.0 which is incompatible.
dask-cuda 23.8.0 re

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.auto import tqdm, trange
import torch
import torch.nn as nn
import torch.nn.functional as F
import peft

import transformers
from datasets import load_dataset

import random
# Seed for Python's `random` module; used below to draw the fixed validation sample.
const_seed = 100

In [3]:
# Fail fast when PyTorch cannot see a CUDA device.
assert torch.cuda.is_available(), "check out cuda availability (change runtime type in colab)"

In [4]:
# Target device; the 'cpu' fallback only matters if the CUDA assertion in the previous cell is skipped.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Part 0: Initializing the model and tokenizer

Let's take the Mistral model for our experiments (https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2), which was tuned to follow user instructions. Note that we load the model in 4-bit precision to reduce memory usage.

model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [5]:
# Hugging Face Hub id of the checkpoint that the next cell loads.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'

In [6]:
# Load the Mistral tokenizer. `device_map` is a model-loading option, not a tokenizer
# option, so it is not passed here.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so that batched inputs can be padded.
tokenizer.pad_token_id = tokenizer.eos_token_id

# Note: to speed up inference you can use flash attention 2 (https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention-2)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map='auto', low_cpu_mem_usage=True, offload_state_dict=True,
    load_in_4bit=True, torch_dtype=torch.float32,  #attn_implementation="flash_attention_2"  # weights are 4-bit; layernorms and activations are fp32
)
# Freeze every base-model weight; trainable adapter weights are added later in the notebook.
for param in model.parameters():
    param.requires_grad=False

model.gradient_checkpointing_enable()  # only store a small subset of activations, re-compute the rest.
model.enable_input_require_grads()     # override an implementation quirk in gradient checkpoints that disables backprop unless inputs require grad
# more on gradient checkpointing: https://pytorch.org/docs/stable/checkpoint.html https://arxiv.org/abs/1604.06174

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

# Part 1 (5 points): Prompt-engineering 

**There are different strategies for text generation in huggingface:**

| Strategy | Description | Pros & Cons |
| --- | --- | --- |
| Greedy Search | Chooses the word with the highest probability as the next word in the sequence. | **Pros:** Simple and fast. <br> **Cons:** Can lead to repetitive and incoherent text. |
| Sampling with Temperature | Introduces randomness in the word selection. A higher temperature leads to more randomness. | **Pros:** Allows exploration and diverse output. <br> **Cons:** Higher temperatures can lead to nonsensical outputs. |
| Nucleus Sampling (Top-p Sampling) | Selects the next word from a truncated vocabulary, the "nucleus" of words that have a cumulative probability exceeding a pre-specified threshold (p). | **Pros:** Balances diversity and quality. <br> **Cons:** Setting an optimal 'p' can be tricky. |
| Beam Search | Explores multiple hypotheses (sequences of words) at each step, and keeps the 'k' most likely, where 'k' is the beam width. | **Pros:** Produces more reliable results than greedy search. <br> **Cons:** Can lack diversity and lead to generic responses. |
| Top-k Sampling | Randomly selects the next word from the top 'k' words with the highest probabilities. | **Pros:** Introduces randomness, increasing output diversity. <br> **Cons:** Random selection can sometimes lead to less coherent outputs. |
| Length Normalization | Prevents the model from favoring shorter sequences by dividing the log probabilities by the sequence length raised to some power. | **Pros:** Makes longer and potentially more informative sequences more likely. <br> **Cons:** Tuning the normalization factor can be difficult. |
| Stochastic Beam Search | Introduces randomness into the selection process of the 'k' hypotheses in beam search. | **Pros:** Increases diversity in the generated text. <br> **Cons:** The trade-off between diversity and quality can be tricky to manage. |
| Decoding with Minimum Bayes Risk (MBR) | Chooses the hypothesis (out of many) that minimizes expected loss under a loss function. | **Pros:** Optimizes the output according to a specific loss function. <br> **Cons:** Computationally more complex and requires a good loss function. |

Documentation references:
- [reference for `AutoModelForCausalLM.generate()`](https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationMixin.generate)
- [reference for `AutoTokenizer.decode()`](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.PreTrainedTokenizer.decode)
- Huggingface [docs on generation strategies](https://huggingface.co/docs/transformers/generation_strategies)

In [7]:
def get_answer(tokenizer, model, messages, max_new_tokens=200,
               temperature=0.5, do_sample=True):
    """Generate a completion for one prompt (or a batch of prompts) and decode it.

    Args:
        tokenizer: Hugging Face tokenizer that matches ``model``.
        model: causal language model exposing ``generate`` and ``device``.
        messages: one of
            * a plain prompt string,
            * a list/tuple of prompt strings (run as one padded batch),
            * a single chat conversation, i.e. a list of
              ``{"role": ..., "content": ...}`` dicts; it is rendered with the
              tokenizer's chat template before tokenization.
        max_new_tokens: upper bound on the number of generated tokens.
        temperature: sampling temperature; forwarded only when ``do_sample``
            is true, because it has no effect on greedy decoding.
        do_sample: sample from the distribution instead of decoding greedily.

    Returns:
        list[str]: one decoded sequence per prompt with special tokens removed.
        As the run output below shows, each string starts with the prompt and
        continues with the generated text.

    Raises:
        ValueError: if ``messages`` contains no prompt.
    """
    if isinstance(messages, str):
        prompts, add_special_tokens = [messages], True
    elif messages and all(isinstance(m, dict) for m in messages):
        # Chat conversation: let the template add the instruction markup. The
        # rendered text is tokenized without extra special tokens because chat
        # templates normally emit BOS themselves (avoids a doubled BOS).
        prompts = [tokenizer.apply_chat_template(list(messages), tokenize=False)]
        add_special_tokens = False
    else:
        prompts, add_special_tokens = list(messages), True

    if not prompts:
        raise ValueError("messages must contain at least one prompt")

    # Decoder-only models continue from the last position, so batches must be
    # padded on the left; the tokenizer's own setting is restored afterwards.
    previous_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        # No `truncation=True`: without a max length it is a no-op that only
        # triggers a warning.
        encodeds = tokenizer(prompts, padding=True,
                             add_special_tokens=add_special_tokens,
                             return_tensors="pt")
    finally:
        tokenizer.padding_side = previous_side

    # Inputs have to live on the same device as the model.
    encodeds = encodeds.to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.pad_token_id,
    }
    if do_sample:
        generation_kwargs["temperature"] = temperature

    generated_ids = model.generate(**encodeds, **generation_kwargs)
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

**Note:** I tried many ways to improve the quality of the output. Strangely, tokenizing the prompt with `tokenizer.apply_chat_template` gave noticeably worse output for me, whereas passing the raw prompt text directly to the tokenizer (which is how `get_answer` is called below) gives much more reliable answers.

In [308]:
# Smoke test: generate once from a single user prompt.
# Only the message's raw `content` string is handed to get_answer.

messages = [
    {"role": "user", "content": "Write an explanation of tensors for 5 year old"},
]

print(get_answer(tokenizer, model, messages[0]['content'])[0])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Write an explanation of tensors for 5 year olds.

Tensors are special kinds of toys that can change shape in many different ways. Imagine you have a toy cube that can also be a long, flat rectangle or a round ball. But this toy is very smart, it can change shape in many different ways, not just a few. Tensors are like that toy, but for numbers and shapes. They can hold information in many different ways, just like our toy can be many different shapes. And just like how we can do things with our toy, like stacking or counting, we can do things with tensors, like calculating and solving problems.


You should obtain an explanation from the model. If so, let us go further!

Now we will take a sample from boolQ (https://huggingface.co/datasets/google/boolq) dataset and try prompting techniques to extract the needed answer and calculate its quality

In [8]:
# Download BoolQ from the Hugging Face Hub; the run output shows a train and a validation split.
df = load_dataset("google/boolq")

Downloading readme:   0%|          | 0.00/6.57k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/3.69M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.26M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/9427 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3270 [00:00<?, ? examples/s]

In [9]:
# Fixing 20 validation examples

# Seeding makes the drawn indices identical on every run.
random.seed(const_seed)
# NOTE(review): range(1, 3270) hard-codes the validation split size and can never draw index 0 — confirm this is intended.
idx = random.sample(range(1, 3270), 20)

In [10]:
# The validation subset (rows at the indices drawn above) that the prompting experiments below iterate over.
df_sample = df["validation"].select(idx)

In [11]:
# For instance, you can construct your prompt the following way:
# the instruction, the passage, the question and an open "answer: " slot in one user message.
messages = [
    {"role": "user", "content": '''You are given a text and question. Answer only "true" or "false".
text: As with other games in The Elder Scrolls series, the game is set on the continent of Tamriel. The events of the game occur a millennium before those of The Elder Scrolls V: Skyrim and around 800 years before The Elder Scrolls III: Morrowind and The Elder Scrolls IV: Oblivion. It has a broadly similar structure to Skyrim, with two separate conflicts progressing at the same time, one with the fate of the world in the balance, and one where the prize is supreme power on Tamriel. In The Elder Scrolls Online, the first struggle is against the Daedric Prince Molag Bal, who is attempting to meld the plane of Mundus with his realm of Coldharbour, and the second is to capture the vacant imperial throne, contested by three alliances of the mortal races. The player character has been sacrificed to Molag Bal, and Molag Bal has stolen their soul, the recovery of which is the primary game objective.
question: is elder scrolls online the same as skyrim
answer: '''},
]

# Only the raw `content` string of the message is handed to get_answer.
print(get_answer(tokenizer, model, messages[0]['content'])[0])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
2024-03-03 22:25:42.250881: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-03 22:25:42.251010: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-03 22:25:42.533336: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


You are given a text and question. Answer only "true" or "false".
text: As with other games in The Elder Scrolls series, the game is set on the continent of Tamriel. The events of the game occur a millennium before those of The Elder Scrolls V: Skyrim and around 800 years before The Elder Scrolls III: Morrowind and The Elder Scrolls IV: Oblivion. It has a broadly similar structure to Skyrim, with two separate conflicts progressing at the same time, one with the fate of the world in the balance, and one where the prize is supreme power on Tamriel. In The Elder Scrolls Online, the first struggle is against the Daedric Prince Molag Bal, who is attempting to meld the plane of Mundus with his realm of Coldharbour, and the second is to capture the vacant imperial throne, contested by three alliances of the mortal races. The player character has been sacrificed to Molag Bal, and Molag Bal has stolen their soul, the recovery of which is the primary game objective.
question: is elder scrolls on

Is anything wrong with the output? Now it is time for you to play around and try to come up with some better prompt.

In [12]:
from datasets import load_metric
# Accuracy metric object; evaluate_answers below calls `acc.compute(...)`.
# NOTE(review): the cell output shows a warning for this call — `load_metric` appears to be
# deprecated in `datasets` in favour of the separate `evaluate` package; confirm before upgrading.
acc = load_metric('accuracy')

  acc = load_metric('accuracy')


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

In [13]:
import pandas as pd
import re

In [14]:
# Evaluation of generated answers.
# Note: adapt the marker if your prompts use a different answer structure; the
# target "true"/"false" verdict must stay automatically extractable.
def _extract_bool_answer(text, marker='your answer:'):
    """Return the boolean verdict the model gave after ``marker``.

    Only the first non-empty line after the first occurrence of ``marker`` is
    inspected, and the first standalone ``true``/``false`` word on that line
    (any letter case) decides. Anything else - a missing marker, an empty
    continuation, or no verdict word - counts as ``False``.
    """
    _, found, tail = text.partition(marker)
    if not found:
        return False
    # With chat-templated prompts the closing instruction tag sits between
    # the marker and the answer; it carries no information.
    tail = tail.replace('[/INST]', ' ')
    for line in tail.splitlines():
        if line.strip():
            verdict = re.search(r'\b(true|false)\b', line, flags=re.IGNORECASE)
            return verdict is not None and verdict.group(1).lower() == 'true'
    return False


def evaluate_answers(true_answers, predictions):
    """Parse the model generations and score them against the references.

    Args:
        true_answers: sequence of reference booleans.
        predictions: sequence of generated texts, aligned with ``true_answers``;
            each one is expected to contain the ``'your answer:'`` marker
            followed by the model's verdict.

    Returns:
        tuple: ``(ans, score)`` where ``ans`` is the list of parsed boolean
        predictions and ``score`` is whatever ``acc.compute`` returns for them.

    Raises:
        ValueError: if the two sequences differ in length.
    """
    if len(predictions) != len(true_answers):
        raise ValueError(
            f"got {len(predictions)} predictions for {len(true_answers)} reference answers"
        )
    ans = [_extract_bool_answer(text) for text in predictions]
    score = acc.compute(predictions=ans, references=true_answers)
    return ans, score

In [None]:
# Debug: first line that follows the 'your answer:' marker in one few-shot generation.
# NOTE(review): `few_shot_answers` is only assigned in a later cell, so this fails on a fresh top-to-bottom run.
print(few_shot_answers[6].split('your answer:')[1].split('\n')[0])#.find('true')

In [None]:
# Debug: full text of the same few-shot generation.
# NOTE(review): `few_shot_answers` is only assigned in a later cell.
print(few_shot_answers[6])

TODO: Try and compare "naive" prompting (your best hand-crafted variant), few-shot prompting (https://www.promptingguide.ai/techniques/fewshot) and chain-of-thought prompting (step-by-step thinking - https://www.promptingguide.ai/techniques/cot).

Save the generation results into separate csv files and do not forget to attach them to your homework.

In [15]:
def _format_shot(shot, with_passage):
    """Render one solved demonstration (question, optional passage, answer)."""
    if with_passage:
        return ('\nquestion: ' + shot['question'] + '\ntext: ' + shot['passage']
                + '\nanswer: ' + str(shot['answer']))
    return '\nquestion: ' + shot['question'] + '\nanswer: ' + str(shot['answer']) + '\n'


def validating_prompts(tokenizer, model, df_sample, example, naive=False, few_shot=False,
                       chain_of_thought=False, example_ids=(133, 1564)):
    """Run one prompting strategy over ``df_sample`` and collect the generations.

    This is the single definition of the function; the cell used to define it
    twice, with the second definition silently shadowing the first. Prompts
    are sent to ``get_answer`` as plain strings, one sample per call.

    Args:
        tokenizer: tokenizer forwarded to ``get_answer``.
        model: model forwarded to ``get_answer``.
        df_sample: iterable of rows with a ``'question'`` entry (and a
            ``'passage'`` entry for the ``chain_of_thought`` mode).
        example: indexable collection of solved rows used as demonstrations;
            it is not read in ``naive`` mode.
        naive: ask the bare question with a short instruction.
        few_shot: prepend solved question/answer demonstrations.
        chain_of_thought: prepend solved demonstrations including their
            passages and add the sample's passage. Despite the name, the
            prompt does not request step-by-step reasoning.
        example_ids: indices into ``example`` used as demonstrations.

    When several flags are set, the first one in the order ``naive``,
    ``few_shot``, ``chain_of_thought`` wins.

    Returns:
        list[str]: the first decoded sequence returned by ``get_answer`` for
        every sample, in input order.

    Raises:
        ValueError: if none of the three strategy flags is set.
    """
    if naive:
        header = 'You are given question. Answer only "true" or "false". Dont explain the answer.'
        shots, with_passage = [], False
    elif few_shot:
        shots, with_passage = [example[i] for i in example_ids], False
        header = f'You are given {len(shots)} examples. Answer only "true" or "false", dont explain.'
    elif chain_of_thought:
        shots, with_passage = [example[i] for i in example_ids], True
        header = f'You are given {len(shots)} examples of text and question. Answer only "true" or "false".'
    else:
        raise ValueError("set one of naive, few_shot or chain_of_thought to True")

    # Instruction plus demonstrations are identical for every sample, so build them once.
    prefix = header + ''.join(_format_shot(shot, with_passage) for shot in shots)

    model_answers = []
    for sample in df_sample:
        prompt = prefix + '\nquestion: ' + sample['question']
        if with_passage:
            prompt += '\ntext: ' + sample['passage']
        # evaluate_answers looks for this exact marker.
        prompt += '\nyour answer:'

        decoded = get_answer(tokenizer, model, prompt)
        model_answers.append(decoded[0])

    return model_answers

In [None]:
# Naive prompting run. The `example` argument (here the whole DatasetDict) is not read in naive mode.
naive_answers = validating_prompts(tokenizer, model, df_sample, df, naive=True)

In [None]:
# Inspect one raw naive generation.
print(naive_answers[8])

In [None]:
# Few-shot prompting run; demonstrations are taken from the validation split.
few_shot_answers = validating_prompts(tokenizer, model, df_sample, df['validation'], few_shot=True)

In [None]:
# Inspect one raw few-shot generation.
print(few_shot_answers[2])

In [None]:
# "Chain of thought" run: few-shot demonstrations that also include the passages.
chain_of_thoughts_answers = validating_prompts(tokenizer, model, df_sample, df['validation'], chain_of_thought=True)

In [None]:
# Inspect one raw generation of the passage-augmented run.
print(chain_of_thoughts_answers[16])

In [None]:
# Debug: first line after the '/INST] ' substring of one naive generation.
# NOTE(review): raises IndexError when the generation does not contain '/INST] '.
naive_answers[4].split('/INST] ')[1].split('\n')[0]

In [None]:
# naive_extracted = []
# for i in range(len(naive_answers)):
#     if naive_answers[i].find('INST] True') != -1:
#         naive_extracted.append(True)
#     else:
#         naive_extracted.append(False)
# #naive_extracted

In [None]:
# Tuple: (parsed boolean predictions, accuracy score) for the naive run.
naive_extracted = evaluate_answers(df_sample["answer"], naive_answers)

In [None]:
# Per-question report for naive prompting: question, raw generation,
# reference label and parsed prediction; written to CSV for the hand-in.
naive_df = pd.DataFrame({
    'Question': df_sample['question'],
    'Model_answer': naive_answers,
    'True_answers': df_sample['answer'],
    'Predictions': naive_extracted[0],
})

naive_df.to_csv('/kaggle/working/naive_answers.csv', index=False)
naive_df.head()

In [None]:
# Second element of the evaluate_answers result is the accuracy score.
print(f'Accuracy for the naive prompting is {naive_extracted[1]}')

In [None]:
# few_shot_extracted = []
# for i in range(len(few_shot_answers)):
#     if few_shot_answers[i].find('your answer: True') != -1:
#         few_shot_extracted.append(True)
#     else:
#         few_shot_extracted.append(False)
# #few_shot_extracted

In [None]:
# Tuple: (parsed boolean predictions, accuracy score) for the few-shot run.
few_shot_extracted = evaluate_answers(df_sample["answer"], few_shot_answers)

In [None]:
# Per-question report for few-shot prompting: question, raw generation,
# reference label and parsed prediction; written to CSV for the hand-in.
few_shot_df = pd.DataFrame({
    'Question': df_sample['question'],
    'Model_answer': few_shot_answers,
    'True_answers': df_sample['answer'],
    'Predictions': few_shot_extracted[0],
})

few_shot_df.to_csv('/kaggle/working/few_shot_answers.csv', index=False)
few_shot_df.head()

In [None]:
# Second element of the evaluate_answers result is the accuracy score.
print(f'Accuracy for the few_shot prompting is {few_shot_extracted[1]}')

In [None]:
# Inspect another raw generation of the passage-augmented run.
print(chain_of_thoughts_answers[19])

In [None]:
# chain_of_thoughts_extracted = []
# for i in range(len(chain_of_thoughts_answers)):
#     if chain_of_thoughts_answers[i].find('is "True"') != -1 or chain_of_thoughts_answers[i].find('is "True') != -1 or \
#     chain_of_thoughts_answers[i].find('is "true') != -1 or chain_of_thoughts_answers[i].find('is True') != -1 or\
#     chain_of_thoughts_answers[i].find('is true') != -1 or chain_of_thoughts_answers[i].find('Answer: Yes') != -1 or\
#     chain_of_thoughts_answers[i].find('INST] True') != -1 or chain_of_thoughts_answers[i].find('Answer: True') != -1:
#         chain_of_thoughts_extracted.append(True)
#     else:
#         chain_of_thoughts_extracted.append(False)
# # chain_of_thoughts_extracted

In [None]:
# Tuple: (parsed boolean predictions, accuracy score) for the passage-augmented run.
chain_of_thoughts_extracted = evaluate_answers(df_sample["answer"], chain_of_thoughts_answers)

In [None]:
# Per-question report for the passage-augmented run: question, raw generation,
# reference label and parsed prediction; written to CSV for the hand-in.
chain_of_thoughts_df = pd.DataFrame({
    'Question': df_sample['question'],
    'Model_answer': chain_of_thoughts_answers,
    'True_answers': df_sample['answer'],
    'Predictions': chain_of_thoughts_extracted[0],
})

chain_of_thoughts_df.to_csv('/kaggle/working/chain_of_thoughts_answers.csv', index=False)
chain_of_thoughts_df.head()

In [None]:
# Second element of the evaluate_answers result is the accuracy score.
print(f'Accuracy for the chain of thoughts prompting is {chain_of_thoughts_extracted[1]}')

# Part 2 (5 points): Fine-tuning with PEFT and LoRA

In [16]:
# NOTE(review): `trl` is installed without a version pin, unlike the packages in the first cell —
# consider pinning it so the notebook stays reproducible.
%pip install trl -q

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [17]:
from transformers import TrainingArguments, Trainer
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from peft import LoraConfig

In [18]:
# Prompt-tuning configuration. It is defined here but not applied in this cell:
# only `lora_config` is handed to get_peft_model below.
peft_config = peft.PromptTuningConfig(task_type=peft.TaskType.CAUSAL_LM,
                                      num_virtual_tokens=16,
                                      prompt_tuning_init_text='Answer only "true" or "false"')
lora_config = LoraConfig(
    r = 16,
    lora_alpha = 10,
    lora_dropout = 0.05,
    # Attention projections followed by the MLP projections; every module name appears once.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    task_type = peft.TaskType.CAUSAL_LM,
)

# Guarded so that re-running this cell does not wrap an already wrapped model a second time.
if not isinstance(model, peft.PeftModel):
    model = peft.get_peft_model(model, lora_config)  # note: for most peft methods, this line also modifies model in-place

In [19]:
# Report how many parameters are trainable; the run output shows about 0.58% of all parameters.
model.print_trainable_parameters() # Wow so small amount of trainable params

trainable params: 41,943,040 || all params: 7,283,675,136 || trainable%: 0.5758499550960753


In [20]:
# Display the module tree to check where the LoRA layers were inserted.
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(
                in_features=4096, out_features=1024, bias=False
 

In [30]:
# Simple prompt formatting for fine-tuning
def format_prompt(sample):
    """Render a column-oriented batch of BoolQ rows as training texts.

    Args:
        sample: mapping whose ``'passage'``, ``'question'`` and ``'answer'``
            entries are equally long sequences (the notebook passes a slice
            of the train split).

    Returns:
        list[str]: one text per row, made of the instruction, the passage,
        the question and the reference answer. The lines carry no leading
        indentation and the text has no trailing whitespace, so the layout of
        the notebook source no longer leaks into the training data.
    """
    return [
        'You are given question. Answer only "true" or "false".\n'
        f'text: {passage}\n'
        f'question: {question}\n'
        f'answer: {answer}'
        for passage, question, answer in zip(
            sample['passage'], sample['question'], sample['answer']
        )
    ]

# def formatting_prompts_func(example):
#     output_texts = []
#     for i in range(len(example['question'])):
#         text = f"### Text: {example['passage'][i]}\n ### Question: {example['question'][i]}\n ### Answer: {example['answer'][i]}"
#         output_texts.append(text)
#     return output_texts

#  def generate_and_tokenize_prompt(sample):
#         full_prompt = generate_prompt(sample)
#         tokenized_message = tokenizer(full_prompt, padding=True, truncation=True, return_tensors="pt")
#         return tokenized_message

def tokenize_function(examples):
    """Tokenize ``examples`` with the notebook-level ``tokenizer``, padding and truncation enabled."""
    encoded = tokenizer(examples, truncation=True, padding=True)
    return encoded

# data = tokenize_function(format_prompt(df['train'][:1000]))

In [34]:
# Format the first 500 training rows as prompt texts and tokenize them.
# NOTE(review): neither `data` nor `tokenized_data` is passed to the trainer cell below — confirm whether this cell is still needed.
data = format_prompt(df['train'][:500])
tokenized_data = tokenize_function(data)

TODO: initialize the Trainer and train on the train part of our dataset for 2-3 epochs.

Note: carefully set max_seq_length and args (that are transformers.TrainingArguments)

In [None]:
from transformers import TrainingArguments, Trainer

arguments = TrainingArguments(
    output_dir = '/kaggle/working/outputs',
    # Train for whole epochs. `max_steps` is deliberately not set: a positive
    # value would override `num_train_epochs`.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # 4 x 4 = 16 sequences per optimizer step and device
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    lr_scheduler_type="cosine",
    # Warm-up is given as a ratio only; `warmup_steps` would override it.
    warmup_ratio=0.05,
)

trainer = SFTTrainer(
    # `model` was already wrapped with the LoRA adapters by get_peft_model,
    # so no `peft_config` is passed again here.
    model=model,
    args=arguments,
    max_seq_length=min(tokenizer.model_max_length, 1024),
    # format_prompt turns a batch of rows into a list of training texts.
    formatting_func=format_prompt,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    tokenizer=tokenizer,
    # A Dataset object is required here; slicing with [:500] would yield a plain dict of columns.
    train_dataset=df['train'].select(range(500)),
)

In [None]:
# Run the fine-tuning loop configured above.
trainer.train()

TODO: save and check your tuned model. Provide scores on our 20 validation examples and save result to csv file

In [None]:
# Save the fine-tuned model to the relative directory "trained-model".
# NOTE(review): for a PEFT-wrapped model this presumably stores only the adapter weights — TODO confirm.
model.save_pretrained("trained-model")