## fine-tuning

In [1]:
import json
import torch
import wandb
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    AutoConfig,
    BitsAndBytesConfig,
)

import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model, TaskType
from tqdm import tqdm
tqdm.pandas()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è W&B
wandb.init(project="fine-tuning-lora", 
          )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdariamishina2812[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


### –º–æ–¥–µ–ª—å

In [3]:
quantization_config = BitsAndBytesConfig(
        load_in_8bit=True
    )

In [4]:
model_name = "Vikhrmodels/Vikhr-7B-instruct_0.4" 

model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    device_map="auto",
    quantization_config=quantization_config,
    # cache_dir='./models' #—Å–∏–ª—å–Ω–æ –∑–∞–º–µ–¥–ª—è–µ—Ç –∑–∞–≥—Ä—É–∑–∫—É
)

Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:05<00:00,  1.46s/it]


In [5]:
# –õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ –∏–º–µ–Ω–∏ –º–æ–¥–µ–ª–∏
wandb.config.update({"model_name": model_name})

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [7]:
# –í —ç—Ç–æ–π —è—á–µ–π–∫–µ –≤—Å–µ –≤–µ—Å–∞ –∏–∑–Ω–∞—á–∞–ª—å–Ω–æ–π –º–æ–¥–µ–ª–∏ –∑–∞–º–æ—Ä–∞–∂–∏–≤–∞—é—Ç—Å—è
for param in model.parameters():
  param.requires_grad = False  
  if param.ndim == 1:
    # –≤ layernorm –Ω—É–∂–Ω—ã –æ—á–µ–Ω—å –º–∞–ª–µ–Ω—å–∫–∏–µ —á–∏—Å–ª–∞, –ø–æ—ç—Ç–æ–º—É –¥–ª—è –Ω–µ–≥–æ –æ—Å—Ç–∞–≤–ª—è—é—Ç fp32 
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()
model.enable_input_require_grads()

In [8]:
# –≤—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω–∞—è —Ñ—É–Ω–∫—Ü–∏—è –∫–æ—Ç–æ—Ä–∞—è –ø–æ–∫–∞–∂–µ—Ç —Å–∫–æ–ª—å–∫–æ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ –±—É–¥—É—Ç –æ–±—É—á–∞—Ç—å—Å—è
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

–ê–¥–∞–ø—Ç–µ—Ä—ã –º–æ–∂–Ω–æ –¥–æ–±–∞–≤–ª—è—Ç—å –∫ –ø–æ–ª–Ω–æ—Å–≤—è–∑–Ω—ã–º/–ª–∏–Ω–µ–π–Ω—ã–º/dense —Å–ª–æ—è–º. –í –∑–∞–≤–∏—Å–∏–º–æ—Å—Ç–∏ –æ—Ç –∞—Ä—Ö–∏—Ç–µ–∫—Ç—É—Ä—ã —ç—Ç–∏ —Å–ª–æ–∏ –º–æ–≥—É—Ç –Ω–∞–∑—ã–≤–∞—Ç—å—Å—è –ø–æ-—Ä–∞–∑–Ω–æ–º—É, –ø–æ—ç—Ç–æ–º—É –∏—Ö –Ω—É–∂–Ω–æ —É–∫–∞–∑–∞—Ç—å –≤—Ä—É—á–Ω—É—é —á–µ—Ä–µ–∑ –ø–∞—Ä–º–µ—Ç—Ä target_modules. –¢–∞–∫ –∫–∞–∫ –º–æ–¥–µ–ª–∏ —ç—Ç–æ –≤ –æ—Å–Ω–æ–≤–Ω–æ–º –Ω–∞—Å–ª–æ–µ–Ω–Ω—ã–µ –æ–¥–Ω–æ—Ç–∏–ø–Ω—ã–µ —Ç—Ä–∞–Ω—Å—Ñ–æ—Ä–º–µ—Ä–Ω—ã–µ –±–ª–æ–∫–∏, —Ç–æ –ø–µ—Ä–µ—á–∏—Å–ª–∏—Ç—å –Ω—É–∂–Ω–æ –ª–∏—à—å –Ω–µ—Å–∫–æ–ª—å–∫–æ –∏–º–µ–Ω —Ç–∏–ø–æ–≤—ã—Ö —Å–ª–æ–µ–≤.

In [9]:
from peft import LoraConfig, get_peft_model
# –ö–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—è LoRA:
# - task_type: –¥–ª—è –∑–∞–¥–∞—á –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ —Ç–µ–∫—Å—Ç–∞ –∏—Å–ø–æ–ª—å–∑—É–µ–º CAUSAL_LM
# - r: —Ä–∞–Ω–≥ LoRA (–æ–±—ã—á–Ω–æ 8)
# - lora_alpha: –∫–æ—ç—Ñ—Ñ–∏—Ü–∏–µ–Ω—Ç –º–∞—Å—à—Ç–∞–±–∏—Ä–æ–≤–∞–Ω–∏—è
# - lora_dropout: dropout
# - target_modules: —Å–ø–∏—Å–æ–∫ –º–æ–¥—É–ª–µ–π –º–æ–¥–µ–ª–∏, –∫ –∫–æ—Ç–æ—Ä—ã–º –ø—Ä–∏–º–µ–Ω—è–µ—Ç—Å—è LoRA.
config = LoraConfig(
    r=64, # –≤–Ω—É—Ç—Ä–µ–Ω–Ω—è—è —Ä–∞–∑–º–µ—Ä–Ω–æ—Å—Ç—å –∞–¥–∞–ø—Ç–µ—Ä–∞, –æ—Å–Ω–æ–≤–Ω–æ–π –ø–∞—Ä–∞–º–µ—Ç—Ä
    target_modules=["q_proj", "k_proj", "v_proj", 'out_proj', 'fc1', 'fc2'], # –∫ –∫–∞–∫–∏–º —Å–ª–æ—è–º –¥–æ–±–∞–≤–ª—è—Ç—å –∞–¥–∞–ø—Ç–µ—Ä—ã (–ø–æ–¥—Ä–æ–±–Ω–µ–µ –≤—ã—à–µ)

    # "–≤–µ—Å" –∞–¥–∞–ø—Ç–µ—Ä–∞, —ç—Ç–æ—Ç –ø–∞—Ä–∞–º–µ—Ç—Ä –¥–µ–ª–∏—Ç—Å—è –Ω–∞ r, —Ç–æ –µ—Å—Ç—å –µ—Å–ª–∏ –æ–Ω–∏ —Ä–∞–≤–Ω—ã —Ç–æ
    # –≤–µ—Å –∞–¥–∞–ø—Ç–µ—Ä–∞ = 1 (—Ç–æ –µ—Å—Ç—å –±–∞–∑–æ–≤–∞—è –º–æ–¥–µ–ª—å –∏ –∞–¥–∞–ø—Ç–µ—Ä –æ–¥–∏–Ω–∞–∫–æ–≤—ã –ø–æ –∑–Ω–∞—á–∏–º–æ—Å—Ç–∏)
    # –µ—Å–ª–∏ –ø–æ—Å—Ç–∞–≤–∏—Ç—å —ç—Ç–æ—Ç –ø–∞—Ä–∞–º–µ—Ç—Ä –≤—ã—à–µ, —Ç–æ –∞–¥–∞–ø—Ç–µ—Ä –±—É–¥–µ—Ç —Å–∏–ª—å–Ω–µ–µ –≤–ª–∏—è—Ç—å –Ω–∞ –±–∞–∑–æ–≤—É—é –º–æ–¥–µ–ª—å
    # –∫–∞–∫ —è –ø–æ–Ω–∏–º–∞—é –Ω–∏–∫—Ç–æ –æ—Å–æ–±–æ –Ω–µ –ø–æ–Ω–∏–º–∞–µ—Ç —á—Ç–æ –¥–µ–ª–∞—Ç—å —Å —ç—Ç–∏–º –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–º –ø—Ä–∏ –æ–±—É—á–µ–Ω–∏–∏
    # –ª—É—á—à–µ –æ—Å—Ç–∞–≤–ª—è—Ç—å –µ–≥–æ —Ä–∞–≤–Ω—ã–º r
    lora_alpha=128, 
    
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 37748736 || all params: 7665315840 || trainable%: 0.49246158655349026


In [10]:
# –õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ LoraConfig
wandb.config.update({"lora_config": config.to_dict()})

In [11]:
model.print_trainable_parameters()

trainable params: 37,748,736 || all params: 7,665,315,840 || trainable%: 0.4925


### –¥–∞–Ω–Ω—ã–µ

In [12]:
#—ç—Ç–æ —Ç–æ, —á—Ç–æ –º—ã –Ω–∞—Ä–∞–∑–º–µ—á–∞–ª–∏ —á–∞—Ç–≥–ø—Ç
data = pd.read_excel("prod_attr_10k.xlsx")
print(data.shape)
data.head()

(10387, 5)


Unnamed: 0,id_tov,name_tov,name_par_group,repr_text,attribute_descriptions
0,52,"–°–º–µ—Ç–∞–Ω–∞ 25%, 350 –≥",–ú–æ–ª–æ—á–Ω–∞—è –ü—Ä–æ–¥—É–∫—Ü–∏—è,–û—á–µ–Ω—å —Ö–æ—Ä–æ—à–∞—è\n–û—á–µ–Ω—å —Ö–æ—Ä–æ—à–∞—è —Å–º–µ—Ç–∞–Ω–∞ –æ—Ç –õ–µ–±–µ–¥—è...,"{""id_tov"": 52, ""name_tov"": ""–°–º–µ—Ç–∞–Ω–∞ 25%, 350 –≥..."
1,61,"–ú–æ–ª–æ–∫–æ —Ü–µ–ª—å–Ω–æ–µ, 1 –ª",–ú–æ–ª–æ—á–Ω–∞—è –ü—Ä–æ–¥—É–∫—Ü–∏—è,–ú–æ–ª–æ–∫–æ –æ—Ç –ö–æ—Å–º–æ–ª –ø–æ—Ö–æ–∂–µ –Ω–∞ –Ω–∞—Ç—É—Ä–∞–ª—å–Ω–æ–µ. –ù–æ –Ω–µ ...,"{""id_tov"": 61, ""name_tov"": ""–ú–æ–ª–æ–∫–æ —Ü–µ–ª—å–Ω–æ–µ, 1 ..."
2,69,–ó–∞–ø–µ–∫–∞–Ω–∫–∞ –∫–ª–∞—Å—Å–∏—á–µ—Å–∫–∞—è,–ö—É–ª–∏–Ω–∞—Ä–∏—è,–ù–∞—Å—Ç–æ—è—â–∏–π —Ç–≤–æ—Ä–æ–≥! –û—á–µ–Ω—å –≤–∫—É—Å–Ω–æ! –°–ø–∞—Å–∏–±–æ!\n–î–µ—Å–µ...,"{""id_tov"": 69, ""name_tov"": ""–ó–∞–ø–µ–∫–∞–Ω–∫–∞ –∫–ª–∞—Å—Å–∏—á–µ..."
3,75,"–ü—Ä–æ—Å—Ç–æ–∫–≤–∞—à–∞ 4%, 250 –≥",–ú–æ–ª–æ—á–Ω–∞—è –ü—Ä–æ–¥—É–∫—Ü–∏—è,–û—á–µ–Ω—å —Ö–∑ –æ—Ä–æ—à–∞—è –ø—Ä–æ—Å—Ç–æ–∫–≤–∞—à–∞! –ì—É—Å—Ç–∞—è –∏ –≤–∫—É—Å–Ω–∞—è....,"{""id_tov"": 75, ""name_tov"": ""–ü—Ä–æ—Å—Ç–æ–∫–≤–∞—à–∞ 4%, 25..."
4,77,"–†—è–∂–µ–Ω–∫–∞ 4%, 450 –≥",–ú–æ–ª–æ—á–Ω–∞—è –ü—Ä–æ–¥—É–∫—Ü–∏—è,—Ä—è–∂–µ–Ω–∫–∞ –∫–∞–∫ —Ä—è–∂–µ–Ω–∫–∞\n–ñ–∏–¥–∫–∞—è\n–í–∫—É—Å–Ω–∞—è \n–•–æ—Ä–æ—à–∞—è...,"{""id_tov"": 77, ""name_tov"": ""–†—è–∂–µ–Ω–∫–∞ 4%, 450 –≥""..."


In [13]:
data.attribute_descriptions[0]

'{"id_tov": 52, "name_tov": "–°–º–µ—Ç–∞–Ω–∞ 25%, 350 –≥", "name_par_group": "–ú–æ–ª–æ—á–Ω–∞—è –ü—Ä–æ–¥—É–∫—Ü–∏—è", "attribute_descriptions": [{"attribute": "–≤–∫—É—Å", "characteristic": "–≤–∫—É—Å–Ω–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–≤–∫—É—Å –æ—Ç–ª–∏—á–Ω—ã–π"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–Ω–µ –≤–∫—É—Å–Ω–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–≤ –º–µ—Ä—É –∫–∏—Å–ª–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–∫–∏—Å–ª–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–∂–∏—Ä–Ω–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–≤–∫—É—Å–Ω–µ–π—à–∞—è"}, {"attribute": "–≤–∫—É—Å", "characteristic": "–≤–∫—É—Å–Ω—é—â–∞—è"}, {"attribute": "–≥—É—Å—Ç–æ—Ç–∞", "characteristic": "–≥—É—Å—Ç–∞—è"}, {"attribute": "–≥—É—Å—Ç–æ—Ç–∞", "characteristic": "–ø–ª–æ—Ç–Ω–∞—è"}, {"attribute": "–≥—É—Å—Ç–æ—Ç–∞", "characteristic": "–Ω–µ –¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –≥—É—Å—Ç–∞—è"}, {"attribute": "–∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è", "characteristic": "–≥–ª–∞–¥–∫–æ–π"}, {"attribute": 

In [14]:
# –§—É–Ω–∫—Ü–∏—è –¥–ª—è —Ñ–æ—Ä–º–∞—Ç–∏—Ä–æ–≤–∞–Ω–∏—è –∫–æ–ª–æ–Ω–∫–∏ attribute_descriptions.
# –û–Ω–∞ –ø—Ä–∏–Ω–∏–º–∞–µ—Ç –∑–Ω–∞—á–µ–Ω–∏–µ –∏–∑ –∫–æ–ª–æ–Ω–∫–∏ (–º–æ–∂–µ—Ç –±—ã—Ç—å —Å—Ç—Ä–æ–∫–æ–π –≤ —Ñ–æ—Ä–º–∞—Ç–µ JSON –∏–ª–∏ —É–∂–µ —Å–ª–æ–≤–∞—Ä—ë–º)
# –∏ –≤–æ–∑–≤—Ä–∞—â–∞–µ—Ç —Å—Ç—Ä–æ–∫—É –≤–∏–¥–∞: "–∞—Ç—Ä–∏–±—É—Ç: —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∞; –∞—Ç—Ä–∏–±—É—Ç: —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∞; ..."
def format_attributes(attr_field):
    if isinstance(attr_field, str):
        try:
            attr_data = json.loads(attr_field)
        except Exception as e:
            print("–û—à–∏–±–∫–∞ –ø—Ä–∏ —Ä–∞–∑–±–æ—Ä–µ JSON:", e)
            attr_data = {}
    else:
        attr_data = attr_field
    if "attribute_descriptions" in attr_data:
        formatted = "; ".join([f"{item['attribute']}: {item['characteristic']}" 
                               for item in attr_data["attribute_descriptions"]])
    else:
        formatted = ""
    return formatted

In [15]:
format_attributes(data.attribute_descriptions[0])

'–≤–∫—É—Å: –≤–∫—É—Å–Ω–∞—è; –≤–∫—É—Å: –≤–∫—É—Å –æ—Ç–ª–∏—á–Ω—ã–π; –≤–∫—É—Å: –Ω–µ –≤–∫—É—Å–Ω–∞—è; –≤–∫—É—Å: –≤ –º–µ—Ä—É –∫–∏—Å–ª–∞—è; –≤–∫—É—Å: –∫–∏—Å–ª–∞—è; –≤–∫—É—Å: –∂–∏—Ä–Ω–∞—è; –≤–∫—É—Å: –≤–∫—É—Å–Ω–µ–π—à–∞—è; –≤–∫—É—Å: –≤–∫—É—Å–Ω—é—â–∞—è; –≥—É—Å—Ç–æ—Ç–∞: –≥—É—Å—Ç–∞—è; –≥—É—Å—Ç–æ—Ç–∞: –ø–ª–æ—Ç–Ω–∞—è; –≥—É—Å—Ç–æ—Ç–∞: –Ω–µ –¥–æ—Å—Ç–∞—Ç–æ—á–Ω–æ –≥—É—Å—Ç–∞—è; –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è: –≥–ª–∞–¥–∫–æ–π; –∫–æ–∂–∞: –±–µ–∑ –∫–∏—Å–ª—è—Ç–∏–Ω—ã; —Å–≤–µ–∂–µ—Å—Ç—å: —Å–≤–µ–∂–∞—è; –∫–∞—á–µ—Å—Ç–≤–æ: –∫–∞—á–µ—Å—Ç–≤–æ –≤—ã—Å–æ–∫–æ–µ'

In [16]:
dataset = Dataset.from_pandas(data[['repr_text',
       'attribute_descriptions']]) 

In [17]:
# # –§—É–Ω–∫—Ü–∏—è –¥–ª—è —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏—è –æ–±—É—á–∞—é—â–µ–≥–æ –ø—Ä–∏–º–µ—Ä–∞.
# # –í –¥–∞–Ω–Ω–æ–º –ø—Ä–∏–º–µ—Ä–µ –º—ã —Å–æ—Å—Ç–∞–≤–ª—è–µ–º –∏–Ω—Å—Ç—Ä—É–∫—Ü–∏—é, –≥–¥–µ —Å–Ω–∞—á–∞–ª–∞ –∏–¥—ë—Ç –æ—Ç–∑—ã–≤,
# # –∑–∞—Ç–µ–º –≤–æ–ø—Ä–æ—Å (–∏–Ω—Å—Ç—Ä—É–∫—Ü–∏—è), –∏ –ø–æ—Å–ª–µ –¥–≤–æ–µ—Ç–æ—á–∏—è ‚Äî –æ—Ç—Ñ–æ—Ä–º–∞—Ç–∏—Ä–æ–≤–∞–Ω–Ω—ã–π –æ—Ç–≤–µ—Ç.
def generate_prompt(example):
    review = example["repr_text"]
    attributes = format_attributes(example["attribute_descriptions"])
    prompt = (
        "–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤–∞—Ä—ã. "
        "–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ–º –ø—Ä–æ–¥—É–∫—Ç–µ. –í—ã–¥–µ–ª–∏ –∞—Ç—Ä–∏–±—É—Ç—ã —Ç–æ–≤–∞—Ä–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≤–∫—É—Å, –∑–∞–ø–∞—Ö, —Ç–µ–∫—Å—Ç—É—Ä–∞, —Ü–≤–µ—Ç), –ø—Ä–æ –∫–æ—Ç–æ—Ä—ã–µ –ø–∏—à—É—Ç –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏."
        "–£ –æ–¥–Ω–æ–≥–æ –∞—Ç—Ä–∏–±—É—Ç–∞ –º–æ–∂–µ—Ç –±—ã—Ç—å –º–Ω–æ–≥–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫. –ê—Ç—Ä–∏–±—É—Ç –≤—ã—Ä–∞–∂–∞–µ—Ç—Å—è 1-3 —Å–ª–æ–≤–∞–º–∏ –≤—Å–µ–≥–¥–∞, –∫—Ä–∞—Ç–∫–æ –∏ –µ–º–∫–æ"
        "–ß–∞—â–µ –≤—Å–µ–≥–æ –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏ –æ–ø–∏—Å—ã–≤–∞—é—Ç —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–ª–∞–¥–∫–∏–π, —Å–≤–µ–∂–∏–π, –º—è–≥–∫–∏–π, —Å–æ—á–Ω—ã–π), –∞ —Ç–µ–±–µ –Ω—É–∂–Ω–æ –≤—ã–¥–µ–ª–∏—Ç—å –∏–º–µ–Ω–Ω–æ —Å–∞–º–∏ –∞—Ç—Ä–∏–±—É—Ç—ã."
        "–ù–∏–∂–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–ø–∏—Å–æ–∫ –æ—Ç–∑—ã–≤–æ–≤ –Ω–∞ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –∫–∞–∂–¥—ã–π –Ω–æ–≤—ã–π –æ—Ç–∑—ã–≤ —Å –Ω–æ–≤–æ–π —Å—Ç—Ä–æ–∫–∏:"
        "–û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ! –ù–∏–∫–∞–∫–∏–µ –∞–±—Ö–∞–∑—Å–∫–∏–µ –Ω–µ –Ω—É–∂–Ω—ã))"
        "–û–≥—Ä–æ–º–Ω—ã–µ, –ø–æ—á—Ç–∏ –±–µ–∑–≤–∫—É—Å–Ω—ã–µ"
        "–•–æ—Ä–æ—à–∏–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –≤ –º–µ—Ä—É —Å–ª–∞–¥–∫–∏–µ, –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫"
        "–ù–∞–º –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å! –û—á–µ–Ω—å —Å–≤–µ–∂–∏–µ, –ø—Ä—è–º–æ  –Ω–∞ –≤–µ—Ç–æ—á–∫–∞—Ö –∏ —Å –ª–∏—Å—Ç–∏–∫–∞–º–∏. –¢–æ–Ω–∫–∞—è —à–∫—É—Ä–∫–∞, —á–∏—Å—Ç–∏—Ç—Å—è –ª–µ–≥–∫–æ. –ù–∞ –≤–∫—É—Å —Ä–∞–∑–Ω—ã–µ. –ó–∞–≤–∏—Å–∏—Ç –æ—Ç —Å–ø–µ–ª–æ—Å—Ç–∏ –º–∞–Ω–¥–∞—Ä–∏–Ω–∞. –ü–æ–ø–∞–¥–∞—é—Ç—Å—è –∏ –∫–∏—Å–ª–µ–Ω—å–∫–∏–µ, –∏ —Å–ª–∞–¥–∫–∏–µ, –∏ –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫–∏–µ, –Ω–æ –≤—Å–µ —Å–æ—á–Ω—ã–µ –∏ –Ω–µ–∂–Ω—ã–µ. –°–ø–∞—Å–∏–±–æ –¥–æ—Å—Ç–∞–≤–∫–µ –ø–æ –≥–æ—Ä–æ–¥—É –û–¥–∏–Ω—Ü–æ–≤–æ!"
        "–í–∫—É—Å–Ω—ã–µ —Å–≤–µ–∂–∏–µ"
        "–ú–∞–Ω–¥–∞—Ä–∏–Ω—ã –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫, –ª–µ–≥–∫–æ —á–∏—Å—Ç—è—Ç—Å—è, –≤ –æ—Å–Ω–æ–≤–Ω–æ–º —Å–ª–∞–¥–∫–∏–µ. –ü–æ –∫–∏—Å–ª–æ—Å—Ç–∏ –ø–æ–ø–∞–¥–∞—é—Ç—Å—è —Ä–∞–∑–Ω—ã–µ, –ª–∏—á–Ω–æ –º–Ω–µ –Ω—Ä–∞–≤—è—Ç—Å—è —Ç–µ, —á—Ç–æ –ø–æ–∫–∏—Å–ª–µ–µ. –î–∞–∂–µ –≤ –æ–¥–Ω–æ–º –º–∞–Ω–¥–∞—Ä–∏–Ω–µ –∏–Ω–æ–≥–¥–∞ —á–∞—Å—Ç—å –¥–æ–ª–µ–∫ –∫–∏—Å–ª–µ–µ –¥—Ä—É–≥–∏—Ö - —Å—Ç—Ä–∞–Ω–Ω–æ. –ò–Ω–æ–≥–¥–∞ –ø–æ–ø–∞–¥–∞—é—Ç—Å—è –ø–æ–¥—Å—É—à–µ–Ω–Ω—ã–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –∫–∞–∫ –±—É–¥—Ç–æ –∏—Ö –ø–µ—Ä–µ–º–æ—Ä–æ–∑–∏–ª–∏, –Ω–æ –≤ –æ—Å–Ω–æ–≤–Ω–æ–º –º–∞–Ω–¥–∞—Ä–∏–Ω—ã —Ö–æ—Ä–æ—à–∏–µ."
        "–ù–µ–ø–ª–æ—Ö–∏–µ, –Ω–æ –Ω–µ–º–Ω–æ–≥–æ –∫–∏—Å–ª–æ–≤–∞—Ç—ã."
        "—Å–ª–∞–¥–∫–∏–µ, –∫–æ—Å—Ç–æ—á–µ–∫ –Ω–µ –ø–æ–ø–∞–ª–æ—Å—å, –∫–æ–∂—É—Ä–∞ —Ç–æ–Ω–∫–∞—è –∏ –ª–µ–≥–∫–æ —á–∏—Å—Ç–∏—Ç—Å—è"
        "–°–ø–∞—Å–∏–±–æ, —Ç–æ–Ω–∫–æ–∫–æ—Ä—ã–µ,—Å–æ—á–Ω—ã–µ ,—Å–ª–∞–¥–∫–∏–µ,–æ—á–µ–Ω—å –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å"
        "–ú–∞–Ω–¥–∞—Ä–∏–Ω—ã –æ—á–µ–Ω—å —Å–ª–∞–¥–∫–∏–µ. –í–∫—É—Å–Ω—ã–µ"
        "–õ—é–±–∏–º—ã–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã"
        "–ë–µ–∑–≤–∫—É—Å–Ω—ã–µ. –ò—Ä–∞–Ω"
        "–∏–∑ —ç—Ç–æ–≥–æ —Å–ø–∏—Å–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –≤—ã–¥–µ–ª—è–µ–º —Å–ª–µ–¥—É—é—â–∏–µ –∞—Ç—Ä–∏–±—É—Ç—ã –∏ –∏—Ö —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏:"
        "–≤–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, –±–µ–∑–≤–∫—É—Å–Ω—ã–π"
        "—Ä–∞–∑–º–µ—Ä: –æ–≥—Ä–æ–º–Ω—ã–π"
        "–∫–æ–∂—É—Ä–∞: —Ç–æ–Ω–∫–∞—è –∏ –ª–µ–≥–∫–æ —á–∏—Å—Ç–∏—Ç—Å—è"
        "—Å—Ç—Ä—É–∫—Ç—É—Ä–∞: –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫, –ª–µ–≥–∫–æ —á–∏—Å—Ç—è—Ç—Å—è"
        "—Ç–µ–∫—Å—Ç—É—Ä–∞: —Å–æ—á–Ω—ã–µ, –ø–æ–¥—Å—É—à–µ–Ω–Ω—ã–µ"
        "–ù–∞–ø—Ä–∏–º–µ—Ä, –µ—Å–ª–∏ –ø–∏—à—É—Ç, —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç –∫–∏—Å–ª—ã–π, —Å–ª–∞–¥–∫–∏–π, –≥–æ—Ä—å–∫–∏–π, —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –≤–∫—É—Å, –µ—Å–ª–∏ –ø–∏—à—É—Ç —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç —Å –∫–æ–º–æ—á–∫–∞–º–∏, –≤–æ–ª–æ–∫–Ω–∏—Å—Ç—ã–π, —Ç–æ —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è." 
        "–ï—Å–ª–∏ –≤ –æ—Ç–∑—ã–≤–µ —É–ø–æ–º—è–Ω—É—Ç–æ –Ω–µ—Å–∫–æ–ª—å–∫–æ –∞—Ç—Ä–∏–±—É—Ç–æ–≤, –∏–∑–≤–ª–µ–∫–∞–π –∏—Ö –≤—Å–µ. "
        "–í–æ–∑–≤—Ä–∞—â–∞–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç –≤ —Ñ–æ—Ä–º–∞—Ç–µ: —Å–ø–∏—Å–æ–∫ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ –∏ –∏—Ö –æ–ø–∏—Å–∞–Ω–∏–π."
        f"–û—Ç–∑—ã–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª—è: {review}\n"
        f"–û–∂–∏–¥–∞–µ–º—ã–π –æ—Ç–≤–µ—Ç: {attributes}\n"
    )
    return prompt

In [17]:
# —Ç—É—Ç –ø—Ä–æ—Å—Ç–æ –ø—Ä–æ–º–ø—Ç –ø–æ–º–µ–Ω—å—à–µ
# def generate_prompt(example):
#     review = example["repr_text"]
#     attributes = format_attributes(example["attribute_descriptions"])
#     prompt = (
        # " –¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤–∞—Ä—ã. "
        # "–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ–º –ø—Ä–æ–¥—É–∫—Ç–µ. –í—ã–¥–µ–ª–∏ –∞—Ç—Ä–∏–±—É—Ç—ã —Ç–æ–≤–∞—Ä–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≤–∫—É—Å, –∑–∞–ø–∞—Ö, —Ç–µ–∫—Å—Ç—É—Ä–∞, —Ü–≤–µ—Ç), –ø—Ä–æ –∫–æ—Ç–æ—Ä—ã–µ –ø–∏—à—É—Ç –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏."
        # "–£ –æ–¥–Ω–æ–≥–æ –∞—Ç—Ä–∏–±—É—Ç–∞ –º–æ–∂–µ—Ç –±—ã—Ç—å –º–Ω–æ–≥–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫. –ê—Ç—Ä–∏–±—É—Ç –≤—ã—Ä–∞–∂–∞–µ—Ç—Å—è 1-3 —Å–ª–æ–≤–∞–º–∏ –≤—Å–µ–≥–¥–∞, –∫—Ä–∞—Ç–∫–æ –∏ –µ–º–∫–æ"
        # "–ß–∞—â–µ –≤—Å–µ–≥–æ –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏ –æ–ø–∏—Å—ã–≤–∞—é—Ç —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–ª–∞–¥–∫–∏–π, —Å–≤–µ–∂–∏–π, –º—è–≥–∫–∏–π, —Å–æ—á–Ω—ã–π), –∞ —Ç–µ–±–µ –Ω—É–∂–Ω–æ –≤—ã–¥–µ–ª–∏—Ç—å –∏–º–µ–Ω–Ω–æ —Å–∞–º–∏ –∞—Ç—Ä–∏–±—É—Ç—ã."
        # "–ù–∞–ø—Ä–∏–º–µ—Ä, –µ—Å–ª–∏ –ø–∏—à—É—Ç, —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç –∫–∏—Å–ª—ã–π, —Å–ª–∞–¥–∫–∏–π, –≥–æ—Ä—å–∫–∏–π, —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –≤–∫—É—Å, –µ—Å–ª–∏ –ø–∏—à—É—Ç —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç —Å –∫–æ–º–æ—á–∫–∞–º–∏, –≤–æ–ª–æ–∫–Ω–∏—Å—Ç—ã–π, —Ç–æ —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è." 
        # "–ï—Å–ª–∏ –≤ –æ—Ç–∑—ã–≤–µ —É–ø–æ–º—è–Ω—É—Ç–æ –Ω–µ—Å–∫–æ–ª—å–∫–æ –∞—Ç—Ä–∏–±—É—Ç–æ–≤, –∏–∑–≤–ª–µ–∫–∞–π –∏—Ö –≤—Å–µ. "
        # "–í–æ–∑–≤—Ä–∞—â–∞–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç –≤ —Ñ–æ—Ä–º–∞—Ç–µ: —Å–ø–∏—Å–æ–∫ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ –∏ –∏—Ö –æ–ø–∏—Å–∞–Ω–∏–π."
#         f"–û—Ç–∑—ã–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª—è: {review}\n"
#         f"–û–∂–∏–¥–∞–µ–º—ã–π –æ—Ç–≤–µ—Ç: {attributes}\n"
#     )
#     return prompt

In [18]:
dataset[0]

{'repr_text': '–û—á–µ–Ω—å —Ö–æ—Ä–æ—à–∞—è\n–û—á–µ–Ω—å —Ö–æ—Ä–æ—à–∞—è —Å–º–µ—Ç–∞–Ω–∞ –æ—Ç –õ–µ–±–µ–¥—è–Ω—å–º–æ–ª–æ–∫–æ. –°–ø–∞—Å–∏–±–æ.\n–û—Ç–ª–∏—á–Ω–∞—è —Å–º–µ—Ç–∞–Ω–∞\n–•–æ—Ä–æ—à–∞—è —Å–º–µ—Ç–∞–Ω–∞, –æ—á–µ–Ω—å –Ω—Ä–∞–≤–∏—Ç—Å—è –ü–µ–Ω–∑–µ–Ω—Å–∫–∏–π –∫–æ–º–±–∏–Ω–∞—Ç, –ù-–ì—Ä—É–ø–ø —Ö—É–∂–µ –ø–æ —Å—Ä–∞–≤–Ω–µ–Ω–∏—é.\n–ì—É—Å—Ç–∞—è —Å–º–µ—Ç–∞–Ω–∞, –≤–∫—É—Å –æ—Ç–ª–∏—á–Ω—ã–π.\n–í–∫—É—Å–Ω–∞—è! \n–≤–∫—É—Å–Ω–∞—è \n–•–æ—Ä–æ—à–∞—è —Å–º–µ—Ç–∞–Ω–∞  –ò–∑ –¥—Ä—É–≥–æ–≥–æ –ø—Ä–æ–∏–∑–≤–æ–¥—Å—Ç–≤–∞ –ú–Ω–µ –ø–æ–Ω—Ä–∞–≤–∏–ª–æ—Å—å –¢–µ–ø–µ—Ä—å –±—É–¥—É –∑–∞–ø–æ–º–∏–Ω–∞—Ç—å —Ç—É –ø—Ä–æ–∏–∑–≤–æ–¥—Å—Ç–≤–∞ üòä\n–û—á–µ–Ω—å –≤–∫—É—Å–Ω–∞—è, —Å–≤–µ–∂–∞—è —Å–º–µ—Ç–∞–Ω–∞!\n–ì—É—Å—Ç–∞—è, –Ω–∏—á–µ–≥–æ –Ω–µ —Å–∫–∞–∂–µ—à—å, –Ω–æ –≤–∫—É—Å–∞ –Ω–∞—Ç—É—Ä–∞–ª—å–Ω–æ–≥–æ –ø—Ä–æ–¥—É–∫—Ç–∞ –ù–ï–¢! –õ—É—á—à–∞—è –ø–æ –≤–∫—É—Å—É –ù- –≥—Ä—É–ø–ø\n–ì–£–°–¢–ê–Ø –ò –û–ß–ï–ù–¨ –í–ö–£–°–ù–ê–Ø –°–ú–ï–¢–ê–ù–ö–ê!\n–í–∫—É—Å–Ω–µ–π—à–∞—è\n–í–∫—É—Å–Ω—é—â–∞—è.\n–í–∫—É—Å–Ω–∞—è –ø–µ–Ω–∑–µ–Ω—Å–∫–∞—è —Å–º–µ—Ç–∞–Ω–∞üëçüèª\n–û—Ç–ª–∏—á–Ω–∞—è \n–í–∫—É—Å–

In [19]:
generate_prompt(dataset[0])

'–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤–∞—Ä—ã. –¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ–º –ø—Ä–æ–¥—É–∫—Ç–µ. –í—ã–¥–µ–ª–∏ –∞—Ç—Ä–∏–±—É—Ç—ã —Ç–æ–≤–∞—Ä–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≤–∫—É—Å, –∑–∞–ø–∞—Ö, —Ç–µ–∫—Å—Ç—É—Ä–∞, —Ü–≤–µ—Ç), –ø—Ä–æ –∫–æ—Ç–æ—Ä—ã–µ –ø–∏—à—É—Ç –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏.–£ –æ–¥–Ω–æ–≥–æ –∞—Ç—Ä–∏–±—É—Ç–∞ –º–æ–∂–µ—Ç –±—ã—Ç—å –º–Ω–æ–≥–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫. –ê—Ç—Ä–∏–±—É—Ç –≤—ã—Ä–∞–∂–∞–µ—Ç—Å—è 1-3 —Å–ª–æ–≤–∞–º–∏ –≤—Å–µ–≥–¥–∞, –∫—Ä–∞—Ç–∫–æ –∏ –µ–º–∫–æ–ß–∞—â–µ –≤—Å–µ–≥–æ –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏ –æ–ø–∏—Å—ã–≤–∞—é—Ç —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–ª–∞–¥–∫–∏–π, —Å–≤–µ–∂–∏–π, –º—è–≥–∫–∏–π, —Å–æ—á–Ω—ã–π), –∞ —Ç–µ–±–µ –Ω—É–∂–Ω–æ –≤—ã–¥–µ–ª–∏—Ç—å –∏–º–µ–Ω–Ω–æ —Å–∞–º–∏ –∞—Ç—Ä–∏–±—É—Ç—ã.–ù–∏–∂–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–ø–∏—Å–æ–∫ –æ—Ç–∑—ã–≤–æ–≤ –Ω–∞ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –∫–∞–∂–¥—ã–π –Ω–æ–≤—ã–π –æ—Ç–∑—ã–≤ —Å –Ω–æ–≤–æ–π —Å—Ç—Ä–æ–∫–∏:–û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ, —Å–ª–∞–¥

In [20]:
# –ü—Ä–∏–º–µ–Ω—è–µ–º —Ñ—É–Ω–∫—Ü–∏—é generate_prompt –∫–æ –≤—Å–µ–º—É –¥–∞—Ç–∞—Å–µ—Ç—É, —Å–æ–∑–¥–∞–≤–∞—è –Ω–æ–≤—É—é –∫–æ–ª–æ–Ω–∫—É "text"
dataset = dataset.map(lambda x: {"text": generate_prompt(x)})

Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10387/10387 [00:01<00:00, 9087.31 examples/s]


In [21]:
def tokenize_function(example):
    return tokenizer(example["text"]
                     # , truncation=True, max_length=512
                    )

# –ü—Ä–∏–º–µ–Ω—è–µ–º —Ç–æ–∫–µ–Ω–∏–∑–∞—Ü–∏—é –∫ –¥–∞—Ç–∞—Å–µ—Ç—É; —É–±–∏—Ä–∞–µ–º –ª–∏—à–Ω–∏–µ –∫–æ–ª–æ–Ω–∫–∏
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=dataset.column_names)

Map:   0%|                                     | 0/10387 [00:00<?, ? examples/s]Token indices sequence length is longer than the specified maximum sequence length for this model (4900 > 1512). Running this sequence through the model will result in indexing errors
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10387/10387 [00:04<00:00, 2336.94 examples/s]


### –¥–æ–æ–±—É—á–µ–Ω–∏–µ

In [22]:
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    args=TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=50, #100,
        max_steps=2000, #400,
        learning_rate=1e-4, #2e-4,
        fp16=True,
        logging_steps=10,
        report_to="wandb",
        output_dir='outputs' # –ø–∞–ø–∫–∞ –¥–ª—è —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False, )
)

In [23]:
# –õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤ TrainingArguments
wandb.config.update({"training_args": trainer.args.to_dict()})

In [24]:
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()



Step,Training Loss
10,2.9833
20,2.3274
30,1.8755
40,1.0022
50,1.2144
60,1.3984
70,1.5034
80,1.3683
90,1.148
100,0.9937




TrainOutput(global_step=2000, training_loss=1.1498014419078826, metrics={'train_runtime': 11344.7798, 'train_samples_per_second': 0.705, 'train_steps_per_second': 0.176, 'total_flos': 3.525177901601587e+17, 'train_loss': 1.1498014419078826, 'epoch': 0.7701935111196688})

In [25]:
model.save_pretrained('Vikhr-7B-instruct_0.4_lora_r32_medium_prompt')

### –ø—Ä–æ–≤–µ—Ä—è–µ–º 

In [1]:
# –ø–µ—Ä–µ–¥ –∑–∞–ø—É—Å–∫–æ–º —ç—Ç–æ–π —è—á–µ–π–∫–∏ –Ω—É–∂–Ω–æ –ø–µ—Ä–µ–∑–∞–ø—É—Å—Ç–∏—Ç—å –∫–µ—Ä–Ω–µ–ª
import torch
import pandas as pd
from tqdm import tqdm
tqdm.pandas()
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline


# peft_model_id = "Vikhr-7B-instruct_0.4_lora_r32" 
# peft_model_id = "Vikhr-7B-instruct_0.4_lora_r32_long_prompt" 
peft_model_id = "Vikhr-7B-instruct_0.4_lora_r32_medium_prompt" 
# peft_model_id = "Vikhr-7B-instruct_0.4_lora_r64_long_prompt" 
# peft_model_id = "Vikhr-7B-instruct_0.4_lora_r64_medium_prompt" 
# peft_model_id = "Vikhr-7B-instruct_0.4_lora_r64_long_prompt_50_2000_1e-4"


model_name = "Vikhrmodels/Vikhr-7B-instruct_0.4"

quantization_config = BitsAndBytesConfig(
        load_in_8bit=True
    )
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name,
                                             return_dict=True, 
                                             quantization_config=quantization_config,
                                             device_map='auto'
                                            )
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:11<00:00,  2.87s/it]


In [2]:
config = dict(
    max_new_tokens=512, 
    do_sample=True, 
    num_beams=1, 
    temperature=0.25, 
    top_k=50, 
    top_p=0.98, 
    eos_token_id=79097
)

In [3]:
DEFAULT_SYSTEM_PROMPT  = ("–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤–∞—Ä—ã. "
        "–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ–º –ø—Ä–æ–¥—É–∫—Ç–µ. –í—ã–¥–µ–ª–∏ –∞—Ç—Ä–∏–±—É—Ç—ã —Ç–æ–≤–∞—Ä–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≤–∫—É—Å, –∑–∞–ø–∞—Ö, —Ç–µ–∫—Å—Ç—É—Ä–∞, —Ü–≤–µ—Ç), –ø—Ä–æ –∫–æ—Ç–æ—Ä—ã–µ –ø–∏—à—É—Ç –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏."
        "–£ –æ–¥–Ω–æ–≥–æ –∞—Ç—Ä–∏–±—É—Ç–∞ –º–æ–∂–µ—Ç –±—ã—Ç—å –º–Ω–æ–≥–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫. –ê—Ç—Ä–∏–±—É—Ç –≤—ã—Ä–∞–∂–∞–µ—Ç—Å—è 1-3 —Å–ª–æ–≤–∞–º–∏ –≤—Å–µ–≥–¥–∞, –∫—Ä–∞—Ç–∫–æ –∏ –µ–º–∫–æ"
        "–ß–∞—â–µ –≤—Å–µ–≥–æ –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏ –æ–ø–∏—Å—ã–≤–∞—é—Ç —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–ª–∞–¥–∫–∏–π, —Å–≤–µ–∂–∏–π, –º—è–≥–∫–∏–π, —Å–æ—á–Ω—ã–π), –∞ —Ç–µ–±–µ –Ω—É–∂–Ω–æ –≤—ã–¥–µ–ª–∏—Ç—å –∏–º–µ–Ω–Ω–æ —Å–∞–º–∏ –∞—Ç—Ä–∏–±—É—Ç—ã."
        "–ù–∏–∂–µ –ø—Ä–µ–¥—Å—Ç–∞–≤–ª–µ–Ω —Å–ø–∏—Å–æ–∫ –æ—Ç–∑—ã–≤–æ–≤ –Ω–∞ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –∫–∞–∂–¥—ã–π –Ω–æ–≤—ã–π –æ—Ç–∑—ã–≤ —Å –Ω–æ–≤–æ–π —Å—Ç—Ä–æ–∫–∏:"
        "–û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ! –ù–∏–∫–∞–∫–∏–µ –∞–±—Ö–∞–∑—Å–∫–∏–µ –Ω–µ –Ω—É–∂–Ω—ã))"
        "–û–≥—Ä–æ–º–Ω—ã–µ, –ø–æ—á—Ç–∏ –±–µ–∑–≤–∫—É—Å–Ω—ã–µ"
        "–•–æ—Ä–æ—à–∏–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –≤ –º–µ—Ä—É —Å–ª–∞–¥–∫–∏–µ, –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫"
        "–ù–∞–º –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å! –û—á–µ–Ω—å —Å–≤–µ–∂–∏–µ, –ø—Ä—è–º–æ  –Ω–∞ –≤–µ—Ç–æ—á–∫–∞—Ö –∏ —Å –ª–∏—Å—Ç–∏–∫–∞–º–∏. –¢–æ–Ω–∫–∞—è —à–∫—É—Ä–∫–∞, —á–∏—Å—Ç–∏—Ç—Å—è –ª–µ–≥–∫–æ. –ù–∞ –≤–∫—É—Å —Ä–∞–∑–Ω—ã–µ. –ó–∞–≤–∏—Å–∏—Ç –æ—Ç —Å–ø–µ–ª–æ—Å—Ç–∏ –º–∞–Ω–¥–∞—Ä–∏–Ω–∞. –ü–æ–ø–∞–¥–∞—é—Ç—Å—è –∏ –∫–∏—Å–ª–µ–Ω—å–∫–∏–µ, –∏ —Å–ª–∞–¥–∫–∏–µ, –∏ –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫–∏–µ, –Ω–æ –≤—Å–µ —Å–æ—á–Ω—ã–µ –∏ –Ω–µ–∂–Ω—ã–µ. –°–ø–∞—Å–∏–±–æ –¥–æ—Å—Ç–∞–≤–∫–µ –ø–æ –≥–æ—Ä–æ–¥—É –û–¥–∏–Ω—Ü–æ–≤–æ!"
        "–í–∫—É—Å–Ω—ã–µ —Å–≤–µ–∂–∏–µ"
        "–ú–∞–Ω–¥–∞—Ä–∏–Ω—ã –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫, –ª–µ–≥–∫–æ —á–∏—Å—Ç—è—Ç—Å—è, –≤ –æ—Å–Ω–æ–≤–Ω–æ–º —Å–ª–∞–¥–∫–∏–µ. –ü–æ –∫–∏—Å–ª–æ—Å—Ç–∏ –ø–æ–ø–∞–¥–∞—é—Ç—Å—è —Ä–∞–∑–Ω—ã–µ, –ª–∏—á–Ω–æ –º–Ω–µ –Ω—Ä–∞–≤—è—Ç—Å—è —Ç–µ, —á—Ç–æ –ø–æ–∫–∏—Å–ª–µ–µ. –î–∞–∂–µ –≤ –æ–¥–Ω–æ–º –º–∞–Ω–¥–∞—Ä–∏–Ω–µ –∏–Ω–æ–≥–¥–∞ —á–∞—Å—Ç—å –¥–æ–ª–µ–∫ –∫–∏—Å–ª–µ–µ –¥—Ä—É–≥–∏—Ö - —Å—Ç—Ä–∞–Ω–Ω–æ. –ò–Ω–æ–≥–¥–∞ –ø–æ–ø–∞–¥–∞—é—Ç—Å—è –ø–æ–¥—Å—É—à–µ–Ω–Ω—ã–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã, –∫–∞–∫ –±—É–¥—Ç–æ –∏—Ö –ø–µ—Ä–µ–º–æ—Ä–æ–∑–∏–ª–∏, –Ω–æ –≤ –æ—Å–Ω–æ–≤–Ω–æ–º –º–∞–Ω–¥–∞—Ä–∏–Ω—ã —Ö–æ—Ä–æ—à–∏–µ."
        "–ù–µ–ø–ª–æ—Ö–∏–µ, –Ω–æ –Ω–µ–º–Ω–æ–≥–æ –∫–∏—Å–ª–æ–≤–∞—Ç—ã."
        "—Å–ª–∞–¥–∫–∏–µ, –∫–æ—Å—Ç–æ—á–µ–∫ –Ω–µ –ø–æ–ø–∞–ª–æ—Å—å, –∫–æ–∂—É—Ä–∞ —Ç–æ–Ω–∫–∞—è –∏ –ª–µ–≥–∫–æ —á–∏—Å—Ç–∏—Ç—Å—è"
        "–°–ø–∞—Å–∏–±–æ, —Ç–æ–Ω–∫–æ–∫–æ—Ä—ã–µ,—Å–æ—á–Ω—ã–µ ,—Å–ª–∞–¥–∫–∏–µ,–æ—á–µ–Ω—å –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å"
        "–ú–∞–Ω–¥–∞—Ä–∏–Ω—ã –æ—á–µ–Ω—å —Å–ª–∞–¥–∫–∏–µ. –í–∫—É—Å–Ω—ã–µ"
        "–õ—é–±–∏–º—ã–µ –º–∞–Ω–¥–∞—Ä–∏–Ω—ã"
        "–ë–µ–∑–≤–∫—É—Å–Ω—ã–µ. –ò—Ä–∞–Ω"
        "–∏–∑ —ç—Ç–æ–≥–æ —Å–ø–∏—Å–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –≤—ã–¥–µ–ª—è–µ–º —Å–ª–µ–¥—É—é—â–∏–µ –∞—Ç—Ä–∏–±—É—Ç—ã –∏ –∏—Ö —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏:"
        "–≤–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, –±–µ–∑–≤–∫—É—Å–Ω—ã–π"
        "—Ä–∞–∑–º–µ—Ä: –æ–≥—Ä–æ–º–Ω—ã–π"
        "–∫–æ–∂—É—Ä–∞: —Ç–æ–Ω–∫–∞—è –∏ –ª–µ–≥–∫–æ —á–∏—Å—Ç–∏—Ç—Å—è"
        "—Å—Ç—Ä—É–∫—Ç—É—Ä–∞: –±–µ–∑ –∫–æ—Å—Ç–æ—á–µ–∫, –ª–µ–≥–∫–æ —á–∏—Å—Ç—è—Ç—Å—è"
        "—Ç–µ–∫—Å—Ç—É—Ä–∞: —Å–æ—á–Ω—ã–µ, –ø–æ–¥—Å—É—à–µ–Ω–Ω—ã–µ"
        "–ù–∞–ø—Ä–∏–º–µ—Ä, –µ—Å–ª–∏ –ø–∏—à—É—Ç, —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç –∫–∏—Å–ª—ã–π, —Å–ª–∞–¥–∫–∏–π, –≥–æ—Ä—å–∫–∏–π, —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –≤–∫—É—Å, –µ—Å–ª–∏ –ø–∏—à—É—Ç —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç —Å –∫–æ–º–æ—á–∫–∞–º–∏, –≤–æ–ª–æ–∫–Ω–∏—Å—Ç—ã–π, —Ç–æ —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è." 
        "–ï—Å–ª–∏ –≤ –æ—Ç–∑—ã–≤–µ —É–ø–æ–º—è–Ω—É—Ç–æ –Ω–µ—Å–∫–æ–ª—å–∫–æ –∞—Ç—Ä–∏–±—É—Ç–æ–≤, –∏–∑–≤–ª–µ–∫–∞–π –∏—Ö –≤—Å–µ. "
        "–í–æ–∑–≤—Ä–∞—â–∞–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç –≤ —Ñ–æ—Ä–º–∞—Ç–µ: —Å–ø–∏—Å–æ–∫ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ –∏ –∏—Ö –æ–ø–∏—Å–∞–Ω–∏–π.")

In [3]:
# DEFAULT_SYSTEM_PROMPT = (" –¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤–∞—Ä—ã. "
#         "–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ–º –ø—Ä–æ–¥—É–∫—Ç–µ. –í—ã–¥–µ–ª–∏ –∞—Ç—Ä–∏–±—É—Ç—ã —Ç–æ–≤–∞—Ä–∞ (–Ω–∞–ø—Ä–∏–º–µ—Ä, –≤–∫—É—Å, –∑–∞–ø–∞—Ö, —Ç–µ–∫—Å—Ç—É—Ä–∞, —Ü–≤–µ—Ç), –ø—Ä–æ –∫–æ—Ç–æ—Ä—ã–µ –ø–∏—à—É—Ç –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏."
#         "–£ –æ–¥–Ω–æ–≥–æ –∞—Ç—Ä–∏–±—É—Ç–∞ –º–æ–∂–µ—Ç –±—ã—Ç—å –º–Ω–æ–≥–æ —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫. –ê—Ç—Ä–∏–±—É—Ç –≤—ã—Ä–∞–∂–∞–µ—Ç—Å—è 1-3 —Å–ª–æ–≤–∞–º–∏ –≤—Å–µ–≥–¥–∞, –∫—Ä–∞—Ç–∫–æ –∏ –µ–º–∫–æ"
#         "–ß–∞—â–µ –≤—Å–µ–≥–æ –ø–æ–∫—É–ø–∞—Ç–µ–ª–∏ –æ–ø–∏—Å—ã–≤–∞—é—Ç —Ö–∞—Ä–∞–∫—Ç–µ—Ä–∏—Å—Ç–∏–∫–∏ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ (–Ω–∞–ø—Ä–∏–º–µ—Ä, —Å–ª–∞–¥–∫–∏–π, —Å–≤–µ–∂–∏–π, –º—è–≥–∫–∏–π, —Å–æ—á–Ω—ã–π), –∞ —Ç–µ–±–µ –Ω—É–∂–Ω–æ –≤—ã–¥–µ–ª–∏—Ç—å –∏–º–µ–Ω–Ω–æ —Å–∞–º–∏ –∞—Ç—Ä–∏–±—É—Ç—ã."
#         "–ù–∞–ø—Ä–∏–º–µ—Ä, –µ—Å–ª–∏ –ø–∏—à—É—Ç, —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç –∫–∏—Å–ª—ã–π, —Å–ª–∞–¥–∫–∏–π, –≥–æ—Ä—å–∫–∏–π, —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –≤–∫—É—Å, –µ—Å–ª–∏ –ø–∏—à—É—Ç —á—Ç–æ –ø—Ä–æ–¥—É–∫—Ç —Å –∫–æ–º–æ—á–∫–∞–º–∏, –≤–æ–ª–æ–∫–Ω–∏—Å—Ç—ã–π, —Ç–æ —ç—Ç–æ –∞—Ç—Ä–∏–±—É—Ç –∫–æ–Ω—Å–∏—Å—Ç–µ–Ω—Ü–∏—è." 
#         "–ï—Å–ª–∏ –≤ –æ—Ç–∑—ã–≤–µ —É–ø–æ–º—è–Ω—É—Ç–æ –Ω–µ—Å–∫–æ–ª—å–∫–æ –∞—Ç—Ä–∏–±—É—Ç–æ–≤, –∏–∑–≤–ª–µ–∫–∞–π –∏—Ö –≤—Å–µ. "
#         "–í–æ–∑–≤—Ä–∞—â–∞–π —Ä–µ–∑—É–ª—å—Ç–∞—Ç –≤ —Ñ–æ—Ä–º–∞—Ç–µ: —Å–ø–∏—Å–æ–∫ –∞—Ç—Ä–∏–±—É—Ç–æ–≤ –∏ –∏—Ö –æ–ø–∏—Å–∞–Ω–∏–π.")

In [4]:
# –ó–∞–≥—Ä—É–∑–∏–º LoRA –≤–µ—Å–∞ 
model = PeftModel.from_pretrained(model, peft_model_id)

In [5]:
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'Jam

In [6]:
prompt = tokenizer.apply_chat_template([{
    "role": "system",
    "content": DEFAULT_SYSTEM_PROMPT
}, {
    "role": "user",
    "content": "–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω–µ –æ—á–µ–Ω—å –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å —è–±–ª–æ–∫–∏ —ç—Ç–æ–≥–æ —Å–æ—Ä—Ç–∞. –°–æ—á–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ! –í–∞—Ç–∞, –∞ –Ω–µ —è–±–ª–æ–∫–∏ –°–æ—á–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ –ü—Ä–∏–≤–µ–∑–ª–∏ –æ—á–µ–Ω—å –∫—Ä—É–ø–Ω—ã–µ —è–±–ª–æ–∫–∏! –ù–∞ –≤–∫—É—Å –∞—Ä–æ–º–∞—Ç–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ. –î–ª—è —Ç–µ—Ö –∫—Ç–æ –ª—é–±–∏—Ç –º—è–≥–∫–∏–µ. –°–æ—á–Ω—ã–µ —Å–ª–∞–¥–∫–∏–µ –∫—Ä–µ–ø–∫–∏–µ –Ø–±–ª–æ–∫–∏ –ø—Ä–æ—Å—Ç–æ –≤–æ—Å—Ç–æ—Ä–≥!- –Ω–µ –æ–∂–∏–¥–∞–ª–∞! –°–æ—á–Ω—ã–µ, –∞—Ä–æ–º–∞—Ç–Ω—ã–µ, –Ω–µ –∂–µ—Å—Ç–∫–∏–µ- –¥–∞–∂–µ –∫–æ–∂–∞ —Ö–æ—Ä–æ—à–æ –∂—É—ë—Ç—Å—è! –°–ª–∞–¥–∫–∏–µ. –ë—Ä–∞–ª–∞ —Å–æ —Å–∫–∏–¥–∫–æ–π. –ó–∞–∫–∞–∂—É –µ—â—ë –Ω–∞ –∑–∞–≤—Ç—Ä–∞. –ù–µ —É–≤–µ—Ä–µ–Ω–∞ —á—Ç–æ –ø—Ä–∏–≤–µ–∑—É—Ç —Ç–∞–∫–∏–µ –∂–µ- –∏–Ω–∞—á–µ –±—ã –∫–≥ 3 –∑–∞–∫–∞–∑–∞–ª–∞. –ü—Ä–∏–≤–µ–∑–ª–∏ –≤ –∑–∞–∫–∞–∑–µ —É–∂–∞—Å–Ω—ã–µ —è–±–ª–æ–∫–∏!!–ï—Å—Ç—å —Ç–∞–∫–∏–µ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ (( –ù–µ–±–æ–ª—å—à–∏–µ —Å–æ—á–Ω—ã–µ —Ö—Ä—É—Å—Ç—è—â–∏–µ —è–±–ª–æ—á–∫–∏, –±–ª–µ—Å–∫ –ö—Ä—É–ø–Ω–µ–Ω—å–∫–∏–µ –æ–∫–∞–∑–∞–ª–∏—Å—å, –æ–∂–∏–¥–∞–ª–∞ –º–µ–ª–∫–∏–µ —è–±–ª–æ—á–∫–∏. –ö—Ä–∞—Å–∏–≤–µ–µ, —á–µ–º –Ω–∞ —Ñ–æ—Ç–æ.) –ú–Ω–µ –¥–ª—è –ø–∏—Ä–æ–≥–∞. –°–ø–∞—Å–∏–±–æ. –í–ø–µ—Ä–≤—ã–µ –≤ –ø—Ä–æ–¥–∞–∂–µ —Ç–∞–∫–∏–µ –∫—Ä–∞—Å–∏–≤—ã–µ —Å–µ–∑–æ–Ω–Ω—ã–µ —è–±–ª–æ–∫–∏. –ò –≤–∫—É—Å–Ω—ã–µ. –Ø–±–ª–æ–∫–∏ –≤–∫—É—Å–Ω—ã–µ, –Ω–æ –æ—á–µ–Ω—å –±–æ–ª—å—à–∏–µ. –•–æ—Ç–µ–ª–æ—Å—å –±—ã –ø–æ–º–µ–Ω—å—à–µ —Ä–∞–∑–º–µ—Ä–æ–º. –ö—Ä—É–ø–Ω—ã–µ. –ú–∞–ª–æ –∫–æ—Å—Ç–æ—á–µ–∫. –ö–∏—Å–ª–æ —Å–ª–∞–¥–∫–∏–µ. –ú—è–∫–æ—Ç—å –º—è–≥–∫–∞—è –∏ —Å–æ—á–Ω–∞—è. –ö–æ–∂–∏—Ü–∞ –Ω–µ —Ç–æ–ª—Å—Ç–∞—è. –û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ! –£–∂–∞—Å–Ω—ã–µ —è–±–ª–æ–∫–∏, –∫–æ–∂—É—Ä–∞ –ø–æ—Ç–µ–º–Ω–µ–≤—à–∞—è, –≤–Ω—É—Ç—Ä–∏ –≤–∞—Ç–Ω—ã–µ –û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ —è–±–ª–æ–∫–∏. –°–æ—á–Ω—ã–µ. –ü—Ä–∏—è—Ç–Ω—ã–µ —è–±–ª–æ—á–∫–∏. –°–æ—á–Ω—ã–µ, –≤ –º–µ—Ä—É —Å–ª–∞–¥–∫–∏–µ. –ö–æ–∂–∏—Ü–∞ –Ω–µ—Å–∫–æ–ª—å–∫–æ –ø–ª–æ—Ç–Ω–æ–≤–∞—Ç–∞. –•–æ—Ä–æ—à–∏–µ –∫—Ä—É–ø–Ω—ã–µ —è–±–ª–æ–∫–∏. –í–∫—É—Å–Ω—ã–µ. –û—á–µ–Ω—å –∞—Ä–æ–º–∞—Ç–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ! –¢–µ, —á—Ç–æ —Ç–µ–º–Ω–æ-–∫—Ä–∞—Å–Ω—ã–µ –¥–∞–∂–µ –æ—á–µ–Ω—å —Å–ª–∞–¥–∫–∏–µ! –•–æ—Ä–æ—à–∏–µ –º–µ–ª–∫–∏–µ —Å–µ–∑–æ–Ω–Ω—ã–µ —è–±–ª–æ–∫–∏. –ê–¥–µ–∫–≤–∞—Ç–Ω–∞—è —Ü–µ–Ω–∞. –í–∫—É—Å–Ω—ã–µ —è–±–ª–æ–∫–∏, –¥—É—à–∏—Å—Ç—ã–µ"
}], tokenize=False, add_generation_prompt=True)
output = pipe(prompt, **config)
finetuned_output = output[0]['generated_text'][len(prompt):].strip()
finetuned_output

'–í–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, —Å–æ—á–Ω—ã–π, –∞—Ä–æ–º–∞—Ç–Ω—ã–π, –≤–∫—É—Å–Ω—ã–π\n–†–∞–∑–º–µ—Ä: –∫—Ä—É–ø–Ω—ã–µ, —Å—Ä–µ–¥–Ω–∏–µ, –º–µ–ª–∫–∏–µ, –æ—á–µ–Ω—å –±–æ–ª—å—à–∏–µ\n–ö–æ–∂–∏—Ü–∞: —Ç–æ–Ω–∫–∞—è, –Ω–µ–∂–Ω–∞—è, –ø–ª–æ—Ç–Ω–∞—è, –ø–æ—Ç–µ–º–Ω–µ–≤—à–∞—è\n–í–∫—É—Å: –∫–∏—Å–ª—ã–π, –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫–∏–π, –≤–∞—Ç–Ω—ã–π, —Ö—Ä—É—Å—Ç—è—â–∏–π\n–°–æ—á–Ω–æ—Å—Ç—å: —Å–æ—á–Ω—ã–µ, —Ö—Ä—É—Å—Ç—è—â–∏–µ, –º—è–≥–∫–∏–µ\n–í–∫—É—Å: –≤–∫—É—Å–Ω—ã–µ, –∞—Ä–æ–º–∞—Ç–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–

In [7]:
attr = pd.DataFrame()
attr['repr_text'] = ["test", "–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω–µ –æ—á–µ–Ω—å –ø–æ–Ω—Ä–∞–≤–∏–ª–∏—Å—å —è–±–ª–æ–∫–∏ —ç—Ç–æ–≥–æ —Å–æ—Ä—Ç–∞. –°–æ—á–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ! –í–∞—Ç–∞, –∞ –Ω–µ —è–±–ª–æ–∫–∏ –°–æ—á–Ω—ã–µ, –≤–∫—É—Å–Ω—ã–µ –ü—Ä–∏–≤–µ–∑–ª–∏ –æ—á–µ–Ω—å –∫—Ä—É–ø–Ω—ã–µ —è–±–ª–æ–∫–∏! –ù–∞ –≤–∫—É—Å –∞—Ä–æ–º–∞—Ç–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ. –î–ª—è —Ç–µ—Ö –∫—Ç–æ –ª—é–±–∏—Ç –º—è–≥–∫–∏–µ. –°–æ—á–Ω—ã–µ —Å–ª–∞–¥–∫–∏–µ –∫—Ä–µ–ø–∫–∏–µ –Ø–±–ª–æ–∫–∏ –ø—Ä–æ—Å—Ç–æ –≤–æ—Å—Ç–æ—Ä–≥!- –Ω–µ –æ–∂–∏–¥–∞–ª–∞! –°–æ—á–Ω—ã–µ, –∞—Ä–æ–º–∞—Ç–Ω—ã–µ, –Ω–µ –∂–µ—Å—Ç–∫–∏–µ- –¥–∞–∂–µ –∫–æ–∂–∞ —Ö–æ—Ä–æ—à–æ –∂—É—ë—Ç—Å—è! –°–ª–∞–¥–∫–∏–µ. –ë—Ä–∞–ª–∞ —Å–æ —Å–∫–∏–¥–∫–æ–π. –ó–∞–∫–∞–∂—É –µ—â—ë –Ω–∞ –∑–∞–≤—Ç—Ä–∞. –ù–µ —É–≤–µ—Ä–µ–Ω–∞ —á—Ç–æ –ø—Ä–∏–≤–µ–∑—É—Ç —Ç–∞–∫–∏–µ –∂–µ- –∏–Ω–∞—á–µ –±—ã –∫–≥ 3 –∑–∞–∫–∞–∑–∞–ª–∞. –ü—Ä–∏–≤–µ–∑–ª–∏ –≤ –∑–∞–∫–∞–∑–µ —É–∂–∞—Å–Ω—ã–µ —è–±–ª–æ–∫–∏!!–ï—Å—Ç—å —Ç–∞–∫–∏–µ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ (( –ù–µ–±–æ–ª—å—à–∏–µ —Å–æ—á–Ω—ã–µ —Ö—Ä—É—Å—Ç—è—â–∏–µ —è–±–ª–æ—á–∫–∏, –±–ª–µ—Å–∫ –ö—Ä—É–ø–Ω–µ–Ω—å–∫–∏–µ –æ–∫–∞–∑–∞–ª–∏—Å—å, –æ–∂–∏–¥–∞–ª–∞ –º–µ–ª–∫–∏–µ —è–±–ª–æ—á–∫–∏. –ö—Ä–∞—Å–∏–≤–µ–µ, —á–µ–º –Ω–∞ —Ñ–æ—Ç–æ.) –ú–Ω–µ –¥–ª—è –ø–∏—Ä–æ–≥–∞. –°–ø–∞—Å–∏–±–æ. –í–ø–µ—Ä–≤—ã–µ –≤ –ø—Ä–æ–¥–∞–∂–µ —Ç–∞–∫–∏–µ –∫—Ä–∞—Å–∏–≤—ã–µ —Å–µ–∑–æ–Ω–Ω—ã–µ —è–±–ª–æ–∫–∏. –ò –≤–∫—É—Å–Ω—ã–µ. –Ø–±–ª–æ–∫–∏ –≤–∫—É—Å–Ω—ã–µ, –Ω–æ –æ—á–µ–Ω—å –±–æ–ª—å—à–∏–µ. –•–æ—Ç–µ–ª–æ—Å—å –±—ã –ø–æ–º–µ–Ω—å—à–µ —Ä–∞–∑–º–µ—Ä–æ–º. –ö—Ä—É–ø–Ω—ã–µ. –ú–∞–ª–æ –∫–æ—Å—Ç–æ—á–µ–∫. –ö–∏—Å–ª–æ —Å–ª–∞–¥–∫–∏–µ. –ú—è–∫–æ—Ç—å –º—è–≥–∫–∞—è –∏ —Å–æ—á–Ω–∞—è. –ö–æ–∂–∏—Ü–∞ –Ω–µ —Ç–æ–ª—Å—Ç–∞—è. –û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ! –£–∂–∞—Å–Ω—ã–µ —è–±–ª–æ–∫–∏, –∫–æ–∂—É—Ä–∞ –ø–æ—Ç–µ–º–Ω–µ–≤—à–∞—è, –≤–Ω—É—Ç—Ä–∏ –≤–∞—Ç–Ω—ã–µ –û—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ —è–±–ª–æ–∫–∏. –°–æ—á–Ω—ã–µ. –ü—Ä–∏—è—Ç–Ω—ã–µ —è–±–ª–æ—á–∫–∏. –°–æ—á–Ω—ã–µ, –≤ –º–µ—Ä—É —Å–ª–∞–¥–∫–∏–µ. –ö–æ–∂–∏—Ü–∞ –Ω–µ—Å–∫–æ–ª—å–∫–æ –ø–ª–æ—Ç–Ω–æ–≤–∞—Ç–∞. –•–æ—Ä–æ—à–∏–µ –∫—Ä—É–ø–Ω—ã–µ —è–±–ª–æ–∫–∏. –í–∫—É—Å–Ω—ã–µ. –û—á–µ–Ω—å –∞—Ä–æ–º–∞—Ç–Ω—ã–µ –∏ —Å–ª–∞–¥–∫–∏–µ! –¢–µ, —á—Ç–æ —Ç–µ–º–Ω–æ-–∫—Ä–∞—Å–Ω—ã–µ –¥–∞–∂–µ –æ—á–µ–Ω—å —Å–ª–∞–¥–∫–∏–µ! –•–æ—Ä–æ—à–∏–µ –º–µ–ª–∫–∏–µ —Å–µ–∑–æ–Ω–Ω—ã–µ —è–±–ª–æ–∫–∏. –ê–¥–µ–∫–≤–∞—Ç–Ω–∞—è —Ü–µ–Ω–∞. –í–∫—É—Å–Ω—ã–µ —è–±–ª–æ–∫–∏, –¥—É—à–∏—Å—Ç—ã–µ"]
attr['prompt'] = ["test", DEFAULT_SYSTEM_PROMPT]
attr['ft_model'] = ["test", peft_model_id]
attr['ft_result'] = ["test", finetuned_output]

In [8]:
attr

Unnamed: 0,repr_text,prompt,ft_model,ft_result
0,test,test,test,test
1,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ...,Vikhr-7B-instruct_0.4_lora_r32_medium_prompt,"–í–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, —Å–æ—á–Ω—ã–π, –∞—Ä–æ–º–∞—Ç–Ω—ã–π, –≤–∫—É—Å–Ω—ã–π\n–†–∞–∑..."


In [9]:
final_attr = pd.DataFrame()
final_attr = final_attr._append(attr)
final_attr

Unnamed: 0,repr_text,prompt,ft_model,ft_result
0,test,test,test,test
1,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ...,Vikhr-7B-instruct_0.4_lora_r32,"–í–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, –∞—Ä–æ–º–∞—Ç–Ω—ã–π, –≤–∫—É—Å–Ω—ã–π, –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫..."
2,test,test,test,test
3,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,"–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤...",Vikhr-7B-instruct_0.4_lora_r32,"–í–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, –±–µ–∑–≤–∫—É—Å–Ω—ã–π, –∫–∏—Å–ª—ã–π, –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫..."
4,test,test,test,test
5,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,"–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ...",Vikhr-7B-instruct_0.4_lora_r32,"–í–∫—É—Å: —Å–ª–∞–¥–∫–∏–π, –∞—Ä–æ–º–∞—Ç–Ω—ã–π, –∫–∏—Å–ª—ã–π, –≤–∞—Ç–Ω—ã–π, —Å–æ—á–Ω..."
6,test,test,test,test
7,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,"–¢—ã –ø–æ–º–æ—â–Ω–∏–∫, –∫–æ—Ç–æ—Ä—ã–π –∞–Ω–∞–ª–∏–∑–∏—Ä—É–µ—Ç –æ—Ç–∑—ã–≤—ã –Ω–∞ —Ç–æ–≤...",Vikhr-7B-instruct_0.4_lora_r32_long_prompt,"–í–∫—É—Å–Ω—ã–µ, —Å–æ—á–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ, –∞—Ä–æ–º–∞—Ç–Ω—ã–µ, –∫—Ä—É–ø–Ω—ã–µ —è..."
8,test,test,test,test
9,–û–≥—Ä–æ–º–Ω—ã–µ –∏ –Ω–µ –æ—á–µ–Ω—å –≤–∫—É—Å–Ω—ã–µ –ó–∞–º–µ—á–∞—Ç–µ–ª—å–Ω—ã–µ!! –ú–Ω...,–¢—ã –ø–æ–ª—É—á–∏—à—å —Ç–µ–∫—Å—Ç—ã –æ—Ç–∑—ã–≤–æ–≤ –ø–æ–∫—É–ø–∞—Ç–µ–ª–µ–π –æ–± –æ–¥–Ω–æ...,Vikhr-7B-instruct_0.4_lora_r32_long_prompt,"–í–∫—É—Å: –≤–∫—É—Å–Ω—ã–µ, —Å–ª–∞–¥–∫–∏–µ, –∞—Ä–æ–º–∞—Ç–Ω—ã–µ, –∫–∏—Å–ª–æ-—Å–ª–∞–¥–∫..."


In [10]:
final_attr.to_excel('final_attr.xlsx', index=False)

In [43]:
wandb.log({"finetuned_output": finetuned_output})

In [26]:
wandb.finish()

0,1
train/epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà
train/global_step,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà
train/grad_norm,‚ñà‚ñÖ‚ñÑ‚ñÉ‚ñÅ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÉ‚ñÇ‚ñÉ‚ñÇ ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÖ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÅ
train/learning_rate,‚ñÅ‚ñÇ‚ñÇ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñá‚ñà‚ñà‚ñà‚ñà‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
train/loss,‚ñà‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÖ‚ñÜ‚ñÜ‚ñÖ‚ñÑ‚ñÑ‚ñÖ‚ñÅ‚ñÇ‚ñÑ‚ñà‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÖ‚ñÜ‚ñÉ‚ñÖ‚ñÜ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÖ‚ñÖ‚ñÜ‚ñÜ

0,1
total_flos,3.765620287625626e+16
train/epoch,0.15404
train/global_step,400.0
train/grad_norm,0.59094
train/learning_rate,0.0
train/loss,2.6251
train_loss,2.38544
train_runtime,1577.7293
train_samples_per_second,1.014
train_steps_per_second,0.254
