# **IMPORT**

In [1]:
!pip install -U transformers
!pip install -U accelerate bitsandbytes
!pip install -U peft
!pip install --upgrade peft
!pip install bitsandbytes
!pip install deepspeed
!pip install wandb

Collecting transformers
  Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.46.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hDownloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m72.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.19.1
    Uninstalling tokenizers-0.19.1:
      Successfu

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from transformers import BitsAndBytesConfig
import pandas as pd
import numpy as np

import bitsandbytes as bnb
from peft import LoraConfig, get_peft_model

import torch 
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

from datasets import Dataset

from tqdm import tqdm
import ast

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient() # здесь хранится токен от HF;)


In [3]:
if torch.cuda.is_available():
    print('Automatic Mixed Precision (AMP) is supported.')
else:
    print('Automatic Mixed Precision (AMP) is not supported.')


Automatic Mixed Precision (AMP) is supported.


In [4]:
!nvidia-smi

  pid, fd = os.forkpty()


Thu Oct 31 22:23:07 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P0             26W /  250W |       3MiB /  16384MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# **MODEL**

In [6]:
access_token = user_secrets.get_secret("hf_token")

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=access_token)
model = model.to(device)

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [None]:
%time 

messages = [
    {"role": "user", "content": "Отвечай на русском. Знаешь ли ты что такое quality в датасете OpenAssistant/oasst1"},
]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt", return_dict=True)

outputs = model.generate(**input_ids, max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))

# **DATA**

In [7]:
train_data = pd.read_csv('/kaggle/input/data4self-alignment/data4alignment_train_mean_label.csv')
test_data = pd.read_csv('/kaggle/input/data4self-alignment/data4alignment_test_mean_label.csv')

train_data['labels'] = train_data['estimate']
test_data['labels'] = test_data['estimate']

train_data = train_data.drop(['estimate'], axis=1)
test_data = test_data.drop(['estimate'], axis=1)


print(train_data.head())
print()
print(test_data.head())

                                         instruction  \
0                    Напиши информацию о игре Hytale   
1              У меня угнали машину, что мне делать?   
2  Какие произведения Булгакова связаны с Иерусал...   
3  Плюсы и минусы языков программирования C++ и R...   
4                Что делать если издеваются в школе?   

                                              answer  labels  
0   Hytale - это игра в жанре sandbox, разработан...       5  
1   \n1. Позвонить в полицию.\n2. Сообщить о краж...       5  
2  \n\n* "Мастер и Маргарита"\n* "Собачье сердце"...       5  
3  \n\n**Плюсы C++:**\n* Высокая производительнос...       4  
4  \n\n1. **Не бойся обратиться за помощью.**  По...       5  

                                         instruction  \
0  Напиши функцию на языке swift, которая сортиру...   
1                        Чему равен абсолютный ноль?   
2                       Что такое сверхпроводимость?   
3  Напиши пошаговый план как сделать ракету в гар...   
4  С

#### Преобразование датафрейма в датасет с добавлением `input_ids` и `attention_mask` для более качественного дообучения модели

In [8]:
def preprocess_data(examples):
    instructions = [str(inst) if inst is not None else "" for inst in examples["instruction"]]
    answers = [str(ans) if ans is not None else "" for ans in examples["answer"]]

    instruction_tokenized = tokenizer(
        instructions,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt" 
    )

    answer_tokenized = tokenizer(
        answers,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt"
    )

    return {
        "input_ids": instruction_tokenized["input_ids"],
        "attention_mask": instruction_tokenized["attention_mask"],
        "labels": answer_tokenized["input_ids"]
    }

train_dataset = Dataset.from_pandas(train_data)
train_dataset = train_dataset.map(preprocess_data, batched=True)


Map:   0%|          | 0/617 [00:00<?, ? examples/s]

In [9]:
test_dataset = Dataset.from_pandas(test_data)
test_dataset = test_dataset.map(preprocess_data, batched=True)

Map:   0%|          | 0/35 [00:00<?, ? examples/s]

In [10]:
train_dataset

Dataset({
    features: ['instruction', 'answer', 'labels', 'input_ids', 'attention_mask'],
    num_rows: 617
})

# **Alignment**

## DPO (Direct Preference Optimization)

In [11]:
# Используем PEFT
config = LoraConfig(
    r=16,   
    lora_alpha=32, 
    target_modules=["q_proj", "v_proj"], # Модули, к которым применяем LoRA
    lora_dropout=0.1,
    bias="none",  # Не обновляем веса смещений
    task_type="CAUSAL_LM"  # Тип задачи: языковое моделирование
)
model = get_peft_model(model, config)

model.to(device)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma2ForCausalLM(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 2304, padding_idx=0)
        (layers): ModuleList(
          (0-25): 26 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2304, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2304, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_

In [16]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    weight_decay=0.01,
    report_to="wandb",
)



In [13]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.get("labels")
        device = model.device

        # Move inputs to device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Model forward pass
        outputs = model(**inputs)
        logits = outputs.logits

        if logits.shape[1] != labels.shape[1]:
            labels = labels[:, :logits.shape[1]] 

        loss = nn.functional.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1), ignore_index=-100)

        return (loss, outputs) if return_outputs else loss


trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

[2024-10-31 22:45:34,221] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/opt/conda/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/opt/conda/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status


#### **Запуск обучения модели**

In [18]:
import wandb
from accelerate import Accelerator

accelerator = Accelerator()
wandb.login() 
wandb.init(project="alignment-gemma")  
trainer.train()



VBox(children=(Label(value='0.026 MB of 0.026 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▃▁
eval/runtime,█▁▁
eval/samples_per_second,▁██
eval/steps_per_second,▁██
train/epoch,▁▅██
train/global_step,▁▅██

0,1
eval/loss,9.01339
eval/runtime,4.8871
eval/samples_per_second,7.162
eval/steps_per_second,1.842
total_flos,2882531584180224.0
train/epoch,3.0
train/global_step,465.0
train_loss,9.9002
train_runtime,570.7574
train_samples_per_second,3.243


Epoch,Training Loss,Validation Loss
1,No log,8.58125
2,No log,8.357142
3,No log,8.2125



Cannot access gated repo for url https://huggingface.co/google/gemma-2-2b-it/resolve/main/config.json.
Access to model google/gemma-2-2b-it is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in google/gemma-2-2b-it.


TrainOutput(global_step=465, training_loss=7.598185483870968, metrics={'train_runtime': 558.2755, 'train_samples_per_second': 3.316, 'train_steps_per_second': 0.833, 'total_flos': 2882531584180224.0, 'train_loss': 7.598185483870968, 'epoch': 3.0})

#### *Сохраняем модель выравненную при помощи техники DPO* 

In [19]:
model.save_pretrained("./fine_tuned_gemma_model_dpo")
tokenizer.save_pretrained("./fine_tuned__gemma_model_dpo")


Cannot access gated repo for url https://huggingface.co/google/gemma-2-2b-it/resolve/main/config.json.
Access to model google/gemma-2-2b-it is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in google/gemma-2-2b-it.


('./fine_tuned__gemma_model_dpo/tokenizer_config.json',
 './fine_tuned__gemma_model_dpo/special_tokens_map.json',
 './fine_tuned__gemma_model_dpo/tokenizer.model',
 './fine_tuned__gemma_model_dpo/added_tokens.json',
 './fine_tuned__gemma_model_dpo/tokenizer.json')