# Libraries

In [8]:
# %%capture

# %pip install -U peft
# %pip install -U trl
# %pip install -U bitsandbytes 

In [9]:
# %pip install kaggle

In [10]:
# !git clone https://github.com/Kaggle/docker-python.git

In [11]:
# import sys
# sys.path.append("./docker-python/patches")

In [12]:
# !mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

In [13]:
# %pip install git+https://github.com/Kaggle/kaggle-secrets.git


In [14]:
import os, torch, wandb

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
from dataclasses import dataclass

KeyboardInterrupt: 

## Setup Huggingface 🤗 & Wandb

In [8]:
from huggingface_hub import login

login(token = "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")

wandb.login(key="ce84c3af2fdee6c3e2696b2a4ad96af49a3dd86e")


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\USER_ELISEY\.cache\huggingface\token
Login successful


[34m[1mwandb[0m: Currently logged in as: [33mez1071[0m ([33mez1071-mipt[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\USER_ELISEY\_netrc


True

In [9]:
run = wandb.init(
    project='Fine-tune Llama 3.1 8B on Russian Dataset', 
    job_type="training"
)

In [10]:
@dataclass
class Config:
#     model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
#     model_name = "AnatoliiPotapov/T-lite-instruct-0.1"
    model_name = "google/gemma-2-9b-it"
    dataset_name = "C:\\Users\\USER_ELISEY\\miracl_"
    new_model = "model_weights"
    torch_dtype = torch.float16
    attn_implementation = "eager"
cfg = Config()

# Loading model and tokenizer

In [11]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "C:\\Users\\USER_ELISEY\\gemma",
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation=cfg.attn_implementation
)

Loading checkpoint shards: 100%|██████████| 4/4 [00:29<00:00,  7.46s/it]


In [12]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("C:\\Users\\USER_ELISEY\\gemma")
model, tokenizer = setup_chat_format(model, tokenizer)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad|>'

## LoRA adapter

In [13]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

# Data

## Load

In [14]:
dataset = load_dataset('miracl/miracl-corpus', 'ru', trust_remote_code=True)

In [15]:
data_eval = load_dataset('miracl/miracl', 'ru', trust_remote_code=True)

In [16]:
data_eval['train']['query']

['Когда был спущен на воду первый миноносец «Спокойный»?',
 'Как долго существовало британское телевизионное игровое шоу "Хрустальный лабиринт"?',
 'Когда родилась Князева Марина Леонидовна?',
 'Кто был главным художником мира Зен?',
 'Как звали предполагаемого убийцу Джона Кеннеди?',
 'В каком году была создана группа My Bloody Valentine?',
 'Сколько раз Ли́ля Ю́рьевна Брик была замужем?',
 'В каком немецком городе родилась Екатерина 2?',
 'Где находится Лахта центр?',
 'Какой процент населения Земли ездит на правостороннем движении?',
 'Когда появилась живопись Тибета?',
 'Михаи́л Алекса́ндрович Вру́бель был душевно больным человеком?',
 'Сколько букв в Русском языке в 2018 году?',
 'Дэвид Марк Моррисси играл в театре?',
 'Сколько стран принимало участие в Зимних Олимпийских играх 2014?',
 'Выходит в Калмыкии газета на калмыцком языке на март 2019?',
 'Какая площадь падения Тунгу́сского метеорита?',
 'Когда был создан футбольный клуб «Рома»?',
 'Где происходит действие первой игры Wa

## Format to chat 

In [17]:
def format_chat_template(row):
    row_json = [{"role": "user", "content": row["USER"]},
               {"role": "assistant", "content": row["CHAT"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

In [18]:
# dataset = dataset.map(
#     format_chat_template,
#     num_proc=4,
# )

## Select only part

In [19]:
# dataset_sh = dataset.shuffle(seed=2024).select(range(10_000))

In [20]:
#dataset_sh = dataset.train_test_split(0.1)
# dataset_sh
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['docid', 'title', 'text'],
        num_rows: 9543918
    })
})


# Train model

## Training arguments

In [21]:
training_arguments = TrainingArguments(
    output_dir=cfg.new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
#     num_train_epochs=1,
    #max_steps=500,
    max_steps=10,
    eval_strategy="steps",
    # eval_steps=500,
    eval_steps=10,
    # logging_steps=100,
    # warmup_steps=10,
    logging_steps=5,
    warmup_steps=2,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    group_by_length=True,
    report_to="wandb",
    run_name="Llama-3.1-rus",
)

## Train model

In [22]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["train"],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


In [None]:
trainer.train()

 50%|█████     | 100/200 [34:56<23:41, 14.21s/it] 

{'loss': 2.5051, 'grad_norm': 19.519073486328125, 'learning_rate': 0.00010526315789473685, 'epoch': 0.0}


100%|██████████| 200/200 [1:10:11<00:00, 14.50s/it]

{'loss': 2.3234, 'grad_norm': 18.964954376220703, 'learning_rate': 0.0, 'epoch': 0.0}




In [1]:
path_to_save = "Llama-finetuned"
trainer.save_model(path_to_save)
model.save_pretrained(path_to_save)
tokenizer.save_pretrained(path_to_save)

NameError: name 'trainer' is not defined

In [None]:
del model, tokenizer, trainer

# Compare models

## Init casual LLM

In [None]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=cfg.torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
casual_model = AutoModelForCausalLM.from_pretrained(
    cfg.model_name,
    quantization_config=bnb_config,
#     device_map="auto",
    attn_implementation=cfg.attn_implementation
)

tokenizer = tokenizer = AutoTokenizer.from_pretrained(cfg.model_name)
tokenizer.padding_side = 'right'
tokenizer.padding_token = '<|pad_token|>'

In [None]:
casual_model, tokenizer = setup_chat_format(casual_model, tokenizer)

## Get answers

In [None]:
def generate_answer(model, prompt):
    chat = [
        { "role": "user", "content": prompt },
    ]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=150)

    return(tokenizer.decode(outputs[0]))

# Comprasion

In [None]:
q1 = "I have severe headaches help me please"
q2 = "I have a suspiciously large mole. Could I have cancer? How can I determine this at home?"
q3 = "What does abutment of the nerve root mean?"

In [None]:
generate_answer(model, q1)

In [None]:
generate_answer(model, q2)

In [None]:
generate_answer(model, q3)

In [None]:
# Free gpu memory
import numba
numba.cuda.close()

In [None]:
print(generate_answer(casual_model, q1))

In [None]:
generate_answer(casual_model, q2)

In [None]:
generate_answer(casual_model, q3)