In [1]:
! pip install transformers peft accelerate
! pip install -U bitsandbytes
! pip install datasets
! pip install trl==0.12.0



In [2]:
import pandas as pd
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from datasets import Dataset, DatasetDict, load_dataset
import yaml

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

from peft import LoraConfig, PeftModel
from datasets import load_from_disk
from trl import SFTTrainer, SFTConfig

In [3]:
def format_example(example):
    """Build the single training string for one prompt/completion record.

    Args:
        example: mapping that provides the 'prompt' and 'completion' entries.

    Returns:
        A dict with one key, 'input', holding the text laid out as
        ``<s>[INST] prompt [/INST] completion </s>``.
    """
    template = "<s>[INST] {} [/INST] {} </s>"
    prompt_part = example['prompt']
    completion_part = example['completion']
    return {'input': template.format(prompt_part, completion_part)}

# Read each JSONL split and add the formatted `input` column in one step.
train_dataset = Dataset.from_json("training_data.jsonl").map(format_example)
validation_dataset = Dataset.from_json("validation_data.jsonl").map(format_example)
test_dataset = Dataset.from_json("test_data.jsonl").map(format_example)

# Bundle the three splits under the keys the training cells index by.
dataset_dict = DatasetDict(
    {
        "train": train_dataset,
        "validation": validation_dataset,
        "test": test_dataset,
    }
)

# Persist the bundle, then read it back so `dataset` comes from the saved copy.
dataset_dict.save_to_disk("my_dataset")
dataset = load_from_disk("my_dataset")

Saving the dataset (0/1 shards):   0%|          | 0/15000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/3000 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/2196 [00:00<?, ? examples/s]

### ORIGINAL MODEL


In [4]:
# Load the unmodified chat model and its tokenizer as the reference point.
model_name = "NousResearch/Llama-2-7b-chat-hf"
# Request bfloat16 at load time so the weights are not first materialised in
# the default float32 and cast afterwards; the result is the same bf16 model.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = model.cuda()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
import yaml

# Read the experiment settings; later cells look values up under
# config['training_parameters'].
# Explicit UTF-8 keeps the read independent of the platform's default encoding.
with open("config.yaml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

### FINE TUNING MODEL

In [6]:
# Every hyper-parameter in this cell comes from the same section of config.yaml.
training_params = config['training_parameters']

# Quantization settings for loading the base model through bitsandbytes.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=training_params['use_4bit'],
    bnb_4bit_quant_type=training_params['bnb_4bit_quant_type'],
    # The config stores the dtype by name (a torch attribute), so resolve it here.
    bnb_4bit_compute_dtype=getattr(torch, training_params['bnb_4bit_compute_dtype']),
    bnb_4bit_use_double_quant=training_params['use_nested_quant'],
)

# Base model, loaded with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=training_params['device_map'],
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Loaded without trust_remote_code, matching the other tokenizer loads in this
# notebook; the flag permits running code shipped in the Hub repo.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): EOS doubles as the padding token here; confirm the data
# collator does not mask the real end-of-sequence token together with padding.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# LoRA adapter settings.
peft_config = LoraConfig(
    lora_alpha=training_params['lora_alpha'],
    lora_dropout=training_params['lora_dropout'],
    r=training_params['lora_r'],
    bias='none',
    task_type='CAUSAL_LM',
)

# Trainer settings; 'input' is the column produced by format_example.
training_arguments = SFTConfig(
    output_dir=training_params['output_dir'],
    num_train_epochs=training_params['num_train_epochs'],
    per_device_train_batch_size=training_params['per_device_train_batch_size'],
    gradient_accumulation_steps=training_params['gradient_accumulation_steps'],
    optim=training_params['optim'],
    save_steps=training_params['save_steps'],
    logging_steps=training_params['logging_steps'],
    learning_rate=training_params['learning_rate'],
    weight_decay=training_params['weight_decay'],
    fp16=training_params['fp16'],
    bf16=training_params['bf16'],
    max_grad_norm=training_params['max_grad_norm'],
    max_steps=training_params['max_steps'],
    warmup_ratio=training_params['warmup_ratio'],
    group_by_length=training_params['group_by_length'],
    lr_scheduler_type=training_params['lr_scheduler_type'],
    report_to='wandb',
    dataset_text_field='input',
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Supervised fine-tuning of the quantized model with the LoRA settings above.
# NOTE(review): trl also expects max_seq_length / packing on SFTConfig rather
# than on the trainer; they are kept here so this cell stays self-contained.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    peft_config=peft_config,
    max_seq_length=config['training_parameters']['max_seq_length'],
    # `processing_class` is the current name of the argument formerly called
    # `tokenizer` in the pinned trl release.
    processing_class=tokenizer,
    packing=config['training_parameters']['packing'],
)
trainer.train()

### Відновлення моделі з останнього checkpoint після аварійного завершення навчання

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel

# Checkpoint written by the interrupted training run.
# Assumes this directory holds the LoRA adapter saved by the trainer (not full
# model weights) — TODO confirm.
model_checkpoint_path = "/content/drive/MyDrive/project/results/checkpoint-45000"

# Rebuild the quantized base model from the original repo; the adapter from
# the checkpoint is attached to it below.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=config['training_parameters']['device_map'],
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

model = prepare_model_for_kbit_training(model)
# Attach the adapter weights stored in the checkpoint instead of creating a new
# adapter with get_peft_model, which would start from freshly initialised LoRA
# weights. is_trainable=True keeps the adapter trainable so training can continue.
model = PeftModel.from_pretrained(model, model_checkpoint_path, is_trainable=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Завантаження моделі на Hugging Face


In [23]:
!pip install huggingface_hub
!huggingface-cli login
!huggingface-cli repo create 'Llama-2-7b_ukr_item_descr'


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineG

In [26]:
# Upload the model and the tokenizer to the same Hub repository.
# NOTE(review): the recorded cell output names a repo ending in `_readyToUse`;
# confirm which repository id is the intended one.
hub_repo_id = "Llama-2-7b_ukr_item_descr"
model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)

adapter_model.safetensors:   0%|          | 0.00/134M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Alinkaaa1/Llama-2-7b_ukr_item_descr_readyToUse/commit/c5cf4ae9b08c1429fbd6bebc4346fca48d77baeb', commit_message='Upload tokenizer', commit_description='', oid='c5cf4ae9b08c1429fbd6bebc4346fca48d77baeb', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Alinkaaa1/Llama-2-7b_ukr_item_descr_readyToUse', endpoint='https://huggingface.co', repo_type='model', repo_id='Alinkaaa1/Llama-2-7b_ukr_item_descr_readyToUse'), pr_revision=None, pr_num=None)