In [1]:
!pip install peft datasets

[0m

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import peft
from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
import torch
from datasets import load_dataset
import os
from transformers import TrainingArguments, Trainer
import transformers



In [3]:
def print_memory(device=0):
    """Print how much CUDA memory is allocated and reserved on one device.

    Args:
        device: CUDA device to inspect, forwarded to
            ``torch.cuda.memory_allocated`` / ``torch.cuda.memory_reserved``.
            Defaults to 0, the device this helper always reported before.

    Prints 'No cuda' when CUDA is not available.
    """
    if not torch.cuda.is_available():
        print('No cuda')
        return

    # Both counters are reported in bytes; divide by 1024**3 to print GiB.
    allocated = torch.cuda.memory_allocated(device) / 1024**3
    reserved = torch.cuda.memory_reserved(device) / 1024**3
    print(f'VRAM allocated {allocated}gb, reserved {reserved}gb')

In [4]:
#model_name = 'BioMistral/BioMistral-Safetensors'
model_name = 'mistralai/Mistral-7B-v0.1'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
#specials = ["<|user|>", "<|assistant|>", "<|end|>"]
#tokenizer.add_special_tokens({"additional_special_tokens": specials})
# Passing `load_in_4bit=True` straight to `from_pretrained` is deprecated;
# the same 4-bit setting is supplied through a BitsAndBytesConfig instead.
foundation_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=transformers.BitsAndBytesConfig(load_in_4bit=True),
    attn_implementation="flash_attention_2",
)
# Prepare the quantized model for adapter training.
foundation_model = prepare_model_for_kbit_training(foundation_model)
foundation_model.gradient_checkpointing_enable()
#foundation_model.resize_token_embeddings(len(tokenizer))
# Turn the model's cache flag off for training with gradient checkpointing.
foundation_model.config.use_cache = False

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
# Report whether the tokenizer carries a chat template, then print the template itself.
# getattr(..., None) keeps the first check safe if the attribute is missing.
print("chat_template exists:", bool(getattr(tokenizer, "chat_template", None)))
print("chat_template preview:\n", tokenizer.chat_template)

chat_template exists: False
chat_template preview:
 None


In [6]:
print(tokenizer.chat_template) # Check whether the tokenizer supports chat

None


In [7]:
def get_outputs(model, inputs, max_new_tokens=1000, repetition_penalty=1.5):
    """Generate a continuation for already-tokenized inputs.

    Args:
        model: Object exposing a ``generate`` method.
        inputs: Mapping with 'input_ids' and 'attention_mask' entries.
        max_new_tokens: Upper bound on the number of generated tokens.
        repetition_penalty: Value forwarded to ``generate``; defaults to 1.5,
            the value this helper previously hard-coded.

    Returns:
        Whatever ``model.generate`` returns.

    Note:
        Reads the notebook-level ``tokenizer`` for the EOS and pad token ids.
    """
    return model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        # Pass the pad id explicitly so generate() does not have to fall back
        # to the EOS id and log a warning on every call.
        pad_token_id=tokenizer.pad_token_id,
    )

In [8]:
# Report GPU memory usage after the model has been loaded.
print_memory()

VRAM allocated 4.662720203399658gb, reserved 5.197265625gb


In [9]:
# Baseline check before any fine-tuning: send a plain-text prompt to the base model.
input_simple_sentences_text = 'I have high temperature. Which pills should I take? Answer with pills, no additional info. Pills:'
# Tokenize into PyTorch tensors and move them to the device the model reports.
input_simple_sentences = tokenizer(input_simple_sentences_text, return_tensors='pt').to(foundation_model.device)
foundational_outputs_sentence = get_outputs(foundation_model, input_simple_sentences)
# Decode the generated ids back to text, dropping special tokens.
output = tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.


In [10]:
# Display the first (and only) decoded sequence for the plain-text prompt.
output[0]

"I have high temperature. Which pills should I take? Answer with pills, no additional info. Pills:\n1) 20 mg of prednisone (orally), every day for a week; then taper down by one pill per month until you’re off it completely in six months or so. This is the standard treatment and works well if your body can tolerate steroids without too many side effects like weight gain/moon face etc., but some people don't respond as quickly to this type of therapy because their immune system has been suppressed over time due lack proper nutrition from food sources that contain anti-inflammatory properties such as omega three fatty acids found naturally within fish oils which help reduce inflammation throughout our bodies including joints where arthritis often occurs when there isn't enough lubricant between bones causing them rub together creating pain sensations we feel all around us!"

In [11]:
# Second baseline check: same question, but the prompt asks for a JSON list of pills.
# NOTE(review): the prompt contains the typo "contining"; it is left unchanged here
# so the recorded output still matches the prompt that produced it.
input_json_sentences_text = 'I have high temperature. Which pills should I take? Answer with json, contining pills. Example ["carbamazepine", "lamotrigine"]. Your answer in json: '
# Tokenize into PyTorch tensors and move them to the device the model reports.
input_json_sentences = tokenizer(input_json_sentences_text, return_tensors='pt').to(foundation_model.device)
# Overwrites the variables from the plain-text run above.
foundational_outputs_sentence = get_outputs(foundation_model, input_json_sentences)
output = tokenizer.batch_decode(foundational_outputs_sentence, skip_special_tokens=True)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [12]:
# Display the first (and only) decoded sequence for the JSON prompt.
output[0]

'I have high temperature. Which pills should I take? Answer with json, contining pills. Example ["carbamazepine", "lamotrigine"]. Your answer in json:  [["Carisoprodol"],\nDIPHENHYDRAMINE HYDROCHLORIDE (Benedryl) is an antihistamine used to treat allergy symptoms such as itching and runny nose or sneezing; hives and other skin rashes caused by allergies; cough due to minor throat irritation from the common cold or flu...'

# Начинаем готовить LORA.

In [13]:
# Identifier of the dataset that is passed to load_dataset below.
dataset_name = 'mlabonne/FineTome-100k'

In [14]:
# Load the dataset and display its splits and columns.
data = load_dataset(dataset_name)
data

DatasetDict({
    train: Dataset({
        features: ['conversations', 'source', 'score'],
        num_rows: 100000
    })
})

In [15]:
def convert_to_plain(sample):
    """Flatten one sample's conversation into a single tagged string.

    Each turn in ``sample['conversations']`` whose 'from' field is 'human' or
    'gpt' is rendered as its role tag, a newline, the turn's 'value' and a
    trailing newline. Turns from any other speaker are skipped. The text always
    ends with '<|end|>' followed by a newline.

    Returns:
        dict with the single key 'conversation_plain' holding the text.
    """
    role_prefix = {
        'human': '<|user|>\n',
        'gpt': '<|assistant|>\n',
    }
    pieces = [
        role_prefix[turn['from']] + turn['value'] + '\n'
        for turn in sample['conversations']
        if turn['from'] in role_prefix
    ]
    pieces.append('<|end|>\n')
    return {'conversation_plain': ''.join(pieces)}
    

In [16]:
# Add the 'conversation_plain' column by formatting every sample.
data = data.map(convert_to_plain)

In [17]:
# Display the dataset to confirm the new 'conversation_plain' column.
data

DatasetDict({
    train: Dataset({
        features: ['conversations', 'source', 'score', 'conversation_plain'],
        num_rows: 100000
    })
})

In [18]:
# Preview one formatted conversation. Index the row first and the column second:
# column-first indexing fetches the whole 'conversation_plain' column just to
# pick one item out of it.
data['train'][4]['conversation_plain']

'<|user|>\nPrint the reverse of a string using a for loop.\n<|assistant|>\nHere is an example code using a for loop to print the reverse of a string along with a detailed docstring:\n\n```python\ndef print_reverse_string(string):\n    """\n    Prints the reverse of a given string using a for loop.\n\n    Parameters:\n    string (str): The string to be reversed and printed.\n\n    Returns:\n    None.\n    """\n\n    # Iterate through the characters of the string in reverse order\n    for i in range(len(string)-1, -1, -1):\n        print(string[i], end=\'\')\n\n    # Print a new line after printing the whole reversed string\n    print()\n\n# Example usage\ninput_string = input("Enter a string: ")\nprint_reverse_string(input_string)\n```\n\nIn this code, the `print_reverse_string` function takes a string as input. It uses a for loop to iterate through the characters of the string in reverse order. The for loop starts from the index of the last character (`len(string)-1`) and goes until th

In [19]:
# Tokenize the formatted conversations in batches; this adds the
# 'input_ids' and 'attention_mask' columns.
# NOTE(review): no truncation / max_length is passed to the tokenizer, so the
# sequence length does not appear to be capped here — confirm this is intended.
data = data.map(lambda samples: tokenizer(samples['conversation_plain']), batched=True)

In [20]:
# Display the dataset to confirm the tokenizer columns were added.
data

DatasetDict({
    train: Dataset({
        features: ['conversations', 'source', 'score', 'conversation_plain', 'input_ids', 'attention_mask'],
        num_rows: 100000
    })
})

In [21]:
# LoRA adapter configuration for causal language modeling.
lora_config = LoraConfig(
    r=8, # LoRA rank: the larger r is, the more parameters are trained.
    lora_alpha=16, # Scaling factor for the LoRA update; here it is set to 2 * r.
    #target_modules=["q_proj","k_proj","v_proj","o_proj", 'gate_proj', 'up_proj', 'down_proj'],
    target_modules=["q_proj","k_proj","v_proj","o_proj"], # Adapters are attached to these four projection modules only.
    #modules_to_save=["embed_tokens", "lm_head"],
    lora_dropout=0.05, # Dropout on the LoRA layers; helps to avoid overfitting.
    #bias="lora_only", # This specifies whether the bias parameters should be trained.
    bias="none",
    task_type="CAUSAL_LM",
)

In [22]:
# Wrap the base model with the LoRA configuration and report the trainable parameter count.
peft_model = get_peft_model(foundation_model, lora_config)
peft_model.print_trainable_parameters()

trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940


In [23]:
# Base folder for this notebook's artifacts (the current directory).
working_dir = './'

# Folder handed to TrainingArguments as output_dir.
output_directory = os.path.join(working_dir, 'peft_lab_outputs')

In [24]:
# Training setup: 2 epochs at learning rate 1e-4 in bf16, not on CPU,
# with auto_find_batch_size enabled and outputs written to output_directory.
training_args = TrainingArguments(
    output_dir = output_directory,
    auto_find_batch_size = True,
    learning_rate = 1e-4,
    num_train_epochs = 2,
    use_cpu = False,
    bf16=True
)

In [25]:
# Training subset: the first 4000 examples, with every column except the
# tokenizer outputs ('input_ids', 'attention_mask') removed.
train_dataset = (
    data['train']
    .take(4000)
    .remove_columns(['conversations', 'source', 'score', 'conversation_plain'])
)

In [26]:
# Display the training subset to confirm its columns and row count.
train_dataset

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 4000
})

In [None]:
# Fine-tune the LoRA-wrapped model on the tokenized subset.
# The collator is built with mlm=False, i.e. without masked-language-model masking.
# NOTE(review): no call that saves the trained adapter is visible in this part of
# the notebook — confirm it is saved further down.
trainer = Trainer(
    model = peft_model,
    args = training_args,
    train_dataset = train_dataset,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()

Step,Training Loss
