## Installing libraries

In [None]:
!pip install "transformers==4.35" "datasets==2.13.0" "peft==0.4.0" "accelerate==0.21.0" "bitsandbytes==0.40.2" "trl==0.4.7" "safetensors>=0.3.1" "tiktoken"

Collecting transformers==4.35
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets==2.13.0
  Downloading datasets-2.13.0-py3-none-any.whl (485 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.6/485.6 kB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft==0.4.0
  Downloading peft-0.4.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate==0.21.0
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes==0.40.2
  Downloading bitsandbytes-0.40.2-py3-none-any.whl (92.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [None]:
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from random import randrange
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


# Loading the training data from Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset and the model
# directories under /content/drive are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Drive folder holding the training data files.
data = "/content/drive/MyDrive/ColabNotebooks/Data/"

In [None]:
# Build a DatasetDict from the files found in the data folder and display
# its splits, columns and row counts.
dataset = load_dataset(path=data)
dataset

Downloading and preparing dataset csv/Data to /root/.cache/huggingface/datasets/csv/Data-bb9e1fb9a45207ef/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/Data-bb9e1fb9a45207ef/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['Text'],
        num_rows: 500
    })
})

# Fine-tuning the base model

In [None]:
# Hugging Face Hub id of the base checkpoint that will be fine-tuned.
model_id = 'meta-llama/Llama-2-7b-chat-hf'

In [None]:
# Log in to the Hugging Face Hub; the CLI prompts interactively for an access token.
# NOTE(review): presumably required to download the `model_id` checkpoint — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: read).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 's

In [None]:
# dtype used for computation on top of the 4-bit quantized weights
compute_dtype = torch.float16

# bitsandbytes settings: load the weights in 4-bit NF4, without double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)


In [None]:
%%time
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Downloading tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

CPU times: user 390 ms, sys: 39.7 ms, total: 429 ms
Wall time: 2.05 s


In [None]:
%%time
# Load the pretrained model
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=bnb_config,
                                             device_map="auto")

Downloading config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

CPU times: user 21.4 s, sys: 29.4 s, total: 50.8 s
Wall time: 2min 41s


In [None]:
# LoRA adapter settings (per the original author, chosen after the QLoRA paper):
# rank-64 adapters, alpha 16, 10% dropout, and no bias parameters trained.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [None]:
# Hyper-parameters for the fine-tuning run. The complete argument list is documented at
# https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments
# No evaluation strategy is configured; only training checkpoints are produced.
args = TrainingArguments(
    seed=42,
    output_dir='/content/drive/MyDrive/ColabNotebooks/model/' + 'receipe-llama2-7b',
    save_strategy="epoch",  # alternative: "steps"
    num_train_epochs=2,  # tune to the size of the dataset
    per_device_train_batch_size=2,  # 4 is an option when more GPU RAM is available
    learning_rate=2e-4,
    fp16=True,
)

In [None]:
# Build the supervised fine-tuning trainer: it trains LoRA adapters (peft_config)
# on the 'Text' column of the train split, packing examples into fixed-length
# sequences. No evaluation dataset is passed.
trainer = SFTTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=dataset['train'],
    dataset_text_field='Text',
    max_seq_length=1042,  # NOTE(review): possibly meant to be 1024 — confirm
    packing=True,
)



In [None]:
# Run the fine-tuning loop and display the resulting training summary.
train_output = trainer.train()
train_output

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss




TrainOutput(global_step=170, training_loss=0.11421544692095588, metrics={'train_runtime': 1218.1376, 'train_samples_per_second': 0.821, 'train_steps_per_second': 0.41, 'total_flos': 1.4033386373480448e+16, 'train_loss': 0.11421544692095588, 'epoch': 1.34})

In [None]:
# Persist the trained model to the trainer's configured output directory.
trainer.save_model(trainer.args.output_dir)

# Merging the base model and adapters and saving it

Cleaning the memory

In [None]:
# Drop the references to the quantized model and the trainer, then run the
# garbage collector twice so the objects they held can be reclaimed.
import gc

del model, trainer
gc.collect()
gc.collect()

20730

In [None]:
# Ask PyTorch to release the GPU memory it is caching but no longer using.
torch.cuda.empty_cache()

In [None]:
# One more garbage-collection pass; the cell output is the number of
# unreachable objects the collector found.
gc.collect()

0

Reload the saved model and merge it; then we can save the whole model.

In [None]:
%%time
from peft import AutoPeftModelForCausalLM

new_model = AutoPeftModelForCausalLM.from_pretrained('/content/drive/MyDrive/ColabNotebooks/model/' + 'receipe-llama2-7b',
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

CPU times: user 20.2 s, sys: 8.87 s, total: 29.1 s
Wall time: 1min 14s


In [None]:
# Merge the LoRA adapter weights into the base model and keep the resulting
# standalone model, which no longer needs the adapter at inference time.
merged_model = new_model.merge_and_unload()

In [None]:
# Write the merged model (as safetensors) and the tokenizer into one directory,
# so that the folder can be loaded on its own later.
merged_dir = "/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged"
merged_model.save_pretrained(merged_dir, safe_serialization=True)
tokenizer.save_pretrained(merged_dir)

('/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged/tokenizer_config.json',
 '/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged/special_tokens_map.json',
 '/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged/tokenizer.model',
 '/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged/added_tokens.json',
 '/content/drive/MyDrive/ColabNotebooks/model/receipe_metallama2-7b-tuned-merged/tokenizer.json')

# Testing the model

In [None]:
# Prompt used to smoke-test the merged model.
prompt = 'What is burger'

In [None]:
# Tokenize the prompt into a PyTorch tensor of token ids and move it to the GPU.
# `truncation=True` is not passed: with no `max_length` given it performed no
# truncation and only produced a "no maximum length is provided" warning.
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
# Generate up to 200 new tokens as a continuation of the prompt.
# `do_sample=True` is set explicitly because `temperature` only influences
# generation when sampling is enabled; without it, whether sampling happens
# depends on the generation config stored with the checkpoint.
outputs = merged_model.generate(
    input_ids=input_ids,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.6,
)

In [None]:
# Turn the generated token ids back into text (special tokens removed) and
# keep the first, and only, sequence of the batch.
generated_ids = outputs.detach().cpu().numpy()
decoded_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
result = decoded_batch[0]

In [None]:
# Show the decoded text: the prompt followed by the model's continuation.
print(result)

What is burger?
1. A burger is a type of sandwich that typically consists of a cooked patty of ground meat (usually beef, chicken, or pork) served on a bun with various toppings such as cheese, lettuce, tomato, onion, and condiments.
2. The term "burger" can also refer to the meat patty itself, rather than the entire sandwich.
3. Burgers are a popular fast food item and are often served at restaurants, diners, and food trucks.
4. There are many different types of burgers, including classic beef burgers, chicken burgers, vegetarian burgers, and vegan burgers.
5. Burgers can be cooked in a variety of ways, including grilling, pan-frying, or baking.


In [None]:
import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, whatever the real locale is.

    Accepts the same optional ``do_setlocale`` argument as the standard
    ``locale.getpreferredencoding`` so that callers passing it (for example
    ``locale.getpreferredencoding(False)``) do not fail with a TypeError.
    """
    return "UTF-8"


# Override the process-wide lookup so later code sees a UTF-8 locale.
# NOTE(review): presumably a workaround for the `!` shell commands below
# refusing to run under a non-UTF-8 locale — confirm.
locale.getpreferredencoding = _utf8_preferred_encoding

In [None]:
# Log in to the Hugging Face Hub again before the upload in the next cell.
# NOTE(review): presumably a token with write permission is needed for push_to_hub — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'stor

In [None]:
# push merged model to the hub
%%time
hf_model_repo = "Gornee/receipe_llama2"
merged_model.push_to_hub(hf_model_repo)
tokenizer.push_to_hub(hf_model_repo)

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CPU times: user 1min 21s, sys: 1min 7s, total: 2min 29s
Wall time: 6min 51s


CommitInfo(commit_url='https://huggingface.co/Gornee/receipe_llama2/commit/591ed75c2072cb4cb071ae53a5c91b89dc1771ef', commit_message='Upload tokenizer', commit_description='', oid='591ed75c2072cb4cb071ae53a5c91b89dc1771ef', pr_url=None, pr_revision=None, pr_num=None)