# Install Required Libraries

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets einops

# Log in to WandB to track finetuning metrics
# Log in to Hugging Face

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
!pip install wandb
!wandb login

Collecting wandb
  Downloading wandb-0.15.12-py3-none-any.whl (2.1 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/2.1 MB[0m [31m3.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.1/2.1 MB[0m [31m38.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.34.0-py2.py3-none-any.whl (243 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.9/243.9 kB[0m [31m25.7 MB/s[

# Load & Quantize the model

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
# model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"":0})
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

# Import IA3 Config

In [5]:
from peft import IA3Config, get_peft_model

config = IA3Config(
    peft_type="IA3",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    feedforward_modules=["v_proj"],
)

model = get_peft_model(model, config)

In [None]:
print(model)

# Load and preprocessing the dataset

In [7]:
from datasets import load_dataset

# Split the data into 10% test, 10% validation, 80% training
train_data = load_dataset("Hieu-Pham/cooking_squad_splitted", split='train')
val_data = load_dataset("Hieu-Pham/cooking_squad_splitted", split='validation')

def preprocess_dataset(dataset):

    grouped = []

    for row in dataset:
      answers = row['answers']
      input = '[INST] Question: ' + row['question'] + ' Context: ' + row['context'] + ' [/INST]\nAnswer: ' + answers['text']
      grouped.append(input)

    return grouped

input_column = preprocess_dataset(train_data)
train_data = train_data.add_column('Inputs', input_column)

train_data = train_data.map(
    lambda row: tokenizer(row["Inputs"], truncation=True),
    batched=True,
    remove_columns=train_data.column_names,
)

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.25M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/157k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/155k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1793 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
input_column[0]

In [None]:
# Preprocessing validation dataset
# Validation dataset
output_column = preprocess_dataset(val_data)
val_data = val_data.add_column('Outputs', output_column)

val_data = val_data.map(
    lambda row: tokenizer(row["Outputs"], truncation=True),
    batched=True,
    remove_columns=val_data.column_names,
)

# Training Configurations and Start Training

In [12]:
import transformers

training_args = transformers.TrainingArguments(
    output_dir="Llama2-7B-IA3-cooking-text-gen-prompting",
    evaluation_strategy="steps",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    weight_decay=0.01,
    save_steps=10,
    logging_steps=10,
    max_grad_norm=0.3,
    max_steps=250,
    warmup_ratio=0.03,
    fp16=True,
    optim="paged_adamw_8bit",
    push_to_hub=True,
)


In [13]:
# tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mhieupham[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
10,1.5079,1.520704
20,1.5218,1.499739
30,1.473,1.482325
40,1.478,1.46667
50,1.4444,1.452335
60,1.4734,1.438896
70,1.4354,1.42626
80,1.4065,1.414092
90,1.3908,1.402017
100,1.3869,1.390359


TrainOutput(global_step=250, training_loss=1.371774471282959, metrics={'train_runtime': 447.0538, 'train_samples_per_second': 8.947, 'train_steps_per_second': 0.559, 'total_flos': 4.604233875935232e+16, 'train_loss': 1.371774471282959, 'epoch': 2.23})