<a href="https://colab.research.google.com/github/RyanChen12035/w266_final_Anatomy-and-Structured-Prunning/blob/main/Llama2_peft_QLora_superglue_boolq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers datasets accelerate peft trl bitsandbytes

Collecting datasets
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.30.1-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft
  Downloading peft-0.11.0-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.2/251.2 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m5.1 MB

In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer

## dataset: super_glue, boolq

In [2]:
dataset_name = 'super_glue'
config = 'boolq'
dataset = load_dataset(dataset_name, config)
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 9427
    })
    validation: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3270
    })
    test: Dataset({
        features: ['question', 'passage', 'idx', 'label'],
        num_rows: 3245
    })
})

In [3]:
dataset['train'].to_pandas()

Unnamed: 0,question,passage,idx,label
0,do iran and afghanistan speak the same language,"Persian language -- Persian (/ˈpɜːrʒən, -ʃən/)...",0,1
1,do good samaritan laws protect those who help ...,Good Samaritan law -- Good Samaritan laws offe...,1,1
2,is windows movie maker part of windows essentials,Windows Movie Maker -- Windows Movie Maker (fo...,2,1
3,is confectionary sugar the same as powdered sugar,"Powdered sugar -- Powdered sugar, also called ...",3,1
4,is elder scrolls online the same as skyrim,The Elder Scrolls Online -- As with other game...,4,0
...,...,...,...,...
9422,is a us district court a federal court,United States district court -- The United Sta...,9422,1
9423,can a tenant get a restraining order against a...,Landlord harassment -- If a landlord is found ...,9423,1
9424,is the golden state warriors in the playoffs,Golden State Warriors -- The Warriors went int...,9424,1
9425,downton abbey will there be a season 7,List of Downton Abbey episodes -- Downton Abbe...,9425,0


In [4]:
def template_generator(example):
  example['instruction'] = f"### Instruction:\n please answer the following question with true or false, question: {example['question']}\n\n### Response:\n"
  example['output'] = "the correct answer is true" if example['label'] else "the correct answer is false"
  return example

dataset_withtemplate = dataset.map(template_generator, remove_columns=['question', 'passage', 'idx', 'label'])
dataset_withtemplate['train'].to_pandas()

Unnamed: 0,instruction,output
0,### Instruction:\n please answer the following...,the correct answer is true
1,### Instruction:\n please answer the following...,the correct answer is true
2,### Instruction:\n please answer the following...,the correct answer is true
3,### Instruction:\n please answer the following...,the correct answer is true
4,### Instruction:\n please answer the following...,the correct answer is false
...,...,...
9422,### Instruction:\n please answer the following...,the correct answer is true
9423,### Instruction:\n please answer the following...,the correct answer is true
9424,### Instruction:\n please answer the following...,the correct answer is true
9425,### Instruction:\n please answer the following...,the correct answer is false


In [5]:
# Model
base_model = "NousResearch/Llama-2-7b-hf"

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"



In [6]:
# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
) # project the model weights to 4bits, pass torch fp16

# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    layers_to_transform = [5,6,7,8,9,10], # layers from 6 to 11
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj'] # attention + MLP layers
)

# Load base moodel
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map={"": 0}
) # download the CLM (autoregressive) model, calculate the loss function of next token.

# Cast the layernorm in fp32, make output embedding layer require grads, add the upcasting of the lmhead to fp32
model = prepare_model_for_kbit_training(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [None]:
# Set training arguments
training_arguments = TrainingArguments(
        output_dir="./results",
        num_train_epochs=10,
        per_device_train_batch_size=10,
        gradient_accumulation_steps=1,
        evaluation_strategy="steps",
        eval_steps=1000,
        logging_steps=10,
        optim="paged_adamw_8bit",
        learning_rate=2e-4,
        lr_scheduler_type="linear",
        warmup_steps=10,
)

# Set supervised fine-tuning parameters
# SFT calculate the loss function by averaging the loss across all tokens in the sequence.
# the SFTTrainer would automatically put  instruction + input as input and pass it to tokenizer to generate input_ids and mask. In the foward pass, the predictions were made
# SFT Trainer would also put the output as the labels and calculate the loss by comparing the predictions and the labels.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_withtemplate['train'],
    eval_dataset=dataset_withtemplate['validation'],
    peft_config=peft_config,
    dataset_text_field="instruction",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)


# Train model
trainer.train()

# Save trained model
# trainer.model.save_pretrained(new_model)

Map:   0%|          | 0/3270 [00:00<?, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss


In [None]:
# Run text generation pipeline with our model
prompt = "Please answer the following question with true or false, question: do iran and afghanistan speak the same language"
instruction = f"### Instruction:\n{prompt}\n\n### Response:\n"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=128)
result = pipe(instruction)
print(result[0]['generated_text'][len(instruction):])