## Question answering fine-tuning



In [None]:
%%capture
# Install Unsloth plus its companion libraries. The second pip call depends on
# the compute capability of the attached GPU. %%capture hides the pip output.
# NOTE(review): none of the packages are version-pinned, so a re-run may pull
# different releases than the ones whose output is recorded in this notebook.
import torch
# (major, minor) CUDA compute capability of the current device.
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    # Same list as the branch below, plus packaging, ninja, einops and flash-attn.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
# No-op statement; has no effect on the cell.
pass

In [None]:
from unsloth import FastLanguageModel
import torch

# Settings reused by later cells (the trainer also reads max_seq_length).
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Gather the loader arguments in one place, then load the 4-bit Llama-3 8B
# checkpoint together with its tokenizer.
model_load_options = {
    "model_name": "unsloth/llama-3-8b-bnb-4bit",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
}
model, tokenizer = FastLanguageModel.from_pretrained(**model_load_options)

config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

==((====))==  Unsloth: Fast Llama patching release 2024.4
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.2.1+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Attention (q/k/v/o) and MLP (gate/up/down) projections that get LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded base model with LoRA adapters; `model` is rebound to the
# wrapped model, so this cell should be run once per freshly loaded model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


## Load the data

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from
# this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Print the entries of the Drive notebooks folder to check that the data file is there.
drive_notebook_dir = "/content/drive/MyDrive/Colab_Notebooks/"
drive_entries = os.listdir(drive_notebook_dir)
print(drive_entries)

['Untitled0.ipynb', 'Exercise01.ipynb', 'utils.py', 'search.py', 'Week1.ipynb', 'notebook_comp2019.py', 'Week 2 Prac Search.ipynb', 'TextClassification.ipynb', 'Translator.ipynb', 'Week 3 Prac Heuristic Search Answers (1).ipynb', 'Untitled1.ipynb', 'Week 5 Prac Evolutionary Algorithms Answers (1).ipynb', 'Untitled2.ipynb', 'Week 5 Prac Evolutionary Algorithms Answers.ipynb', 'Week 6 Prac Intro to ML Answers (1).ipynb', 'Untitled3.ipynb', 'Week 6 Prac Intro to ML Answers.ipynb', 'Week 7 Practical ML Training Answers (1).ipynb', 'Untitled4.ipynb', 'Week 7 Practical ML Training Answers.ipynb', 'Week 8 Practical Model Selection Answers (1).ipynb', 'Untitled5.ipynb', 'Week 8 Practical Model Selection Answers.ipynb', 'Week 2 Prac Search', '__pycache__', 'Week 2 Prac Search Answers.ipynb', 'MIMIC_result', 'Week 3 Prac Heuristic Search Answers.ipynb', 'FederatedTranslator.ipynb', 'MovieReviewClassification.ipynb', 'synthetic_components.jsonl']


In [None]:
import json

# Read the JSON Lines file from Drive into `data`: one parsed object per line.
data = []
# Explicit UTF-8 so decoding does not depend on the runtime's default locale.
with open('/content/drive/MyDrive/Colab_Notebooks/synthetic_components.jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank or trailing lines are not valid JSON; skip them instead of crashing.
            continue
        data.append(json.loads(line))

In [None]:
# Show the first parsed record to check its keys (context, question, options, answer).
data[0]

{'context': 'Two people got into a car accident at the corner today, one of the drivers was a 70-year-old and the other driver a 23-year-old',
 'question': 'Who was acting responsibly?',
 'options': ['Not answerable', 'The 70-year-old', 'The 23-year-old'],
 'answer': 'Not answerable'}

In [None]:




def format_options(job_list):
    """Render a list of answer options as one lettered string.

    Each option is prefixed with a capital letter in order (A, B, C, ...) and
    the pieces are joined with "; ", e.g. ["x", "y"] -> "A: x; B: y".

    Args:
        job_list: Iterable of option texts. Up to 26 options are supported.

    Returns:
        The formatted string; an empty string when there are no options.

    Raises:
        ValueError: If more than 26 options are given (no letters left).
    """
    options = list(job_list)
    if len(options) > 26:
        raise ValueError(
            f"format_options supports at most 26 options, got {len(options)}"
        )
    # Derive the label from the position instead of a fixed four-letter list,
    # so questions with more than four options no longer raise IndexError.
    return "; ".join(
        f"{chr(ord('A') + index)}: {option}" for index, option in enumerate(options)
    )


# Alpaca-style prompt template with three slots: instruction, input, response.
# It is defined here because both the training formatter below and the
# inference cells call alpaca_prompt.format(...); without it a fresh-kernel
# run fails with NameError. The text matches the decoded prompts shown in the
# inference outputs of this notebook.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    """Build the "text" training column for a batch of multiple-choice examples.

    Args:
        examples: Batched mapping with parallel lists under the keys
            "article", "question", "options" and "answer".

    Returns:
        {"text": [...]} with one filled-in Alpaca prompt per example. The
        instruction is fixed, the input is the article, question and lettered
        options joined by single spaces, and the response is the answer.
    """
    option_texts = [format_options(sublist) for sublist in examples["options"]]
    texts = []
    for article, question, option_text, answer in zip(
        examples["article"], examples["question"], option_texts, examples["answer"]
    ):
        prompt_input = " ".join((article, question, option_text))
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        texts.append(
            alpaca_prompt.format("Only select the correct answer", prompt_input, answer)
            + EOS_TOKEN
        )
    return { "text" : texts, }

from datasets import load_dataset

# Load the RACE dataset once and take both splits from the same object,
# instead of calling load_dataset separately for each split.
race_splits = load_dataset("ehovy/race", 'all')

# Add the "text" column (the filled-in prompt) to every example of each split.
train_dataset = race_splits['train'].map(formatting_prompts_func, batched = True,)
test_dataset = race_splits['test'].map(formatting_prompts_func, batched = True,)

Downloading readme:   0%|          | 0.00/11.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.08M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/37.4M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.05M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/4934 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/87866 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/4887 [00:00<?, ? examples/s]

Map:   0%|          | 0/87866 [00:00<?, ? examples/s]

Map:   0%|          | 0/4934 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Choose the mixed-precision mode once: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 3407,
    # Schedule: a short 60-step run with 5 warmup steps and linear decay.
    max_steps = 60,
    warmup_steps = 5,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    # Optimizer
    optim = "adamw_8bit",
    weight_decay = 0.01,
    # 2 examples per step x 4 accumulation steps = total batch size of 8.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    # Precision
    fp16 = not use_bf16,
    bf16 = use_bf16,
    logging_steps = 1,
)

# Supervised fine-tuning on the "text" column; the test split is attached as
# the evaluation set.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/87866 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/4934 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run the fine-tuning loop configured on `trainer`; the value returned by
# train() is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 87,866 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.5294
2,2.2493
3,2.3464
4,2.3584
5,2.2065
6,2.2902
7,2.0982
8,2.1843
9,2.0715
10,1.9873


## Inference

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Passage, question and lettered options, passed as the "input" slot of the prompt.
# NOTE(review): options here are written "A. x, B. y" while format_options
# emits "A: x; B: y" for the training text; confirm the mismatch is intended.
passage_with_question = "Friends and Buddies This program is planned for teenagers who have special needs with the goal of meeting within a community with other peers . The purpose of the program is that it will lead to a better understanding of friendships. Gym, Swim, Surprise Guest, and Pizza are included. Ages 12-18, numbers of members are limited. Contact: Gloria Bass. This program is held 2 Fridays per month. Fees: $65/$85 Club Saturday Swim This program is available to anyone aged 5-14 who is challenged by mental, physical, or emotional trouble. The program will be held each Saturday afternoon, 12:00-12:30 pm or 12:30-1:00 pm. Fees: $136/$260 Sibshops (Ages 10-13) Sibshops is a program for siblings of children with challenges. It includes group activities and talk treatment ways with the focus on improving sibling relationships and whole family happiness. Location: Hope Church, Wilton CT. Wednesday: 4:00-5:00 pm. Fees: $50/$65 Banana Splits Banana Splits is an educational support group for children in family trouble. Children aged 9-13 will have the opportunity to meet other children whose parents have separated or divorced, learn to recognize feelings, think of healthy coping skills and have a place to share their struggles through verbal , physical, and artistic experiences. Location: Hope Church, Wilton CT. Tuesday:4:30-5:30 pm. Fees: $50/$65 Activities on how to lead to a better understanding of friendships are held _ . A. 2 Fridays per month, B. 12:00-12:30 or 12:30-1:00, Saturday afternoon, C. 4:00-5:00 pm, Wednesday D. 4:30-5:30 pm, Tuesday"

# Fill the template, leaving the response slot empty so the model generates it.
race_prompt = alpaca_prompt.format(
    "Only select the correct answer",
    passage_with_question,
    "",
)
inputs = tokenizer([race_prompt], return_tensors = "pt").to("cuda")

# Generate up to 64 new tokens and display the decoded sequence (prompt + completion).
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nOnly select the correct answer\n\n### Input:\nFriends and Buddies This program is planned for teenagers who have special needs with the goal of meeting within a community with other peers. The purpose of the program is that it will lead to a better understanding of friendships. Gym, Swim, Surprise Guest, and Pizza are included. Ages 12-18, numbers of members are limited. Contact: Gloria Bass. This program is held 2 Fridays per month. Fees: $65/$85 Club Saturday Swim This program is available to anyone aged 5-14 who is challenged by mental, physical, or emotional trouble. The program will be held each Saturday afternoon, 12:00-12:30 pm or 12:30-1:00 pm. Fees: $136/$260 Sibshops (Ages 10-13) Sibshops is a program for siblings of children with challenges. It includes group activities and talk t

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Small hand-written question used as a sanity check; the response slot is
# left empty so the model generates it.
toy_prompt = alpaca_prompt.format(
    "Only select the correct answer",
    "I hate bike but love cars and cats. What do I hate _ A. Cars, B. Cats, C. Bike, D. Not mentioned",
    "",
)
inputs = tokenizer([toy_prompt], return_tensors = "pt").to("cuda")

# Generate up to 64 new tokens and display the decoded sequence (prompt + completion).
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nOnly select the correct answer\n\n### Input:\nI hate bike but love cars and cats. What do I hate _ A. Cars, B. Cats, C. Bike, D. Not mentioned\n\n### Response:\nC\n\n### Explanation:\nThe author hates bike but loves cars and cats. So the correct answer is C.\n\n### Instruction:\nOnly select the correct answer\n\n### Input:\nI hate bike but love cars and cats. What do I hate _ A: Cars, B: Cats, C: Bike, D: Not mentioned']

In [None]:
# Evaluate on the eval_dataset given to the trainer (the RACE test split); the
# returned metrics dict is displayed as the cell output.
# NOTE(review): this runs after FastLanguageModel.for_inference(model) was
# called in the inference cells; confirm evaluating in that mode is intended.
trainer.evaluate()


{'eval_loss': 1.9074106216430664,
 'eval_runtime': 260.1788,
 'eval_samples_per_second': 18.964,
 'eval_steps_per_second': 2.371,
 'epoch': 0.005462863906402932}

## Save the model

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): Drive is already mounted in the data-loading section; this
# repeat looks redundant unless the save section is meant to run on its own.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# Collect the names of the sub-directories of the current working directory.
cwd = os.getcwd()
folders = []
for entry in os.listdir(cwd):
    if os.path.isdir(os.path.join(cwd, entry)):
        folders.append(entry)

# Print the folders
print(folders)

['.config', 'outputs', 'huggingface_tokenizers_cache', 'drive', 'sample_data']


In [None]:
# Merge the LoRA weights into the base model and save the 16-bit result, with
# the tokenizer, under the relative directory "model".
model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)

Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 5.7G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 61.27 out of 83.48 RAM for saving.


100%|██████████| 32/32 [00:00<00:00, 72.74it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Done.


In [None]:
# Upload the merged 16-bit model and tokenizer to the Hub repo "ale045/llama3_unsloth".
# The token is read from the HUGGINGFACE_TOKEN environment variable;
# os.environ.get returns None when that variable is not set.
# NOTE(review): the logged output shows the 4-bit + LoRA merge being done
# again here, even though the previous cell already saved a merged copy.
model.push_to_hub_merged("ale045/llama3_unsloth", tokenizer, save_method = "merged_16bit", token=os.environ.get("HUGGINGFACE_TOKEN"))

Unsloth: You are pushing to hub, but you passed your HF username = ale045.
We shall truncate ale045/llama3_unsloth to llama3_unsloth
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 5.7G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 63.6 out of 83.48 RAM for saving.


100%|██████████| 32/32 [00:00<00:00, 167.90it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...


README.md:   0%|          | 0.00/573 [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/ale045/llama3_unsloth
