In [11]:
from dataclasses import field

import torch
from gmpy2 import trunc
from peft import LoraConfig, TaskType, get_peft_model
from tenacity import retry_if_exception_message
from tokenizers.trainers import Trainer
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
import torch

In [2]:
from datasets import load_dataset

In [3]:
squad_dataset = load_dataset("squad")

In [4]:
squad_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

In [5]:
train_dataset = squad_dataset['train']

In [6]:
train_dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})

In [7]:
test_dataset = squad_dataset['validation']

In [8]:
test_dataset

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 10570
})

In [9]:
model = AutoModelForCausalLM.from_pretrained('google/gemma-2-2b')

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
# Quantization Config (QLoRA)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4'
)

In [13]:
# Load the Model in 4 Bit Precision
model= AutoModelForCausalLM.from_pretrained('google/gemma-2-2b', config=bnb_config, device_map = 'auto')

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


In [14]:
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2-2b')
tokenizer.pad_token = tokenizer.eos_token

In [15]:
# Preprocess the Data
def format_data(example):
    prompt = f"Answer the question based on the context.\n\nContext: {example['context']}\n\nQuestion: {example['question']}\n\nAnswer:"
    answer = example['answers']['text'][0] if example['answers']['text'] else ""
    return {'prompt': prompt, 'answer': answer}

In [16]:
train_dataset = train_dataset.map(format_data)
test_dataset = test_dataset.map(format_data)

In [17]:
def tokenize_function(example):
    input_text = example['prompt']
    target_text = example['answer']
    input_ids = tokenizer(
        input_text,
        padding= 'max_length',
        truncation=True,
        max_length =512
    )['input_ids']
    labels = tokenizer(
        target_text,
        padding = 'max_length',
        truncation = True,
        max_length=128,
    )['input_ids']
    return {
        'input_ids':input_ids,
        'labels':labels
    }

In [18]:
train_dataset_tokenized = train_dataset.map(tokenize_function,batched=True,remove_columns=squad_dataset['train'].column_names)
test_dataset_tokenized = test_dataset.map(tokenize_function, batched= True, remove_columns = squad_dataset['validation'].column_names)

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [19]:
train_dataset_tokenized

Dataset({
    features: ['prompt', 'answer', 'input_ids', 'labels'],
    num_rows: 87599
})

In [20]:
# Create LoRA Configuration object
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=['q_proj','k_proj'],
    modules_to_save=['lm_head'],
    r = 8,
    lora_alpha=32,
    lora_dropout=0.1
)
model = get_peft_model(model,lora_config)

In [21]:
# Defining the Trainnig Arguments
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
    output_dir = './lora-squad-gemma2b',
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps=16,
    learning_rate=2e-4,
    num_train_epochs = 3,
    logging_steps=25,
    save_steps=500,
    eval_strategy='steps',
    fp16=True,
    save_total_limit=2,
    report_to='none'
)

In [23]:
# Minimizing the Dataset
small_train_dataset = train_dataset_tokenized.select(range(2000))
small_test_dataset = test_dataset_tokenized.select(range(300))

In [24]:
# Instantiate the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_test_dataset,
)

The model is already on multiple devices. Skipping the move to device specified in `args`.


In [25]:
trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 1000.00 MiB. GPU 0 has a total capacity of 8.00 GiB of which 0 bytes is free. Of the allocated memory 17.42 GiB is allocated by PyTorch, and 344.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [33]:
!pip install transformers accelerate peft bitsandbytes


Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-win_amd64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.1-py3-none-win_amd64.whl (59.5 MB)
   ---------------------------------------- 0.0/59.5 MB ? eta -:--:--
   ---------------------------------------- 0.3/59.5 MB ? eta -:--:--
    --------------------------------------- 1.3/59.5 MB 4.2 MB/s eta 0:00:14
   - -------------------------------------- 1.8/59.5 MB 4.2 MB/s eta 0:00:14
   - -------------------------------------- 2.1/59.5 MB 3.5 MB/s eta 0:00:17
   - -------------------------------------- 2.6/59.5 MB 2.7 MB/s eta 0:00:21
   - -------------------------------------- 2.9/59.5 MB 2.8 MB/s eta 0:00:21
   - -------------------------------------- 2.9/59.5 MB 2.8 MB/s eta 0:00:21
   - -------------------------------------- 2.9/59.5 MB 2.8 MB/s eta 0:00:21
   - -------------------------------------- 2.9/59.5 MB 2.8 MB/s eta 0:00:21
   - -------------------------------------- 2.9/59.5 MB 2.8 MB/s eta 0:00:21
   -