In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch
import os
import json
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Make the local model directory the working directory. The relative paths used
# later in this notebook ('preprocessed_data.json', the training output directory
# and './logs') are resolved against it.
os.chdir('D:\\bggpt\\BgGPT-7B-Instruct-v0.2')

In [3]:
# Quantization settings used when loading the base model: 4-bit NF4 weights,
# float16 as the compute dtype, and double quantization switched off.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

In [4]:
# Device map intended to place the entire model on device index 0.
device_map = {"": 0}

# Load the local checkpoint with the quantization settings from `bnb_config`.
model = AutoModelForCausalLM.from_pretrained(
    "D:\\bggpt\\BgGPT-7B-Instruct-v0.2",
    quantization_config=bnb_config,
    device_map=device_map,
)

Loading checkpoint shards: 100%|██████████| 2/2 [07:09<00:00, 214.83s/it]


In [5]:
# Load the tokenizer that ships with the local checkpoint.
tokenizer = AutoTokenizer.from_pretrained('D:\\bggpt\\BgGPT-7B-Instruct-v0.2')
# Reuse the EOS token as the padding token.
# NOTE(review): pad and EOS now share one token; check whether the language-modeling
# data collator used for training then masks EOS positions in the labels — confirm.
tokenizer.pad_token = tokenizer.eos_token
# Ask the tokenizer to append EOS to every encoded sequence.
# NOTE(review): the training texts are produced by the chat template, which may already
# contain BOS/EOS markers — verify that special tokens are not duplicated.
tokenizer.add_eos_token = True
# Display both flags as the cell result to confirm the settings.
tokenizer.add_bos_token, tokenizer.add_eos_token

(True, True)

In [6]:
def get_max_length(model, default_length=1024):
    """Return the maximum sequence length declared in ``model.config``.

    The config attributes ``n_positions``, ``max_position_embeddings`` and
    ``seq_length`` are probed in that order; the first one holding a truthy
    value is printed and returned.

    Args:
        model: Any object exposing a ``config`` attribute.
        default_length: Value returned (and printed) when none of the probed
            attributes is present or truthy. Defaults to 1024.

    Returns:
        The first truthy length found on the config, otherwise
        ``default_length``.
    """
    for length_setting in ("n_positions", "max_position_embeddings", "seq_length"):
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            return max_length
    # None of the known attributes carried a usable value.
    print(f"Using default max length: {default_length}")
    return default_length
# Show the maximum sequence length reported by the loaded model's config.
get_max_length(model)

Found max lenth: 32768


32768

In [7]:
# Read the preprocessed chat data; the context manager closes the file handle
# as soon as parsing has finished.
with open('preprocessed_data.json', 'r', encoding='utf-8') as f:
    chat = json.load(f)

# Regroup the flat list into rows of two consecutive entries
# (presumably one user/assistant exchange per row — verify against the data file).
chat = np.reshape(chat, (len(chat) // 2, 2))

In [8]:
# Render every row of `chat` into one prompt string with the tokenizer's
# chat template (text only, no tokenization at this stage).
dataset = [
    tokenizer.apply_chat_template(conversation, tokenize=False)
    for conversation in chat
]

In [9]:
from datasets import load_dataset, Dataset
# Single-column ('text') DataFrame holding the rendered prompt strings.
df = pd.DataFrame(dataset, columns=['text'])

In [10]:
# Convert the DataFrame into a `Dataset`. Note that this rebinds `dataset`,
# which up to this point was the plain list of prompt strings.
dataset = Dataset.from_pandas(df)

In [11]:
# Sanity check: inspect the Python type returned when selecting the 'text' column.
type(dataset['text'])

list

In [12]:
def tokenize_prompts(prompt):
    """Tokenize the ``'text'`` field of ``prompt`` with the notebook-level tokenizer.

    Args:
        prompt: Mapping that provides the prompt string under the key ``'text'``.

    Returns:
        Whatever the tokenizer returns for that text.
    """
    text = prompt['text']
    encoded = tokenizer(text)
    return encoded
# Apply `tokenize_prompts` to every record of the dataset.
# NOTE(review): the tokenizer is called without truncation or a max_length, so
# sequence length is bounded only by the data — worth confirming given the
# limited GPU memory available for training.
tokenized_dataset = dataset.map(tokenize_prompts)

Map: 100%|██████████| 99/99 [00:00<00:00, 182.35 examples/s]


In [13]:
# Enable gradient checkpointing on the quantized base model.
model.gradient_checkpointing_enable()
# PEFT helper that readies the k-bit (here 4-bit) model for training. The result is
# bound to `original_model`, which is the object the LoRA adapters are attached to later.
original_model = prepare_model_for_kbit_training(model)

In [14]:
# LoRA adapter settings: rank 32, alpha 64, dropout 0.05, no bias training,
# applied to the projection layers listed below and to the LM head.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "lm_head",
    ],
)

# Turn on gradient checkpointing for the prepared model to lower memory use
# while fine-tuning.
original_model.gradient_checkpointing_enable()

# Wrap the prepared base model with the adapters described by `config`.
peft_model = get_peft_model(original_model, config)

In [15]:
import time
import transformers

# Timestamped output directory so that successive runs do not share a folder.
output_dir = f'./peft-dialogue-summary-training-{int(time.time())}'

# Training hyper-parameters. One sequence per step with 4 accumulation steps
# gives 4 sequences per optimizer update; a checkpoint is written every 25 steps.
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,  # must be a real boolean, not the string 'True'
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    max_steps=1000,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=25,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=25,
    gradient_checkpointing=True,
    report_to="none",
    group_by_length=True,
)

# Switch off the generation cache for training (it is typically incompatible
# with gradient checkpointing).
peft_model.config.use_cache = False

# Show which device the trainer will run on.
print(peft_training_args.device)

# mlm=False makes the collator build causal-LM batches rather than masked-LM ones.
peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=tokenized_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

cuda:0


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [16]:
# Launch fine-tuning.
# NOTE(review): the saved output of this cell shows the run stopping with a CUDA
# out-of-memory error on the 6 GiB GPU, so no training step completed.
peft_trainer.train()



Step,Training Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 270.00 MiB. GPU 0 has a total capacty of 6.00 GiB of which 0 bytes is free. Of the allocated memory 11.23 GiB is allocated by PyTorch, and 1.20 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# NOTE(review): `tokens` is not assigned in any cell visible in this notebook, so this
# would raise NameError on a fresh kernel — looks like a leftover scratch cell; confirm and remove.
print(tokens)

In [23]:
# NOTE(review): depends on `tokens`, which no visible cell defines (the saved output
# reports it as a `str`) — leftover scratch cell with an out-of-order execution count.
type(tokens)

str

In [15]:
import torch

# Report whether CUDA can be used and, if so, list every visible device by name.
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print("CUDA is available!")
    # How many CUDA devices does this process see?
    num_devices = torch.cuda.device_count()
    print(f"Number of CUDA devices available: {num_devices}")
    # One line per device: index followed by its name.
    for i, device_name in enumerate(map(torch.cuda.get_device_name, range(num_devices))):
        print(f"Device {i}: {device_name}")


CUDA is available!
Number of CUDA devices available: 1
Device 0: NVIDIA GeForce GTX 1660 Ti
