In [1]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig, AutoPeftModelForCausalLM
from dotenv import load_dotenv
from huggingface_hub import login
from apply_format import template_from_dir
from datasets import Dataset
# Load the environment file that holds the Hugging Face credentials
load_dotenv("token.env")

# Retrieve the API token
api_token = os.getenv("api_token")

# Fail fast with an actionable message: calling login() without a token falls
# back to an interactive prompt, which is easy to miss during "Run All".
if not api_token:
    raise RuntimeError(
        "Hugging Face token not found: define `api_token` in token.env "
        "or export it as an environment variable."
    )

login(api_token)

  from .autonotebook import tqdm as notebook_tqdm


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/adit/.cache/huggingface/token
Login successful


In [2]:
import torch

# Report which accelerator(s) PyTorch can see before the model is loaded.
if not torch.cuda.is_available():
    print('No GPUs available, running on CPU.')
else:
    num_gpus = torch.cuda.device_count()
    print(f'Number of GPUs available: {num_gpus}')

    # One line per visible device: its index followed by its reported name.
    for gpu_index in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f'GPU {gpu_index}: {gpu_name}')

Number of GPUs available: 1
GPU 0: NVIDIA GeForce RTX 3060


In [3]:
model_name = "google/gemma-2b-it"

# Quantization settings: 4-bit weights in NF4 format, bfloat16 for compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Arguments shared by the model and tokenizer downloads.
_hub_kwargs = dict(token=api_token, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    **_hub_kwargs,
)
# The KV cache is switched off for training.
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, **_hub_kwargs)
tokenizer.padding_side = "right"

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:17<00:00,  8.56s/it]


In [4]:
# Build the conversation list with the project-local helper `template_from_dir`
# (imported from apply_format), pointed at the 'data' directory.
# NOTE(review): presumably each entry is one chat as a list of role/content
# messages, since it is later fed to apply_chat_template — confirm in apply_format.
convo_data = template_from_dir('data')
# Cell output: number of loaded entries.
len(convo_data)

113

In [5]:
def _render_chat(example):
    """Render one row's ``chat`` entry into a single prompt string.

    The tokenizer's chat template is applied without tokenizing and without
    appending a generation prompt; the text is returned under ``messages``.
    """
    rendered = tokenizer.apply_chat_template(
        example["chat"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"messages": rendered}


# Wrap the conversations, add the rendered text column, then drop the raw chats
# so that only the 'messages' column remains.
dataset = (
    Dataset.from_dict({"chat": convo_data})
    .map(_render_chat)
    .remove_columns('chat')
)

# Spot-check one rendered conversation.
print(dataset['messages'][10])

Map: 100%|██████████| 113/113 [00:00<00:00, 1129.30 examples/s]

<bos><start_of_turn>user
Play a game with me where i am in an enchanted forest full of beasts and loots. You be the narrator and i will be the player, play a dialog game with me.<end_of_turn>
<start_of_turn>model
[Narrator]: In the heart of the enchanted forest, a fierce band of goblins emerges from the shadows, their eyes filled with malice as they brandish their weapons.<end_of_turn>
<start_of_turn>user
[Player]: Fight the goblins.<end_of_turn>
<start_of_turn>model
[Narrator]: With determination, you charge into battle, your weapon at the ready. As the goblins close in, you strike with swift and precise blows, fending off their attacks with skill and agility.<end_of_turn>
<start_of_turn>user
[Player]: Dodge the goblins' strikes.<end_of_turn>
<start_of_turn>model
[Narrator]: With nimble footwork, you evade the goblins' swinging weapons, dodging their strikes and keeping yourself out of harm's way. With each dodge, you create openings for counterattacks.<end_of_turn>
<start_of_turn>use




In [6]:
# Show the dataset summary (its columns and row count).
print(dataset)

Dataset({
    features: ['messages'],
    num_rows: 113
})


In [7]:
# Hold out 10% of the conversations for evaluation. The seed is fixed so the
# same rows land in train/test on every run (the split is shuffled by default).
# Note: this rebinds `dataset` to a DatasetDict, so the cell must run only once
# per kernel session.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

In [8]:
# Display the current contents of `dataset` as the cell output.
dataset

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 101
    })
    test: Dataset({
        features: ['messages'],
        num_rows: 12
    })
})

In [9]:
from peft import get_peft_model,TaskType

# LoRA hyperparameters. Rank and alpha are set to the same value.
lora_r = 100
lora_alpha = 100
lora_dropout = 0.01

# Adapter configuration for causal language modelling; no bias terms are trained.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

In [10]:
from transformers import TrainingArguments

# --- Training hyperparameters -------------------------------------------------
output_dir = "./results-alt"
gradient_accumulation_steps = 5
save_steps = 10
logging_steps = 5
gradient_checkpointing = True
optim = "paged_adamw_32bit"
learning_rate = 2e-4
max_grad_norm = 0.3
weight_decay = 0.01
max_steps = 100  # the run is bounded by optimizer steps, not by epochs
lr_scheduler_type = "constant"

# Mixed-precision mode: the quantized model is configured with
# bnb_4bit_compute_dtype=torch.bfloat16, so train in bf16 whenever the GPU
# supports it and only fall back to fp16 on hardware without bf16 support.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=output_dir,
    optim=optim,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_steps=save_steps,
    logging_steps=logging_steps,
    # Let the Trainer pick the per-device batch size itself.
    auto_find_batch_size=True,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    bf16=use_bf16,
    fp16=not use_bf16,
    max_steps=max_steps,
    group_by_length=True,
    # Use the variable defined above instead of a second hard-coded literal.
    gradient_checkpointing=gradient_checkpointing,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
)

In [11]:
def print_number_of_trainable_model_parameters(model):
    """Build a short report on how many of a model's parameters are trainable.

    Despite its name, this function prints nothing: it returns the report so
    the caller can decide how to display it.

    Args:
        model: Any object whose ``named_parameters()`` yields ``(name, param)``
            pairs, where each ``param`` provides ``numel()`` and a
            ``requires_grad`` flag.

    Returns:
        str: Three lines giving the trainable count, the total count and the
        trainable percentage with two decimals. A model without parameters is
        reported as ``0.00%`` instead of raising ``ZeroDivisionError``.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # Guard the division so an empty model yields a report rather than a crash.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {percentage:.2f}%"
    )

# Report parameter counts for `model` as it stands at this point in the
# notebook: peft_config has only been defined, not attached, so the numbers
# describe the quantized base model rather than the LoRA adapter.
print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 524363776
all model parameters: 1515268096
percentage of trainable model parameters: 34.61%


In [12]:
from trl import SFTTrainer

# Length (in tokens) of each packed training sequence.
# NOTE(review): 2874 is a magic number; presumably sized to the longest
# rendered conversation — confirm and consider deriving it from the data.
max_seq_length = 2874

# Supervised fine-tuning on the pre-rendered chat strings in the 'messages'
# column. The LoRA adapter is attached by the trainer through `peft_config`.
trainer = SFTTrainer(
    model=model,
    # Pass the tokenizer configured earlier (padding_side='right'); without it
    # the trainer builds its own tokenizer and that configuration is ignored.
    tokenizer=tokenizer,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    dataset_text_field="messages",
    max_seq_length=max_seq_length,
    args=training_args,
    peft_config=peft_config,
    # Concatenate examples into fixed-length sequences of max_seq_length tokens.
    # NOTE(review): the rendered strings already start with <bos>; check whether
    # the trainer's tokenization prepends a second one. TRL documents
    # dataset_kwargs={"add_special_tokens": False} for pre-templated text;
    # confirm it is supported by the installed TRL version before enabling.
    packing=True
)

Generating train split: 15 examples [00:00, 233.25 examples/s]
Generating train split: 1 examples [00:00, 229.89 examples/s]


In [13]:
# Start fine-tuning with the trainer built in the previous cell (long-running).
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maditobito[0m ([33madit_ahmedabadi[0m). Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss


In [None]:
import gc

# --- 1. Persist the trained LoRA adapter --------------------------------------
print("Training Completed. \nSaving the model.....")

trainer.save_model(training_args.output_dir)

print("Model saved successfully")

# --- 2. Release the training objects ------------------------------------------
del model
del trainer
# Collect garbage first so the deleted objects are really gone before asking
# PyTorch to hand its cached GPU memory back.
gc.collect()
torch.cuda.empty_cache()

print("Model and trainer deleted from memory")

# --- 3. Reload the adapter together with its base model and merge -------------
model = AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir, token=api_token, trust_remote_code=True)

print("PEFT Model loaded successfully")

# Fold the LoRA weights into the base weights to obtain a plain model.
merged_model = model.merge_and_unload()

print("Model merged successfully")

# --- 4. Save the merged model as a self-contained checkpoint ------------------
print("Saving the merged model")
merge_output_dir = './2b-it-peft-alt'
merged_model.save_pretrained(merge_output_dir, safe_serialization=True)
# Store the tokenizer next to the weights so the directory can be loaded on its
# own without going back to the original hub repository.
tokenizer.save_pretrained(merge_output_dir)

Training Completed. 
Saving the model.....




Model saved successfully
Model and trainer deleted from memory


Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.79s/it]


PEFT Model loaded successfully
Model merged successfully
Saving the merged model
