In [1]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
from peft import LoraConfig, AutoPeftModelForCausalLM
from dotenv import load_dotenv
from huggingface_hub import login
from apply_format import template_from_dir
from datasets import Dataset
# Pull the variables defined in token.env into the process environment
load_dotenv("token.env")

# Hugging Face API token used for login and model downloads; None when unset
api_token = os.environ.get("api_token")

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'apply_format'

In [2]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the token read from the environment.
login(token=api_token)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/adit/.cache/huggingface/token
Login successful


In [3]:
import torch

# Report which CUDA devices (if any) PyTorch can see.
if not torch.cuda.is_available():
    print('No GPUs available, running on CPU.')
else:
    # Count the visible devices, then list each one by name.
    num_gpus = torch.cuda.device_count()
    print(f'Number of GPUs available: {num_gpus}')

    for gpu_index in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(gpu_index)
        print(f'GPU {gpu_index}: {gpu_name}')

Number of GPUs available: 1
GPU 0: NVIDIA GeForce RTX 3060


In [4]:
# Build the conversation list from the files under ./data.
# NOTE(review): each item is later passed to apply_chat_template, so it is
# presumably a list of {"role", "content"} messages — confirm against apply_format.
convo_data = template_from_dir("data")
len(convo_data)

114

In [50]:
model_name = "google/gemma-2b"

# Load the base model with 4-bit NF4 quantization and bfloat16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    token=api_token,
    trust_remote_code=True,
)
# Turn off the generation cache for training (gradient checkpointing is enabled
# in the training arguments further down).
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token, trust_remote_code=True)
tokenizer.padding_side = "right"

# The dataset cell calls tokenizer.apply_chat_template. When the checkpoint ships
# no template, older transformers releases silently fell back to a built-in ChatML
# template (that is what produced the <|im_start|> ... <|im_end|> text in the saved
# output), while newer releases raise instead. Pin that same ChatML template
# explicitly so the notebook renders identical text on either version; a template
# provided by the checkpoint itself is left untouched.
if getattr(tokenizer, "chat_template", None) is None:
    tokenizer.chat_template = (
        "{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
        "{% endfor %}"
        "{% if add_generation_prompt %}"
        "{{ '<|im_start|>assistant\n' }}"
        "{% endif %}"
    )

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.03it/s]


In [51]:
# text = "Quote: Imagination is more"
# device = "cuda:0"
# inputs = tokenizer(text, return_tensors="pt").to(device)

# outputs = model.generate(**inputs, max_new_tokens=100)
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [52]:
# x = tokenizer.apply_chat_template(trial_convo[0], return_tensors="pt").to(device)

In [53]:
# # # messages = [
# # #     {"role": "user", "content": "Hi there!"},
# # #     {"role": "assistant", "content": "Nice to meet you!"},
# # #     {"role": "user", "content": "Can I ask a question?"}
# # # ]

# tokenized_dataset = [tokenizer.apply_chat_template(x,tokenize=True,add_generation_prompt=False) for x in convo_data]


In [54]:
# print(tokenized_dataset[-1])

In [55]:
from datasets import Dataset

def _render_chat(example):
    """Render one conversation into a single string with the tokenizer's chat template."""
    rendered = tokenizer.apply_chat_template(
        example["chat"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"messages": rendered}


# One row per conversation; "messages" holds the rendered (untokenized) text.
dataset = Dataset.from_dict({"chat": convo_data}).map(_render_chat)
print(dataset[0]["messages"])


Map: 100%|██████████| 114/114 [00:00<00:00, 11018.06 examples/s]

<|im_start|>assistant
[Narrator]: As you venture deeper into the enchanted forest, the dense canopy above casts shifting patterns of light and shadow upon the forest floor. You can feel the mystical energy pulsating around you, hinting at the presence of fantastical beings lurking amidst the trees. Suddenly, you come across a clearing where a majestic creature stands before you, its eyes gleaming with an otherworldly intelligence.<|im_end|>
<|im_start|>user
[Player]: I cautiously approach the creature and observe its demeanor.<|im_end|>
<|im_start|>assistant
[Narrator]: The creature, resembling a graceful unicorn with a shimmering coat of silver, regards you with curiosity. Its long mane sways gently in the breeze as it tilts its head, seemingly assessing you.<|im_end|>
<|im_start|>user
[Player]: I extend my hand slowly, offering it for the creature to sniff, attempting to establish a peaceful interaction.<|im_end|>
<|im_start|>assistant
[Narrator]: The unicorn lowers its head, its muz




In [56]:
# Spot-check the rendered text of the second conversation.
print(dataset[1]["messages"])

<|im_start|>assistant

[Narrator]: Amidst the ancient trees of the enchanted forest, a fearsome creature prowls, its golden eyes gleaming with hunger and malice. With a low growl, it emerges from the shadows, its massive form poised to strike at any moment.<|im_end|>
<|im_start|>user
[Player]: With a steady hand, I grasp my weapon, prepared to face the creature head-on in battle, determined to emerge victorious.<|im_end|>
<|im_start|>assistant
[Narrator]: As the creature lunges forward, you meet its charge with a swift and decisive strike, your weapon flashing in the dappled light as you engage in a deadly dance of blades. Each clash of steel sends sparks flying, illuminating the darkened forest with bursts of fiery light.<|im_end|>
<|im_start|>user
[Player]: I fight with all my strength, seeking out weaknesses in the creature's defenses as I strive to gain the upper hand in this fierce struggle.<|im_end|>
<|im_start|>assistant
[Narrator]: Despite your valiant efforts, the creature pro

In [57]:
# Keep only the rendered text column. The membership check makes this cell safe to
# re-run: remove_columns raises if "chat" has already been dropped.
if "chat" in dataset.column_names:
    dataset = dataset.remove_columns("chat")
dataset

Dataset({
    features: ['messages'],
    num_rows: 114
})

### LoRA Training

###### LoRA alpha is the scaling factor for the weight matrices: the LoRA update is scaled by lora_alpha / lora_rank, so a higher alpha value assigns more weight to the LoRA activations. An alpha of 16 was common practice in the training scripts we reviewed; in the configuration below we keep a 1:1 ratio between alpha and rank (lora_alpha = lora_r = 100) so as not to overpower the base model.

In [58]:
from peft import get_peft_model,TaskType

# LoRA hyperparameters. Rank and alpha are equal, so lora_alpha / lora_r is 1.
lora_r = 100
lora_alpha = 100
lora_dropout = 0.01

# Adapter configuration for causal language modelling; bias terms are not trained.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

# from peft import LoraConfig

# lora_alpha = 16
# lora_dropout = 0.1
# lora_r = 64

# peft_config = LoraConfig(
#     lora_alpha=lora_alpha,
#     lora_dropout=lora_dropout,
#     r=lora_r,
#     bias="none",
#     task_type="CAUSAL_LM",
# )

In [59]:
from transformers import TrainingArguments

# Training hyperparameters, kept as named values so they are easy to tune.
output_dir = "./results"
gradient_accumulation_steps = 5
save_steps = 10          # write a checkpoint every 10 optimizer steps
logging_steps = 10       # log the training loss every 10 optimizer steps
gradient_checkpointing = True
optim = "paged_adamw_32bit"
learning_rate = 2e-4
max_grad_norm = 0.3
weight_decay = 0.01
num_train_epochs = 20
lr_scheduler_type = "constant"

# NOTE(review): fp16 mixed precision is enabled here while the quantization config
# in this notebook sets bnb_4bit_compute_dtype to torch.bfloat16. Consider bf16=True
# (with fp16=False) on hardware that supports it so the two agree — confirm before changing.
training_args = TrainingArguments(
    output_dir=output_dir,
    optim=optim,
    num_train_epochs=num_train_epochs,
    gradient_accumulation_steps=gradient_accumulation_steps,
    save_steps=save_steps,
    logging_steps=logging_steps,
    auto_find_batch_size=True,
    learning_rate=learning_rate,
    max_grad_norm=max_grad_norm,
    fp16=True,
    group_by_length=True,
    # Use the variable defined above instead of a second hard-coded literal, so
    # changing the setting in one place actually takes effect.
    gradient_checkpointing=gradient_checkpointing,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
)

In [60]:
def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Despite its name, this function prints nothing; it returns the report so the
    caller decides how to display it.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Returns:
        A three-line string with the trainable count, the total count, and the
        trainable percentage formatted to two decimals. A model without any
        parameters reports 0.00% instead of raising ZeroDivisionError.

    NOTE(review): for 4-bit quantized layers ``numel()`` presumably reflects the
    packed storage size, so totals may undercount the logical parameters — confirm.
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        param_count = param.numel()
        all_model_params += param_count
        if param.requires_grad:
            trainable_model_params += param_count

    # Guard the division so an empty model yields a report rather than an error.
    if all_model_params:
        trainable_pct = 100 * trainable_model_params / all_model_params
    else:
        trainable_pct = 0.0

    return (
        f"trainable model parameters: {trainable_model_params}\n"
        f"all model parameters: {all_model_params}\n"
        f"percentage of trainable model parameters: {trainable_pct:.2f}%"
    )

# Summarize trainable vs. total parameter counts for the model as loaded above.
# NOTE(review): this runs before the LoRA config is handed to SFTTrainer, so the
# figures presumably describe the quantized base model, not the adapter — confirm.
parameter_report = print_number_of_trainable_model_parameters(model)
print(parameter_report)

trainable model parameters: 524363776
all model parameters: 1515268096
percentage of trainable model parameters: 34.61%


In [61]:
from trl import SFTTrainer

# Token length of each packed training sequence.
# NOTE(review): 2874 is presumably sized to the longest conversation — confirm.
max_seq_length = 2874

trainer = SFTTrainer(
    model=model,
    # Pass the tokenizer configured earlier in the notebook explicitly; without it
    # the trainer loads its own copy and ignores the settings applied there.
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="messages",
    max_seq_length=max_seq_length,
    args=training_args,
    peft_config=peft_config,
    packing=True,
)

In [62]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33maditobito[0m ([33madit_ahmedabadi[0m). Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
10,1.461
20,0.9901
30,0.8936
40,0.7963
50,0.7153
60,0.6135




TrainOutput(global_step=60, training_loss=0.9116344690322876, metrics={'train_runtime': 4030.8786, 'train_samples_per_second': 0.089, 'train_steps_per_second': 0.015, 'total_flos': 1.0381028256350208e+16, 'train_loss': 0.9116344690322876, 'epoch': 16.67})

In [63]:
print("Training Completed. \nSaving the model.....")

# Write the trained model (the trainer was built with a PEFT config) to output_dir.
trainer.save_model(training_args.output_dir)

print("Model saved successfully")

# Drop the training objects and ask PyTorch to release its cached GPU memory
# before loading the model again for merging.
del model
del trainer
torch.cuda.empty_cache()

print("Model and trainer deleted from memory")

# Reload from the saved output directory as a PEFT model.
model = AutoPeftModelForCausalLM.from_pretrained(training_args.output_dir, token=api_token, trust_remote_code=True)

print("PEFT Model loaded successfully")

# Merge the adapter into the underlying model (PEFT `merge_and_unload`).
merged_model = model.merge_and_unload()

print("Model merged successfully")

print("Saving the merged model")
merge_output_dir = './merged_model'
merged_model.save_pretrained(merge_output_dir, safe_serialization=True)
# Store the tokenizer next to the merged weights so the directory can be loaded
# on its own, without going back to the original checkpoint.
tokenizer.save_pretrained(merge_output_dir)

Training Completed. 
Saving the model.....
Model saved successfully
Model and trainer deleted from memory


Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.91s/it]


PEFT Model loaded successfully
Model merged successfully
Saving the merged model


In [65]:
# Publish the merged weights and the tokenizer to the same Hub repository.
hub_repo_id = "narrator-gemma-2b"
merged_model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)