> export WANDB_PROJECT=GenAI360
---
> import torch; torch.set_num_threads(8);



In [None]:
!pip install -q transformers==4.32.0 deeplake[enterprise]==3.7.1 trl==0.6.0 peft==0.5.0 wandb==0.15.8

# Load the Deep Lake Dataset

In [None]:
import deeplake

# Connect to the training and testing datasets.
# `ds` feeds the training ConstantLengthDataset and `ds_test` the evaluation
# one further down in this notebook.
ds = deeplake.load('hub://genai360/GAIR-lima-train-set')
ds_test = deeplake.load('hub://genai360/GAIR-lima-test-set')

/

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/genai360/GAIR-lima-train-set



\

hub://genai360/GAIR-lima-train-set loaded successfully.



/

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/genai360/GAIR-lima-test-set



-

hub://genai360/GAIR-lima-test-set loaded successfully.



 

In [None]:
# Bare expression: the notebook shows the dataset repr (its path and tensor names).
ds

Dataset(path='hub://genai360/GAIR-lima-train-set', tensors=['answer', 'question', 'source'])

In [None]:
def prepare_sample_text(example):
    """Build the question/answer training string for one dataset sample.

    Reads the 'question' and 'answer' entries of `example`, calls `.text()`
    on each, and returns them as "Question: ...", a blank line, then
    "Answer: ...".
    """
    question = example['question'].text()
    answer = example['answer'].text()
    return "Question: {}\n\nAnswer: {}".format(question, answer)

In [None]:
from transformers import AutoTokenizer
# Tokenizer of the same checkpoint ("facebook/opt-1.3b") that is loaded as the
# model to fine-tune later in the notebook.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")

In [None]:
from trl.trainer import ConstantLengthDataset

# Training dataset built from `ds`: each sample is turned into text by
# `prepare_sample_text` and tokenized with `tokenizer`; `seq_length=1024` is
# the requested sequence length.
# NOTE(review): presumably `infinite=True` makes iteration restart once the
# underlying data is exhausted — confirm against the installed trl version.
train_dataset = ConstantLengthDataset(
    tokenizer,
    ds,
    formatting_func=prepare_sample_text,
    infinite=True,
    seq_length=1024
)



In [None]:
# Pull a single item from the training dataset to inspect what it yields;
# the printed sample is a dict holding 'input_ids' and 'labels' tensors.
iterator = iter( train_dataset )
sample = next( iterator )
print( sample )

{'input_ids': tensor([    8, 10437,     9,  ...,    65,    14, 10698]), 'labels': tensor([    8, 10437,     9,  ...,    65,    14, 10698])}


In [None]:
# Reset the dataset's start position after the preview cell pulled a sample.
# The attribute was previously misspelled `stbart_iteration`, which only
# created an unused attribute and never reset anything.
# NOTE(review): confirm the installed ConstantLengthDataset actually reads
# `start_iteration`; if it does not, this line can be dropped.
train_dataset.start_iteration = 0

In [None]:
# Evaluation dataset built from `ds_test` with the same tokenizer, formatting
# function and sequence length as the training dataset. `infinite` is not
# passed here, so the class default applies.
eval_dataset = ConstantLengthDataset(
    tokenizer,
    ds_test,
    formatting_func=prepare_sample_text,
    seq_length=1024
)

## Add LoRA Layers

In [None]:
def print_trainable_parameters(model):
    """
    Report the trainable and total parameter counts of `model` on stdout.

    Walks `model.named_parameters()` once, counting every parameter's
    `numel()` towards the total and, when its `requires_grad` is truthy,
    towards the trainable count too. Prints both counts plus the trainable
    share in percent. Returns nothing.
    """
    sizes = [(tensor.numel(), tensor.requires_grad) for _, tensor in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    share = 100 * trainable / total
    print(
        "trainable params: {} || all params: {} || trainable%: {}".format(trainable, total, share)
    )

In [None]:
from peft import LoraConfig

# LoRA adapter settings; handed to the trainer below through `peft_config`.
lora_config = LoraConfig(
    r=16,  # matches the 16-feature lora_A layers in the model printout further down
    lora_alpha=32,  # NOTE(review): presumably the LoRA scaling factor — confirm in the peft docs
    lora_dropout=0.05,  # shows up as Dropout(p=0.05) in the model printout
    bias="none",
    task_type="CAUSAL_LM",  # the model being tuned is loaded with AutoModelForCausalLM
)

In [None]:
from transformers import TrainingArguments

# Hyperparameters and bookkeeping options for the fine-tuning run.
# NOTE(review): `output_dir` here is "./OPT-fine_tuned-LIMA-CPU", while the
# commented-out save/merge cells at the end of the notebook use
# "./OPT-fine_tuned-LIMA" — confirm which directory is intended.
training_args = TrainingArguments(
    output_dir="./OPT-fine_tuned-LIMA-CPU",
    dataloader_drop_last=True,
    evaluation_strategy="epoch",  # evaluate and save on the same "epoch" schedule
    save_strategy="epoch",
    num_train_epochs=10,
    logging_steps=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    gradient_accumulation_steps=1,
    bf16=True,  # the model itself is loaded with torch_dtype=torch.bfloat16
    weight_decay=0.05,
    run_name="OPT-fine_tuned-LIMA-CPU",
    report_to="wandb",  # the notebook header sets WANDB_PROJECT=GenAI360
)

In [None]:
from transformers import AutoModelForCausalLM
import torch

# Load the pretrained "facebook/opt-1.3b" causal LM, requesting bfloat16 weights.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b", torch_dtype=torch.bfloat16)

Downloading pytorch_model.bin:   0%|          | 0.00/2.63G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [None]:
import torch.nn as nn

# Prepare the base model for adapter training: freeze every parameter and
# upcast the 1-D ones to float32.
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
# NOTE(review): presumably needed so gradients can flow from the inputs while
# all base weights are frozen and checkpointing is on — confirm in the
# transformers docs.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Sequential wrapper whose forward result is converted to float32."""
  def forward(self, x): return super().forward(x).to(torch.float32)
# Wrap the existing LM head so its output is returned as float32.
model.lm_head = CastOutputToFloat(model.lm_head)

In [None]:
from trl import SFTTrainer

# Assemble the supervised fine-tuning trainer from the prepared model, the
# training arguments, both datasets and the LoRA configuration.
# NOTE(review): no tokenizer is passed here; confirm that SFTTrainer's
# fallback resolves to the same "facebook/opt-1.3b" tokenizer used above.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    packing=True,
)



In [None]:
# Report how many parameters of the trainer's model are trainable
# (about 0.24% of all parameters in the recorded output).
print_trainable_parameters(trainer.model)

trainable params: 3145728 || all params: 1318903808 || trainable%: 0.23851079820371554


In [None]:
# Start the fine-tuning run configured by `training_args`.
print("Training...")
trainer.train()

Training...


In [None]:
# import os

# print("Saving last checkpoint of the model")
# trainer.model.save_pretrained(os.path.join("./OPT-fine_tuned-LIMA", "final_checkpoint/"))

Saving last checkpoint of the model


# Merge LoRA weights with Base Model

In [None]:
# from transformers import AutoModelForCausalLM
# import torch

# model = AutoModelForCausalLM.from_pretrained( "facebook/opt-1.3b", return_dict=True)

In [None]:
# from peft import PeftModel

# # Load the Lora model
# model = PeftModel.from_pretrained(model, "./OPT-fine_tuned-LIMA/final_checkpoint/")
# model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): OPTForCausalLM(
      (model): OPTModel(
        (decoder): OPTDecoder(
          (embed_tokens): Embedding(50272, 2048, padding_idx=1)
          (embed_positions): OPTLearnedPositionalEmbedding(2050, 2048)
          (final_layer_norm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (layers): ModuleList(
            (0-23): 24 x OPTDecoderLayer(
              (self_attn): OPTAttention(
                (k_proj): Linear(in_features=2048, out_features=2048, bias=True)
                (v_proj): Linear(
                  in_features=2048, out_features=2048, bias=True
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.05, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=2048, out_features=16, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(

In [None]:
# model = model.merge_and_unload()

# model.save_pretrained("./OPT-fine_tuned-LIMA/merged")

('./checkpoints/merged/tokenizer_config.json',
 './checkpoints/merged/special_tokens_map.json',
 './checkpoints/merged/vocab.json',
 './checkpoints/merged/merges.txt',
 './checkpoints/merged/added_tokens.json',
 './checkpoints/merged/tokenizer.json')

In [None]:
# from transformers import AutoTokenizer

# tokenizer = AutoTokenizer.from_pretrained("facebook/opt-1.3b")
# tokenizer.save_pretrained("./OPT-fine_tuned-LIMA/merged")