# Simplified LoRA Implementation

#### Install Dependencies

In [2]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.9/270.9 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build w

#### Confirm CUDA

In [2]:
import torch
# Ask PyTorch whether a CUDA device is usable; as the last expression of the
# cell, the boolean result is what gets displayed.
torch.cuda.is_available()

True

#### Load Base Model

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Single checkpoint identifier shared by the model and its tokenizer, so the two
# cannot drift apart. The reload cell further down also takes the tokenizer from
# the adapter's base model path, so this keeps training and inference consistent.
BASE_MODEL_ID = "bigscience/bloom-1b1"

# Base causal language model that the LoRA adapters are attached to later.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)

# Tokenizer published with the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.13G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/227 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

##### View Model Summary

In [4]:
# print(model)

In [5]:
# Freeze the whole base model: no original weight receives gradients.
for base_param in model.parameters():
  base_param.requires_grad = False
  is_one_dimensional = base_param.ndim == 1
  if is_one_dimensional:
    # 1-D parameters (e.g. layernorm) are kept in fp32 for stability
    base_param.data = base_param.data.to(torch.float32)

# Turn on gradient checkpointing, then make the inputs require gradients.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Sequential container whose final output is converted to float32."""

  def forward(self, x):
    """Run the wrapped modules on ``x`` and return the result as float32."""
    wrapped_output = super().forward(x)
    return wrapped_output.to(torch.float32)
# Replace the LM head with a wrapper so that its output is cast to float32
# (see CastOutputToFloat.forward).
model.lm_head = CastOutputToFloat(model.lm_head)

#### Helper Function

In [6]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        None. One summary line is written to stdout with the trainable count,
        the total count and the trainable percentage.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        element_count = param.numel()
        all_param += element_count
        if param.requires_grad:
            trainable_params += element_count
    # A model without any parameters would otherwise raise ZeroDivisionError;
    # report 0.0% in that case.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

#### Obtain LoRA Model

In [7]:
from peft import LoraConfig, get_peft_model

# LoRA hyper-parameters: rank-9 adapters (alpha 18, dropout 0.05) on the
# modules named "query_key_value", no bias training, causal-LM task type.
lora_settings = {
    "r": 9,
    "lora_alpha": 18,
    "target_modules": ["query_key_value"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM",
}
config = LoraConfig(**lora_settings)

# Wrap the frozen base model with the adapters described by `config`.
custom_model = get_peft_model(model, config)

# Reported on `model`: the recorded output shows trainable parameters on it even
# though every base weight was frozen earlier, i.e. the adapters are visible
# through the original object as well.
print_trainable_parameters(model)

trainable params: 1327104 || all params: 1066641408 || trainable%: 0.12441894624064698


#### Load Sample Dataset

In [8]:
from datasets import load_dataset

# Fetch the Open-Platypus dataset from the Hugging Face Hub; the recorded output
# shows a train split of 24,926 examples.
qa_dataset = load_dataset("garage-bAInd/Open-Platypus")


Downloading readme:   0%|          | 0.00/5.34k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/15.6M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/24926 [00:00<?, ? examples/s]

In [9]:

def create_prompt(instruction, output):
    """Format one training example as a single prompt string.

    The instruction goes under a ``### INSTRUCTIONS`` header and the expected
    answer under a ``### OUTPUT`` header, followed by a literal ``</s>`` marker.
    """
    instruction_section = f"### INSTRUCTIONS\n{instruction}"
    output_section = f"### OUTPUT\n{output}</s>"
    return f"{instruction_section}\n\n{output_section}"

def _tokenize_example(example):
    """Build the prompt for one dataset record and tokenize it."""
    prompt_text = create_prompt(example['instruction'], example['output'])
    return tokenizer(prompt_text)


# Apply the tokenization to every record of every split.
processed_qa_dataset = qa_dataset.map(_tokenize_example)

Map:   0%|          | 0/24926 [00:00<?, ? examples/s]

#### Train LoRA

In [10]:
import transformers

# Optimisation settings: batch size 1 with 4 accumulation steps, 100 optimizer
# steps in total, loss logged at every step, checkpoints written to 'outputs'.
# NOTE(review): warmup_steps equals max_steps, so the learning rate appears to
# spend the entire run warming up — confirm this is intended.
training_args = transformers.TrainingArguments(
    output_dir='outputs',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=5e-4,
    warmup_steps=100,
    max_steps=100,
    logging_steps=1,
)

# Collator for language modelling with masked-LM behaviour switched off.
lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

custom_trainer = transformers.Trainer(
    model=custom_model,
    args=training_args,
    train_dataset=processed_qa_dataset["train"],
    data_collator=lm_collator,
)

# Disable the generation cache on the model config before training starts.
custom_model.config.use_cache = False
custom_trainer.train()



Step,Training Loss
1,2.3321
2,2.0814
3,2.1975
4,2.4187
5,2.6646
6,1.8416
7,1.8378
8,2.5766
9,2.8606
10,1.9939


TrainOutput(global_step=100, training_loss=2.07262283205986, metrics={'train_runtime': 290.5413, 'train_samples_per_second': 1.377, 'train_steps_per_second': 0.344, 'total_flos': 534300106604544.0, 'train_loss': 2.07262283205986, 'epoch': 0.02})

In [6]:
# Hugging Face Hub namespace used when pushing and reloading the adapter.
HUB_USERNAME = "Cheuk-Ki"

# Repository name under that namespace; combined as "<username>/<identifier>".
model_identifier = "Platypus-finetune"

In [12]:
from huggingface_hub import notebook_login
# Interactive login: the recorded output is a widget, so this cell needs a
# person present to enter a Hub token before the upload below can run.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:


# Upload the adapter to "<HUB_USERNAME>/<model_identifier>". `token=True`
# replaces the deprecated `use_auth_token=True` and uses the locally stored
# Hub credential.
custom_model.push_to_hub(f"{HUB_USERNAME}/{model_identifier}", token=True)



README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/5.32M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Cheuk-Ki/Platypus-finetune/commit/77ad4effbf252a66c3183a6f5dbc726b9dee65bf', commit_message='Upload model', commit_description='', oid='77ad4effbf252a66c3183a6f5dbc726b9dee65bf', pr_url=None, pr_revision=None, pr_num=None)

In [7]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository that holds the fine-tuned adapter.
peft_model_id = f"{HUB_USERNAME}/{model_identifier}"

# The adapter config records which base checkpoint it was trained on.
config = PeftConfig.from_pretrained(peft_model_id)

# Load that base checkpoint unquantized. The former `load_in_8bit=False` keyword
# only restated the default and is a deprecated way to pass quantization
# options, so it is omitted.
base_lm_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Peft model
qa_model = PeftModel.from_pretrained(base_lm_model, peft_model_id)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


adapter_config.json:   0%|          | 0.00/590 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.13G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/5.32M [00:00<?, ?B/s]

In [8]:
from IPython.display import display, Markdown

def perform_inference(instruction_text, max_new_tokens=200):
    """Generate a completion for an instruction and render it as Markdown.

    The instruction is placed in the same ``### INSTRUCTIONS`` / ``### OUTPUT``
    template used to build the training prompts, with the output section left
    empty for the model to fill in.

    Args:
        instruction_text: text inserted under the instructions header.
        max_new_tokens: upper bound on generated tokens (default 200, the
            value that was previously hard-coded).

    Returns:
        None. The decoded sequence (prompt plus continuation, special tokens
        skipped) is displayed in the notebook.
    """
    prompt = f"### INSTRUCTIONS\n{instruction_text}\n\n### OUTPUT\n"
    input_sequence = tokenizer(prompt, return_tensors='pt')
    # Move the batch tensors to the same device as qa_model
    input_sequence = {k: v.to(qa_model.device) for k, v in input_sequence.items()}
    # torch.cuda.amp.autocast() is deprecated; torch.autocast with
    # device_type="cuda" is the supported equivalent.
    with torch.autocast(device_type="cuda"):
        generated_output = qa_model.generate(**input_sequence, max_new_tokens=max_new_tokens)

    display(Markdown(tokenizer.decode(generated_output[0], skip_special_tokens=True)))

In [9]:
# Short probability question used to try out the reloaded adapter.
instruction_example = "5 balls. take one ball. What is the probability"

# Displays the prompt together with the generated answer as Markdown.
perform_inference(instruction_example)

### INSTRUCTIONS
5 balls. take one ball. What is the probability

### OUTPUT
The probability of picking one ball is $\frac{1}{5}$.