<a href="https://colab.research.google.com/github/HazSyl1/LORA/blob/main/LoRa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q datasets loralib


In [2]:
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [3]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
!pip install bitsandbytes



In [5]:
!pip install accelerate



In [6]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig,AutoModelForCausalLM
model_name="bigscience/bloom-7b1"
model=AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map='auto'
)
tokenizer=AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
## Freezing weights
for param in model.parameters():
  param.requires_grad=False #freezing the model - train adapters later
  if param.ndim ==1:
    param.data=param.data.to(torch.float32)

model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self,x): return super().forward(x).to(torch.float32)
model.lm_head=CastOutputToFloat(model.lm_head)


## Setting up Lora Adapters

In [8]:
def print_trainable_parameters(model):
  trainable_params=0
  all_params=0
  for _,param in model.named_parameters():
    all_params+=param.numel()
    if param.requires_grad:
      trainable_params+=param.numel()

  print(f"Trainable Parameters: {trainable_params} || All params: {all_params} || Trainable Params: {100*trainable_params/all_params}")

In [9]:
from peft import LoraConfig, get_peft_model
peft_config=LoraConfig(
    r=16,
    lora_alpha=32,
    # trainability=16/32 = 1/2
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
    #meaning decoder only liek gpt , || or is it a seq2seq like t5, flan etc
    )


In [10]:
#model.add_adapter(adapter_name="custom",peft_config=peft_config)
peft_model=get_peft_model(model ,peft_config)
print_trainable_parameters(model)

Trainable Parameters: 7864320 || All params: 7076880384 || Trainable Params: 0.11112693126452029


## DATA

In [11]:
import transformers
from datasets import load_dataset
data =load_dataset("Abirate/english_quotes")

In [12]:
data

DatasetDict({
    train: Dataset({
        features: ['quote', 'author', 'tags'],
        num_rows: 2508
    })
})

In [13]:
data['train']['tags'][0]


['be-yourself',
 'gilbert-perreira',
 'honesty',
 'inspirational',
 'misattributed-oscar-wilde',
 'quote-investigator']

In [14]:
data['train']['quote'][0]

'“Be yourself; everyone else is already taken.”'

In [15]:
def merge_columns(example):
  example["prediction"]=example["quote"] + "-->:" +str(example["tags"])
  return example

data['train']=data['train'].map(merge_columns)
data['train']['prediction'][:5]

["“Be yourself; everyone else is already taken.”-->:['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']",
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”-->:['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst']",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.”-->:['human-nature', 'humor', 'infinity', 'philosophy', 'science', 'stupidity', 'universe']",
 "“So many books, so little time.”-->:['books', 'humor']",
 "“A room without books is like a body without a soul.”-->:['books', 'simile', 'soul']"]

In [16]:
data['train'][0]

{'quote': '“Be yourself; everyone else is already taken.”',
 'author': 'Oscar Wilde',
 'tags': ['be-yourself',
  'gilbert-perreira',
  'honesty',
  'inspirational',
  'misattributed-oscar-wilde',
  'quote-investigator'],
 'prediction': "“Be yourself; everyone else is already taken.”-->:['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']"}

In [17]:
data=data.map(lambda samples:tokenizer(samples['prediction']),batched=True)
#tokenizing the prediction data

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [36]:
data

DatasetDict({
    train: Dataset({
        features: ['quote', 'author', 'tags', 'prediction', 'input_ids', 'attention_mask'],
        num_rows: 2508
    })
})

## Training

In [19]:
trainer=transformers.Trainer(
    model=peft_model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        warmup_steps=100,
        max_steps=200,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs"
    )
    ,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer,mlm=False)
)

model.config.use_cache=False #silence the warnings
trainer.train()

Step,Training Loss
1,3.9589
2,2.9714
3,3.0219
4,3.3136
5,3.6508
6,3.9537
7,3.4897
8,3.2619
9,3.3374
10,3.3661


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.54 GiB. GPU 0 has a total capacty of 14.75 GiB of which 293.06 MiB is free. Process 312464 has 14.46 GiB memory in use. Of the allocated memory 13.43 GiB is allocated by PyTorch, and 909.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Using AWS NOTES