In [7]:
import os
import torch
import torch.nn as nn
# import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

In [8]:
# 1) Setup the model
# Small testing checkpoint is active; the alternative is kept for reference:
# checkpoint = "bigscience/bloomz-560m"
checkpoint = "bigscience/bigscience-small-testing"

# Tokenizer and model are both loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    device_map='auto',  # load_in_8bit=True is currently disabled
)

In [9]:
# 2) Freeze the original weights
for param in model.parameters():
    # The base model is frozen here; adapters are trained later.
    param.requires_grad = False
    if param.ndim != 1:
        continue
    # 1-D parameters (e.g. layernorm) are small: keep them in fp32 for stability.
    param.data = param.data.to(torch.float32)

# Gradient checkpointing reduces the number of stored activations.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward result is cast to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as float32."""
        result = super().forward(x)
        return result.to(torch.float32)

# Wrap the existing head so whatever it returns is cast to float32.
# NOTE: re-running this statement wraps the already-wrapped head again.
model.lm_head = CastOutputToFloat(model.lm_head)

In [10]:
# 3) Setting up the LoRA Adapters
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, e.g. a ``torch.nn.Module``.

    The printed line reports the trainable element count, the total element
    count and the trainable percentage. A model without any parameters
    reports ``0.0`` percent instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # Guard the ratio: a parameter-free model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}")
    
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters (meanings follow the PEFT LoraConfig documentation).
config = LoraConfig(
    r=16, # LoRA rank: inner dimension of the low-rank update matrices (not the number of attention heads)
    lora_alpha=32, # LoRA scaling numerator: the adapter update is scaled by lora_alpha / r (32 / 16 = 2 here)
    # target_modules is left unset below, so PEFT picks the modules to adapt -- TODO confirm which ones for this architecture
    # target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05, # dropout probability used on the LoRA layers
    bias="none", # no bias terms are made trainable
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

# Wrap the model with the LoRA adapters described by `config`, then report
# how many parameters are trainable after wrapping.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 8192 || all params: 16164736 || trainable%: 0.05067821707697546


In [11]:
# 4) Data
import transformers
from datasets import load_dataset
# Load the "Abirate/english_quotes" dataset; later cells read its "train"
# split and the "quote" and "tags" columns.
data = load_dataset("Abirate/english_quotes")

Found cached dataset json (C:/Users/Alexis Strappazzon/.cache/huggingface/datasets/Abirate___json/Abirate--english_quotes-6e72855d06356857/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)
100%|██████████| 1/1 [00:00<00:00, 497.90it/s]


In [12]:
# Preview the first five quotes of the train split.
data["train"]["quote"][:5]

['“Be yourself; everyone else is already taken.”',
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.”",
 '“So many books, so little time.”',
 '“A room without books is like a body without a soul.”']

In [13]:
# Preview the tag lists of the first five training examples.
data["train"]["tags"][:5]

[['be-yourself',
  'gilbert-perreira',
  'honesty',
  'inspirational',
  'misattributed-oscar-wilde',
  'quote-investigator'],
 ['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst'],
 ['human-nature',
  'humor',
  'infinity',
  'philosophy',
  'science',
  'stupidity',
  'universe'],
 ['books', 'humor'],
 ['books', 'simile', 'soul']]

In [14]:
def merge_columns(example):
    """Add a "prediction" field of the form ``<quote> ->: <str(tags)>``.

    The ``example`` mapping is updated in place and the same object is
    returned.
    """
    quote_text = example["quote"]
    tags_text = str(example["tags"])
    example["prediction"] = " ->: ".join((quote_text, tags_text))
    return example

# Apply merge_columns to every training row, then preview the first five
# "prediction" strings.
# NOTE: this rebinds data["train"], so the split carries the extra column
# from here on.
data["train"] = data["train"].map(merge_columns)
data["train"]["prediction"][:5]

Loading cached processed dataset at C:\Users\Alexis Strappazzon\.cache\huggingface\datasets\Abirate___json\Abirate--english_quotes-6e72855d06356857\0.0.0\e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4\cache-8a7463b65fe0bb13.arrow


["“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']",
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.” ->: ['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst']",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.” ->: ['human-nature', 'humor', 'infinity', 'philosophy', 'science', 'stupidity', 'universe']",
 "“So many books, so little time.” ->: ['books', 'humor']",
 "“A room without books is like a body without a soul.” ->: ['books', 'simile', 'soul']"]

In [15]:
# Show the first training example with all of its fields.
data['train'][0]

{'quote': '“Be yourself; everyone else is already taken.”',
 'author': 'Oscar Wilde',
 'tags': ['be-yourself',
  'gilbert-perreira',
  'honesty',
  'inspirational',
  'misattributed-oscar-wilde',
  'quote-investigator'],
 'prediction': "“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']"}

In [16]:
# Tokenize the "prediction" text in batches (batched=True passes a batch of
# samples to the lambda); the tokenizer outputs are added to the dataset.
# No truncation or max length is requested here.
data = data.map(lambda samples: tokenizer(samples['prediction']), batched=True)

Loading cached processed dataset at C:\Users\Alexis Strappazzon\.cache\huggingface\datasets\Abirate___json\Abirate--english_quotes-6e72855d06356857\0.0.0\e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4\cache-782786d61fc02314.arrow


In [17]:
# Inspect the dataset's splits, columns and row counts.
data

DatasetDict({
    train: Dataset({
        features: ['quote', 'author', 'tags', 'prediction', 'input_ids', 'attention_mask'],
        num_rows: 2508
    })
})

In [18]:
# 5) Training

trainer = transformers.Trainer(
    model=model,
    train_dataset=data['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2, # number of examples per batch on each device
        gradient_accumulation_steps=8, # number of batches accumulated before a gradient update -> per_device_train_batch_size * gradient_accumulation_steps = 16 examples seen before each update (per device)
        warmup_steps=100, # the lr starts very low and rises linearly, step by step, up to the target lr
        max_steps=10000, # number of steps after which the training stops; NOTE(review): the recorded output below shows a 10-step run -- re-run or adjust so code and output agree
        learning_rate=2e-4, # target lr reached once warmup is over
        fp16=True, # NOTE(review): fp16 mixed precision presumably requires a CUDA device -- confirm before running elsewhere
        logging_steps=1, # log the loss at every step
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False) # mlm=False: no masked-language-modeling masking is applied
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
trainer.train()

  0%|          | 0/10 [00:00<?, ?it/s]You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
 10%|█         | 1/10 [00:01<00:15,  1.77s/it]

{'loss': 12.4345, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.01}


 20%|██        | 2/10 [00:02<00:07,  1.12it/s]

{'loss': 12.4354, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.01}


 30%|███       | 3/10 [00:02<00:04,  1.47it/s]

{'loss': 12.4311, 'learning_rate': 6e-06, 'epoch': 0.02}


 40%|████      | 4/10 [00:02<00:03,  1.86it/s]

{'loss': 12.4335, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.03}


 50%|█████     | 5/10 [00:03<00:02,  2.32it/s]

{'loss': 12.4321, 'learning_rate': 1e-05, 'epoch': 0.03}


 60%|██████    | 6/10 [00:03<00:01,  2.60it/s]

{'loss': 12.4321, 'learning_rate': 1.2e-05, 'epoch': 0.04}


 70%|███████   | 7/10 [00:03<00:01,  2.99it/s]

{'loss': 12.435, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.04}


 80%|████████  | 8/10 [00:04<00:00,  2.45it/s]

{'loss': 12.4335, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.05}


 90%|█████████ | 9/10 [00:04<00:00,  2.77it/s]

{'loss': 12.4359, 'learning_rate': 1.8e-05, 'epoch': 0.06}


100%|██████████| 10/10 [00:04<00:00,  2.16it/s]

{'loss': 12.4319, 'learning_rate': 2e-05, 'epoch': 0.06}
{'train_runtime': 4.6159, 'train_samples_per_second': 34.662, 'train_steps_per_second': 2.166, 'train_loss': 12.433521842956543, 'epoch': 0.06}





TrainOutput(global_step=10, training_loss=12.433521842956543, metrics={'train_runtime': 4.6159, 'train_samples_per_second': 34.662, 'train_steps_per_second': 2.166, 'train_loss': 12.433521842956543, 'epoch': 0.06})

In [19]:
# Save the trained model; with no argument this presumably writes to the
# Trainer's output_dir ('outputs') -- TODO confirm.
trainer.save_model()

In [None]:
# 6) Push model to hub
from huggingface_hub import notebook_login

# Log in interactively so that no access token is hardcoded in the notebook.
notebook_login()

In [None]:

# The repo id must be a string of the form "username/repoid". The bare
# expression user/repoid would be evaluated as a division of two undefined
# names and raise NameError before anything is uploaded.
# Replace the placeholder with your own Hub namespace and repository name.
model.push_to_hub("username/repoid",
                  use_auth_token=True,
                  commit_message="basic training",
                  private=False)

In [None]:
# Inference
# NOTE: this cell rebinds `config` and `model`, which earlier cells used for
# the LoraConfig and the training model.

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: replace with the Hub repo id the adapter was pushed to.
peft_model_id = "username/repoid"
# The adapter config carries the name/path of the base model it belongs to.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, device_map="auto") # load_in_8bit=True,
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

In [28]:
# The prompt follows the same '<quote> ->: ' layout as the "prediction"
# strings the model was trained on.
batch = tokenizer('“Training models with PEFT and LoRA is cool” ->: ', return_tensors='pt').to(model.device)
print(batch)
model.eval()
# torch.autocast(device_type="cuda") is the supported replacement for the
# deprecated torch.cuda.amp.autocast() context manager; defaults are the same.
with torch.autocast(device_type="cuda"):
    output_tokens = model.generate(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"],  max_new_tokens=50)

# Decode only the first (and only) sequence of the batch.
print("\n\n", tokenizer.decode(output_tokens[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


{'input_ids': tensor([[ 86753,   7508,    386,  20038,   1002,    426, 108045,    530,   9810,
          14062,    632,  35847,    982,  11953,     29,    210]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}


 “Training models with PEFT and LoRA is cool” ->:                                                   
