In [None]:
!pip install bitsandbytes -q
!pip install datasets -q
!pip install -U accelerate -q
!pip install loralib -q
!pip install transformers -q
!pip install -U peft -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.6/536.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.3/38.3 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ibis-framework 7.1.0 requires pyarrow<15,>=2, but you have pyarrow 15.0.0 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m7.5 MB/s

In [None]:
# List the GPUs visible to this runtime (one line per device).
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-3343edfe-b149-8512-7319-5137d929e706)


In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
import torch
from torch import nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

# Checkpoint shared by the model and its tokenizer; defined once so the two cannot drift apart.
MODEL_NAME = "bigscience/bloom-3b"

# Load the weights quantized to 8-bit. The quantization settings are passed through
# `quantization_config` because the bare `load_in_8bit=True` keyword is deprecated in
# recent transformers releases.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

## Freezing the original weights

In [None]:
class CastOutputToFloat(nn.Sequential):
  """Sequential container whose output is always converted to float32."""

  def forward(self, x):
    """Run the wrapped modules on ``x`` and return the result as a float32 tensor."""
    result = super().forward(x)
    return result.to(dtype=torch.float32)

In [None]:
# Freeze every existing weight of the loaded model so that nothing in it is updated by training.
for param in model.parameters():
  param.requires_grad = False
  if param.ndim == 1:
    # Cast small 1-D parameters (e.g. normalization layers) to fp32 for stability.
    param.data = param.data.to(torch.float32)

# Enable gradient checkpointing on the model.
model.gradient_checkpointing_enable()
# Presumably needed so gradients can still flow from the inputs now that the base
# weights are frozen — confirm against the transformers documentation.
model.enable_input_require_grads()

# Wrap the LM head so its output is returned in fp32. The guard keeps this cell
# idempotent: re-running it no longer nests one wrapper inside another.
if not isinstance(model.lm_head, CastOutputToFloat):
  model.lm_head = CastOutputToFloat(model.lm_head)

## Setting up LoRA adapters

In [None]:
def print_trainable_parameters(model):
  """Print the trainable and total parameter counts of ``model``.

  Args:
    model: Object exposing ``named_parameters()`` that yields ``(name, param)``
      pairs, where each ``param`` provides ``numel()`` and ``requires_grad``.

  The percentage is reported as ``0.0`` when the model has no parameters at
  all, instead of raising ``ZeroDivisionError``.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    count = param.numel()
    all_param += count
    if param.requires_grad:
      trainable_params += count

  # Guard the division: a parameter-free model has all_param == 0.
  trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
  print(f'Trainable parameters: {trainable_params} || All parameters: {all_param} || Trainable %: {trainable_pct}')

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for a causal language model.
config = LoraConfig(
    task_type='CAUSAL_LM',
    r=16,                     # Rank of the low-rank adapter matrices (not an attention-head count)
    lora_alpha=32,            # Alpha scaling
    lora_dropout=0.05,        # Dropout applied inside the adapter layers
    bias='none',              # Bias terms are not trained
)

# Wrap the model with the adapters, then report how much of it is trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

Trainable parameters: 4915200 || All parameters: 3007472640 || Trainable %: 0.1634329082375293


In [None]:
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer
from datasets import load_dataset

# Load the 'Abirate/english_quotes' dataset; later cells use its 'train' split
# and the 'quote' and 'tags' fields of each record.
data = load_dataset('Abirate/english_quotes')

In [None]:
def merge_columns(example):
  """Add a 'prediction' field of the form ``<quote> ->: <tags>`` to ``example``.

  The record is modified in place and also returned. The tags are rendered
  with ``str()``.
  """
  quote_text = example['quote']
  tags_text = str(example['tags'])
  example['prediction'] = ' ->: '.join((quote_text, tags_text))
  return example

# Add the 'prediction' training text to every record of the train split.
data['train'] = data['train'].map(merge_columns)
# Preview the first five training texts. Slicing the rows first reads only those
# five records instead of materialising the entire 'prediction' column.
data['train'][:5]['prediction']

["“Be yourself; everyone else is already taken.” ->: ['be-yourself', 'gilbert-perreira', 'honesty', 'inspirational', 'misattributed-oscar-wilde', 'quote-investigator']",
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.” ->: ['best', 'life', 'love', 'mistakes', 'out-of-control', 'truth', 'worst']",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.” ->: ['human-nature', 'humor', 'infinity', 'philosophy', 'science', 'stupidity', 'universe']",
 "“So many books, so little time.” ->: ['books', 'humor']",
 "“A room without books is like a body without a soul.” ->: ['books', 'simile', 'soul']"]

In [None]:
# Inspect a single record of the train split.
# NOTE(review): the saved output of this cell shows no 'prediction' field even though
# the earlier map adds one, so it was presumably produced before that cell ran —
# re-run from a fresh kernel to refresh it.
data['train'][2011]

{'quote': '“Everything must be made as simple as possible. But not simpler.”',
 'author': 'Albert Einstein',
 'tags': ['einstein', 'paraphrased', 'science', 'systems']}

In [None]:
# Tokenize the 'prediction' text of every record, processing the records in batches.
data = data.map(
    lambda batch: tokenizer(batch['prediction']),
    batched=True,
)

In [None]:
# Optimisation settings for a short run of 200 optimizer steps.
training_args = TrainingArguments(
    output_dir='outputs',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # gradients are accumulated over 4 batches of 1
    max_steps=200,
    warmup_steps=100,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,                 # log the loss at every step
)

# Language-modeling collator with masked-LM turned off.
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    data_collator=collator,
)

# Turn the generation cache off before training
# (presumably because it conflicts with gradient checkpointing — confirm).
model.config.use_cache = False
trainer.train()

Step,Training Loss
1,3.0611
2,3.2762
3,3.0675
4,4.1769
5,3.2532
6,3.0926
7,3.3668
8,3.4832
9,3.3574
10,3.4677


TrainOutput(global_step=200, training_loss=2.437908639907837, metrics={'train_runtime': 663.1547, 'train_samples_per_second': 1.206, 'train_steps_per_second': 0.302, 'total_flos': 636083302010880.0, 'train_loss': 2.437908639907837, 'epoch': 0.32})

In [None]:
# The model was last used for training; switch to eval mode so that dropout
# (including the LoRA dropout) is disabled while generating.
model.eval()

# The prompt mirrors the training text layout (“quote” ->: tags). The opening curly
# quote was missing, which made the prompt differ from every training example.
prompt = "“Parameter-efficient fine-tuning is cool” ->: "
# Move the encoded prompt onto the model's device instead of leaving it on the CPU.
batch = tokenizer(prompt, return_tensors='pt').to(model.device)

# torch.autocast(device_type='cuda') replaces the deprecated torch.cuda.amp.autocast().
with torch.autocast(device_type='cuda'):
  output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))