In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/transformers.git@main 
!pip install -q git+https://github.com/huggingface/peft.git

## summary

- bigscience/bloom-7b1
- lora fine-tune bloom: 可插拔式的（plugin/adapter）
    - freeze original weights
    - plugin lora adapters (peft)
- huggingface transformers 库
    - trainer.train 的参数及过程；
    - mlm 与 clm 的差异：（都是 unsupervised learning，都可以自动地构建 input/labels）
        - mlm：bert
        - clm：gpt（bloom）
    - pipeline
        - dataset/tasks
        - tokenizer
        - training (fine-tune based on LoRA)
        - inference

## base model & lora adapters

In [15]:
import torch
import torch.nn as nn
import bitsandbytes as bnb 
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model 

AttributeError: module 'bitsandbytes' has no attribute 'nn'

In [3]:
%load_ext watermark

In [4]:
%watermark --iversions

torch       : 2.0.0+cu118
bitsandbytes: 0.38.1



In [3]:
# Print the installed versions of the libraries this notebook relies on,
# so that a run can be tied to a concrete software environment.
from watermark import watermark
print(watermark(packages='peft,torch,loralib,transformers,accelerate,datasets'))


The following directories listed in your path were found to be non-existent: {WindowsPath('D')}
The following directories listed in your path were found to be non-existent: {WindowsPath('module'), WindowsPath('/matplotlib_inline.backend_inline')}
The following directories listed in your path were found to be non-existent: {WindowsPath('/usr/local/cuda/lib64')}
DEBUG: Possible options found for libcudart.so: set()
CUDA SETUP: PyTorch settings found: CUDA_VERSION=117, Highest Compute Capability: 8.6.
CUDA SETUP: To manually override the PyTorch CUDA version please see:https://github.com/TimDettmers/bitsandbytes/blob/main/how_to_use_nonpytorch_cuda.md
CUDA SETUP: Loading binary D:\anaconda3\envs\d2l-zh\lib\site-packages\bitsandbytes\libbitsandbytes_cuda117.so...
argument of type 'WindowsPath' is not iterable
CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.
CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be 

RuntimeError: 
        CUDA Setup failed despite GPU being available. Please run the following command to get more information:

        python -m bitsandbytes

        Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
        to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
        and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues

In [4]:
# Single source of truth for the checkpoint id, so the model and the tokenizer
# are always loaded from the same checkpoint.
MODEL_NAME = "bigscience/bloom-7b1"

# Base model that the LoRA adapters are attached to later in the notebook.
# load_in_8bit=True requests 8-bit weights and device_map='auto' leaves the
# device placement of the layers to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_8bit=True,
    device_map='auto',
)

# Tokenizer that matches the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

NameError: name 'AutoModelForCausalLM' is not defined

In [None]:
# Display the checkpoint's configuration; once the model is loaded the same
# information is reachable through model.config.
AutoConfig.from_pretrained("bigscience/bloom-7b1")

In [None]:
model

In [None]:
# Display the input embedding layer of the model
# (direct attribute path: model.transformer.word_embeddings).
model.get_input_embeddings()

In [None]:
tokenizer

### freeze original weights

In [None]:
list(model.parameters())[0].dtype

In [None]:
# Freeze every parameter of the loaded model; the adapters that are trained
# instead are attached in a later cell.
for param in model.parameters():
    param.requires_grad = False
    if param.ndim == 1:
        # cast the small 1-D parameters (e.g. layernorm) to fp32 for stability
        param.data = param.data.to(torch.float32)

In [None]:
# Reduce the number of stored activations (gradient checkpointing trades
# extra compute in the backward pass for lower memory use).
model.gradient_checkpointing_enable()  
# NOTE(review): presumably needed so gradients still reach the adapters even
# though all base weights (including the embeddings) are frozen — confirm
# against the transformers documentation.
model.enable_input_require_grads()

In [None]:
class CastOutputToFloat(nn.Sequential):
    """Sequential container whose result is always returned as float32."""

    def forward(self, x):
        # Run the wrapped modules first, then upcast whatever they produce.
        result = nn.Sequential.forward(self, x)
        return result.to(dtype=torch.float32)
# Wrap the LM head so its output is returned as float32. The isinstance guard
# keeps the cell idempotent: re-running it must not nest one wrapper inside
# another.
if not isinstance(model.lm_head, CastOutputToFloat):
    model.lm_head = CastOutputToFloat(model.lm_head)

### LoRA Adapters

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs; each parameter must provide
            ``numel()`` and a ``requires_grad`` flag.

    Prints one line with the trainable count, the total count and the
    trainable percentage. A model without any parameters is reported as
    0.0% instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the ratio: a parameter-free model would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model 
# LoRA hyper-parameters for the adapters that are attached to the model below.
config = LoraConfig(
    r=16, # rank of the low-rank update matrices
    lora_alpha=32, # alpha scaling: scales the LoRA weights/outputs
    # target_modules=["q_proj", "v_proj"], # optional: set explicitly if you know which modules to adapt
    # NOTE(review): q_proj/v_proj may not match this model's module names —
    # check the printed model before enabling the line above.
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM" # set this for CLM or Seq2Seq
)

In [None]:
# Attach the LoRA adapters described by `config` to the model, then report how
# many parameters are trainable afterwards.
# NOTE(review): `model` is rebound here, so re-running this cell would wrap an
# already wrapped model — restart the kernel before re-running.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
model

## pipeline

### data

In [None]:
import transformers
from datasets import load_dataset
# Quotes dataset; the cells below use its 'train' split and its 'quote',
# 'author' and 'tags' columns.
dataset = load_dataset("Abirate/english_quotes")

In [None]:
dataset

In [None]:
dataset['train']

In [None]:
dataset['train'].to_pandas()

In [None]:
dataset['train']['quote'][:4]

In [None]:
dataset['train']['author'][:4]

In [None]:
dataset['train'][:4]

In [None]:
str(dataset['train']['tags'][0])

In [None]:
def merge(row):
    """Store '<quote> ->: <tags>' under row['prediction'] and return the same row."""
    pieces = (row['quote'], str(row['tags']))
    row['prediction'] = ' ->: '.join(pieces)
    return row
# Add the 'prediction' training text to every example of the 'train' split.
dataset['train'] = dataset['train'].map(merge)

In [None]:
dataset['train']['prediction'][:5]

In [None]:
dataset['train'][4]

In [None]:
tokenizer(dataset['train']['prediction'][:4])

### tokenize

In [None]:
# Tokenize the 'prediction' text in batches; the tokenizer outputs
# ('input_ids', 'attention_mask') are added to the dataset as new columns.
dataset = dataset.map(lambda samples: tokenizer(samples['prediction']), batched=True)

In [None]:
# 'input_ids', 'attention_mask'
dataset

### training

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

In [None]:
# Hyper-parameters of the fine-tuning run: 200 optimizer steps, each one
# accumulating 4 micro-batches of 4 examples per device.
training_args = TrainingArguments(
    output_dir='outputs',
    max_steps=200,
    warmup_steps=100,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    fp16=True,
    logging_steps=1,
)

# mlm=False selects causal-LM collation instead of masked-LM collation.
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    data_collator=causal_lm_collator,
)

# The generation cache is switched off for the duration of training.
model.config.use_cache = False
trainer.train()

### inference

In [None]:
# Prepare the model for inference: trainer.train() leaves it in training mode
# (LoRA dropout would stay active while generating), and the generation cache
# was switched off for training.
model.eval()
model.config.use_cache = True

# Prompt in the same "<quote> ->: " format as the training text; the encoded
# inputs are moved to the model's device before generating.
batch = tokenizer("“Training models with PEFT and LoRa is cool” ->: ", return_tensors='pt').to(model.device)

with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

In [None]:
 
# Prepare the model for inference: trainer.train() leaves it in training mode
# (LoRA dropout would stay active while generating), and the generation cache
# was switched off for training.
model.eval()
model.config.use_cache = True

# Prompt in the same "<quote> ->: " format as the training text; the encoded
# inputs are moved to the model's device before generating.
batch = tokenizer("“An important paradigm of natural language processing consists of large-scale pre-training on general domain data and adaptation to particular tasks or domains.” ->: ", return_tensors='pt').to(model.device)

with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=50)

print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))

In [None]:
trainer.data_collator