In [1]:
import os
from datetime import datetime

import torch
import wandb
from datasets import Dataset, concatenate_datasets
from huggingface_hub import login
from peft import LoraConfig, PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so
'NoneType' object has no attribute 'cadam32bit_grad_fp32'
CUDA SETUP: Loading binary /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so...
dlopen(/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so, 0x0006): tried: '/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (not a mach-o file), '/System/Volumes/Preboot/Cryptexes/OS/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so' (no such file), '/Library/Frameworks/Python.frame

  warn("The installed version of bitsandbytes was compiled without GPU support. "


ImportError: cannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/peft/__init__.py)

### Args

In [1]:
# Timestamp of this run, formatted like "20240131_1542". It is appended to the
# checkpoint directory and the Hub adapter repo name below.
date_time_start_run = datetime.now().strftime("%Y%m%d_%H%M")

# Central configuration for the whole notebook; later cells only read from it.
args = {
    # Secrets are read from the environment instead of being stored in the
    # notebook. A value of None is passed through to the login calls as-is.
    'hugging_face_api_key': os.environ.get('HF_TOKEN'),
    'wandb_api_key': os.environ.get('WANDB_API_KEY'),
    # JSON files that are concatenated into the training set.
    'dataset_paths': [
        '../../data/training/share_gpt_no_code_conversations_40k_translated.json',
    ],
    # NOTE(review): the published checkpoint appears to be named
    # 'bigscience/bloomz-7b1' - confirm this id resolves on the Hub.
    'base_model_name': 'bigscience/bloomz-7b',
    'tokenizer_args': {
        'max_length': 2048,
        'padding': 'max_length',
        'truncation': True,
    },
    # LoRA hyper-parameters forwarded to peft.LoraConfig (r, lora_alpha, lora_dropout).
    'lora_args': {
        'rank': 16,
        'alpha': 32,
        'dropout': 0.05,
    },
    # Unpacked verbatim into transformers.TrainingArguments.
    'training_args': {
        'output_dir': 'checkpoints_' + date_time_start_run,
        'num_train_epochs': 1,
        'per_device_train_batch_size': 4,
        'gradient_accumulation_steps': 4,
        'optim': 'paged_adamw_8bit',
        'logging_steps': 100,
        'learning_rate': 2e-4,
        # NOTE(review): the model is quantized with a bfloat16 compute dtype
        # while training uses fp16 mixed precision - confirm this is intended.
        'fp16': True,
        'warmup_ratio': 0.05,
        'lr_scheduler_type': 'cosine',
        'report_to': 'wandb',
        'push_to_hub': True,
        'group_by_length': True,
        # A positive max_steps overrides num_train_epochs, so this run stops
        # after a single optimizer step. NOTE(review): looks like a smoke-test
        # setting - remove it (or set -1) for a full training run.
        'max_steps': 1,
    },
    'hub_adapter_repo_name': 'chatbot_qlora_' + date_time_start_run,
}

### Hub login

In [None]:
# Authenticate against the Hugging Face Hub and Weights & Biases.
# The lookup key must be spelled exactly as in `args` ('hugging_face_api_key');
# 'huggingface_api_key' is not defined there and raises KeyError.
login(args['hugging_face_api_key'])
wandb.login(key=args['wandb_api_key'])

### Load dataset

In [8]:
def load_dataset(file_paths, seed=None):
    """Load JSON dataset files and return them as one shuffled Dataset.

    Args:
        file_paths: A path, or an iterable of paths, to JSON files that
            Dataset.from_json can read.
        seed: Optional seed forwarded to Dataset.shuffle so the row order is
            reproducible. The default (None) keeps the shuffle unseeded.

    Returns:
        The row-wise (axis=0) concatenation of every loaded file, shuffled.

    Raises:
        ValueError: If no path is given.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(file_paths, str):
        file_paths = [file_paths]
    parts = [Dataset.from_json(path) for path in file_paths]
    if not parts:
        raise ValueError("load_dataset() needs at least one dataset path")
    return concatenate_datasets(parts, axis=0).shuffle(seed=seed)


ds = load_dataset(args['dataset_paths'])

### Config and load model

In [None]:
# load base model
# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    args['base_model_name'],
    quantization_config=quantization_config,
    # device_map is an argument of from_pretrained(), not a quantization
    # option, so it has to be passed here to take effect.
    device_map='auto',
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(args['base_model_name'], trust_remote_code=True)
# Only fall back to EOS when the tokenizer defines no pad token of its own:
# DataCollatorForLanguageModeling excludes pad-token positions from the loss,
# so aliasing pad to EOS would also exclude genuine EOS tokens.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# model_max_length is the attribute the tokenizer uses as its length limit;
# assigning to `max_length` only creates an unused attribute.
tokenizer.model_max_length = args['tokenizer_args']['max_length']

# load peft model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=args['lora_args']['rank'],
    lora_alpha=args['lora_args']['alpha'],
    lora_dropout=args['lora_args']['dropout'],
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
# The generation cache is switched off on the model config for this run.
model.config.use_cache = False
model.print_trainable_parameters()

### Test inference

In [None]:
# Quick smoke test: generate a continuation for a fixed prompt before training.
prompt="""Write a poem
"""
# The tokenizer returns CPU tensors; move them to the device the model lives
# on so generate() does not mix devices.
encoding = tokenizer(prompt, return_tensors='pt').to(model.device)
with torch.inference_mode():
    outputs = model.generate(**encoding)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
print("Inference complete!")

### Training

In [None]:
def _tokenize_example(example):
    """Tokenize one record's 'text' field, truncated to the configured max length."""
    return tokenizer(
        example['text'],
        padding=True,
        truncation=True,
        max_length=args['tokenizer_args']['max_length'],
    )


# Replace the raw 'text' column with the tokenizer's output fields.
training_ds = ds.map(_tokenize_example, remove_columns=['text'])

training_arguments = TrainingArguments(**args['training_args'])
# mlm=False selects the causal (next-token) language-modeling collator.
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=training_ds,
    data_collator=causal_lm_collator,
)
trainer.train()

In [None]:
# Save the trained model to ./finals, then publish it to the Hub repo named in args.
model.save_pretrained('./finals')
adapter_repo = args['hub_adapter_repo_name']
model.push_to_hub(adapter_repo)
print('\n-----------Finish-----------\n')

In [12]:
import torch

# Check for PyTorch 2.x by comparing the parsed major version number.
# `torch.__version__ > '2'` is fragile: as a plain string comparison it is
# lexicographic, and as a version comparison '2.0.0' is not greater than '2'.
torch_major_version = int(torch.__version__.split('.')[0])
is_torch_2_or_newer = torch_major_version >= 2
is_torch_2_or_newer

False

Train ds:

0) quora_chat (10k) (chat instruct) (format chat) (medium)
1) share gpt no code (40k) (chat instruct) (format chat) (long)
2) alpaca chat cleaned (51k) (instruct) (format chat) (short)
3) all_faqs (2.7k) ==> duplicated / resampled up to 10k (format chat) (medium)
4) dialog sum (10k) (instruct) (format instruct) (medium)
5) cnn dailymail (30k) (instruct) (format instruct) (long)
6) gpt4_instruct_0.9 (17k) (instruct) (format instruct) (short)

### Training size: 100k
0) 10k
1) 25k
2) 15k
3) 10k
4) 10k
5) 15k
6) 15k