# QLoRA Fine-Tuning on Mistral 7B

by Benjamin Kissinger & Andreas Sünder

## Install required packages (only needed once)

Run the following in a notebook cell (`%pip` is an IPython magic, not a shell command):

```python
%pip install -r requirements.txt
```

## Setup

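Open a terminal and log in to the Hugging Face Hub (used to download the base model and to push the result) and to Weights & Biases (used for training logs):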

```bash
huggingface-cli login
wandb login
```

In [None]:
dataset_name = ''
base_model_id = 'mistralai/Mistral-7B-Instruct-v0.1'
prompt_template = ''
run_name = ''
max_input_length = -1
hub_model_id = ''

project_name = ''
%env WANDB_PROJECT=$project_name

## Load Dataset

In [None]:
from datasets import load_dataset

# Load the dataset named in the configuration cell. Later cells access its
# 'train' and 'validation' splits by name.
dataset = load_dataset(path=dataset_name)

## Load Base Model

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load the base model with 4-bit NF4 quantization (the "Q" in QLoRA).
model = AutoModelForCausalLM.from_pretrained(
  base_model_id,
  quantization_config=BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
  ),
  # Use the same dtype as the quantization compute dtype above and as the
  # bf16=True training arguments further down (this was float16 before, which
  # mixed two different half-precision formats in one run).
  torch_dtype=torch.bfloat16,
  device_map='auto'
)

tokenizer = AutoTokenizer.from_pretrained(
  base_model_id,
  padding_side='left',
  add_eos_token=True,
  add_bos_token=True
)
# Pad with a token other than EOS when one is available. The training cell uses
# DataCollatorForLanguageModeling(mlm=False), which replaces the label of every
# pad-token position with -100. If pad and EOS share an id, the EOS appended by
# add_eos_token=True is masked as well and the model is never trained to stop.
# Fall back to EOS only for tokenizers that define no unknown token.
tokenizer.pad_token = (
  tokenizer.unk_token if tokenizer.unk_token is not None else tokenizer.eos_token
)

## Tokenize dataset

In [None]:
def tokenize_sample(prompt):
  """Render one dataset sample through the prompt template and tokenize it.

  `prompt` is a mapping whose keys are the placeholders of `prompt_template`.
  The text is padded/truncated to `max_input_length` tokens. The returned
  encoding additionally carries `labels`, a copy of its `input_ids`.
  """
  text = prompt_template.format(**prompt)
  encoded = tokenizer(
    text,
    truncation=True,
    max_length=max_input_length,
    padding='max_length',
  )
  # Copy so that labels and input_ids are independent objects.
  encoded['labels'] = encoded['input_ids'].copy()
  return encoded

# tokenize_sample formats and tokenizes ONE example, so map it row by row.
# With batched=True, map would hand it a dict of column lists; the template
# would then be filled with whole lists and only a single sequence would be
# tokenized for the entire batch.
tokenized_dataset = dataset.map(tokenize_sample)

## Setup LoRA

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the base model before handing it to PEFT.
model.gradient_checkpointing_enable()
# PEFT helper that readies the quantized model for training; the returned model
# replaces the original. NOTE(review): see the PEFT docs for the exact steps it
# performs (freezing, dtype casts) in the installed version.
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
  """Print how many of the model's parameters are trainable.

  Counts `numel()` over `model.named_parameters()`; a parameter is trainable
  when its `requires_grad` flag is set. Prints the trainable count, the total
  count and the trainable share in percent. Returns nothing.
  """
  sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
  all_param = sum(count for count, _ in sizes)
  trainable_params = sum(count for count, trainable in sizes if trainable)

  # A model without any parameters would otherwise raise ZeroDivisionError.
  trainable_share = 100 * trainable_params / all_param if all_param else 0.0

  print(f'trainable params: {trainable_params} || all params: {all_param} || trainable: {trainable_share: .2f}%')

In [None]:
from peft import LoraConfig, TaskType, get_peft_model

# Linear layers that receive LoRA adapters, grouped by role (module names as
# used by the Mistral implementation in transformers).
attention_projections = ['q_proj', 'k_proj', 'v_proj', 'o_proj']
mlp_projections = ['gate_proj', 'up_proj', 'down_proj']

config = LoraConfig(
  task_type=TaskType.CAUSAL_LM,
  r=8,
  lora_alpha=8,
  lora_dropout=0.05,
  bias='none',
  target_modules=attention_projections + mlp_projections,
)

# Wrap the prepared base model with the adapters and report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

## Run Training

In [None]:
from datetime import datetime
from transformers import (DataCollatorForLanguageModeling,
                          EarlyStoppingCallback, Trainer, TrainingArguments)

# Train on the 'train' split and evaluate on the 'validation' split of the
# tokenized dataset; metrics are reported to Weights & Biases.
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    args=TrainingArguments(
        output_dir='./output',
        logging_dir='./logs',
        per_device_train_batch_size=1,
        gradient_checkpointing=True,
        gradient_accumulation_steps=1,
        warmup_steps=100,
        num_train_epochs=1,
        learning_rate=1e-3,
        bf16=True,
        optim='paged_adamw_8bit',
        logging_strategy='steps',
        logging_steps=200,
        save_strategy='epoch',
        save_steps=1,
        evaluation_strategy='steps',
        eval_steps=500,
        do_eval=True,
        # load_best_model_at_end=True,
        # metric_for_best_model='loss',
        # greater_is_better=False,
        report_to='wandb',
        # Fall back to "<project>-<timestamp>" when no run name is configured.
        run_name=run_name if run_name else
          f'{project_name}-{datetime.now().strftime("%Y-%m-%d-%H-%M")}'
    ),
    # mlm=False selects causal language modeling: labels are derived from input_ids.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
)

# The key/value cache is incompatible with gradient checkpointing, which is
# enabled for this run, so switch it off for training (this was True before).
# Re-enable it for inference.
model.config.use_cache = False
trainer.train()

## Push to hub

In [None]:
# Trainer.push_to_hub takes the commit message as its first positional
# argument, not the repository id. The target repository is read from
# TrainingArguments.hub_model_id, so set it there before pushing.
if hub_model_id:
  trainer.args.hub_model_id = hub_model_id
trainer.push_to_hub()