https://github.com/NielsRogge/Transformers-Tutorials/blob/master/Mistral/Supervised_fine_tuning_(SFT)_of_an_LLM_using_Hugging_Face_tooling.ipynb

In [2]:
!pip install -U transformers[torch] datasets



In [3]:
!pip install -q bitsandbytes trl peft

In [4]:
!pip install flash-attn --no-build-isolation



In [5]:
from datasets import load_dataset

# based on config
raw_datasets = load_dataset("HuggingFaceH4/ultrachat_200k")

In [6]:
from datasets import DatasetDict

# remove this when done debugging
indices = range(0,100)

dataset_dict = {"train": raw_datasets["train_sft"].select(indices),
                "test": raw_datasets["test_sft"].select(indices)}

raw_datasets = DatasetDict(dataset_dict)
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['prompt', 'prompt_id', 'messages'],
        num_rows: 100
    })
    test: Dataset({
        features: ['prompt', 'prompt_id', 'messages'],
        num_rows: 100
    })
})

In [7]:
raw_datasets["train"][:2]['messages']

[[{'content': "These instructions apply to section-based themes (Responsive 6.0+, Retina 4.0+, Parallax 3.0+ Turbo 2.0+, Mobilia 5.0+). What theme version am I using?\nOn your Collections pages & Featured Collections sections, you can easily show the secondary image of a product on hover by enabling one of the theme's built-in settings!\nYour Collection pages & Featured Collections sections will now display the secondary product image just by hovering over that product image thumbnail.\nDoes this feature apply to all sections of the theme or just specific ones as listed in the text material?",
   'role': 'user'},
  {'content': 'This feature only applies to Collection pages and Featured Collections sections of the section-based themes listed in the text material.',
   'role': 'assistant'},
  {'content': 'Can you guide me through the process of enabling the secondary image hover feature on my Collection pages and Featured Collections sections?',
   'role': 'user'},
  {'content': "Sure, h

In [8]:
example = raw_datasets["train"][0]
messages = example["messages"]
for message in messages:
  role = message["role"]
  content = message["content"]
  print('{0:20}:  {1}'.format(role, content))

user                :  These instructions apply to section-based themes (Responsive 6.0+, Retina 4.0+, Parallax 3.0+ Turbo 2.0+, Mobilia 5.0+). What theme version am I using?
On your Collections pages & Featured Collections sections, you can easily show the secondary image of a product on hover by enabling one of the theme's built-in settings!
Your Collection pages & Featured Collections sections will now display the secondary product image just by hovering over that product image thumbnail.
Does this feature apply to all sections of the theme or just specific ones as listed in the text material?
assistant           :  This feature only applies to Collection pages and Featured Collections sections of the section-based themes listed in the text material.
user                :  Can you guide me through the process of enabling the secondary image hover feature on my Collection pages and Featured Collections sections?
assistant           :  Sure, here are the steps to enable the secondary 

In [9]:
from transformers import AutoTokenizer

# model_id = "mistralai/Mistral-7B-v0.2"
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# set pad_token_id equal to the eos_token_id if not set
if tokenizer.pad_token_id is None:
  tokenizer.pad_token_id = tokenizer.eos_token_id

# Set reasonable default for models without max length
if tokenizer.model_max_length > 100_000:
  tokenizer.model_max_length = 2048

# Set chat template
DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
tokenizer.chat_template = DEFAULT_CHAT_TEMPLATE

In [10]:
import re
import random
from multiprocessing import cpu_count

def apply_chat_template(example, tokenizer):
    messages = example["messages"]
    # We add an empty system message if there is none
    if messages[0]["role"] != "system":
        messages.insert(0, {"role": "system", "content": ""})
    example["text"] = tokenizer.apply_chat_template(messages, tokenize=False)

    return example

column_names = list(raw_datasets["train"].features)
raw_datasets = raw_datasets.map(apply_chat_template,
                                num_proc=cpu_count(),
                                fn_kwargs={"tokenizer": tokenizer},
                                remove_columns=column_names,
                                desc="Applying chat template",)

# create the splits
train_dataset = raw_datasets["train"]
eval_dataset = raw_datasets["test"]

for index in random.sample(range(len(raw_datasets["train"])), 3):
  print(f"Sample {index} of the processed training set:\n\n{raw_datasets['train'][index]['text']}")

Sample 46 of the processed training set:

<|system|>
</s>
<|user|>
Write a character-driven short story that essentially deals with the theme of social media addiction and delve into the psychological effects that it could have on young minds. Pay close attention to the character arc of the protagonist, portraying how their mental health takes a hit as they struggle to cope with the pressures and the incessant demands of the virtual world. You can depict the impact of social media addiction on various aspects of their life, including their relationships, academics, and self-esteem. Use descriptive language and vivid imagery to make the reader connect with the character on an emotional level.</s>
<|assistant|>
Zoey had always been a shy and introverted girl, content with the company of books and the occasional online video game. But in high school, she discovered the thrill of social media - the likes, the comments, the validation that came with every post. Before she knew it, she was c

In [11]:
from transformers import BitsAndBytesConfig
import torch

# specify how to quantize the model
quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
)
device_map = "auto"#{"": torch.cuda.current_device()} if torch.cuda.is_available() else None

model_kwargs = dict(
#     attn_implementation=False,#"flash_attention_2", # set this to True if your GPU supports it (Flash Attention drastically speeds up model computations)
    torch_dtype="auto",
    use_cache=False, # set to False as we're going to use gradient checkpointing
    device_map=device_map,
    quantization_config=quantization_config,
)

In [12]:
from trl import SFTTrainer
from peft import LoraConfig
from transformers import TrainingArguments

2024-04-16 19:23:28.851691: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-16 19:23:28.851763: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-16 19:23:28.863153: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [13]:
!pip install huggingface_hub



In [14]:
from huggingface_hub import interpreter_login
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .


Enter your token (input will not be visible):  ·····································
Add token as git credential? (Y/n)  n


Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [15]:
import gc
torch.cuda.empty_cache()
gc.collect()

23

In [16]:
# path where the Trainer will save its checkpoints and logs
output_dir = 'trained_models/Mistral-7b-sft-lora-ultrachat'

# based on config
training_args = TrainingArguments(
    fp16=False, # specify bf16=True instead when training on GPUs that support bf16 else fp16
    bf16=False,
    do_eval=True,
    evaluation_strategy="epoch",
    gradient_accumulation_steps=128,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    learning_rate=2.0e-05,
    log_level="info",
    logging_steps=5,
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    max_steps=-1,
    num_train_epochs=1,
    output_dir=output_dir,
    overwrite_output_dir=True,
    per_device_eval_batch_size=1, # originally set to 8
    per_device_train_batch_size=1, # originally set to 8
    push_to_hub=True,
    hub_model_id="Mistral-7b-sft-lora-ultrachat",
    # hub_strategy="every_save",
    # report_to="tensorboard",
    save_strategy="no",
    save_total_limit=None,
    seed=42,
)

In [17]:
# based on config
peft_config = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

In [18]:
trainer = SFTTrainer(
        model=model_id,
        model_init_kwargs=model_kwargs,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        dataset_text_field="text",
        tokenizer=tokenizer,
        packing=True,
        peft_config=peft_config,
        max_seq_length=tokenizer.model_max_length,
    )



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2479 > 2048). Running this sequence through the model will result in indexing errors
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.


In [19]:
train_result = trainer.train()

***** Running training *****
  Num examples = 67
  Num Epochs = 1
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 128
  Total optimization steps = 1
  Number of trainable parameters = 54,525,952
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Currently logged in as: [33mavishek-paul[0m ([33mds_montreal[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,No log,1.380752


***** Running Evaluation *****
  Num examples = 64
  Batch size = 1


Training completed. Do not forget to share your model on huggingface.co/models =)




In [21]:
train_result

TrainOutput(global_step=1, training_loss=0.7032263875007629, metrics={'train_runtime': 2250.8542, 'train_samples_per_second': 0.03, 'train_steps_per_second': 0.0, 'total_flos': 5899069012574208.0, 'train_loss': 0.7032263875007629, 'epoch': 1.0})

In [23]:
dir(trainer)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_activate_neftune',
 '_add_sm_patterns_to_gitignore',
 '_created_lr_scheduler',
 '_deactivate_neftune',
 '_finish_current_push',
 '_fsdp_qlora_plugin_updates',
 '_gather_and_numpify',
 '_get_collator_with_removed_columns',
 '_get_eval_sampler',
 '_get_learning_rate',
 '_get_output_dir',
 '_get_train_sampler',
 '_globalstep_last_logged',
 '_hp_search_setup',
 '_inner_training_loop',
 '_load_best_model',
 '_load_from_checkpoint',
 '_load_optimizer_and_scheduler',
 '_load_rng_state',
 '_loggers_initialized',
 '_maybe_log_save_evaluate',
 '_memory_tracker',
 '_move_model_to_device',
 '_nested_gather',
 '_prepare_dataset',
 '

In [24]:
trainer.push_to_hub("Mistral-7b-sft-lora-ultrachat")

Saving model checkpoint to trained_models/Mistral-7b-sft-lora-ultrachat
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.2/snapshots/41b61a33a2483885c981aa79e0df6b32407ed873/config.json
Model config MistralConfig {
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}

tokenizer config file saved in trained_models/Mistral-7b-sft-lora-ultrachat/tokenizer_config.json
Special tokens fi

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

events.out.tfevents.1713294955.e8dd955df3fb.34.0:   0%|          | 0.00/5.23k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/218M [00:00<?, ?B/s]

events.out.tfevents.1713295515.e8dd955df3fb.361.0:   0%|          | 0.00/5.84k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/APaul1/Mistral-7b-sft-lora-ultrachat/commit/b55a2c6dbd557a0c5276e53d77ded48ba774bb05', commit_message='Mistral-7b-sft-lora-ultrachat', commit_description='', oid='b55a2c6dbd557a0c5276e53d77ded48ba774bb05', pr_url=None, pr_revision=None, pr_num=None)

In [26]:
metrics = train_result.metrics
max_train_samples = 1000#training_args.max_train_samples if training_args.max_train_samples is not None else len(train_dataset)
metrics["train_samples"] = min(max_train_samples, len(train_dataset))
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

***** train metrics *****
  epoch                    =        1.0
  total_flos               =  5493936GF
  train_loss               =     0.7032
  train_runtime            = 0:37:30.85
  train_samples            =        100
  train_samples_per_second =       0.03
  train_steps_per_second   =        0.0
