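## Introduction

This notebook fine-tunes `mistralai/Mistral-7B-Instruct-v0.2` on the `robinsmits/ChatAlpaca-20K` multi-turn chat dataset, using Unsloth with 4-bit loading and LoRA adapters. The trained adapter, the tokenizer and a merged 4-bit model are then pushed to the Hugging Face Hub.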

In [1]:
# Import Modules
from datasets import load_dataset
from huggingface_hub import notebook_login
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments, AutoTokenizer, AutoModelForCausalLM
from unsloth import FastLanguageModel, save

In [2]:
# Identifiers used throughout the notebook:
#   model_name    - base checkpoint that gets loaded and fine-tuned
#   hf_model_name - name of the training output directory and of the Hub repo
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
hf_model_name = "Mistral-Instruct-7B-v0.2-ChatAlpacaV2"

In [3]:
# Authenticate with the Hugging Face Hub through the interactive notebook widget.
# Done up front because later cells push the adapter, tokenizer and merged model to private Hub repos.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Create Mistral model with Unsloth

In [4]:
# Maximum sequence length (in tokens) handed to Unsloth; the tokenization
# cell further down truncates conversations to the same value.
MAX_LEN = 8192

# Load the base checkpoint in 4-bit precision (no explicit dtype is requested)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = MAX_LEN,
    dtype = None,
    load_in_4bit = True,
)

# Patch the model and attach LoRA adapters to the attention projections only
# (q, k, v and o); no MLP projection is listed in target_modules.
model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    target_modules = ['k_proj', 'o_proj', 'q_proj', 'v_proj'],
    use_gradient_checkpointing = True,
    random_state = 42,
)

# Turn the generation cache off for training
model.config.use_cache = False

# Print the module tree of the patched model
print(model)

Unsloth: You passed in `mistralai/Mistral-7B-Instruct-v0.2` and `load_in_4bit = True`.
We shall load `unsloth/mistral-7b-instruct-v0.2-bnb-4bit` for 4x faster loading.
==((====))==  Unsloth: Fast Mistral patching release 2024.1
   \\   /|    GPU: NVIDIA GeForce RTX 4060 Ti. Max memory: 15.706 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.1.1+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.23. FA = True.
 "-____-"     Apache 2 free license: http://github.com/unslothai/unsloth
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` attribute will be overwritten with the one you passed to `from_pretrained`.
Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2024.1 patched 32 layers with 32 QKV layers, 32 O layers and 0 MLP layers.


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_featu

## Dataset and Tokenization

In [5]:
# Use the <unk> token for padding, both in the model config and in the tokenizer.
# NOTE(review): presumably chosen over </s> so that masking padded positions
# never hides real end-of-sequence tokens from the loss - confirm.
model.config.pad_token_id = tokenizer.unk_token_id
tokenizer.pad_token = tokenizer.unk_token

# Print the tokenizer configuration, including its special tokens
print(tokenizer)

LlamaTokenizerFast(name_or_path='unsloth/mistral-7b-instruct-v0.2-bnb-4bit', vocab_size=32000, model_max_length=32768, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}


In [6]:
# Fetch the ChatAlpaca-20K dataset
datasets = load_dataset('robinsmits/ChatAlpaca-20K')

# Pull out the two splits; 'test' is used as the evaluation set during training
train_data, test_data = datasets['train'], datasets['test']

# Show features and row counts of both splits
print(train_data, test_data, sep = '\n')

Dataset({
    features: ['id', 'messages'],
    num_rows: 19600
})
Dataset({
    features: ['id', 'messages'],
    num_rows: 400
})


## Show Chat Template Examples

In [7]:
# Render the first two training conversations through the chat template.
# zip() with range(2) stops after two rows without a manual counter.
for sample_no, item_data in zip(range(2), train_data):
    print('\n=======================================================================')
    print(tokenizer.apply_chat_template(item_data["messages"], tokenize = False))


<s>[INST] Find the product of the numbers: 5 and 8 [/INST]The product of 5 and 8 is 40.</s>[INST] What is the sum of the numbers 6 and 12? [/INST]The sum of the numbers 6 and 12 is 18.</s>[INST] Can you tell me the quotient of 20 and 5? [/INST]Yes, the quotient of 20 and 5 is 4.</s>[INST] What is the difference between 25 and 13? [/INST]The difference between 25 and 13 is 12.</s>[INST] What is the square of 9? [/INST]The square of 9 is 81.</s>[INST] What is the cube of 6? [/INST]The cube of 6 is 216.</s>

<s>[INST] Provide five tips for effectively using tape measures [/INST]1. Tighten the tape measure so that it stays in place when measuring.
2. Make sure that the hook of the tape measure hits the starting point precisely.
3. Allow for a bit of slack when measuring around curves or corners.
4. Measure from a stable surface, such as a table or workbench.
5. Mark the beginning and end of each measurement with a pen or marker.</s>[INST] Can you give me an example of when to use tip numb

## Tokenize Datasets

In [8]:
def _tokenize_conversation(example):
    """Render one conversation with the chat template and return its token ids.

    Args:
        example: A dataset row; its 'messages' entry is passed to the
            tokenizer's chat template.

    Returns:
        A dict with a single 'input_ids' key, which `Dataset.map` adds to the
        row as a new column. Sequences are truncated to MAX_LEN tokens.
    """
    return {"input_ids": tokenizer.apply_chat_template(example['messages'],
                                                       tokenize = True,
                                                       max_length = MAX_LEN,
                                                       truncation = True,
                                                       add_generation_prompt = False)}

# Fixed seed so the shuffled row order is identical after a kernel restart
# (same value as the random_state used when creating the LoRA adapters).
train_data = train_data.shuffle(seed = 42).map(_tokenize_conversation)

# The evaluation split is tokenized the same way but keeps its original order
test_data = test_data.map(_tokenize_conversation)

## Train Model

In [9]:
# Intervals, in optimizer steps, for logging, evaluation and checkpointing
logging_steps = 30
eval_steps = 120
save_steps = 240

# Training configuration, grouped by theme
training_args = TrainingArguments(
    # Output: the local directory and the private Hub repo share one name
    output_dir = hf_model_name,
    hub_model_id = hf_model_name,
    push_to_hub = True,
    hub_private_repo = True,
    report_to = 'tensorboard',
    # Optimisation schedule
    num_train_epochs = 2,
    learning_rate = 5.0e-5,
    lr_scheduler_type = 'cosine',
    warmup_ratio = 0.05,
    optim = 'paged_adamw_8bit',
    # Batching: 2 sequences x 16 accumulation steps per optimizer update
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 16,
    # Memory and precision
    gradient_checkpointing = True,
    gradient_checkpointing_kwargs = {'use_reentrant': False},
    bf16 = True,
    # Logging, evaluation and checkpoint cadence
    logging_steps = logging_steps,
    evaluation_strategy = "steps",
    eval_steps = eval_steps,
    save_strategy = 'steps',
    save_steps = save_steps,
    save_total_limit = 1,
)

# Collator for causal language modelling (mlm = False disables masked-LM mode)
causal_lm_collator = DataCollatorForLanguageModeling(tokenizer, mlm = False)

# Assemble the Trainer from the model, the tokenized splits and the config above
trainer = Trainer(
    model = model,
    train_dataset = train_data,
    eval_dataset = test_data,
    args = training_args,
    data_collator = causal_lm_collator,
)

# Run the fine-tuning; the returned TrainOutput is shown as the cell result
trainer.train()

  0%|          | 0/1224 [00:00<?, ?it/s]

{'loss': 1.8172, 'learning_rate': 2.4193548387096777e-05, 'epoch': 0.05}
{'loss': 1.1202, 'learning_rate': 4.8387096774193554e-05, 'epoch': 0.1}
{'loss': 0.9424, 'learning_rate': 4.9928401131991305e-05, 'epoch': 0.15}
{'loss': 0.8873, 'learning_rate': 4.969326495615839e-05, 'epoch': 0.2}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8802626729011536, 'eval_runtime': 175.6026, 'eval_samples_per_second': 2.278, 'eval_steps_per_second': 0.569, 'epoch': 0.2}
{'loss': 0.8866, 'learning_rate': 4.929577200516983e-05, 'epoch': 0.24}
{'loss': 0.8795, 'learning_rate': 4.873853577196727e-05, 'epoch': 0.29}
{'loss': 0.8643, 'learning_rate': 4.802522005224495e-05, 'epoch': 0.34}
{'loss': 0.8507, 'learning_rate': 4.716051485520689e-05, 'epoch': 0.39}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.855945885181427, 'eval_runtime': 175.5994, 'eval_samples_per_second': 2.278, 'eval_steps_per_second': 0.569, 'epoch': 0.39}
{'loss': 0.8646, 'learning_rate': 4.6150105567030287e-05, 'epoch': 0.44}
{'loss': 0.8481, 'learning_rate': 4.500063556978337e-05, 'epoch': 0.49}
{'loss': 0.8509, 'learning_rate': 4.3719662561576214e-05, 'epoch': 0.54}
{'loss': 0.8502, 'learning_rate': 4.2315608865137044e-05, 'epoch': 0.59}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8461549282073975, 'eval_runtime': 175.6038, 'eval_samples_per_second': 2.278, 'eval_steps_per_second': 0.569, 'epoch': 0.59}
{'loss': 0.8504, 'learning_rate': 4.079770605153206e-05, 'epoch': 0.64}
{'loss': 0.8441, 'learning_rate': 3.917593424312474e-05, 'epoch': 0.69}
{'loss': 0.8571, 'learning_rate': 3.746095649485412e-05, 'epoch': 0.73}
{'loss': 0.8275, 'learning_rate': 3.5664048685271176e-05, 'epoch': 0.78}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8406099081039429, 'eval_runtime': 175.6011, 'eval_samples_per_second': 2.278, 'eval_steps_per_second': 0.569, 'epoch': 0.78}
{'loss': 0.8504, 'learning_rate': 3.379702537829583e-05, 'epoch': 0.83}
{'loss': 0.8469, 'learning_rate': 3.187216214314904e-05, 'epoch': 0.88}
{'loss': 0.833, 'learning_rate': 2.990211484320202e-05, 'epoch': 0.93}
{'loss': 0.8452, 'learning_rate': 2.789983642441356e-05, 'epoch': 0.98}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8299362659454346, 'eval_runtime': 176.3256, 'eval_samples_per_second': 2.269, 'eval_steps_per_second': 0.567, 'epoch': 0.98}
{'loss': 0.8355, 'learning_rate': 2.587849175046676e-05, 'epoch': 1.03}
{'loss': 0.8349, 'learning_rate': 2.3851371044559023e-05, 'epoch': 1.08}
{'loss': 0.8085, 'learning_rate': 2.183180250696062e-05, 'epoch': 1.13}
{'loss': 0.8296, 'learning_rate': 1.9833064682876176e-05, 'epoch': 1.18}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8259414434432983, 'eval_runtime': 175.6054, 'eval_samples_per_second': 2.278, 'eval_steps_per_second': 0.569, 'epoch': 1.18}
{'loss': 0.8211, 'learning_rate': 1.7868299156785362e-05, 'epoch': 1.22}
{'loss': 0.8248, 'learning_rate': 1.5950424147292275e-05, 'epoch': 1.27}
{'loss': 0.8225, 'learning_rate': 1.4092049570592541e-05, 'epoch': 1.32}
{'loss': 0.8243, 'learning_rate': 1.2305394131010587e-05, 'epoch': 1.37}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8241989016532898, 'eval_runtime': 176.3259, 'eval_samples_per_second': 2.269, 'eval_steps_per_second': 0.567, 'epoch': 1.37}
{'loss': 0.829, 'learning_rate': 1.0602204983732231e-05, 'epoch': 1.42}
{'loss': 0.8187, 'learning_rate': 8.993680497945334e-06, 'epoch': 1.47}
{'loss': 0.8267, 'learning_rate': 7.490396628216237e-06, 'epoch': 1.52}
{'loss': 0.8133, 'learning_rate': 6.1022373782061e-06, 'epoch': 1.57}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8232150077819824, 'eval_runtime': 176.33, 'eval_samples_per_second': 2.268, 'eval_steps_per_second': 0.567, 'epoch': 1.57}
{'loss': 0.812, 'learning_rate': 4.838329813923997e-06, 'epoch': 1.62}
{'loss': 0.8181, 'learning_rate': 3.7069840538008327e-06, 'epoch': 1.67}
{'loss': 0.8254, 'learning_rate': 2.715638630145964e-06, 'epoch': 1.71}
{'loss': 0.8265, 'learning_rate': 1.870811581231585e-06, 'epoch': 1.76}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8227222561836243, 'eval_runtime': 176.3221, 'eval_samples_per_second': 2.269, 'eval_steps_per_second': 0.567, 'epoch': 1.76}
{'loss': 0.8219, 'learning_rate': 1.1780575955717354e-06, 'epoch': 1.81}
{'loss': 0.8253, 'learning_rate': 6.419314901696671e-07, 'epoch': 1.86}
{'loss': 0.8206, 'learning_rate': 2.6595826286200386e-07, 'epoch': 1.91}
{'loss': 0.8194, 'learning_rate': 5.260991566382878e-08, 'epoch': 1.96}


  0%|          | 0/100 [00:00<?, ?it/s]

{'eval_loss': 0.8225213885307312, 'eval_runtime': 176.3276, 'eval_samples_per_second': 2.269, 'eval_steps_per_second': 0.567, 'epoch': 1.96}
{'train_runtime': 50031.5248, 'train_samples_per_second': 0.784, 'train_steps_per_second': 0.024, 'train_loss': 0.8709218907200433, 'epoch': 2.0}


TrainOutput(global_step=1224, training_loss=0.8709218907200433, metrics={'train_runtime': 50031.5248, 'train_samples_per_second': 0.784, 'train_steps_per_second': 0.024, 'train_loss': 0.8709218907200433, 'epoch': 2.0})

## Push to Hub

In [10]:
# Upload the training output through the Trainer; the repo is the one set via
# hub_model_id in the TrainingArguments (the upload log below lists the LoRA
# adapter weights and the TensorBoard event file).
trainer.push_to_hub()

# Push the tokenizer to the repo of the same name, keeping it private.
# As the last expression of the cell, its CommitInfo is displayed as the result.
tokenizer.push_to_hub(hf_model_name, private = True)

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/109M [00:00<?, ?B/s]

events.out.tfevents.1707513917.DS10.5100.0:   0%|          | 0.00/14.3k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.96k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/robinsmits/Mistral-Instruct-7B-v0.2-ChatAlpacaV2/commit/3826747606f54a14d00c5f405b7e9e67fdcb014d', commit_message='Upload tokenizer', commit_description='', oid='3826747606f54a14d00c5f405b7e9e67fdcb014d', pr_url=None, pr_revision=None, pr_num=None)

## Merge and Push to Hub 4bit version

In [11]:
# The local directory and the Hub repo of the merged model both carry a '-4bit' suffix
merged_dir = f'./{hf_model_name}-4bit'
merged_repo = f'{hf_model_name}-4bit'

# Merge the LoRA weights into the 4-bit base model and save model + tokenizer to disk
save.unsloth_save_model(model,
                        tokenizer,
                        save_directory = merged_dir,
                        save_method = "merged_4bit_forced")

# Reload the merged checkpoint from disk and upload it to a private Hub repo
model_4bit = AutoModelForCausalLM.from_pretrained(merged_dir, load_in_4bit = True)
model_4bit.push_to_hub(merged_repo, private = True)

# Do the same for the tokenizer that was saved into the merged directory
tokenizer_4bit = AutoTokenizer.from_pretrained(merged_dir)
tokenizer_4bit.push_to_hub(merged_repo, private = True)

Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...




Done.
Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 10 minutes for Llama-7b...

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` attribute will be overwritten with the one you passed to `from_pretrained`.


 Done.


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/robinsmits/Mistral-Instruct-7B-v0.2-ChatAlpacaV2-4bit/commit/ef475aa02811e8d775ab06a4aef8db47d54ff6e5', commit_message='Upload tokenizer', commit_description='', oid='ef475aa02811e8d775ab06a4aef8db47d54ff6e5', pr_url=None, pr_revision=None, pr_num=None)