# Mistral v0.2 Fine-Tuning

## Installing Dependencies

In [None]:
%%capture
%pip install accelerate peft bitsandbytes transformers trl
!pip install datasets,nltk

# Importing the Required Libraries

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

## Setting the Hugging Face Account Token

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never paste the token into the notebook: read it from the HF_TOKEN environment
# variable, and fall back to a hidden interactive prompt when it is not set.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")

login(
  token=hf_token,
  add_to_git_credential=True
)

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Loading Base Model & Custom Dataset

In [None]:
# Model from Hugging Face hub
base_model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# New instruction dataset
pinescript_dataset =  "here will be your custome dataset path"

# Load the dataset that the SFT trainer consumes as `instruct_tune_dataset`.
# NOTE(review): split="train" assumes the dataset exposes a "train" split, and the
# trainer reads a "text" column from it — confirm both against the real dataset.
instruct_tune_dataset = load_dataset(pinescript_dataset, split="train")

# Fine-tuned model
new_model = "cmaktek/mistral_7b_v2_adafters"

# Loading the Model with 4-bit Quantization

In [None]:
# Quantization recipe: 4-bit NF4 weights, bfloat16 compute, double quantization off.
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the base checkpoint with that recipe; the device map sends every module to device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map={"": 0},
    quantization_config=nf4_config,
)

# Config tweaks applied before training.
# NOTE(review): pretraining_tp looks carried over from Llama recipes — confirm Mistral reads it.
model.config.pretraining_tp = 1
model.config.use_cache = False

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Loading the Tokenizer for the Base Model

In [None]:
# Load the tokenizer that belongs to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


# LoRA Configuration

In [None]:
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
# NOTE(review): an explicit model.gradient_checkpointing_enable() call was left disabled here;
# prepare_model_for_kbit_training presumably takes care of it by default — confirm against
# the installed peft version.
model = prepare_model_for_kbit_training(model)

In [None]:
# Print the module tree to see which layers were replaced by 4-bit linear layers.
print(model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )

In [None]:
import bitsandbytes as bnb
def find_all_linear_names(model, linear_cls=None):
    """Return the leaf names of every linear layer of a given class inside ``model``.

    The result is meant to be used as LoRA ``target_modules``.

    Args:
        model: Any object exposing ``named_modules()`` that yields ``(name, module)`` pairs.
        linear_cls: Layer class (or tuple of classes) to look for. Defaults to
            ``bnb.nn.Linear4bit``, the layer type shown in the printed model above.

    Returns:
        A sorted list of unique leaf module names (the last component of each dotted
        module path), with ``"lm_head"`` excluded.
    """
    if linear_cls is None:
        # Resolved lazily so the function only needs bitsandbytes when the default is used.
        linear_cls = bnb.nn.Linear4bit

    # Only the last path component matters: "model.layers.0.self_attn.q_proj" -> "q_proj".
    lora_module_names = {
        name.rsplit(".", 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, linear_cls)
    }

    # Keep the output head out of the LoRA targets (needed for 16-bit); checked once,
    # after the scan, instead of on every loop iteration.
    lora_module_names.discard("lm_head")

    # Sorted so repeated runs produce the same list.
    return sorted(lora_module_names)

In [None]:
# Collect the LoRA target module names from the quantized model and show them.
modules = find_all_linear_names(model)
print(modules)

['q_proj', 'v_proj', 'gate_proj', 'down_proj', 'o_proj', 'k_proj', 'up_proj']


In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank 64, alpha 32, applied to every module name found above,
# with 1% dropout, no bias parameters, and the causal language-modelling task type.
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.01,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the prepared model with the adapter config; `model` now refers to the wrapped model.
model = get_peft_model(model, lora_config)

In [None]:
# Report how many parameters are trainable versus the total, and the trainable share.
trainable, total = model.get_nb_trainable_parameters()
print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")

Trainable: 167772160 | total: 7409504256 | Percentage: 2.2643%


## Setting Training Parameters

In [None]:
training_params = TrainingArguments(
    # --- Output, checkpointing and logging ---
    output_dir="here will be you out dir path for checkpoints saving /checkpoints",
    report_to="tensorboard",
    logging_steps=10,
    save_steps=100,
    # --- Run length ---
    num_train_epochs=5,
    max_steps=-1,
    # --- Batching: 5 sequences per device, accumulated over 10 steps ---
    per_device_train_batch_size=5,
    gradient_accumulation_steps=10,
    group_by_length=True,
    # --- Optimizer and learning-rate schedule ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    # --- Numeric precision: bfloat16 on, float16 off ---
    bf16=True,
    fp16=False,
)

## SFT Trainer

In [None]:
# Build the supervised fine-tuning trainer from the model, tokenizer and training_params.
# NOTE(review): `model` was already wrapped by get_peft_model(model, lora_config) in an
# earlier cell, so passing peft_config again looks redundant — confirm how the installed
# trl version treats an already-wrapped model.
# NOTE(review): dataset_text_field="text" presumably requires a "text" column in
# train_dataset — verify against the dataset.
trainer = SFTTrainer(
    model=model,
    train_dataset=instruct_tune_dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

In [None]:
# Run the fine-tuning loop.
# NOTE(review): the saved output below mentions trainer_state.json and starts at step 610,
# which suggests the recorded run was resumed from a checkpoint; this call passes no
# resume argument — confirm which behaviour is intended.
trainer.train()

	save_steps: 100 (from args) != 200 (from trainer_state.json)


Step,Training Loss
610,0.3141
620,0.4486
630,0.4337
640,0.3179
650,0.2674
660,0.3949
670,0.389
680,0.2856
690,0.185
700,0.4129




TrainOutput(global_step=810, training_loss=0.08416251606411404, metrics={'train_runtime': 9174.4234, 'train_samples_per_second': 4.435, 'train_steps_per_second': 0.088, 'total_flos': 3.0509472345100616e+18, 'train_loss': 0.08416251606411404, 'epoch': 4.99017199017199})

### Local Path for Saving the Adapter and Tokenizer

In [None]:
# Local directory that the later cells pass to save_pretrained() and PeftModel.from_pretrained().
# NOTE(review): this reassigns `new_model`, which an earlier cell set to a Hub repo id, and
# it is an absolute Google Drive path — presumably Drive must be mounted first; confirm.
new_model="/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/"

In [None]:
# Trainer.push_to_hub() takes a commit message as its first positional argument, not a
# repository id. The target repository comes from args.hub_model_id and otherwise falls
# back to the name of output_dir, so set the repo id explicitly before pushing.
# NOTE(review): this relies on the trainer not having initialised its Hub repo yet
# (push_to_hub is not enabled in training_params) — confirm if that changes.
adapter_repo_id = "path of huggingface model repo to push adapters"
trainer.args.hub_model_id = adapter_repo_id
trainer.push_to_hub(commit_message="Upload fine-tuned LoRA adapters")



events.out.tfevents.1715363676.475e28c060b5.4397.0:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

events.out.tfevents.1715363177.475e28c060b5.419.0:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

Upload 13 LFS files:   0%|          | 0/13 [00:00<?, ?it/s]

events.out.tfevents.1715363858.475e28c060b5.5357.0:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

events.out.tfevents.1715363449.475e28c060b5.3015.0:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/671M [00:00<?, ?B/s]

events.out.tfevents.1715364178.475e28c060b5.6902.0:   0%|          | 0.00/20.5k [00:00<?, ?B/s]

events.out.tfevents.1715374398.475e28c060b5.50189.0:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

events.out.tfevents.1715374479.475e28c060b5.50189.1:   0%|          | 0.00/6.26k [00:00<?, ?B/s]

events.out.tfevents.1715404460.18e011a72d58.6962.0:   0%|          | 0.00/14.3k [00:00<?, ?B/s]

events.out.tfevents.1715426200.18e011a72d58.99038.0:   0%|          | 0.00/10.2k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.11k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/engrzulqarnain/checkpoints/commit/9782a13152ea51e0ce08bd745174945be274a88d', commit_message='cmaktek/mistral_7b_v2_adafters', commit_description='', oid='9782a13152ea51e0ce08bd745174945be274a88d', pr_url=None, pr_revision=None, pr_num=None)

# Saving the Model and Tokenizer Locally

In [None]:
# Write the trainer's model to the local directory held in `new_model`.
trainer.model.save_pretrained(new_model)

In [None]:
# Write the tokenizer files next to the saved model; the cell output lists the files written.
trainer.tokenizer.save_pretrained(new_model)

('/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/tokenizer_config.json',
 '/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/special_tokens_map.json',
 '/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/tokenizer.model',
 '/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/added_tokens.json',
 '/content/drive/MyDrive/llm_reasearch and evaulation/mistral_7b/mistral_finetuning/tokenizer.json')

In [None]:
# Reload the base checkpoint in float16, without a quantization config, so the saved
# adapter can be merged into it in the next section.
# NOTE(review): the 4-bit `model` used for training is still referenced at this point and
# both are mapped to device 0 — confirm there is enough memory for the two of them.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Merging the Trained Adapter into the Base Model

In [None]:
from peft import PeftModel

# Attach the adapter saved at `new_model` to the float16 base model, then merge it in
# and unload the adapter wrapper in a single chained expression.
merged_model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Pushing the Fully Merged Fine-Tuned Model to the Hugging Face Hub

In [None]:
# Upload the merged model; replace the placeholder string with the target Hub repo id.
merged_model.push_to_hub("you huggingface model repo")

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/cmaktek/mistral_7b_fully_merged_version_2/commit/4f0d9c9478e6b5ca209020ecaab869d50acbe7c7', commit_message='Upload MistralForCausalLM', commit_description='', oid='4f0d9c9478e6b5ca209020ecaab869d50acbe7c7', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Upload the tokenizer; the saved output shows it going to the same repo as the merged model.
tokenizer.push_to_hub("you hf repo id")

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/cmaktek/mistral_7b_fully_merged_version_2/commit/f4ea0dc433b8adc542afc7456952b3297566e3e4', commit_message='Upload tokenizer', commit_description='', oid='f4ea0dc433b8adc542afc7456952b3297566e3e4', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Generating response from Fine-tuned model

In [None]:
# NOTE(review): neither `generate_response` nor `query` is defined in any cell visible in
# this notebook; both must exist before this cell can run — confirm where they come from.
res=generate_response(query,merged_model)

In [None]:
# Drop the first len(query) characters of the response, presumably to hide the echoed prompt.
# NOTE(review): the saved output starts with 'se:', which suggests the response text does
# not line up exactly with `query` character-for-character — verify the slicing offset.
res[len(query):]

'se:\n\nNone of the published indicators meet my needs, so I had to create my own.//@version=5indicator("Session Box",shorttitle="SB",overlay=true,max_boxes_count=100)// --- Settings ---startTime=input.string("0930-1000",title="Start Time",group="Time")EndTime=input.string("1000-1100",title="End Time",group="Time")boxColor=input.color(color.new(#ffb74d,75),title="Box Color",group="Style")textColor=input.color(color.white,title="Text Color",group="Style")textSize=input.string(size.small,title="Text Size",options=[size.tiny,size.small,size.normal,size.large,size.huge],group="Style")ShowLabel=input.bool(true,title="Show Labels",group="Label")ShowDate=input.bool(true,title="Show Date",group="Label")ShowDayName=input.bool(true,title="Show Day Name",group="Label")ShowHour=input.bool(true,title="Show Hour",group="Label")ShowMinutes=input.bool(true,title="Show Minutes",group="Label")// --- Translate text to labels ---dowTranslate(n)=>dayname=switchn1=>"Sunday"2=>"Monday"3=>"Tuesday"4=>"Wednesd