# Fine Tuning Mistral-7B

### Road Map
* Loading Tokenizer of Mistral
* Loading Base Model of Mistral
* Size Reduction with Quantization
* Loading Dataset
* Training with the PEFT (Parameter-Efficient Fine-Tuning) Technique
* Inference

In [None]:
# Quietly install/upgrade the Hugging Face packages imported by the cells below.
!pip install -q -U transformers bitsandbytes peft datasets trl

### Loading Tokenizer

In [None]:
from transformers import AutoTokenizer

# Hub identifier of the base checkpoint; reused later when the model is loaded.
base_model = "mistralai/Mistral-7B-v0.1"

# Pad on the right and ask the tokenizer to append an EOS token to encoded text.
tokenizer = AutoTokenizer.from_pretrained(base_model,
                                         padding_side="right",
                                         add_eos_token=True)

# Reuse EOS as the padding token (presumably because this checkpoint's
# tokenizer defines no pad token of its own — TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Display the tokenizer's BOS/EOS auto-insertion flags.
tokenizer.add_bos_token, tokenizer.add_eos_token

### Loading the Model with Quantization

In [None]:
from transformers import BitsAndBytesConfig
import torch

# bitsandbytes quantization settings handed to from_pretrained below:
# 4-bit loading with the "nf4" quantization type, double quantization turned
# off, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                bnb_4bit_quant_type="nf4",
                                bnb_4bit_use_double_quant=False,
                                bnb_4bit_compute_dtype=torch.bfloat16)

In [None]:
from transformers import AutoModelForCausalLM

# Load the base checkpoint with the 4-bit quantization config defined above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(base_model,
                                            quantization_config=bnb_config,
                                            torch_dtype=torch.bfloat16,
                                            device_map="auto")

### Loading the Dataset

In [None]:
from datasets import load_dataset

# Hub identifier of the instruction dataset.
dataset_name = "databricks/databricks-dolly-15k"

# Use rows 0-799 of the "train" split for training and rows 800-999 for
# evaluation, so the two subsets do not overlap.
train_dataset = load_dataset(dataset_name, split="train[0:800]")
eval_dataset = load_dataset(dataset_name, split="train[800:1000]")

In [None]:
# Display the training subset's summary.
train_dataset

In [None]:
# Convert to a pandas DataFrame to inspect the column dtypes.
train_dataset.to_pandas().dtypes

In [None]:
# Count how many training rows fall into each "category" value.
train_dataset.to_pandas().value_counts("category")

### Setting Prompt Format

In [None]:
def generate_prompt(sample):
    """Format one dataset record as an [INST]-style training prompt.

    The instruction (and, when present, the context) goes between ``[INST]``
    and ``[/INST]``; the response follows and the text is closed with ``</s>``.

    Unlike a naive f-string conditional, an empty or missing context adds
    nothing to the prompt (previously the literal text "None" was written),
    and no source-code indentation or trailing whitespace leaks into the text.

    Args:
        sample: Mapping with "instruction", "context" and "response" entries.

    Returns:
        A dict with a single "text" key holding the formatted prompt.

    NOTE(review): the tokenizer in this notebook is created with
    add_eos_token=True, so the literal <s>/</s> markers written here may end
    up duplicated after tokenization — confirm against the trainer's settings.
    """
    instruction_lines = [sample["instruction"]]

    # Only mention the context when there is one; falsy covers "" and None.
    context = sample["context"]
    if context:
        instruction_lines.append(f"Here is some context: {context}")

    instruction_block = "\n".join(instruction_lines)
    full_prompt = f"<s>[INST]{instruction_block}\n[/INST] {sample['response']}</s>"

    return {"text": full_prompt}

In [None]:
# Look at the first raw training record.
train_dataset[0]

In [None]:
# Preview the prompt produced for that same first record.
generate_prompt(train_dataset[0])

In [None]:
# Replace every record with its formatted prompt. Each split drops its OWN
# original columns, so only the "text" column returned by generate_prompt is
# left, and the eval split no longer depends on the training split's schema.
generated_train_dataset = train_dataset.map(
    generate_prompt,
    remove_columns=list(train_dataset.features),
)

generated_val_dataset = eval_dataset.map(
    generate_prompt,
    remove_columns=list(eval_dataset.features),
)

In [None]:
# Spot-check one formatted training example.
generated_train_dataset[5]

In [None]:
# Display the formatted training set's summary.
generated_train_dataset

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing before wrapping the quantized model for training.
model.gradient_checkpointing_enable()

# PEFT helper that readies a k-bit (here 4-bit) quantized model for fine-tuning.
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Walks ``model.named_parameters()``, adds up the element count of every
    parameter and, separately, of those with ``requires_grad`` set, then
    prints both totals together with the trainable share as a percentage.
    """
    total = 0
    trainable = 0
    for _name, tensor in model.named_parameters():
        count = tensor.numel()
        total += count
        if tensor.requires_grad:
            trainable += count
    share = 100*trainable/total
    print(f"trainable params: {trainable} || all params: {total} || trainable %{share}")

#### LoRA Configuration

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: adapters are attached to the attention projections
# (q/k/v/o), the MLP projections (gate/up/down) and lm_head; no bias terms
# are trained and dropout of 0.05 is applied inside the adapters.
lora_config = LoraConfig(r=8,# LoRA rank; the original note suggests values in the 8-128 range
                         lora_alpha=16,# scaling factor applied to the learned LoRA weights
                         target_modules=[
                             "q_proj",
                             "k_proj",
                             "v_proj",
                             "o_proj",
                             "gate_proj",
                             "up_proj",
                             "down_proj",
                             "lm_head",
                         ],
                         bias="none",
                         lora_dropout=0.05,
                         task_type="CAUSAL_LM",
                        )

In [None]:
# Wrap the model with the LoRA adapters described by lora_config.
model = get_peft_model(model, lora_config)

print_trainable_parameters(model) # see how large a share of the model will be trained

In [None]:
print(model) # shows the LoRA layers that were added to the model

### Setting Hyperparameters

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from the notebook; the model is pushed to the
# Hub at the end of the notebook.
notebook_login()

In [None]:
from transformers import TrainingArguments

# Training hyperparameters: checkpoints and evaluation both happen every 25
# steps, and experiment-tracker reporting is disabled.
# NOTE(review): per the TrainingArguments documentation a positive max_steps
# overrides num_train_epochs, so the run stops after 100 optimizer steps and
# the epochs value below has no effect — confirm which limit is intended.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_arguments = TrainingArguments(output_dir="./results",
                                      num_train_epochs=5,
                                      per_device_train_batch_size=4,
                                      gradient_accumulation_steps=1,
                                      optim="paged_adamw_32bit",
                                      save_strategy="steps",
                                      save_steps=25,
                                      learning_rate=2e-4,
                                      weight_decay=0.001,
                                      max_steps=100,
                                      evaluation_strategy="steps",
                                      eval_steps=25,
                                      do_eval=True,
                                      report_to="none")

### Training the Model

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning on the "text" column produced by generate_prompt.
# `model` has already been wrapped with the LoRA adapters via get_peft_model(),
# so peft_config is deliberately not passed again: handing the trainer both a
# PeftModel and a peft_config supplies the same LoRA setup twice.
trainer = SFTTrainer(model=model,
                     tokenizer=tokenizer,
                     args=training_arguments,
                     train_dataset=generated_train_dataset,
                     eval_dataset=generated_val_dataset,
                     dataset_text_field="text")

In [None]:
# Switch off the generation cache for training; gradient checkpointing was
# enabled on this model earlier and the two are typically not combined.
model.config.use_cache = False

In [None]:
# Run fine-tuning with the arguments configured above.
trainer.train()

In [None]:
# Repository name to publish under, then upload the trainer's model to the Hub.
my_fine_tuned_model = "mistral-7b-dolly-fine-tuned"
trainer.model.push_to_hub(my_fine_tuned_model)