In [None]:
# Set up the destination directory: mount Google Drive and make MyDrive the
# current working directory, so relative paths used later resolve inside it.
from google.colab import drive
drive.mount('/content/drive')

%cd '/content/drive/MyDrive/'

In [None]:
# SETUP environement
%%capture
!pip install transformers==4.36.0
!pip install git+https://github.com/huggingface/accelerate.git -q -U
!pip install bitsandbytes
!pip install git+https://github.com/huggingface/peft.git -q -U
!pip install --no-cache-dir sentencepiece
!pip install -q datasets einops wandb trl

import sentencepiece, torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# IMPORT DATASET from huggingFace
%%capture
dataset_name = "Maxime62/JuniaLLM"
dataset = load_dataset(dataset_name)['train']

In [None]:
# Base model: Vigogne.
# Vigogne is a collection of open-source French large language models (LLMs) designed for instruction following.
model_name = "bofenghuang/vigogne-2-7b-instruct"

# Name of the fine-tuned model (used as the repository name when pushing to the Hub)
new_model = "vigogne-2-7b-Junia"

# NOTE: the fine-tuning parameters used in this notebook were optimized with Unsloth.

In [None]:
# Extra tokens to add to the vocabulary for the JUNIA LLM
new_tokens = ["ISEN", "ISA", "HEI", "JUNIA"]

In [None]:
# Load the tokenizer that belongs to the base model.
max_seq_length = 2048  # also given to SFTTrainer as its max_seq_length

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    # NOTE(review): `max_seq_length` and `truncation` do not look like documented
    # from_pretrained options (the documented length limit is `model_max_length`);
    # confirm whether they have any effect here.
    max_seq_length = max_seq_length,
    use_fast=False,  # ask for the slow (non-"fast") tokenizer implementation
    truncation=True
)
# Reuse the beginning-of-sequence token as padding token.
# NOTE(review): padding with BOS is unusual - confirm this is intended rather
# than a dedicated pad token or the EOS/UNK token.
tokenizer.pad_token = tokenizer.bos_token

In [None]:
# Load the base model with 4-bit quantization (bitsandbytes).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, # load the model weights quantized to 4-bit
    bnb_4bit_quant_type="nf4",# "nf4" selects the 4-bit NormalFloat (NF4) quantization data type
    bnb_4bit_compute_dtype=torch.float16, # computations are done in 16-bit floating point
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
# Disable the model's `use_cache` option for training.
model.config.use_cache = False

In [None]:
# Add new tokens to the tokenizer's vocabulary
tokenizer.add_tokens(new_tokens)

# Resize the token embedding matrix to match the new vocabulary size.
# NOTE(review): the LoRA configuration in this notebook only targets the
# attention/MLP projection layers, so the newly added embedding rows are
# presumably never trained - confirm whether the embedding layers should be
# listed in `modules_to_save`.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Formats dataset rows into the prompt text used for supervised fine-tuning
def formatting_prompts_func(examples):
    """Format a batch of dataset rows into training prompts.

    Args:
        examples: Mapping with an "instruction" and an "output" entry. Each
            entry is normally a list holding one string per row (a batch).
            A single row given as two plain strings is also accepted.

    Returns:
        list[str]: One formatted prompt per row, in input order.
    """
    instructions = examples["instruction"]
    responses = examples["output"]

    # Accept a single, un-batched row as well.
    if isinstance(instructions, str):
        instructions, responses = [instructions], [responses]

    output_text = [] # will hold the formatted text
    # Iterate over the rows of the batch. `len(examples)` would count the
    # columns of the mapping, not its rows, and silently drop most rows.
    for instruction, response in zip(instructions, responses):
        text = f"""<s>Ci-dessous se trouve une instruction qui décrit une demande d'un étudiant de chez Junia. Rédigez une réponse qui répond de manière précise à la demande.

### Instruction:
{instruction}

### Response:
{response}</s>"""

        output_text.append(text)
    return output_text # one formatted prompt per input row

In [None]:
# LoRA adapter configuration
# Hyperparameters handed to LoraConfig: r, lora_alpha and lora_dropout
lora_r, lora_alpha, lora_dropout = 64, 16, 0

# Names of the modules that receive a LoRA adapter
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Training configuration for the fine-tuning run

# Pick the mixed-precision mode from what the GPU reports:
# bf16 when supported, fp16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_arguments = TrainingArguments(
    output_dir="./Fine Tuning",
    seed=3407,
    # batching
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # schedule
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # precision
    fp16=not use_bf16,
    bf16=use_bf16,
    # logging
    logging_steps=1,
)

In [None]:
# Setting up the trainer for fine-tuning
%%capture
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    max_seq_length = max_seq_length,
    peft_config=peft_config,
    formatting_func=formatting_prompts_func, # Formatting the dataset with the function defined above
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args=training_arguments,
)

In [None]:
# Run the fine-tuning loop
trainer.train()

# Save the trained model to the "Output" directory (relative to the current working directory)
trainer.model.save_pretrained("Output")

In [None]:
# Load what was saved in "Output" on top of `model` as a PEFT model.
# NOTE(review): no actual merge call (e.g. `merge_and_unload()`) appears in the
# visible cells, so `peftModel` is presumably base model + adapter rather than
# merged weights. `model` is also the object that was handed to SFTTrainer -
# confirm it is still the plain base model at this point.
peftModel = PeftModel.from_pretrained(model, "Output")

In [None]:
# Upload the model and the tokenizer to the Hugging Face Hub.
# Log in first through the Hugging Face CLI.
!huggingface-cli login

# Both are pushed to the same repository name (`new_model`).
peftModel.push_to_hub(new_model)
tokenizer.push_to_hub(new_model)