In [None]:
!pip install streamlit torch datasets huggingface_hub transformers trl
!pip install streamlit torch datasets
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
# Install necessary packages
!pip install streamlit transformers datasets huggingface_hub trl unsloth

Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-77_jxkxs/unsloth_f66b699b703b4672b7dc50d85aa22bc4
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-77_jxkxs/unsloth_f66b699b703b4672b7dc50d85aa22bc4
  Resolved https://github.com/unslothai/unsloth.git to commit 64bb8cfd512a9dcd860d21563b624676f7432ec5
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting xformers
  Using cached xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)
Collecting trl<0.9.0
  Using cached trl-0.8.6-py3-none-any.whl (245 kB)
Collecting peft
  Using cached peft-0.11.1-py3-none-any.whl (251 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.43.1-py3-no

In [None]:
# Interactive Hugging Face authentication: renders a login widget in the notebook.
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import getpass
import json
import os

import streamlit as st
import torch
from datasets import load_dataset
from huggingface_hub import login
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported

# User inputs
base_model = "unsloth/mistral-7b-v0.3-bnb-4bit"
finetuned_model_info = "finetuned_model"
max_seq_length = 2048
load_in_4bit = True

# LoRA configuration
r = 16
lora_alpha = 16
lora_dropout = 0.0
bias = "none"

# Training dataset
dataset_info = "AnonY0324/orca-math-word-problems-200k"
split = "train"
input_field = "prompt"

# Training hyperparameters
batch_size = 2
gradient_accumulation_steps = 4
warmup_steps = 5
max_steps = 60
num_train_epochs = 1
learning_rate = 2e-4
logging_steps = 1
optim = "adamw_8bit"
weight_decay = 0.01
lr_scheduler_type = "linear"
seed = 3407
output_dir = "outputs"

# Hugging Face account and save options
hugging_face_username = "UKV"
# Never hard-code an access token in the notebook: it would be shared with
# every copy of the file. Read it from the HF_TOKEN environment variable and
# fall back to a non-echoing prompt when the variable is unset or empty.
hugging_face_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face token: ")
online_save = ["local_save"]
save_methods = ["merged_16bit", "merged_4bit", "lora"]

# Login to Hugging Face
login(token=hugging_face_token)

def load_model(base_model, max_seq_length, load_in_4bit):
    """Load a pretrained model and its tokenizer via ``FastLanguageModel``.

    Args:
        base_model: Model identifier forwarded as ``model_name``.
        max_seq_length: Forwarded unchanged as ``max_seq_length``.
        load_in_4bit: Forwarded unchanged as ``load_in_4bit``.

    Returns:
        A ``(model, tokenizer)`` tuple unpacked from
        ``FastLanguageModel.from_pretrained``.
    """
    pretrained_kwargs = {
        "model_name": base_model,
        "max_seq_length": max_seq_length,
        "dtype": None,  # no explicit dtype is requested
        "load_in_4bit": load_in_4bit,
    }
    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(**pretrained_kwargs)
    return loaded_model, loaded_tokenizer

def get_peft_model(_model, r, lora_alpha, bias, lora_dropout=0):
    """Wrap ``_model`` with LoRA adapters via ``FastLanguageModel.get_peft_model``.

    Args:
        _model: The base model to wrap.
        r: Forwarded as the LoRA ``r`` argument.
        lora_alpha: Forwarded as ``lora_alpha``.
        bias: Forwarded as ``bias``.
        lora_dropout: Forwarded as ``lora_dropout``. Defaults to 0, the value
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        Whatever ``FastLanguageModel.get_peft_model`` returns for these settings.
    """
    # Projection layers that receive adapters.
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]
    model = FastLanguageModel.get_peft_model(
        _model,
        r=r,
        target_modules=target_modules,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias=bias,
        use_gradient_checkpointing="unsloth",
        use_rslora=False,
        loftq_config=None,
    )
    return model
def load_dataset_train(dataset_info, split="train", _tokenizer=None):
    """Load a dataset split and add a ``text`` column of formatted prompts.

    Each batch is expected to expose ``instruction``, ``input`` and ``output``
    columns; they are substituted into the prompt template and the tokenizer's
    EOS token is appended to every rendered prompt.

    Args:
        dataset_info: Dataset identifier passed to ``load_dataset``.
        split: Split to load. Defaults to ``"train"``.
        _tokenizer: Tokenizer whose ``eos_token`` is appended. When ``None``,
            the module-level ``tokenizer`` is used, so that name must already
            be defined.

    Returns:
        The result of mapping the formatting function (batched) over the
        loaded split.
    """
    # NOTE(review): every template line below carries four leading spaces and
    # the template starts with a newline; both end up in the training text.
    # Kept as-is to preserve the existing prompt format; confirm it is intended.
    alpaca_prompt = """
    Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    ### Instruction:
    {}
    ### Input:
    {}
    ### Response:
    {}
    """
    active_tokenizer = tokenizer if _tokenizer is None else _tokenizer
    EOS_TOKEN = active_tokenizer.eos_token

    def formatting_prompts_func(examples):
        """Render one prompt string per (instruction, input, output) row."""
        rows = zip(examples["instruction"], examples["input"], examples["output"])
        return {
            "text": [
                alpaca_prompt.format(instruction, context, output) + EOS_TOKEN
                for instruction, context, output in rows
            ]
        }

    dataset_train = load_dataset(dataset_info, split=split)
    dataset_train = dataset_train.map(formatting_prompts_func, batched=True)
    return dataset_train
def setup_trainer(_model, _tokenizer, _dataset_train, _training_args, _dataset_text_field, _max_seq_length):
    """Build an ``SFTTrainer`` from the given pieces.

    Args:
        _model: Model to train.
        _tokenizer: Tokenizer handed to the trainer.
        _dataset_train: Training dataset.
        _training_args: Passed to the trainer as ``args``.
        _dataset_text_field: Name of the dataset column holding the text.
        _max_seq_length: Forwarded as ``max_seq_length``.

    Returns:
        The constructed ``SFTTrainer`` instance.
    """
    trainer_kwargs = {
        "model": _model,
        "tokenizer": _tokenizer,
        "train_dataset": _dataset_train,
        "dataset_text_field": _dataset_text_field,
        "max_seq_length": _max_seq_length,
        "dataset_num_proc": 2,
        "packing": False,
        "args": _training_args,
    }
    return SFTTrainer(**trainer_kwargs)

# Collect every user-facing setting in one nested mapping, grouped by concern.
config = dict(
    hugging_face_username=hugging_face_username,
    # Base model to load and the name of the fine-tuned result.
    model_config=dict(
        base_model=base_model,
        finetuned_model=f"{hugging_face_username}/{finetuned_model_info}",
        max_seq_length=max_seq_length,
        load_in_4bit=load_in_4bit,
    ),
    # LoRA adapter settings.
    lora_config=dict(
        r=r,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias=bias,
        use_gradient_checkpointing="unsloth",
        use_rslora=False,
    ),
    # Which dataset, split and field to train on.
    training_dataset=dict(
        name=dataset_info,
        split=split,
        input_field=input_field,
    ),
    # Optimizer and schedule settings; fp16 and bf16 are mutually exclusive
    # and chosen from is_bfloat16_supported().
    training_config=dict(
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=warmup_steps,
        max_steps=max_steps,
        num_train_epochs=num_train_epochs,
        learning_rate=learning_rate,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=logging_steps,
        optim=optim,
        weight_decay=weight_decay,
        lr_scheduler_type=lr_scheduler_type,
        seed=seed,
        output_dir=output_dir,
    ),
)

# Short aliases for the config sections used below.
model_cfg = config["model_config"]
lora_cfg = config["lora_config"]
train_cfg = config["training_config"]

# Load the base model and tokenizer.
model, tokenizer = load_model(
    model_cfg["base_model"],
    model_cfg["max_seq_length"],
    model_cfg["load_in_4bit"],
)

# Attach the LoRA adapters.
model = get_peft_model(
    model,
    r=lora_cfg["r"],
    lora_alpha=lora_cfg["lora_alpha"],
    bias=lora_cfg["bias"],
)

# Load the training split and render the prompt text column.
dataset_train = load_dataset_train(config["training_dataset"]["name"])

# num_train_epochs is not forwarded: the run is bounded by max_steps.
# fp16/bf16 come from the config so there is a single source for these flags.
training_args = TrainingArguments(
    per_device_train_batch_size=train_cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=train_cfg["gradient_accumulation_steps"],
    warmup_steps=train_cfg["warmup_steps"],
    max_steps=train_cfg["max_steps"],
    learning_rate=train_cfg["learning_rate"],
    fp16=train_cfg["fp16"],
    bf16=train_cfg["bf16"],
    logging_steps=train_cfg["logging_steps"],
    optim=train_cfg["optim"],
    weight_decay=train_cfg["weight_decay"],
    lr_scheduler_type=train_cfg["lr_scheduler_type"],
    seed=train_cfg["seed"],
    output_dir=train_cfg["output_dir"],
)

# Build the trainer through the shared helper instead of duplicating its arguments.
trainer = setup_trainer(
    model,
    tokenizer,
    dataset_train,
    training_args,
    "text",  # column produced by load_dataset_train
    model_cfg["max_seq_length"],
)

trainer.train()

# Persist the trained model and the tokenizer side by side in a local directory.
save_path = "finetuned_model"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful
==((====))==  Unsloth: Fast Mistral patching release 2024.6
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. Xformers = 0.0.26.post1. FA = False.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth: Will load unsloth/mistral-7b-v0.3-bnb-4bit as a legacy tokenizer.
  self.pid = os.fork()


Map (num_proc=2):   0%|          | 0/200035 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 200,035 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.0197
2,1.031
3,0.8884
4,0.8103
5,0.8121
6,0.6993
7,0.5495
8,0.527
9,0.5605
10,0.5447


RuntimeError: Unsloth: Merging into 4bit will cause your model to lose accuracy if you plan
to merge to GGUF or others later on. I suggest you to do this as a final step
if you're planning to do multiple saves.
If you are certain, change `save_method` to `merged_4bit_forced`.

In [None]:
# Merge the LoRA weights into the model and write the result to `save_path`.
# The "_forced" variant is the save method the library asks for when a 4-bit
# merge is really intended.
model.save_pretrained_merged(
    save_path,
    tokenizer,
    save_method="merged_4bit_forced",
)

# Confirm where the merged model was written.
print(f"Model saved locally to {save_path}!")


Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...




Done.
Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 10 minutes for Llama-7b... Done.
Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...
Done.


RuntimeError: Unsloth: Pushing to HF requires a token. Pass `token = 'hf_....'`
Go to https://huggingface.co/settings/tokens.

In [None]:
# Push the merged 4-bit model to the Hugging Face Hub.
# Reuse the token variable defined in the configuration cell rather than
# embedding the secret as a string literal here.
model.push_to_hub_merged(
    save_path,
    tokenizer,
    save_method="merged_4bit_forced",
    token=hugging_face_token,
)

Unsloth: Merging 4bit and LoRA weights to 4bit...
This might take 5 minutes...
Done.
Unsloth: Saving 4bit Bitsandbytes model. Please wait...


README.md:   0%|          | 0.00/584 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/590 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

Saved merged_4bit model to https://huggingface.co/finetuned_model


In [None]:
# Recursively archive the saved model directory into /content/finetuned_model.zip.
!zip -r /content/finetuned_model.zip /content/finetuned_model/

  adding: content/finetuned_model/ (stored 0%)
  adding: content/finetuned_model/tokenizer.model (deflated 61%)
  adding: content/finetuned_model/adapter_config.json (deflated 54%)
  adding: content/finetuned_model/special_tokens_map.json (deflated 77%)
  adding: content/finetuned_model/README.md (deflated 66%)
  adding: content/finetuned_model/tokenizer_config.json (deflated 96%)
  adding: content/finetuned_model/adapter_model.safetensors (deflated 8%)


In [None]:
# GGUF exports to push, as (repo-name suffix, extra keyword arguments).
# The first export passes no quantization_method, so the library default applies.
gguf_exports = [
    ("_f8", {}),
    ("_f16", {"quantization_method": "f16"}),
    ("_q4_k_m", {"quantization_method": "q4_k_m"}),
]

for repo_suffix, export_kwargs in gguf_exports:
    # Reuse the token variable from the configuration cell; never inline the secret.
    model.push_to_hub_gguf(
        save_path + repo_suffix,
        tokenizer,
        token=hugging_face_token,
        **export_kwargs,
    )

Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which will take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 4.1G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.26 out of 12.67 RAM for saving.


  3%|▎         | 1/32 [00:00<00:03,  9.51it/s]We will save to Disk and not RAM now.
100%|██████████| 32/32 [08:06<00:00, 15.21s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... This might take 5 minutes for Llama-7b...
Unsloth: Saving finetuned_model_f8/pytorch_model-00001-of-00006.bin...
Unsloth: Saving finetuned_model_f8/pytorch_model-00002-of-00006.bin...
Unsloth: Saving finetuned_model_f8/pytorch_model-00003-of-00006.bin...
Unsloth: Saving finetuned_model_f8/pytorch_model-00004-of-00006.bin...
Unsloth: Saving finetuned_model_f8/pytorch_model-00005-of-00006.bin...
