In [None]:
!pip uninstall -y bitsandbytes triton
!pip install -q "bitsandbytes==0.43.1" "triton==2.3.0" \
"transformers==4.40.2" "peft==0.11.1" "accelerate==0.30.1" \
"datasets==2.19.0" "huggingface_hub==0.23.2"


Found existing installation: bitsandbytes 0.43.1
Uninstalling bitsandbytes-0.43.1:
  Successfully uninstalled bitsandbytes-0.43.1
Found existing installation: triton 2.3.0
Uninstalling triton-2.3.0:
  Successfully uninstalled triton-2.3.0


In [None]:
!pip install -q transformers==4.40.2


In [None]:
from datasets import load_dataset, DatasetDict, load_from_disk
from random import randrange, randint
from itertools import chain
from functools import partial
import os
import argparse
import torch
import bitsandbytes as bnb
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    set_seed,
    default_data_collator,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)


In [None]:
# Never hard-code the access token in the notebook. Keep any HF_TOKEN that is
# already exported in the environment and only fall back to an empty
# placeholder, so running this cell cannot overwrite a valid token with "".
os.environ.setdefault("HF_TOKEN", "")

Load Dolly Dataset

In [None]:

# Load the "train" split of the databricks/databricks-dolly-15k dataset.
dataset = load_dataset("databricks/databricks-dolly-15k", split="train")

# Report how many records were loaded, then show one randomly chosen record.
print(f"Dataset size: {len(dataset)}")
random_index = randrange(len(dataset))
print(dataset[random_index])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset size: 15011
{'instruction': 'When was the internet created?', 'context': '', 'response': 'The birthday of the internet is January 1, 1983.  This is an important day because it gave computers and computer networks for the first time in history a way to communicate with each other.  Prior to this networks were on closed loops.  On this this day a new internet protocol was also created called TCP/IP.  This stands for Transmission Control Protocol/Internet Protocol.', 'category': 'open_qa'}


In [None]:
def format_dolly(sample):
    """Render one Dolly record as a single prompt string.

    The prompt consists of an "Instruction" section, an optional "Context"
    section (omitted when the record's context is empty) and an "Answer"
    section, joined by blank lines.

    Args:
        sample: mapping with the keys "instruction", "context" and "response".

    Returns:
        The formatted prompt text.
    """
    sections = [f"Instruction: \n{sample['instruction']}"]
    if len(sample["context"]) > 0:
        sections.append(f"Context: \n{sample['context']}")
    sections.append(f"Answer: \n{sample['response']}")
    return "\n\n".join(sections)

# Sanity check: render one randomly chosen record with the prompt template.
print(format_dolly(dataset[randrange(len(dataset))]))

Instruction: 
Give me a list of the Adventure motorbikes that I can buy on my LAMS (Learner Approved Motorcycle Scheme) restriction, living in Australia

Answer: 
This is not a comprehensive list but you can use this as a starting point:
Kawasaki Versys-X 300
Kawazaki KLR650
Honda CRF300 Rally
BMW G 310 GS
KTM 390 Adventure
Honda CB500XA
Yamaha XT250
Yamaha WR250R
Suzuki V-Strom 650XT
Royal Enfield Himalayan
Suzuki DRZ400/DR650


Tokenization and Chunking

In [None]:
# NOTE(review): AutoTokenizer is already imported in the notebook's main import
# cell; this repeated import is harmless.
from transformers import AutoTokenizer

# Checkpoint whose tokenizer is used for preprocessing.
# NOTE(review): parse_args() defaults to the "-Instruct-" variant of this model;
# confirm both checkpoints are meant to share this tokenizer.
model_id = "mistralai/Mixtral-8x7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Attach the rendered prompt (terminated by EOS) to each record
def template_dataset(sample):
    """Store the formatted prompt followed by the EOS token under ``sample["text"]``.

    The record is modified in place and the same object is returned.
    """
    prompt_with_eos = "{}{}".format(format_dolly(sample), tokenizer.eos_token)
    sample["text"] = prompt_with_eos
    return sample

# Apply the template to every record and drop all pre-existing columns, leaving
# only "text". NOTE: this rebinds `dataset`, so re-running the cell without
# reloading the raw data fails (format_dolly reads the removed "instruction" column).
dataset = dataset.map(template_dataset, remove_columns=list(dataset.features))

# Chunking logic
# Tokens left over from the previous batch; `chunk` prepends them to the next one.
remainder = {"input_ids": [], "attention_mask": [], "token_type_ids": []}


def chunk(sample, chunk_length=2048):
    """Pack a batch of tokenized examples into chunks of exactly ``chunk_length`` tokens.

    Every column of the batch is flattened into one token stream, prefixed with
    the tokens carried over in the module-level ``remainder``, and cut into
    consecutive slices of ``chunk_length``. Tokens that do not fill a complete
    chunk are stored back in ``remainder`` for the next call.

    A batch that holds fewer than ``chunk_length`` tokens yields no chunk at all
    and is carried over in full. Previously it was emitted as one short chunk
    and the carry-over was cleared, which produced rows of unequal length that a
    stacking collator cannot batch.

    Args:
        sample: batched mapping of column name -> list of per-example token lists.
        chunk_length: number of tokens in each emitted chunk.

    Returns:
        Mapping with the input columns rewritten as lists of chunks, plus a
        "labels" column that is a copy of the "input_ids" chunk list.

    Note:
        The function keeps state in the global ``remainder``. Tokens still held
        there after the last batch are never emitted.
    """
    global remainder
    # Flatten each column and prepend what the previous batch left behind.
    # `.get` tolerates columns that the initial `remainder` does not list.
    concatenated_examples = {
        k: remainder.get(k, []) + list(chain(*sample[k])) for k in sample.keys()
    }
    total_length = len(concatenated_examples[list(sample.keys())[0]])
    # Largest multiple of chunk_length that fits; 0 when the batch is too short.
    usable_length = (total_length // chunk_length) * chunk_length

    result = {
        k: [t[i : i + chunk_length] for i in range(0, usable_length, chunk_length)]
        for k, t in concatenated_examples.items()
    }
    remainder = {k: t[usable_length:] for k, t in concatenated_examples.items()}
    result["labels"] = result["input_ids"].copy()
    return result

# Step 1: tokenize the "text" column in batches, dropping the pre-existing columns.
tokenized_dataset = dataset.map(
    lambda sample: tokenizer(sample["text"]),
    batched=True,
    remove_columns=list(dataset.features),
)
# Step 2: pack the tokenized examples into 2048-token chunks.
lm_dataset = tokenized_dataset.map(partial(chunk, chunk_length=2048), batched=True)






In [None]:
# Number of rows (chunks) in the packed dataset.
print(f"Total number of chunks: {len(lm_dataset)}")

Total number of chunks: 1553


Splitting and Saving the Dataset

In [None]:
# Hold out 20% of the chunks for validation; the fixed seed keeps the split reproducible.
split = lm_dataset.train_test_split(test_size=0.2, seed=42)

final_dataset = DatasetDict({
    "train": split["train"],
    "validation": split["test"],
})

# Must match the default of --dataset_path in parse_args(), which the training
# step passes to load_from_disk().
save_path = "/content/drive/MyDrive/Colab Notebooks/Finetune/lm_dataset"
final_dataset.save_to_disk(save_path)



Saving the dataset (0/1 shards):   0%|          | 0/1242 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/311 [00:00<?, ? examples/s]

Argument Parsing

In [None]:


def _str_to_bool(value):
    """Convert a command-line string such as "true" / "false" into a real bool.

    ``type=bool`` does not work with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so a flag could never be disabled.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")


def parse_args():
    """Parse the fine-tuning options and log in to the Hugging Face Hub if a token is given.

    Unknown arguments are ignored (``parse_known_args``), so the function also
    works when ``sys.argv`` carries extra arguments it does not define.

    Boolean options accept an explicit value (``--bf16 false``) or can be given
    bare (``--bf16``), which means True.

    Returns:
        argparse.Namespace with model_id, dataset_path, hf_token, epochs,
        per_device_train_batch_size, lr, seed, gradient_checkpointing, bf16 and
        merge_weights.
    """
    parser = argparse.ArgumentParser(description="Fine-tune Mixtral model with QLoRA")

    parser.add_argument(
        "--model_id",
        type=str,
        default="mistralai/Mixtral-8x7B-Instruct-v0.1",
        help="Base model from Hugging Face Hub to fine-tune",
    )
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="/content/drive/MyDrive/Colab Notebooks/Finetune/lm_dataset",
        help="Path to the processed dataset saved with save_to_disk()",
    )
    # Keep the default empty: pass the token at call time instead of committing it.
    parser.add_argument(
        "--hf_token",
        type=str,
        default="",
        help="Hugging Face access token",
    )
    parser.add_argument("--epochs", type=int, default=8, help="Number of training epochs")
    parser.add_argument("--per_device_train_batch_size", type=int, default=2, help="Batch size per device")
    parser.add_argument("--lr", type=float, default=2e-5, help="Learning rate")
    parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility")
    parser.add_argument(
        "--gradient_checkpointing",
        type=_str_to_bool,
        nargs="?",
        const=True,
        default=True,
        help="Enable gradient checkpointing",
    )
    parser.add_argument(
        "--bf16",
        type=_str_to_bool,
        nargs="?",
        const=True,
        # Compute capability 8.x and newer; the previous `== 8` check excluded
        # GPUs with a higher major version.
        default=torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8,
        help="Use bfloat16 training if supported",
    )
    parser.add_argument(
        "--merge_weights",
        type=_str_to_bool,
        nargs="?",
        const=True,
        default=True,
        help="Merge LoRA weights into the base model after training",
    )

    args, _ = parser.parse_known_args()

    if args.hf_token:
        login(token=args.hf_token)

    return args


LoRA Utilities

In [None]:

def print_trainable_parameters(model, use_4bit=False):
    """Print the total and trainable parameter counts of ``model``.

    Args:
        model: object exposing ``named_parameters()`` whose items provide
            ``numel()`` and ``requires_grad``.
        use_4bit: when True, the trainable count is halved before it is reported.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        num_params = param.numel()
        # Fall back to `ds_numel` when the parameter reports zero elements
        # (presumably a DeepSpeed-partitioned parameter; verify if that setup is used).
        if num_params == 0 and hasattr(param, "ds_numel"):
            num_params = param.ds_numel
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    if use_4bit:
        # Integer division keeps the count an int. `/=` produced a float, which
        # the `,d` format below rejects with ValueError.
        trainable_params //= 2
    # Guard against a model without parameters instead of dividing by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f" All params: {all_param:,d}  Trainable params: {trainable_params:,d}  Trainable%: {trainable_pct:.2f}%"
    )

def find_all_linear_names(model):
    """Return the distinct leaf names of all bitsandbytes 4-bit linear layers in ``model``.

    The leaf name is the last dot-separated component of the module's qualified
    name. "lm_head" is excluded from the result.

    Returns:
        List of unique leaf names (in no particular order).
    """
    leaf_names = {
        qualified_name.split(".")[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, bnb.nn.Linear4bit)
    }
    leaf_names.discard("lm_head")
    return list(leaf_names)

def create_peft_model(model, gradient_checkpointing=True, bf16=True):
    """Prepare a 4-bit quantized model for k-bit training and attach LoRA adapters.

    Args:
        model: causal LM whose linear layers are ``bnb.nn.Linear4bit`` modules.
        gradient_checkpointing: forwarded to ``prepare_model_for_kbit_training``
            and, when True, also enabled on the model directly.
        bf16: when True, LoRA layers (and float32 ``lm_head`` / ``embed_tokens``
            weights) are cast to bfloat16.

    Returns:
        The PEFT-wrapped model.

    Raises:
        ValueError: if the model contains no 4-bit linear layer to adapt.
    """
    from peft import (
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_kbit_training,
    )
    from peft.tuners.lora import LoraLayer

    model = prepare_model_for_kbit_training(
        model, use_gradient_checkpointing=gradient_checkpointing
    )
    if gradient_checkpointing:
        model.gradient_checkpointing_enable()

    # Every 4-bit linear layer name (except lm_head) becomes a LoRA target.
    modules = find_all_linear_names(model)
    if not modules:
        # Fail early with a clear message rather than deep inside PEFT.
        raise ValueError(
            "No bitsandbytes Linear4bit modules found; load the model with 4-bit "
            "quantization before calling create_peft_model()."
        )
    # These are the layers that receive adapters, not layers being quantized.
    print(f" Found {len(modules)} LoRA target modules: {modules}")

    peft_config = LoraConfig(
        r=64,
        lora_alpha=16,
        target_modules=modules,
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )

    model = get_peft_model(model, peft_config)

    # Dtype fix-ups. The three checks are independent and applied in this order
    # to every module; nn.Module.to() converts the module in place.
    for name, module in model.named_modules():
        if isinstance(module, LoraLayer):
            if bf16:
                module.to(torch.bfloat16)
        if "norm" in name:
            module.to(torch.float32)
        if "lm_head" in name or "embed_tokens" in name:
            if hasattr(module, "weight") and bf16 and module.weight.dtype == torch.float32:
                module.to(torch.bfloat16)

    model.print_trainable_parameters()
    return model



Training

In [None]:
def training_function(args):
    """Fine-tune the base model with QLoRA and save the resulting model and tokenizer.

    Steps, in order: seed the RNGs, load the train/validation splits from disk,
    load the base model in 4-bit, save a copy of the loaded base model and its
    tokenizer, attach LoRA adapters, train with ``Trainer`` (evaluating and
    checkpointing once per epoch), then either merge the adapters into the base
    model and save it, or save the adapter model as is.

    Args:
        args: namespace as returned by ``parse_args()``. Reads ``seed``,
            ``dataset_path``, ``model_id``, ``gradient_checkpointing``, ``bf16``,
            ``per_device_train_batch_size``, ``lr``, ``epochs`` and
            ``merge_weights``.
    """
    set_seed(args.seed)

    dataset = load_from_disk(args.dataset_path)
    train_dataset = dataset["train"]
    val_dataset = dataset["validation"]

    # Follow the training precision: bfloat16 only when bf16 is enabled,
    # otherwise float16 (previously bfloat16 was used unconditionally).
    compute_dtype = torch.bfloat16 if args.bf16 else torch.float16
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=compute_dtype,
        llm_int8_enable_fp32_cpu_offload=True,
    )

    model = AutoModelForCausalLM.from_pretrained(
        args.model_id,
        # The KV cache is switched off whenever gradient checkpointing is on.
        use_cache=not args.gradient_checkpointing,
        device_map="auto",
        quantization_config=bnb_config,
    )

    # Keep a copy of the loaded (quantized) base model next to its tokenizer.
    base_model_dir = "/content/drive/MyDrive/Colab Notebooks/Finetune/base_model"
    model.save_pretrained(base_model_dir)
    tokenizer = AutoTokenizer.from_pretrained(args.model_id)
    tokenizer.save_pretrained(base_model_dir)
    print(f"Base model saved to: {base_model_dir}")

    model = create_peft_model(
        model, gradient_checkpointing=args.gradient_checkpointing, bf16=args.bf16
    )

    tmp_dir = "/content/drive/MyDrive/Colab Notebooks/Finetune/tmp"
    final_model_dir = "/content/drive/MyDrive/Colab Notebooks/Finetune/model"

    training_args = TrainingArguments(
        output_dir=tmp_dir,
        per_device_train_batch_size=args.per_device_train_batch_size,
        bf16=args.bf16,
        learning_rate=args.lr,
        num_train_epochs=args.epochs,
        gradient_checkpointing=args.gradient_checkpointing,
        logging_dir=f"{tmp_dir}/logs",
        logging_strategy="steps",
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=default_data_collator,
    )

    trainer.train()

    if args.merge_weights:
        # Persist the adapter, then release the training objects before the
        # model is loaded again for merging.
        trainer.model.save_pretrained(tmp_dir, safe_serialization=False)
        del model
        del trainer
        import gc

        # Collect unreachable objects first so empty_cache() can return their memory.
        gc.collect()
        torch.cuda.empty_cache()

        from peft import AutoPeftModelForCausalLM
        model = AutoPeftModelForCausalLM.from_pretrained(
            tmp_dir, low_cpu_mem_usage=True, torch_dtype=torch.float16
        )
        # Fold the LoRA weights into the base weights and drop the adapter wrappers.
        model = model.merge_and_unload()
        model.save_pretrained(final_model_dir, safe_serialization=True, max_shard_size="2GB")
    else:
        trainer.model.save_pretrained(final_model_dir, safe_serialization=True)

    tokenizer.save_pretrained(final_model_dir)
    print(f"Fine-tuned model and tokenizer saved to: {final_model_dir}")


# Entry point: parse the options and run the fine-tuning job.
if __name__ == "__main__":
    args = parse_args()
    training_function(args)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]



Base model saved to: /content/drive/MyDrive/Colab Notebooks/Finetune/base_model
 Found 8 modules to quantize: ['w1', 'q_proj', 'v_proj', 'k_proj', 'o_proj', 'w2', 'gate', 'w3']
trainable params: 968,900,608 || all params: 47,671,693,312 || trainable%: 2.0324


  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mvitalshadrackk[0m ([33mvitalshadrackk-st-edward-s-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Detected [huggingface_hub.inference] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/


Epoch,Training Loss,Validation Loss
1,1.3064,1.24881
2,1.1623,1.239387
3,1.2938,1.238829




Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
!nvidia-smi
