In [1]:
# -*- coding: utf-8 -*-
"""LLaMA Factory Supervised Fine-tuning for Kaggle"""

import os

# Switch off wandb logging through its environment flag
os.environ.update({"WANDB_DISABLED": "true"})

In [2]:
# Install dependencies and authenticate with HuggingFace
!pip install -q --upgrade huggingface_hub
!huggingface-cli login --token "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"  # Replace with your actual token

[0mThe token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
The token `sjsu assignments` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `sjsu assignments`


In [4]:
# Clean up existing directory and reinstall
!rm -rf LLaMA-Factory
!pip install -q --upgrade huggingface_hub
!huggingface-cli login --token "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"  # Replace with your actual token
!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
!pip install -q -e .[torch,bitsandbytes]

[0mThe token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
The token `sjsu assignments` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `sjsu assignments`
Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 315, done.[K
remote: Counting objects: 100% (315/315), done.[K
remote: Compressing objects: 100% (238/238), done.[K
remote: Total 315 (delta 78), reused 194 (delta 64), pack-reused 0 (from 0)[K
Receiving objects: 100% (315/315), 9.03 MiB | 18.39 MiB/s, done.
Resolving deltas: 100% (78/78), done.
/kaggle/working/LLaMA-Factory/LLaMA-Factory
[0m

In [5]:
# Verify GPU availability: report the name of device 0, or print a hint and
# re-raise the AssertionError when no CUDA device is usable.
import torch

try:
    cuda_ready = torch.cuda.is_available()
    assert cuda_ready is True
    device_name = torch.cuda.get_device_name(0)
    print(f"GPU is available: {device_name}")
except AssertionError:
    print("Please set up a GPU runtime")
    raise



GPU is available: Tesla P100-PCIE-16GB


In [6]:
# Update identity dataset: fill the name/author placeholders in every sample's
# "output" text and write the result back to the same JSON file.
import json

NAME = "Llama-3"
AUTHOR = "LLaMA Factory"

identity_path = "data/identity.json"
# Applied in this order: name first, then author
placeholder_values = {
    "{{name}}": NAME,
    "{{author}}": AUTHOR,
}

with open(identity_path, "r", encoding="utf-8") as source_file:
    dataset = json.load(source_file)

for record in dataset:
    filled_text = record["output"]
    for placeholder, replacement in placeholder_values.items():
        filled_text = filled_text.replace(placeholder, replacement)
    record["output"] = filled_text

with open(identity_path, "w", encoding="utf-8") as target_file:
    json.dump(dataset, target_file, indent=2, ensure_ascii=False)



In [7]:
# Training arguments for the LLaMA Factory SFT run; written to
# train_llama3.json so the CLI can be pointed at the file.
training_args = {
    "stage": "sft",
    "do_train": True,
    "model_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "dataset": "identity,alpaca_en_demo",
    "template": "llama3",
    "finetuning_type": "lora",
    "lora_target": "all",
    "output_dir": "llama3_lora",

    # Training hyperparameters
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 5e-5,
    "num_train_epochs": 3.0,
    "max_samples": 500,
    "max_grad_norm": 1.0,

    # LoRA specific settings
    "loraplus_lr_ratio": 16.0,

    # Optimization settings
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.1,
    "fp16": True,

    # Logging and saving
    "logging_steps": 10,
    "save_steps": 1000,

    # Disable wandb
    "report_to": "none"
}

# Save training configuration. The `with` block closes (and therefore flushes)
# the file before anything else tries to read it.
with open("train_llama3.json", "w", encoding="utf-8") as config_file:
    json.dump(training_args, config_file, indent=2)



In [8]:
# Start training: run the LLaMA Factory CLI's `train` command on the
# train_llama3.json configuration file written by the previous cell.
!llamafactory-cli train train_llama3.json



10/29/2024 03:02:07 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.float16
config.json: 100%|█████████████████████████| 1.26k/1.26k [00:00<00:00, 6.85MB/s]
[INFO|configuration_utils.py:672] 2024-10-29 03:02:08,046 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--unsloth--llama-3-8b-Instruct-bnb-4bit/snapshots/f296897830363557c84cc4a942c2cd1f91818ae4/config.json
[INFO|configuration_utils.py:739] 2024-10-29 03:02:08,048 >> Model config LlamaConfig {
  "_name_or_path": "unsloth/llama-3-8b-Instruct-bnb-4bit",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128009,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama"

In [9]:
# After training, save the trained LoRA adapter
export_args = {
    "model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
    "adapter_name_or_path": "llama3_lora",
    "template": "llama3",
    "finetuning_type": "lora",
    "export_dir": "llama3_lora_merged",
    "export_size": 2,                    # Size in GB of each shard
    "export_device": "cpu"               # Use CPU for merging to avoid OOM
}

# Save export configuration
json.dump(export_args, open("export_llama3.json", "w", encoding="utf-8"), indent=2)

# Export the model
!llamafactory-cli export export_llama3.json

print("\nTraining and export completed!")
print("Your trained LoRA adapters are saved in: llama3_lora/")
print("The merged model (if successful) is saved in: llama3_lora_merged/")

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_http.py", line 406, in hf_raise_for_status
    response.raise_for_status()
  File "/opt/conda/lib/python3.10/site-packages/requests/models.py", line 1024, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 403 Client Error: Forbidden for url: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/resolve/main/config.json

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/transformers/utils/hub.py", line 403, in cached_file
    resolved_file = hf_hub_download(
  File "/opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/huggingface_hub/file_download.py", line 862, in hf_hub_download
    return _hf_hub_