In [None]:
!pip install torch transformers datasets peft trl bitsandbytes
!pip install git+https://github.com/huggingface/peft.git

In [None]:
!pip install --upgrade trl

In [None]:
import os

# Configure the CUDA caching allocator before torch is imported, so the setting
# is already present in the environment whenever the allocator reads it.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"  # Reduce memory fragmentation

import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from peft import LoraConfig, get_peft_model, PeftModel
from trl import SFTTrainer
from datasets import load_dataset

# Step 1: Set up the environment and authentication
from huggingface_hub import login

# Take the access token from the HF_TOKEN environment variable instead of
# embedding it in the notebook. An unset or empty variable becomes None, in
# which case login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

# Step 2: Identifiers for the base checkpoint, the training data and the output folder
model_id = "devatar/quantized_Llama-3.1-8B-Instruct"
dataset_name = "mlabonne/FineTome-100k"
output_dir = "./llama-3.1-8b-dora-finetuned"

# Step 3: Load the tokenizer and the model (no custom quantization config is passed)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_load_options = {
    "device_map": "auto",
    "torch_dtype": torch.bfloat16,
    "use_cache": False,  # Disable cache to save memory
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_options)

# Reuse the EOS token for padding and record its id on the model config
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# Step 4: Build the DoRA adapter configuration
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]  # Fewer modules
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    use_dora=True,  # Try DoRA; set to False if issues persist
    target_modules=attention_projections,
    r=8,  # Low rank to save memory
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the loaded model with the adapter configuration
model = get_peft_model(model, peft_config)

# Mark every parameter whose name contains "lora" as trainable
for param_name, parameter in model.named_parameters():
    if "lora" not in param_name:
        continue
    parameter.requires_grad = True

# Step 5: Fetch the training split, show its first record, then subsample
dataset = load_dataset(dataset_name, split="train")
first_record = dataset[0]
print("Dataset sample:", first_record)
shuffled_dataset = dataset.shuffle(seed=42)
dataset = shuffled_dataset.select(range(5000))  # 5k samples for memory

# Step 6: Define a formatting function for the dataset
def format_prompt(example):
    """Render one conversation record as a single training string.

    Reads ``example['conversations']``, a sequence of turns that each carry a
    ``'from'`` entry (speaker role) and a ``'value'`` entry (message text).
    Every turn becomes one ``### <Role>: <text>`` line terminated by a newline,
    with the role passed through ``str.capitalize``; the lines are concatenated
    in order. An empty conversation yields an empty string.
    """
    return "".join(
        f"### {turn['from'].capitalize()}: {turn['value']}\n"
        for turn in example['conversations']
    )

# Step 7: Define training arguments
# The base model is loaded in bfloat16, so use bf16 mixed precision when the
# GPU supports it instead of always forcing fp16. Hardware without bf16 support
# keeps the previous fp16 setting.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,  # Minimize VRAM
    gradient_accumulation_steps=8,  # Effective batch size of 8
    optim="paged_adamw_8bit",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    max_steps=500,
    logging_steps=10,
    save_strategy="steps",
    save_steps=100,
    bf16=use_bf16,
    fp16=not use_bf16,
    report_to="none",
    overwrite_output_dir=True
)

# Step 8: Clear GPU memory
torch.cuda.empty_cache()

# Step 9: Initialize the SFTTrainer
# `model` has already been wrapped by get_peft_model(), so it carries its own
# adapter configuration and is passed on its own. Supplying peft_config as well
# is redundant, and how a PeftModel combined with peft_config is handled has
# differed between TRL releases.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=format_prompt
)

# Step 10: Start fine-tuning
trainer.train()

# Step 11: Persist the trained adapter weights and the tokenizer
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

# Step 12: Reload the base checkpoint, attach the saved adapter, and fold it in
# NOTE(review): the training model is still referenced by `trainer` at this
# point, so two sets of weights may be resident during the reload - confirm
# that this fits in memory on the target hardware.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    ),
    output_dir,
)
merged_model = model.merge_and_unload()

# Step 13: Write the merged model and the tokenizer to the "merged" subfolder
for artifact in (merged_model, tokenizer):
    artifact.save_pretrained(f"{output_dir}/merged")

# Optional: Push to Hugging Face Hub
# merged_model.push_to_hub("your-username/llama-3.1-8b-dora-finetuned")
# tokenizer.push_to_hub("your-username/llama-3.1-8b-dora-finetuned")

In [None]:
!pip install huggingface_hub

In [None]:
import os
from huggingface_hub import login, upload_folder, HfApi, create_repo
from datetime import datetime

# Step 1: Authenticate with Hugging Face
# Take the access token from the HF_TOKEN environment variable instead of
# embedding it in the notebook. An unset or empty variable becomes None, in
# which case login() asks for the token interactively.
login(token=os.environ.get("HF_TOKEN") or None)

# Step 2: Target repository and the local folders that will be uploaded
repo_id = "avinashhm/llama-3.1-8b-dora-finetuned"  # Your Hugging Face username and repo name
adapters_dir = "./llama-3.1-8b-dora-finetuned"
merged_dir = "./llama-3.1-8b-dora-finetuned/merged"

# Step 3: Stop early if either folder is missing (adapters are checked first)
for dir_label, dir_path in (("Adapters", adapters_dir), ("Merged", merged_dir)):
    if os.path.exists(dir_path):
        continue
    raise FileNotFoundError(f"{dir_label} directory '{dir_path}' does not exist.")

# Step 4: Initialize Hugging Face API
api = HfApi()

# Step 5: Create the repository if it doesn't exist
# Ask the Hub directly whether the repo exists rather than treating any
# exception from repo_info() as "missing": failures unrelated to a missing repo
# (for example a network error) now surface instead of triggering a create.
if api.repo_exists(repo_id=repo_id, repo_type="model"):
    print(f"Repository '{repo_id}' already exists.")
else:
    print(f"Creating repository '{repo_id}'...")
    # exist_ok=True keeps this step safe if the repo appears between the check
    # and the create call. Set private=True for a private repo.
    create_repo(repo_id=repo_id, repo_type="model", private=False, exist_ok=True)
    print(f"Repository '{repo_id}' created successfully.")

# Step 6: Push adapters directory to 'adapters/' subfolder
# datetime.now() is naive, so %Z would print an empty string; astimezone()
# attaches the local zone so the timestamp actually includes it.
print(f"Uploading '{adapters_dir}' to '{repo_id}/adapters' at {datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')}")

# merged_dir is a subfolder of adapters_dir and is uploaded to the repo root in
# Step 7, so exclude it here to avoid pushing the merged weights a second time.
# If merged_dir is ever moved outside adapters_dir the pattern matches nothing.
merged_subdir = os.path.relpath(merged_dir, adapters_dir).replace(os.sep, "/")
upload_folder(
    folder_path=adapters_dir,
    path_in_repo="adapters",
    repo_id=repo_id,
    commit_message="Upload DoRA adapters and checkpoints",
    ignore_patterns=["*.git*", f"{merged_subdir}/*"],  # Ignore Git-related files and the merged model
    repo_type="model"
)

# Step 7: Push merged directory to repository root
print(f"Uploading '{merged_dir}' to '{repo_id}' root at {datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')}")
upload_folder(
    folder_path=merged_dir,
    path_in_repo="",  # Empty string to upload to root
    repo_id=repo_id,
    commit_message="Upload merged model, tokenizer, and README",
    ignore_patterns=["*.git*"],
    repo_type="model"
)

print(f"Successfully pushed folders to https://huggingface.co/{repo_id}")