## Importing Libraries

In [1]:
# pytorch
import torch

# huggingface
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

## Hugging Face

In [2]:
# Hub coordinates -- edit the two values below to target your own repository.
username = "PathFinderKR"  # ADD YOUR USERNAME HERE
model_name = "Waktaverse-Llama-3-KO-8B-Instruct"  # ADD YOUR MODEL NAME HERE

# Repository id in the "<username>/<model_name>" form.
repo_id = "/".join((username, model_name))

## Device

In [3]:
# Choose the device string in order of preference:
# first CUDA GPU, then Apple's MPS backend, and finally the CPU.
if torch.cuda.is_available():
    device = "cuda:0"  # Nvidia GPU
elif torch.backends.mps.is_available():
    device = "mps"  # Apple Silicon GPU
else:
    device = "cpu"
print(f"Device = {device}")

## Merge

In [4]:
# Base checkpoint identifier; the tokenizer and the model weights are both
# loaded from this same id.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer_id = model_id

In [5]:
# Load the tokenizer identified by `tokenizer_id`; it is pushed to the Hub
# later together with the merged model.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

# Load the base model in FP16 onto the selected device. The LoRA weights are
# not applied here -- that happens in the merge step.
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # half-precision weights
    device_map=device,          # device string chosen in the Device section
    low_cpu_mem_usage=True,
)

In [6]:
# Wrap the base model with the adapter stored under `model_name`, then
# merge the adapter and unload the PEFT wrapper in a single expression.
model = PeftModel.from_pretrained(base_model, model_name).merge_and_unload()

## Upload

In [7]:
# Publish the tokenizer first, then the merged model, to the same repository.
# use_temp_dir=False asks push_to_hub not to use a temporary directory for
# the files it saves before uploading.
tokenizer.push_to_hub(repo_id=repo_id, use_temp_dir=False)
model.push_to_hub(repo_id=repo_id, use_temp_dir=False)