In [None]:
import os
from huggingface_hub import login

# Resolve the Hugging Face access token from the environment.
# HF_TOKEN is consulted first; HUGGINGFACE_HUB_TOKEN is the fallback when the
# first variable is unset or empty.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")

# Stop here with a clear message instead of attempting a login without a token.
if not hf_token:
    raise ValueError("Set HF_TOKEN (or HUGGINGFACE_HUB_TOKEN) in your environment.")

# Authenticate this session; add_to_git_credential=True also asks the hub
# client to register the token with the git credential helper.
login(token=hf_token, add_to_git_credential=True)

In [None]:
# --- Paths -----------------------------------------------------------------
# Merge configuration file that describes the merge to perform.
CONFIG_YML = "./examples/llama/arcee_fusion.yml"
# Folder the merged result is stored in.
OUTPUT_PATH = "./merged/Llama3-8B-Arcee-IST-Math"
# Cache location handed to the merge as `lora_merge_cache`; point it somewhere
# else if these files should be kept.
LORA_MERGE_CACHE = "/tmp"

# --- Merge switches ----------------------------------------------------------
# Copy a tokenizer alongside the merged output.
COPY_TOKENIZER = True
# Experimental low-memory model loader.
LAZY_UNPICKLE = False
# Enable only when the machine has more VRAM than RAM + swap.
LOW_CPU_MEMORY = False

In [None]:
import torch
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Parse the YAML merge configuration, then validate it into a
# MergeConfiguration object.
with open(CONFIG_YML, "r", encoding="utf-8") as fp:
    raw_merge_config = yaml.safe_load(fp)
merge_config = MergeConfiguration.model_validate(raw_merge_config)

# Collect the run-time options from the notebook's configuration constants.
merge_options = MergeOptions(
    lora_merge_cache=LORA_MERGE_CACHE,
    cuda=torch.cuda.is_available(),  # use CUDA only when torch reports it is available
    copy_tokenizer=COPY_TOKENIZER,
    lazy_unpickle=LAZY_UNPICKLE,
    low_cpu_memory=LOW_CPU_MEMORY,
)

# Run the merge and write the result to OUTPUT_PATH.
run_merge(merge_config, out_path=OUTPUT_PATH, options=merge_options)
print("Done!")

In [None]:
from huggingface_hub import HfApi

# Hub account that will own the uploaded repository, and the repository name.
username = "Alelcv27"
MODEL_NAME = "Llama3-8B-Arcee-IST-Math"

# Never embed an access token in the notebook: read it from the same
# environment variables the login cell uses. If neither variable is set the
# token is None, and per the huggingface_hub documentation HfApi then falls
# back to the locally saved token.
api = HfApi(
    token=os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
)

In [None]:
# Fully qualified repository id ("<user>/<model>"), built once and shared by
# both Hub calls below so they cannot drift apart.
repo_id = f"{username}/{MODEL_NAME}"

# exist_ok=True keeps this cell re-runnable: without it, create_repo raises
# once the repository already exists (e.g. on a second Run All).
api.create_repo(
    repo_id=repo_id,
    repo_type="model",
    exist_ok=True,
)

# Upload the contents of the merged model folder to that repository.
api.upload_folder(
    repo_id=repo_id,
    repo_type="model",
    folder_path=f"merged/{MODEL_NAME}",
)