# Merge Models with SLERP for `whats-up-doc`

## Install `mergekit`

In [1]:
!python3 -m pip install --upgrade pip
!git clone https://github.com/cg123/mergekit.git
!cd mergekit && pip install -q -e .

Collecting pip
  Downloading pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.3.2
    Uninstalling pip-23.3.2:
      Successfully uninstalled pip-23.3.2
Successfully installed pip-24.0
Cloning into 'mergekit'...
remote: Enumerating objects: 1928, done.[K
remote: Counting objects: 100% (1174/1174), done.[K
remote: Compressing objects: 100% (505/505), done.[K
remote: Total 1928 (delta 920), reused 766 (delta 668), pack-reused 754[K
Receiving objects: 100% (1928/1928), 572.57 KiB | 5.45 MiB/s, done.
Resolving deltas: 100% (1314/1314), done.
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependenc

## Log in to Hugging Face Using a Write Token

In [2]:
import huggingface_hub

# Show the interactive Hugging Face login widget; the short alias `hfl`
# stays bound in the notebook namespace.
hfl = huggingface_hub.notebook_login
hfl()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Merged Model Metadata & Merge Config

In [15]:
# Local folder that holds the merged model and is later uploaded to the Hub.
OUTPUT_DIR: str = "output-dir"

# Name of the merged model; combined with the username to form the Hub repo id.
MODEL_NAME: str = "Meta-Llama-3-8B-Instruct-Hermes-2-Pro-SLERP"

In [6]:
# mergekit merge recipe, kept as a YAML string so a later cell can write it to
# config.yaml. It declares a `slerp` merge of layer_range [0, 32] from
# meta-llama/Meta-Llama-3-8B-Instruct (also the base model) and
# NousResearch/Hermes-2-Pro-Llama-3-8B, with dtype bfloat16.
# NOTE(review): the `t` lists presumably set the interpolation weight across
# layers for the self_attn / mlp tensors, with 0.5 as the fallback for all other
# tensors — confirm against the mergekit documentation.
config = """
slices:
  - sources:
    - model: meta-llama/Meta-Llama-3-8B-Instruct
      layer_range: [0, 32]
    - model: NousResearch/Hermes-2-Pro-Llama-3-8B
      layer_range: [0, 32]
base_model: meta-llama/Meta-Llama-3-8B-Instruct
merge_method: slerp
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5
dtype: bfloat16
"""

In [7]:
# Persist the merge recipe to disk so the mergekit CLI can read it from config.yaml.
with open("config.yaml", mode="w") as yaml_file:
    yaml_file.write(config)

## Perform Merge with `mergekit`

In [8]:
!mergekit-yaml config.yaml output-dir --allow-crimes --copy-tokenizer --out-shard-size 1B --low-cpu-memory --write-model-card --lazy-unpickle

config.json: 100%|█████████████████████████████| 700/700 [00:00<00:00, 3.34MB/s]
config.json: 100%|█████████████████████████████| 654/654 [00:00<00:00, 4.02MB/s]
Warmup loader cache:   0%|                                | 0/2 [00:00<?, ?it/s]
Fetching 12 files:   0%|                                 | 0/12 [00:00<?, ?it/s][A

dpo-adapter/adapter_config.json: 100%|█████████| 791/791 [00:00<00:00, 4.73MB/s][A[A

Fetching 12 files:  17%|████▏                    | 2/12 [00:00<00:00, 13.19it/s][A

model.safetensors.index.json: 100%|████████| 23.9k/23.9k [00:00<00:00, 83.9MB/s][A[A


generation_config.json: 100%|███████████████████| 142/142 [00:00<00:00, 980kB/s][A[A


model-00002-of-00004.safetensors:   0%|             | 0.00/5.00G [00:00<?, ?B/s][A[A


model-00001-of-00004.safetensors:   0%|             | 0.00/4.98G [00:00<?, ?B/s][A[A[A



model-00004-of-00004.safetensors:   0%|             | 0.00/1.17G [00:00<?, ?B/s][A[A[A[A




model-00003-of-00004.safetensors:   0%|   

In [9]:
from kaggle_secrets import UserSecretsClient

# Fetch the Hugging Face write token from the Kaggle secret named
# "HF_WRITE_TOKEN" instead of hard-coding it in the notebook.
# NOTE(review): kaggle_secrets is presumably only available inside a Kaggle
# kernel — confirm before running this notebook elsewhere.
userSecrets = UserSecretsClient()
HF_WRITE_TOKEN = userSecrets.get_secret("HF_WRITE_TOKEN")

In [None]:
from huggingface_hub import HfApi

username = "0xVolt"
# Build the fully qualified Hub repo id once so that repo creation and upload
# cannot drift apart.
repo_id = f"{username}/{MODEL_NAME}"

# Defined in the secrets tab in Kaggle Secrets
api = HfApi(token=HF_WRITE_TOKEN)

# exist_ok=True keeps this cell re-runnable: without it, a second run raises
# because the repository was already created by the first run.
api.create_repo(
    repo_id=repo_id,
    repo_type="model",
    exist_ok=True,
)

# Push the whole merged model's folder to the hub
api.upload_folder(
    repo_id=repo_id,
    repo_type="model",
    folder_path=OUTPUT_DIR,
)

Upload 17 LFS files:   0%|          | 0/17 [00:00<?, ?it/s]

model-00002-of-00017.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

model-00001-of-00017.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

model-00004-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00003-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00005-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00006-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00007-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00008-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00009-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00010-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00011-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00012-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00013-of-00017.safetensors:   0%|          | 0.00/998M [00:00<?, ?B/s]

model-00014-of-00017.safetensors:   0%|          | 0.00/948M [00:00<?, ?B/s]

model-00015-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00016-of-00017.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

model-00017-of-00017.safetensors:   0%|          | 0.00/201M [00:00<?, ?B/s]