In [None]:
import yaml

# Name of the merged model. Later cells use it as the model card title and as
# the repository name on the Hugging Face Hub.
MODEL_NAME = "Papa-mistral"
# mergekit recipe, kept as a YAML string so it can be both written to
# config.yaml for the merge CLI and embedded verbatim in the model card.
# It describes a "passthrough" merge of six slices, all taken from
# mistralai/Mistral-7B-Instruct-v0.2. Each layer_range spans 8 layer indices
# and starts 4 indices after the previous one, so neighbouring slices overlap.
# NOTE(review): layer_range is presumably half-open [start, end) -- confirm
# against the mergekit documentation.
yaml_config = """
dtype: float16
merge_method: passthrough
slices:
- sources:
  - layer_range: [0, 8]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [4, 12]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [8, 16]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [12, 20]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [16, 24]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [20, 28]
    model: mistralai/Mistral-7B-Instruct-v0.2
"""

In [None]:
!git clone https://github.com/cg123/mergekit.git
%cd mergekit
!pip install -q -e .
!pip install -q git+https://github.com/huggingface/transformers.git
# @title 🌟  VortexMergekit
# @markdown  Select the branch to determine the command to run and set the trust_remote_code parameter.
# Assuming yaml_config is defined elsewhere in your script
with open('config.yaml', 'w', encoding="utf-8") as f:
    f.write(yaml_config)

branch = "main" # @param ["main", "moe"]
trust_remote_code = False # @param {type:"boolean"}
clone_tensors = True # @param {type:"boolean"}

command = "mergekit-moe" if branch == "moe" else "mergekit-yaml"
trust_remote_code_option = "--trust-remote-code" if trust_remote_code else "--no-trust-remote-code"
clone_tensors_option = "--clone-tensors" if clone_tensors else "--no-clone-tensors"

print(f"Executing command for the '{branch}' branch...")
!{command} config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle {trust_remote_code_option} {clone_tensors_option}

from huggingface_hub import ModelCard, ModelCardData
from jinja2 import Template

username = "Abhaykoul" # @param {type:"string"}

# Jinja2 source of the model card (README.md). The leading block between the
# two `---` lines is the card's YAML metadata; the `{%- ... %}` markers trim
# the whitespace in front of them so the loops do not emit blank lines.
# The recipe is rendered inside a fenced ```yaml block, which is explicitly
# closed so that the README does not end inside an open code block.
template_text = """
---
license: apache-2.0
tags:
- merge
- mergekit
- vortexmergekit
{%- for model in models %}
- {{ model }}
{%- endfor %}
---

# {{ model_name }}

Hey there! 👋 Welcome to the {{ model_name }}! This is a merge of multiple models brought together using the awesome [VortexMerge kit](https://colab.research.google.com/drive/1YjcvCLuNG1PK7Le6_4xhVU5VpzTwvGhk#scrollTo=UG5H2TK4gVyl).

Let's see what we've got in this merge:

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }}) 🚀
{%- endfor %}

## 🧩 Configuration

```yaml
{{ yaml_config }}
```
"""

# Create a Jinja template object (stripped so the card starts with `---`)
jinja_template = Template(template_text.strip())


def _models_from_config(data):
    """Collect the source model ids named in a parsed mergekit config.

    Three config layouts are recognised, checked in this order:

    * a top-level ``models`` list -- only entries that carry a
      ``parameters`` key are kept;
    * a top-level ``parameters`` key -- every source of the first slice;
    * a top-level ``slices`` list -- the first source of every slice.

    Args:
        data: Mapping produced by parsing the YAML recipe.

    Returns:
        Model ids in first-seen order with duplicates removed, so a recipe
        that reuses one model across many slices lists it only once.

    Raises:
        ValueError: If none of the recognised top-level keys is present.
    """
    if "models" in data:
        found = [entry["model"] for entry in data["models"] if "parameters" in entry]
    elif "parameters" in data:
        found = [source["model"] for source in data["slices"][0]["sources"]]
    elif "slices" in data:
        found = [piece["sources"][0]["model"] for piece in data["slices"]]
    else:
        raise ValueError("No models or slices found in yaml config")
    # dict.fromkeys keeps insertion order while dropping repeated ids.
    return list(dict.fromkeys(found))


# Get list of models from config
data = yaml.safe_load(yaml_config)
models = _models_from_config(data)

# Fill the template. The recipe is stripped so that the code fence in the
# template always opens and closes on its own line, whatever leading or
# trailing newlines the recipe string has.
content = jinja_template.render(
    model_name=MODEL_NAME,
    models=models,
    yaml_config=yaml_config.strip(),
    username=username,
)

# Save the model card next to the merged weights
card = ModelCard(content)
card.save('merge/README.md')

from google.colab import userdata
from huggingface_hub import HfApi

# Authenticate with the token stored under "HF_TOKEN" in the Colab secrets tab.
api = HfApi(token=userdata.get("HF_TOKEN"))

# Target repository: <username>/<MODEL_NAME>.
repo_id = f"{username}/{MODEL_NAME}"

# Create the model repository; exist_ok=True lets an existing repo be reused.
api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

# Upload the contents of the local "merge" folder. Left as the final
# expression so the returned commit information is displayed.
api.upload_folder(repo_id=repo_id, folder_path="merge")

Cloning into 'mergekit'...
remote: Enumerating objects: 1219, done.[K
remote: Counting objects: 100% (647/647), done.[K
remote: Compressing objects: 100% (244/244), done.[K
remote: Total 1219 (delta 538), reused 426 (delta 403), pack-reused 572[K
Receiving objects: 100% (1219/1219), 342.05 KiB | 5.10 MiB/s, done.
Resolving deltas: 100% (829/829), done.
/content/mergekit
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m394.9/394.9 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.

model-00002-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00001-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00012.safetensors:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Upload 13 LFS files:   0%|          | 0/13 [00:00<?, ?it/s]

model-00004-of-00012.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

model-00005-of-00012.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00006-of-00012.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

model-00007-of-00012.safetensors:   0%|          | 0.00/1.90G [00:00<?, ?B/s]

model-00008-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00009-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00010-of-00012.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00011-of-00012.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00012-of-00012.safetensors:   0%|          | 0.00/235M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Abhaykoul/Papa-mistral/commit/7a4517faf514c30cc815983afe75a93040207967', commit_message='Upload folder using huggingface_hub', commit_description='', oid='7a4517faf514c30cc815983afe75a93040207967', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Print the command-line help of the `mergekit-moe` entry point.
!mergekit-moe --help

In [None]:
# Print the command-line help of the `mergekit-yaml` entry point.
!mergekit-yaml --help