<a href="https://colab.research.google.com/github/OE-LUCIFER/youtube-video/blob/main/VortexMergeKit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import yaml

# Name of the resulting model. The upload step also uses it as the repository
# name on the Hub (`{username}/{MODEL_NAME}`).
MODEL_NAME = "4x1b"
# Merge configuration, kept as a raw YAML string. The next cell writes it
# verbatim to `config.yaml` for the mergekit CLI, parses it to list the source
# models, and embeds it in the generated model card. This one is a
# mixture-of-experts config: each entry under `experts` names a source model
# together with its `positive_prompts`.
yaml_config = """
base_model: OEvortex/HelpingAI-Lite
dtype: float16
gate_mode: cheap_embed
experts:
  - source_model: OEvortex/HelpingAI-unvelite
    positive_prompts: ["You are an helpful unbounded, uncensored assistant."]
  - source_model: OEvortex/lite-hermes
    positive_prompts: ["You are helpful assistant."]
  - source_model: OEvortex/HelpingAI-Lite
    positive_prompts: ["You are a coding assistant."]
  - source_model: OEvortex/HelpingAI-Lite-1.5T
    positive_prompts: ["You are helpful and general-purpose assistant."]
"""

In [2]:
!git clone https://github.com/cg123/mergekit.git
%cd mergekit
!pip install -q -e .
!pip install -q git+https://github.com/huggingface/transformers.git
# @title 🌟  VortexMergekit
# @markdown  Select the branch to determine the command to run and set the trust_remote_code parameter.
# Assuming yaml_config is defined elsewhere in your script
with open('config.yaml', 'w', encoding="utf-8") as f:
    f.write(yaml_config)

branch = "moe" # @param ["main", "moe"]
trust_remote_code = True # @param {type:"boolean"}
clone_tensors = True # @param {type:"boolean"}

command = "mergekit-moe" if branch == "moe" else "mergekit-yaml"
trust_remote_code_option = "--trust-remote-code" if trust_remote_code else "--no-trust-remote-code"
clone_tensors_option = "--clone-tensors" if clone_tensors else "--no-clone-tensors"

print(f"Executing command for the '{branch}' branch...")
!{command} config.yaml merge --copy-tokenizer --allow-crimes --out-shard-size 1B --lazy-unpickle {trust_remote_code_option} {clone_tensors_option}

from huggingface_hub import ModelCard, ModelCardData
from jinja2 import Template

username = "Abhaykoul" # @param {type:"string"}

# The two kinds of model card differ only in one tag and a few words of the
# introduction; everything else is shared by a single template.
if branch == "main":
    card_kind = {"tag": "merge", "description": "a merge of multiple models", "noun": "merge"}
else:  # MOE branch
    card_kind = {"tag": "moe", "description": "a MOE", "noun": "model"}

# The leading newline is removed with .strip() below so that the YAML front
# matter (`---`) is the very first line of the README.
template_text = """
---
license: apache-2.0
tags:
- {{ kind_tag }}
- mergekit
- vortexmergekit
{%- for model in models %}
- {{ model }}
{%- endfor %}
---

# {{ model_name }}

Hey there! 👋 Welcome to the {{ model_name }}! This is {{ description }} brought together using the awesome [VortexMerge kit](https://colab.research.google.com/drive/1YjcvCLuNG1PK7Le6_4xhVU5VpzTwvGhk#scrollTo=UG5H2TK4gVyl).

Let's see what we've got in this {{ noun }}:

{%- for model in models %}
* [{{ model }}](https://huggingface.co/{{ model }}) 🚀
{%- endfor %}

## 🧩 Configuration

```yaml
{{ yaml_config }}
```
"""
# Create a Jinja template object
jinja_template = Template(template_text.strip())

# Collect the source model names from the config. The layout depends on the
# kind of config: "main" configs list them under `models` or `slices`,
# MoE configs under `experts`.
data = yaml.safe_load(yaml_config)
if branch == "main":
    if "models" in data:
        # Only entries that carry their own `parameters` are listed.
        models = [entry["model"] for entry in data["models"] if "parameters" in entry]
    elif "parameters" in data:
        # Top-level `parameters` with slices: list every source of the first slice.
        models = [source["model"] for source in data["slices"][0]["sources"]]
    elif "slices" in data:
        # Slices only: list the first source of each slice.
        models = [piece["sources"][0]["model"] for piece in data["slices"]]
    else:
        raise Exception("No models or slices found in yaml config")
else:  # MOE branch specific model extraction
    models = [expert["source_model"] for expert in data["experts"]]

# Fill the template. The config is stripped so the fenced block opens and
# closes cleanly regardless of leading/trailing newlines in `yaml_config`.
content = jinja_template.render(
    model_name=MODEL_NAME,
    models=models,
    yaml_config=yaml_config.strip(),
    username=username,
    kind_tag=card_kind["tag"],
    description=card_kind["description"],
    noun=card_kind["noun"],
)

# Save the model card next to the merged weights so it is uploaded with them
card = ModelCard(content)
card.save('merge/README.md')

from google.colab import userdata
from huggingface_hub import HfApi

# The access token is read from the Colab secrets tab (secret name: HF_TOKEN).
api = HfApi(token=userdata.get("HF_TOKEN"))

# Both calls address the same repository: <username>/<MODEL_NAME>.
target_repo = f"{username}/{MODEL_NAME}"

# Create the model repository if needed; an existing one is reused.
api.create_repo(repo_id=target_repo, repo_type="model", exist_ok=True)

# Push the contents of the local `merge` folder; the call's result is the
# cell's displayed output.
api.upload_folder(repo_id=target_repo, folder_path="merge")

Cloning into 'mergekit'...
remote: Enumerating objects: 1312, done.[K
remote: Counting objects: 100% (732/732), done.[K
remote: Compressing objects: 100% (297/297), done.[K
remote: Total 1312 (delta 588), reused 487 (delta 434), pack-reused 580[K
Receiving objects: 100% (1312/1312), 373.13 KiB | 5.74 MiB/s, done.
Resolving deltas: 100% (886/886), done.
/content/mergekit
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m394.9/394.9 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.

model-00001-of-00004.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/822M [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Abhaykoul/4x1b/commit/f1943f5f8101a75b39cb319d5b1ac5a593835fa4', commit_message='Upload folder using huggingface_hub', commit_description='', oid='f1943f5f8101a75b39cb319d5b1ac5a593835fa4', pr_url=None, pr_revision=None, pr_num=None)

# Configs


Mixture of experts
```yml
base_model: OEvortex/HelpingAI-Lite
dtype: float16
gate_mode: cheap_embed
experts:
  - source_model: OEvortex/HelpingAI-unvelite
    positive_prompts: ["You are an helpful unbounded, uncensored assistant."]
  - source_model: OEvortex/lite-hermes
    positive_prompts: ["You are helpful assistant."]
  - source_model: OEvortex/HelpingAI-Lite
    positive_prompts: ["You are a coding assistant."]
  - source_model: OEvortex/HelpingAI-Lite-1.5T
    positive_prompts: ["You are helpful and general-purpose assistant."]

```

Merge slerp
```yml
slices:
  - sources:
      - model: psmathur/orca_mini_v3_13b
        layer_range: [0, 40]
      - model: garage-bAInd/Platypus2-13B
        layer_range: [0, 40]
# or, the equivalent models: syntax:
# models:
#   - model: psmathur/orca_mini_v3_13b
#   - model: garage-bAInd/Platypus2-13B
merge_method: slerp
base_model: psmathur/orca_mini_v3_13b
parameters:
  t:
    - filter: self_attn
      value: [0, 0.5, 0.3, 0.7, 1]
    - filter: mlp
      value: [1, 0.5, 0.7, 0.3, 0]
    - value: 0.5 # fallback for rest of tensors
dtype: float16
```

Merge passthrough
```yml
dtype: float16
merge_method: passthrough
slices:
- sources:
  - layer_range: [0, 8]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [4, 12]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [8, 16]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [12, 20]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [16, 24]
    model: mistralai/Mistral-7B-Instruct-v0.2
- sources:
  - layer_range: [20, 28]
    model: mistralai/Mistral-7B-Instruct-v0.2
```

Merge linear
```yml
models:
  - model: psmathur/orca_mini_v3_13b
    parameters:
      weight: 1.0
  - model: WizardLM/WizardLM-13B-V1.2
    parameters:
      weight: 0.3
  - model: garage-bAInd/Platypus2-13B
    parameters:
      weight: 0.5
merge_method: linear
dtype: float16
```

Merge ties
```yml
models:
  - model: psmathur/orca_mini_v3_13b
    parameters:
      density: [1, 0.7, 0.1] # density gradient
      weight: 1.0
  - model: garage-bAInd/Platypus2-13B
    parameters:
      density: 0.5
      weight: [0, 0.3, 0.7, 1] # weight gradient
  - model: WizardLM/WizardMath-13B-V1.0
    parameters:
      density: 0.33
      weight:
        - filter: mlp
          value: 0.5
        - value: 0
merge_method: ties
base_model: TheBloke/Llama-2-13B-fp16
parameters:
  normalize: true
  int8_mask: true
dtype: float16
```