# Recipe Pipeline
Configure your ⚙️ Pipeline Parameters, then run all cells ▶️

> 🔧 **Adjust parameters** such as `--cpu-tensors-max-size` or `--gpu-quants` as needed for your specific hardware.

> ⚠️ q\*_K and q\*_KV quant names must end with a capital "K" or "KV" (e.g. `q4_K`, `q8_KV`). All other quant names are entirely lowercase.

- List of quants compatible with `ik_llama.cpp`:

> iq1_bn iq1_kt iq1_m iq1_s iq1_s_r4 iq2_bn iq2_bn_r4 iq2_k iq2_k_r4 iq2_kl iq2_ks iq2_kt iq2_m iq2_m_r4 iq2_s iq2_xs iq2_xs_r4 iq2_xxs iq2_xxs_r4 iq3_k iq3_k_r4 iq3_kl iq3_ks iq3_kt iq3_m iq3_s iq3_s_r4 iq3_xs iq3_xxs iq3_xxs_r4 iq4_k iq4_k_r4 iq4_ks iq4_ks_r4 iq4_kss iq4_kt iq4_nl iq4_nl_r4 iq4_xs iq4_xs_r8 iq5_k iq5_k_r4 iq5_ks iq5_ks_r4 iq6_k q1_m_r4 q2_K q2_k_r4 q2_k_s q3_K q3_k_l q3_k_m q3_k_r4 q3_k_s q4_0 q4_0_4_4 q4_0_4_8 q4_0_8_8 q4_0_r8 q4_1 q4_K q4_k_m q4_k_r4 q4_k_s q5_0 q5_0_r4 q5_1 q5_K q5_k_m q5_k_r4 q5_k_s q6_0 q6_0_r4 q6_K q6_k_r4 q8_0 q8_0_r8 q8_k_r8 q8_KV q8_kv_r8

- List of [quants compatible](https://github.com/ggml-org/llama.cpp/blob/master/tools/quantize/README.md) with `llama.cpp`:

> iq1_m iq1_s iq2_m iq2_s iq2_xs iq2_xxs iq3_m iq3_s iq3_xs iq3_xxs iq4_nl iq4_xs mxfp4_moe tq1_0 tq2_0 q2_K q2_k_s q3_K q3_k_l q3_k_m q3_k_s q4_0 q4_1 q4_K q4_k_m q4_k_s q5_0 q5_1 q5_K q5_k_m q5_k_s q6_K q8_0

See https://huggingface.co/Thireus/collections for the complete list of supported models and available quants - NOT ALL QUANTS ARE AVAILABLE! PLEASE CHECK FIRST!

Need help choosing your quants? See how quants perform on different hardware here: https://github.com/Thireus/GGUF-Tool-Suite/tree/main/quants_graphs

> Recipe files can also be turned back into Google Colab pipeline parameters - [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Thireus/GGUF-Tool-Suite/blob/main/recipe_to_colab_params.ipynb) or locally with `recipe_to_colab_params.py`.

In [None]:
# @title ⚙️ Pipeline Parameters
# Every variable below is a Colab form field; later cells read them as globals and
# forward most of them to quant_assign.py as command-line flags.

# repo_url is cloned by the setup cell; model_name selects the models/<model_name> directory;
# model_name and model_link are also forwarded to quants_regex_merger.sh.
repo_url = "https://github.com/Thireus/GGUF-Tool-Suite.git"         #@param {type:"string"}
model_name = "DeepSeek-R1-0528"                                     #@param {type:"string"}
model_link = "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528"  #@param {type:"string"}

# regex lists as Python lists of strings - CPU/GPU-friendly tensor names can be found in *.recipe file of the model directory
cpu_tensors = [r"^blk\.([3-9]|[1-5][0-9]|60)\.ffn_down_exps\.weight$", r"^blk\.([3-9]|[1-5][0-9]|60)\.ffn_up_exps\.weight$", r"^blk\.([3-9]|[1-5][0-9]|60)\.ffn_gate_exps\.weight$"]   #@param {type:"raw"}
gpu_tensors = [r".*"]    #@param {type:"raw"}

# quant types for cpu-friendly and gpu-friendly tensor assignments
cpu_quants = ["iq4_ks", "iq3_k", "iq2_ks", "iq1_m_r4"]   #@param {type:"raw"}
gpu_quants = ["q8_0", "iq5_k_r4", "iq6_k"]              #@param {type:"raw"}

# sizes & tuning
# The two *_max_size values are passed as strings to --cpu-tensors-max-size / --gpu-tensors-max-size;
# an empty string omits the flag. NOTE(review): a bare number is presumably a size in GiB - confirm in quant_assign.py.
cpu_tensors_max_size = "230"    #@param {type:"string"}
gpu_tensors_max_size = "95%"    #@param {type:"string"}
tolerance = 0.01                #@param {type:"number"}
exponential_factor = 8          #@param {type:"integer"}

# assignment override
# *_assign_qtype is only forwarded when non-empty; *_assign_tensors entries may carry a
# "=<qtype>" suffix (as in the gpu example below), which the plotting cell strips before plotting.
cpu_assign_qtype = ""        #@param {type:"string"}
cpu_assign_tensors = []        #@param {type:"raw"}
gpu_assign_qtype = "iq4_xs"    #@param {type:"string"}
gpu_assign_tensors = [r"^blk\.([0-9]|[1-5][0-9]|60)\.attn_k_b\.weight$=q8_0"] #@param {type:"raw"}

# harmonization options (optional)
# harmonize_tensors: list-of-lists of regex strings; each inner list declares a group whose matching tensors (within a class) will be qtype harmonized layer-wise.
# Default harmonizes ffn_up_exps and ffn_gate_exps fused pairs used by ik_llama.cpp (speed boost ~15%).
# An empty list ([]) is forwarded as an explicit empty --harmonize-tensors argument.
harmonize_tensors = [[r"^blk\..*\.ffn_up_exps.*", r"^blk\..*\.ffn_gate_exps.*"]]   #@param {type:"raw"}
# harmonization_technique: 0=disabled, 1=max, 2=mean, 3=min (default)
harmonization_technique = 3    #@param {type:"integer"}

# calibration data filename ("ppl_results.csv" or "ppl_results_partial.csv" are automatically used by default when empty)
csv_filename = "" #@param {type:"string"}

# calibration data qtype (leave empty for auto-selection which will choose the lowest bpw) - list of available qtypes can be found in the calibration data file
qtype = ""                  #@param {type:"string"}

# additional flags (advanced and optional)
# Booleans become bare switches (e.g. --debug) only when True; the two *_irq_k numbers are
# always forwarded as --cpu-irq-k / --gpu-irq-k.
debug = False               #@param {type:"boolean"}
info = False                #@param {type:"boolean"}
ignore_f32 = False          #@param {type:"boolean"}
tensors_from_csv = False    #@param {type:"boolean"}
cpu_irq_k = 1.5             #@param {type:"number"}
gpu_irq_k = 1.5             #@param {type:"number"}
skip_gpg = False            #@param {type:"boolean"}

# other pipeline parameters (optional)
# display_graphs: when True, the plotting cell runs plot_ppl.py for each tensor regex above.
display_graphs = True       #@param {type:"boolean"}

In [None]:
# Start from the home directory and delete any previous checkout so that the
# next cell (which only clones when the directory is absent) fetches a fresh copy.
%cd ~
!rm -rf GGUF-Tool-Suite # Clear all the things

In [None]:
%%bash -e -s "$repo_url" "$model_name" "$csv_filename"
# Prepare the GGUF-Tool-Suite checkout for the selected model:
#   $1 = repository URL, $2 = model name (directory under models/), $3 = optional calibration CSV filename.
# Clones the repo when missing, then links the model's download.conf and calibration CSV
# into the repo root. Aborts (exit 1) when the model or its data is unavailable.
REPO_URL="$1"
MODEL_NAME="$2"
CSV_FILENAME="$3"
MODEL_DIR="models/$MODEL_NAME"

# 1) Clone (if needed) and cd into repo
if [ ! -d GGUF-Tool-Suite ]; then
  echo "↳ GGUF-Tool-Suite not found; cloning from $REPO_URL..."
  # Explicit target directory: a fork with a different repository name must still
  # end up in ./GGUF-Tool-Suite, which every later cell relies on.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$REPO_URL" GGUF-Tool-Suite \
    || { echo "❌ ERROR: failed to clone GGUF-Tool-Suite. Aborting."; exit 1; }
fi
cd GGUF-Tool-Suite

# 2) Verify model directory exists
if [ ! -d "$MODEL_DIR" ]; then
  echo "❌ ERROR: models/$MODEL_NAME not found; this model is not supported yet."
  exit 1
fi

# 3) Link download.conf (or abort if missing)
if [ -f "$MODEL_DIR/download.conf" ]; then
  ln -sf "$MODEL_DIR/download.conf" .
else
  echo "❌ ERROR: download.conf for '$MODEL_NAME' missing; this model isn't meant to be used here."
  exit 1
fi

# 4) Link calibration data csv file (or abort with warning)
rm -f ./*.csv
if [ -n "$CSV_FILENAME" ]; then
  # CSV filename provided
  if [ -f "$MODEL_DIR/$CSV_FILENAME" ]; then
    ln -sf "$MODEL_DIR/$CSV_FILENAME" custom_results.csv
  else
    echo "❌ ERROR: File 'models/$MODEL_NAME/$CSV_FILENAME' does not exist."
    # If the filename ends with "_interpolated.csv", try to suggest a matching "_partial" file.
    if [[ "$CSV_FILENAME" == *_interpolated.csv ]]; then
      # Everything before "_partial" is the stem shared with the source partial file.
      base="${CSV_FILENAME%_partial*}"
      suggestion=$(find "$MODEL_DIR" -maxdepth 1 -type f -name "${base}*" | grep -v "_interpolated.csv" | grep "_partial" | head -n 1)
      if [ -n "$suggestion" ]; then
        suggestion_name=$(basename "$suggestion")
        echo "⚠️  Hint: It looks like '$CSV_FILENAME' was an auto-generated interpolated file."
        echo "    You may need to use '$suggestion_name' instead in your 'csv_filename' pipeline parameter."
      else
        echo "ℹ️  No '_partial' CSV found either."
      fi
    fi
    exit 1
  fi
else
  # CSV filename not provided
  if [ -f "$MODEL_DIR/ppl_results.csv" ]; then
    ln -sf "$MODEL_DIR/ppl_results.csv" .
  elif [ -f "$MODEL_DIR/ppl_results_partial.csv" ]; then
    ln -sf "$MODEL_DIR/ppl_results_partial.csv" .
    echo "⚠️ WARNING: partial calibrated ppl_results_partial.csv found for '$MODEL_NAME'; will try to interpolate missing results as best as we can, but this will unlikely produce ppl-optimum quant mixes (so please don't use for production) - full calibrated data likely coming soon."
  else
    echo "❌ ERROR: ppl_results.csv (and ppl_results_partial.csv) missing; support for '$MODEL_NAME' likely coming soon."
    exit 1
  fi
fi

# 5) Make all scripts executable
chmod +x *.sh *.py


In [None]:
# Work from inside the selected model's directory; the following cells use paths
# relative to it (e.g. ../../quant_assign.py, ppl_results.csv).
%cd GGUF-Tool-Suite/models/{model_name}

In [None]:
%%bash -e -s "$csv_filename"
# Select the calibration CSV used for plotting and, when only partial data is
# available, generate an interpolated companion with fill_missing_metric.py.
#   $1 = optional calibration CSV filename (empty -> ppl_results.csv / ppl_results_partial.csv)
# Produces, in the current model directory:
#   csv_results_to_plot.csv        -> the selected (complete or partial) CSV
#   csv_results_to_plot_inter.csv  -> interpolated CSV (only when the data is partial)
CSV_FILENAME="$1"

# Drop a link left behind by an earlier run so stale interpolated data is never plotted.
rm -f csv_results_to_plot_inter.csv

# Interpolate the partial CSV given as $1 and link both it and the generated
# "<base>_*interpolated.csv" file. Exits the script with status 1 when the
# interpolation script produced no output.
interpolate_partial() {
  local partial_csv="$1"
  local base="${partial_csv%.*}"   # filename without its last extension
  local -a stale generated

  ln -sf "$partial_csv" csv_results_to_plot.csv

  # nullglob makes the pattern expand to zero words when nothing matches
  shopt -s nullglob

  # remove interpolated files from earlier runs so only fresh output is picked up
  stale=( "${base}"_*interpolated.csv )
  if [ "${#stale[@]}" -gt 0 ]; then
    rm -f -- "${stale[@]}"
  fi

  python ../../fill_missing_metric.py "$partial_csv"

  generated=( "${base}"_*interpolated.csv )
  shopt -u nullglob

  if [ "${#generated[@]}" -eq 0 ]; then
    echo "❌ ERROR: interpolation did not produce any '${base}_*interpolated.csv' output."
    exit 1
  fi
  # link the first match (glob results are sorted) so exactly one file is referenced
  ln -sf "${generated[0]}" csv_results_to_plot_inter.csv
}

if [ -n "$CSV_FILENAME" ]; then
  # strip the last extension (a name without a dot is left unchanged)
  base="${CSV_FILENAME%.*}"

  if [ ! -f "$CSV_FILENAME" ]; then
    echo "❌ ERROR: File '$CSV_FILENAME' not found."
    exit 1
  elif [ "${base##*_}" != "partial" ]; then
    # Complete CSV exists and is not a _partial file
    echo "Complete '$CSV_FILENAME' already exists. Skipping interpolation..."
    ln -sf "$CSV_FILENAME" csv_results_to_plot.csv
  else
    # File exists but filename ends with _partial -> interpolation path
    echo "Interpolation of '$CSV_FILENAME' necessary."
    interpolate_partial "$CSV_FILENAME"
  fi
elif [ -f ppl_results.csv ]; then
  echo "Complete ppl_results.csv already exists. Skipping interpolation..."
  ln -sf ppl_results.csv csv_results_to_plot.csv
elif [ -f ppl_results_partial.csv ]; then
  echo "Interpolation of ppl_results.csv necessary."
  interpolate_partial ppl_results_partial.csv
else
  echo "❌ Error: No calibration data file found. Aborting."
  exit 1
fi

In [None]:
# When graphs are enabled, make a patched copy of plot_ppl.py (plot_ppl.tmp.py):
# the sed script comments out the `root = tk.Tk()` line and every line starting with
# `root.`, then appends `plt.show()` at the end of the file.
# NOTE(review): presumably this lets the plots render inline instead of in a Tk window - confirm.
![ "$display_graphs" = "True" ] && cp ../../plot_ppl.py plot_ppl.tmp.py && \
sed -Ei \
  -e '/^[[:space:]]*root[[:space:]]*=[[:space:]]*tk\.Tk\(\)/s|.*|# &|' \
  -e '/^[[:space:]]*root\./s|.*|# &|' \
  -e '$a plt.show()' \
  plot_ppl.tmp.py

import os
# True when the previous cell linked an interpolated CSV; it is then passed via --interp_csv
interp_csv = os.path.isfile("csv_results_to_plot_inter.csv")

if display_graphs:
  # Return the regex part of an entry, dropping anything from the first "=" onwards
  # (assignment entries have the form "<regex>=<qtype>")
  def strip_assign(regex):
      return regex.split('=')[0]

  # Run the plotting script once per regex in regex_list.
  #   name       - label printed as a heading before the plots
  #   regex_list - tensor regexes; nothing is printed or plotted when it is empty
  #   strip_eq   - when True, entries are passed through strip_assign() first
  def run_for_list(name, regex_list, strip_eq=False):
      if not regex_list:
        return
      print(f"## Using `{name}`")
      for rx in regex_list:
        clean_rx = strip_assign(rx) if strip_eq else rx
        # the printed command mirrors the %run line below, so it can be copied into a new cell
        if interp_csv:
          print(f"%run plot_ppl.tmp.py csv_results_to_plot.csv --interp_csv csv_results_to_plot_inter.csv --tensors '{clean_rx}'")
          %run plot_ppl.tmp.py csv_results_to_plot.csv --interp_csv csv_results_to_plot_inter.csv --tensors '{clean_rx}'
        else:
          print(f"%run plot_ppl.tmp.py csv_results_to_plot.csv --tensors '{clean_rx}'")
          %run plot_ppl.tmp.py csv_results_to_plot.csv --tensors '{clean_rx}'
      print()

  # Plot every parameter list; the *_assign_tensors entries carry "=<qtype>" suffixes to strip
  run_for_list("gpu_assign_tensors", gpu_assign_tensors, strip_eq=True)
  run_for_list("cpu_assign_tensors", cpu_assign_tensors, strip_eq=True)
  run_for_list("gpu_tensors", gpu_tensors)
  run_for_list("cpu_tensors", cpu_tensors)

In [None]:
# NOTE(review): pgpy is presumably used by quant_assign.py for signature checks unless skip_gpg is set - confirm.
%pip install pgpy # Install dependency to validate gpg signatures

In [None]:
import os, glob
import shlex, subprocess
import json

def add_flag(cmd, key, val):
    """Append the option ``--<key>`` to the argument list *cmd*, in place.

    A boolean *val* is treated as a switch: the bare option is added when it
    is ``True`` and nothing is added when it is ``False``. ``None`` adds
    nothing. Any other value is added as ``--<key> <str(val)>``.
    """
    option = f"--{key}"

    if isinstance(val, bool):
        # switch-style option: present or absent, never followed by a value
        if val:
            cmd.append(option)
        return

    if val is None:
        return

    cmd.extend((option, str(val)))

def add_list_flag(cmd, key, vals):
    """Append ``--<key>`` followed by one argument per item of *vals* to *cmd*.

    Nothing is added when *vals* is empty or ``None``. A bare string is
    treated as a single value (instead of being split into characters), and
    every item is converted with ``str()`` so the resulting list is safe to
    pass to ``subprocess``.
    """
    if not vals:
        return
    if isinstance(vals, str):
        # a form field filled with "iq4_ks" rather than ["iq4_ks"]
        vals = [vals]
    cmd.append(f"--{key}")
    cmd.extend(str(item) for item in vals)

def add_list_of_list_flag(cmd, key, val_groups, *, allow_empty=False):
    """Append ``--<key>`` and its group arguments to *cmd*, in place.

    *val_groups* may be:
      * ``None`` - nothing is added;
      * an empty string / list / tuple - nothing is added, unless
        *allow_empty* is true, in which case ``--<key> ""`` is added;
      * a non-empty string - added as-is as the single argument;
      * an iterable of groups - each list/tuple group becomes one
        comma-joined argument, each string group is added unchanged.

    Raises ``TypeError`` for a group that is neither a string nor a
    list/tuple.
    """
    if val_groups is None:
        return

    option = f"--{key}"
    is_text = isinstance(val_groups, str)
    is_blank = (is_text and val_groups == "") or (
        isinstance(val_groups, (list, tuple)) and len(val_groups) == 0
    )

    if is_blank:
        # only an explicit request emits the empty argument
        if allow_empty:
            cmd.extend((option, ""))
        return

    if is_text:
        # already joined by the caller
        cmd.extend((option, val_groups))
        return

    cmd.append(option)
    for grp in val_groups:
        if isinstance(grp, str):
            cmd.append(grp)
        elif isinstance(grp, (list, tuple)):
            cmd.append(",".join(map(str, grp)))
        else:
            raise TypeError(f"Unsupported group type for --{key}: {type(grp)}")

# Determine which calibration CSV to hand to quant_assign.py
if csv_filename:
    if not os.path.isfile(csv_filename):
        # Fail loudly instead of silently falling back to different calibration data
        raise FileNotFoundError(f"Calibration data file '{csv_filename}' not found.")
    if csv_filename.endswith("_partial.csv"):
        # Partial data: use the interpolated file generated from *this* CSV
        # ("<base>_*interpolated.csv"), not any interpolated file lying around.
        partial_base = csv_filename[: -len(".csv")]
        partial_files = glob.glob(f"{glob.escape(partial_base)}_*interpolated.csv")
        if not partial_files:
            raise FileNotFoundError(
                f"No suitable input file found: {partial_base}_*interpolated.csv"
            )
        input_file = sorted(partial_files)[0]  # Use the first one alphabetically
    else:
        input_file = csv_filename
elif os.path.isfile("ppl_results.csv"):
    input_file = "ppl_results.csv"
else:
    # Search for the first matching file in the current directory
    partial_files = glob.glob("*_*interpolated.csv")
    if partial_files:
        input_file = sorted(partial_files)[0]  # Use the first one alphabetically
    else:
        raise FileNotFoundError("No suitable input file found: ppl_results.csv, *_*interpolated.csv, or *_partial_*interpolated.csv")

# Build the quant_assign.py command line from the pipeline parameters
cmd = ["python", "../../quant_assign.py", input_file]

add_flag(cmd, "tolerance", tolerance)
add_flag(cmd, "cpu-irq-k", cpu_irq_k)
add_flag(cmd, "gpu-irq-k", gpu_irq_k)
# String options are only forwarded when non-empty
for flag_name, flag_value in (
    ("qtype", qtype),
    ("cpu-assign-qtype", cpu_assign_qtype),
    ("gpu-assign-qtype", gpu_assign_qtype),
    ("cpu-tensors-max-size", cpu_tensors_max_size),
    ("gpu-tensors-max-size", gpu_tensors_max_size),
):
    if flag_value:
        add_flag(cmd, flag_name, flag_value)
add_flag(cmd, "exponential-factor", exponential_factor)
add_flag(cmd, "debug", debug)
add_flag(cmd, "info", info)
add_flag(cmd, "ignore-f32", ignore_f32)
add_flag(cmd, "tensors-from-csv", tensors_from_csv)
add_flag(cmd, "skip-gpg", skip_gpg)

add_list_flag(cmd, "cpu-tensors", cpu_tensors)
add_list_flag(cmd, "gpu-tensors", gpu_tensors)
add_list_flag(cmd, "cpu-quants", cpu_quants)
add_list_flag(cmd, "gpu-quants", gpu_quants)
add_list_flag(cmd, "cpu-assign-tensors", cpu_assign_tensors)
add_list_flag(cmd, "gpu-assign-tensors", gpu_assign_tensors)

# An explicit empty list is forwarded as `--harmonize-tensors ""`;
# other falsy values (None, "") omit the flag entirely.
if harmonize_tensors or harmonize_tensors == []:
    add_list_of_list_flag(
        cmd,
        "harmonize-tensors",
        harmonize_tensors,
        allow_empty=(harmonize_tensors == []),
    )
# 0 is a meaningful value (disabled), so it must still be forwarded
if harmonization_technique or harmonization_technique == 0:
    add_flag(cmd, "harmonization-technique", harmonization_technique)

# Print for verification
print("\nRunning quant_assign.py command:")
print(" ".join(shlex.quote(c) for c in cmd))

# Run quant_assign.py
result = subprocess.run(cmd, capture_output=True, text=True)

# Print stderr and stdout for debugging
print("quant_assign.py stdout:", result.stdout)
print("quant_assign.py stderr:", result.stderr)

if result.returncode != 0:
    print("quant_assign.py failed:", result.stderr)
    raise SystemExit(1)

# Merge regex: quant_assign.py's stdout is piped into quants_regex_merger.sh
merge_cmd = [
    "bash", "../../quants_regex_merger.sh",
    "--add-ppl", "0"
]
if model_name:  # Checks for not None and not empty
    merge_cmd += ["--model-name", model_name]
if model_link:
    merge_cmd += ["--model-link", model_link]
merge = subprocess.run(merge_cmd, input=result.stdout, capture_output=True, text=True)

# Print final output
print(merge.stdout)

if merge.returncode != 0:
    # Surface the merger's own diagnostics; otherwise the failure is opaque
    print("quants_regex_merger.sh failed:", merge.stderr)
    raise SystemExit(1)

In [None]:
import glob
from google.colab import files

# List all .recipe files whose name starts with the model name.
# glob.escape keeps glob metacharacters in model_name ([, ], *, ?) literal;
# sorting makes the listing and download order deterministic.
recipe_files = sorted(glob.glob(f"{glob.escape(model_name)}*.recipe"))

if not recipe_files:
    # Avoid a misleading "Downloading" header followed by nothing
    print(f"⚠️ No .recipe file found for '{model_name}' in the current directory; nothing to download.")
else:
    # Print the found files
    print("Downloading .recipe file:")
    for recipe_file in recipe_files:
        print(f"- {recipe_file}")

    # Auto-start download (triggers a browser download per file)
    for recipe_file in recipe_files:
        files.download(recipe_file)