# Recipe Pipeline
Configure your parameters below, then run all cells ▶️

In [None]:
# @title ⚙️ Pipeline Parameters
# Every assignment below is exposed as a Colab form field through its trailing
# param annotation, so each one must stay on a single line. Later cells forward
# these values to quant_assign.py and quants_regex_merger.sh as CLI arguments.

# Git repository cloned by the setup cell when ./GGUF-Tool-Suite is missing.
repo_url = "https://github.com/Thireus/GGUF-Tool-Suite.git"         #@param {type:"string"}
# Must name an existing directory under GGUF-Tool-Suite/models/; the setup cell
# aborts otherwise.
model_name = "DeepSeek-R1-0528"                                     #@param {type:"string"}
# Passed to quants_regex_merger.sh as --model-link.
model_link = "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528"  #@param {type:"string"}

# regex lists (as Python lists of strings)
# Forwarded as --gpu-tensors / --cpu-tensors. The default cpu patterns match
# the ffn_down/up/gate_exps weights of blocks 3 through 60; the gpu pattern
# matches any name.
gpu_tensors = [r".*"]    #@param {type:"raw"}
cpu_tensors = [r"blk\.([3-9]|[1-5][0-9]|60)\.ffn_down_exps\.weight", r"blk\.([3-9]|[1-5][0-9]|60)\.ffn_up_exps\.weight", r"blk\.([3-9]|[1-5][0-9]|60)\.ffn_gate_exps\.weight"]   #@param {type:"raw"}

# quant types
# Forwarded as --cpu-quants / --gpu-quants.
cpu_quants = ["iq4_ks", "iq3_k", "iq2_k", "iq1_m_r4"]   #@param {type:"raw"}
gpu_quants = ["q8_0", "iq5_k_r4", "iq6_k"]              #@param {type:"raw"}

# sizes & tuning
# Kept as strings because they are passed verbatim on the command line.
# NOTE(review): units are not visible in this notebook (presumably GiB for the
# plain number, and a percentage form is also accepted) - confirm against
# quant_assign.py.
cpu_tensors_max_size = "230"       #@param {type:"string"}
gpu_tensors_max_size = "95%"       #@param {type:"string"}
tolerance = 0.01                #@param {type:"number"}
exponential_factor = 8          #@param {type:"integer"}

# assignment override
# Forwarded as --{gpu,cpu}-assign-qtype and --{gpu,cpu}-assign-tensors.
# None (qtype) or an empty list (tensors) means the flag is omitted entirely.
# The default gpu entry uses a "<regex>=<qtype>" form.
gpu_assign_qtype = "iq4_xs"    #@param {type:"string"}
gpu_assign_tensors = [r"blk\.([0-9]|[1-5][0-9]|60)\.attn_k_b\.weight=q8_0"] #@param {type:"raw"}
cpu_assign_qtype = None         #@param {type:"raw"}
cpu_assign_tensors = []         #@param {type:"raw"}

# additional flags
# Booleans become bare switches (--debug, --info, ...) only when True;
# the irq-k numbers are passed as --cpu-irq-k / --gpu-irq-k.
debug = False              #@param {type:"boolean"}
info = False                #@param {type:"boolean"}
ignore_f32 = False         #@param {type:"boolean"}
tensors_from_csv = False   #@param {type:"boolean"}
cpu_irq_k = 1.5            #@param {type:"number"}
gpu_irq_k = 1.5            #@param {type:"number"}

In [None]:
%%bash -e -s "$repo_url" "$model_name"
# Positional arguments supplied by the cell magic: repository URL and model name.
REPO_URL="$1"
MODEL_NAME="$2"
# Built once and always expanded in double quotes, so a model name containing
# spaces or glob characters cannot split or expand the paths below.
MODEL_DIR="models/$MODEL_NAME"

# 1) Clone (if needed) and cd into repo
if [ ! -d GGUF-Tool-Suite ]; then
  echo "↳ GGUF-Tool-Suite not found; cloning from $REPO_URL..."
  git clone "$REPO_URL" \
    || { echo "❌ ERROR: failed to clone GGUF-Tool-Suite. Aborting."; exit 1; }
fi
cd GGUF-Tool-Suite

# 2) Verify model directory exists
if [ ! -d "$MODEL_DIR" ]; then
  echo "❌ ERROR: models/$MODEL_NAME not found; this model is not supported yet."
  exit 1
fi

# 3) Link download.conf (or abort if missing)
# The link is created in the repo root; its relative target resolves from there.
if [ -f "$MODEL_DIR/download.conf" ]; then
  ln -sf "$MODEL_DIR/download.conf" .
else
  echo "❌ ERROR: download.conf for '$MODEL_NAME' missing; this model isn't meant to be used here."
  exit 1
fi

# 4) Link ppl_results.csv (or abort with warning)
if [ -f "$MODEL_DIR/ppl_results.csv" ]; then
  ln -sf "$MODEL_DIR/ppl_results.csv" .
else
  echo "⚠️ WARNING: ppl_results.csv missing; support for '$MODEL_NAME' likely coming soon."
  exit 1
fi

# 5) Make all scripts executable
# The ./ prefix keeps a file whose name starts with '-' from being parsed as an option.
chmod +x ./*.sh ./*.py

In [None]:
# Switch the notebook's working directory to the selected model's folder so the
# next cell can use relative paths (ppl_results.csv, ../../quant_assign.py).
# The path is relative: this assumes the current directory is still the one that
# contains GGUF-Tool-Suite, so re-running this cell after it has succeeded is
# expected to fail.
%cd GGUF-Tool-Suite/models/{model_name}

In [None]:
import shlex, subprocess

def add_flag(cmd, key, val):
    """Append a single ``--key`` option to the argument list *cmd* in place.

    * ``None`` or ``False`` -> nothing is added.
    * ``True``              -> the bare switch ``--key`` is added.
    * anything else         -> ``--key`` followed by ``str(val)`` is added.
    """
    # Booleans are checked first so that True/False never get stringified.
    if isinstance(val, bool):
        if not val:
            return
        cmd.append(f"--{key}")
        return
    if val is None:
        return
    option = f"--{key}"
    cmd.extend((option, str(val)))

def add_list_flag(cmd, key, vals):
    """Append ``--key`` followed by every item of *vals* to *cmd* in place.

    Nothing is added when *vals* is empty or ``None``. A bare string is
    treated as a one-item list; extending with it directly would add one
    argument per character. Items are converted with ``str`` so that numeric
    entries remain valid subprocess arguments.
    """
    if not vals:
        return
    if isinstance(vals, str):
        vals = [vals]
    # Materialise first so cmd is left untouched if an item fails to convert.
    items = [str(item) for item in vals]
    cmd.append(f"--{key}")
    cmd.extend(items)

# Build the quant_assign.py command line from the notebook parameters.
# The relative paths assume the working directory is
# GGUF-Tool-Suite/models/<model_name>.
cmd = ["python", "../../quant_assign.py", "ppl_results.csv"]

# Scalar options: add_flag omits None values and False booleans.
add_flag(cmd, "tolerance", tolerance)
add_flag(cmd, "cpu-irq-k", cpu_irq_k)
add_flag(cmd, "gpu-irq-k", gpu_irq_k)
# None means --qtype is omitted; replace None here to pass it explicitly.
add_flag(cmd, "qtype", None)
add_flag(cmd, "cpu-assign-qtype", cpu_assign_qtype)
add_flag(cmd, "gpu-assign-qtype", gpu_assign_qtype)
add_flag(cmd, "cpu-tensors-max-size", cpu_tensors_max_size)
add_flag(cmd, "gpu-tensors-max-size", gpu_tensors_max_size)
add_flag(cmd, "exponential-factor", exponential_factor)
add_flag(cmd, "debug", debug)
add_flag(cmd, "info", info)
add_flag(cmd, "ignore-f32", ignore_f32)
add_flag(cmd, "tensors-from-csv", tensors_from_csv)

# Multi-value options: an empty list omits the flag altogether.
add_list_flag(cmd, "cpu-tensors", cpu_tensors)
add_list_flag(cmd, "gpu-tensors", gpu_tensors)
add_list_flag(cmd, "cpu-quants", cpu_quants)
add_list_flag(cmd, "gpu-quants", gpu_quants)
add_list_flag(cmd, "cpu-assign-tensors", cpu_assign_tensors)
add_list_flag(cmd, "gpu-assign-tensors", gpu_assign_tensors)

# Print for verification
#print("\nRunning command:")
#print(" \n".join(shlex.quote(c) for c in cmd))

# Run quant_assign.py; stdout is captured because it is piped into the merger below.
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
    print("quant_assign.py failed:", result.stderr)
    raise SystemExit(1)

# Merge regex: feed quant_assign.py's stdout to the merger script on stdin.
merge_cmd = [
    "bash", "../../quants_regex_merger.sh",
    "--model-name", model_name,
    "--add-ppl", "0",
    "--model-link", model_link,
]
merge = subprocess.run(merge_cmd, input=result.stdout, capture_output=True, text=True)

# Print final output
print(merge.stdout)

if merge.returncode != 0:
    # stderr is captured above, so it must be printed explicitly or the
    # reason for the failure is lost.
    print("quants_regex_merger.sh failed:", merge.stderr)
    raise SystemExit(1)